rapidsai · rapids-bot · Dec 12, 2024 · Nov 27, 2024 · Nov 27, 2024 · Dec 3, 2024
@@ -1,4 +1,4 @@
-# Copyright (c) 2022-2023, NVIDIA CORPORATION.
+# Copyright (c) 2022-2024, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -49,3 +49,4 @@
 europe_osm = Dataset(meta_path / "europe_osm.yaml")
 # 1.5 GB
 hollywood = Dataset(meta_path / "hollywood.yaml")
+amazon0302 = Dataset(meta_path / "amazon0302.yaml")
@@ -0,0 +1,26 @@
+name: amazon0302
+file_type: .gz
+description:
+  This network was collected by crawling the Amazon website. It is based on the
+  "Customers Who Bought This Item Also Bought" feature of the Amazon website.
+  If product i is frequently co-purchased with product j, the graph contains a
+  directed edge from i to j. The data was collected on March 02, 2003.
+author: J. Leskovec, L. Adamic and B. Huberman
+refs:
+  J. Leskovec, L. Adamic and B. Huberman. The Dynamics of Viral Marketing.
+  ACM Transactions on the Web (ACM TWEB), 1(1), 2007.
+delim: "\t"
+header: 3
+col_names:
+  - FromNodeId
+  - ToNodeId
+col_types:
+  - int32
+  - int32
+has_loop: false
+is_directed: true
+is_multigraph: false
+is_symmetric: false
+number_of_edges: 1234877
+number_of_nodes: 262111
+url: https://snap.stanford.edu/data/amazon0302.txt.gz
@@ -1,4 +1,4 @@
-# Copyright (c) 2023, NVIDIA CORPORATION.
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -38,6 +38,7 @@
     cit_patents,
     europe_osm,
     hollywood,
+    amazon0302,
     # twitter,
 )
 
@@ -71,4 +72,10 @@
     toy_graph_undirected,
 ]
 DEFAULT_DATASETS = [dolphins, netscience, karate_disjoint]
-BENCHMARKING_DATASETS = [soc_livejournal, cit_patents, europe_osm, hollywood]
+BENCHMARKING_DATASETS = [
+    soc_livejournal,
+    cit_patents,
+    europe_osm,
+    hollywood,
+    amazon0302,
+]
@@ -104,6 +104,7 @@ def is_symmetric(dataset):
         return True
     else:
         df = dataset.get_edgelist(download=True)
+        df.rename(columns={df.columns[0]: "src", df.columns[1]: "dst"}, inplace=True)
         df_a = df.sort_values("src")
 
         # create df with swapped src/dst columns

@@ -528,7 +528,7 @@ def create_list_series_from_2d_ar(ar, index):
         cp.arange(start=0, stop=len(data) + 1, step=n_cols), dtype="int32"
     )
     mask_col = cp.full(shape=n_rows, fill_value=True)
-    mask = cudf._lib.transform.bools_to_mask(as_column(mask_col))
+    mask = as_column(mask_col).as_mask()
     lc = cudf.core.column.ListColumn(
         data=None,
         size=n_rows,