diff --git a/python/cugraph/cugraph/datasets/__init__.py b/python/cugraph/cugraph/datasets/__init__.py index ac18274d354..ecf10f3c4ef 100644 --- a/python/cugraph/cugraph/datasets/__init__.py +++ b/python/cugraph/cugraph/datasets/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -49,3 +49,4 @@ europe_osm = Dataset(meta_path / "europe_osm.yaml") # 1.5 GB hollywood = Dataset(meta_path / "hollywood.yaml") +amazon0302 = Dataset(meta_path / "amazon0302.yaml") diff --git a/python/cugraph/cugraph/datasets/metadata/amazon0302.yaml b/python/cugraph/cugraph/datasets/metadata/amazon0302.yaml new file mode 100644 index 00000000000..b02c936a06e --- /dev/null +++ b/python/cugraph/cugraph/datasets/metadata/amazon0302.yaml @@ -0,0 +1,26 @@ +name: amazon0302 +file_type: .gz +description: + This network was collected by crawling the Amazon website. It is based on the + "Customers Who Bought This Item Also Bought" feature of the Amazon website. + If product i is frequently co-purchased with product j, the graph contains a + directed edge from i to j. The data was collected on March 02, 2003. +author: J. Leskovec, L. Adamic and B. Huberman +refs: + J. Leskovec, L. Adamic and B. Huberman. The Dynamics of Viral Marketing. + ACM Transactions on the Web (ACM TWEB), 1(1), 2007. 
+delim: "\t" +header: 3 +col_names: + - FromNodeId + - ToNodeId +col_types: + - int32 + - int32 +has_loop: false +is_directed: true +is_multigraph: false +is_symmetric: false +number_of_edges: 1234877 +number_of_nodes: 262111 +url: https://snap.stanford.edu/data/amazon0302.txt.gz diff --git a/python/cugraph/cugraph/testing/__init__.py b/python/cugraph/cugraph/testing/__init__.py index 2b4a4fd3ebf..5c89159bcff 100644 --- a/python/cugraph/cugraph/testing/__init__.py +++ b/python/cugraph/cugraph/testing/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -38,6 +38,7 @@ cit_patents, europe_osm, hollywood, + amazon0302, # twitter, ) @@ -71,4 +72,10 @@ toy_graph_undirected, ] DEFAULT_DATASETS = [dolphins, netscience, karate_disjoint] -BENCHMARKING_DATASETS = [soc_livejournal, cit_patents, europe_osm, hollywood] +BENCHMARKING_DATASETS = [ + soc_livejournal, + cit_patents, + europe_osm, + hollywood, + amazon0302, +] diff --git a/python/cugraph/cugraph/tests/utils/test_dataset.py b/python/cugraph/cugraph/tests/utils/test_dataset.py index 3873cd1c3e4..9895eb61c82 100644 --- a/python/cugraph/cugraph/tests/utils/test_dataset.py +++ b/python/cugraph/cugraph/tests/utils/test_dataset.py @@ -104,6 +104,7 @@ def is_symmetric(dataset): return True else: df = dataset.get_edgelist(download=True) + df.rename(columns={df.columns[0]: "src", df.columns[1]: "dst"}, inplace=True) df_a = df.sort_values("src") # create df with swapped src/dst columns diff --git a/python/cugraph/cugraph/utilities/utils.py b/python/cugraph/cugraph/utilities/utils.py index 5bad68a79e2..0257da4ffc0 100644 --- a/python/cugraph/cugraph/utilities/utils.py +++ b/python/cugraph/cugraph/utilities/utils.py @@ -528,7 +528,7 @@ def create_list_series_from_2d_ar(ar, index): 
cp.arange(start=0, stop=len(data) + 1, step=n_cols), dtype="int32" ) mask_col = cp.full(shape=n_rows, fill_value=True) - mask = cudf._lib.transform.bools_to_mask(as_column(mask_col)) + mask = as_column(mask_col).as_mask() lc = cudf.core.column.ListColumn( data=None, size=n_rows,