Skip to content

Commit

Permalink
Merge branch 'branch-25.02' into publish_xml
Browse files Browse the repository at this point in the history
  • Loading branch information
acostadon authored Dec 12, 2024
2 parents 89cb610 + 5a33526 commit 872f0e9
Show file tree
Hide file tree
Showing 5 changed files with 39 additions and 4 deletions.
3 changes: 2 additions & 1 deletion python/cugraph/cugraph/datasets/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2022-2023, NVIDIA CORPORATION.
# Copyright (c) 2022-2024, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
Expand Down Expand Up @@ -49,3 +49,4 @@
europe_osm = Dataset(meta_path / "europe_osm.yaml")
# 1.5 GB
hollywood = Dataset(meta_path / "hollywood.yaml")
amazon0302 = Dataset(meta_path / "amazon0302.yaml")
26 changes: 26 additions & 0 deletions python/cugraph/cugraph/datasets/metadata/amazon0302.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
name: amazon0302
file_type: .gz
description:
This network was collected by crawling the Amazon website. It is based on the
"Customers Who Bought This Item Also Bought" feature of the Amazon website.
If product i is frequently co-purchased with product j, the graph contains a
directed edge from i to j. The data was collected on March 2, 2003.
author: J. Leskovec, L. Adamic and B. Huberman
refs:
J. Leskovec, L. Adamic and B. Huberman. The Dynamics of Viral Marketing.
ACM Transactions on the Web (ACM TWEB), 1(1), 2007.
delim: "\t"
header: 3
col_names:
- FromNodeId
- ToNodeId
col_types:
- int32
- int32
has_loop: false
is_directed: true
is_multigraph: false
is_symmetric: false
number_of_edges: 1234877
number_of_nodes: 262111
url: https://snap.stanford.edu/data/amazon0302.txt.gz
11 changes: 9 additions & 2 deletions python/cugraph/cugraph/testing/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2023, NVIDIA CORPORATION.
# Copyright (c) 2023-2024, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
Expand Down Expand Up @@ -38,6 +38,7 @@
cit_patents,
europe_osm,
hollywood,
amazon0302,
# twitter,
)

Expand Down Expand Up @@ -71,4 +72,10 @@
toy_graph_undirected,
]
DEFAULT_DATASETS = [dolphins, netscience, karate_disjoint]
BENCHMARKING_DATASETS = [soc_livejournal, cit_patents, europe_osm, hollywood]
BENCHMARKING_DATASETS = [
soc_livejournal,
cit_patents,
europe_osm,
hollywood,
amazon0302,
]
1 change: 1 addition & 0 deletions python/cugraph/cugraph/tests/utils/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ def is_symmetric(dataset):
return True
else:
df = dataset.get_edgelist(download=True)
df.rename(columns={df.columns[0]: "src", df.columns[1]: "dst"}, inplace=True)
df_a = df.sort_values("src")

# create df with swapped src/dst columns
Expand Down
2 changes: 1 addition & 1 deletion python/cugraph/cugraph/utilities/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -528,7 +528,7 @@ def create_list_series_from_2d_ar(ar, index):
cp.arange(start=0, stop=len(data) + 1, step=n_cols), dtype="int32"
)
mask_col = cp.full(shape=n_rows, fill_value=True)
mask = cudf._lib.transform.bools_to_mask(as_column(mask_col))
mask = as_column(mask_col).as_mask()
lc = cudf.core.column.ListColumn(
data=None,
size=n_rows,
Expand Down

0 comments on commit 872f0e9

Please sign in to comment.