Commit

Update
Xiang Song committed Nov 30, 2023
1 parent c1c6425 commit b144593
Showing 2 changed files with 7 additions and 5 deletions.
7 changes: 5 additions & 2 deletions python/graphstorm/gconstruct/remap_result.py
@@ -461,6 +461,11 @@ def remap_node_pred(pred_ntypes, pred_dir,
         pred_files.sort()
         num_parts = len(pred_files)
         logging.debug("{%s} has {%d} prediction files", ntype, num_parts)
+        assert len(nid_files) == len(pred_files), \
+            "Expect the number of nid files equal to " \
+            "the number of prediction result files, but get " \
+            f"{len(nid_files)} and {len(pred_files)}"
+
         files_to_remove += [os.path.join(input_pred_dir, nid_file) \
                             for nid_file in nid_files]
         files_to_remove += [os.path.join(input_pred_dir, pred_file) \
@@ -491,7 +496,6 @@ def remap_node_pred(pred_ntypes, pred_dir,
f"pred.{pred_file[:pred_file.rindex('.')]}"),
"chunk_size": out_chunk_size,
"output_func": output_func,
"preserve_input": preserve_input,
})

multiprocessing_remap(task_list, num_proc, worker_remap_node_data)
@@ -615,7 +619,6 @@ def remap_edge_pred(pred_etypes, pred_dir,
f"pred.{pred_file[:pred_file.rindex('.')]}"),
"chunk_size": out_chunk_size,
"output_func": output_func,
"preserve_input": preserve_input
})

multiprocessing_remap(task_list, num_proc, worker_remap_edge_pred)
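
Taken together, the changes in remap_result.py drop the per-task `preserve_input` flag: each worker now only remaps its chunk, while the caller records the node-ID and prediction files in `files_to_remove` and asserts that the two file lists have matching lengths before building tasks. A minimal sketch of that caller-side pattern follows; it assumes the caller deletes `files_to_remove` after the workers finish, and `run_workers` is a hypothetical stand-in for `multiprocessing_remap`, not the actual graphstorm code.

    import os

    def remap_then_cleanup(task_list, files_to_remove, run_workers):
        # Sketch only: workers just remap their chunks; the caller removes the
        # original nid/prediction files in one pass afterwards, so no per-task
        # "preserve_input" flag is needed.
        run_workers(task_list)
        for path in files_to_remove:
            if os.path.exists(path):
                os.remove(path)
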
5 changes: 2 additions & 3 deletions tests/unit-tests/gconstruct/test_remap_result.py
@@ -83,11 +83,10 @@ def test_worker_remap_node_data(data_col):

     worker_remap_node_data(data_path, nid_path, ntypes[0], data_col,
                            output_path_prefix, chunk_size,
-                           write_data_parquet_file, preserve_input=True)
+                           write_data_parquet_file)
     worker_remap_node_data(data_path, nid_path, ntypes[0], data_col,
                            output_path_prefix, chunk_size,
-                           partial(write_data_csv_file, delimiter=","),
-                           preserve_input=True)
+                           partial(write_data_csv_file, delimiter=","))
     def read_csv(file, delimiter=","):
         data = pd.read_csv(file, delimiter=delimiter)
         nid = data["nid"].to_numpy()
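The test now exercises the trimmed worker signature directly, writing one output in Parquet and one in CSV and reading the results back. As a hedged illustration only (the real test body is truncated above), a helper for reading the Parquet output might look like the sketch below; the `nid` column name mirrors the test's `read_csv`, while the helper itself and its use of `data_col` are assumptions.

    import pandas as pd

    def read_parquet(file, data_col):
        # Hypothetical counterpart to the test's read_csv: load one remapped
        # output chunk and return the node IDs plus the prediction column.
        data = pd.read_parquet(file)
        return data["nid"].to_numpy(), data[data_col].to_numpy()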
