diff --git a/graphstorm-processing/graphstorm_processing/config/data_config_base.py b/graphstorm-processing/graphstorm_processing/config/data_config_base.py index 0014348b6a..f737adaded 100644 --- a/graphstorm-processing/graphstorm_processing/config/data_config_base.py +++ b/graphstorm-processing/graphstorm_processing/config/data_config_base.py @@ -15,7 +15,7 @@ """ from dataclasses import dataclass -from typing import Sequence, Optional +from typing import Optional from graphstorm_processing.constants import SUPPORTED_FILE_TYPES @@ -27,7 +27,7 @@ class DataStorageConfig: """ format: str - files: Sequence[str] + files: list[str] separator: Optional[str] = None def __post_init__(self): @@ -39,3 +39,7 @@ def __post_init__(self): raise ValueError( f"File paths need to be relative (not starting with '/'), got : {file}" ) + + for idx, file in enumerate(self.files): + if file.startswith("./"): + self.files[idx] = file[2:] diff --git a/graphstorm-processing/graphstorm_processing/graph_loaders/dist_heterogeneous_loader.py b/graphstorm-processing/graphstorm_processing/graph_loaders/dist_heterogeneous_loader.py index db56720840..af76ab40e8 100644 --- a/graphstorm-processing/graphstorm_processing/graph_loaders/dist_heterogeneous_loader.py +++ b/graphstorm-processing/graphstorm_processing/graph_loaders/dist_heterogeneous_loader.py @@ -939,7 +939,7 @@ def process_node_data(self, node_configs: Sequence[NodeConfig]) -> Dict: self.graph_info["ntype_to_label_masks"] = defaultdict(list) for node_config in node_configs: files = node_config.files - file_paths = [f"{self.input_prefix}/{f}" for f in files] + file_paths = [os.path.join(self.input_prefix, f) for f in files] node_type = node_config.ntype node_col = node_config.node_col diff --git a/graphstorm-processing/tests/resources/small_heterogeneous_graph/gsprocessing-config.json b/graphstorm-processing/tests/resources/small_heterogeneous_graph/gsprocessing-config.json index d06cfd283e..675109bb53 100644 --- a/graphstorm-processing/tests/resources/small_heterogeneous_graph/gsprocessing-config.json +++ b/graphstorm-processing/tests/resources/small_heterogeneous_graph/gsprocessing-config.json @@ -17,7 +17,7 @@ "data": { "format": "csv", "files": [ - "nodes/movie.csv" + "./nodes/movie.csv" ], "separator": "," }, @@ -104,7 +104,7 @@ "data": { "format": "csv", "files": [ - "edges/movie-included_in-genre.csv" + "./edges/movie-included_in-genre.csv" ], "separator": "," },