Skip to content

Commit

Permalink
add converter for gconstruct bucket
Browse files (browse the repository at this point in the history)
  • Loading branch information
jalencato committed Oct 30, 2023
1 parent 0d37ea3 commit 51de128
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 0 deletions.
Original file line number | Diff line number | Diff line change
Expand Up @@ -98,6 +98,17 @@ def _convert_feature(feats: list[dict]) -> list[dict]:
if gconstruct_transform_dict["name"] == "max_min_norm":
gsp_transformation_dict["name"] = "numerical"
gsp_transformation_dict["kwargs"] = {"normalizer": "min-max", "imputer": "mean"}
elif gconstruct_transform_dict["name"] == "bucket_numerical":
gsp_transformation_dict["name"] = "numerical"
assert "bucket_cnt" in gconstruct_transform_dict, \
"bucket_cnt should be in the gconstruct bucket feature transform field"
assert "range" in gconstruct_transform_dict, \
"range should be in the gconstruct bucket feature transform field"
gsp_transformation_dict["kwargs"] = {"normalizer": "bucket_numerical",
"bucket_cnt": gconstruct_transform_dict['bucket_cnt'],
"range": gconstruct_transform_dict['range'],
"slide_window_size": gconstruct_transform_dict['slide_window_size'],
"imputer": "mean"}
# TODO: Add support for other common transformations here
else:
raise ValueError(
Expand Down
15 changes: 15 additions & 0 deletions graphstorm-processing/tests/test_converter.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -208,6 +208,10 @@ def test_convert_gsprocessing(converter: GConstructConfigConverter):
"features": [
{"feature_col": ["citation_time"], "feature_name": "feat"},
{"feature_col": ["num_citations"], "transform": {"name": "max_min_norm"}},
{"feature_col": ["num_citations"], "transform": {"name": "bucket_numerical",
"bucket_cnt": 9,
"range": [10, 100],
"slide_window_size": 5}},
],
"labels": [
{"label_col": "label", "task_type": "classification", "split_pct": [0.8, 0.1, 0.1]}
Expand Down Expand Up @@ -252,6 +256,17 @@ def test_convert_gsprocessing(converter: GConstructConfigConverter):
"kwargs": {"normalizer": "min-max", "imputer": "mean"},
},
},
{
"column": "num_citations",
"transformation": {
"name": "numerical",
"kwargs": {"normalizer": "bucket_numerical",
"bucket_cnt": 9,
"range": [10, 100],
"slide_window_size": 5,
"imputer": "mean"},
},
},
]
assert nodes_output["labels"] == [
{
Expand Down

0 comments on commit 51de128

Please sign in to comment.