awslabs · thvasilo · Nov 14, 2024 · Nov 8, 2024 · Nov 13, 2024 · Nov 13, 2024
diff --git a/docs/source/cli/graph-construction/distributed/example.rst b/docs/source/cli/graph-construction/distributed/example.rst
@@ -259,7 +259,9 @@ the graph structure, features, and labels. In more detail:
   GSProcessing will use the transformation values listed here
   instead of creating new ones, ensuring that models trained with the original
   data can still be used in the newly transformed data. Currently only
-  categorical transformations can be re-applied.
+  categorical and numerical transformations can be re-applied. Note that
+  the Rank-Gauss transformation does not support re-application, so it may
+  only work for transductive tasks.
 * ``updated_row_counts_metadata.json``:
   This file is meant to be used as the input configuration for the
   distributed partitioning pipeline. ``gs-repartition`` produces
@@ -313,7 +315,7 @@ you can use the following command to run the partition job locally:
         --num-parts 2 \
         --dgl-tool-path ./dgl/tools \
         --partition-algorithm random \
-        --ip-config ip_list.txt 
+        --ip-config ip_list.txt
 
 The command above will first do graph partitioning to determine the ownership for each partition and save the results.
 Then it will do data dispatching to physically assign the partitions to graph data and dispatch them to each machine.

diff --git a/graphstorm-processing/graphstorm_processing/data_transformations/dist_feature_transformer.py b/graphstorm-processing/graphstorm_processing/data_transformations/dist_feature_transformer.py
@@ -31,7 +31,7 @@
 )
 
 
-class DistFeatureTransformer(object):
+class DistFeatureTransformer:
     """
     Given a feature configuration selects the correct transformation type,
     which can then be be applied through a call to apply_transformation.
@@ -56,7 +56,9 @@ def __init__(
         if feat_type == "no-op":
             self.transformation = NoopTransformation(**default_kwargs, **args_dict)
         elif feat_type == "numerical":
-            self.transformation = DistNumericalTransformation(**default_kwargs, **args_dict)
+            self.transformation = DistNumericalTransformation(
+                **default_kwargs, **args_dict, json_representation=json_representation
+            )
         elif feat_type == "multi-numerical":
             self.transformation = DistMultiNumericalTransformation(**default_kwargs, **args_dict)
         elif feat_type == "bucket-numerical":

diff --git a/...cessing/data_transformations/dist_transformations/dist_bucket_numerical_transformation.py b/...cessing/data_transformations/dist_transformations/dist_bucket_numerical_transformation.py
@@ -67,7 +67,7 @@ def get_transformation_name() -> str:
         return "DistBucketNumericalTransformation"
 
     def apply(self, input_df: DataFrame) -> DataFrame:
-        imputed_df = apply_imputation(self.cols, self.shared_imputation, input_df)
+        imputed_df = apply_imputation(self.cols, self.shared_imputation, input_df).imputed_df
         # TODO: Make range optional by getting min/max from data.
         min_val, max_val = self.range