Skip to content

Commit

Permalink
add
Browse files Browse the repository at this point in the history
  • Loading branch information
jalencato committed Feb 16, 2024
1 parent 11723f8 commit 62f9118
Showing 1 changed file with 22 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
import numpy as np
from numpy.testing import assert_array_equal, assert_array_almost_equal, assert_almost_equal
from pyspark.sql import SparkSession, DataFrame, functions as F
from pyspark.sql.types import ArrayType, FloatType, DoubleType, StructField, StructType, StringType
from pyspark.sql.types import ArrayType, FloatType, DoubleType, StructField, StructType, StringType, LongType
from scipy.special import erfinv

from graphstorm_processing.data_transformations.dist_transformations import (
Expand Down Expand Up @@ -113,6 +113,27 @@ def test_numerical_transformation_without_transformation(input_df: DataFrame, ch
assert row["salary"] == expected_salary


@pytest.mark.parametrize("out_dtype", ["float32", "float64"])
def test_numerical_min_max_transformation_precision(spark: SparkSession, out_dtype):
    """Test that min-max normalization emits the requested output dtype.

    A single-row DataFrame with a float "age" column is passed through
    ``DistNumericalTransformation`` with ``normalizer="min-max"`` and no
    imputation. Only the Spark type of the resulting "age" column is
    checked (FloatType for "float32", DoubleType for "float64"); the scaled
    values themselves are not inspected.
    """
    # A literal carrying more significant digits than a 32-bit float can hold.
    high_precision_float = 1.2345678901234562
    data = [(high_precision_float,)]
    schema = StructType([StructField("age", FloatType(), True)])
    input_df = spark.createDataFrame(data, schema=schema)

    dist_numerical_transformation = DistNumericalTransformation(
        ["age"], imputer="none", normalizer="min-max", out_dtype=out_dtype
    )
    transformed_df = dist_numerical_transformation.apply(input_df)

    # Map each requested dtype to the Spark type it must produce. An
    # unexpected out_dtype raises KeyError instead of silently passing.
    expected_types = {"float32": FloatType, "float64": DoubleType}
    expected_type = expected_types[out_dtype]

    # StructType supports lookup by field name, returning the StructField.
    column_data_type = transformed_df.schema["age"].dataType
    assert isinstance(
        column_data_type, expected_type
    ), f"The column 'age' is not of type {expected_type.__name__}."


def test_numerical_transformation_with_median_imputer_and_std_norm(
input_df: DataFrame, check_df_schema
):
Expand Down

0 comments on commit 62f9118

Please sign in to comment.