diff --git a/.github/workflow_scripts/gsprocessing_lint.sh b/.github/workflow_scripts/gsprocessing_lint.sh index e43da86db0..174aed85b8 100644 --- a/.github/workflow_scripts/gsprocessing_lint.sh +++ b/.github/workflow_scripts/gsprocessing_lint.sh @@ -4,6 +4,7 @@ cd ../../ set -ex pip install pylint==2.17.5 - pylint --rcfile=./tests/lint/pylintrc ./graphstorm-processing/graphstorm_processing/ +pip install black==24.2.0 +black --check ./graphstorm-processing/ diff --git a/graphstorm-processing/graphstorm_processing/config/categorical_configs.py b/graphstorm-processing/graphstorm_processing/config/categorical_configs.py index 4438ccdf68..72e95a9945 100644 --- a/graphstorm-processing/graphstorm_processing/config/categorical_configs.py +++ b/graphstorm-processing/graphstorm_processing/config/categorical_configs.py @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. """ + from typing import Mapping from .feature_config_base import FeatureConfig diff --git a/graphstorm-processing/graphstorm_processing/config/config_conversion/__init__.py b/graphstorm-processing/graphstorm_processing/config/config_conversion/__init__.py index 49ae4536ba..ed1be585a3 100644 --- a/graphstorm-processing/graphstorm_processing/config/config_conversion/__init__.py +++ b/graphstorm-processing/graphstorm_processing/config/config_conversion/__init__.py @@ -1,4 +1,5 @@ """ This module contains the classes to convert different specifications to GSProcessing """ + from .gconstruct_converter import GConstructConfigConverter diff --git a/graphstorm-processing/graphstorm_processing/config/config_conversion/converter_base.py b/graphstorm-processing/graphstorm_processing/config/config_conversion/converter_base.py index dd67dbc049..109b9d2617 100644 --- a/graphstorm-processing/graphstorm_processing/config/config_conversion/converter_base.py +++ b/graphstorm-processing/graphstorm_processing/config/config_conversion/converter_base.py @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. """ + import abc from abc import abstractmethod from typing import Any diff --git a/graphstorm-processing/graphstorm_processing/config/config_conversion/meta_configuration.py b/graphstorm-processing/graphstorm_processing/config/config_conversion/meta_configuration.py index f41e0fa2cc..c8e2297517 100644 --- a/graphstorm-processing/graphstorm_processing/config/config_conversion/meta_configuration.py +++ b/graphstorm-processing/graphstorm_processing/config/config_conversion/meta_configuration.py @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. """ + from dataclasses import dataclass from typing import Optional diff --git a/graphstorm-processing/graphstorm_processing/config/feature_config_base.py b/graphstorm-processing/graphstorm_processing/config/feature_config_base.py index 85e447bd7b..20979eda2a 100644 --- a/graphstorm-processing/graphstorm_processing/config/feature_config_base.py +++ b/graphstorm-processing/graphstorm_processing/config/feature_config_base.py @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. """ + import abc from typing import Any, Mapping, Sequence diff --git a/graphstorm-processing/graphstorm_processing/config/label_config_base.py b/graphstorm-processing/graphstorm_processing/config/label_config_base.py index 754ac7e491..fca0a82ec0 100644 --- a/graphstorm-processing/graphstorm_processing/config/label_config_base.py +++ b/graphstorm-processing/graphstorm_processing/config/label_config_base.py @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. """ + import abc from typing import Any, Dict, Optional diff --git a/graphstorm-processing/graphstorm_processing/config/numerical_configs.py b/graphstorm-processing/graphstorm_processing/config/numerical_configs.py index 585862b85b..acd737f51d 100644 --- a/graphstorm-processing/graphstorm_processing/config/numerical_configs.py +++ b/graphstorm-processing/graphstorm_processing/config/numerical_configs.py @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. """ + from typing import Mapping import numbers diff --git a/graphstorm-processing/graphstorm_processing/data_transformations/dist_label_loader.py b/graphstorm-processing/graphstorm_processing/data_transformations/dist_label_loader.py index cb48ffaa23..5782a683da 100644 --- a/graphstorm-processing/graphstorm_processing/data_transformations/dist_label_loader.py +++ b/graphstorm-processing/graphstorm_processing/data_transformations/dist_label_loader.py @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. """ + from dataclasses import dataclass from typing import Dict, List diff --git a/graphstorm-processing/graphstorm_processing/data_transformations/dist_transformations/base_dist_transformation.py b/graphstorm-processing/graphstorm_processing/data_transformations/dist_transformations/base_dist_transformation.py index f9bf36d68a..21b31e1154 100644 --- a/graphstorm-processing/graphstorm_processing/data_transformations/dist_transformations/base_dist_transformation.py +++ b/graphstorm-processing/graphstorm_processing/data_transformations/dist_transformations/base_dist_transformation.py @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. """ + from abc import ABC, abstractmethod from typing import Sequence diff --git a/graphstorm-processing/graphstorm_processing/data_transformations/dist_transformations/dist_bucket_numerical_transformation.py b/graphstorm-processing/graphstorm_processing/data_transformations/dist_transformations/dist_bucket_numerical_transformation.py index 31ce99c050..a0deac5f6d 100644 --- a/graphstorm-processing/graphstorm_processing/data_transformations/dist_transformations/dist_bucket_numerical_transformation.py +++ b/graphstorm-processing/graphstorm_processing/data_transformations/dist_transformations/dist_bucket_numerical_transformation.py @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. """ + from typing import List from pyspark.sql import DataFrame diff --git a/graphstorm-processing/graphstorm_processing/data_transformations/dist_transformations/dist_category_transformation.py b/graphstorm-processing/graphstorm_processing/data_transformations/dist_transformations/dist_category_transformation.py index 1ae3060a8a..b03a0c9333 100644 --- a/graphstorm-processing/graphstorm_processing/data_transformations/dist_transformations/dist_category_transformation.py +++ b/graphstorm-processing/graphstorm_processing/data_transformations/dist_transformations/dist_category_transformation.py @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. """ + from typing import Dict, List, Optional, Sequence import numpy as np diff --git a/graphstorm-processing/graphstorm_processing/data_transformations/dist_transformations/dist_label_transformation.py b/graphstorm-processing/graphstorm_processing/data_transformations/dist_transformations/dist_label_transformation.py index 9e32f06953..4370048c71 100644 --- a/graphstorm-processing/graphstorm_processing/data_transformations/dist_transformations/dist_label_transformation.py +++ b/graphstorm-processing/graphstorm_processing/data_transformations/dist_transformations/dist_label_transformation.py @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. """ + import json from typing import Dict, Sequence diff --git a/graphstorm-processing/graphstorm_processing/data_transformations/dist_transformations/dist_noop_transformation.py b/graphstorm-processing/graphstorm_processing/data_transformations/dist_transformations/dist_noop_transformation.py index a87029b38d..bd785eb212 100644 --- a/graphstorm-processing/graphstorm_processing/data_transformations/dist_transformations/dist_noop_transformation.py +++ b/graphstorm-processing/graphstorm_processing/data_transformations/dist_transformations/dist_noop_transformation.py @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. """ + from typing import List, Optional from pyspark.sql import DataFrame diff --git a/graphstorm-processing/graphstorm_processing/data_transformations/dist_transformations/dist_numerical_transformation.py b/graphstorm-processing/graphstorm_processing/data_transformations/dist_transformations/dist_numerical_transformation.py index 7387a81a41..5b51a702bd 100644 --- a/graphstorm-processing/graphstorm_processing/data_transformations/dist_transformations/dist_numerical_transformation.py +++ b/graphstorm-processing/graphstorm_processing/data_transformations/dist_transformations/dist_numerical_transformation.py @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. """ + import logging from typing import Optional, Sequence import uuid diff --git a/graphstorm-processing/graphstorm_processing/data_transformations/s3_utils.py b/graphstorm-processing/graphstorm_processing/data_transformations/s3_utils.py index 4bdfd2be93..d098efe3fa 100644 --- a/graphstorm-processing/graphstorm_processing/data_transformations/s3_utils.py +++ b/graphstorm-processing/graphstorm_processing/data_transformations/s3_utils.py @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. """ + import logging from typing import List, Optional diff --git a/graphstorm-processing/graphstorm_processing/data_transformations/spark_utils.py b/graphstorm-processing/graphstorm_processing/data_transformations/spark_utils.py index 4cc60a43fa..56c2f46a5c 100644 --- a/graphstorm-processing/graphstorm_processing/data_transformations/spark_utils.py +++ b/graphstorm-processing/graphstorm_processing/data_transformations/spark_utils.py @@ -8,6 +8,7 @@ that only exists for the Python 3.9/Spark 3.2 container https://github.com/aws/sagemaker-spark-container/blob/4ef476fd535040f245def3d38c59fe43062e88a9/src/smspark/bootstrapper.py#L375 """ + import logging import uuid from typing import Tuple, Sequence diff --git a/graphstorm-processing/graphstorm_processing/distributed_executor.py b/graphstorm-processing/graphstorm_processing/distributed_executor.py index 61451a5a5e..791ef5d939 100644 --- a/graphstorm-processing/graphstorm_processing/distributed_executor.py +++ b/graphstorm-processing/graphstorm_processing/distributed_executor.py @@ -45,6 +45,7 @@ When set to true (default), we add reverse edges for each edge type. """ + import dataclasses import argparse import json diff --git a/graphstorm-processing/graphstorm_processing/graph_loaders/heterogeneous_graphloader.py b/graphstorm-processing/graphstorm_processing/graph_loaders/heterogeneous_graphloader.py index e1428b7af9..330a592f6d 100644 --- a/graphstorm-processing/graphstorm_processing/graph_loaders/heterogeneous_graphloader.py +++ b/graphstorm-processing/graphstorm_processing/graph_loaders/heterogeneous_graphloader.py @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. """ + from .loader_base import GraphLoader diff --git a/graphstorm-processing/graphstorm_processing/graph_loaders/loader_base.py b/graphstorm-processing/graphstorm_processing/graph_loaders/loader_base.py index f743b1d663..ebdb2067c0 100644 --- a/graphstorm-processing/graphstorm_processing/graph_loaders/loader_base.py +++ b/graphstorm-processing/graphstorm_processing/graph_loaders/loader_base.py @@ -15,6 +15,7 @@ Base class for graph data processing. """ + from abc import ABC from typing import Dict, List import os diff --git a/graphstorm-processing/graphstorm_processing/graph_loaders/row_count_utils.py b/graphstorm-processing/graphstorm_processing/graph_loaders/row_count_utils.py index 2069c487ca..f3b5894070 100644 --- a/graphstorm-processing/graphstorm_processing/graph_loaders/row_count_utils.py +++ b/graphstorm-processing/graphstorm_processing/graph_loaders/row_count_utils.py @@ -15,6 +15,7 @@ This module is used to determine row counts for Parquet files. """ + import logging import os from typing import Dict, List, Sequence diff --git a/graphstorm-processing/graphstorm_processing/repartition_files.py b/graphstorm-processing/graphstorm_processing/repartition_files.py index 7be6667c06..d76d265609 100644 --- a/graphstorm-processing/graphstorm_processing/repartition_files.py +++ b/graphstorm-processing/graphstorm_processing/repartition_files.py @@ -24,6 +24,7 @@ the same edge/node type have the same number of rows per corresponding part-file. The output is written to storage and a new metadata JSON file is generated. """ + import argparse import json import logging @@ -931,10 +932,10 @@ def main(): "Repartitioning structure files for reverse edge type '%s'", reverse_edge_type_name, ) - edge_structure_meta[ - reverse_edge_type_name - ] = repartitioner.repartition_parquet_files( - edge_structure_meta[reverse_edge_type_name], most_frequent_counts + edge_structure_meta[reverse_edge_type_name] = ( + repartitioner.repartition_parquet_files( + edge_structure_meta[reverse_edge_type_name], most_frequent_counts + ) ) # Repartition edge feature files if the row counts don't match the most frequent diff --git a/graphstorm-processing/pyproject.toml b/graphstorm-processing/pyproject.toml index bf0955f0dc..b5a1b857b7 100644 --- a/graphstorm-processing/pyproject.toml +++ b/graphstorm-processing/pyproject.toml @@ -36,7 +36,7 @@ coverage = ">=7.0.0" sphinx = ">=6.0.0" mypy = ">=1.0.0" types-psutil = "^5.9.5.15" -black = "~23.7.0" +black = "~24.2.0" pre-commit = "^3.3.3" types-mock = "^5.1.0.1" pylint = "~2.17.5" diff --git a/graphstorm-processing/scripts/convert_gconstruct_config.py b/graphstorm-processing/scripts/convert_gconstruct_config.py index 8ac8d29b66..1f3f2106c1 100644 --- a/graphstorm-processing/scripts/convert_gconstruct_config.py +++ b/graphstorm-processing/scripts/convert_gconstruct_config.py @@ -20,6 +20,7 @@ Entry point for graph spec conversion. Allows us to convert a graph data specification from GConstruct to the format used by GSProcessing. """ + import argparse import json diff --git a/graphstorm-processing/scripts/run_distributed_processing.py b/graphstorm-processing/scripts/run_distributed_processing.py index 679694955e..4f6582c70c 100644 --- a/graphstorm-processing/scripts/run_distributed_processing.py +++ b/graphstorm-processing/scripts/run_distributed_processing.py @@ -65,6 +65,7 @@ --sm-estimator-parameters \"volume_size=100 subnets=['subnet-123','subnet-345'] security_group_ids=['sg-1234','sg-3456']\" """ + import argparse import logging from pathlib import Path diff --git a/graphstorm-processing/scripts/run_repartitioning.py b/graphstorm-processing/scripts/run_repartitioning.py index 6335f25449..dd2f699319 100644 --- a/graphstorm-processing/scripts/run_repartitioning.py +++ b/graphstorm-processing/scripts/run_repartitioning.py @@ -57,6 +57,7 @@ security_group_ids=['sg-1234','sg-3456']\"`` """ + import argparse import logging from time import strftime, gmtime diff --git a/graphstorm-processing/scripts/script_utils.py b/graphstorm-processing/scripts/script_utils.py index 214f81417b..c8ada3cbee 100644 --- a/graphstorm-processing/scripts/script_utils.py +++ b/graphstorm-processing/scripts/script_utils.py @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. """ + import argparse from ast import literal_eval from typing import Any, Dict, Sequence, Optional diff --git a/graphstorm-processing/tests/conftest.py b/graphstorm-processing/tests/conftest.py index 911e0ab375..f34f1418f2 100644 --- a/graphstorm-processing/tests/conftest.py +++ b/graphstorm-processing/tests/conftest.py @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. """ + import os import sys import logging diff --git a/graphstorm-processing/tests/test_dist_bucket_transformation.py b/graphstorm-processing/tests/test_dist_bucket_transformation.py index 060eb6a22c..95c407237e 100644 --- a/graphstorm-processing/tests/test_dist_bucket_transformation.py +++ b/graphstorm-processing/tests/test_dist_bucket_transformation.py @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. """ + from pyspark.sql import DataFrame, SparkSession import numpy as np from numpy.testing import assert_array_equal diff --git a/graphstorm-processing/tests/test_dist_category_transformation.py b/graphstorm-processing/tests/test_dist_category_transformation.py index 74d155fb76..51a5662013 100644 --- a/graphstorm-processing/tests/test_dist_category_transformation.py +++ b/graphstorm-processing/tests/test_dist_category_transformation.py @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. """ + from typing import Tuple, Iterator import os import pytest diff --git a/graphstorm-processing/tests/test_dist_label_loader.py b/graphstorm-processing/tests/test_dist_label_loader.py index 3ba747d538..5ea3d65bf9 100644 --- a/graphstorm-processing/tests/test_dist_label_loader.py +++ b/graphstorm-processing/tests/test_dist_label_loader.py @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. """ + from typing import List import numpy as np diff --git a/graphstorm-processing/tests/test_dist_noop_transformation.py b/graphstorm-processing/tests/test_dist_noop_transformation.py index 8604adf67f..1f55417626 100644 --- a/graphstorm-processing/tests/test_dist_noop_transformation.py +++ b/graphstorm-processing/tests/test_dist_noop_transformation.py @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. """ + from numpy.testing import assert_array_equal from pyspark.sql import functions as F, DataFrame, SparkSession diff --git a/graphstorm-processing/tests/test_dist_numerical_transformation.py b/graphstorm-processing/tests/test_dist_numerical_transformation.py index 816a0420c5..0e8ed61306 100644 --- a/graphstorm-processing/tests/test_dist_numerical_transformation.py +++ b/graphstorm-processing/tests/test_dist_numerical_transformation.py @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. """ + import os import pytest diff --git a/graphstorm-processing/tests/test_repartition_files.py b/graphstorm-processing/tests/test_repartition_files.py index 01d3ee08e9..720e89cc2d 100644 --- a/graphstorm-processing/tests/test_repartition_files.py +++ b/graphstorm-processing/tests/test_repartition_files.py @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. """ + import json from pathlib import Path import os