Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[GSProcessing CI Update] Add black check to GSProcessing Lint Check #749

Merged
Merged 8 commits on Feb 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .github/workflow_scripts/gsprocessing_lint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ cd ../../
set -ex

pip install pylint==2.17.5

pylint --rcfile=./tests/lint/pylintrc ./graphstorm-processing/graphstorm_processing/

pip install black==24.2.0
black --check ./graphstorm-processing/
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
See the License for the specific language governing permissions and
limitations under the License.
"""

from typing import Mapping
from .feature_config_base import FeatureConfig

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""
This module contains the classes to convert different specifications to GSProcessing
"""

from .gconstruct_converter import GConstructConfigConverter
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
See the License for the specific language governing permissions and
limitations under the License.
"""

import abc
from abc import abstractmethod
from typing import Any
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
See the License for the specific language governing permissions and
limitations under the License.
"""

from dataclasses import dataclass
from typing import Optional

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
See the License for the specific language governing permissions and
limitations under the License.
"""

import abc
from typing import Any, Mapping, Sequence

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
See the License for the specific language governing permissions and
limitations under the License.
"""

import abc
from typing import Any, Dict, Optional

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
See the License for the specific language governing permissions and
limitations under the License.
"""

from typing import Mapping
import numbers

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
See the License for the specific language governing permissions and
limitations under the License.
"""

from dataclasses import dataclass
from typing import Dict, List

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
See the License for the specific language governing permissions and
limitations under the License.
"""

from abc import ABC, abstractmethod
from typing import Sequence

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
See the License for the specific language governing permissions and
limitations under the License.
"""

from typing import List

from pyspark.sql import DataFrame
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
See the License for the specific language governing permissions and
limitations under the License.
"""

from typing import Dict, List, Optional, Sequence

import numpy as np
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
See the License for the specific language governing permissions and
limitations under the License.
"""

import json
from typing import Dict, Sequence

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
See the License for the specific language governing permissions and
limitations under the License.
"""

from typing import List, Optional

from pyspark.sql import DataFrame
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
See the License for the specific language governing permissions and
limitations under the License.
"""

import logging
from typing import Optional, Sequence
import uuid
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
See the License for the specific language governing permissions and
limitations under the License.
"""

import logging
from typing import List, Optional

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
that only exists for the Python 3.9/Spark 3.2 container
https://github.com/aws/sagemaker-spark-container/blob/4ef476fd535040f245def3d38c59fe43062e88a9/src/smspark/bootstrapper.py#L375
"""

import logging
import uuid
from typing import Tuple, Sequence
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
When set to true (default), we add reverse edges for each edge type.

"""

import dataclasses
import argparse
import json
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
See the License for the specific language governing permissions and
limitations under the License.
"""

from .loader_base import GraphLoader


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

Base class for graph data processing.
"""

from abc import ABC
from typing import Dict, List
import os
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

This module is used to determine row counts for Parquet files.
"""

import logging
import os
from typing import Dict, List, Sequence
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
the same edge/node type have the same number of rows per corresponding part-file.
The output is written to storage and a new metadata JSON file is generated.
"""

import argparse
import json
import logging
Expand Down Expand Up @@ -931,10 +932,10 @@ def main():
"Repartitioning structure files for reverse edge type '%s'",
reverse_edge_type_name,
)
edge_structure_meta[
reverse_edge_type_name
] = repartitioner.repartition_parquet_files(
edge_structure_meta[reverse_edge_type_name], most_frequent_counts
edge_structure_meta[reverse_edge_type_name] = (
repartitioner.repartition_parquet_files(
edge_structure_meta[reverse_edge_type_name], most_frequent_counts
)
)

# Repartition edge feature files if the row counts don't match the most frequent
Expand Down
2 changes: 1 addition & 1 deletion graphstorm-processing/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ coverage = ">=7.0.0"
sphinx = ">=6.0.0"
mypy = ">=1.0.0"
types-psutil = "^5.9.5.15"
black = "~23.7.0"
black = "~24.2.0"
pre-commit = "^3.3.3"
types-mock = "^5.1.0.1"
pylint = "~2.17.5"
Expand Down
1 change: 1 addition & 0 deletions graphstorm-processing/scripts/convert_gconstruct_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
Entry point for graph spec conversion. Allows us to convert a graph
data specification from GConstruct to the format used by GSProcessing.
"""

import argparse
import json

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@
--sm-estimator-parameters \"volume_size=100 subnets=['subnet-123','subnet-345']
security_group_ids=['sg-1234','sg-3456']\"
"""

import argparse
import logging
from pathlib import Path
Expand Down
1 change: 1 addition & 0 deletions graphstorm-processing/scripts/run_repartitioning.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@
security_group_ids=['sg-1234','sg-3456']\"``

"""

import argparse
import logging
from time import strftime, gmtime
Expand Down
1 change: 1 addition & 0 deletions graphstorm-processing/scripts/script_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
See the License for the specific language governing permissions and
limitations under the License.
"""

import argparse
from ast import literal_eval
from typing import Any, Dict, Sequence, Optional
Expand Down
1 change: 1 addition & 0 deletions graphstorm-processing/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
See the License for the specific language governing permissions and
limitations under the License.
"""

import os
import sys
import logging
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
See the License for the specific language governing permissions and
limitations under the License.
"""

from pyspark.sql import DataFrame, SparkSession
import numpy as np
from numpy.testing import assert_array_equal
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
See the License for the specific language governing permissions and
limitations under the License.
"""

from typing import Tuple, Iterator
import os
import pytest
Expand Down
1 change: 1 addition & 0 deletions graphstorm-processing/tests/test_dist_label_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
See the License for the specific language governing permissions and
limitations under the License.
"""

from typing import List

import numpy as np
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
See the License for the specific language governing permissions and
limitations under the License.
"""

from numpy.testing import assert_array_equal
from pyspark.sql import functions as F, DataFrame, SparkSession

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
See the License for the specific language governing permissions and
limitations under the License.
"""

import os

import pytest
Expand Down
1 change: 1 addition & 0 deletions graphstorm-processing/tests/test_repartition_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
See the License for the specific language governing permissions and
limitations under the License.
"""

import json
from pathlib import Path
import os
Expand Down
Loading