Ruff, dependency upgrades #112

Merged · 2 commits · Feb 13, 2024
24 changes: 9 additions & 15 deletions .github/workflows/ci.yaml
@@ -10,7 +10,7 @@ jobs:
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: 3.9
python-version: '3.10'

- name: Install dependencies
run: |
@@ -23,23 +23,17 @@ jobs:
lint:
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.10'
- name: Install linters
run: |
python -m pip install --upgrade pip
pip install ".[dev]"
- name: Run pycodestyle
run: |
pycodestyle scripts src tests --max-line-length=88
- name: Run pylint
if: success() || failure() # still run pylint if above checks fail
run: |
pylint scripts src tests
- name: Run bandit
if: success() || failure() # still run bandit if above checks fail
run: |
bandit -r scripts src
- name: Run black
- name: Run ruff
if: success() || failure() # still run ruff if above checks fail
run: |
black --check --verbose .
ruff check
ruff format --check
23 changes: 8 additions & 15 deletions .pre-commit-config.yaml
@@ -1,17 +1,10 @@
default_install_hook_types: [pre-commit, pre-push]
repos:
- repo: https://github.com/psf/black
#this version is synced with the black mentioned in .github/workflows/ci.yml
rev: 22.12.0
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.2.1
hooks:
- id: black
entry: bash -c 'black "$@"; git add -u' --
# It is recommended to specify the latest version of Python
# supported by your project here, or alternatively use
# pre-commit's default_language_version, see
# https://pre-commit.com/#top_level-default_language_version
language_version: python3.9
- repo: https://github.com/pycqa/isort
rev: 5.12.0
hooks:
- id: isort
args: ["--profile", "black", "--filter-files"]
- name: Ruff formatting
id: ruff-format
- name: Ruff linting
id: ruff
stages: [pre-push]
45 changes: 24 additions & 21 deletions pyproject.toml
@@ -1,15 +1,15 @@
[project]
name = "aggregator"
requires-python = ">= 3.9"
version = "0.1.3"
requires-python = ">= 3.10"
version = "0.3.0"
# This project is designed to run on the AWS serverless application framework (SAM).
# The project dependencies are handled via AWS layers. These are only required for
# local development.
dependencies= [
"arrow >=1.2.3",
"awswrangler >=2.19.0, <3",
"awswrangler >=3.5, <4",
"boto3",
"pandas >=1.5.0, <2"
"pandas >=2, <3"
]
authors = [
{ name="Matt Garber", email="[email protected]" },
@@ -45,23 +45,26 @@ test = [
"pytest-mock"
]
dev = [
"bandit",
"black==22.12.0",
"isort==5.12.0",
"ruff == 0.2.1",
"pre-commit",
"pylint",
"pycodestyle"
]
[tool.ruff]
target-version = "py310"

[tool.coverage.run]
command_line="-m pytest"
source=["./src/"]

[tool.coverage.report]
show_missing=true

[tool.isort]
profile = "black"
src_paths = ["src", "tests"]
skip_glob = [".aws_sam"]

[tool.ruff.lint]
select = [
"A", # prevent using keywords that clobber python builtins
"B", # bugbear: security warnings
"E", # pycodestyle
"F", # pyflakes
"I", # isort
"ISC", # implicit string concatenation
"PLE", # pylint errors
"RUF", # the ruff developer's own rules
"UP", # alert you when better syntax is available in your python version
]
ignore = [
# Recommended ignore from `ruff format` due to in-project conflicts with check.
# It's expected that this will be fixed in the coming months.
"ISC001"
]
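
To make one of these rule groups concrete: with `target-version = "py310"`, the `UP` rules flag pre-3.10 typing idioms, which is exactly the change applied to `update_metadata` in `src/handlers/shared/functions.py` further down this diff. A minimal sketch (the trimmed signature is hypothetical; the real function takes more arguments):

```python
from datetime import datetime

# Before: flagged by the UP rules (e.g. UP007) on a py310 target.
#     from typing import Optional
#     def update_metadata(dt: Optional[datetime] = None): ...

# After: the PEP 604 union spelling this PR adopts.
def update_metadata(dt: datetime | None = None) -> None:
    """Hypothetical trimmed signature, for illustration only."""
```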
2 changes: 1 addition & 1 deletion scripts/cumulus_upload_data.py
@@ -107,7 +107,7 @@ def upload_file(cli_args):
if args["test"]:
args_dict["user"] = os.environ.get("CUMULUS_TEST_UPLOAD_USER", "general")
args_dict["file"] = (
f"{str(Path(__file__).resolve().parents[1])}"
f"{Path(__file__).resolve().parents[1]!s}"
f"/tests/test_data/count_synthea_patient.parquet"
)
args_dict["auth"] = os.environ.get("CUMULUS_TEST_UPLOAD_AUTH", "secretval")
5 changes: 1 addition & 4 deletions src/handlers/dashboard/filter_config.py
@@ -73,10 +73,7 @@ def _parse_filter_req(filter_req):
if "," in filter_req:
return " AND ".join(_parse_filter_req(x) for x in filter_req.split(","))
filter_req_split = filter_req.split(":")
if (
filter_req_split[1]
in _FILTER_MAP_ONE_PARAM.keys() # pylint: disable=consider-iterating-dictionary
):
if filter_req_split[1] in _FILTER_MAP_ONE_PARAM:
return _FILTER_MAP_ONE_PARAM[filter_req_split[1]] % filter_req_split[0]
return _FILTER_MAP_TWO_PARAM[filter_req_split[1]] % (
filter_req_split[0],
7 changes: 4 additions & 3 deletions src/handlers/dashboard/get_chart_data.py
@@ -13,7 +13,7 @@
from ..shared.functions import get_latest_data_package_version, http_response


def _get_table_cols(table_name: str, version: str = None) -> list:
def _get_table_cols(table_name: str, version: str | None = None) -> list:
"""Returns the columns associated with a table.

Since running an athena query takes a decent amount of time due to queueing
@@ -29,7 +29,8 @@ def _get_table_cols(table_name: str, version: str = None) -> list:
s3_key = f"{prefix}/{version}/{table_name}__aggregate.csv"
s3_client = boto3.client("s3")
s3_iter = s3_client.get_object(
Bucket=s3_bucket_name, Key=s3_key # type: ignore[arg-type]
Bucket=s3_bucket_name,
Key=s3_key,
)["Body"].iter_lines()
return next(s3_iter).decode().split(",")

@@ -41,7 +42,7 @@ def _build_query(query_params: dict, filters: list, path_params: dict) -> str:
filter_str = get_filter_string(filters)
if filter_str != "":
filter_str = f"AND {filter_str}"
count_col = [c for c in columns if c.startswith("cnt")][0]
count_col = next(c for c in columns if c.startswith("cnt"))
columns.remove(count_col)
select_str = f"{query_params['column']}, sum({count_col}) as {count_col}"
group_str = f"{query_params['column']}"
2 changes: 1 addition & 1 deletion src/handlers/shared/decorators.py
@@ -14,7 +14,7 @@ def error_decorator(func):
def wrapper(*args, **kwargs):
try:
res = func(*args, **kwargs)
except Exception as e: # pylint: disable=broad-except
except Exception as e:
trace = []
tb = e.__traceback__
while tb is not None:
3 changes: 1 addition & 2 deletions src/handlers/shared/functions.py
@@ -3,7 +3,6 @@
import json
import logging
from datetime import datetime, timezone
from typing import Optional

import boto3

@@ -77,7 +76,7 @@ def update_metadata(
data_package: str,
version: str,
target: str,
dt: Optional[datetime] = None,
dt: datetime | None = None,
meta_type: str = JsonFilename.TRANSACTIONS.value,
):
"""Safely updates items in metadata dictionary
13 changes: 6 additions & 7 deletions src/handlers/site_upload/api_gateway_authorizer.py
@@ -3,7 +3,6 @@
"""

# pylint: disable=invalid-name,pointless-string-statement
from __future__ import print_function

import os
import re
@@ -28,7 +27,7 @@ def lambda_handler(event, context):
if auth_token not in user_db.keys() or auth_header[0] != "Basic":
raise AuthError
except (AuthError, KeyError):
raise AuthError(event) # pylint: disable=raise-missing-from
raise AuthError(event) # noqa: B904
Contributor:

Ugh, I hate how much less readable that lint-disable is. Is there a reason not to just do what the linter wants here and use `from exc`?

Contributor Author:

Because this is in the "this code came directly from AWS and we're trying to leave it as close to as-found as possible" bin? You could talk me out of it.
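
For reference, a minimal sketch of the two styles under discussion; `AuthError`, `user_db`, `auth_token`, and `event` are simplified stand-ins for the handler's names.

```python
class AuthError(Exception):
    """Stand-in for the handler's AuthError."""


user_db: dict = {}
auth_token, event = "Basic abc123", {"type": "TOKEN"}

# What B904 asks for: chain the exception so the original KeyError
# is preserved as __cause__ in the traceback.
try:
    principal_id = user_db[auth_token]["site"]
except KeyError as exc:
    raise AuthError(event) from exc

# What the PR keeps, to stay close to the upstream AWS template:
#     raise AuthError(event)  # noqa: B904
```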


principalId = user_db[auth_token]["site"]

@@ -66,7 +65,7 @@ class HttpVerb:
ALL = "*"


class AuthPolicy(object): # pylint: disable=missing-class-docstring; # pragma: no cover
class AuthPolicy:
awsAccountId = ""
"""The AWS account id the policy will be generated for. This is used to
create the method ARNs."""
Expand All @@ -81,8 +80,8 @@ class AuthPolicy(object): # pylint: disable=missing-class-docstring; # pragma:
conditions statement.
the build method processes these lists and generates the approriate
statements for the final policy"""
allowMethods = []
denyMethods = []
allowMethods = [] # noqa: RUF012
denyMethods = [] # noqa: RUF012
Comment on lines +83 to +84

Contributor:

These are valid concerns? Very similar to using a container as a default kwarg: you'll be surprised later, and should probably set these in `__init__` (or use None as the default).

Contributor Author:

This is also in the "this is AWS code verbatim" bucket. I can go check to see if there's an update over there.

Contributor Author:

For posterity: no update in the last three years, and these vars are re-initialized in `__init__`, so I'm inclined to just leave it this way to keep matching the template.
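
A minimal illustration of what RUF012 guards against (hypothetical class names): a mutable class attribute is one list shared by every instance, much like a mutable default kwarg, unless each instance re-initializes it as this template's `__init__` does.

```python
class SharedPolicy:
    allowMethods = []  # class-level: a single list shared by all instances


class PerInstancePolicy:
    def __init__(self):
        self.allowMethods = []  # a fresh list per instance


a, b = SharedPolicy(), SharedPolicy()
a.allowMethods.append("GET")
print(b.allowMethods)  # ['GET'] -- b sees a's mutation

c, d = PerInstancePolicy(), PerInstancePolicy()
c.allowMethods.append("GET")
print(d.allowMethods)  # []
```

Since the AWS template re-initializes these lists in `__init__`, the sharing never bites in practice, which is why suppressing the rule is tolerable here.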


restApiId = "<<restApiId>>"
""" Replace the placeholder value with a default API Gateway API id to be used in
@@ -211,15 +210,15 @@ def allowMethodWithConditions(self, verb, resource, conditions):
methods and includes a condition for the policy statement. More on AWS policy
conditions here:
http://docs.aws.amazon.com/IAM/latest/UserGuide/reference_policies_elements.html#Condition
""" # noqa: E501
"""
self._addMethod("Allow", verb, resource, conditions)

def denyMethodWithConditions(self, verb, resource, conditions):
"""Adds an API Gateway method (Http verb + Resource path) to the list of denied
methods and includes a condition for the policy statement. More on AWS policy
conditions here:
http://docs.aws.amazon.com/IAM/latest/UserGuide/reference_policies_elements.html#Condition
""" # noqa: E501
"""
self._addMethod("Deny", verb, resource, conditions)

def build(self):
18 changes: 13 additions & 5 deletions src/handlers/site_upload/powerset_merge.py
@@ -189,7 +189,7 @@ def expand_and_concat_sets(
.reset_index()
# this last line makes "cnt" the first column in the set, matching the
# library style
.filter(["cnt"] + data_cols)
.filter(["cnt", *data_cols])
)
return agg_df

@@ -209,8 +209,9 @@ def generate_csv_from_parquet(bucket_name: str, bucket_root: str, subbucket_path
awswrangler.s3.to_csv(
last_valid_df,
(
f"s3://{bucket_name}/{bucket_root}/"
f"{subbucket_path}".replace(".parquet", ".csv")
f"s3://{bucket_name}/{bucket_root}/{subbucket_path}".replace(
".parquet", ".csv"
)
),
index=False,
quoting=csv.QUOTE_NONE,
@@ -250,7 +251,6 @@ def merge_powersets(manager: S3Manager) -> None:
e,
)
for latest_path in latest_file_list:

if manager.version not in latest_path:
continue
site_specific_name = get_s3_site_filename_suffix(latest_path)
@@ -299,7 +299,7 @@ def merge_powersets(manager: S3Manager) -> None:
manager.update_local_metadata(
TransactionKeys.LAST_AGGREGATION.value, site=latest_site
)
except Exception as e: # pylint: disable=broad-except
except Exception as e:
manager.merge_error_handler(
latest_path,
subbucket_path,
Expand All @@ -315,6 +315,14 @@ def merge_powersets(manager: S3Manager) -> None:
manager.site,
)
manager.update_local_metadata(TransactionKeys.LAST_AGGREGATION.value)

if df.empty:
manager.merge_error_handler(
latest_path,
subbucket_path,
OSError("File not found"),
)

manager.write_local_metadata()

# In this section, we are trying to accomplish two things: