diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index 849f5e6..096099c 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -23,23 +23,17 @@ jobs:
   lint:
     runs-on: ubuntu-22.04
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.10'
       - name: Install linters
         run: |
           python -m pip install --upgrade pip
           pip install ".[dev]"
-      - name: Run pycodestyle
-        run: |
-          pycodestyle scripts src tests --max-line-length=88
-      - name: Run pylint
-        if: success() || failure() # still run pylint if above checks fail
-        run: |
-          pylint scripts src tests
-      - name: Run bandit
-        if: success() || failure() # still run bandit if above checks fail
-        run: |
-          bandit -r scripts src
-      - name: Run black
-        if: success() || failure() # still run black if above checks fails
+      - name: Run ruff
+        if: success() || failure() # still run ruff if above checks fail
         run: |
-          black --check --verbose .
+          ruff check
+          ruff format --check
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 144f529..3fd8eb3 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,17 +1,10 @@
+default_install_hook_types: [pre-commit, pre-push]
 repos:
-  - repo: https://github.com/psf/black
-    #this version is synced with the black mentioned in .github/workflows/ci.yml
-    rev: 22.12.0
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.2.1
     hooks:
-      - id: black
-        entry: bash -c 'black "$@"; git add -u' --
-        # It is recommended to specify the latest version of Python
-        # supported by your project here, or alternatively use
-        # pre-commit's default_language_version, see
-        # https://pre-commit.com/#top_level-default_language_version
-        language_version: python3.9
-  - repo: https://github.com/pycqa/isort
-    rev: 5.12.0
-    hooks:
-      - id: isort
-        args: ["--profile", "black", "--filter-files"]
+      - name: Ruff formatting
+        id: ruff-format
+      - name: Ruff linting
+        id: ruff
+        stages: [pre-push]
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index be210b8..5a35f7d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,15 +1,15 @@
 [project]
 name = "aggregator"
-requires-python = ">= 3.9"
-version = "0.1.3"
+requires-python = ">= 3.10"
+version = "0.3.0"
 # This project is designed to run on the AWS serverless application framework (SAM).
 # The project dependencies are handled via AWS layers. These are only required for
 # local development.
 dependencies= [
     "arrow >=1.2.3",
-    "awswrangler >=2.19.0, <3",
+    "awswrangler >=3.5, <4",
     "boto3",
-    "pandas >=1.5.0, <2"
+    "pandas >=2, <3"
 ]
 authors = [
     { name="Matt Garber", email="matthew.garber@childrens.harvard.edu" },
@@ -45,23 +45,26 @@ test = [
     "pytest-mock"
 ]
 dev = [
-    "bandit",
-    "black==22.12.0",
-    "isort==5.12.0",
+    "ruff == 0.2.1",
     "pre-commit",
-    "pylint",
-    "pycodestyle"
 ]
+[tool.ruff]
+target-version = "py310"
 
-[tool.coverage.run]
-command_line="-m pytest"
-source=["./src/"]
-
-[tool.coverage.report]
-show_missing=true
-
-[tool.isort]
-profile = "black"
-src_paths = ["src", "tests"]
-skip_glob = [".aws_sam"]
-
+[tool.ruff.lint]
+select = [
+    "A",  # prevent using keywords that clobber python builtins
+    "B",  # bugbear: security warnings
+    "E",  # pycodestyle
+    "F",  # pyflakes
+    "I",  # isort
+    "ISC",  # implicit string concatenation
+    "PLE",  # pylint errors
+    "RUF",  # the ruff developer's own rules
+    "UP",  # alert you when better syntax is available in your python version
+]
+ignore = [
+# Recommended ignore from `ruff format` due to in-project conflicts with check.
+# It's expected that this will be fixed in the coming months.
+    "ISC001"
+]
\ No newline at end of file
diff --git a/scripts/cumulus_upload_data.py b/scripts/cumulus_upload_data.py
index 6975c80..66ad8f6 100755
--- a/scripts/cumulus_upload_data.py
+++ b/scripts/cumulus_upload_data.py
@@ -107,7 +107,7 @@ def upload_file(cli_args):
     if args["test"]:
         args_dict["user"] = os.environ.get("CUMULUS_TEST_UPLOAD_USER", "general")
         args_dict["file"] = (
-            f"{str(Path(__file__).resolve().parents[1])}"
+            f"{Path(__file__).resolve().parents[1]!s}"
             f"/tests/test_data/count_synthea_patient.parquet"
         )
         args_dict["auth"] = os.environ.get("CUMULUS_TEST_UPLOAD_AUTH", "secretval")
diff --git a/src/handlers/dashboard/get_chart_data.py b/src/handlers/dashboard/get_chart_data.py
index b1ae477..0a99d16 100644
--- a/src/handlers/dashboard/get_chart_data.py
+++ b/src/handlers/dashboard/get_chart_data.py
@@ -13,7 +13,7 @@
 from ..shared.functions import get_latest_data_package_version, http_response
 
 
-def _get_table_cols(table_name: str, version: str = None) -> list:
+def _get_table_cols(table_name: str, version: str | None = None) -> list:
     """Returns the columns associated with a table.
 
     Since running an athena query takes a decent amount of time due to queueing
@@ -29,7 +29,8 @@
     s3_key = f"{prefix}/{version}/{table_name}__aggregate.csv"
     s3_client = boto3.client("s3")
     s3_iter = s3_client.get_object(
-        Bucket=s3_bucket_name, Key=s3_key  # type: ignore[arg-type]
+        Bucket=s3_bucket_name,
+        Key=s3_key,  # type: ignore[arg-type]
     )["Body"].iter_lines()
     return next(s3_iter).decode().split(",")
 
@@ -41,7 +42,7 @@ def _build_query(query_params: dict, filters: list, path_params: dict) -> str:
     filter_str = get_filter_string(filters)
     if filter_str != "":
         filter_str = f"AND {filter_str}"
-    count_col = [c for c in columns if c.startswith("cnt")][0]
+    count_col = next(c for c in columns if c.startswith("cnt"))
     columns.remove(count_col)
     select_str = f"{query_params['column']}, sum({count_col}) as {count_col}"
     group_str = f"{query_params['column']}"
diff --git a/src/handlers/shared/functions.py b/src/handlers/shared/functions.py
index 2309ef8..5a5e5a8 100644
--- a/src/handlers/shared/functions.py
+++ b/src/handlers/shared/functions.py
@@ -3,7 +3,6 @@
 import json
 import logging
 from datetime import datetime, timezone
-from typing import Optional
 
 import boto3
 
@@ -77,7 +76,7 @@ def update_metadata(
     data_package: str,
     version: str,
     target: str,
-    dt: Optional[datetime] = None,
+    dt: datetime | None = None,
     meta_type: str = JsonFilename.TRANSACTIONS.value,
 ):
     """Safely updates items in metadata dictionary
diff --git a/src/handlers/site_upload/api_gateway_authorizer.py b/src/handlers/site_upload/api_gateway_authorizer.py
index aee9a4c..97800fa 100644
--- a/src/handlers/site_upload/api_gateway_authorizer.py
+++ b/src/handlers/site_upload/api_gateway_authorizer.py
@@ -3,7 +3,6 @@
 """
 
 # pylint: disable=invalid-name,pointless-string-statement
-from __future__ import print_function
 
 import os
 import re
@@ -28,7 +27,7 @@ def lambda_handler(event, context):
         if auth_token not in user_db.keys() or auth_header[0] != "Basic":
             raise AuthError
     except (AuthError, KeyError):
-        raise AuthError(event)  # pylint: disable=raise-missing-from
+        raise AuthError(event)  # noqa: B904
     principalId = user_db[auth_token]["site"]
 
 
@@ -66,7 +65,7 @@ class HttpVerb:
     ALL = "*"
 
 
-class AuthPolicy(object):  # pylint: disable=missing-class-docstring; # pragma: no cover
+class AuthPolicy:  # pylint: disable=missing-class-docstring; # pragma: no cover
     awsAccountId = ""
     """The AWS account id the policy will be generated for. This is used to create
    the method ARNs."""
@@ -81,8 +80,8 @@ class AuthPolicy(object):  # pylint: disable=missing-class-docstring; # pragma:
     conditions statement. the build method processes these lists and generates
     the approriate statements for the final policy"""
 
-    allowMethods = []
-    denyMethods = []
+    allowMethods = []  # noqa: RUF012
+    denyMethods = []  # noqa: RUF012
 
     restApiId = "<>"
     """ Replace the placeholder value with a default API Gateway API id to be used in
@@ -211,7 +210,7 @@ def allowMethodWithConditions(self, verb, resource, conditions):
         methods and includes a condition for the policy statement.
         More on AWS policy conditions here:
         http://docs.aws.amazon.com/IAM/latest/UserGuide/reference_policies_elements.html#Condition
-        """  # noqa: E501
+        """
         self._addMethod("Allow", verb, resource, conditions)
 
     def denyMethodWithConditions(self, verb, resource, conditions):
@@ -219,7 +218,7 @@ def denyMethodWithConditions(self, verb, resource, conditions):
         methods and includes a condition for the policy statement.
         More on AWS policy conditions here:
         http://docs.aws.amazon.com/IAM/latest/UserGuide/reference_policies_elements.html#Condition
-        """  # noqa: E501
+        """
         self._addMethod("Deny", verb, resource, conditions)
 
     def build(self):
diff --git a/src/handlers/site_upload/powerset_merge.py b/src/handlers/site_upload/powerset_merge.py
index e65a41f..47134fc 100644
--- a/src/handlers/site_upload/powerset_merge.py
+++ b/src/handlers/site_upload/powerset_merge.py
@@ -189,7 +189,7 @@ def expand_and_concat_sets(
         .reset_index()
         # this last line makes "cnt" the first column in the set, matching the
         # library style
-        .filter(["cnt"] + data_cols)
+        .filter(["cnt", *data_cols])
     )
     return agg_df
 
@@ -209,8 +209,9 @@ def generate_csv_from_parquet(bucket_name: str, bucket_root: str, subbucket_path
     awswrangler.s3.to_csv(
         last_valid_df,
         (
-            f"s3://{bucket_name}/{bucket_root}/"
-            f"{subbucket_path}".replace(".parquet", ".csv")
+            f"s3://{bucket_name}/{bucket_root}/" f"{subbucket_path}".replace(
+                ".parquet", ".csv"
+            )
         ),
         index=False,
         quoting=csv.QUOTE_NONE,
@@ -250,7 +251,6 @@ def merge_powersets(manager: S3Manager) -> None:
                 e,
             )
     for latest_path in latest_file_list:
-
         if manager.version not in latest_path:
             continue
         site_specific_name = get_s3_site_filename_suffix(latest_path)
@@ -315,6 +315,14 @@ def merge_powersets(manager: S3Manager) -> None:
                     manager.site,
                 )
             manager.update_local_metadata(TransactionKeys.LAST_AGGREGATION.value)
+
+        if df.empty:
+            manager.merge_error_handler(
+                latest_path,
+                subbucket_path,
+                OSError("File not found"),
+            )
+
     manager.write_local_metadata()
 
     # In this section, we are trying to accomplish two things:
diff --git a/template.yaml b/template.yaml
index b053489..a6868ac 100644
--- a/template.yaml
+++ b/template.yaml
@@ -60,7 +60,7 @@ Resources:
     Properties:
       FunctionName: !Sub 'CumulusAggFetchAuthorizer-${DeployStage}'
       Handler: src/handlers/site_upload/api_gateway_authorizer.lambda_handler
-      Runtime: python3.9
+      Runtime: python3.10
       MemorySize: 128
       Timeout: 100
       Description: Validates credentials before providing signed urls
@@ -83,7 +83,7 @@
     Properties:
       FunctionName: !Sub 'CumulusAggFetchUploadUrl-${DeployStage}'
       Handler: src/handlers/site_upload/fetch_upload_url.upload_url_handler
-      Runtime: python3.9
+      Runtime: python3.10
       MemorySize: 128
       Timeout: 100
       Description: Generates a presigned URL for uploading files to S3
@@ -113,7 +113,7 @@
     Properties:
      FunctionName: !Sub 'CumulusAggProcessUpload-${DeployStage}'
       Handler: src/handlers/site_upload/process_upload.process_upload_handler
-      Runtime: python3.9
+      Runtime: python3.10
       MemorySize: 128
       Timeout: 800
       Description: Handles initial relocation of upload data
@@ -142,7 +142,7 @@
       FunctionName: !Sub 'CumulusAggPowersetMerge-${DeployStage}'
       Layers: [arn:aws:lambda:us-east-1:336392948345:layer:AWSSDKPandas-Python39:1]
       Handler: src/handlers/site_upload/powerset_merge.powerset_merge_handler
-      Runtime: python3.9
+      Runtime: python3.10
       MemorySize: 8192
       Timeout: 800
       Description: Merges and aggregates powerset count data
@@ -173,7 +173,7 @@
       FunctionName: !Sub 'CumulusAggStudyPeriod-${DeployStage}'
       Layers: [arn:aws:lambda:us-east-1:336392948345:layer:AWSSDKPandas-Python39:1]
       Handler: src/handlers/site_upload/study_period.study_period_handler
-      Runtime: python3.9
+      Runtime: python3.10
       MemorySize: 512
       Timeout: 800
       Description: Handles metadata outside of upload/processing for studies
@@ -201,7 +201,7 @@
       FunctionName: !Sub 'CumulusAggCacheAPI-${DeployStage}'
       Layers: [arn:aws:lambda:us-east-1:336392948345:layer:AWSSDKPandas-Python39:1]
       Handler: src/handlers/site_upload/cache_api.cache_api_handler
-      Runtime: python3.9
+      Runtime: python3.10
       MemorySize: 512
       Timeout: 800
       Description: Caches selected database queries to S3
@@ -249,7 +249,7 @@
       FunctionName: !Sub 'CumulusAggDashboardGetChartData-${DeployStage}'
       Layers: [arn:aws:lambda:us-east-1:336392948345:layer:AWSSDKPandas-Python39:1]
       Handler: src/handlers/dashboard/get_chart_data.chart_data_handler
-      Runtime: python3.9
+      Runtime: python3.10
       MemorySize: 2048
       Timeout: 100
       Description: Retrieve data for chart display in Cumulus Dashboard
@@ -296,7 +296,7 @@
     Properties:
       FunctionName: !Sub 'CumulusAggDashboardGetMetadata-${DeployStage}'
       Handler: src/handlers/dashboard/get_metadata.metadata_handler
-      Runtime: python3.9
+      Runtime: python3.10
       MemorySize: 128
       Timeout: 100
       Description: Retrieve data about site uploads
@@ -344,7 +344,7 @@
       FunctionName: !Sub 'CumulusAggDashboardDataPackages-${DeployStage}'
       Layers: [arn:aws:lambda:us-east-1:336392948345:layer:AWSSDKPandas-Python39:1]
       Handler: src/handlers/dashboard/get_data_packages.data_packages_handler
-      Runtime: python3.9
+      Runtime: python3.10
       MemorySize: 512
       Timeout: 100
       Description: Retrieve data for chart display in Cumulus Dashboard
@@ -400,7 +400,7 @@
     Properties:
       FunctionName: !Sub 'CumulusAggDashboardStudyPeriods-${DeployStage}'
       Handler: src/handlers/dashboard/get_study_periods.study_periods_handler
-      Runtime: python3.9
+      Runtime: python3.10
       MemorySize: 128
       Timeout: 100
       Description: Retrieve data about the study period
diff --git a/tests/conftest.py b/tests/conftest.py
index dfcc345..b011150 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -27,8 +27,8 @@
 import boto3
 import pytest
 from moto import mock_athena, mock_s3, mock_sns
-from scripts.credential_management import create_auth, create_meta
 
+from scripts.credential_management import create_auth, create_meta
 from src.handlers.shared.enums import BucketPath, JsonFilename
 from src.handlers.shared.functions import write_metadata
 from tests.utils import (
diff --git a/tests/dashboard/test_filter_config.py b/tests/dashboard/test_filter_config.py
index a4ce9dc..c804c2b 100644
--- a/tests/dashboard/test_filter_config.py
+++ b/tests/dashboard/test_filter_config.py
@@ -4,7 +4,7 @@
 
 
 @pytest.mark.parametrize(
-    "input,output",
+    "input_str,output_str",
     [
         # Checking individual conversions
         (["col:strEq:str"], "col LIKE 'str'"),
@@ -141,5 +141,5 @@
         ),
     ],
 )
-def test_filter_string(input, output):
-    assert get_filter_string(input) == output
+def test_filter_string(input_str, output_str):
+    assert get_filter_string(input_str) == output_str
diff --git a/tests/dashboard/test_get_chart_data.py b/tests/dashboard/test_get_chart_data.py
index 6ccb89b..8decd12 100644
--- a/tests/dashboard/test_get_chart_data.py
+++ b/tests/dashboard/test_get_chart_data.py
@@ -2,7 +2,6 @@
 import os
 from unittest import mock
 
-import boto3
 import pandas
 import pytest
 
@@ -10,11 +9,8 @@
 from tests.utils import (
     EXISTING_DATA_P,
     EXISTING_STUDY,
-    EXISTING_VERSION,
     MOCK_ENV,
-    TEST_BUCKET,
     TEST_GLUE_DB,
-    TEST_WORKGROUP,
 )
 
 
@@ -22,9 +18,9 @@ def mock_get_table_cols(name):
     return ["cnt", "gender", "race"]
 
 
-def mock_data_frame(filter):
+def mock_data_frame(filter_param):
     df = pandas.read_csv("tests/test_data/cube_simple_example.csv", na_filter=False)
-    if filter != []:
+    if filter_param != []:
         df = df[df["gender"] == "female"]
     return df
 
@@ -109,8 +105,6 @@ def test_format_payload(query_params, filters, expected_payload):
 
 
 def test_get_data_cols(mock_bucket):
-    s3_client = boto3.client("s3", region_name="us-east-1")
-    s3_res = s3_client.list_objects_v2(Bucket=TEST_BUCKET)
     table_name = f"{EXISTING_STUDY}__{EXISTING_DATA_P}"
     res = get_chart_data._get_table_cols(table_name)
     cols = pandas.read_csv("./tests/test_data/count_synthea_patient_agg.csv").columns
diff --git a/tests/dashboard/test_get_metadata.py b/tests/dashboard/test_get_metadata.py
index 586815f..7166a9b 100644
--- a/tests/dashboard/test_get_metadata.py
+++ b/tests/dashboard/test_get_metadata.py
@@ -1,24 +1,14 @@
 import json
-import os
-from datetime import datetime, timezone
-from unittest import mock
 
-import boto3
 import pytest
 
 from src.handlers.dashboard.get_metadata import metadata_handler
-from src.handlers.shared.enums import BucketPath
-from src.handlers.shared.functions import read_metadata, write_metadata
 from tests.utils import (
     EXISTING_DATA_P,
     EXISTING_SITE,
     EXISTING_STUDY,
-    EXISTING_VERSION,
     NEW_SITE,
     NEW_STUDY,
-    OTHER_SITE,
-    OTHER_STUDY,
-    TEST_BUCKET,
     get_mock_metadata,
 )
 
diff --git a/tests/dashboard/test_get_study_periods.py b/tests/dashboard/test_get_study_periods.py
index 408a4e2..4e9c0a2 100644
--- a/tests/dashboard/test_get_study_periods.py
+++ b/tests/dashboard/test_get_study_periods.py
@@ -1,23 +1,13 @@
 import json
-import os
-from datetime import datetime, timezone
-from unittest import mock
 
-import boto3
 import pytest
 
 from src.handlers.dashboard.get_study_periods import study_periods_handler
-from src.handlers.shared.enums import BucketPath
-from src.handlers.shared.functions import read_metadata, write_metadata
 from tests.utils import (
-    EXISTING_DATA_P,
     EXISTING_SITE,
     EXISTING_STUDY,
-    EXISTING_VERSION,
     NEW_SITE,
     NEW_STUDY,
-    OTHER_SITE,
-    OTHER_STUDY,
     get_mock_study_metadata,
 )
 
diff --git a/tests/dashboard/test_get_subscriptions.py b/tests/dashboard/test_get_subscriptions.py
index 5b928ca..5fcfff3 100644
--- a/tests/dashboard/test_get_subscriptions.py
+++ b/tests/dashboard/test_get_subscriptions.py
@@ -1,12 +1,8 @@
 import os
 from unittest import mock
 
-import awswrangler
-import pandas
-from pytest_mock import MockerFixture
-
 from src.handlers.dashboard.get_data_packages import data_packages_handler
-from tests.utils import DATA_PACKAGE_COUNT, MOCK_ENV, get_mock_metadata
+from tests.utils import DATA_PACKAGE_COUNT, MOCK_ENV
 
 
 @mock.patch.dict(os.environ, MOCK_ENV)
diff --git a/tests/site_upload/test_api_gateway_authorizer.py b/tests/site_upload/test_api_gateway_authorizer.py
index 94bdc82..19c58cb 100644
--- a/tests/site_upload/test_api_gateway_authorizer.py
+++ b/tests/site_upload/test_api_gateway_authorizer.py
@@ -1,21 +1,17 @@
-import json
-import os
 from contextlib import nullcontext as does_not_raise
-from unittest import mock
 
 import pytest
-from pytest_mock import MockerFixture
 
-from src.handlers.site_upload.api_gateway_authorizer import lambda_handler
-from tests.utils import TEST_BUCKET, get_mock_auth
+from src.handlers.site_upload import api_gateway_authorizer
+from tests import utils
 
 
 @pytest.mark.parametrize(
     "auth,expects",
     [
-        (f"Basic {list(get_mock_auth().keys())[0]}", does_not_raise()),
-        ("Basic other_auth", pytest.raises(Exception)),
-        (None, pytest.raises(Exception)),
+        (f"Basic {next(iter(utils.get_mock_auth().keys()))}", does_not_raise()),
+        ("Basic other_auth", pytest.raises(api_gateway_authorizer.AuthError)),
+        (None, pytest.raises(AttributeError)),
     ],
 )
 def test_validate_pw(auth, expects, mock_bucket):
@@ -25,4 +21,5 @@ def test_validate_pw(auth, expects, mock_bucket):
         "methodArn": "arn:aws:execute-api:us-east-1:11223:123/Prod/post/lambda",
     }
     with expects:
-        res = lambda_handler(event, {})
+        res = api_gateway_authorizer.lambda_handler(event, {})
+        assert res["policyDocument"]["Statement"][0]["Effect"] == "Allow"
diff --git a/tests/site_upload/test_cache_api.py b/tests/site_upload/test_cache_api.py
index 836677b..3953355 100644
--- a/tests/site_upload/test_cache_api.py
+++ b/tests/site_upload/test_cache_api.py
@@ -1,7 +1,6 @@
 import os
 from unittest import mock
 
-import awswrangler
 import pandas
 import pytest
 
@@ -24,7 +23,6 @@ def mock_data_packages(*args, **kwargs):
     ],
 )
 def test_cache_api(mocker, mock_bucket, subject, message, mock_result, status):
-
     mock_query_result = mocker.patch("awswrangler.athena.read_sql_query")
     mock_query_result.side_effect = mock_result
     event = {"Records": [{"Sns": {"Subject": subject, "Message": message}}]}
diff --git a/tests/site_upload/test_fetch_upload_url.py b/tests/site_upload/test_fetch_upload_url.py
index 766c57b..b96417b 100644
--- a/tests/site_upload/test_fetch_upload_url.py
+++ b/tests/site_upload/test_fetch_upload_url.py
@@ -1,8 +1,5 @@
 import json
-import os
-from unittest import mock
 
-import boto3
 import pytest
 
 from src.handlers.shared.enums import BucketPath
@@ -12,8 +9,6 @@
     EXISTING_SITE,
     EXISTING_STUDY,
     EXISTING_VERSION,
-    TEST_BUCKET,
-    get_mock_metadata,
 )
 
 
diff --git a/tests/site_upload/test_powerset_merge.py b/tests/site_upload/test_powerset_merge.py
index 52fbed8..daa2585 100644
--- a/tests/site_upload/test_powerset_merge.py
+++ b/tests/site_upload/test_powerset_merge.py
@@ -11,7 +11,7 @@
 from pandas import DataFrame, read_parquet
 
 from src.handlers.shared.enums import BucketPath
-from src.handlers.shared.functions import read_metadata, write_metadata
+from src.handlers.shared.functions import read_metadata
 from src.handlers.site_upload.powerset_merge import (
     MergeError,
     expand_and_concat_sets,
@@ -25,12 +25,9 @@
     EXISTING_VERSION,
     ITEM_COUNT,
     MOCK_ENV,
-    NEW_DATA_P,
     NEW_SITE,
     NEW_STUDY,
     NEW_VERSION,
-    OTHER_SITE,
-    OTHER_STUDY,
     TEST_BUCKET,
     get_mock_metadata,
 )
@@ -213,7 +210,7 @@ def test_powerset_merge_single_upload(
                 "study"
             ]["encounter"]["099"]["last_aggregation"]
         )
-    if upload_file is not None:
+    if upload_file is not None and study != NEW_STUDY:
         # checking to see that merge powerset didn't touch last upload
         assert (
             metadata[site][study][data_package.split("__")[1]][version][
@@ -263,7 +260,6 @@ def test_powerset_merge_join_study_data(
     mock_bucket,
     mock_notification,
 ):
-
     s3_client = boto3.client("s3", region_name="us-east-1")
     s3_client.upload_file(
         upload_file,
@@ -340,7 +336,7 @@
 def test_expand_and_concat(mock_bucket, upload_file, load_empty, raises):
     with raises:
         df = read_parquet("./tests/test_data/count_synthea_patient_agg.parquet")
-        s3_path = f"/test/uploaded.parquet"
+        s3_path = "/test/uploaded.parquet"
         s3_client = boto3.client("s3", region_name="us-east-1")
         s3_client.upload_file(
             upload_file,
diff --git a/tests/site_upload/test_process_upload.py b/tests/site_upload/test_process_upload.py
index 787ca05..2c51b85 100644
--- a/tests/site_upload/test_process_upload.py
+++ b/tests/site_upload/test_process_upload.py
@@ -1,4 +1,3 @@
-import os
 from datetime import datetime, timezone
 
 import boto3
@@ -6,7 +5,7 @@
 from freezegun import freeze_time
 
 from src.handlers.shared.enums import BucketPath
-from src.handlers.shared.functions import read_metadata, write_metadata
+from src.handlers.shared.functions import read_metadata
 from src.handlers.site_upload.process_upload import process_upload_handler
 from tests.utils import (
     EXISTING_DATA_P,
@@ -16,10 +15,7 @@
     ITEM_COUNT,
     NEW_DATA_P,
     NEW_SITE,
-    NEW_STUDY,
     NEW_VERSION,
-    OTHER_SITE,
-    OTHER_STUDY,
     TEST_BUCKET,
 )
 
diff --git a/tests/site_upload/test_study_period.py b/tests/site_upload/test_study_period.py
index 2657161..61ce6fe 100644
--- a/tests/site_upload/test_study_period.py
+++ b/tests/site_upload/test_study_period.py
@@ -1,5 +1,4 @@
 import csv
-import os
 from datetime import datetime, timezone
 
 import boto3
@@ -7,21 +6,17 @@
 from freezegun import freeze_time
 
 from src.handlers.shared.enums import BucketPath
-from src.handlers.shared.functions import read_metadata, write_metadata
+from src.handlers.shared.functions import read_metadata
 from src.handlers.site_upload.study_period import study_period_handler
 from tests.utils import (
     EXISTING_DATA_P,
     EXISTING_SITE,
     EXISTING_STUDY,
     EXISTING_VERSION,
-    NEW_DATA_P,
     NEW_SITE,
     NEW_STUDY,
     NEW_VERSION,
-    OTHER_SITE,
-    OTHER_STUDY,
     TEST_BUCKET,
-    get_mock_study_metadata,
 )
 
 
@@ -114,7 +109,7 @@ def test_process_upload(
         metadata[site][study][version]["last_data_update"]
         == datetime.now(timezone.utc).isoformat()
     )
-    with open("./tests/test_data/meta_date.csv", "r") as file:
+    with open("./tests/test_data/meta_date.csv") as file:
         reader = csv.reader(file)
         # discarding CSV header row
         next(reader)