
Commit

PR feedback
dogversioning committed Oct 15, 2024
1 parent 8e5589e commit 5ad23a8
Showing 7 changed files with 34 additions and 33 deletions.
17 changes: 8 additions & 9 deletions scripts/migrations/migration.004.s3_name_with_id.py
@@ -29,15 +29,14 @@ def s3_name_with_id(bucket: str):
     contents = res["Contents"]
     moved_files = 0
     for s3_file in contents:
-        if s3_file["Key"].split("/")[0] == "aggregates":
-            key = s3_file["Key"]
-            key_array = key.split("/")
-            if len(key_array[3]) == 3:
-                key_array[3] = f"{key_array[2]}_{key_array[3]}"
-                new_key = "/".join(key_array)
-                client.copy({"Bucket": bucket, "Key": key}, bucket, new_key)
-                client.delete_object(Bucket=bucket, Key=key)
-                moved_files += 1
+        key = s3_file["Key"]
+        key_array = key.split("/")
+        if key_array[0] == "aggregates" and len(key_array[3]) == 3:
+            key_array[3] = f"{key_array[2]}__{key_array[3]}"
+            new_key = "/".join(key_array)
+            client.copy({"Bucket": bucket, "Key": key}, bucket, new_key)
+            client.delete_object(Bucket=bucket, Key=key)
+            moved_files += 1
     print(f"Updated {moved_files} aggregates")
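The rewrite flattens the nested conditionals and, more importantly, switches the renamed path segment to a double-underscore delimiter. A dry-run sketch of the new key transformation, with no S3 calls; the sample key and package name here are hypothetical, not taken from the repo:

    # Illustrative dry run of the migration's rename logic; sample key is hypothetical.
    key = "aggregates/study/study__encounter/100/study__encounter__aggregate.parquet"
    key_array = key.split("/")
    if key_array[0] == "aggregates" and len(key_array[3]) == 3:
        # bare version folder "100" becomes "study__encounter__100"
        key_array[3] = f"{key_array[2]}__{key_array[3]}"
    print("/".join(key_array))
    # aggregates/study/study__encounter/study__encounter__100/study__encounter__aggregate.parquet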


7 changes: 5 additions & 2 deletions src/handlers/dashboard/get_chart_data.py
@@ -26,10 +26,11 @@ def _get_table_cols(dp_id: str, version: str | None = None) -> list:
     """
 
     s3_bucket_name = os.environ.get("BUCKET_NAME")
-    prefix = f"{enums.BucketPath.CSVAGGREGATE.value}/{dp_id.split('__')[0]}/{dp_id[:-4]}"
+    dp_name = dp_id.rsplit("__", 1)[0]
+    prefix = f"{enums.BucketPath.CSVAGGREGATE.value}/{dp_id.split('__')[0]}/{dp_name}"
     if version is None:
         version = functions.get_latest_data_package_version(s3_bucket_name, prefix)
-    s3_key = f"{prefix}/{version}/{dp_id[:-4]}__aggregate.csv"
+    s3_key = f"{prefix}/{version}/{dp_name}__aggregate.csv"
     s3_client = boto3.client("s3")
     try:
         s3_iter = s3_client.get_object(
@@ -117,6 +118,8 @@ def chart_data_handler(event, context):
         res = _format_payload(df, query_params, filters)
         res = functions.http_response(200, res)
     except errors.AggregatorS3Error:
+        # while the API is publicly accessible, we've been asked to not pass
+        # helpful error messages back. revisit when dashboard is in AWS.
         res = functions.http_response(
             404, f"Aggregate for {path_params['data_package_id']} not found"
         )
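The `dp_name` line in the first hunk is the substantive fix: the old `dp_id[:-4]` assumed the id ended in a single-character delimiter plus an exactly-3-character version. A hedged before/after sketch; the `dp_id` value is hypothetical:

    # Illustrative only; this dp_id value is hypothetical.
    dp_id = "study__count_synthea_patient__100"

    # old: chop four characters, assuming "_NNN" at the end
    print(dp_id[:-4])                # study__count_synthea_patient_  (stray delimiter)

    # new: split on the last double-underscore delimiter, any version length
    print(dp_id.rsplit("__", 1)[0])  # study__count_synthea_patient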
4 changes: 2 additions & 2 deletions src/handlers/site_upload/powerset_merge.py
@@ -44,7 +44,7 @@ def __init__(self, event):
         self.study = s3_key_array[1]
         self.data_package = s3_key_array[2].split("__")[1]
         self.site = s3_key_array[3]
-        self.version = s3_key_array[4][-3:]
+        self.version = s3_key_array[4].split("__")[-1]
         self.metadata = functions.read_metadata(self.s3_client, self.s3_bucket_name)
         self.types_metadata = functions.read_metadata(
             self.s3_client,
@@ -87,7 +87,7 @@ def write_parquet(self, df: pandas.DataFrame, is_new_data_package: bool) -> None
         parquet_aggregate_path = (
             f"s3://{self.s3_bucket_name}/{enums.BucketPath.AGGREGATE.value}/"
             f"{self.study}/{self.study}__{self.data_package}/"
-            f"{self.study}__{self.data_package}_{self.version}/"
+            f"{self.study}__{self.data_package}__{self.version}/"
             f"{self.study}__{self.data_package}__aggregate.parquet"
         )
         awswrangler.s3.to_parquet(df, parquet_aggregate_path, index=False)
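These two hunks keep the writer and the parser in sync: the version directory becomes `{study}__{data_package}__{version}`, and `__init__` recovers the version by splitting on `__` instead of slicing the last three characters, so versions longer than three characters survive the round trip. A hedged sketch; the key layout is inferred from the diff and the values are hypothetical:

    # Illustrative only; key contents are hypothetical.
    s3_key = "site_upload/study/study__encounter/hospital_a/study__encounter__1001/data.parquet"
    s3_key_array = s3_key.split("/")

    print(s3_key_array[4][-3:])             # 001   -- old slice truncates a 4-char version
    print(s3_key_array[4].split("__")[-1])  # 1001  -- new parse handles any length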
10 changes: 5 additions & 5 deletions template.yaml
@@ -350,7 +350,7 @@ Resources:
           Type: Api
           Properties:
             RestApiId: !Ref DashboardApiGateway
-            Path: /data_packages/{data_package_id}/chart
+            Path: /data-packages/{data_package_id}/chart
             Method: GET
             RequestParameters:
               - method.request.querystring.column:
@@ -417,7 +417,7 @@ Resources:
           Type: Api
           Properties:
             RestApiId: !Ref DashboardApiGateway
-            Path: /last_valid/{study}/{data_package}/{site}/{version}/{filename}
+            Path: /last-valid/{study}/{data_package}/{site}/{version}/{filename}
             Method: GET
       Policies:
         - S3ReadPolicy:
@@ -463,7 +463,7 @@ Resources:
           Type: Api
           Properties:
             RestApiId: !Ref DashboardApiGateway
-            Path: /last_valid/
+            Path: /last-valid/
             Method: GET
       Policies:
         - S3ReadPolicy:
@@ -571,7 +571,7 @@ Resources:
           Type: Api
           Properties:
             RestApiId: !Ref DashboardApiGateway
-            Path: /data_packages
+            Path: /data-packages
             Method: GET
             RequestParameters:
               - method.request.querystring.name:
@@ -580,7 +580,7 @@
           Type: Api
           Properties:
             RestApiId: !Ref DashboardApiGateway
-            Path: /data_packages/{data_package_id}
+            Path: /data-packages/{data_package_id}
             Method: GET
       # TODO: it :should: be possible to move these policies to a central role/policy
       # set that can be referenced in multiple places; see
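All five routes move from snake_case (`/data_packages`, `/last_valid`) to kebab-case, so any client of this gateway needs the matching rename. A minimal hedged sketch; the base URL below is a placeholder, not the real deployed stage:

    # Illustrative only; BASE_URL is a placeholder for the deployed API stage.
    import requests

    BASE_URL = "https://example.execute-api.us-east-1.amazonaws.com/prod"

    # was /data_packages; now /data-packages
    res = requests.get(f"{BASE_URL}/data-packages", params={"name": "encounter"})
    res.raise_for_status()
    print(res.json())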
2 changes: 1 addition & 1 deletion tests/conftest.py
@@ -49,7 +49,7 @@ def _init_mock_data(s3_client, bucket, study, data_package, version):
         "./tests/test_data/count_synthea_patient_agg.parquet",
         bucket,
         f"{enums.BucketPath.AGGREGATE.value}/{study}/"
-        f"{study}__{data_package}/{study}__{data_package}_{version}/"
+        f"{study}__{data_package}/{study}__{data_package}__{version}/"
         f"{study}__{data_package}__aggregate.parquet",
     )
     s3_client.upload_file(
4 changes: 2 additions & 2 deletions tests/dashboard/test_get_chart_data.py
@@ -104,8 +104,8 @@ def test_format_payload(query_params, filters, expected_payload):
 
 
 def test_get_data_cols(mock_bucket):
-    table_name = f"{EXISTING_STUDY}__{EXISTING_DATA_P}_{EXISTING_VERSION}"
-    res = get_chart_data._get_table_cols(table_name)
+    table_id = f"{EXISTING_STUDY}__{EXISTING_DATA_P}__{EXISTING_VERSION}"
+    res = get_chart_data._get_table_cols(table_id)
     cols = pandas.read_csv("./tests/test_data/count_synthea_patient_agg.csv").columns
     assert res == list(cols)
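The renamed `table_id` reflects the `{study}__{data_package}__{version}` convention. Since study and package names can themselves contain single underscores (e.g. `count_synthea_patient`), only the double underscore is a safe component delimiter. A hedged sketch of the round trip; the component values are assumed, not read from the fixtures:

    # Illustrative only; component values are assumed, not the real fixtures.
    study, data_package, version = "study", "count_synthea_patient", "100"
    table_id = f"{study}__{data_package}__{version}"

    # single underscores inside the package name do not split on "__"
    assert table_id.rsplit("__", 1)[0] == f"{study}__{data_package}"
    assert table_id.split("__")[-1] == version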

23 changes: 11 additions & 12 deletions tests/site_upload/test_powerset_merge.py
@@ -92,17 +92,17 @@
             200,
             ITEM_COUNT + 4,
         ),
-        # ( # ensuring that a data package that is a substring does not get
-        #     # merged by substr match
-        #     "./tests/test_data/count_synthea_patient.parquet",
-        #     f"/{EXISTING_STUDY}/{EXISTING_STUDY}__{EXISTING_DATA_P[0:-2]}/{EXISTING_SITE}/"
-        #     f"{EXISTING_STUDY}__{EXISTING_DATA_P[0:-2]}_{EXISTING_VERSION}/encount.parquet",
-        #     f"/{EXISTING_STUDY}/{EXISTING_STUDY}__{EXISTING_DATA_P[0:-2]}/"
-        #     f"{EXISTING_STUDY}__{EXISTING_DATA_P[0:-2]}_{EXISTING_VERSION}/encount.parquet",
-        #     False,
-        #     200,
-        #     ITEM_COUNT + 4,
-        # ),
+        ( # ensuring that a data package that is a substring does not get
+            # merged by substr match
+            "./tests/test_data/count_synthea_patient.parquet",
+            f"/{EXISTING_STUDY}/{EXISTING_STUDY}__{EXISTING_DATA_P[0:-2]}/{EXISTING_SITE}/"
+            f"{EXISTING_VERSION}/encount.parquet",
+            f"/{EXISTING_STUDY}/{EXISTING_STUDY}__{EXISTING_DATA_P[0:-2]}/{EXISTING_SITE}/"
+            f"{EXISTING_VERSION}/encount.parquet",
+            False,
+            200,
+            ITEM_COUNT + 4,
+        ),
         ( # Empty file upload
             None,
             f"/{NEW_STUDY}/{NEW_STUDY}__{EXISTING_DATA_P}/{EXISTING_SITE}"
@@ -136,7 +136,6 @@ def test_powerset_merge_single_upload(
     mock_notification,
 ):
     s3_client = boto3.client("s3", region_name="us-east-1")
-    s3_res = s3_client.list_objects_v2(Bucket=TEST_BUCKET)
     if upload_file is not None:
         s3_client.upload_file(
             upload_file,
