
Commit

PR feedback
dogversioning committed Oct 15, 2024
1 parent 8e5589e commit 5ad23a8
Showing 7 changed files with 34 additions and 33 deletions.
17 changes: 8 additions & 9 deletions scripts/migrations/migration.004.s3_name_with_id.py
@@ -29,15 +29,14 @@ def s3_name_with_id(bucket: str):
     contents = res["Contents"]
     moved_files = 0
     for s3_file in contents:
-        if s3_file["Key"].split("/")[0] == "aggregates":
-            key = s3_file["Key"]
-            key_array = key.split("/")
-            if len(key_array[3]) == 3:
-                key_array[3] = f"{key_array[2]}_{key_array[3]}"
-                new_key = "/".join(key_array)
-                client.copy({"Bucket": bucket, "Key": key}, bucket, new_key)
-                client.delete_object(Bucket=bucket, Key=key)
-                moved_files += 1
+        key = s3_file["Key"]
+        key_array = key.split("/")
+        if key_array[0] == "aggregates" and len(key_array[3]) == 3:
+            key_array[3] = f"{key_array[2]}__{key_array[3]}"
+            new_key = "/".join(key_array)
+            client.copy({"Bucket": bucket, "Key": key}, bucket, new_key)
+            client.delete_object(Bucket=bucket, Key=key)
+            moved_files += 1
     print(f"Updated {moved_files} aggregates")
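The rewrite flattens the nested conditionals and, more importantly, switches the renamed path segment to a double-underscore delimiter. A dry-run sketch of the new key transformation, with no S3 calls; the sample key and package name here are hypothetical, not taken from the repo:

    # Illustrative dry run of the migration's rename logic; sample key is hypothetical.
    key = "aggregates/study/study__encounter/100/study__encounter__aggregate.parquet"
    key_array = key.split("/")
    if key_array[0] == "aggregates" and len(key_array[3]) == 3:
        # bare version folder "100" becomes "study__encounter__100"
        key_array[3] = f"{key_array[2]}__{key_array[3]}"
    print("/".join(key_array))
    # aggregates/study/study__encounter/study__encounter__100/study__encounter__aggregate.parquet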


7 changes: 5 additions & 2 deletions src/handlers/dashboard/get_chart_data.py
@@ -26,10 +26,11 @@ def _get_table_cols(dp_id: str, version: str | None = None) -> list:
     """
 
     s3_bucket_name = os.environ.get("BUCKET_NAME")
-    prefix = f"{enums.BucketPath.CSVAGGREGATE.value}/{dp_id.split('__')[0]}/{dp_id[:-4]}"
+    dp_name = dp_id.rsplit("__", 1)[0]
+    prefix = f"{enums.BucketPath.CSVAGGREGATE.value}/{dp_id.split('__')[0]}/{dp_name}"
     if version is None:
         version = functions.get_latest_data_package_version(s3_bucket_name, prefix)
-    s3_key = f"{prefix}/{version}/{dp_id[:-4]}__aggregate.csv"
+    s3_key = f"{prefix}/{version}/{dp_name}__aggregate.csv"
     s3_client = boto3.client("s3")
     try:
         s3_iter = s3_client.get_object(
@@ -117,6 +118,8 @@ def chart_data_handler(event, context):
         res = _format_payload(df, query_params, filters)
         res = functions.http_response(200, res)
     except errors.AggregatorS3Error:
+        # while the API is publicly accessible, we've been asked to not pass
+        # helpful error messages back. revisit when dashboard is in AWS.
         res = functions.http_response(
             404, f"Aggregate for {path_params['data_package_id']} not found"
         )
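The `dp_name` line in the first hunk is the substantive fix: the old `dp_id[:-4]` assumed the id ended in a single-character delimiter plus an exactly-3-character version. A hedged before/after sketch; the `dp_id` value is hypothetical:

    # Illustrative only; this dp_id value is hypothetical.
    dp_id = "study__count_synthea_patient__100"

    # old: chop four characters, assuming "_NNN" at the end
    print(dp_id[:-4])                # study__count_synthea_patient_  (stray delimiter)

    # new: split on the last double-underscore delimiter, any version length
    print(dp_id.rsplit("__", 1)[0])  # study__count_synthea_patient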
4 changes: 2 additions & 2 deletions src/handlers/site_upload/powerset_merge.py
@@ -44,7 +44,7 @@ def __init__(self, event):
         self.study = s3_key_array[1]
         self.data_package = s3_key_array[2].split("__")[1]
         self.site = s3_key_array[3]
-        self.version = s3_key_array[4][-3:]
+        self.version = s3_key_array[4].split("__")[-1]
         self.metadata = functions.read_metadata(self.s3_client, self.s3_bucket_name)
         self.types_metadata = functions.read_metadata(
             self.s3_client,
@@ -87,7 +87,7 @@ def write_parquet(self, df: pandas.DataFrame, is_new_data_package: bool) -> None
         parquet_aggregate_path = (
             f"s3://{self.s3_bucket_name}/{enums.BucketPath.AGGREGATE.value}/"
             f"{self.study}/{self.study}__{self.data_package}/"
-            f"{self.study}__{self.data_package}_{self.version}/"
+            f"{self.study}__{self.data_package}__{self.version}/"
             f"{self.study}__{self.data_package}__aggregate.parquet"
         )
         awswrangler.s3.to_parquet(df, parquet_aggregate_path, index=False)
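These two hunks keep the writer and the parser in sync: the version directory becomes `{study}__{data_package}__{version}`, and `__init__` recovers the version by splitting on `__` instead of slicing the last three characters, so versions longer than three characters survive the round trip. A hedged sketch; the key layout is inferred from the diff and the values are hypothetical:

    # Illustrative only; key contents are hypothetical.
    s3_key = "site_upload/study/study__encounter/hospital_a/study__encounter__1001/data.parquet"
    s3_key_array = s3_key.split("/")

    print(s3_key_array[4][-3:])             # 001   -- old slice truncates a 4-char version
    print(s3_key_array[4].split("__")[-1])  # 1001  -- new parse handles any length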
10 changes: 5 additions & 5 deletions template.yaml
@@ -350,7 +350,7 @@ Resources:
           Type: Api
           Properties:
             RestApiId: !Ref DashboardApiGateway
-            Path: /data_packages/{data_package_id}/chart
+            Path: /data-packages/{data_package_id}/chart
             Method: GET
             RequestParameters:
               - method.request.querystring.column:
@@ -417,7 +417,7 @@ Resources:
           Type: Api
           Properties:
             RestApiId: !Ref DashboardApiGateway
-            Path: /last_valid/{study}/{data_package}/{site}/{version}/{filename}
+            Path: /last-valid/{study}/{data_package}/{site}/{version}/{filename}
             Method: GET
       Policies:
         - S3ReadPolicy:
@@ -463,7 +463,7 @@ Resources:
           Type: Api
           Properties:
             RestApiId: !Ref DashboardApiGateway
-            Path: /last_valid/
+            Path: /last-valid/
             Method: GET
       Policies:
         - S3ReadPolicy:
@@ -571,7 +571,7 @@ Resources:
           Type: Api
           Properties:
             RestApiId: !Ref DashboardApiGateway
-            Path: /data_packages
+            Path: /data-packages
             Method: GET
             RequestParameters:
               - method.request.querystring.name:
@@ -580,7 +580,7 @@
           Type: Api
           Properties:
             RestApiId: !Ref DashboardApiGateway
-            Path: /data_packages/{data_package_id}
+            Path: /data-packages/{data_package_id}
             Method: GET
       # TODO: it :should: be possible to move these policies to a central role/policy
       # set that can be referenced in multiple places; see
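All five routes move from snake_case (`/data_packages`, `/last_valid`) to kebab-case, so any client of this gateway needs the matching rename. A minimal hedged sketch; the base URL below is a placeholder, not the real deployed stage:

    # Illustrative only; BASE_URL is a placeholder for the deployed API stage.
    import requests

    BASE_URL = "https://example.execute-api.us-east-1.amazonaws.com/prod"

    # was /data_packages; now /data-packages
    res = requests.get(f"{BASE_URL}/data-packages", params={"name": "encounter"})
    res.raise_for_status()
    print(res.json())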
2 changes: 1 addition & 1 deletion tests/conftest.py
@@ -49,7 +49,7 @@ def _init_mock_data(s3_client, bucket, study, data_package, version):
         "./tests/test_data/count_synthea_patient_agg.parquet",
         bucket,
         f"{enums.BucketPath.AGGREGATE.value}/{study}/"
-        f"{study}__{data_package}/{study}__{data_package}_{version}/"
+        f"{study}__{data_package}/{study}__{data_package}__{version}/"
         f"{study}__{data_package}__aggregate.parquet",
     )
     s3_client.upload_file(
4 changes: 2 additions & 2 deletions tests/dashboard/test_get_chart_data.py
@@ -104,8 +104,8 @@ def test_format_payload(query_params, filters, expected_payload):
 
 
 def test_get_data_cols(mock_bucket):
-    table_name = f"{EXISTING_STUDY}__{EXISTING_DATA_P}_{EXISTING_VERSION}"
-    res = get_chart_data._get_table_cols(table_name)
+    table_id = f"{EXISTING_STUDY}__{EXISTING_DATA_P}__{EXISTING_VERSION}"
+    res = get_chart_data._get_table_cols(table_id)
     cols = pandas.read_csv("./tests/test_data/count_synthea_patient_agg.csv").columns
     assert res == list(cols)
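The renamed `table_id` reflects the `{study}__{data_package}__{version}` convention. Since study and package names can themselves contain single underscores (e.g. `count_synthea_patient`), only the double underscore is a safe component delimiter. A hedged sketch of the round trip; the component values are assumed, not read from the fixtures:

    # Illustrative only; component values are assumed, not the real fixtures.
    study, data_package, version = "study", "count_synthea_patient", "100"
    table_id = f"{study}__{data_package}__{version}"

    # single underscores inside the package name do not split on "__"
    assert table_id.rsplit("__", 1)[0] == f"{study}__{data_package}"
    assert table_id.split("__")[-1] == version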

23 changes: 11 additions & 12 deletions tests/site_upload/test_powerset_merge.py
@@ -92,17 +92,17 @@
             200,
             ITEM_COUNT + 4,
         ),
-        # ( # ensuring that a data package that is a substring does not get
-        #     # merged by substr match
-        #     "./tests/test_data/count_synthea_patient.parquet",
-        #     f"/{EXISTING_STUDY}/{EXISTING_STUDY}__{EXISTING_DATA_P[0:-2]}/{EXISTING_SITE}/"
-        #     f"{EXISTING_STUDY}__{EXISTING_DATA_P[0:-2]}_{EXISTING_VERSION}/encount.parquet",
-        #     f"/{EXISTING_STUDY}/{EXISTING_STUDY}__{EXISTING_DATA_P[0:-2]}/"
-        #     f"{EXISTING_STUDY}__{EXISTING_DATA_P[0:-2]}_{EXISTING_VERSION}/encount.parquet",
-        #     False,
-        #     200,
-        #     ITEM_COUNT + 4,
-        # ),
+        ( # ensuring that a data package that is a substring does not get
+            # merged by substr match
+            "./tests/test_data/count_synthea_patient.parquet",
+            f"/{EXISTING_STUDY}/{EXISTING_STUDY}__{EXISTING_DATA_P[0:-2]}/{EXISTING_SITE}/"
+            f"{EXISTING_VERSION}/encount.parquet",
+            f"/{EXISTING_STUDY}/{EXISTING_STUDY}__{EXISTING_DATA_P[0:-2]}/{EXISTING_SITE}/"
+            f"{EXISTING_VERSION}/encount.parquet",
+            False,
+            200,
+            ITEM_COUNT + 4,
+        ),
         ( # Empty file upload
             None,
             f"/{NEW_STUDY}/{NEW_STUDY}__{EXISTING_DATA_P}/{EXISTING_SITE}"
@@ -136,7 +136,6 @@ def test_powerset_merge_single_upload(
     mock_notification,
 ):
     s3_client = boto3.client("s3", region_name="us-east-1")
-    s3_res = s3_client.list_objects_v2(Bucket=TEST_BUCKET)
     if upload_file is not None:
         s3_client.upload_file(
             upload_file,
