Skip to content

Commit

Permalink
parquet testing
Browse files Browse the repository at this point in the history
  • Loading branch information
lchen-2101 committed Nov 7, 2024
1 parent c9c5b6b commit 765cb6b
Show file tree
Hide file tree
Showing 10 changed files with 757 additions and 64 deletions.
14 changes: 14 additions & 0 deletions Lambda_Dockerfile copy
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Container image for the parquet-validation Lambda handler, built on the
# AWS Lambda Python 3.12 base image and pinned to the linux/amd64 platform.
FROM --platform=linux/amd64 public.ecr.aws/lambda/python:3.12

# Install git at build time.
# NOTE(review): presumably required because some pip requirement resolves
# from a git source -- confirm, otherwise this layer can be dropped.
RUN dnf install -y git

# Build argument selecting which requirements file gets installed.
# The default file name contains a space.
ARG reqs="lambda_requirements copy.txt"

# Copy the selected requirements file into the task root under a fixed name.
# NOTE(review): the source path expands to a value containing whitespace;
# verify the builder treats it as a single path rather than two sources.
COPY ${reqs} ${LAMBDA_TASK_ROOT}/requirements.txt

# Install dependencies directly into the task root.
# The relative requirements.txt path assumes the working directory is
# LAMBDA_TASK_ROOT -- TODO confirm against the base image.
RUN pip install -r requirements.txt --target "${LAMBDA_TASK_ROOT}"

# Copy the contents of src/ into the task root so the handler module named
# in CMD below sits directly under it.
COPY src/ ${LAMBDA_TASK_ROOT}

# Pass the name of the function handler as an argument to the runtime
CMD [ "regtech_data_validator.lambda_wrapper_parquet.lambda_handler" ]
12 changes: 12 additions & 0 deletions lambda_requirements copy.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
polars
awslambdaric
pandera
ujson
boto3
tabulate
fsspec
s3fs
sqlalchemy
pydantic
psycopg2-binary
pyarrow
89 changes: 46 additions & 43 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions poetry.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[virtualenvs]
in-project = true
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ ujson = "^5.9.0"
matplotlib = "^3.9.0"
fsspec = "^2024.6.1"
polars = "^1.6.0"
pyarrow = "^17.0.0"
pyarrow = "^18.0.0"
boto3 = "~1.34.0"
s3fs = "^2024.9.0"

Expand All @@ -43,6 +43,7 @@ typer = "^0.12.5"

[tool.poetry.scripts]
cfpb-val = 'regtech_data_validator.cli:app'
parquet-test = 'regtech_data_validator.lambda_wrapper_parquet:test'

# Black formatting
[tool.black]
Expand Down
30 changes: 15 additions & 15 deletions src/regtech_data_validator/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,21 +100,21 @@ def validate(

status = "SUCCESS" if total_findings == 0 else "FAILURE"

match output:
case OutputFormat.CSV:
print(df_to_csv(final_df))
case OutputFormat.POLARS:
print(df_to_str(final_df))
case OutputFormat.JSON:
print(df_to_json(final_df, max_group_size=200))
case OutputFormat.TABLE:
print(df_to_table(final_df))
case OutputFormat.DOWNLOAD:
df_to_download(final_df)
print(f"Final DF Height: {final_df.height}")
print(f"Took {(datetime.now() - start).total_seconds()} seconds")
case _:
raise ValueError(f'output format "{output}" not supported')
# match output:
# case OutputFormat.CSV:
# print(df_to_csv(final_df))
# case OutputFormat.POLARS:
# print(df_to_str(final_df))
# case OutputFormat.JSON:
# print(df_to_json(final_df, max_group_size=200))
# case OutputFormat.TABLE:
# print(df_to_table(final_df))
# case OutputFormat.DOWNLOAD:
# df_to_download(final_df)
# print(f"Final DF Height: {final_df.height}")
# print(f"Took {(datetime.now() - start).total_seconds()} seconds")
# case _:
# raise ValueError(f'output format "{output}" not supported')

typer.echo(
f"Status: {status}, Total Errors: {total_findings}, Validation Phase: {final_phase}",
Expand Down
30 changes: 30 additions & 0 deletions src/regtech_data_validator/lambda_wrapper_parquet.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
from regtech_data_validator.service_parquet import service_validate_parquet


def lambda_handler(event, context):
    """Run parquet validation for the S3 object named in *event*.

    The event is either a mapping with a top-level ``Records`` list, or a
    mapping that wraps such a payload under the ``responsePayload`` key.
    Only the first entry of ``Records`` is read; its bucket name and object
    key are forwarded to ``service_validate_parquet``.

    Args:
        event: Invocation payload as described above.
        context: Lambda context object; not used.

    Raises:
        KeyError, IndexError: If the expected structure is missing.
    """
    # Unwrap the nested payload when present, otherwise use the event as-is.
    if 'responsePayload' in event:
        payload = event['responsePayload']
    else:
        payload = event

    s3_entry = payload['Records'][0]['s3']
    bucket_name = s3_entry['bucket']['name']
    object_key = s3_entry['object']['key']
    service_validate_parquet(bucket_name, object_key)


# def test():
# event = {
# "Records": [{
# "s3": {
# "bucket": {
# "name": "cfpb-regtech-devpub-lc-test"
# },
# "object": {
# "key": "upload/2024/1234364890REGTECH006/6265_pqs/"
# },
# }}
# ]
# }
# # s3://cfpb-regtech-devpub-lc-test/upload/2024/1234364890REGTECH006/6254.csv/
# lambda_handler(event, None)


# if __name__ == "__main__":
# test()
Loading

0 comments on commit 765cb6b

Please sign in to comment.