-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Proof of concept files for running as lambda
- Loading branch information
Showing
5 changed files
with
125 additions
and
6 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
# Container image for running the data validator as an AWS Lambda function.
FROM --platform=linux/amd64 public.ecr.aws/lambda/python:3.12

# git is needed because requirements.txt installs sbl_filing_api from a git URL.
# Clean the package-manager cache in the same layer so it does not bloat the image.
RUN dnf install -y git && dnf clean all

COPY requirements.txt ${LAMBDA_TASK_ROOT}/requirements.txt

# Reference the copied file by its full path so the install does not depend on
# the current working directory; skip pip's download cache to keep the layer small.
RUN pip install --no-cache-dir -r "${LAMBDA_TASK_ROOT}/requirements.txt" --target "${LAMBDA_TASK_ROOT}"

COPY src/ ${LAMBDA_TASK_ROOT}

# Pass the name of the function handler as an argument to the runtime
CMD [ "regtech_data_validator.lambda_wrapper.lambda_handler" ]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
polars | ||
awslambdaric | ||
pandera | ||
ujson | ||
boto3 | ||
tabulate | ||
fsspec | ||
s3fs | ||
sqlalchemy | ||
pydantic | ||
psycopg2-binary | ||
pyarrow | ||
sbl_filing_api@git+https://github.com/cfpb/sbl-filing-api |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
from regtech_data_validator.service import service_validate | ||
|
||
def lambda_handler(event, context):
    """AWS Lambda entry point: validate every S3 object named in ``event``.

    Args:
        event: S3 notification payload. Each entry of ``event['Records']`` is
            read at ``['s3']['bucket']['name']`` and ``['s3']['object']['key']``.
        context: Lambda runtime context (unused).

    Raises:
        KeyError: If ``event`` or one of its records lacks the expected keys.
    """
    # Local import keeps this block self-contained.
    from urllib.parse import unquote_plus

    # A notification may carry more than one record; handle all of them
    # instead of silently dropping everything after the first.
    for record in event['Records']:
        s3_info = record['s3']
        bucket = s3_info['bucket']['name']
        # S3 URL-encodes object keys in event notifications (spaces arrive as
        # '+'), so decode before using the key as a path.
        key = unquote_plus(s3_info['object']['key'])
        service_validate(bucket, key)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,89 @@ | ||
import logging | ||
import polars as pl | ||
from fsspec import AbstractFileSystem, filesystem | ||
from pydantic import PostgresDsn | ||
from sqlalchemy import create_engine | ||
from sqlalchemy.orm import sessionmaker | ||
from urllib import parse | ||
|
||
from regtech_data_validator.validator import validate_batch_csv, ValidationSchemaError | ||
from sbl_filing_api.entities.models.dao import SubmissionDAO, UserActionDAO | ||
from sbl_filing_api.entities.models.model_enums import SubmissionState, UserActionType | ||
|
||
|
||
# Configure the root logger so INFO-level (and above) records are emitted.
logger = logging.getLogger()
logger.setLevel(logging.INFO)
|
||
def get_validation_db_connection():
    """Create a SQLAlchemy engine for the validation-findings database.

    The password is read from the ``VALIDATION_DB_PASSWORD`` environment
    variable so that no credential is stored in source control. The user,
    host and database name may be overridden with ``VALIDATION_DB_USER``,
    ``VALIDATION_DB_HOST`` and ``VALIDATION_DB_NAME``; otherwise the previous
    built-in values are used.

    Returns:
        The engine produced by ``create_engine`` for the assembled DSN.

    Raises:
        RuntimeError: If ``VALIDATION_DB_PASSWORD`` is unset or empty.
    """
    # Local import keeps this block self-contained.
    import os

    password = os.environ.get("VALIDATION_DB_PASSWORD")
    if not password:
        raise RuntimeError("VALIDATION_DB_PASSWORD environment variable is not set.")

    postgres_dsn = PostgresDsn.build(
        scheme="postgresql+psycopg2",
        username=os.environ.get("VALIDATION_DB_USER", "appuser"),
        # Percent-encode everything (safe="") so characters such as "/" in the
        # password cannot be misread as URL structure.
        password=parse.quote(password, safe=""),
        host=os.environ.get(
            "VALIDATION_DB_HOST",
            "regtech-validations-main.cowleab4pwre.us-east-1.rds.amazonaws.com",
        ),
        path=os.environ.get("VALIDATION_DB_NAME", "validation_findings"),
    )
    return create_engine(str(postgres_dsn))
|
||
|
||
def _set_submission_state(session, submission_id, state):
    """Persist ``state`` on the submission row identified by ``submission_id``.

    Args:
        session: SQLAlchemy session bound to the filing database.
        submission_id: Value matched against ``SubmissionDAO.id``.
        state: ``SubmissionState`` member to store.

    Returns:
        True when the row was found and the change committed, False when no
        such submission exists.
    """
    # Discard any half-finished transaction first so the session is usable
    # even when this is called from an exception handler.
    session.rollback()
    submission = session.query(SubmissionDAO).filter_by(id=submission_id).first()
    if submission is None:
        logger.error("Submission %s not found; cannot set state to %s.", submission_id, state)
        return False
    submission.state = state
    session.commit()
    return True


def service_validate(bucket, file):
    """Validate an uploaded submission CSV and record the outcome.

    The submission is marked ``VALIDATION_IN_PROGRESS``, the file is fetched
    from S3 through a local file cache and validated in batches, each batch of
    findings is appended to the ``findings`` table, and the submission is
    finally marked with the resulting state. Failures are logged and recorded
    as ``SUBMISSION_UPLOAD_MALFORMED`` (schema errors) or ``VALIDATION_ERROR``
    (anything else).

    Args:
        bucket: Name of the S3 bucket holding the file.
        file: Object key. The third ``/``-separated segment is used as the LEI
            and the fourth, up to ``.csv``, as the submission id.

    Raises:
        IndexError: If ``file`` has fewer than four ``/``-separated segments.
    """
    key_parts = file.split("/")
    lei = key_parts[2]
    submission_id = key_parts[3].split(".csv")[0]

    filing_conn = None
    try:
        filing_conn = sessionmaker(bind=get_filing_db_connection())()
        if not _set_submission_state(filing_conn, submission_id, SubmissionState.VALIDATION_IN_PROGRESS):
            # Nothing to validate against; the helper has already logged it.
            return

        fs: AbstractFileSystem = filesystem("filecache", target_protocol='s3', cache_storage='/tmp/files/')
        final_state = SubmissionState.VALIDATION_SUCCESSFUL
        # Created lazily on the first non-empty batch and then reused, rather
        # than building a new engine for every batch.
        findings_engine = None

        with fs.open(f"{bucket}/{file}", "r") as f:
            # NOTE(review): the context is now keyed by the literal "lei"; the
            # previous {lei: lei} used the LEI value itself as the key. Confirm
            # against validate_batch_csv's expected context keys.
            for findings, phase in validate_batch_csv(f.name, {"lei": lei}, batch_size=50000, batch_count=1):
                if findings.height == 0:
                    # A batch without findings must not downgrade the state
                    # and has nothing to store.
                    continue

                findings = findings.with_columns(
                    phase=pl.lit(phase),
                    submission_id=pl.lit(submission_id),
                )

                # Once any batch contains an error the final state stays WITH_ERRORS.
                if final_state != SubmissionState.VALIDATION_WITH_ERRORS:
                    has_errors = findings.filter(pl.col('validation_type') == 'Error').height > 0
                    final_state = (
                        SubmissionState.VALIDATION_WITH_ERRORS
                        if has_errors
                        else SubmissionState.VALIDATION_WITH_WARNINGS
                    )

                if findings_engine is None:
                    findings_engine = get_validation_db_connection()
                findings.write_database(
                    table_name="findings",
                    connection=findings_engine,
                    if_table_exists="append",
                )

        _set_submission_state(filing_conn, submission_id, final_state)

    except ValidationSchemaError:
        logger.exception("The file is malformed.")
        if filing_conn is not None:
            _set_submission_state(filing_conn, submission_id, SubmissionState.SUBMISSION_UPLOAD_MALFORMED)

    except Exception:
        # Top-level boundary for the Lambda: log, then record the failure.
        logger.exception("Error processing submission file.")
        if filing_conn is not None:
            _set_submission_state(filing_conn, submission_id, SubmissionState.VALIDATION_ERROR)

    finally:
        # Always release the session's database connection.
        if filing_conn is not None:
            filing_conn.close()
|
||
|
||
|
||
def get_filing_db_connection():
    """Create a SQLAlchemy engine for the filing database.

    The password is read from the ``FILING_DB_PASSWORD`` environment variable
    so that no credential is stored in source control. The user, host and
    database name may be overridden with ``FILING_DB_USER``,
    ``FILING_DB_HOST`` and ``FILING_DB_NAME``; otherwise the previous built-in
    values are used.

    Returns:
        The engine produced by ``create_engine`` for the assembled DSN.

    Raises:
        RuntimeError: If ``FILING_DB_PASSWORD`` is unset or empty.
    """
    # Local import keeps this block self-contained.
    import os

    password = os.environ.get("FILING_DB_PASSWORD")
    if not password:
        raise RuntimeError("FILING_DB_PASSWORD environment variable is not set.")

    postgres_dsn = PostgresDsn.build(
        scheme="postgresql+psycopg2",
        username=os.environ.get("FILING_DB_USER", "filing_user"),
        # Percent-encode everything (safe="") so characters such as "/" in the
        # password cannot be misread as URL structure.
        password=parse.quote(password, safe=""),
        host=os.environ.get(
            "FILING_DB_HOST",
            "regtech-sbl-devpub-cd-eval.cowleab4pwre.us-east-1.rds.amazonaws.com",
        ),
        path=os.environ.get("FILING_DB_NAME", "filing_db"),
    )
    return create_engine(str(postgres_dsn))