From 470a8f39153f23cb5f00b2e08e48fd1e6f14ce85 Mon Sep 17 00:00:00 2001 From: jcadam14 <41971533+jcadam14@users.noreply.github.com> Date: Thu, 7 Nov 2024 14:31:51 -0700 Subject: [PATCH] Updated to remove writing of download report, returns byte data (#270) Closes #269 Changed df_to_download to return the report data as a bytes object (which the filing API can then use directly to store to S3). Updated pytests to reflect the change. --- src/regtech_data_validator/cli.py | 2 +- src/regtech_data_validator/data_formatters.py | 33 ++++--------------- tests/test_output_formats.py | 18 ++-------- 3 files changed, 10 insertions(+), 43 deletions(-) diff --git a/src/regtech_data_validator/cli.py b/src/regtech_data_validator/cli.py index 1ec6513..1ab0397 100644 --- a/src/regtech_data_validator/cli.py +++ b/src/regtech_data_validator/cli.py @@ -108,7 +108,7 @@ def validate( case OutputFormat.TABLE: print(df_to_table(final_df)) case OutputFormat.DOWNLOAD: - df_to_download(final_df) + print(df_to_download(final_df)) case _: raise ValueError(f'output format "{output}" not supported') diff --git a/src/regtech_data_validator/data_formatters.py b/src/regtech_data_validator/data_formatters.py index 62bd790..faf7864 100644 --- a/src/regtech_data_validator/data_formatters.py +++ b/src/regtech_data_validator/data_formatters.py @@ -1,7 +1,5 @@ -import boto3 import ujson import polars as pl -import fsspec from tabulate import tabulate @@ -116,13 +114,13 @@ def format_findings(df: pl.DataFrame, phase, checks): def df_to_download( df: pl.DataFrame, - path: str = "download_report.csv", warning_count: int = 0, error_count: int = 0, max_errors: int = 1000000, ): if df.is_empty(): # return headers of csv for 'emtpy' report + buffer = BytesIO() empty_df = pl.DataFrame( { "validation_type": [], @@ -134,9 +132,9 @@ def df_to_download( "validation_description": [], } ) - with fsspec.open(path, mode='wb') as f: - empty_df.write_csv(f, quote_style='non_numeric') - return + empty_df.write_csv(buffer, 
quote_style='non_numeric', include_header=True) + buffer.seek(0) + return buffer.getvalue() # get the check for the phase the results were in, so we can pull out static data from each # found check @@ -196,26 +194,9 @@ def df_to_download( f'"Your register contains {total_errors} {error_type}, however, only {max_errors} records are displayed in this report. To see additional {error_type}, correct the listed records, and upload a new file."\n'.encode() ) - if path.startswith("s3"): - sorted_df.write_csv(buffer, quote_style='non_numeric', include_header=False) - buffer.seek(0) - upload(path, buffer.getvalue()) - else: - with fsspec.open(path, mode='wb') as f: - sorted_df.write_csv(buffer, quote_style='non_numeric', include_header=False) - buffer.seek(0) - f.write(buffer.getvalue()) - - -def upload(path: str, content: bytes) -> None: - bucket = path.split("s3://")[1].split("/")[0] - opath = path.split("s3://")[1].replace(bucket + "/", "") - s3 = boto3.client("s3") - s3.put_object( - Bucket=bucket, - Key=opath, - Body=content, - ) + sorted_df.write_csv(buffer, quote_style='non_numeric', include_header=False) + buffer.seek(0) + return buffer.getvalue() def df_to_csv(df: pl.DataFrame) -> str: diff --git a/tests/test_output_formats.py b/tests/test_output_formats.py index c45fd2a..db50b2b 100644 --- a/tests/test_output_formats.py +++ b/tests/test_output_formats.py @@ -1,9 +1,6 @@ import polars as pl import ujson -import tempfile -from pathlib import Path - from regtech_data_validator import global_data from regtech_data_validator.data_formatters import df_to_csv, df_to_str, df_to_json, df_to_table, df_to_download from regtech_data_validator.validation_results import ValidationPhase @@ -254,14 +251,8 @@ def test_download_csv(self): """ ).strip('\n') - gf = tempfile.NamedTemporaryFile(delete=False, mode='w', suffix='.csv') - temp_path = Path(gf.name) - df_to_download(self.findings_df, str(temp_path.resolve())) - with open(temp_path, 'r') as output: - actual_output = 
output.read() - print(f"{actual_output}") + actual_output = df_to_download(self.findings_df).decode('utf-8') assert actual_output.strip() == expected_output - temp_path.unlink() def test_empty_download_csv(self): expected_output = dedent( @@ -270,10 +261,5 @@ def test_empty_download_csv(self): """ ).strip('\n') - gf = tempfile.NamedTemporaryFile(delete=False, mode='w', suffix='.csv') - temp_path = Path(gf.name) - df_to_download(pl.DataFrame(), str(temp_path.resolve())) - with open(temp_path, 'r') as output: - actual_output = output.read() + actual_output = df_to_download(pl.DataFrame()).decode('utf-8') assert actual_output.strip() == expected_output - temp_path.unlink()