diff --git a/src/regtech_data_validator/cli.py b/src/regtech_data_validator/cli.py index 5561f4c..70f60e4 100644 --- a/src/regtech_data_validator/cli.py +++ b/src/regtech_data_validator/cli.py @@ -101,7 +101,6 @@ def validate( if all_findings: final_df = pl.concat(all_findings, how="diagonal") - final_df = final_df.with_columns(phase=pl.lit(final_phase.value)) status = "SUCCESS" if total_findings == 0 else "FAILURE" diff --git a/src/regtech_data_validator/data_formatters.py b/src/regtech_data_validator/data_formatters.py index 0150b09..c4ee9c8 100644 --- a/src/regtech_data_validator/data_formatters.py +++ b/src/regtech_data_validator/data_formatters.py @@ -40,7 +40,7 @@ def get_checks(phase): # which corresponds to severity, error/warning code, name of error/warning, row number in sblar, UID, fig link, # error/warning description (markdown formatted), single/multi/register, and the fields and values associated with the error/warning. # Each row in the final dataframe represents all data for that one finding. 
-def format_findings(df: pl.DataFrame, checks): +def format_findings(df: pl.DataFrame, phase, checks): final_df = pl.DataFrame() sorted_df = df.with_columns(pl.col('validation_id').cast(pl.Categorical(ordering='lexical'))).sort('validation_id') @@ -109,6 +109,7 @@ def format_findings(df: pl.DataFrame, checks): + sorted_columns ) final_df = pl.concat([final_df, df_pivot], how="diagonal") + final_df = final_df.with_columns(phase=pl.lit(phase)) return final_df diff --git a/src/regtech_data_validator/validator.py b/src/regtech_data_validator/validator.py index aa5835d..c05b920 100644 --- a/src/regtech_data_validator/validator.py +++ b/src/regtech_data_validator/validator.py @@ -164,7 +164,7 @@ def validate_batch_csv( # validate, and therefore validate_chunks, can return an empty dataframe for findings if not findings.is_empty(): has_syntax_errors = True - rf = format_findings(findings, syntax_checks) + rf = format_findings(findings, ValidationPhase.SYNTACTICAL.value, syntax_checks) yield rf, ValidationPhase.SYNTACTICAL if not has_syntax_errors: @@ -175,13 +175,13 @@ def validate_batch_csv( findings = validate(register_schema, uids) if not findings.is_empty(): rf = format_findings( - findings, [check for col_schema in register_schema.columns.values() for check in col_schema.checks] + findings, ValidationPhase.LOGICAL.value, [check for col_schema in register_schema.columns.values() for check in col_schema.checks] ) yield rf, ValidationPhase.LOGICAL for findings in validate_chunks(logic_schema, real_path, batch_size, batch_count): # validate, and therefore validate_chunks, can return an empty dataframe for findings if not findings.is_empty(): - rf = format_findings(findings, logic_checks) + rf = format_findings(findings, ValidationPhase.LOGICAL.value, logic_checks) yield rf, ValidationPhase.LOGICAL if os.path.isdir("/tmp/s3"):