Added phase to dataframe

cfpb · Sep 26, 2024 · 0089588 · 0089588
1 parent 2f0493d
commit 0089588
Show file tree

Hide file tree

Showing 3 changed files with 5 additions and 5 deletions.
diff --git a/src/regtech_data_validator/cli.py b/src/regtech_data_validator/cli.py
@@ -101,7 +101,6 @@ def validate(
 
     if all_findings:
         final_df = pl.concat(all_findings, how="diagonal")
-        final_df = final_df.with_columns(phase=pl.lit(final_phase.value))
 
     status = "SUCCESS" if total_findings == 0 else "FAILURE"
 

diff --git a/src/regtech_data_validator/data_formatters.py b/src/regtech_data_validator/data_formatters.py
@@ -40,7 +40,7 @@ def get_checks(phase):
 # which corresponds to severity, error/warning code, name of error/warning, row number in sblar, UID, fig link,
 # error/warning description (markdown formatted), single/multi/register, and the fields and values associated with the error/warning.
 # Each row in the final dataframe represents all data for that one finding.
-def format_findings(df: pl.DataFrame, checks):
+def format_findings(df: pl.DataFrame, phase, checks):
     final_df = pl.DataFrame()
 
     sorted_df = df.with_columns(pl.col('validation_id').cast(pl.Categorical(ordering='lexical'))).sort('validation_id')
@@ -109,6 +109,7 @@ def format_findings(df: pl.DataFrame, checks):
             + sorted_columns
         )
         final_df = pl.concat([final_df, df_pivot], how="diagonal")
+        final_df = final_df.with_columns(phase=pl.lit(phase))
     return final_df
 
 

diff --git a/src/regtech_data_validator/validator.py b/src/regtech_data_validator/validator.py
@@ -164,7 +164,7 @@ def validate_batch_csv(
         # validate, and therefore validate_chunks, can return an empty dataframe for findings
         if not findings.is_empty():
             has_syntax_errors = True
-            rf = format_findings(findings, syntax_checks)
+            rf = format_findings(findings,  ValidationPhase.SYNTACTICAL.value, syntax_checks)
             yield rf, ValidationPhase.SYNTACTICAL
 
     if not has_syntax_errors:
@@ -175,13 +175,13 @@ def validate_batch_csv(
         findings = validate(register_schema, uids)
         if not findings.is_empty():
             rf = format_findings(
-                findings, [check for col_schema in register_schema.columns.values() for check in col_schema.checks]
+                findings, ValidationPhase.LOGICAL.value, [check for col_schema in register_schema.columns.values() for check in col_schema.checks]
             )
             yield rf, ValidationPhase.LOGICAL
         for findings in validate_chunks(logic_schema, real_path, batch_size, batch_count):
             # validate, and therefore validate_chunks, can return an empty dataframe for findings
             if not findings.is_empty():
-                rf = format_findings(findings, logic_checks)
+                rf = format_findings(findings,  ValidationPhase.LOGICAL.value, logic_checks)
                 yield rf, ValidationPhase.LOGICAL
 
     if os.path.isdir("/tmp/s3"):