Skip to content

Commit

Permalink
Added phase to dataframe
Browse files Browse the repository at this point in the history
  • Loading branch information
jcadam14 committed Sep 26, 2024
1 parent 2f0493d commit 0089588
Show file tree
Hide file tree
Showing 3 changed files with 5 additions and 5 deletions.
1 change: 0 additions & 1 deletion src/regtech_data_validator/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,6 @@ def validate(

if all_findings:
final_df = pl.concat(all_findings, how="diagonal")
final_df = final_df.with_columns(phase=pl.lit(final_phase.value))

status = "SUCCESS" if total_findings == 0 else "FAILURE"

Expand Down
3 changes: 2 additions & 1 deletion src/regtech_data_validator/data_formatters.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def get_checks(phase):
# which corresponds to severity, error/warning code, name of error/warning, row number in sblar, UID, fig link,
# error/warning description (markdown formatted), single/multi/register, and the fields and values associated with the error/warning.
# Each row in the final dataframe represents all data for that one finding.
def format_findings(df: pl.DataFrame, checks):
def format_findings(df: pl.DataFrame, phase, checks):
final_df = pl.DataFrame()

sorted_df = df.with_columns(pl.col('validation_id').cast(pl.Categorical(ordering='lexical'))).sort('validation_id')
Expand Down Expand Up @@ -109,6 +109,7 @@ def format_findings(df: pl.DataFrame, checks):
+ sorted_columns
)
final_df = pl.concat([final_df, df_pivot], how="diagonal")
final_df = final_df.with_columns(phase=pl.lit(phase))
return final_df


Expand Down
6 changes: 3 additions & 3 deletions src/regtech_data_validator/validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ def validate_batch_csv(
# validate, and therefore validate_chunks, can return an empty dataframe for findings
if not findings.is_empty():
has_syntax_errors = True
rf = format_findings(findings, syntax_checks)
rf = format_findings(findings, ValidationPhase.SYNTACTICAL.value, syntax_checks)
yield rf, ValidationPhase.SYNTACTICAL

if not has_syntax_errors:
Expand All @@ -175,13 +175,13 @@ def validate_batch_csv(
findings = validate(register_schema, uids)
if not findings.is_empty():
rf = format_findings(
findings, [check for col_schema in register_schema.columns.values() for check in col_schema.checks]
findings, ValidationPhase.LOGICAL.value, [check for col_schema in register_schema.columns.values() for check in col_schema.checks]
)
yield rf, ValidationPhase.LOGICAL
for findings in validate_chunks(logic_schema, real_path, batch_size, batch_count):
# validate, and therefore validate_chunks, can return an empty dataframe for findings
if not findings.is_empty():
rf = format_findings(findings, logic_checks)
rf = format_findings(findings, ValidationPhase.LOGICAL.value, logic_checks)
yield rf, ValidationPhase.LOGICAL

if os.path.isdir("/tmp/s3"):
Expand Down

0 comments on commit 0089588

Please sign in to comment.