From 018e10b34bf23c941cb7cd231cc228d4394c4c1f Mon Sep 17 00:00:00 2001 From: Nargis Sultani Date: Wed, 23 Aug 2023 10:48:04 -0400 Subject: [PATCH 1/8] Switched to two phase validation and delted schema.py --- src/validator/check_functions.py | 4 + src/validator/create_schemas.py | 3 +- src/validator/main.py | 35 +- src/validator/phase_validations.py | 65 +- src/validator/schema.py | 3357 ---------------------------- 5 files changed, 96 insertions(+), 3368 deletions(-) delete mode 100644 src/validator/schema.py diff --git a/src/validator/check_functions.py b/src/validator/check_functions.py index b42372e9..bf61c3aa 100644 --- a/src/validator/check_functions.py +++ b/src/validator/check_functions.py @@ -349,6 +349,10 @@ def _has_valid_enum_pair_validation_helper( result = series == condition_value else: result = series != condition_value + for i, v in result.items(): + if v is False: + print(i) + return result diff --git a/src/validator/create_schemas.py b/src/validator/create_schemas.py index 37579f4e..0fc78c76 100644 --- a/src/validator/create_schemas.py +++ b/src/validator/create_schemas.py @@ -13,7 +13,8 @@ def get_schema_by_phase_for_lei(template: dict, phase: str, lei: str = None): - for column, validations in get_phase_1_and_2_validations_for_lei(lei): + for column in get_phase_1_and_2_validations_for_lei(lei): + validations = get_phase_1_and_2_validations_for_lei(lei)[column] template[column].checks = validations[phase] return DataFrameSchema(template) diff --git a/src/validator/main.py b/src/validator/main.py index 0a46a01d..c3e43ef2 100644 --- a/src/validator/main.py +++ b/src/validator/main.py @@ -8,6 +8,8 @@ import sys import pandas as pd +from create_schemas import (get_phase_1_schema_for_lei, + get_phase_2_schema_for_lei) from pandera.errors import SchemaErrors from schema import get_schema_for_lei @@ -28,11 +30,13 @@ def run_validation_on_df(df: pd.DataFrame, lei: str) -> None: print(df) print("") - sblar_schema = get_schema_for_lei(lei) + phase_1_failure_cases = None + phase_1_sblar_chema = get_phase_1_schema_for_lei(lei) try: - sblar_schema(df, lazy=True) + phase_1_sblar_chema(df, lazy=True) except SchemaErrors as errors: + phase_1_failure_cases = errors.failure_cases for error in errors.schema_errors: # Name of the column in the dataframe being checked column_name = error["error"].schema.name @@ -48,9 +52,34 @@ def run_validation_on_df(df: pd.DataFrame, lei: str) -> None: # this is just a string that we'd need to parse manually check_output = error["error"].args[0] - print(f"Validation `{check_name}` failed for column `{column_name}`") + print(f"Phase 1 Validation `{check_name}` failed for column `{column_name}`") print(check_output) print("") + + if phase_1_failure_cases is None: + + phase_2_sblar_chema = get_phase_2_schema_for_lei(lei) + try: + phase_2_sblar_chema(df, lazy=True) + except SchemaErrors as errors: + for error in errors.schema_errors: + # Name of the column in the dataframe being checked + column_name = error["error"].schema.name + + # built in checks such as unique=True are different than custom + # checks unfortunately so the name needs to be accessed differently + try: + check_name = error["error"].check.name + # This will either be a boolean series or a single bool + check_output = error["error"].check_output + except AttributeError: + check_name = error["error"].check + # this is just a string that we'd need to parse manually + check_output = error["error"].args[0] + + print(f"Phase 2 Validation `{check_name}` failed for column `{column_name}`") + print(check_output) + print("") if __name__ == "__main__": diff --git a/src/validator/phase_validations.py b/src/validator/phase_validations.py index 36bda0c9..e6856790 100644 --- a/src/validator/phase_validations.py +++ b/src/validator/phase_validations.py @@ -1131,20 +1131,19 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): ], "phase_2": [], }, - "pricing_mca_addcost_flag": {"phase_1": [], "phase_2": []}, - "pricing_mca_addcost": {"phase_1": [], "phase_2": []}, - "pricing_prepenalty_allowed": { + "pricing_mca_addcost_flag": { "phase_1": [ SBLCheck( is_valid_enum, - name="pricing_prepenalty_allowed.invalid_enum_value", + name="pricing_mca_addcost_flag.invalid_enum_value", description=( - "'Prepayment penalty could be imposed' must equal 1, 2, or 999." + "'MCA/sales-based: additional cost for merchant cash " + "advances or other sales-based financing: NA flag' " + "must equal 900 or 999." ), element_wise=True, accepted_values=[ - "1", - "2", + "900", "999", ], ), @@ -1172,6 +1171,58 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): ), ], }, + "pricing_mca_addcost": { + "phase_1": [ + SBLCheck( + is_number, + name="pricing_mca_addcost.invalid_numeric_format", + description=( + "When present, 'MCA/sales-based: additional cost for " + "merchant cash advances or other sales-based financing' " + "must be a numeric value" + ), + element_wise=True, + accept_blank=True, + ), + ], + "phase_2": [ + SBLCheck( + has_no_conditional_field_conflict, + name="pricing_mca_addcost.conditional_field_conflict", + description=( + "When 'MCA/sales-based: additional cost for merchant " + "cash advances or other sales-based financing: NA flag' " + "does not equal 900 (applicable), 'MCA/sales-based: " + "additional cost for merchant cash advances or other " + "sales-based financing' must be blank. When 'MCA/sales-based: " + "additional cost for merchant cash advances or other " + "sales-based financing: NA flag' equals 900, MCA/sales-based: " + "additional cost for merchant cash advances or other " + "sales-based financing’ must not be blank." + ), + groupby="pricing_mca_addcost_flag", + condition_values={"900"}, + ), + ], + }, + "pricing_prepenalty_allowed": { + "phase_1": [ + SBLCheck( + is_valid_enum, + name="pricing_prepenalty_allowed.invalid_enum_value", + description=( + "'Prepayment penalty could be imposed' must equal 1, 2, or 999." + ), + element_wise=True, + accepted_values=[ + "1", + "2", + "999", + ], + ), + ], + "phase_2": [], + }, "pricing_prepenalty_exists": { "phase_1": [ SBLCheck( diff --git a/src/validator/schema.py b/src/validator/schema.py deleted file mode 100644 index da6e2111..00000000 --- a/src/validator/schema.py +++ /dev/null @@ -1,3357 +0,0 @@ -"""Create a Pandera SBLAR DataFrameSchema object. - -Refer to the Pandera documentation for details. -https://pandera.readthedocs.io/en/stable/dataframe_schemas.html - -The only major modification from native Pandera is the use of custom -Check classes to differentiate between warnings and errors. """ - -import global_data -from check_functions import ( - has_correct_length, - has_no_conditional_field_conflict, - has_valid_enum_pair, - has_valid_fieldset_pair, - has_valid_format, - has_valid_multi_field_value_count, - has_valid_value_count, - is_date, - is_date_after, - is_date_before_in_days, - is_date_in_range, - is_greater_than, - is_greater_than_or_equal_to, - is_less_than, - is_number, - is_unique_column, - is_unique_in_field, - is_valid_code, - is_valid_enum, - meets_multi_value_field_restriction, - string_contains, -) -from checks import SBLCheck -from pandera import Column, DataFrameSchema - -# read and populate global naics code (this should be called only once) -global_data.read_naics_codes() - -# read and populate global census geoids (this should be called only once) -global_data.read_geoids() - - -def get_schema_for_lei(lei: str): - return DataFrameSchema( - { - "uid": Column( - str, - title="Field 1: Unique identifier", - checks=[ - SBLCheck.str_length( - 21, - 45, - name="uid.invalid_text_length", - description=( - "'Unique identifier' must be at least 21 characters " - "in length and at most 45 characters in length." - ), - ), - SBLCheck( - has_valid_format, - name="uid.invalid_text_pattern", - description=( - "'Unique identifier' may contain any combination of " - "numbers and/or uppercase letters (i.e., 0-9 and A-Z), " - "and must not contain any other characters." - ), - element_wise=True, - regex="^[A-Z0-9]+$", - ), - SBLCheck( - is_unique_column, - name="uid.duplicates_in_dataset", - description=( - "Any 'unique identifier' may not be used in more than one " - "record within a small business lending application register." - ), - groupby="uid", - ), - SBLCheck( - string_contains, - name="uid.invalid_uid_lei", - description=( - "The first 20 characters of the 'unique identifier' should match " - "the Legal Entity Identifier (LEI) for the financial institution." - ), - element_wise=True, - containing_value=lei, - end_idx=20, - ), - ], - ), - "app_date": Column( - str, - title="Field 2: Application date", - checks=[ - SBLCheck( - is_date, - name="app_date.invalid_date_format", - description=( - "'Application date' must be a real calendar " - "date using YYYYMMDD format." - ), - element_wise=True, - ), - ], - ), - "app_method": Column( - str, - title="Field 3: Application method", - checks=[ - SBLCheck( - is_valid_enum, - name="app_method.invalid_enum_value", - description="'Application method' must equal 1, 2, 3, or 4.", - element_wise=True, - accepted_values=[ - "1", - "2", - "3", - "4", - ], - ), - ], - ), - "app_recipient": Column( - str, - title="Field 4: Application recipient", - checks=[ - SBLCheck( - is_valid_enum, - name="app_recipient.invalid_enum_value", - description="'Application recipient' must equal 1 or 2", - element_wise=True, - accepted_values=[ - "1", - "2", - ], - ), - ], - ), - "ct_credit_product": Column( - str, - title="Field 5: Credit product", - checks=[ - SBLCheck( - is_valid_enum, - name="ct_credit_product.invalid_enum_value", - description=( - "'Credit product' must equal 1, 2, 3, 4, 5, 6, 7, 8, " - "977, or 988." - ), - element_wise=True, - accepted_values=[ - "1", - "2", - "3", - "4", - "5", - "6", - "7", - "8", - "977", - "988", - ], - ), - ], - ), - "ct_credit_product_ff": Column( - str, - title="Field 6: Free-form text field for other credit products", - nullable=True, - checks=[ - SBLCheck.str_length( - 0, - 300, - name="ct_credit_product_ff.invalid_text_length", - description=( - "'Free-form text field for other credit products' " - "must not exceed 300 characters in length." - ), - ), - SBLCheck( - has_no_conditional_field_conflict, - name="ct_credit_product_ff.conditional_field_conflict", - description=( - "When 'credit product' does not equal 977 (other), 'free-form" - " text field for other credit products' must be blank." - "When 'credit product' equals 977, 'free-form text field " - "for other credit products' must not be blank." - ), - groupby="ct_credit_product", - condition_values={"977"}, - ), - ], - ), - "ct_guarantee": Column( - str, - title="Field 7: Type of guarantee", - nullable=True, - checks=[ - SBLCheck( - has_valid_value_count, - name="ct_guarantee.invalid_number_of_values", - description=( - "'Type of guarantee' must contain at least one and at" - " most five values, separated by semicolons." - ), - element_wise=True, - min_length=1, - max_length=5, - ), - SBLCheck( - is_unique_in_field, - warning=True, - name="ct_guarantee.duplicates_in_field", - description=( - "'Type of guarantee' should not contain " - "duplicated values." - ), - element_wise=True, - ), - SBLCheck( - meets_multi_value_field_restriction, - warning=True, - name="ct_guarantee.multi_value_field_restriction", - description=( - "When 'type of guarantee' contains 999 (no guarantee)," - " 'type of guarantee' should not contain more than one" - " value." - ), - element_wise=True, - single_values={"999"}, - ), - SBLCheck( - is_valid_enum, - name="ct_guarantee.invalid_enum_value", - description=( - "Each value in 'type of guarantee' (separated by " - " semicolons) must equal 1, 2, 3, 4, 5, 6, 7, 8," - " 9, 10, 11, 977, or 999." - ), - element_wise=True, - accepted_values=[ - "1", - "2", - "3", - "4", - "5", - "6", - "7", - "8", - "9", - "10", - "11", - "977", - "999", - ], - ), - ], - ), - "ct_guarantee_ff": Column( - str, - title="Field 8: Free-form text field for other guarantee", - nullable=True, - checks=[ - SBLCheck.str_length( - 0, - 300, - name="ct_guarantee_ff.invalid_text_length", - description=( - "'Free-form text field for other guarantee' must not " - "exceed 300 characters in length" - ), - ), - SBLCheck( - has_no_conditional_field_conflict, - name="ct_guarantee_ff.conditional_field_conflict", - description=( - "When 'type of guarantee' does not contain 977 (other), " - "'free-form text field for other guarantee' must be blank. " - "When 'type of guarantee' contains 977, 'free-form text field" - " for other guarantee' must not be blank." - ), - groupby="ct_guarantee", - condition_values={"977"}, - ), - SBLCheck( - has_valid_multi_field_value_count, - warning=True, - name="ct_guarantee_ff.multi_invalid_number_of_values", - description=( - "'Type of guarantee' and 'free-form text field for other " - "guarantee' combined should not contain more than five values. " - "Code 977 (other), within 'type of guarantee', does not count " - "toward the maximum number of values for the purpose of this " - "validation check." - ), - groupby="ct_guarantee", - ignored_values={"977"}, - max_length=5, - ), - ], - ), - "ct_loan_term_flag": Column( - str, - title="Field 9: Loan term: NA/NP flag", - checks=[ - SBLCheck( - is_valid_enum, - name="ct_loan_term_flag.invalid_enum_value", - description=( - "Each value in 'Loan term: NA/NP flag' (separated by " - " semicolons) must equal 900, 988, or 999." - ), - element_wise=True, - accepted_values=[ - "900", - "988", - "999", - ], - ), - SBLCheck( - has_valid_enum_pair, - name="ct_loan_term_flag.enum_value_conflict", - description=( - "When 'credit product' equals 1 (term loan - unsecured) or 2" - "(term loan - secured), 'loan term: NA/NP flag' must not equal" - "999 (not applicable)." - "When 'credit product' equals 988 (not provided by applicant " - "and otherwise undetermined), 'loan term: NA/NP flag' must" - "equal 999." - ), - groupby="ct_credit_product", - conditions=[ - { - "condition_values": {"1", "2"}, - "is_equal_condition": True, - "target_value": "999", - "should_equal_target": False, - }, - { - "condition_values": {"988"}, - "is_equal_condition": True, - "target_value": "999", - "should_equal_target": True, - }, - ], - ), - ], - ), - "ct_loan_term": Column( - str, - title="Field 10: Loan term", - nullable=True, - checks=[ - SBLCheck( - has_no_conditional_field_conflict, - name="ct_loan_term.conditional_field_conflict", - description=( - "When 'loan term: NA/NP flag' does not equal 900 (applicable " - "and reported), 'loan term' must be blank. When 'loan term:" - "NA/NP flag' equals 900, 'loan term' must not be blank." - ), - groupby="ct_loan_term_flag", - condition_values={"900"}, - ), - SBLCheck( - is_number, - name="ct_loan_term.invalid_numeric_format", - description="When present, 'loan term' must be a whole number.", - element_wise=True, - accept_blank=True, - ), - SBLCheck( - is_greater_than_or_equal_to, - name="ct_loan_term.invalid_numeric_value", - description=( - "When present, 'loan term' must be greater than or equal" - "to 1." - ), - element_wise=True, - min_value="1", - accept_blank=True, - ), - SBLCheck( - is_less_than, - name="ct_loan_term.unreasonable_numeric_value", - description=( - "When present, 'loan term' should be less than 1200" - "(100 years)." - ), - element_wise=True, - max_value="1200", - accept_blank=True, - ), - ], - ), - "credit_purpose": Column( - str, - title="Field 11: Credit purpose", - checks=[ - SBLCheck( - is_valid_enum, - name="credit_purpose.invalid_enum_value", - description=( - "Each value in 'credit purpose' (separated by " - " semicolons) must equal 1, 2, 3, 4, 5, 6, 7, 8," - " 9, 10, 11, 977, 988, or 999." - ), - element_wise=True, - accepted_values=[ - "1", - "2", - "3", - "4", - "5", - "6", - "7", - "8", - "9", - "10", - "11", - "977", - "988", - "999", - ], - ), - SBLCheck( - has_valid_value_count, - name="credit_purpose.invalid_number_of_values", - description=( - "'Credit purpose' must contain at least one and at" - " most three values, separated by semicolons." - ), - element_wise=True, - min_length=1, - max_length=3, - ), - SBLCheck( - meets_multi_value_field_restriction, - warning=True, - name="credit_purpose.multi_value_field_restriction", - description=( - "When 'credit purpose' contains 988 or 999," - " 'credit purpose' should not contain more than one" - " value." - ), - element_wise=True, - single_values={ - "988", - "999", - }, - ), - SBLCheck( - is_unique_in_field, - warning=True, - name="credit_purpose.duplicates_in_field", - description=( - "'Credit purpose' should not contain " " duplicated values." - ), - element_wise=True, - ), - ], - ), - "credit_purpose_ff": Column( - str, - title="Field 12: Free-form text field for other credit purpose", - nullable=True, - checks=[ - SBLCheck.str_length( - 0, - 300, - name="credit_purpose_ff.invalid_text_length", - description=( - "'Free-form text field for other credit purpose' " - " must not exceed 300 characters in length" - ), - ), - SBLCheck( - has_no_conditional_field_conflict, - name="credit_purpose_ff.conditional_field_conflict", - description=( - "When 'credit purpose' does not contain 977 (other)," - "'free-form text field for other credit purpose' must be blank." - "When 'credit purpose' contains 977, 'free-form text field for" - "other credit purpose' must not be blank." - ), - groupby="credit_purpose", - condition_values={"977"}, - ), - SBLCheck( - has_valid_value_count, - name="credit_purpose_ff.invalid_number_of_values", - description=( - "'Other Credit purpose' must not contain more " - " than one other credit purpose." - ), - element_wise=True, - min_length=0, - max_length=1, - ), - ], - ), - "amount_applied_for_flag": Column( - str, - title="Field 13: Amount applied for: NA/NP flag", - checks=[ - SBLCheck( - is_valid_enum, - name="amount_applied_for_flag.invalid_enum_value", - description=( - "'Amount applied For: NA/NP flag' must equal 900, 988, or 999." - ), - element_wise=True, - accepted_values=[ - "900", - "988", - "999", - ], - ), - ], - ), - "amount_applied_for": Column( - str, - title="Field 14: Amount applied for", - nullable=True, - checks=[ - SBLCheck( - has_no_conditional_field_conflict, - name="amount_applied_for.conditional_field_conflict", - description=( - "When 'amount applied for: NA/NP flag' does not equal 900 " - "(applicable and reported), 'amount applied for' must be blank." - "When 'amount applied for: NA/NP flag' equals 900, " - "'amount applied for' must not be blank." - ), - groupby="amount_applied_for_flag", - condition_values={"900"}, - ), - SBLCheck( - is_number, - name="amount_applied_for.invalid_numeric_format", - description=( - "When present, 'amount applied for' must be a numeric" - "value." - ), - element_wise=True, - accept_blank=True, - ), - SBLCheck( - is_greater_than, - name="amount_applied_for.invalid_numeric_value", - description=( - "When present, 'amount applied for' must be greater than 0." - ), - element_wise=True, - min_value="0", - accept_blank=True, - ), - ], - ), - "amount_approved": Column( - str, - title="Field 15: Amount approved or originated", - nullable=True, - checks=[ - SBLCheck( - is_number, - name="amount_approved.invalid_numeric_format", - description=( - "When present, 'amount approved or originated' " - "must be a numeric value." - ), - element_wise=True, - accept_blank=True, - ), - SBLCheck( - is_greater_than, - name="amount_approved.invalid_numeric_value", - description=( - "When present, 'amount approved or originated' " - "must be greater than 0." - ), - element_wise=True, - min_value="0", - accept_blank=True, - ), - SBLCheck( - has_no_conditional_field_conflict, - name="amount_approved.conditional_field_conflict", - description=( - "When 'action taken' does not equal 1 (originated) " - "or 2 (approved but not accepted), 'amount approved " - " or originated' must be blank. When 'action taken' " - "equals 1 or 2, 'amount approved or originated' must " - "not be blank." - ), - groupby="action_taken", - condition_values={"1", "2"}, - ), - ], - ), - "action_taken": Column( - str, - title="Field 16: Action taken", - checks=[ - SBLCheck( - is_valid_enum, - name="action_taken.invalid_enum_value", - description="'Action taken' must equal 1, 2, 3, 4, or 5.", - element_wise=True, - accepted_values=[ - "1", - "2", - "3", - "4", - "5", - ], - ), - SBLCheck( - has_valid_fieldset_pair, - name="pricing_all.conditional_fieldset_conflict", - description=( - "When 'action taken' equals 3 (denied), " - "4 (withdrawn by applicant), or 5 " - "(incomplete), the following fields must" - " all equal 999 (not applicable): " - "'Interest rate type', 'MCA/sales-based: " - "additional cost for merchant cash advances" - " or other sales-based financing: NA flag', " - "'Prepayment penalty could be imposed', " - "'Prepayment penalty exists'). And the " - " following fields must all be blank: " - "'Total origination charges', 'Amount of " - "total broker fees', 'Initial annual charges'" - ), - groupby=[ - "pricing_interest_rate_type", - "pricing_mca_addcost_flag", - "pricing_prepenalty_allowed", - "pricing_prepenalty_exists", - "pricing_origination_charges", - "pricing_broker_fees", - "pricing_initial_charges", - ], - condition_values=["3", "4", "5"], - should_fieldset_key_equal_to={ - "pricing_interest_rate_type": (0, True, "999"), - "pricing_mca_addcost_flag": (1, True, "999"), - "pricing_prepenalty_allowed": (2, True, "999"), - "pricing_prepenalty_exists": (3, True, "999"), - "pricing_origination_charges": (4, True, ""), - "pricing_broker_fees": (5, True, ""), - "pricing_initial_charges": (6, True, ""), - }, - ), - SBLCheck( - has_valid_fieldset_pair, - name="pricing_charges.conditional_fieldset_conflict", - description=( - "When 'action taken' equals 1 (originated)" - " or 2 (approved but not accepted), the " - "following fields all must not be blank: " - "'Total origination charges', 'Amount of " - "total broker fees', 'Initial annual " - "charges'. And the following fields must " - "not equal 999 (not applicable): 'Prepayment " - "penalty could be imposed', 'Prepayment " - "penalty exists'" - ), - groupby=[ - "pricing_origination_charges", - "pricing_broker_fees", - "pricing_initial_charges", - "pricing_prepenalty_allowed", - "pricing_prepenalty_exists", - ], - condition_values=["1", "2"], - should_fieldset_key_equal_to={ - "pricing_origination_charges": (0, False, ""), - "pricing_broker_fees": (1, False, ""), - "pricing_initial_charges": (2, False, ""), - "pricing_prepenalty_allowed": (3, False, "999"), - "pricing_prepenalty_exists": (4, False, "999"), - }, - ), - ], - ), - "action_taken_date": Column( - str, - title="Field 17: Action taken date", - checks=[ - SBLCheck( - is_date, - name="action_taken_date.invalid_date_format", - description=( - "'Action taken date' must be a real calendar" - " date using YYYYMMDD format." - ), - element_wise=True, - ), - SBLCheck( - is_date_in_range, - name="action_taken_date.invalid_date_value", - description=( - "The date indicated by 'action taken date' must occur" - " within the current reporting period:" - " October 1, 2024 to December 31, 2024." - ), - element_wise=True, - start_date_value="20241001", - end_date_value="20241231", - ), - SBLCheck( - is_date_after, - name="action_taken_date.date_value_conflict", - description=( - "The date indicated by 'action taken date'" - " must occur on or after 'application date'." - ), - groupby="app_date", - ), - SBLCheck( - is_date_before_in_days, - name="action_taken_date.unreasonable_date_value", - description=( - "The date indicated by 'application date' should" - " generally be less than two years (730 days) before" - " 'action taken date'." - ), - groupby="app_date", - days_value=730, - ), - ], - ), - "denial_reasons": Column( - str, - title="Field 18: Denial reason(s)", - checks=[ - SBLCheck( - is_valid_enum, - name="denial_reasons.invalid_enum_value", - description=( - "Each value in 'denial reason(s)' (separated by semicolons)" - "must equal 1, 2, 3, 4, 5, 6, 7, 8, 9, 977, or 999." - ), - element_wise=True, - accepted_values=[ - "1", - "2", - "3", - "4", - "5", - "6", - "7", - "8", - "9", - "977", - "999", - ], - ), - SBLCheck( - has_valid_value_count, - name="denial_reasons.invalid_number_of_values", - description=( - "'Denial reason(s)' must contain at least one and at most four" - "values, separated by semicolons." - ), - element_wise=True, - min_length=1, - max_length=4, - ), - SBLCheck( - has_valid_enum_pair, - name="denial_reasons.enum_value_conflict", - description=( - "When 'action taken' equals 3, 'denial reason(s)' must not" - "contain 999. When 'action taken' does not equal 3, 'denial" - "reason(s)' must equal 999." - ), - groupby="action_taken", - conditions=[ - { - "condition_values": {"3"}, - "is_equal_condition": True, - "target_value": "999", - "should_equal_target": False, - }, - { - "condition_values": {"3"}, - "is_equal_condition": False, - "target_value": "999", - "should_equal_target": True, - }, - ], - ), - SBLCheck( - meets_multi_value_field_restriction, - warning=True, - name="denial_reasons.multi_value_field_restriction", - description=( - "When 'denial reason(s)' contains 999 (not applicable)," - "'denial reason(s)' should not contain more than one value." - ), - element_wise=True, - single_values={"999"}, - ), - SBLCheck( - is_unique_in_field, - warning=True, - name="denial_reasons.duplicates_in_field", - description=( - "'Denial reason(s)' should not contain duplicated values." - ), - element_wise=True, - ), - ], - ), - "denial_reasons_ff": Column( - str, - title="Field 19: Free-form text field for other denial reason(s)", - nullable=True, - checks=[ - SBLCheck.str_length( - min_value=0, - max_value=300, - name="denial_reasons_ff.invalid_text_length", - description=( - "'Free-form text field for other denial reason(s)'" - "must not exceed 300 characters in length." - ), - ), - SBLCheck( - has_no_conditional_field_conflict, - name="denial_reasons_ff.conditional_field_conflict", - description=( - "When 'denial reason(s)' does not contain 977 (other), field" - "'free-form text field for other denial reason(s)' must be" - "blank. When 'denial reason(s)' contains 977, 'free-form text" - "field for other denial reason(s)' must not be blank." - ), - groupby="denial_reasons", - condition_values={"977"}, - ), - ], - ), - "pricing_interest_rate_type": Column( - str, - title="Field 20: Interest rate type", - checks=[ - SBLCheck( - is_valid_enum, - name="pricing_interest_rate_type.invalid_enum_value", - description=( - "Each value in 'Interest rate type' (separated by " - " semicolons) Must equal 1, 2, 3, 4, 5, 6, or 999" - ), - element_wise=True, - accepted_values=[ - "1", - "2", - "3", - "4", - "5", - "6", - "999", - ], - ), - ], - ), - "pricing_init_rate_period": Column( - str, - title="Field 21: Initial rate period", - nullable=True, - checks=[ - SBLCheck( - has_no_conditional_field_conflict, - name="pricing_init_rate_period.conditional_field_conflict", - description=( - "When 'interest rate type' does not equal 3 (initial rate " - "period > 12 months, variable interest), 4 (initial rate " - "period > 12 months, fixed interest), 5 (initial rate period " - "<= 12 months, variable interest), or 6 (initial rate period " - "<= 12 months, fixed interest), 'initial rate period' must " - "be blank. When 'interest rate type' equals 3, 4, 5, or 6, " - "'initial rate period' must not be blank" - ), - groupby="pricing_interest_rate_type", - condition_values={"3", "4", "5", "6"}, - ), - SBLCheck( - is_number, - name="pricing_init_rate_period.invalid_numeric_format", - description=( - "When present, 'initial rate period' must be a whole number.", - ), - element_wise=True, - accept_blank=True, - ), - SBLCheck( - is_greater_than, - name="pricing_init_rate_period.invalid_numeric_value", - description=( - "When present, 'initial rate period' must be greater than 0", - ), - element_wise=True, - min_value="0", - accept_blank=True, - ), - ], - ), - "pricing_fixed_rate": Column( - str, - title="Field 22: Fixed rate: interest rate", - nullable=True, - checks=[ - SBLCheck( - is_number, - name="pricing_fixed_rate.invalid_numeric_format", - description=( - "When present, 'fixed rate: interest rate'" - " must be a numeric value." - ), - element_wise=True, - accept_blank=True, - ), - SBLCheck( - has_no_conditional_field_conflict, - name="pricing_fixed_rate.conditional_field_conflict", - description=( - "When 'interest rate type' does not equal 2" - " (fixed interest rate, no initial rate period)," - " 4 (initial rate period > 12 months, fixed interest" - " rate), or 6 (initial rate period <= 12 months, fixed" - " interest rate), 'fixed rate: interest rate' must be" - " blank. When 'interest rate type' equals 2, 4, or 6," - " 'fixed rate: interest rate' must not be blank." - ), - groupby="pricing_interest_rate_type", - condition_values={"2", "4", "6"}, - ), - SBLCheck( - is_greater_than, - name="pricing_fixed_rate.unreasonable_numeric_value", - description=( - "When present, 'fixed rate: interest rate'" - " should generally be greater than 0.1." - ), - element_wise=True, - min_value="0.1", - accept_blank=True, - ), - ], - ), - "pricing_adj_margin": Column( - str, - title="Field 23: Adjustable rate transaction: margin", - nullable=True, - checks=[ - SBLCheck( - is_number, - name="pricing_adj_margin.invalid_numeric_format", - description=( - "When present, 'adjustable rate transaction:" - " margin' must be a numeric value." - ), - element_wise=True, - accept_blank=True, - ), - SBLCheck( - has_no_conditional_field_conflict, - name="pricing_adj_margin.conditional_field_conflict", - description=( - "When 'interest rate type' does not equal 1" - " (adjustable interest rate, no initial rate period), 3 " - "(initial rate period > 12 months, adjustable interest rate)," - " or 5 (initial rate period <= 12 months, variable interest" - " rate), 'adjustable rate transaction: margin' must be blank." - " When 'interest rate type' equals 1, 3, or 5, 'variable" - " rate transaction: margin' must not be blank." - ), - groupby="pricing_interest_rate_type", - condition_values={"1", "3", "5"}, - ), - SBLCheck( - is_greater_than, - name="pricing_adj_margin.unreasonable_numeric_value", - description=( - "When present, 'adjustable rate transaction:" - " margin' should generally be greater than 0.1." - ), - element_wise=True, - min_value="0.1", - accept_blank=True, - ), - ], - ), - "pricing_adj_index_name": Column( - str, - title="Field 24: Adjustable rate transaction: index name", - checks=[ - SBLCheck( - is_valid_enum, - name="pricing_adj_index_name.invalid_enum_value", - description=( - "'Adjustable rate transaction: index name' must equal 1, 2, 3," - " 4, 5, 6, 7, 8, 9, 10, 977, or 999." - ), - element_wise=True, - accepted_values=[ - "1", - "2", - "3", - "4", - "5", - "6", - "7", - "8", - "9", - "10", - "977", - "999", - ], - ), - SBLCheck( - has_valid_enum_pair, - name="pricing_adj_index_name.enum_value_conflict", - description=( - "When 'interest rate type' does not equal 1 (variable interest" - "rate, no initial rate period), 3 (initial rate period > 12" - "months, adjustable interest rate), or 5 (initial rate" - "period <= 12 months, adjustable interest rate), 'adjustable" - " rate transaction: index name' must equal 999." - "When 'interest rate type' equals 1, 3, or 5, 'adjustable rate" - "transaction: index name' must not equal 999." - ), - groupby="pricing_interest_rate_type", - conditions=[ - { - "condition_values": {"1", "3", "5"}, - "is_equal_condition": False, - "target_value": "999", - "should_equal_target": True, - }, - { - "condition_values": {"1", "3", "5"}, - "is_equal_condition": True, - "target_value": "999", - "should_equal_target": False, - }, - ], - ), - ], - ), - "pricing_adj_index_name_ff": Column( - str, - title="Field 25: Adjustable rate transaction: index name: other", - nullable=True, - checks=[ - SBLCheck.str_length( - min_value=0, - max_value=300, - name="pricing_adj_index_name_ff.invalid_text_length", - description=( - "'Adjustable rate transaction: index name: other' must " - "not exceed 300 characters in length." - ), - ), - SBLCheck( - has_no_conditional_field_conflict, - name="pricing_adj_index_name_ff.conditional_field_conflict", - description=( - "When 'adjustable rate transaction: index name' does not equal" - "977 (other), 'adjustable rate transaction: index name: other'" - "must be blank." - "When 'adjustable rate transaction: index name' equals 977," - "'adjustable rate transaction: index name: other' must not be" - "blank." - ), - groupby="pricing_adj_index_name", - condition_values={"977"}, - ), - ], - ), - "pricing_adj_index_value": Column( - str, - title="Field 26: Adjustable rate transaction: index value", - checks=[ - SBLCheck( - is_number, - name="pricing_adj_index_value.invalid_numeric_format", - description="When present, 'adjustable rate transaction:" - " index value' must be a numeric value.", - element_wise=True, - accept_blank=True, - ), - SBLCheck( - has_no_conditional_field_conflict, - name="pricing_adj_index_value.conditional_field_conflict", - description=( - "When 'interest rate type' does not equal 1 (variable" - " interest rate, no initial rate period)," - " or 3 (initial rate period > 12 months, variable interest" - " rate), 'adjustable rate transaction: index value' must be" - " blank. When 'interest rate type' equals 1 or 3," - " 'adjustable rate transaction: index value' must not be blank." - ), - groupby="pricing_interest_rate_type", - condition_values={"1", "3"}, - ), - ], - ), - "pricing_origination_charges": Column( - str, - title="Field 27: Total origination charges", - nullable=True, - checks=[ - SBLCheck( - is_number, - name="pricing_origination_charges.invalid_numeric_format", - description=( - "When present, 'total origination charges' must be a numeric", - "value.", - ), - element_wise=True, - accept_blank=True, - ), - ], - ), - "pricing_broker_fees": Column( - str, - title="Field 28: Amount of total broker fees", - nullable=True, - checks=[ - SBLCheck( - is_number, - name="pricing_broker_fees.invalid_numeric_format", - description=( - "When present, 'amount of total broker fees' must be a", - "numeric value.", - ), - element_wise=True, - accept_blank=True, - ), - ], - ), - "pricing_initial_charges": Column( - str, - title="Field 29: Initial annual charges", - nullable=True, - checks=[ - SBLCheck( - is_number, - name="pricing_initial_charges.invalid_numeric_format", - description=( - "When present, 'initial annual charges' must be a" - "numeric value." - ), - element_wise=True, - accept_blank=True, - ), - ], - ), - "pricing_mca_addcost_flag": Column( - str, - title=( - "Field 30: MCA/sales-based: additional cost for merchant cash " - "advances or other sales-based financing: NA flag" - ), - checks=[ - SBLCheck( - is_valid_enum, - name="pricing_mca_addcost_flag.invalid_enum_value", - description=( - "'MCA/sales-based: additional cost for merchant cash " - "advances or other sales-based financing: NA flag' " - "must equal 900 or 999." - ), - element_wise=True, - accepted_values=[ - "900", - "999", - ], - ), - SBLCheck( - has_valid_enum_pair, - name="pricing_mca_addcost_flag.enum_value_conflict", - description=( - "When 'credit product' does not equal 7 (merchant cash " - "advance), 8 (other sales-based financing transaction) " - "or 977 (other), 'MCA/sales-based: additional cost for " - "merchant cash advances or other sales-based financing: " - "NA flag' must be 999 (not applicable)." - ), - groupby="ct_credit_product", - conditions=[ - { - "condition_values": {"7", "8", "977"}, - "is_equal_condition": False, - "target_value": "999", - "should_equal_target": True, - } - ], - ), - ], - ), - "pricing_mca_addcost": Column( - str, - title=( - "Field 31: MCA/sales-based: additional cost for merchant cash ", - "advances or other sales-based financing", - ), - checks=[ - SBLCheck( - has_no_conditional_field_conflict, - name="pricing_mca_addcost.conditional_field_conflict", - description=( - "When 'MCA/sales-based: additional cost for merchant " - "cash advances or other sales-based financing: NA flag' " - "does not equal 900 (applicable), 'MCA/sales-based: " - "additional cost for merchant cash advances or other " - "sales-based financing' must be blank. When 'MCA/sales-based: " - "additional cost for merchant cash advances or other " - "sales-based financing: NA flag' equals 900, MCA/sales-based: " - "additional cost for merchant cash advances or other " - "sales-based financing’ must not be blank." - ), - groupby="pricing_mca_addcost_flag", - condition_values={"900"}, - ), - SBLCheck( - is_number, - name="pricing_mca_addcost.invalid_numeric_format", - description=( - "When present, 'MCA/sales-based: additional cost for " - "merchant cash advances or other sales-based financing' " - "must be a numeric value" - ), - element_wise=True, - accept_blank=True, - ), - ], - ), - "pricing_prepenalty_allowed": Column( - str, - title="Field 32: Prepayment penalty could be imposed", - checks=[ - SBLCheck( - is_valid_enum, - name="pricing_prepenalty_allowed.invalid_enum_value", - description=( - "'Prepayment penalty could be imposed' must equal 1, 2, or 999." - ), - element_wise=True, - accepted_values=[ - "1", - "2", - "999", - ], - ), - ], - ), - "pricing_prepenalty_exists": Column( - str, - title="Field 33: Prepayment penalty exists", - checks=[ - SBLCheck( - is_valid_enum, - name="pricing_prepenalty_exists.invalid_enum_value", - description="'Prepayment penalty exists' must equal 1, 2, or 999.", - element_wise=True, - accepted_values=[ - "1", - "2", - "999", - ], - ), - ], - ), - "census_tract_adr_type": Column( - str, - title="Field 34: Type of address", - checks=[ - SBLCheck( - is_valid_enum, - name="census_tract_adr_type.invalid_enum_value", - description=( - "'Census tract: type of address' must equal 1, 2, 3, or 988." - ), - element_wise=True, - accepted_values=[ - "1", - "2", - "3", - "988", - ], - ), - ], - ), - "census_tract_number": Column( - str, - title="Field 35: Tract number", - nullable=True, - checks=[ - SBLCheck( - has_correct_length, - name="census_tract_number.invalid_text_length", - description=( - "When present, 'census tract: tract number' must " - "be a GEOID with exactly 11 digits." - ), - element_wise=True, - accepted_length=11, - accept_blank=True, - ), - SBLCheck( - has_valid_enum_pair, - name="census_tract_number.conditional_field_conflict", - description=( - "When 'census tract: type of address' equals 988 (not " - "provided by applicant and otherwise undetermined), " - "'census tract: tract number' must be blank." - "When 'census tract: type of address' equals 1 (address" - " or location where the loan proceeds will principally " - "be applied), 2 (address or location of borrower's main " - "office or headquarters), or 3 (another address or " - "location associated with the applicant), 'census tract:" - " tract number' must not be blank." - ), - groupby="census_tract_adr_type", - conditions=[ - { - "condition_values": {"1", "2", "3"}, - "is_equal_condition": True, - "target_value": "", - "should_equal_target": False, - }, - { - "condition_values": {"988"}, - "is_equal_condition": True, - "target_value": "", - "should_equal_target": True, - }, - ], - ), - SBLCheck( - is_valid_code, - name="census_tract_number.invalid_geoid", - description=( - "When present, 'census tract: tract number' " - "should be a valid census tract GEOID as defined " - "by the U.S. Census Bureau." - ), - element_wise=True, - accept_blank=True, - codes=global_data.census_geoids, - ), - ], - ), - "gross_annual_revenue_flag": Column( - str, - title="Field 36: Gross annual revenue: NP flag", - checks=[ - SBLCheck( - is_valid_enum, - name="gross_annual_revenue_flag.invalid_enum_value", - description=( - "'Gross annual revenue: NP flag' must equal 900 or 988." - ), - element_wise=True, - accepted_values=[ - "900", - "988", - ], - ), - ], - ), - "gross_annual_revenue": Column( - str, - title="Field 37: Gross annual revenue", - nullable=True, - checks=[ - SBLCheck( - is_number, - name="gross_annual_revenue.invalid_numeric_format", - description=( - "When present, 'gross annual revenue' must be a numeric value." - ), - element_wise=True, - accept_blank=True, - ), - SBLCheck( - has_no_conditional_field_conflict, - name="gross_annual_revenue.conditional_field_conflict", - description=( - "When 'gross annual revenue: NP flag' does not equal 900 " - "(reported), 'gross annual revenue' must be blank. When " - "'gross annual revenue: NP flag' equals 900, " - "'gross annual revenue' must not be blank." - ), - groupby="gross_annual_revenue_flag", - condition_values={"900"}, - ), - ], - ), - "naics_code_flag": Column( - str, - title=( - "Field 38: North American Industry Classification System (NAICS)" - "code: NP flag" - ), - checks=[ - SBLCheck( - is_valid_enum, - name="naics_code_flag.invalid_enum_value", - description=( - "'North American Industry Classification System (NAICS) " - "code: NP flag' must equal 900 or 988." - ), - element_wise=True, - accepted_values=[ - "900", - "988", - ], - ), - ], - ), - "naics_code": Column( - str, - title=( - "Field 39: North American Industry Classification" - "System (NAICS) code" - ), - nullable=True, - checks=[ - SBLCheck( - is_number, - name="naics_code.invalid_naics_format", - description=( - "'North American Industry Classification System " - "(NAICS) code' may only contain numeric characters." - ), - element_wise=True, - accept_blank=True, - ), - SBLCheck( - has_correct_length, - name="naics_code.invalid_text_length", - description=( - "When present, 'North American Industry Classification System " - "(NAICS) code' must be three digits in length." - ), - element_wise=True, - accepted_length=3, - accept_blank=True, - ), - SBLCheck( - is_valid_code, - name="naics_code.invalid_naics_value", - description=( - "When present, 'North American Industry Classification System " - "(NAICS) code' should be a valid NAICS code." - ), - element_wise=True, - accept_blank=True, - codes=global_data.naics_codes, - ), - SBLCheck( - has_no_conditional_field_conflict, - name="naics_code.conditional_field_conflict", - description=( - "When 'North American Industry Classification System (NAICS) " - " code: NP flag' does not equal 900 (reported), 'North American" - " Industry Classification System (NAICS) code' must be blank." - "When 'North American Industry Classification System (NAICS) " - "code: NP flag' equals 900, 'North American Industry " - "Classification System (NAICS) code' must not be blank." - ), - groupby="naics_code_flag", - condition_values={"900"}, - ), - ], - ), - "number_of_workers": Column( - str, - title="Field 40: Number of workers", - checks=[ - SBLCheck( - is_valid_enum, - name="number_of_workers.invalid_enum_value", - description=( - "'Number of workers' must equal 1, 2, 3, 4, 5, 6, 7, 8, 9," - " or 988." - ), - element_wise=True, - accepted_values=[ - "1", - "2", - "3", - "4", - "5", - "6", - "7", - "8", - "9", - "988", - ], - ), - ], - ), - "time_in_business_type": Column( - str, - title="Field 41: Type of response", - checks=[ - SBLCheck( - is_valid_enum, - name="time_in_business_type.invalid_enum_value", - description=( - "'Time in business: type of response'" - " must equal 1, 2, 3, or 988." - ), - element_wise=True, - accepted_values=[ - "1", - "2", - "3", - "988", - ], - ), - ], - ), - "time_in_business": Column( - str, - title="Field 42: Time in business", - nullable=True, - checks=[ - SBLCheck( - is_number, - name="time_in_business.invalid_numeric_format", - description=( - "When present, 'time in business' must be a whole number." - ), - element_wise=True, - accept_blank=True, - ), - SBLCheck( - is_greater_than_or_equal_to, - name="time_in_business.invalid_numeric_value", - description=( - "When present, 'time in business'" - " must be greater than or equal to 0.", - ), - element_wise=True, - min_value="0", - accept_blank=True, - ), - SBLCheck( - has_no_conditional_field_conflict, - name="time_in_business.conditional_field_conflict", - description=( - "When 'time in business: type of response' does not" - " equal 1 (the number of years an applicant has been" - " in business is collected or obtained by the financial" - " institution), 'time in business' must be blank. When" - " 'time in business: type of response' equals 1," - " 'time in business' must not be blank." - ), - groupby="time_in_business_type", - condition_values={"1"}, - ), - ], - ), - "business_ownership_status": Column( - str, - title="Field 43: Business ownership status", - checks=[ - SBLCheck( - is_valid_enum, - name="business_ownership_status.invalid_enum_value", - description=( - "Each value in 'business ownership status'" - " (separated by semicolons) must equal 1, 2, 3," - " 955, 966, or 988." - ), - element_wise=True, - accepted_values=[ - "1", - "2", - "3", - "955", - "966", - "988", - ], - ), - SBLCheck( - has_valid_value_count, - name="business_ownership_status.invalid_number_of_values", - description=( - "'Business ownership status' must" - " contain at least one value." - ), - element_wise=True, - min_length=1, - ), - SBLCheck( - is_unique_in_field, - warning=True, - name="business_ownership_status.duplicates_in_field", - description=( - "'Business ownership status' should" - " not contain duplicated values." - ), - element_wise=True, - ), - SBLCheck( - meets_multi_value_field_restriction, - warning=True, - name="business_ownership_status.multi_value_field_restriction", - description=( - "When 'business ownership status' contains 966" - " (the applicant responded that they did not wish" - " to provide this information) or 988 (not provided" - " by applicant), 'business ownership status' should" - " not contain more than one value." - ), - element_wise=True, - single_values={"966", "988"}, - ), - ], - ), - "num_principal_owners_flag": Column( - str, - title="Field 44: Number of principal owners: NP flag", - checks=[ - SBLCheck( - is_valid_enum, - name="num_principal_owners_flag.invalid_enum_value", - description=( - "'Number of principal owners: NP flag' must equal 900 or 988." - ), - element_wise=True, - accepted_values=[ - "900", - "988", - ], - ), - ], - ), - "num_principal_owners": Column( - str, - title="Field 45: Number of principal owners", - nullable=True, - checks=[ - SBLCheck( - is_valid_enum, - name="num_principal_owners.invalid_enum_value", - description=( - "When present, 'number of principal owners' must equal " - "0, 1, 2, 3, or 4." - ), - element_wise=True, - accepted_values=["0", "1", "2", "3", "4"], - accept_blank=True, - ), - SBLCheck( - has_no_conditional_field_conflict, - name="num_principal_owners.conditional_field_conflict", - description=( - "When 'number of principal owners: NP flag' does not equal 900 " - "(reported), 'number of principal owners' must be blank." - "When 'number of principal owners: NP flag' equals 900, " - "'number of principal owners' must not be blank." - ), - groupby="num_principal_owners_flag", - condition_values={"900"}, - ), - SBLCheck( - has_valid_fieldset_pair, - name="po_demographics_0.conditional_fieldset_conflict", - description=( - "When 'number of principal owners' equals 0 or is blank, " - "demographic fields for principal owners 1, 2, 3, and 4 " - "should be blank." - ), - groupby=[ - "po_1_ethnicity", - "po_1_race", - "po_1_gender_flag", - "po_2_ethnicity", - "po_2_race", - "po_2_gender_flag", - "po_3_ethnicity", - "po_3_race", - "po_3_gender_flag", - "po_4_ethnicity", - "po_4_race", - "po_4_gender_flag", - ], - condition_values=["0", ""], - should_fieldset_key_equal_to={ - "po_1_ethnicity": (0, True, ""), - "po_1_race": (1, True, ""), - "po_1_gender_flag": (2, True, ""), - "po_2_ethnicity": (3, True, ""), - "po_2_race": (4, True, ""), - "po_2_gender_flag": (5, True, ""), - "po_3_ethnicity": (6, True, ""), - "po_3_race": (7, True, ""), - "po_3_gender_flag": (8, True, ""), - "po_4_ethnicity": (9, True, ""), - "po_4_race": (10, True, ""), - "po_4_gender_flag": (11, True, ""), - }, - ), - SBLCheck( - has_valid_fieldset_pair, - name="po_demographics_1.conditional_fieldset_conflict", - description=( - "When 'number of principal owners' equals 1, " - "'ethnicity of principal owner 1', 'race of principal owner 1'," - " and 'sex/gender of principal owner 1: NP flag' should not be" - " blank. Demographic fields for principal owners 2, 3, and 4 " - "should be blank." - ), - groupby=[ - "po_1_ethnicity", - "po_1_race", - "po_1_gender_flag", - "po_2_ethnicity", - "po_2_race", - "po_2_gender_flag", - "po_3_ethnicity", - "po_3_race", - "po_3_gender_flag", - "po_4_ethnicity", - "po_4_race", - "po_4_gender_flag", - ], - condition_values=["1"], - should_fieldset_key_equal_to={ - "po_1_ethnicity": (0, False, ""), - "po_1_race": (1, False, ""), - "po_1_gender_flag": (2, False, ""), - "po_2_ethnicity": (3, True, ""), - "po_2_race": (4, True, ""), - "po_2_gender_flag": (5, True, ""), - "po_3_ethnicity": (6, True, ""), - "po_3_race": (7, True, ""), - "po_3_gender_flag": (8, True, ""), - "po_4_ethnicity": (9, True, ""), - "po_4_race": (10, True, ""), - "po_4_gender_flag": (11, True, ""), - }, - ), - SBLCheck( - has_valid_fieldset_pair, - name="po_demographics_2.conditional_fieldset_conflict", - description=( - "When 'number of principal owners' equals 2, " - "'ethnicity of principal owner 1 and 2', 'race of principal " - "owner 1 and 2', and 'sex/gender of principal owner 1 and 2: " - "NP flag' should not be blank." - ), - groupby=[ - "po_1_ethnicity", - "po_1_race", - "po_1_gender_flag", - "po_2_ethnicity", - "po_2_race", - "po_2_gender_flag", - "po_3_ethnicity", - "po_3_race", - "po_3_gender_flag", - "po_4_ethnicity", - "po_4_race", - "po_4_gender_flag", - ], - condition_values=["2"], - should_fieldset_key_equal_to={ - "po_1_ethnicity": (0, False, ""), - "po_1_race": (1, False, ""), - "po_1_gender_flag": (2, False, ""), - "po_2_ethnicity": (3, False, ""), - "po_2_race": (4, False, ""), - "po_2_gender_flag": (5, False, ""), - "po_3_ethnicity": (6, True, ""), - "po_3_race": (7, True, ""), - "po_3_gender_flag": (8, True, ""), - "po_4_ethnicity": (9, True, ""), - "po_4_race": (10, True, ""), - "po_4_gender_flag": (11, True, ""), - }, - ), - SBLCheck( - has_valid_fieldset_pair, - name="po_demographics_3.conditional_fieldset_conflict", - description=( - "When 'number of principal owners' equals 3, " - "'ethnicity of principal owner 1, 2, and 3', 'race of principal" - " owner 1, 2, and 3', and 'sex/gender of principal owner 1, 2, " - "and 3: NP flag' should not be blank. Demographic fields for " - "principal owner 4 should be blank." - ), - groupby=[ - "po_1_ethnicity", - "po_1_race", - "po_1_gender_flag", - "po_2_ethnicity", - "po_2_race", - "po_2_gender_flag", - "po_3_ethnicity", - "po_3_race", - "po_3_gender_flag", - "po_4_ethnicity", - "po_4_race", - "po_4_gender_flag", - ], - condition_values=["3"], - should_fieldset_key_equal_to={ - "po_1_ethnicity": (0, False, ""), - "po_1_race": (1, False, ""), - "po_1_gender_flag": (2, False, ""), - "po_2_ethnicity": (3, False, ""), - "po_2_race": (4, False, ""), - "po_2_gender_flag": (5, False, ""), - "po_3_ethnicity": (6, False, ""), - "po_3_race": (7, False, ""), - "po_3_gender_flag": (8, False, ""), - "po_4_ethnicity": (9, True, ""), - "po_4_race": (10, True, ""), - "po_4_gender_flag": (11, True, ""), - }, - ), - SBLCheck( - has_valid_fieldset_pair, - name="po_demographics_4.conditional_fieldset_conflict", - description=( - "When 'number of principal owners' equals 4, " - "'ethnicity of principal owner 1, 2, 3, and 4', " - "'race of principal owner 1, 2, 3, and 4', " - "and 'sex/gender of principal owner 1, 2, 3, and 4: NP flag'" - " should not be blank." - ), - groupby=[ - "po_1_ethnicity", - "po_1_race", - "po_1_gender_flag", - "po_2_ethnicity", - "po_2_race", - "po_2_gender_flag", - "po_3_ethnicity", - "po_3_race", - "po_3_gender_flag", - "po_4_ethnicity", - "po_4_race", - "po_4_gender_flag", - ], - condition_values=["4"], - should_fieldset_key_equal_to={ - "po_1_ethnicity": (0, False, ""), - "po_1_race": (1, False, ""), - "po_1_gender_flag": (2, False, ""), - "po_2_ethnicity": (3, False, ""), - "po_2_race": (4, False, ""), - "po_2_gender_flag": (5, False, ""), - "po_3_ethnicity": (6, False, ""), - "po_3_race": (7, False, ""), - "po_3_gender_flag": (8, False, ""), - "po_4_ethnicity": (9, False, ""), - "po_4_race": (10, False, ""), - "po_4_gender_flag": (11, False, ""), - }, - ), - ], - ), - "po_1_ethnicity": Column( - str, - title="Field 46: Ethnicity of principal owner 1", - nullable=True, - checks=[ - SBLCheck( - is_valid_enum, - name="po_1_ethnicity.invalid_enum_value", - description=( - "When present, each value in 'ethnicity" - " of principal owner 1' (separated by" - " semicolons) must equal 1, 11, 12," - " 13, 14, 2, 966, 977, or 988." - ), - element_wise=True, - accepted_values=[ - "1", - "11", - "12", - "13", - "14", - "2", - "966", - "977", - "988", - ], - accept_blank=True, - ), - SBLCheck( - is_unique_in_field, - warning=True, - name="po_1_ethnicity.duplicates_in_field", - description=( - "'Ethnicity of principal owner 1' should" - " not contain duplicated values." - ), - element_wise=True, - ), - SBLCheck( - meets_multi_value_field_restriction, - warning=True, - name="po_1_ethnicity.multi_value_field_restriction", - description=( - "When 'ethnicity of principal owner 1' contains" - " 966 (the applicant responded that they did" - " not wish to provide this information) or 988" - " (not provided by applicant), 'ethnicity of" - " principal owner 1' should not contain more than one value." - ), - element_wise=True, - single_values={"966", "988"}, - ), - ], - ), - "po_1_ethnicity_ff": Column( - str, - title=( - "Field 47: Ethnicity of principal owner 1: free-form text field for" - "other Hispanic or Latino ethnicity" - ), - nullable=True, - checks=[ - SBLCheck.str_length( - 0, - 300, - name="po_1_ethnicity_ff.invalid_text_length", - description=( - "'Ethnicity of principal owner 1: free-form" - " text field for other Hispanic or Latino'" - " must not exceed 300 characters in length." - ), - ), - SBLCheck( - has_no_conditional_field_conflict, - name="po_1_ethnicity_ff.conditional_field_conflict", - description=( - "When 'ethnicity of principal owner 1' does not" - " contain 977 (the applicant responded in the" - " free-form text field), 'ethnicity of principal" - " owner 1: free-form text field for other Hispanic" - " or Latino' must be blank. When 'ethnicity of principal" - " owner 1' contains 977, 'ethnicity of principal" - " owner 1: free-form text field for other Hispanic" - " or Latino' must not be blank." - ), - groupby="po_1_ethnicity", - condition_values={"977"}, - ), - ], - ), - "po_1_race": Column( - str, - title="Field 48: Race of principal owner 1", - nullable=True, - checks=[ - SBLCheck( - is_valid_enum, - name="po_1_race.invalid_enum_value", - description=( - "When present, each value in 'race" - " of principal owner 1' (separated by" - " semicolons) must equal 1, 2, 21, 22," - " 23, 24, 25, 26, 27, 3, 31, 32, 33," - " 34, 35, 36, 37, 4, 41, 42, 43, 44," - " 5, 966, 971, 972, 973, 974, or 988." - ), - element_wise=True, - accepted_values=[ - "1", - "2", - "21", - "22", - "23", - "24", - "25", - "26", - "27", - "3", - "31", - "32", - "33", - "34", - "35", - "36", - "37", - "4", - "41", - "42", - "43", - "44", - "5", - "966", - "971", - "972", - "973", - "974", - "988", - ], - accept_blank=True, - ), - SBLCheck( - is_unique_in_field, - warning=True, - name="po_1_race.duplicates_in_field", - description=( - "'Race of principal owner 1' should" - " not contain duplicated values." - ), - element_wise=True, - ), - SBLCheck( - meets_multi_value_field_restriction, - warning=True, - name="po_1_race.multi_value_field_restriction", - description=( - "When 'race of principal owner 1' contains" - " 966 (the applicant responded that they" - " did not wish to provide this information)" - " or 988 (not provided by applicant)," - " 'race of principal owner 1' should not" - " contain more than one value." - ), - element_wise=True, - single_values={"966", "988"}, - ), - ], - ), - "po_1_race_anai_ff": Column( - str, - title=( - "Field 49: Race of principal owner 1: free-form text field for" - "American Indian or Alaska Native Enrolled or Principal Tribe" - ), - nullable=True, - checks=[ - SBLCheck.str_length( - 0, - 300, - name="po_1_race_anai_ff.invalid_text_length", - description=( - "'Race of principal owner 1: free-form" - " text field for American Indian or Alaska" - " Native Enrolled or Principal Tribe' must" - " not exceed 300 characters in length." - ), - ), - SBLCheck( - has_no_conditional_field_conflict, - name="po_1_race_anai_ff.conditional_field_conflict", - description=( - "When 'race of principal owner 1' does not" - " contain 971 (the applicant responded in" - " the free-form text field for American Indian" - " or Alaska Native Enrolled or Principal Tribe)," - " 'race of principal owner 1: free-form text" - " field for American Indian or Alaska Native" - " Enrolled or Principal Tribe' must be blank." - " When 'race of principal owner 1' contains 971," - " 'race of principal owner 1: free-form text field" - " for American Indian or Alaska Native Enrolled or" - " Principal Tribe' must not be blank." - ), - groupby="po_1_race", - condition_values={"971"}, - ), - ], - ), - "po_1_race_asian_ff": Column( - str, - title=( - "Field 50: Race of principal owner 1: free-form text field for other" - "Asian race" - ), - nullable=True, - checks=[ - SBLCheck.str_length( - 0, - 300, - name="po_1_race_asian_ff.invalid_text_length", - description=( - "'Race of principal owner 1: free-form text" - " field for other Asian' must not exceed 300" - " characters in length." - ), - ), - SBLCheck( - has_no_conditional_field_conflict, - name="po_1_race_asian_ff.conditional_field_conflict", - description=( - "When 'race of principal owner 1' does not contain" - " 972 (the applicant responded in the free-form text" - " field for other Asian race), 'race of principal" - " owner 1: free-form text field for other Asian' must" - " be blank. When 'race of principal owner 1' contains" - " 972, 'race of principal owner 1: free-form text field" - " for other Asian' must not be blank." - ), - groupby="po_1_race", - condition_values={"972"}, - ), - ], - ), - "po_1_race_baa_ff": Column( - str, - title=( - "Field 51: Race of principal owner 1: free-form text field for other" - "Black or African American race" - ), - nullable=True, - checks=[ - SBLCheck.str_length( - 0, - 300, - name="po_1_race_baa_ff.invalid_text_length", - description=( - "'Race of principal owner 1: free-form text" - " field for other Black or African American'" - " must not exceed 300 characters in length." - ), - ), - SBLCheck( - has_no_conditional_field_conflict, - name="po_1_race_baa_ff.conditional_field_conflict", - description=( - "When 'race of principal owner 1' does not contain 973" - " (the applicant responded in the free-form text field" - " for other Black or African race), 'race of principal" - " owner 1: free-form text field for other Black or African" - " American' must be blank. When 'race of principal owner 1'" - " contains 973, 'race of principal owner 1: free-form text" - " field for other Black or African American' must not be blank." - ), - groupby="po_1_race", - condition_values={"973"}, - ), - ], - ), - "po_1_race_pi_ff": Column( - str, - title=( - "Field 52: Race of principal owner 1: free-form text field for other" - "Pacific Islander race" - ), - nullable=True, - checks=[ - SBLCheck.str_length( - 0, - 300, - name="po_1_race_pi_ff.invalid_text_length", - description=( - "'Race of principal owner 1: free-form text" - " field for other Pacific Islander race' must" - " not exceed 300 characters in length." - ), - ), - SBLCheck( - has_no_conditional_field_conflict, - name="po_1_race_pi_ff.conditional_field_conflict", - description=( - "When 'race of principal owner 1' does not contain 974" - " (the applicant responded in the free-form text field" - " for other Pacific Islander race), 'race of principal" - " owner 1: free-form text field for other Pacific Islander" - " race' must be blank. When 'race of principal owner 1'" - " contains 974, 'race of principal owner 1: free-form text" - " field for other Pacific Islander race' must not be blank." - ), - groupby="po_1_race", - condition_values={"974"}, - ), - ], - ), - "po_1_gender_flag": Column( - str, - title="Field 53: Sex/gender of principal owner 1: NP flag", - nullable=True, - checks=[ - SBLCheck( - is_valid_enum, - name="po_1_gender_flag.invalid_enum_value", - description=( - "When present, 'sex/gender of principal" - " owner 1: NP flag' must equal 1, 966, or 988." - ), - element_wise=True, - accepted_values=[ - "1", - "966", - "988", - ], - accept_blank=True, - ), - ], - ), - "po_1_gender_ff": Column( - str, - title=( - "Field 54: Sex/gender of principal owner 1: free-form text field for" - "self-identified sex/gender" - ), - nullable=True, - checks=[ - SBLCheck.str_length( - 0, - 300, - name="po_1_gender_ff.invalid_text_length", - description=( - "'Sex/gender of principal owner 1: free-form" - " text field for self-identified sex/gender'" - " must not exceed 300 characters in length." - ), - ), - SBLCheck( - has_no_conditional_field_conflict, - name="po_1_gender_ff.conditional_field_conflict", - description=( - "When 'sex/gender of principal owner 1: NP flag'" - " does not equal 1 (the applicant responded in the" - " free-form text field), 'sex/gender of principal" - " owner 1: free-form text field for self-identified" - " sex/gender' must be blank. When 'sex/gender of" - " principal owner 1: NP flag' equals 1, 'sex/gender" - " of principal owner 1: free-form text field for" - " self-identified sex/gender' must not be blank." - ), - groupby="po_1_gender_flag", - condition_values={"1"}, - ), - ], - ), - "po_2_ethnicity": Column( - str, - title="Field 55: Ethnicity of principal owner 2", - nullable=True, - checks=[ - SBLCheck( - is_valid_enum, - name="po_2_ethnicity.invalid_enum_value", - description=( - "When present, each value in 'ethnicity" - " of principal owner 2' (separated by" - " semicolons) must equal 1, 11, 12," - " 13, 14, 2, 966, 977, or 988." - ), - element_wise=True, - accepted_values=[ - "1", - "11", - "12", - "13", - "14", - "2", - "966", - "977", - "988", - ], - accept_blank=True, - ), - SBLCheck( - is_unique_in_field, - warning=True, - name="po_2_ethnicity.duplicates_in_field", - description=( - "'Ethnicity of principal owner 2' should" - " not contain duplicated values." - ), - element_wise=True, - ), - SBLCheck( - meets_multi_value_field_restriction, - warning=True, - name="po_2_ethnicity.multi_value_field_restriction", - description=( - "When 'ethnicity of principal owner 2' contains" - " 966 (the applicant responded that they did" - " not wish to provide this information) or 988" - " (not provided by applicant), 'ethnicity of" - " principal owner 2' should not contain more than one value." - ), - element_wise=True, - single_values={"966", "988"}, - ), - ], - ), - "po_2_ethnicity_ff": Column( - str, - title=( - "Field 56: Ethnicity of principal owner 2: free-form text field for" - "other Hispanic or Latino ethnicity" - ), - nullable=True, - checks=[ - SBLCheck.str_length( - 0, - 300, - name="po_2_ethnicity_ff.invalid_text_length", - description=( - "'Ethnicity of principal owner 2: free-form" - " text field for other Hispanic or Latino'" - " must not exceed 300 characters in length." - ), - ), - SBLCheck( - has_no_conditional_field_conflict, - name="po_2_ethnicity_ff.conditional_field_conflict", - description=( - "When 'ethnicity of principal owner 2' does not" - " contain 977 (the applicant responded in the" - " free-form text field), 'ethnicity of principal" - " owner 2: free-form text field for other Hispanic" - " or Latino' must be blank. When 'ethnicity of principal" - " owner 2' contains 977, 'ethnicity of principal" - " owner 2: free-form text field for other Hispanic" - " or Latino' must not be blank." - ), - groupby="po_2_ethnicity", - condition_values={"977"}, - ), - ], - ), - "po_2_race": Column( - str, - title="Field 57: Race of principal owner 2", - nullable=True, - checks=[ - SBLCheck( - is_valid_enum, - name="po_2_race.invalid_enum_value", - description=( - "When present, each value in 'race" - " of principal owner 2' (separated by" - " semicolons) must equal 1, 2, 21, 22," - " 23, 24, 25, 26, 27, 3, 31, 32, 33," - " 34, 35, 36, 37, 4, 41, 42, 43, 44," - " 5, 966, 971, 972, 973, 974, or 988." - ), - element_wise=True, - accepted_values=[ - "1", - "2", - "21", - "22", - "23", - "24", - "25", - "26", - "27", - "3", - "31", - "32", - "33", - "34", - "35", - "36", - "37", - "4", - "41", - "42", - "43", - "44", - "5", - "966", - "971", - "972", - "973", - "974", - "988", - ], - accept_blank=True, - ), - SBLCheck( - is_unique_in_field, - warning=True, - name="po_2_race.duplicates_in_field", - description=( - "'Race of principal owner 2' should" - " not contain duplicated values." - ), - element_wise=True, - ), - SBLCheck( - meets_multi_value_field_restriction, - warning=True, - name="po_2_race.multi_value_field_restriction", - description=( - "When 'race of principal owner 2' contains" - " 966 (the applicant responded that they" - " did not wish to provide this information)" - " or 988 (not provided by applicant)," - " 'race of principal owner 2' should not" - " contain more than one value." - ), - element_wise=True, - single_values={"966", "988"}, - ), - ], - ), - "po_2_race_anai_ff": Column( - str, - title=( - "Field 58: Race of principal owner 2: free-form text field for" - "American Indian or Alaska Native Enrolled or Principal Tribe" - ), - nullable=True, - checks=[ - SBLCheck.str_length( - 0, - 300, - name="po_2_race_anai_ff.invalid_text_length", - description=( - "'Race of principal owner 2: free-form" - " text field for American Indian or Alaska" - " Native Enrolled or Principal Tribe' must" - " not exceed 300 characters in length." - ), - ), - SBLCheck( - has_no_conditional_field_conflict, - name="po_2_race_anai_ff.conditional_field_conflict", - description=( - "When 'race of principal owner 2' does not" - " contain 971 (the applicant responded in" - " the free-form text field for American Indian" - " or Alaska Native Enrolled or Principal Tribe)," - " 'race of principal owner 2: free-form text" - " field for American Indian or Alaska Native" - " Enrolled or Principal Tribe' must be blank." - " When 'race of principal owner 2' contains 971," - " 'race of principal owner 2: free-form text field" - " for American Indian or Alaska Native Enrolled or" - " Principal Tribe' must not be blank." - ), - groupby="po_2_race", - condition_values={"971"}, - ), - ], - ), - "po_2_race_asian_ff": Column( - str, - title=( - "Field 59: Race of principal owner 2: free-form text field for other" - "Asian race" - ), - nullable=True, - checks=[ - SBLCheck.str_length( - 0, - 300, - name="po_2_race_asian_ff.invalid_text_length", - description=( - "'Race of principal owner 2: free-form text" - " field for other Asian' must not exceed 300" - " characters in length." - ), - ), - SBLCheck( - has_no_conditional_field_conflict, - name="po_2_race_asian_ff.conditional_field_conflict", - description=( - "When 'race of principal owner 2' does not contain" - " 972 (the applicant responded in the free-form text" - " field for other Asian race), 'race of principal" - " owner 2: free-form text field for other Asian' must" - " be blank. When 'race of principal owner 2' contains" - " 972, 'race of principal owner 2: free-form text field" - " for other Asian' must not be blank." - ), - groupby="po_2_race", - condition_values={"972"}, - ), - ], - ), - "po_2_race_baa_ff": Column( - str, - title=( - "Field 60: Race of principal owner 2: free-form text field for other" - "Black or African American race" - ), - nullable=True, - checks=[ - SBLCheck.str_length( - 0, - 300, - name="po_2_race_baa_ff.invalid_text_length", - description=( - "'Race of principal owner 2: free-form text" - " field for other Black or African American'" - " must not exceed 300 characters in length." - ), - ), - SBLCheck( - has_no_conditional_field_conflict, - name="po_2_race_baa_ff.conditional_field_conflict", - description=( - "When 'race of principal owner 2' does not contain 973" - " (the applicant responded in the free-form text field" - " for other Black or African race), 'race of principal" - " owner 2: free-form text field for other Black or African" - " American' must be blank. When 'race of principal owner 2'" - " contains 973, 'race of principal owner 2: free-form text" - " field for other Black or African American' must not be blank." - ), - groupby="po_2_race", - condition_values={"973"}, - ), - ], - ), - "po_2_race_pi_ff": Column( - str, - title=( - "Field 61: Race of principal owner 2: free-form text field for other" - "Pacific Islander race" - ), - nullable=True, - checks=[ - SBLCheck.str_length( - 0, - 300, - name="po_2_race_pi_ff.invalid_text_length", - description=( - "'Race of principal owner 2: free-form text" - " field for other Pacific Islander race' must" - " not exceed 300 characters in length." - ), - ), - SBLCheck( - has_no_conditional_field_conflict, - name="po_2_race_pi_ff.conditional_field_conflict", - description=( - "When 'race of principal owner 2' does not contain 974" - " (the applicant responded in the free-form text field" - " for other Pacific Islander race), 'race of principal" - " owner 2: free-form text field for other Pacific Islander" - " race' must be blank. When 'race of principal owner 2'" - " contains 974, 'race of principal owner 2: free-form text" - " field for other Pacific Islander race' must not be blank." - ), - groupby="po_2_race", - condition_values={"974"}, - ), - ], - ), - "po_2_gender_flag": Column( - str, - title="Field 62: Sex/gender of principal owner 2: NP flag", - nullable=True, - checks=[ - SBLCheck( - is_valid_enum, - name="po_2_gender_flag.invalid_enum_value", - description=( - "When present, 'sex/gender of principal" - " owner 2: NP flag' must equal 1, 966, or 988." - ), - element_wise=True, - accepted_values=[ - "1", - "966", - "988", - ], - accept_blank=True, - ), - ], - ), - "po_2_gender_ff": Column( - str, - title=( - "Field 63: Sex/gender of principal owner 2: free-form text field for" - "self-identified sex/gender" - ), - nullable=True, - checks=[ - SBLCheck.str_length( - 0, - 300, - name="po_2_gender_ff.invalid_text_length", - description=( - "'Sex/gender of principal owner 2: free-form" - " text field for self-identified sex/gender'" - " must not exceed 300 characters in length." - ), - ), - SBLCheck( - has_no_conditional_field_conflict, - name="po_2_gender_ff.conditional_field_conflict", - description=( - "When 'sex/gender of principal owner 2: NP flag'" - " does not equal 1 (the applicant responded in the" - " free-form text field), 'sex/gender of principal" - " owner 2: free-form text field for self-identified" - " sex/gender' must be blank. When 'sex/gender of" - " principal owner 2: NP flag' equals 1, 'sex/gender" - " of principal owner 2: free-form text field for" - " self-identified sex/gender' must not be blank." - ), - groupby="po_2_gender_flag", - condition_values={"1"}, - ), - ], - ), - "po_3_ethnicity": Column( - str, - title="Field 64: Ethnicity of principal owner 3", - nullable=True, - checks=[ - SBLCheck( - is_valid_enum, - name="po_3_ethnicity.invalid_enum_value", - description=( - "When present, each value in 'ethnicity" - " of principal owner 3' (separated by" - " semicolons) must equal 1, 11, 12," - " 13, 14, 2, 966, 977, or 988." - ), - element_wise=True, - accepted_values=[ - "1", - "11", - "12", - "13", - "14", - "2", - "966", - "977", - "988", - ], - accept_blank=True, - ), - SBLCheck( - is_unique_in_field, - warning=True, - name="po_3_ethnicity.duplicates_in_field", - description=( - "'Ethnicity of principal owner 3' should" - " not contain duplicated values." - ), - element_wise=True, - ), - SBLCheck( - meets_multi_value_field_restriction, - warning=True, - name="po_3_ethnicity.multi_value_field_restriction", - description=( - "When 'ethnicity of principal owner 3' contains" - " 966 (the applicant responded that they did" - " not wish to provide this information) or 988" - " (not provided by applicant), 'ethnicity of" - " principal owner 3' should not contain more than one value." - ), - element_wise=True, - single_values={"966", "988"}, - ), - ], - ), - "po_3_ethnicity_ff": Column( - str, - title=( - "Field 65: Ethnicity of principal owner 3: free-form text field for" - "other Hispanic or Latino ethnicity" - ), - nullable=True, - checks=[ - SBLCheck.str_length( - 0, - 300, - name="po_3_ethnicity_ff.invalid_text_length", - description=( - "'Ethnicity of principal owner 3: free-form" - " text field for other Hispanic or Latino'" - " must not exceed 300 characters in length." - ), - ), - SBLCheck( - has_no_conditional_field_conflict, - name="po_3_ethnicity_ff.conditional_field_conflict", - description=( - "When 'ethnicity of principal owner 3' does not" - " contain 977 (the applicant responded in the" - " free-form text field), 'ethnicity of principal" - " owner 3: free-form text field for other Hispanic" - " or Latino' must be blank. When 'ethnicity of principal" - " owner 3' contains 977, 'ethnicity of principal" - " owner 3: free-form text field for other Hispanic" - " or Latino' must not be blank." - ), - groupby="po_3_ethnicity", - condition_values={"977"}, - ), - ], - ), - "po_3_race": Column( - str, - title="Field 66: Race of principal owner 3", - nullable=True, - checks=[ - SBLCheck( - is_valid_enum, - name="po_3_race.invalid_enum_value", - description=( - "When present, each value in 'race" - " of principal owner 3' (separated by" - " semicolons) must equal 1, 2, 21, 22," - " 23, 24, 25, 26, 27, 3, 31, 32, 33," - " 34, 35, 36, 37, 4, 41, 42, 43, 44," - " 5, 966, 971, 972, 973, 974, or 988." - ), - element_wise=True, - accepted_values=[ - "1", - "2", - "21", - "22", - "23", - "24", - "25", - "26", - "27", - "3", - "31", - "32", - "33", - "34", - "35", - "36", - "37", - "4", - "41", - "42", - "43", - "44", - "5", - "966", - "971", - "972", - "973", - "974", - "988", - ], - accept_blank=True, - ), - SBLCheck( - is_unique_in_field, - warning=True, - name="po_3_race.duplicates_in_field", - description=( - "'Race of principal owner 3' should" - " not contain duplicated values." - ), - element_wise=True, - ), - SBLCheck( - meets_multi_value_field_restriction, - warning=True, - name="po_3_race.multi_value_field_restriction", - description=( - "When 'race of principal owner 3' contains" - " 966 (the applicant responded that they" - " did not wish to provide this information)" - " or 988 (not provided by applicant)," - " 'race of principal owner 3' should not" - " contain more than one value." - ), - element_wise=True, - single_values={"966", "988"}, - ), - ], - ), - "po_3_race_anai_ff": Column( - str, - title=( - "Field 67: Race of principal owner 3: free-form text field for" - "American Indian or Alaska Native Enrolled or Principal Tribe" - ), - nullable=True, - checks=[ - SBLCheck.str_length( - 0, - 300, - name="po_3_race_anai_ff.invalid_text_length", - description=( - "'Race of principal owner 3: free-form" - " text field for American Indian or Alaska" - " Native Enrolled or Principal Tribe' must" - " not exceed 300 characters in length." - ), - ), - SBLCheck( - has_no_conditional_field_conflict, - name="po_3_race_anai_ff.conditional_field_conflict", - description=( - "When 'race of principal owner 3' does not" - " contain 971 (the applicant responded in" - " the free-form text field for American Indian" - " or Alaska Native Enrolled or Principal Tribe)," - " 'race of principal owner 3: free-form text" - " field for American Indian or Alaska Native" - " Enrolled or Principal Tribe' must be blank." - " When 'race of principal owner 3' contains 971," - " 'race of principal owner 3: free-form text field" - " for American Indian or Alaska Native Enrolled or" - " Principal Tribe' must not be blank." - ), - groupby="po_3_race", - condition_values={"971"}, - ), - ], - ), - "po_3_race_asian_ff": Column( - str, - title=( - "Field 68: Race of principal owner 3: free-form text field for other" - "Asian race" - ), - nullable=True, - checks=[ - SBLCheck.str_length( - 0, - 300, - name="po_3_race_asian_ff.invalid_text_length", - description=( - "'Race of principal owner 3: free-form text" - " field for other Asian' must not exceed 300" - " characters in length." - ), - ), - SBLCheck( - has_no_conditional_field_conflict, - name="po_3_race_asian_ff.conditional_field_conflict", - description=( - "When 'race of principal owner 3' does not contain" - " 972 (the applicant responded in the free-form text" - " field for other Asian race), 'race of principal" - " owner 3: free-form text field for other Asian' must" - " be blank. When 'race of principal owner 3' contains" - " 972, 'race of principal owner 3: free-form text field" - " for other Asian' must not be blank." - ), - groupby="po_3_race", - condition_values={"972"}, - ), - ], - ), - "po_3_race_baa_ff": Column( - str, - title=( - "Field 69: Race of principal owner 3: free-form text field for other" - "Black or African American race" - ), - nullable=True, - checks=[ - SBLCheck.str_length( - 0, - 300, - name="po_3_race_baa_ff.invalid_text_length", - description=( - "'Race of principal owner 3: free-form text" - " field for other Black or African American'" - " must not exceed 300 characters in length." - ), - ), - SBLCheck( - has_no_conditional_field_conflict, - name="po_3_race_baa_ff.conditional_field_conflict", - description=( - "When 'race of principal owner 3' does not contain 973" - " (the applicant responded in the free-form text field" - " for other Black or African race), 'race of principal" - " owner 3: free-form text field for other Black or African" - " American' must be blank. When 'race of principal owner 3'" - " contains 973, 'race of principal owner 3: free-form text" - " field for other Black or African American' must not be blank." - ), - groupby="po_3_race", - condition_values={"973"}, - ), - ], - ), - "po_3_race_pi_ff": Column( - str, - title=( - "Field 70: Race of principal owner 3: free-form text field for other" - "Pacific Islander race" - ), - nullable=True, - checks=[ - SBLCheck.str_length( - 0, - 300, - name="po_3_race_pi_ff.invalid_text_length", - description=( - "'Race of principal owner 3: free-form text" - " field for other Pacific Islander race' must" - " not exceed 300 characters in length." - ), - ), - SBLCheck( - has_no_conditional_field_conflict, - name="po_3_race_pi_ff.conditional_field_conflict", - description=( - "When 'race of principal owner 3' does not contain 974" - " (the applicant responded in the free-form text field" - " for other Pacific Islander race), 'race of principal" - " owner 3: free-form text field for other Pacific Islander" - " race' must be blank. When 'race of principal owner 3'" - " contains 974, 'race of principal owner 3: free-form text" - " field for other Pacific Islander race' must not be blank." - ), - groupby="po_3_race", - condition_values={"974"}, - ), - ], - ), - "po_3_gender_flag": Column( - str, - title="Field 71: Sex/gender of principal owner 3: NP flag", - nullable=True, - checks=[ - SBLCheck( - is_valid_enum, - name="po_3_gender_flag.invalid_enum_value", - description=( - "When present, 'sex/gender of principal" - " owner 3: NP flag' must equal 1, 966, or 988." - ), - element_wise=True, - accepted_values=[ - "1", - "966", - "988", - ], - accept_blank=True, - ), - ], - ), - "po_3_gender_ff": Column( - str, - title=( - "Field 72: Sex/gender of principal owner 3: free-form text field for" - "self-identified sex/gender" - ), - nullable=True, - checks=[ - SBLCheck.str_length( - 0, - 300, - name="po_3_gender_ff.invalid_text_length", - description=( - "'Sex/gender of principal owner 3: free-form" - " text field for self-identified sex/gender'" - " must not exceed 300 characters in length." - ), - ), - SBLCheck( - has_no_conditional_field_conflict, - name="po_3_gender_ff.conditional_field_conflict", - description=( - "When 'sex/gender of principal owner 3: NP flag'" - " does not equal 1 (the applicant responded in the" - " free-form text field), 'sex/gender of principal" - " owner 3: free-form text field for self-identified" - " sex/gender' must be blank. When 'sex/gender of" - " principal owner 3: NP flag' equals 1, 'sex/gender" - " of principal owner 3: free-form text field for" - " self-identified sex/gender' must not be blank." - ), - groupby="po_3_gender_flag", - condition_values={"1"}, - ), - ], - ), - "po_4_ethnicity": Column( - str, - title="Field 73: Ethnicity of principal owner 4", - nullable=True, - checks=[ - SBLCheck( - is_valid_enum, - name="po_4_ethnicity.invalid_enum_value", - description=( - "When present, each value in 'ethnicity" - " of principal owner 4' (separated by" - " semicolons) must equal 1, 11, 12," - " 13, 14, 2, 966, 977, or 988." - ), - element_wise=True, - accepted_values=[ - "1", - "11", - "12", - "13", - "14", - "2", - "966", - "977", - "988", - ], - accept_blank=True, - ), - SBLCheck( - is_unique_in_field, - warning=True, - name="po_4_ethnicity.duplicates_in_field", - description=( - "'Ethnicity of principal owner 4' should" - " not contain duplicated values." - ), - element_wise=True, - ), - SBLCheck( - meets_multi_value_field_restriction, - warning=True, - name="po_4_ethnicity.multi_value_field_restriction", - description=( - "When 'ethnicity of principal owner 4' contains" - " 966 (the applicant responded that they did" - " not wish to provide this information) or 988" - " (not provided by applicant), 'ethnicity of" - " principal owner 4' should not contain more than one value." - ), - element_wise=True, - single_values={"966", "988"}, - ), - ], - ), - "po_4_ethnicity_ff": Column( - str, - title=( - "Field 74: Ethnicity of principal owner 4: free-form text field for" - "other Hispanic or Latino ethnicity" - ), - nullable=True, - checks=[ - SBLCheck.str_length( - 0, - 300, - name="po_4_ethnicity_ff.invalid_text_length", - description=( - "'Ethnicity of principal owner 4: free-form" - " text field for other Hispanic or Latino'" - " must not exceed 300 characters in length." - ), - ), - SBLCheck( - has_no_conditional_field_conflict, - name="po_4_ethnicity_ff.conditional_field_conflict", - description=( - "When 'ethnicity of principal owner 4' does not" - " contain 977 (the applicant responded in the" - " free-form text field), 'ethnicity of principal" - " owner 4: free-form text field for other Hispanic" - " or Latino' must be blank. When 'ethnicity of principal" - " owner 4' contains 977, 'ethnicity of principal" - " owner 4: free-form text field for other Hispanic" - " or Latino' must not be blank." - ), - groupby="po_4_ethnicity", - condition_values={"977"}, - ), - ], - ), - "po_4_race": Column( - str, - title="Field 75: Race of principal owner 4", - nullable=True, - checks=[ - SBLCheck( - is_valid_enum, - name="po_4_race.invalid_enum_value", - description=( - "When present, each value in 'race" - " of principal owner 4' (separated by" - " semicolons) must equal 1, 2, 21, 22," - " 23, 24, 25, 26, 27, 3, 31, 32, 33," - " 34, 35, 36, 37, 4, 41, 42, 43, 44," - " 5, 966, 971, 972, 973, 974, or 988." - ), - element_wise=True, - accepted_values=[ - "1", - "2", - "21", - "22", - "23", - "24", - "25", - "26", - "27", - "3", - "31", - "32", - "33", - "34", - "35", - "36", - "37", - "4", - "41", - "42", - "43", - "44", - "5", - "966", - "971", - "972", - "973", - "974", - "988", - ], - accept_blank=True, - ), - SBLCheck( - is_unique_in_field, - warning=True, - name="po_4_race.duplicates_in_field", - description=( - "'Race of principal owner 4' should" - " not contain duplicated values." - ), - element_wise=True, - ), - SBLCheck( - meets_multi_value_field_restriction, - warning=True, - name="po_4_race.multi_value_field_restriction", - description=( - "When 'race of principal owner 4' contains" - " 966 (the applicant responded that they" - " did not wish to provide this information)" - " or 988 (not provided by applicant)," - " 'race of principal owner 4' should not" - " contain more than one value." - ), - element_wise=True, - single_values={"966", "988"}, - ), - ], - ), - "po_4_race_anai_ff": Column( - str, - title=( - "Field 76: Race of principal owner 4: free-form text field for" - "American Indian or Alaska Native Enrolled or Principal Tribe" - ), - nullable=True, - checks=[ - SBLCheck.str_length( - 0, - 300, - name="po_4_race_anai_ff.invalid_text_length", - description=( - "'Race of principal owner 4: free-form" - " text field for American Indian or Alaska" - " Native Enrolled or Principal Tribe' must" - " not exceed 300 characters in length." - ), - ), - SBLCheck( - has_no_conditional_field_conflict, - name="po_4_race_anai_ff.conditional_field_conflict", - description=( - "When 'race of principal owner 4' does not" - " contain 971 (the applicant responded in" - " the free-form text field for American Indian" - " or Alaska Native Enrolled or Principal Tribe)," - " 'race of principal owner 4: free-form text" - " field for American Indian or Alaska Native" - " Enrolled or Principal Tribe' must be blank." - " When 'race of principal owner 4' contains 971," - " 'race of principal owner 4: free-form text field" - " for American Indian or Alaska Native Enrolled or" - " Principal Tribe' must not be blank." - ), - groupby="po_4_race", - condition_values={"971"}, - ), - ], - ), - "po_4_race_asian_ff": Column( - str, - title=( - "Field 77: Race of principal owner 4: free-form text field for other" - "Asian race" - ), - nullable=True, - checks=[ - SBLCheck.str_length( - 0, - 300, - name="po_4_race_asian_ff.invalid_text_length", - description=( - "'Race of principal owner 4: free-form text" - " field for other Asian' must not exceed 300" - " characters in length." - ), - ), - SBLCheck( - has_no_conditional_field_conflict, - name="po_4_race_asian_ff.conditional_field_conflict", - description=( - "When 'race of principal owner 4' does not contain" - " 972 (the applicant responded in the free-form text" - " field for other Asian race), 'race of principal" - " owner 4: free-form text field for other Asian' must" - " be blank. When 'race of principal owner 4' contains" - " 972, 'race of principal owner 4: free-form text field" - " for other Asian' must not be blank." - ), - groupby="po_4_race", - condition_values={"972"}, - ), - ], - ), - "po_4_race_baa_ff": Column( - str, - title=( - "Field 78: Race of principal owner 4: free-form text field for other" - "Black or African American race" - ), - nullable=True, - checks=[ - SBLCheck.str_length( - 0, - 300, - name="po_4_race_baa_ff.invalid_text_length", - description=( - "'Race of principal owner 4: free-form text" - " field for other Black or African American'" - " must not exceed 300 characters in length." - ), - ), - SBLCheck( - has_no_conditional_field_conflict, - name="po_4_race_baa_ff.conditional_field_conflict", - description=( - "When 'race of principal owner 4' does not contain 973" - " (the applicant responded in the free-form text field" - " for other Black or African race), 'race of principal" - " owner 4: free-form text field for other Black or African" - " American' must be blank. When 'race of principal owner 4'" - " contains 973, 'race of principal owner 4: free-form text" - " field for other Black or African American' must not be blank." - ), - groupby="po_4_race", - condition_values={"973"}, - ), - ], - ), - "po_4_race_pi_ff": Column( - str, - title=( - "Field 79: Race of principal owner 4: free-form text field for other" - "Pacific Islander race" - ), - nullable=True, - checks=[ - SBLCheck.str_length( - 0, - 300, - name="po_4_race_pi_ff.invalid_text_length", - description=( - "'Race of principal owner 4: free-form text" - " field for other Pacific Islander race' must" - " not exceed 300 characters in length." - ), - ), - SBLCheck( - has_no_conditional_field_conflict, - name="po_4_race_pi_ff.conditional_field_conflict", - description=( - "When 'race of principal owner 4' does not contain 974" - " (the applicant responded in the free-form text field" - " for other Pacific Islander race), 'race of principal" - " owner 4: free-form text field for other Pacific Islander" - " race' must be blank. When 'race of principal owner 4'" - " contains 974, 'race of principal owner 4: free-form text" - " field for other Pacific Islander race' must not be blank." - ), - groupby="po_4_race", - condition_values={"974"}, - ), - ], - ), - "po_4_gender_flag": Column( - str, - title="Field 80: Sex/gender of principal owner 4: NP flag", - nullable=True, - checks=[ - SBLCheck( - is_valid_enum, - name="po_4_gender_flag.invalid_enum_value", - description=( - "When present, 'sex/gender of principal" - " owner 4: NP flag' must equal 1, 966, or 988." - ), - element_wise=True, - accepted_values=[ - "1", - "966", - "988", - ], - accept_blank=True, - ), - ], - ), - "po_4_gender_ff": Column( - str, - title=( - "Field 81: Sex/gender of principal owner 4: free-form text field for" - "self-identified sex/gender" - ), - nullable=True, - checks=[ - SBLCheck.str_length( - 0, - 300, - name="po_4_gender_ff.invalid_text_length", - description=( - "'Sex/gender of principal owner 4: free-form" - " text field for self-identified sex/gender'" - " must not exceed 300 characters in length." - ), - ), - SBLCheck( - has_no_conditional_field_conflict, - name="po_4_gender_ff.conditional_field_conflict", - description=( - "When 'sex/gender of principal owner 4: NP flag'" - " does not equal 1 (the applicant responded in the" - " free-form text field), 'sex/gender of principal" - " owner 4: free-form text field for self-identified" - " sex/gender' must be blank. When 'sex/gender of" - " principal owner 4: NP flag' equals 1, 'sex/gender" - " of principal owner 4: free-form text field for" - " self-identified sex/gender' must not be blank." - ), - groupby="po_4_gender_flag", - condition_values={"1"}, - ), - ], - ), - }, - ) From 27073f8ae0b83b734751146c625b596eedacff40 Mon Sep 17 00:00:00 2001 From: Nargis Sultani Date: Wed, 23 Aug 2023 10:51:50 -0400 Subject: [PATCH 2/8] removed deleted schema import --- src/validator/main.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/validator/main.py b/src/validator/main.py index c3e43ef2..a002c87e 100644 --- a/src/validator/main.py +++ b/src/validator/main.py @@ -8,10 +8,8 @@ import sys import pandas as pd -from create_schemas import (get_phase_1_schema_for_lei, - get_phase_2_schema_for_lei) +from create_schemas import get_phase_1_schema_for_lei, get_phase_2_schema_for_lei from pandera.errors import SchemaErrors -from schema import get_schema_for_lei def csv_to_df(path: str) -> pd.DataFrame: @@ -52,12 +50,13 @@ def run_validation_on_df(df: pd.DataFrame, lei: str) -> None: # this is just a string that we'd need to parse manually check_output = error["error"].args[0] - print(f"Phase 1 Validation `{check_name}` failed for column `{column_name}`") + print( + f"Phase 1 Validation `{check_name}` failed for column `{column_name}`" + ) print(check_output) print("") - - if phase_1_failure_cases is None: + if phase_1_failure_cases is None: phase_2_sblar_chema = get_phase_2_schema_for_lei(lei) try: phase_2_sblar_chema(df, lazy=True) @@ -77,7 +76,9 @@ def run_validation_on_df(df: pd.DataFrame, lei: str) -> None: # this is just a string that we'd need to parse manually check_output = error["error"].args[0] - print(f"Phase 2 Validation `{check_name}` failed for column `{column_name}`") + print( + f"Phase 2 Validation `{check_name}` failed for column `{column_name}`" + ) print(check_output) print("") From 733996f6c9c4e945da6a24dc04c25ab9ed05b7a1 Mon Sep 17 00:00:00 2001 From: Nargis Sultani Date: Wed, 23 Aug 2023 11:49:12 -0400 Subject: [PATCH 3/8] created a function to print errors --- src/validator/create_schemas.py | 22 ++++++++++++++++ src/validator/main.py | 45 +++------------------------------ 2 files changed, 26 insertions(+), 41 deletions(-) diff --git a/src/validator/create_schemas.py b/src/validator/create_schemas.py index 0fc78c76..e10cce0e 100644 --- a/src/validator/create_schemas.py +++ b/src/validator/create_schemas.py @@ -2,6 +2,7 @@ with validations listed in phase 1 and phase 2.""" from pandera import DataFrameSchema +from pandera.errors import SchemaErrors from phase_validations import get_phase_1_and_2_validations_for_lei from schema_template import get_template @@ -19,6 +20,27 @@ def get_schema_by_phase_for_lei(template: dict, phase: str, lei: str = None): return DataFrameSchema(template) +def print_schema_errors(errors: SchemaErrors, phase: str): + for error in errors.schema_errors: + # Name of the column in the dataframe being checked + column_name = error["error"].schema.name + + # built in checks such as unique=True are different than custom + # checks unfortunately so the name needs to be accessed differently + try: + check_name = error["error"].check.name + # This will either be a boolean series or a single bool + check_output = error["error"].check_output + except AttributeError: + check_name = error["error"].check + # this is just a string that we'd need to parse manually + check_output = error["error"].args[0] + + print(f"{phase} Validation `{check_name}` failed for column `{column_name}`") + print(check_output) + print("") + + def get_phase_1_schema_for_lei(lei: str = None): return get_schema_by_phase_for_lei(phase_1_template, "phase_1", lei) diff --git a/src/validator/main.py b/src/validator/main.py index a002c87e..971e9d6b 100644 --- a/src/validator/main.py +++ b/src/validator/main.py @@ -8,7 +8,8 @@ import sys import pandas as pd -from create_schemas import get_phase_1_schema_for_lei, get_phase_2_schema_for_lei +from create_schemas import (get_phase_1_schema_for_lei, + get_phase_2_schema_for_lei, print_schema_errors) from pandera.errors import SchemaErrors @@ -35,52 +36,14 @@ def run_validation_on_df(df: pd.DataFrame, lei: str) -> None: phase_1_sblar_chema(df, lazy=True) except SchemaErrors as errors: phase_1_failure_cases = errors.failure_cases - for error in errors.schema_errors: - # Name of the column in the dataframe being checked - column_name = error["error"].schema.name - - # built in checks such as unique=True are different than custom - # checks unfortunately so the name needs to be accessed differently - try: - check_name = error["error"].check.name - # This will either be a boolean series or a single bool - check_output = error["error"].check_output - except AttributeError: - check_name = error["error"].check - # this is just a string that we'd need to parse manually - check_output = error["error"].args[0] - - print( - f"Phase 1 Validation `{check_name}` failed for column `{column_name}`" - ) - print(check_output) - print("") + print_schema_errors(errors, "Phase 1") if phase_1_failure_cases is None: phase_2_sblar_chema = get_phase_2_schema_for_lei(lei) try: phase_2_sblar_chema(df, lazy=True) except SchemaErrors as errors: - for error in errors.schema_errors: - # Name of the column in the dataframe being checked - column_name = error["error"].schema.name - - # built in checks such as unique=True are different than custom - # checks unfortunately so the name needs to be accessed differently - try: - check_name = error["error"].check.name - # This will either be a boolean series or a single bool - check_output = error["error"].check_output - except AttributeError: - check_name = error["error"].check - # this is just a string that we'd need to parse manually - check_output = error["error"].args[0] - - print( - f"Phase 2 Validation `{check_name}` failed for column `{column_name}`" - ) - print(check_output) - print("") + print_schema_errors(errors, "Phase 2") if __name__ == "__main__": From ca776144a6b384f54e14b93a64e1bbbefd5e5779 Mon Sep 17 00:00:00 2001 From: Nargis Sultani Date: Wed, 23 Aug 2023 11:53:47 -0400 Subject: [PATCH 4/8] ran black formatter --- src/validator/main.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/validator/main.py b/src/validator/main.py index 971e9d6b..52907b64 100644 --- a/src/validator/main.py +++ b/src/validator/main.py @@ -8,8 +8,11 @@ import sys import pandas as pd -from create_schemas import (get_phase_1_schema_for_lei, - get_phase_2_schema_for_lei, print_schema_errors) +from create_schemas import ( + get_phase_1_schema_for_lei, + get_phase_2_schema_for_lei, + print_schema_errors, +) from pandera.errors import SchemaErrors From 5a97d510a4fb3476fc3f2d778fc29ce55500d4a0 Mon Sep 17 00:00:00 2001 From: Nargis Sultani Date: Wed, 23 Aug 2023 11:59:30 -0400 Subject: [PATCH 5/8] fixed typo --- src/validator/main.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/validator/main.py b/src/validator/main.py index 52907b64..433275be 100644 --- a/src/validator/main.py +++ b/src/validator/main.py @@ -34,17 +34,17 @@ def run_validation_on_df(df: pd.DataFrame, lei: str) -> None: phase_1_failure_cases = None - phase_1_sblar_chema = get_phase_1_schema_for_lei(lei) + phase_1_sblar_schema = get_phase_1_schema_for_lei(lei) try: - phase_1_sblar_chema(df, lazy=True) + phase_1_sblar_schema(df, lazy=True) except SchemaErrors as errors: phase_1_failure_cases = errors.failure_cases print_schema_errors(errors, "Phase 1") if phase_1_failure_cases is None: - phase_2_sblar_chema = get_phase_2_schema_for_lei(lei) + phase_2_sblar_schema = get_phase_2_schema_for_lei(lei) try: - phase_2_sblar_chema(df, lazy=True) + phase_2_sblar_schema(df, lazy=True) except SchemaErrors as errors: print_schema_errors(errors, "Phase 2") From a5c4098e6423b40e7db9402b15a2dfe92dfec371 Mon Sep 17 00:00:00 2001 From: Nargis Sultani Date: Fri, 25 Aug 2023 14:28:49 -0400 Subject: [PATCH 6/8] removed debug statement --- src/validator/check_functions.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/validator/check_functions.py b/src/validator/check_functions.py index bf61c3aa..b42372e9 100644 --- a/src/validator/check_functions.py +++ b/src/validator/check_functions.py @@ -349,10 +349,6 @@ def _has_valid_enum_pair_validation_helper( result = series == condition_value else: result = series != condition_value - for i, v in result.items(): - if v is False: - print(i) - return result From da2b54210d09c15954e6ff364dac794f7ddc53d3 Mon Sep 17 00:00:00 2001 From: Nargis Sultani Date: Fri, 25 Aug 2023 15:07:27 -0400 Subject: [PATCH 7/8] addressed the comment and set error["error"] to a variable --- src/validator/create_schemas.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/validator/create_schemas.py b/src/validator/create_schemas.py index e10cce0e..d5c47456 100644 --- a/src/validator/create_schemas.py +++ b/src/validator/create_schemas.py @@ -23,18 +23,19 @@ def get_schema_by_phase_for_lei(template: dict, phase: str, lei: str = None): def print_schema_errors(errors: SchemaErrors, phase: str): for error in errors.schema_errors: # Name of the column in the dataframe being checked - column_name = error["error"].schema.name + schema_error = error["error"] + column_name = schema_error.schema.name # built in checks such as unique=True are different than custom # checks unfortunately so the name needs to be accessed differently try: - check_name = error["error"].check.name + check_name = schema_error.check.name # This will either be a boolean series or a single bool - check_output = error["error"].check_output + check_output = schema_error.check_output except AttributeError: - check_name = error["error"].check + check_name = schema_error.check # this is just a string that we'd need to parse manually - check_output = error["error"].args[0] + check_output = schema_error.args[0] print(f"{phase} Validation `{check_name}` failed for column `{column_name}`") print(check_output) From 44b1f5013f11156c6c7bf113b66079fd69c9c99b Mon Sep 17 00:00:00 2001 From: Nargis Sultani Date: Fri, 25 Aug 2023 15:24:39 -0400 Subject: [PATCH 8/8] fixed errors pointed by ruff --- src/validator/phase_validations.py | 23 ++++++++++++----------- src/validator/schema_template.py | 8 ++++---- 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/src/validator/phase_validations.py b/src/validator/phase_validations.py index e6856790..6c88afac 100644 --- a/src/validator/phase_validations.py +++ b/src/validator/phase_validations.py @@ -75,8 +75,9 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): string_contains, name="uid.invalid_uid_lei", description=( - "The first 20 characters of the 'unique identifier' should match " - "the Legal Entity Identifier (LEI) for the financial institution." + "The first 20 characters of the 'unique identifier' should" + " match the Legal Entity Identifier (LEI) for the financial" + " institution." ), element_wise=True, containing_value=lei, @@ -950,11 +951,11 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): description=( "When 'interest rate type' does not equal 1" " (adjustable interest rate, no initial rate period)," - " 3 (initial rate period > 12 months, adjustable interest rate)," - " or 5 (initial rate period <= 12 months, variable interest" - " rate), 'adjustable rate transaction: margin' must be blank." - " When 'interest rate type' equals 1, 3, or 5, 'variable" - " rate transaction: margin' must not be blank." + " 3 (initial rate period > 12 months, adjustable interest" + " rate), or 5 (initial rate period <= 12 months, variable " + "interest rate), 'adjustable rate transaction: margin' must " + "be blank. When 'interest rate type' equals 1, 3, or 5, " + "'variable rate transaction: margin' must not be blank." ), groupby="pricing_interest_rate_type", condition_values={"1", "3", "5"}, @@ -978,8 +979,8 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): is_valid_enum, name="pricing_adj_index_name.invalid_enum_value", description=( - "'Adjustable rate transaction: index name' must equal 1, 2, 3, 4," - "5, 6, 7, 8, 9, 10, 977, or 999." + "'Adjustable rate transaction: index name' must equal " + "1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 977, or 999." ), element_wise=True, accepted_values=[ @@ -1036,8 +1037,8 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): max_value=300, name="pricing_adj_index_name_ff.invalid_text_length", description=( - "'Adjustable rate transaction: index name: other' must not exceed" - "300 characters in length." + "'Adjustable rate transaction: index name: other' must not" + " exceed 300 characters in length." ), ), ], diff --git a/src/validator/schema_template.py b/src/validator/schema_template.py index 2b320047..c1b334c2 100644 --- a/src/validator/schema_template.py +++ b/src/validator/schema_template.py @@ -505,13 +505,13 @@ ), } + def get_template() -> Dict: - """Returns a deep copy of the above schema_template object. - + """Returns a deep copy of the above schema_template object. + This is done because this dictionary template is going to be modified both by the phase 1 imputer and phase 2 imputer. This can cause absolute havoc in a program and it's proactically impossible to debug.""" - + return deepcopy(_schema_template) - \ No newline at end of file