diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 6fc52aad..9656043b 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -49,7 +49,7 @@ "python.testing.unittestEnabled": false, "python.testing.pytestArgs": [ "--rootdir", - "${workspaceFolder}/src/tests" + "${workspaceFolder}/tests" ] } } diff --git a/.github/workflows/linters.yml b/.github/workflows/linters.yml index 2f242985..db76efdf 100644 --- a/.github/workflows/linters.yml +++ b/.github/workflows/linters.yml @@ -8,8 +8,11 @@ jobs: steps: - uses: actions/checkout@v3 - uses: psf/black@stable + with: + options: "--check --diff --verbose" + version: "~= 22.0" ruff: runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - - uses: chartboost/ruff-action@v1 \ No newline at end of file + - uses: chartboost/ruff-action@v1 diff --git a/config.py b/config.py deleted file mode 100644 index 00a125f2..00000000 --- a/config.py +++ /dev/null @@ -1,19 +0,0 @@ -# path to original/raw NAICS excel file -NAICS_EXCEL_PATH = "./data/naics/raw/2-6 digit_2022_Codes.xlsx" -# path to parsed/filtered naics codes file -NAICS_CSV_PATH = "./data/naics/processed/2022_codes.csv" -# column header text containing naics code -NAICS_CODE_COL = "2022 NAICS US Code" -# column header text containing naics title/description -NAICS_TITLE_COL = "2022 NAICS US Title" - -# path to original/raw NAICS zip file -CENSUS_RAW_ZIP_PATH = "./data/census/raw/CensusFlatFile2022.zip" -# path to parsed/filtered naics codes file -CENSUS_PROCESSED_CSV_PATH = "./data/census/processed/Census2022.processed.csv" -# census file col indexes -CENSUS_STATE_COL_INDEX = 2 -CENSUS_COUNTY_COL_INDEX = 3 -CENSUS_TRACT_COL_INDEX = 4 - -CENSUS_GEOID_COL = "geoid" diff --git a/data/census/README.md b/data/census/README.md new file mode 100644 index 00000000..0b57af2e --- /dev/null +++ b/data/census/README.md @@ -0,0 +1,3 @@ +# FFIEC's Census Flat File + +- https://www.ffiec.gov/censusapp.htm diff --git a/tools/process_census.py b/data/census/process_census.py similarity index 56% rename from tools/process_census.py rename to data/census/process_census.py index 0686b9c5..95a1445d 100644 --- a/tools/process_census.py +++ b/data/census/process_census.py @@ -5,10 +5,12 @@ import pandas as pd -ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) # noqa: E402 -sys.path.append(ROOT_DIR) # noqa: E402 +# census file col indexes +CENSUS_STATE_COL_INDEX = 2 +CENSUS_COUNTY_COL_INDEX = 3 +CENSUS_TRACT_COL_INDEX = 4 -import config # noqa: E402 +CENSUS_GEOID_COL = "geoid" # helper function to check number (float/int/negative) @@ -21,24 +23,22 @@ def _is_number(s): # helper function to unzip census file and extract CSV file -def _extract_census_zip_file(): - CENSUS_TMP_CSV_PATH = config.CENSUS_RAW_ZIP_PATH + ".tmp.csv" +def _extract_census_zip_file(raw_src): + census_tmp_csv_path = raw_src + ".tmp.csv" # unzip and extract csv files - with zipfile.ZipFile(config.CENSUS_RAW_ZIP_PATH, "r") as zip_ref: + with zipfile.ZipFile(raw_src, "r") as zip_ref: for file in zip_ref.namelist(): # iterate over files in archive if file[-4:] == ".csv": - print("Extracting CSV to {}".format(CENSUS_TMP_CSV_PATH)) - with open(CENSUS_TMP_CSV_PATH, "wb") as outfile: + print("Extracting CSV to {}".format(census_tmp_csv_path)) + with open(census_tmp_csv_path, "wb") as outfile: outfile.write(zip_ref.read(file)) - # it should only have one csv file - return CENSUS_TMP_CSV_PATH + # it should only have one csv file + + return census_tmp_csv_path # helper function to 
read extracted csv file and filter only geo-tract-id -def _read_census_csv(src_path: str, csv_path: str): - STATE_COL = config.CENSUS_STATE_COL_INDEX - COUNTY_COL = config.CENSUS_COUNTY_COL_INDEX - TRACT_COL = config.CENSUS_TRACT_COL_INDEX +def _process_census_csv(src_path: str, csv_path: str): # check paths if not os.path.isfile(src_path): @@ -52,14 +52,14 @@ def _read_census_csv(src_path: str, csv_path: str): ) # add header - result = [[config.CENSUS_GEOID_COL]] + result = [[CENSUS_GEOID_COL]] # read excel file # and create csv data list for index, row in df.iterrows(): - state_value = str(row[STATE_COL]) - county_value = str(row[COUNTY_COL]) - tract_value = str(row[TRACT_COL]) + state_value = str(row[CENSUS_STATE_COL_INDEX]) + county_value = str(row[CENSUS_COUNTY_COL_INDEX]) + tract_value = str(row[CENSUS_TRACT_COL_INDEX]) if ( _is_number(state_value) and _is_number(county_value) @@ -84,14 +84,23 @@ - output to defined output file """ if __name__ == "__main__": - CSV_PATH = config.CENSUS_PROCESSED_CSV_PATH - - if os.path.isfile(CSV_PATH): - error_msg = "Output {} csv file existed".format(CSV_PATH) - raise FileExistsError(error_msg) - - tmp_census_csv_file = _extract_census_zip_file() - print("Reading extracted CSV File . {}".format(tmp_census_csv_file)) - _read_census_csv(tmp_census_csv_file, CSV_PATH) - print("Removing extracted CSV File") + if len(sys.argv) != 3: + print(f"Usage: {sys.argv[0]} <raw_src> <csv_dest>") + exit(1) + + raw_src = sys.argv[1] + csv_dest = sys.argv[2] + + if not os.path.isfile(raw_src): + print(f"source file does not exist: {raw_src}") + exit(2) + + if os.path.isfile(csv_dest): + print(f"destination file already exists: {csv_dest}") + exit(3) + + tmp_census_csv_file = _extract_census_zip_file(raw_src) + print(f"Reading extracted CSV file: {tmp_census_csv_file}") + _process_census_csv(tmp_census_csv_file, csv_dest) + print("Removing extracted CSV file") os.remove(tmp_census_csv_file) diff --git a/data/naics/README.md b/data/naics/README.md new file mode 100644 index 00000000..fce44290 --- /dev/null +++ b/data/naics/README.md @@ -0,0 +1,3 @@ +# North American Industry Classification System (NAICS) codes + +- https://www.census.gov/naics/?48967 diff --git a/data/naics/process_naics.py b/data/naics/process_naics.py new file mode 100644 index 00000000..f6e1a251 --- /dev/null +++ b/data/naics/process_naics.py @@ -0,0 +1,57 @@ +import csv +import os +import sys + +import pandas as pd + + +# column header text containing naics code +NAICS_CODE_COL = "2022 NAICS US Code" +# column header text containing naics title/description +NAICS_TITLE_COL = "2022 NAICS US Title" + + +""" +filter NAICS data with only 3 digit codes + +Raises: + FileNotFoundError: when the input excel file does not exist + FileExistsError: when the output csv file already exists +""" +if __name__ == "__main__": + if len(sys.argv) != 3: + print(f"Usage: {sys.argv[0]} <raw_src> <csv_dest>") + exit(1) + + raw_src = sys.argv[1] + csv_dest = sys.argv[2] + + if not os.path.isfile(raw_src): + print(f"source file does not exist: {raw_src}") + exit(2) + + if os.path.isfile(csv_dest): + print(f"destination file already exists: {csv_dest}") + exit(3) + + df = pd.read_excel(raw_src, dtype=str, na_filter=False) + + print(f'source file successfully read: {raw_src}') + + # add header + result = [["code", "title"]] + + # read excel file + # and create csv data list + for index, row in df.iterrows(): + code = str(row[NAICS_CODE_COL]) + if len(code) == 3: + a_row = [code, str(row[NAICS_TITLE_COL])] + result.append(a_row) + + # 
output data to csv file + with open(csv_dest, "w") as f: + writer = csv.writer(f) + writer.writerows(result) + + print(f'destination file successfully written: {csv_dest}') diff --git a/poetry.lock b/poetry.lock index 7fcbe376..600691d0 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.5.1 and should not be changed by hand. [[package]] name = "black" @@ -136,6 +136,17 @@ files = [ [package.extras] toml = ["tomli"] +[[package]] +name = "et-xmlfile" +version = "1.1.0" +description = "An implementation of lxml.xmlfile for the standard library" +optional = false +python-versions = ">=3.6" +files = [ + {file = "et_xmlfile-1.1.0-py3-none-any.whl", hash = "sha256:a2ba85d1d6a74ef63837eed693bcb89c3f752169b0e3e7ae5b16ca5e1b3deada"}, + {file = "et_xmlfile-1.1.0.tar.gz", hash = "sha256:8eb9e2bc2f8c97e37a2dc85a09ecdcdec9d8a396530a6d5a33b30b9a92da0c5c"}, +] + [[package]] name = "iniconfig" version = "2.0.0" @@ -203,6 +214,20 @@ files = [ {file = "numpy-1.25.2.tar.gz", hash = "sha256:fd608e19c8d7c55021dffd43bfe5492fab8cc105cc8986f813f8c3c048b38760"}, ] +[[package]] +name = "openpyxl" +version = "3.1.2" +description = "A Python library to read/write Excel 2010 xlsx/xlsm files" +optional = false +python-versions = ">=3.6" +files = [ + {file = "openpyxl-3.1.2-py2.py3-none-any.whl", hash = "sha256:f91456ead12ab3c6c2e9491cf33ba6d08357d802192379bb482f1033ade496f5"}, + {file = "openpyxl-3.1.2.tar.gz", hash = "sha256:a6f5977418eff3b2d5500d54d9db50c8277a368436f4e4f8ddb1be3422870184"}, +] + +[package.dependencies] +et-xmlfile = "*" + [[package]] name = "packaging" version = "23.1" @@ -642,4 +667,4 @@ files = [ [metadata] lock-version = "2.0" python-versions = "^3.11" -content-hash = "03e6adb7dcecd12194f8c44033d68666019c5bb52f8fd4bccd7301067832c9e1" +content-hash = "ac6360d9068e34f6bbad74a6c3339a85dd1968267f7272b48b8a99dfc5702812" diff --git a/pyproject.toml b/pyproject.toml index b84d616d..ea4c165f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,6 +16,9 @@ pytest-cov = "4.1.0" black = "23.3.0" ruff = "0.0.259" +[tool.poetry.group.data.dependencies] +openpyxl = "^3.1.2" + [build-system] requires = ["poetry-core"] build-backend = "poetry.core.masonry.api" @@ -33,7 +36,6 @@ exclude = ''' | .gitignore | .github | data - | tools )/ ''' @@ -49,18 +51,18 @@ addopts = [ "--cov-branch", "--cov-report=xml", "--cov-report=term", - "--cov=src", + "--cov=regtech_data_validator", "-vv", "--strict-markers", "-rfE", ] testpaths = [ - "src/tests", + "tests", ] [tool.coverage.run] relative_files = true -source = ["src"] +source = ["regtech_data_validator"] [tool.coverage.report] -skip_empty = true \ No newline at end of file +skip_empty = true diff --git a/__init__.py b/regtech_data_validator/__init__.py similarity index 100% rename from __init__.py rename to regtech_data_validator/__init__.py diff --git a/src/validator/check_functions.py b/regtech_data_validator/check_functions.py similarity index 100% rename from src/validator/check_functions.py rename to regtech_data_validator/check_functions.py diff --git a/regtech_data_validator/checks.py b/regtech_data_validator/checks.py new file mode 100644 index 00000000..802dcbc7 --- /dev/null +++ b/regtech_data_validator/checks.py @@ -0,0 +1,47 @@ +""" +Subclasses of Pandera's `Check` class +""" + +from enum import StrEnum +from typing import Any, Callable, Type + +from pandera import Check +from pandera.backends.base import 
BaseCheckBackend +from pandera.backends.pandas.checks import PandasCheckBackend + + +class Severity(StrEnum): + ERROR = 'error' + WARNING = 'warning' + + +class SBLCheck(Check): + """ + A Pandera.Check subclass that requires a `name`, an `id`, and a + `severity` be specified. The `severity` attribute distinguishes + between checks that report errors and checks that report warnings, + and is surfaced in the validation findings so the two can be told + apart. Construct instances with a member of the `Severity` enum + defined above. + """ + + def __init__(self, check_fn: Callable, id: str, name: str, description: str, severity: Severity, **check_kwargs): + """ + Subclass of Pandera's `Check`, with special handling for severity level + Args: + check_fn (Callable): A function which evaluates the validity of the column(s) being tested. + id (str, required): Unique identifier for a check + name (str, required): Unique name for a check + description (str, required): Long-form description of a check + severity (Severity, required): The severity of a check (error or warning) + check_kwargs (Any, optional): Parameters passed to `check_fn` function + """ + + self.severity = severity + + super().__init__(check_fn, title=id, name=name, description=description, **check_kwargs) + + @classmethod + def get_backend(cls, check_obj: Any) -> Type[BaseCheckBackend]: + """Assume Pandas DataFrame and return PandasCheckBackend""" + return PandasCheckBackend diff --git a/src/validator/create_schemas.py b/regtech_data_validator/create_schemas.py similarity index 63% rename from src/validator/create_schemas.py rename to regtech_data_validator/create_schemas.py index cc3cf8d7..bbb5e99d 100644 --- a/src/validator/create_schemas.py +++ b/regtech_data_validator/create_schemas.py @@ -2,11 +2,13 @@ with validations listed in phase 1 and phase 2.""" import pandas as pd -from checks import SBLCheck from pandera import DataFrameSchema -from pandera.errors import SchemaErrors -from phase_validations import get_phase_1_and_2_validations_for_lei -from schema_template import get_template +from pandera.errors import SchemaErrors, SchemaError + +from regtech_data_validator.checks import SBLCheck +from regtech_data_validator.phase_validations import get_phase_1_and_2_validations_for_lei +from regtech_data_validator.schema_template import get_template + # Get separate schema templates for phase 1 and 2 @@ -15,58 +17,59 @@ phase_2_template = get_template() -def get_schema_by_phase_for_lei(template: dict, phase: str, lei: str = None): +def get_schema_by_phase_for_lei(template: dict, phase: str, lei: str | None = None): for column in get_phase_1_and_2_validations_for_lei(lei): validations = get_phase_1_and_2_validations_for_lei(lei)[column] template[column].checks = validations[phase] return DataFrameSchema(template) -def get_phase_1_schema_for_lei(lei: str = None): +def get_phase_1_schema_for_lei(lei: str | None = None): return get_schema_by_phase_for_lei(phase_1_template, "phase_1", lei) -def get_phase_2_schema_for_lei(lei: str = None): +def get_phase_2_schema_for_lei(lei: str | None = None): return get_schema_by_phase_for_lei(phase_2_template, "phase_2", lei) -def validate(schema: DataFrameSchema, df: pd.DataFrame): +def validate(schema: DataFrameSchema, df: pd.DataFrame) -> list[dict]: """ validate received dataframe with schema and return list of schema errors - Args: schema (DataFrameSchema): schema to be used for validation df (pd.DataFrame): data parsed into dataframe - Returns: - list of schema error + list 
of validation findings (warnings and errors) """ findings = [] try: schema(df, lazy=True) - except SchemaErrors as errors: - for error in errors.schema_errors: - check: SBLCheck = error.check - column_name = error.schema.name - check_id = "n/a" + except SchemaErrors as err: + # WARN: SchemaErrors.schema_errors is supposed to be of type + # list[dict[str,Any]], but it's actually of type SchemaError + schema_error: SchemaError + for schema_error in err.schema_errors: # type: ignore + check = schema_error.check + column_name = schema_error.schema.name + + if not check: + raise RuntimeError( + f'SchemaError occurred with no associated Check for {column_name} column' + ) from schema_error + + if not isinstance(check, SBLCheck): + raise RuntimeError( + f'Check {check} type on {column_name} column not supported. Must be of type {SBLCheck}' + ) from schema_error fields: list[str] = [column_name] - if hasattr(check, "name") and hasattr(check, "id"): - check_name: str = check.name - check_id: str = check.id - - if check.groupby: - fields += check.groupby # type: ignore + if check.groupby: + fields += check.groupby # type: ignore - # This will either be a boolean series or a single bool - check_output = error.check_output - else: - # This means this check's column has unique set to True. - # we shouldn't be using Unique flag as it doesn't return series of - # validation result . it returns just a printout result string/txt - raise AttributeError(f"{str(check)}") + # This will either be a boolean series or a single bool + check_output = schema_error.check_output # Remove duplicates, but keep as `list` for JSON-friendliness fields = list(set(fields)) @@ -92,11 +95,11 @@ def validate(schema: DataFrameSchema, df: pd.DataFrame): validation_findings = { "validation": { - "id": check_id, - "name": check_name, + "id": check.title, + "name": check.name, "description": check.description, + "severity": check.severity, "fields": fields, - "severity": "warning" if check.warning else "error", }, "records": records, } @@ -106,7 +109,7 @@ def validate(schema: DataFrameSchema, df: pd.DataFrame): return findings -def validate_phases(df: pd.DataFrame, lei: str = None) -> list: +def validate_phases(df: pd.DataFrame, lei: str | None = None) -> list: phase1_findings = validate(get_phase_1_schema_for_lei(lei), df) if phase1_findings: return phase1_findings diff --git a/data/census/processed/Census2022.processed.csv b/regtech_data_validator/data/census/Census2022.processed.csv similarity index 100% rename from data/census/processed/Census2022.processed.csv rename to regtech_data_validator/data/census/Census2022.processed.csv diff --git a/tools/__init__.py b/regtech_data_validator/data/census/__init__.py similarity index 100% rename from tools/__init__.py rename to regtech_data_validator/data/census/__init__.py diff --git a/data/naics/processed/2022_codes.csv b/regtech_data_validator/data/naics/2022_codes.csv similarity index 100% rename from data/naics/processed/2022_codes.csv rename to regtech_data_validator/data/naics/2022_codes.csv diff --git a/src/tests/.gitkeep b/regtech_data_validator/data/naics/__init__.py similarity index 100% rename from src/tests/.gitkeep rename to regtech_data_validator/data/naics/__init__.py diff --git a/regtech_data_validator/global_data.py b/regtech_data_validator/global_data.py new file mode 100644 index 00000000..d02d4fc7 --- /dev/null +++ b/regtech_data_validator/global_data.py @@ -0,0 +1,20 @@ +import csv +from importlib.resources import files + + +# global variable for NAICS codes 
+naics_codes: dict[str, str] = {} +naics_file_path = files('regtech_data_validator.data.naics').joinpath('2022_codes.csv') + +with naics_file_path.open('r') as f: + for row in csv.DictReader(f): + naics_codes[row['code']] = row['title'] + + +# global variable for Census GEOIDs +census_geoids: set[str] = set() +census_file_path = files('regtech_data_validator.data.census').joinpath('Census2022.processed.csv') + +with census_file_path.open('r') as f: + for row in csv.DictReader(f): + census_geoids.add(row['geoid']) diff --git a/src/validator/main.py b/regtech_data_validator/main.py similarity index 71% rename from src/validator/main.py rename to regtech_data_validator/main.py index 20972759..bed8bb3a 100644 --- a/src/validator/main.py +++ b/regtech_data_validator/main.py @@ -5,29 +5,33 @@ Run from the terminal to see the generated output. """ -import pprint +import json import sys import pandas as pd -from create_schemas import validate_phases + +from regtech_data_validator.create_schemas import validate_phases def csv_to_df(path: str) -> pd.DataFrame: return pd.read_csv(path, dtype=str, na_filter=False) -def run_validation_on_df(df: pd.DataFrame, lei: str) -> None: +def run_validation_on_df(df: pd.DataFrame, lei: str | None) -> None: """ Run validation on the supplied dataframe and print a report to the terminal. """ - pprint.pprint(validate_phases(df, lei)) + validation_dict = validate_phases(df, lei) + validation_json = json.dumps(validation_dict, indent=4) + print(validation_json) -if __name__ == "__main__": + +def main(): csv_path = None - lei: str = None + lei: str | None = None if len(sys.argv) == 1: raise ValueError("csv_path arg not provided") elif len(sys.argv) == 2: @@ -40,3 +44,7 @@ def run_validation_on_df(df: pd.DataFrame, lei: str) -> None: df = csv_to_df(csv_path) run_validation_on_df(df, lei) + + +if __name__ == "__main__": + main() diff --git a/src/validator/phase_validations.py b/regtech_data_validator/phase_validations.py similarity index 93% rename from src/validator/phase_validations.py rename to regtech_data_validator/phase_validations.py index fc7b0a22..20b23c06 100644 --- a/src/validator/phase_validations.py +++ b/regtech_data_validator/phase_validations.py @@ -4,8 +4,8 @@ an instance of a PanderaSchema object for phase 1 and phase 2.""" -import global_data -from check_functions import ( +from regtech_data_validator import global_data +from regtech_data_validator.check_functions import ( has_correct_length, has_no_conditional_field_conflict, has_valid_enum_pair, @@ -28,13 +28,10 @@ meets_multi_value_field_restriction, string_contains, ) -from checks import SBLCheck +from regtech_data_validator.checks import SBLCheck, Severity -# read and populate global naics code (this should be called only once) -global_data.read_naics_codes() - -def get_phase_1_and_2_validations_for_lei(lei: str = None): +def get_phase_1_and_2_validations_for_lei(lei: str | None = None): return { "uid": { "phase_1": [ @@ -46,6 +43,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): "Any 'unique identifier' may not be used in more than one " "record within a small business lending application register." ), + severity=Severity.ERROR, groupby="uid", ), SBLCheck.str_length( @@ -57,6 +55,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): "'Unique identifier' must be at least 21 characters " "in length and at most 45 characters in length." 
), + severity=Severity.ERROR, ), SBLCheck( has_valid_format, @@ -67,6 +66,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): "numbers and/or uppercase letters (i.e., 0-9 and A-Z), " "and must not contain any other characters." ), + severity=Severity.ERROR, element_wise=True, regex="^[A-Z0-9]+$", ), @@ -79,6 +79,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " match the Legal Entity Identifier (LEI) for the financial" " institution." ), + severity=Severity.WARNING, element_wise=True, containing_value=lei, end_idx=20, @@ -93,6 +94,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): id="E0020", name="app_date.invalid_date_format", description="'Application date' must be a real calendar date using YYYYMMDD format.", + severity=Severity.ERROR, element_wise=True, ), ], @@ -105,6 +107,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): id="E0040", name="app_method.invalid_enum_value", description="'Application method' must equal 1, 2, 3, or 4.", + severity=Severity.ERROR, element_wise=True, accepted_values=[ "1", @@ -123,6 +126,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): id="E0060", name="app_recipient.invalid_enum_value", description="'Application recipient' must equal 1 or 2", + severity=Severity.ERROR, element_wise=True, accepted_values=[ "1", @@ -139,6 +143,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): id="E0080", name="ct_credit_product.invalid_enum_value", description="'Credit product' must equal 1, 2, 3, 4, 5, 6, 7, 8, 977, or 988.", + severity=Severity.ERROR, element_wise=True, accepted_values=[ "1", @@ -158,6 +163,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): }, "ct_credit_product_ff": { "phase_1": [ + # FIXME: built-in Pandera checks do not support add'l params like `severity` SBLCheck.str_length( 0, 300, @@ -166,6 +172,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): description=( "'Free-form text field for other credit products' must not exceed 300 characters in length." ), + severity=Severity.ERROR, ) ], "phase_2": [ @@ -179,6 +186,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): "When 'credit product' equals 977, 'free-form text field " "for other credit products' must not be blank." ), + severity=Severity.ERROR, groupby="ct_credit_product", condition_values={"977"}, ), @@ -195,6 +203,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " semicolons) must equal 1, 2, 3, 4, 5, 6, 7, 8," " 9, 10, 11, 977, or 999." ), + severity=Severity.ERROR, element_wise=True, accepted_values=[ "1", @@ -222,6 +231,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): "'Type of guarantee' must contain at least one and at" " most five values, separated by semicolons." ), + severity=Severity.ERROR, element_wise=True, min_length=1, max_length=5, @@ -229,21 +239,21 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): SBLCheck( is_unique_in_field, id="W0123", - warning=True, name="ct_guarantee.duplicates_in_field", description="'Type of guarantee' should not contain duplicated values.", + severity=Severity.WARNING, element_wise=True, ), SBLCheck( meets_multi_value_field_restriction, id="W0122", - warning=True, name="ct_guarantee.multi_value_field_restriction", description=( "When 'type of guarantee' contains 999 (no guarantee)," " 'type of guarantee' should not contain more than one" " value." 
), + severity=Severity.WARNING, element_wise=True, single_values={"999"}, ), @@ -257,6 +267,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): id="E0140", name="ct_guarantee_ff.invalid_text_length", description="'Free-form text field for other guarantee' must not exceed 300 characters in length", + severity=Severity.ERROR, ), ], "phase_2": [ @@ -270,13 +281,13 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): "When 'type of guarantee' contains 977, 'free-form text field" " for other guarantee' must not be blank." ), + severity=Severity.ERROR, groupby="ct_guarantee", condition_values={"977"}, ), SBLCheck( has_valid_multi_field_value_count, id="W2006", - warning=True, name="ct_guarantee_ff.multi_invalid_number_of_values", description=( "'Type of guarantee' and 'free-form text field for other " @@ -285,6 +296,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): "toward the maximum number of values for the purpose of this " "validation check." ), + severity=Severity.WARNING, groupby="ct_guarantee", ignored_values={"977"}, max_length=5, @@ -300,6 +312,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): description=( "Each value in 'Loan term: NA/NP flag' (separated by semicolons) must equal 900, 988, or 999." ), + severity=Severity.ERROR, element_wise=True, accepted_values=[ "900", @@ -321,6 +334,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): "and otherwise undetermined), 'loan term: NA/NP flag' must" "equal 999." ), + severity=Severity.ERROR, groupby="ct_credit_product", conditions=[ { @@ -346,6 +360,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): id="E0180", name="ct_loan_term.invalid_numeric_format", description="When present, 'loan term' must be a whole number.", + severity=Severity.ERROR, element_wise=True, accept_blank=True, ), @@ -360,6 +375,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): "and reported), 'loan term' must be blank. When 'loan term:" "NA/NP flag' equals 900, 'loan term' must not be blank." ), + severity=Severity.ERROR, groupby="ct_loan_term_flag", condition_values={"900"}, ), @@ -368,6 +384,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): id="E0181", name="ct_loan_term.invalid_numeric_value", description="When present, 'loan term' must be greater than or equal to 1.", + severity=Severity.ERROR, element_wise=True, min_value="1", accept_blank=True, @@ -377,6 +394,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): id="W0182", name="ct_loan_term.unreasonable_numeric_value", description="When present, 'loan term' should be less than 1200 (100 years).", + severity=Severity.WARNING, element_wise=True, max_value="1200", accept_blank=True, @@ -394,6 +412,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " semicolons) must equal 1, 2, 3, 4, 5, 6, 7, 8," " 9, 10, 11, 977, 988, or 999." ), + severity=Severity.ERROR, element_wise=True, accepted_values=[ "1", @@ -421,6 +440,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): description=( "'Credit purpose' must contain at least one and at most three values, separated by semicolons." 
), + severity=Severity.ERROR, element_wise=True, min_length=1, max_length=3, @@ -428,13 +448,13 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): SBLCheck( meets_multi_value_field_restriction, id="W0202", - warning=True, name="credit_purpose.multi_value_field_restriction", description=( "When 'credit purpose' contains 988 or 999," " 'credit purpose' should not contain more than one" " value." ), + severity=Severity.WARNING, element_wise=True, single_values={ "988", @@ -444,9 +464,9 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): SBLCheck( is_unique_in_field, id="W0203", - warning=True, name="credit_purpose.duplicates_in_field", description="'Credit purpose' should not contain duplicated values.", + severity=Severity.WARNING, element_wise=True, ), ], @@ -461,6 +481,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): description=( "'Free-form text field for other credit purpose' must not exceed 300 characters in length" ), + severity=Severity.ERROR, ), ], "phase_2": [ @@ -474,13 +495,13 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): "When 'credit purpose' contains 977, 'free-form text field for" "other credit purpose' must not be blank." ), + severity=Severity.ERROR, groupby="credit_purpose", condition_values={"977"}, ), SBLCheck( has_valid_multi_field_value_count, id="W2006", - warning=True, name="credit_purpose_ff.multi_invalid_number_of_values", description=( "'Credit purpose' and 'free-form text field for other credit " @@ -489,6 +510,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): "toward the maximum number of values for the purpose of " "this validation check." ), + severity=Severity.WARNING, groupby="credit_purpose", ignored_values={"977"}, max_length=3, @@ -502,6 +524,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): id="E0240", name="amount_applied_for_flag.invalid_enum_value", description="'Amount applied For: NA/NP flag' must equal 900, 988, or 999.", + severity=Severity.ERROR, element_wise=True, accepted_values=[ "900", @@ -519,6 +542,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): id="E0260", name="amount_applied_for.invalid_numeric_format", description="When present, 'amount applied for' must be a numeric value.", + severity=Severity.ERROR, element_wise=True, accept_blank=True, ), @@ -534,6 +558,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): "When 'amount applied for: NA/NP flag' equals 900, " "'amount applied for' must not be blank." 
), + severity=Severity.ERROR, groupby="amount_applied_for_flag", condition_values={"900"}, ), @@ -542,6 +567,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): id="E0261", name="amount_applied_for.invalid_numeric_value", description="When present, 'amount applied for' must be greater than 0.", + severity=Severity.ERROR, element_wise=True, min_value="0", accept_blank=True, @@ -555,6 +581,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): id="E0280", name="amount_approved.invalid_numeric_format", description="When present, 'amount approved or originated' must be a numeric value.", + severity=Severity.ERROR, element_wise=True, accept_blank=True, ), @@ -565,6 +592,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): id="E0281", name="amount_approved.invalid_numeric_value", description="When present, 'amount approved or originated' must be greater than 0.", + severity=Severity.ERROR, element_wise=True, min_value="0", accept_blank=True, @@ -580,6 +608,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): "equals 1 or 2, 'amount approved or originated' must " "not be blank." ), + severity=Severity.ERROR, groupby="action_taken", condition_values={"1", "2"}, ), @@ -592,6 +621,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): id="E0300", name="action_taken.invalid_enum_value", description="'Action taken' must equal 1, 2, 3, 4, or 5.", + severity=Severity.ERROR, element_wise=True, accepted_values=[ "1", @@ -621,6 +651,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): "'Total origination charges', 'Amount of " "total broker fees', 'Initial annual charges'" ), + severity=Severity.ERROR, groupby=[ "pricing_interest_rate_type", "pricing_mca_addcost_flag", @@ -656,6 +687,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): "penalty could be imposed', 'Prepayment " "penalty exists'" ), + severity=Severity.ERROR, groupby=[ "pricing_origination_charges", "pricing_broker_fees", @@ -681,6 +713,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): id="E0320", name="action_taken_date.invalid_date_format", description="'Action taken date' must be a real calendar date using YYYYMMDD format.", + severity=Severity.ERROR, element_wise=True, ), ], @@ -694,6 +727,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " within the current reporting period:" " October 1, 2024 to December 31, 2024." ), + severity=Severity.ERROR, element_wise=True, start_date_value="20241001", end_date_value="20241231", @@ -703,6 +737,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): id="E2009", name="action_taken_date.date_value_conflict", description="The date indicated by 'action taken date' must occur on or after 'application date'.", + severity=Severity.ERROR, groupby="app_date", ), SBLCheck( @@ -714,6 +749,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " generally be less than two years (730 days) before" " 'action taken date'." ), + severity=Severity.WARNING, groupby="app_date", days_value=730, ), @@ -729,6 +765,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): "Each value in 'denial reason(s)' (separated by semicolons)" "must equal 1, 2, 3, 4, 5, 6, 7, 8, 9, 977, or 999." ), + severity=Severity.ERROR, element_wise=True, accepted_values=[ "1", @@ -753,6 +790,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): description=( "'Denial reason(s)' must contain at least one and at most fourvalues, separated by semicolons." 
), + severity=Severity.ERROR, element_wise=True, min_length=1, max_length=4, @@ -766,6 +804,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): "contain 999. When 'action taken' does not equal 3, 'denial" "reason(s)' must equal 999." ), + severity=Severity.ERROR, groupby="action_taken", conditions=[ { @@ -785,21 +824,21 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): SBLCheck( meets_multi_value_field_restriction, id="W0340", - warning=True, name="denial_reasons.multi_value_field_restriction", description=( "When 'denial reason(s)' contains 999 (not applicable)," "'denial reason(s)' should not contain more than one value." ), + severity=Severity.WARNING, element_wise=True, single_values={"999"}, ), SBLCheck( is_unique_in_field, id="W0341", - warning=True, name="denial_reasons.duplicates_in_field", description="'Denial reason(s)' should not contain duplicated values.", + severity=Severity.WARNING, element_wise=True, ), ], @@ -814,6 +853,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): description=( "'Free-form text field for other denial reason(s)'must not exceed 300 characters in length." ), + severity=Severity.ERROR, ), ], "phase_2": [ @@ -827,13 +867,13 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): "blank. When 'denial reason(s)' contains 977, 'free-form text" "field for other denial reason(s)' must not be blank." ), + severity=Severity.ERROR, groupby="denial_reasons", condition_values={"977"}, ), SBLCheck( has_valid_multi_field_value_count, id="W2013", - warning=True, name="denial_reasons_ff.multi_invalid_number_of_values", description=( "'Denial reason(s)' and 'free-form text field for other " @@ -842,6 +882,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): "does not count toward the maximum number of values for " "the purpose of this validation check." ), + severity=Severity.WARNING, groupby="denial_reasons", ignored_values={"977"}, max_length=4, @@ -858,6 +899,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): "Each value in 'Interest rate type' (separated by " " semicolons) Must equal 1, 2, 3, 4, 5, 6, or 999" ), + severity=Severity.ERROR, element_wise=True, accepted_values=[ "1", @@ -878,7 +920,8 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): is_number, id="E0400", name="pricing_init_rate_period.invalid_numeric_format", - description=("When present, 'initial rate period' must be a whole number.",), + description="When present, 'initial rate period' must be a whole number.", + severity=Severity.ERROR, element_wise=True, accept_blank=True, ), @@ -897,6 +940,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): "be blank. 
When 'interest rate type' equals 3, 4, 5, or 6, " "'initial rate period' must not be blank" ), + severity=Severity.ERROR, groupby="pricing_interest_rate_type", condition_values={"3", "4", "5", "6"}, ), @@ -904,7 +948,8 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): is_greater_than, id="E0401", name="pricing_init_rate_period.invalid_numeric_value", - description=("When present, 'initial rate period' must be greater than 0",), + description="When present, 'initial rate period' must be greater than 0", + severity=Severity.ERROR, element_wise=True, min_value="0", accept_blank=True, @@ -918,6 +963,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): id="E0420", name="pricing_fixed_rate.invalid_numeric_format", description="When present, 'fixed rate: interest rate' must be a numeric value.", + severity=Severity.ERROR, element_wise=True, accept_blank=True, ), @@ -936,6 +982,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " blank. When 'interest rate type' equals 2, 4, or 6," " 'fixed rate: interest rate' must not be blank." ), + severity=Severity.ERROR, groupby="pricing_interest_rate_type", condition_values={"2", "4", "6"}, ), @@ -944,6 +991,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): id="W0420", name="pricing_fixed_rate.unreasonable_numeric_value", description="When present, 'fixed rate: interest rate' should generally be greater than 0.1.", + severity=Severity.WARNING, element_wise=True, min_value="0.1", accept_blank=True, @@ -957,6 +1005,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): id="E0440", name="pricing_adj_margin.invalid_numeric_format", description="When present, 'adjustable rate transaction: margin' must be a numeric value.", + severity=Severity.ERROR, element_wise=True, accept_blank=True, ), @@ -975,6 +1024,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): "be blank. When 'interest rate type' equals 1, 3, or 5, " "'variable rate transaction: margin' must not be blank." ), + severity=Severity.ERROR, groupby="pricing_interest_rate_type", condition_values={"1", "3", "5"}, ), @@ -985,6 +1035,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): description=( "When present, 'adjustable rate transaction: margin' should generally be greater than 0.1." ), + severity=Severity.ERROR, element_wise=True, min_value="0.1", accept_blank=True, @@ -1001,6 +1052,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): "'Adjustable rate transaction: index name' must equal " "1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 977, or 999." ), + severity=Severity.ERROR, element_wise=True, accepted_values=[ "1", @@ -1032,6 +1084,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): "When 'interest rate type' equals 1, 3, or 5, 'adjustable rate" "transaction: index name' must not equal 999." ), + severity=Severity.ERROR, groupby="pricing_interest_rate_type", conditions=[ { @@ -1060,6 +1113,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): description=( "'Adjustable rate transaction: index name: other' must not exceed 300 characters in length." ), + severity=Severity.ERROR, ), ], "phase_2": [ @@ -1075,6 +1129,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): "'adjustable rate transaction: index name: other' must not be" "blank." 
), + severity=Severity.ERROR, groupby="pricing_adj_index_name", condition_values={"977"}, ), @@ -1087,6 +1142,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): id="E0500", name="pricing_adj_index_value.invalid_numeric_format", description="When present, 'adjustable rate transaction: index value' must be a numeric value.", + severity=Severity.ERROR, element_wise=True, accept_blank=True, ), @@ -1104,6 +1160,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " blank. When 'interest rate type' equals 1 or 3," " 'adjustable rate transaction: index value' must not be blank." ), + severity=Severity.ERROR, groupby="pricing_interest_rate_type", condition_values={"1", "3"}, ), @@ -1115,10 +1172,8 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): is_number, id="E0520", name="pricing_origination_charges.invalid_numeric_format", - description=( - "When present, 'total origination charges' must be a numeric", - "value.", - ), + description="When present, 'total origination charges' must be a numeric value.", + severity=Severity.ERROR, element_wise=True, accept_blank=True, ), @@ -1131,10 +1186,8 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): is_number, id="E0540", name="pricing_broker_fees.invalid_numeric_format", - description=( - "When present, 'amount of total broker fees' must be a", - "numeric value.", - ), + description="When present, 'amount of total broker fees' must be a numeric value.", + severity=Severity.ERROR, element_wise=True, accept_blank=True, ), @@ -1148,6 +1201,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): id="E0560", name="pricing_initial_charges.invalid_numeric_format", description="When present, 'initial annual charges' must be anumeric value.", + severity=Severity.ERROR, element_wise=True, accept_blank=True, ), @@ -1165,6 +1219,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): "advances or other sales-based financing: NA flag' " "must equal 900 or 999." ), + severity=Severity.ERROR, element_wise=True, accepted_values=[ "900", @@ -1184,6 +1239,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): "merchant cash advances or other sales-based financing: " "NA flag' must be 999 (not applicable)." ), + severity=Severity.ERROR, groupby="ct_credit_product", conditions=[ { @@ -1207,6 +1263,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): "merchant cash advances or other sales-based financing' " "must be a numeric value" ), + severity=Severity.ERROR, element_wise=True, accept_blank=True, ), @@ -1227,6 +1284,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): "additional cost for merchant cash advances or other " "sales-based financing’ must not be blank." 
), + severity=Severity.ERROR, groupby="pricing_mca_addcost_flag", condition_values={"900"}, ), @@ -1239,6 +1297,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): id="E0620", name="pricing_prepenalty_allowed.invalid_enum_value", description="'Prepayment penalty could be imposed' must equal 1, 2, or 999.", + severity=Severity.ERROR, element_wise=True, accepted_values=[ "1", @@ -1256,6 +1315,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): id="E0640", name="pricing_prepenalty_exists.invalid_enum_value", description="'Prepayment penalty exists' must equal 1, 2, or 999.", + severity=Severity.ERROR, element_wise=True, accepted_values=[ "1", @@ -1273,6 +1333,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): id="E0640", name="census_tract_adr_type.invalid_enum_value", description="'Census tract: type of address' must equal 1, 2, 3, or 988.", + severity=Severity.ERROR, element_wise=True, accepted_values=[ "1", @@ -1291,6 +1352,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): id="E0680", name="census_tract_number.invalid_text_length", description="When present, 'census tract: tract number' must be a GEOID with exactly 11 digits.", + severity=Severity.ERROR, element_wise=True, accepted_length=11, accept_blank=True, @@ -1312,6 +1374,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): "location associated with the applicant), 'census tract:" " tract number' must not be blank." ), + severity=Severity.ERROR, groupby="census_tract_adr_type", conditions=[ { @@ -1337,6 +1400,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): id="E0700", name="gross_annual_revenue_flag.invalid_enum_value", description="'Gross annual revenue: NP flag' must equal 900 or 988.", + severity=Severity.ERROR, element_wise=True, accepted_values=[ "900", @@ -1353,6 +1417,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): id="E0720", name="gross_annual_revenue.invalid_numeric_format", description="When present, 'gross annual revenue' must be a numeric value.", + severity=Severity.ERROR, element_wise=True, accept_blank=True, ), @@ -1368,6 +1433,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): "'gross annual revenue: NP flag' equals 900, " "'gross annual revenue' must not be blank." ), + severity=Severity.ERROR, groupby="gross_annual_revenue_flag", condition_values={"900"}, ), @@ -1380,8 +1446,9 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): id="E0720", name="naics_code_flag.invalid_enum_value", description=( - "'North American Industry Classification System (NAICS) code: NP flag' must equal 900 or 988." + "'North American Industry Classification System (NAICS) code: NP flag'must equal 900 or 988." ), + severity=Severity.ERROR, element_wise=True, accepted_values=[ "900", @@ -1401,6 +1468,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): "'North American Industry Classification System " "(NAICS) code' may only contain numeric characters." ), + severity=Severity.ERROR, element_wise=True, accept_blank=True, ), @@ -1414,6 +1482,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): "When present, 'North American Industry Classification System " "(NAICS) code' must be three digits in length." ), + severity=Severity.ERROR, element_wise=True, accepted_length=3, accept_blank=True, @@ -1426,6 +1495,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): "When present, 'North American Industry Classification System " "(NAICS) code' should be a valid NAICS code." 
), + severity=Severity.WARNING, element_wise=True, accept_blank=True, codes=global_data.naics_codes, @@ -1440,6 +1510,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): "When 'type of guarantee' contains 977, 'free-form text field" " for other guarantee' must not be blank." ), + severity=Severity.ERROR, groupby="naics_code_flag", condition_values={"900"}, ), @@ -1452,6 +1523,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): id="E0780", name="number_of_workers.invalid_enum_value", description="'Number of workers' must equal 1, 2, 3, 4, 5, 6, 7, 8, 9, or 988.", + severity=Severity.ERROR, element_wise=True, accepted_values=[ "1", @@ -1476,6 +1548,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): id="E0800", name="time_in_business_type.invalid_enum_value", description="'Time in business: type of response' must equal 1, 2, 3, or 988.", + severity=Severity.ERROR, element_wise=True, accepted_values=[ "1", @@ -1494,6 +1567,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): id="E0820", name="time_in_business.invalid_numeric_format", description="When present, 'time in business' must be a whole number.", + severity=Severity.ERROR, element_wise=True, accept_blank=True, ), @@ -1504,6 +1578,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): id="E0821", name="time_in_business.invalid_numeric_value", description="When present, 'time in business' must be greater than or equal to 0.", + severity=Severity.ERROR, element_wise=True, min_value="0", accept_blank=True, @@ -1520,6 +1595,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " 'time in business: type of response' equals 1," " 'time in business' must not be blank." ), + severity=Severity.ERROR, groupby="time_in_business_type", condition_values={"1"}, ), @@ -1536,6 +1612,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " (separated by semicolons) must equal 1, 2, 3," " 955, 966, or 988." ), + severity=Severity.ERROR, element_wise=True, accepted_values=[ "1", @@ -1553,21 +1630,21 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): id="E0841", name="business_ownership_status.invalid_number_of_values", description="'Business ownership status' must contain at least one value.", + severity=Severity.ERROR, element_wise=True, min_length=1, ), SBLCheck( is_unique_in_field, id="W0842", - warning=True, name="business_ownership_status.duplicates_in_field", description="'Business ownership status' should not contain duplicated values.", + severity=Severity.WARNING, element_wise=True, ), SBLCheck( meets_multi_value_field_restriction, id="W0843", - warning=True, name="business_ownership_status.multi_value_field_restriction", description=( "When 'business ownership status' contains 966" @@ -1576,6 +1653,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " by applicant), 'business ownership status' should" " not contain more than one value." ), + severity=Severity.WARNING, element_wise=True, single_values={"966", "988"}, ), @@ -1588,6 +1666,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): id="E0860", name="num_principal_owners_flag.invalid_enum_value", description="'Number of principal owners: NP flag' must equal 900 or 988.", + severity=Severity.ERROR, element_wise=True, accepted_values=[ "900", @@ -1605,6 +1684,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): "demographic fields for principal owners 1, 2, 3, and 4 " "should be blank." 
), + severity=Severity.WARNING, groupby=[ "po_1_ethnicity", "po_1_race", @@ -1646,6 +1726,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " blank. Demographic fields for principal owners 2, 3, and 4 " "should be blank." ), + severity=Severity.WARNING, groupby=[ "po_1_ethnicity", "po_1_race", @@ -1686,6 +1767,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): "owner 1 and 2', and 'sex/gender of principal owner 1 and 2: " "NP flag' should not be blank." ), + severity=Severity.WARNING, groupby=[ "po_1_ethnicity", "po_1_race", @@ -1727,6 +1809,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): "and 3: NP flag' should not be blank. Demographic fields for " "principal owner 4 should be blank." ), + severity=Severity.WARNING, groupby=[ "po_1_ethnicity", "po_1_race", @@ -1768,6 +1851,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): "and 'sex/gender of principal owner 1, 2, 3, and 4: NP flag'" " should not be blank." ), + severity=Severity.WARNING, groupby=[ "po_1_ethnicity", "po_1_race", @@ -1807,6 +1891,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): id="E0880", name="num_principal_owners.invalid_enum_value", description="When present, 'number of principal owners' must equal 0, 1, 2, 3, or 4.", + severity=Severity.ERROR, element_wise=True, accepted_values=["0", "1", "2", "3", "4"], accept_blank=True, @@ -1823,6 +1908,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): "When 'number of principal owners: NP flag' equals 900, " "'number of principal owners' must not be blank." ), + severity=Severity.ERROR, groupby="num_principal_owners_flag", condition_values={"900"}, ), @@ -1840,6 +1926,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " semicolons) must equal 1, 11, 12," " 13, 14, 2, 966, 977, or 988." ), + severity=Severity.ERROR, element_wise=True, accepted_values=[ "1", @@ -1859,15 +1946,14 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): SBLCheck( is_unique_in_field, id="W0901", - warning=True, name="po_1_ethnicity.duplicates_in_field", description="'Ethnicity of principal owner 1' should not contain duplicated values.", + severity=Severity.WARNING, element_wise=True, ), SBLCheck( meets_multi_value_field_restriction, id="W0902", - warning=True, name="po_1_ethnicity.multi_value_field_restriction", description=( "When 'ethnicity of principal owner 1' contains" @@ -1876,6 +1962,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " (not provided by applicant), 'ethnicity of" " principal owner 1' should not contain more than one value." ), + severity=Severity.WARNING, element_wise=True, single_values={"966", "988"}, ), @@ -1893,6 +1980,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " text field for other Hispanic or Latino'" " must not exceed 300 characters in length." ), + severity=Severity.ERROR, ), ], "phase_2": [ @@ -1910,6 +1998,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " owner 1: free-form text field for other Hispanic" " or Latino' must not be blank." ), + severity=Severity.ERROR, groupby="po_1_ethnicity", condition_values={"977"}, ), @@ -1929,6 +2018,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " 34, 35, 36, 37, 4, 41, 42, 43, 44," " 5, 966, 971, 972, 973, 974, or 988." 
), + severity=Severity.ERROR, element_wise=True, accepted_values=[ "1", @@ -1968,15 +2058,14 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): SBLCheck( is_unique_in_field, id="W0941", - warning=True, name="po_1_race.duplicates_in_field", description="'Race of principal owner 1' should not contain duplicated values.", + severity=Severity.WARNING, element_wise=True, ), SBLCheck( meets_multi_value_field_restriction, id="W0942", - warning=True, name="po_1_race.multi_value_field_restriction", description=( "When 'race of principal owner 1' contains" @@ -1986,6 +2075,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " 'race of principal owner 1' should not" " contain more than one value." ), + severity=Severity.WARNING, element_wise=True, single_values={"966", "988"}, ), @@ -2004,6 +2094,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " Native Enrolled or Principal Tribe' must" " not exceed 300 characters in length." ), + severity=Severity.ERROR, ), ], "phase_2": [ @@ -2024,6 +2115,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " for American Indian or Alaska Native Enrolled or" " Principal Tribe' must not be blank." ), + severity=Severity.ERROR, groupby="po_1_race", condition_values={"971"}, ), @@ -2041,6 +2133,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " field for other Asian' must not exceed 300" " characters in length." ), + severity=Severity.ERROR, ), ], "phase_2": [ @@ -2057,6 +2150,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " 972, 'race of principal owner 1: free-form text field" " for other Asian' must not be blank." ), + severity=Severity.ERROR, groupby="po_1_race", condition_values={"972"}, ), @@ -2074,6 +2168,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " field for other Black or African American'" " must not exceed 300 characters in length." ), + severity=Severity.ERROR, ), ], "phase_2": [ @@ -2090,6 +2185,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " contains 973, 'race of principal owner 1: free-form text" " field for other Black or African American' must not be blank." ), + severity=Severity.ERROR, groupby="po_1_race", condition_values={"973"}, ), @@ -2107,6 +2203,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " field for other Pacific Islander race' must" " not exceed 300 characters in length." ), + severity=Severity.ERROR, ), ], "phase_2": [ @@ -2123,6 +2220,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " contains 974, 'race of principal owner 1: free-form text" " field for other Pacific Islander race' must not be blank." ), + severity=Severity.ERROR, groupby="po_1_race", condition_values={"974"}, ), @@ -2135,6 +2233,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): id="E1040", name="po_1_gender_flag.invalid_enum_value", description="When present, 'sex/gender of principal owner 1: NP flag' must equal 1, 966, or 988.", + severity=Severity.ERROR, element_wise=True, accepted_values=[ "1", @@ -2158,6 +2257,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " text field for self-identified sex/gender'" " must not exceed 300 characters in length." ), + severity=Severity.ERROR, ), ], "phase_2": [ @@ -2175,6 +2275,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " of principal owner 1: free-form text field for" " self-identified sex/gender' must not be blank." 
), + severity=Severity.ERROR, groupby="po_1_gender_flag", condition_values={"1"}, ), @@ -2192,6 +2293,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " semicolons) must equal 1, 11, 12," " 13, 14, 2, 966, 977, or 988." ), + severity=Severity.ERROR, element_wise=True, accepted_values=[ "1", @@ -2211,15 +2313,14 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): SBLCheck( is_unique_in_field, id="W0901", - warning=True, name="po_2_ethnicity.duplicates_in_field", description="'Ethnicity of principal owner 2' should not contain duplicated values.", + severity=Severity.WARNING, element_wise=True, ), SBLCheck( meets_multi_value_field_restriction, id="W0902", - warning=True, name="po_2_ethnicity.multi_value_field_restriction", description=( "When 'ethnicity of principal owner 2' contains" @@ -2228,6 +2329,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " (not provided by applicant), 'ethnicity of" " principal owner 2' should not contain more than one value." ), + severity=Severity.WARNING, element_wise=True, single_values={"966", "988"}, ), @@ -2245,6 +2347,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " text field for other Hispanic or Latino'" " must not exceed 300 characters in length." ), + severity=Severity.ERROR, ), ], "phase_2": [ @@ -2262,6 +2365,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " owner 2: free-form text field for other Hispanic" " or Latino' must not be blank." ), + severity=Severity.ERROR, groupby="po_2_ethnicity", condition_values={"977"}, ), @@ -2281,6 +2385,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " 34, 35, 36, 37, 4, 41, 42, 43, 44," " 5, 966, 971, 972, 973, 974, or 988." ), + severity=Severity.ERROR, element_wise=True, accepted_values=[ "1", @@ -2320,15 +2425,14 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): SBLCheck( is_unique_in_field, id="W0941", - warning=True, name="po_2_race.duplicates_in_field", description="'Race of principal owner 2' should not contain duplicated values.", + severity=Severity.WARNING, element_wise=True, ), SBLCheck( meets_multi_value_field_restriction, id="W0942", - warning=True, name="po_2_race.multi_value_field_restriction", description=( "When 'race of principal owner 2' contains" @@ -2338,6 +2442,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " 'race of principal owner 2' should not" " contain more than one value." ), + severity=Severity.WARNING, element_wise=True, single_values={"966", "988"}, ), @@ -2356,6 +2461,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " Native Enrolled or Principal Tribe' must" " not exceed 300 characters in length." ), + severity=Severity.ERROR, ), ], "phase_2": [ @@ -2376,6 +2482,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " for American Indian or Alaska Native Enrolled or" " Principal Tribe' must not be blank." ), + severity=Severity.ERROR, groupby="po_2_race", condition_values={"971"}, ), @@ -2393,6 +2500,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " field for other Asian' must not exceed 300" " characters in length." ), + severity=Severity.ERROR, ), ], "phase_2": [ @@ -2409,6 +2517,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " 972, 'race of principal owner 2: free-form text field" " for other Asian' must not be blank." 
), + severity=Severity.ERROR, groupby="po_2_race", condition_values={"972"}, ), @@ -2426,6 +2535,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " field for other Black or African American'" " must not exceed 300 characters in length." ), + severity=Severity.ERROR, ), ], "phase_2": [ @@ -2442,6 +2552,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " contains 973, 'race of principal owner 2: free-form text" " field for other Black or African American' must not be blank." ), + severity=Severity.ERROR, groupby="po_2_race", condition_values={"973"}, ), @@ -2459,6 +2570,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " field for other Pacific Islander race' must" " not exceed 300 characters in length." ), + severity=Severity.ERROR, ), ], "phase_2": [ @@ -2475,6 +2587,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " contains 974, 'race of principal owner 2: free-form text" " field for other Pacific Islander race' must not be blank." ), + severity=Severity.ERROR, groupby="po_2_race", condition_values={"974"}, ), @@ -2487,6 +2600,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): id="E1040", name="po_2_gender_flag.invalid_enum_value", description="When present, 'sex/gender of principal owner 2: NP flag' must equal 1, 966, or 988.", + severity=Severity.ERROR, element_wise=True, accepted_values=[ "1", @@ -2510,6 +2624,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " text field for self-identified sex/gender'" " must not exceed 300 characters in length." ), + severity=Severity.ERROR, ), ], "phase_2": [ @@ -2527,6 +2642,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " of principal owner 2: free-form text field for" " self-identified sex/gender' must not be blank." ), + severity=Severity.ERROR, groupby="po_2_gender_flag", condition_values={"1"}, ), @@ -2544,6 +2660,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " semicolons) must equal 1, 11, 12," " 13, 14, 2, 966, 977, or 988." ), + severity=Severity.ERROR, element_wise=True, accepted_values=[ "1", @@ -2563,15 +2680,14 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): SBLCheck( is_unique_in_field, id="W0901", - warning=True, name="po_3_ethnicity.duplicates_in_field", description="'Ethnicity of principal owner 3' should not contain duplicated values.", + severity=Severity.WARNING, element_wise=True, ), SBLCheck( meets_multi_value_field_restriction, id="W0902", - warning=True, name="po_3_ethnicity.multi_value_field_restriction", description=( "When 'ethnicity of principal owner 3' contains" @@ -2580,6 +2696,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " (not provided by applicant), 'ethnicity of" " principal owner 3' should not contain more than one value." ), + severity=Severity.WARNING, element_wise=True, single_values={"966", "988"}, ), @@ -2597,6 +2714,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " text field for other Hispanic or Latino'" " must not exceed 300 characters in length." ), + severity=Severity.ERROR, ), ], "phase_2": [ @@ -2614,6 +2732,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " owner 3: free-form text field for other Hispanic" " or Latino' must not be blank." ), + severity=Severity.ERROR, groupby="po_3_ethnicity", condition_values={"977"}, ), @@ -2633,6 +2752,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " 34, 35, 36, 37, 4, 41, 42, 43, 44," " 5, 966, 971, 972, 973, 974, or 988." 
), + severity=Severity.ERROR, element_wise=True, accepted_values=[ "1", @@ -2672,15 +2792,14 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): SBLCheck( is_unique_in_field, id="W0941", - warning=True, name="po_3_race.duplicates_in_field", description="'Race of principal owner 3' should not contain duplicated values.", + severity=Severity.WARNING, element_wise=True, ), SBLCheck( meets_multi_value_field_restriction, id="W0942", - warning=True, name="po_3_race.multi_value_field_restriction", description=( "When 'race of principal owner 3' contains" @@ -2690,6 +2809,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " 'race of principal owner 3' should not" " contain more than one value." ), + severity=Severity.WARNING, element_wise=True, single_values={"966", "988"}, ), @@ -2708,6 +2828,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " Native Enrolled or Principal Tribe' must" " not exceed 300 characters in length." ), + severity=Severity.ERROR, ), ], "phase_2": [ @@ -2728,6 +2849,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " for American Indian or Alaska Native Enrolled or" " Principal Tribe' must not be blank." ), + severity=Severity.ERROR, groupby="po_3_race", condition_values={"971"}, ), @@ -2745,6 +2867,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " field for other Asian' must not exceed 300" " characters in length." ), + severity=Severity.ERROR, ), ], "phase_2": [ @@ -2761,6 +2884,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " 972, 'race of principal owner 3: free-form text field" " for other Asian' must not be blank." ), + severity=Severity.ERROR, groupby="po_3_race", condition_values={"972"}, ), @@ -2778,6 +2902,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " field for other Black or African American'" " must not exceed 300 characters in length." ), + severity=Severity.ERROR, ), ], "phase_2": [ @@ -2794,6 +2919,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " contains 973, 'race of principal owner 3: free-form text" " field for other Black or African American' must not be blank." ), + severity=Severity.ERROR, groupby="po_3_race", condition_values={"973"}, ), @@ -2811,6 +2937,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " field for other Pacific Islander race' must" " not exceed 300 characters in length." ), + severity=Severity.ERROR, ), ], "phase_2": [ @@ -2827,6 +2954,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " contains 974, 'race of principal owner 3: free-form text" " field for other Pacific Islander race' must not be blank." ), + severity=Severity.ERROR, groupby="po_3_race", condition_values={"974"}, ), @@ -2839,6 +2967,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): id="E1040", name="po_3_gender_flag.invalid_enum_value", description="When present, 'sex/gender of principal owner 3: NP flag' must equal 1, 966, or 988.", + severity=Severity.ERROR, element_wise=True, accepted_values=[ "1", @@ -2862,6 +2991,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " text field for self-identified sex/gender'" " must not exceed 300 characters in length." ), + severity=Severity.ERROR, ), ], "phase_2": [ @@ -2879,6 +3009,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " of principal owner 3: free-form text field for" " self-identified sex/gender' must not be blank." 
), + severity=Severity.ERROR, groupby="po_3_gender_flag", condition_values={"1"}, ), @@ -2896,6 +3027,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " semicolons) must equal 1, 11, 12," " 13, 14, 2, 966, 977, or 988." ), + severity=Severity.ERROR, element_wise=True, accepted_values=[ "1", @@ -2915,15 +3047,14 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): SBLCheck( is_unique_in_field, id="W0901", - warning=True, name="po_4_ethnicity.duplicates_in_field", description="'Ethnicity of principal owner 4' should not contain duplicated values.", + severity=Severity.WARNING, element_wise=True, ), SBLCheck( meets_multi_value_field_restriction, id="W0902", - warning=True, name="po_4_ethnicity.multi_value_field_restriction", description=( "When 'ethnicity of principal owner 4' contains" @@ -2932,6 +3063,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " (not provided by applicant), 'ethnicity of" " principal owner 4' should not contain more than one value." ), + severity=Severity.WARNING, element_wise=True, single_values={"966", "988"}, ), @@ -2949,6 +3081,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " text field for other Hispanic or Latino'" " must not exceed 300 characters in length." ), + severity=Severity.ERROR, ), ], "phase_2": [ @@ -2966,6 +3099,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " owner 4: free-form text field for other Hispanic" " or Latino' must not be blank." ), + severity=Severity.ERROR, groupby="po_4_ethnicity", condition_values={"977"}, ), @@ -2985,6 +3119,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " 34, 35, 36, 37, 4, 41, 42, 43, 44," " 5, 966, 971, 972, 973, 974, or 988." ), + severity=Severity.ERROR, element_wise=True, accepted_values=[ "1", @@ -3024,15 +3159,14 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): SBLCheck( is_unique_in_field, id="W0941", - warning=True, name="po_4_race.duplicates_in_field", description="'Race of principal owner 4' should not contain duplicated values.", + severity=Severity.WARNING, element_wise=True, ), SBLCheck( meets_multi_value_field_restriction, id="W0942", - warning=True, name="po_4_race.multi_value_field_restriction", description=( "When 'race of principal owner 4' contains" @@ -3042,6 +3176,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " 'race of principal owner 4' should not" " contain more than one value." ), + severity=Severity.WARNING, element_wise=True, single_values={"966", "988"}, ), @@ -3060,6 +3195,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " Native Enrolled or Principal Tribe' must" " not exceed 300 characters in length." ), + severity=Severity.ERROR, ), ], "phase_2": [ @@ -3080,6 +3216,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " for American Indian or Alaska Native Enrolled or" " Principal Tribe' must not be blank." ), + severity=Severity.ERROR, groupby="po_4_race", condition_values={"971"}, ), @@ -3097,6 +3234,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " field for other Asian' must not exceed 300" " characters in length." ), + severity=Severity.ERROR, ), ], "phase_2": [ @@ -3113,6 +3251,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " 972, 'race of principal owner 4: free-form text field" " for other Asian' must not be blank." 
), + severity=Severity.ERROR, groupby="po_4_race", condition_values={"972"}, ), @@ -3130,6 +3269,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " field for other Black or African American'" " must not exceed 300 characters in length." ), + severity=Severity.ERROR, ), ], "phase_2": [ @@ -3146,6 +3286,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " contains 973, 'race of principal owner 4: free-form text" " field for other Black or African American' must not be blank." ), + severity=Severity.ERROR, groupby="po_4_race", condition_values={"973"}, ), @@ -3163,6 +3304,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " field for other Pacific Islander race' must" " not exceed 300 characters in length." ), + severity=Severity.ERROR, ), ], "phase_2": [ @@ -3179,6 +3321,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " contains 974, 'race of principal owner 4: free-form text" " field for other Pacific Islander race' must not be blank." ), + severity=Severity.ERROR, groupby="po_4_race", condition_values={"974"}, ), @@ -3191,6 +3334,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): id="E1040", name="po_4_gender_flag.invalid_enum_value", description="When present, 'sex/gender of principal owner 4: NP flag' must equal 1, 966, or 988.", + severity=Severity.ERROR, element_wise=True, accepted_values=[ "1", @@ -3214,6 +3358,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " text field for self-identified sex/gender'" " must not exceed 300 characters in length." ), + severity=Severity.ERROR, ), ], "phase_2": [ @@ -3231,6 +3376,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " of principal owner 4: free-form text field for" " self-identified sex/gender' must not be blank." ), + severity=Severity.ERROR, groupby="po_4_gender_flag", condition_values={"1"}, ), diff --git a/src/validator/schema_template.py b/regtech_data_validator/schema_template.py similarity index 98% rename from src/validator/schema_template.py rename to regtech_data_validator/schema_template.py index 2aada648..a2229cdd 100644 --- a/src/validator/schema_template.py +++ b/regtech_data_validator/schema_template.py @@ -1,4 +1,4 @@ -"""This is a 'blank' Pandera template for SBLAR. All columns in the fig are present, +"""This is a 'blank' Pandera template for SBLAR. All columns in the FIG are present, but the checks need to be populated. Do not import _schema_template from this module directly. 
Instead, make use of the @@ -170,10 +170,7 @@ ), "pricing_mca_addcost": Column( str, - title=( - "Field 31: MCA/sales-based: additional cost for merchant cash ", - "advances or other sales-based financing", - ), + title="Field 31: MCA/sales-based: additional cost for merchant cash advances or other sales-based financing", checks=[], ), "pricing_prepenalty_allowed": Column( diff --git a/src/tests/__init__.py b/src/tests/__init__.py deleted file mode 100644 index 238d07e8..00000000 --- a/src/tests/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -import os -import sys - -ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) - -sys.path.append(os.path.join(ROOT_DIR, "validator")) diff --git a/src/tests/test_checks.py b/src/tests/test_checks.py deleted file mode 100644 index bac4cc75..00000000 --- a/src/tests/test_checks.py +++ /dev/null @@ -1,27 +0,0 @@ -import pytest - -from validator.checks import SBLCheck - - -class TestSBLCheck: - def test_no_id_check(self): - with pytest.raises(Exception) as exc: - SBLCheck(lambda: True, warning=True, name="Just a Warning") - - assert "Each check must be assigned a `name` and an `id`." in str(exc.value) - assert exc.type == ValueError - - def test_no_name_check(self): - with pytest.raises(Exception) as exc: - SBLCheck(lambda: True, id="00000", warning=True) - - assert "Each check must be assigned a `name` and an `id`." in str(exc.value) - assert exc.type == ValueError - - def test_name_and_id_check(self): - raised = False - try: - SBLCheck(lambda: True, id="00000", warning=True, name="Just a Warning") - except ValueError: - raised = True - assert raised is False diff --git a/src/tests/test_global_data.py b/src/tests/test_global_data.py deleted file mode 100644 index 6e8fc13f..00000000 --- a/src/tests/test_global_data.py +++ /dev/null @@ -1,25 +0,0 @@ -import pytest - -from validator import global_data - - -class TestGlobalData: - def test_valid_naics_codes(self): - global_data.read_naics_codes() - assert len(global_data.naics_codes) == 96 - - def test_valid_geoids(self): - global_data.read_geoids() - assert len(global_data.census_geoids) == 87275 - - def test_invalid_naics_file(self): - failed_fpath = "./data/naics/processed/2022_codes.csv1" - with pytest.raises(Exception) as exc: - global_data.read_naics_codes(failed_fpath) - assert exc.type == FileNotFoundError - - def test_invalid_geoids_file(self): - failed_fpath = "./data/census/processed/Census2022.processed.csv2" - with pytest.raises(Exception) as exc: - global_data.read_geoids(failed_fpath) - assert exc.type == FileNotFoundError diff --git a/src/validator/__init__.py b/src/validator/__init__.py deleted file mode 100644 index 836099bf..00000000 --- a/src/validator/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -import os -import sys - -ROOT_DIR = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -sys.path.append(ROOT_DIR) diff --git a/src/validator/checks.py b/src/validator/checks.py deleted file mode 100644 index 29677a18..00000000 --- a/src/validator/checks.py +++ /dev/null @@ -1,73 +0,0 @@ -"""Custom subclass for warnings and errors. - -The class SBLCheck is a subclass of the standard Pandera Check class -that requires the `name` kwarg to be supplied. Errors and warnings are -distinguised based on the value of the warning attribute. It defaults -to false but can be set to True during init to indicate the validation -should be handled as a warning rather than an error. 
- -Examples: - - warning_check = SBLCheck( - lambda: True, - warning=True, - name="Just a Warning" - ) - - error_check_implied = SBLCheck(lambda: True, name="Error Check") - - error_check_explicit = SBLCheck( - lambda: True, - warning=False, - name="Also an Error" - ) -""" - - -from typing import Any, Callable, Type - -from pandera import Check -from pandera.backends.base import BaseCheckBackend -from pandera.backends.pandas.checks import PandasCheckBackend - - -class SBLCheck(Check): - """A custom Pandera.Check subclasss that requires a `name` and an `id` be - specified. Additionally, an attribute named `warning` is added to - the class to enable distinction between warnings and errors. The - default value of warning is `False` which corresponds to an error. - - Don't use this class directly. Make use of the SBLErrorCheck and - SBLWarningCheck subclasses below.""" - - def __init__(self, check_fn: Callable, id: str = None, warning=False, *args, **kwargs): - """Custom init method that verifies the presence of `name` and `id` in - kwargs creates a custom class attribute called `warning`. All - other initializaiton is handled by the parent Check class. - - Args: - check_fn (Callable): A function which evaluates the validity - of the column(s) being tested. - id (str, required): Each check mut have an id. - warning (bool, optional): Boolean specifying whether to - treat the check as a warning rather than an error. - - Raises: - ValueError: Raised if `name` not supplied in kwargs and if id is not - supplied or None. - """ - - self.id = id - - if "name" not in kwargs or id is None: - raise ValueError("Each check must be assigned a `name` and an `id`.") - - # if warning==False treat check as an error check - self.warning = warning - - super().__init__(check_fn=check_fn, *args, **kwargs) - - @classmethod - def get_backend(cls, check_obj: Any) -> Type[BaseCheckBackend]: - """Assume Pandas DataFrame and return PandasCheckBackend""" - return PandasCheckBackend diff --git a/src/validator/global_data.py b/src/validator/global_data.py deleted file mode 100644 index a9c54f04..00000000 --- a/src/validator/global_data.py +++ /dev/null @@ -1,36 +0,0 @@ -import os -import sys - -import pandas as pd - -ROOT_DIR = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) # noqa: E402 -sys.path.append(ROOT_DIR) # noqa: E402 - -from config import CENSUS_PROCESSED_CSV_PATH, NAICS_CSV_PATH # noqa: E402 - -naics_codes = {} - -# global variable for geoids -census_geoids = {} - - -def read_naics_codes(csv_path: str = NAICS_CSV_PATH): - """ - read NAICS CSV file with this format: (code, description) - and populate global value: naics_codes - """ - naics_codes.clear() - df = pd.read_csv(csv_path, dtype=str, na_filter=False) - for _, row in df.iterrows(): - naics_codes.update({row.iloc[0]: row.iloc[1]}) - - -def read_geoids(csv_path: str = CENSUS_PROCESSED_CSV_PATH): - """ - read geoids CSV file with this format: (code) - and populate global value: census_geoids - """ - census_geoids.clear() - df = pd.read_csv(csv_path, dtype=str, na_filter=False) - for _, row in df.iterrows(): - census_geoids.update({row.iloc[0]: None}) diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/tests/data/sbl-validations-fail.csv b/tests/data/sbl-validations-fail.csv similarity index 100% rename from src/tests/data/sbl-validations-fail.csv rename to tests/data/sbl-validations-fail.csv diff --git a/src/tests/data/sbl-validations-pass.csv 
b/tests/data/sbl-validations-pass.csv similarity index 100% rename from src/tests/data/sbl-validations-pass.csv rename to tests/data/sbl-validations-pass.csv diff --git a/src/tests/test_check_functions.py b/tests/test_check_functions.py similarity index 99% rename from src/tests/test_check_functions.py rename to tests/test_check_functions.py index 65175eff..c93b5613 100644 --- a/src/tests/test_check_functions.py +++ b/tests/test_check_functions.py @@ -1,7 +1,7 @@ import pandas as pd -from validator import global_data -from validator.check_functions import ( +from regtech_data_validator import global_data +from regtech_data_validator.check_functions import ( has_correct_length, has_no_conditional_field_conflict, has_valid_enum_pair, @@ -474,28 +474,24 @@ def test_with_incorrect_length(self): class TestIsValidCode: def test_with_valid_code(self): - global_data.read_naics_codes() result = is_valid_code("111", False, global_data.naics_codes) assert result is True result = is_valid_code("111", True, global_data.naics_codes) assert result is True def test_with_invalid_code(self): - global_data.read_naics_codes() result = is_valid_code("101", False, global_data.naics_codes) assert result is False result = is_valid_code("101", True, global_data.naics_codes) assert result is False def test_with_accepted_blank(self): - global_data.read_naics_codes() result = is_valid_code("", True, global_data.naics_codes) assert result is True result = is_valid_code(" ", True, global_data.naics_codes) assert result is True def test_with_invalid_blank(self): - global_data.read_naics_codes() result = is_valid_code("", False, global_data.naics_codes) assert result is False result = is_valid_code(" ", False, global_data.naics_codes) diff --git a/tests/test_global_data.py b/tests/test_global_data.py new file mode 100644 index 00000000..20f84c40 --- /dev/null +++ b/tests/test_global_data.py @@ -0,0 +1,9 @@ +from regtech_data_validator import global_data + + +class TestGlobalData: + def test_valid_naics_codes(self): + assert len(global_data.naics_codes) == 96 + + def test_valid_geoids(self): + assert len(global_data.census_geoids) == 87275 diff --git a/src/tests/test_sample_data.py b/tests/test_sample_data.py similarity index 82% rename from src/tests/test_sample_data.py rename to tests/test_sample_data.py index e1d6de07..35c1fe42 100644 --- a/src/tests/test_sample_data.py +++ b/tests/test_sample_data.py @@ -1,16 +1,10 @@ -import os -import sys - import pandas as pd import pytest -from validator.create_schemas import validate_phases - -ROOT_DIR = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) # noqa: E402 -sys.path.append(ROOT_DIR) # noqa: E402 +from regtech_data_validator.create_schemas import validate_phases -GOOD_FILE_PATH = "./src/tests/data/sbl-validations-pass.csv" -BAD_FILE_PATH = "./src/tests/data/sbl-validations-fail.csv" +GOOD_FILE_PATH = "./tests/data/sbl-validations-pass.csv" +BAD_FILE_PATH = "./tests/data/sbl-validations-fail.csv" class TestValidatingSampleData: diff --git a/src/tests/test_schema_functions.py b/tests/test_schema_functions.py similarity index 98% rename from src/tests/test_schema_functions.py rename to tests/test_schema_functions.py index 911dc673..7c141dee 100644 --- a/src/tests/test_schema_functions.py +++ b/tests/test_schema_functions.py @@ -1,6 +1,11 @@ import pandas as pd -from validator.create_schemas import get_phase_1_schema_for_lei, get_phase_2_schema_for_lei, validate, validate_phases +from regtech_data_validator.create_schemas import ( + 
get_phase_1_schema_for_lei, + get_phase_2_schema_for_lei, + validate, + validate_phases, +) class TestUtil: diff --git a/tools/process_naics.py b/tools/process_naics.py deleted file mode 100644 index b202407c..00000000 --- a/tools/process_naics.py +++ /dev/null @@ -1,49 +0,0 @@ -import csv -import os -import sys - -import pandas as pd - -ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) # noqa: E402 -sys.path.append(ROOT_DIR) # noqa: E402 - -import config # noqa: E402 - -""" -filter NAICS data with only 3 digit codes - -Raises: - FileNotFoundError: when input excel file not existed - FileExistsError: when output csv file existed -""" -if __name__ == "__main__": - EXCEL_PATH = config.NAICS_EXCEL_PATH - CSV_PATH = config.NAICS_CSV_PATH - CODE_COL = config.NAICS_CODE_COL - TITLE_COL = config.NAICS_TITLE_COL - - # check for paths - if not os.path.isfile(EXCEL_PATH): - error_msg = "Input excel file not existed" - raise FileNotFoundError(error_msg) - if os.path.isfile(CSV_PATH): - error_msg = "Output csv file existed" - raise FileExistsError(error_msg) - - df = pd.read_excel(EXCEL_PATH, dtype=str, na_filter=False) - - # add header - result = [["code", "title"]] - - # read excel file - # and create csv data list - for index, row in df.iterrows(): - code = str(row[CODE_COL]) - if len(code) == 3: - a_row = [code, str(row[TITLE_COL])] - result.append(a_row) - - # output data to csv file - with open(CSV_PATH, "w") as f: - writer = csv.writer(f) - writer.writerows(result)
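
The hunks above consistently replace `warning=True` with `severity=Severity.ERROR` or `severity=Severity.WARNING`, but the module that defines `Severity` and the updated `SBLCheck` is not part of this section. Below is a minimal sketch of what those call sites imply, reusing the `name`/`id` guard and the pandas backend hook from the deleted `src/validator/checks.py`; the module path (`regtech_data_validator/checks.py`) and the idea that `Severity` is a small enum are assumptions, not something this diff confirms.

# Sketch only -- inferred from the SBLCheck(...) call sites in this diff and the
# deleted src/validator/checks.py; not the actual implementation in this PR.
from enum import Enum
from typing import Any, Callable, Type

from pandera import Check
from pandera.backends.base import BaseCheckBackend
from pandera.backends.pandas.checks import PandasCheckBackend


class Severity(str, Enum):
    # only the ERROR/WARNING member names are visible in the diff
    ERROR = "error"
    WARNING = "warning"


class SBLCheck(Check):
    """Pandera Check subclass that requires a `name` and an `id` and carries a Severity."""

    def __init__(self, check_fn: Callable, id: str = None, severity: Severity = Severity.ERROR, **kwargs):
        # same guard as the deleted checks.py: every check needs a name and an id
        if "name" not in kwargs or id is None:
            raise ValueError("Each check must be assigned a `name` and an `id`.")
        self.id = id
        self.severity = severity  # replaces the old boolean `warning` attribute
        super().__init__(check_fn, **kwargs)

    @classmethod
    def get_backend(cls, check_obj: Any) -> Type[BaseCheckBackend]:
        """Assume a pandas DataFrame and return the pandas check backend."""
        return PandasCheckBackend

Under that assumption, an error check and a warning check differ only in the `severity` keyword, exactly as in the migrated call sites above.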
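
The new `tests/test_global_data.py` asserts against `global_data.naics_codes` and `global_data.census_geoids` directly, and `TestIsValidCode` no longer calls `global_data.read_naics_codes()` before using the lookup, so the renamed `regtech_data_validator/global_data.py` presumably builds both dictionaries at import time. A sketch of that shape follows; the CSV locations are placeholders, not the paths this PR actually uses.

# Sketch only -- the renamed global_data module is not shown in this section.
import os

import pandas as pd

_MODULE_DIR = os.path.dirname(os.path.abspath(__file__))
_NAICS_CSV_PATH = os.path.join(_MODULE_DIR, "data", "naics.csv")            # placeholder path
_GEOIDS_CSV_PATH = os.path.join(_MODULE_DIR, "data", "census_geoids.csv")   # placeholder path


def _read_naics_codes(csv_path: str = _NAICS_CSV_PATH) -> dict:
    """Read (code, description) rows into a dict keyed by code."""
    df = pd.read_csv(csv_path, dtype=str, na_filter=False)
    return dict(zip(df.iloc[:, 0], df.iloc[:, 1]))


def _read_geoids(csv_path: str = _GEOIDS_CSV_PATH) -> dict:
    """Read the processed census file into a dict keyed by geoid."""
    df = pd.read_csv(csv_path, dtype=str, na_filter=False)
    return dict.fromkeys(df.iloc[:, 0])


# Populated once at import time, so tests and check functions can use the
# dicts directly, as the updated tests above do.
naics_codes = _read_naics_codes()
census_geoids = _read_geoids()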
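
`src/tests/test_checks.py` is deleted above and no replacement appears in this section. If the `name`/`id` guard survives in the new checks module (as sketched earlier), equivalent coverage under the `severity` keyword might look like the following; the file name and the import path are hypothetical.

# Sketch only -- hypothetical tests/test_checks.py under the new signature.
import pytest

from regtech_data_validator.checks import SBLCheck, Severity  # assumed module path


class TestSBLCheck:
    def test_no_id_check(self):
        with pytest.raises(ValueError, match="`name` and an `id`"):
            SBLCheck(lambda: True, name="Just a Warning", severity=Severity.WARNING)

    def test_no_name_check(self):
        with pytest.raises(ValueError, match="`name` and an `id`"):
            SBLCheck(lambda: True, id="W0000", severity=Severity.WARNING)

    def test_name_and_id_check(self):
        # should construct without raising
        SBLCheck(lambda: True, id="W0000", name="Just a Warning", severity=Severity.WARNING)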