From 722b98164eaf1eb40f349f528a30b12b2ae92842 Mon Sep 17 00:00:00 2001 From: Hans Keeler Date: Mon, 16 Oct 2023 19:11:02 -0400 Subject: [PATCH 01/33] Move files into more standard Python project layout --- {src/validator => regtech_data_validator}/__init__.py | 0 {src/validator => regtech_data_validator}/check_functions.py | 0 {src/validator => regtech_data_validator}/checks.py | 0 {src/validator => regtech_data_validator}/create_schemas.py | 0 {src/validator => regtech_data_validator}/global_data.py | 0 {src/validator => regtech_data_validator}/main.py | 0 {src/validator => regtech_data_validator}/phase_validations.py | 0 {src/validator => regtech_data_validator}/schema_template.py | 0 src/tests/.gitkeep | 0 {src/tests => tests}/__init__.py | 0 {src/tests => tests}/data/sbl-validations-fail.csv | 0 {src/tests => tests}/data/sbl-validations-pass.csv | 0 {src/tests => tests}/test_check_functions.py | 0 {src/tests => tests}/test_checks.py | 0 {src/tests => tests}/test_global_data.py | 0 {src/tests => tests}/test_sample_data.py | 0 {src/tests => tests}/test_schema_functions.py | 0 17 files changed, 0 insertions(+), 0 deletions(-) rename {src/validator => regtech_data_validator}/__init__.py (100%) rename {src/validator => regtech_data_validator}/check_functions.py (100%) rename {src/validator => regtech_data_validator}/checks.py (100%) rename {src/validator => regtech_data_validator}/create_schemas.py (100%) rename {src/validator => regtech_data_validator}/global_data.py (100%) rename {src/validator => regtech_data_validator}/main.py (100%) rename {src/validator => regtech_data_validator}/phase_validations.py (100%) rename {src/validator => regtech_data_validator}/schema_template.py (100%) delete mode 100644 src/tests/.gitkeep rename {src/tests => tests}/__init__.py (100%) rename {src/tests => tests}/data/sbl-validations-fail.csv (100%) rename {src/tests => tests}/data/sbl-validations-pass.csv (100%) rename {src/tests => tests}/test_check_functions.py (100%) rename 
{src/tests => tests}/test_checks.py (100%) rename {src/tests => tests}/test_global_data.py (100%) rename {src/tests => tests}/test_sample_data.py (100%) rename {src/tests => tests}/test_schema_functions.py (100%) diff --git a/src/validator/__init__.py b/regtech_data_validator/__init__.py similarity index 100% rename from src/validator/__init__.py rename to regtech_data_validator/__init__.py diff --git a/src/validator/check_functions.py b/regtech_data_validator/check_functions.py similarity index 100% rename from src/validator/check_functions.py rename to regtech_data_validator/check_functions.py diff --git a/src/validator/checks.py b/regtech_data_validator/checks.py similarity index 100% rename from src/validator/checks.py rename to regtech_data_validator/checks.py diff --git a/src/validator/create_schemas.py b/regtech_data_validator/create_schemas.py similarity index 100% rename from src/validator/create_schemas.py rename to regtech_data_validator/create_schemas.py diff --git a/src/validator/global_data.py b/regtech_data_validator/global_data.py similarity index 100% rename from src/validator/global_data.py rename to regtech_data_validator/global_data.py diff --git a/src/validator/main.py b/regtech_data_validator/main.py similarity index 100% rename from src/validator/main.py rename to regtech_data_validator/main.py diff --git a/src/validator/phase_validations.py b/regtech_data_validator/phase_validations.py similarity index 100% rename from src/validator/phase_validations.py rename to regtech_data_validator/phase_validations.py diff --git a/src/validator/schema_template.py b/regtech_data_validator/schema_template.py similarity index 100% rename from src/validator/schema_template.py rename to regtech_data_validator/schema_template.py diff --git a/src/tests/.gitkeep b/src/tests/.gitkeep deleted file mode 100644 index e69de29b..00000000 diff --git a/src/tests/__init__.py b/tests/__init__.py similarity index 100% rename from src/tests/__init__.py rename to 
tests/__init__.py diff --git a/src/tests/data/sbl-validations-fail.csv b/tests/data/sbl-validations-fail.csv similarity index 100% rename from src/tests/data/sbl-validations-fail.csv rename to tests/data/sbl-validations-fail.csv diff --git a/src/tests/data/sbl-validations-pass.csv b/tests/data/sbl-validations-pass.csv similarity index 100% rename from src/tests/data/sbl-validations-pass.csv rename to tests/data/sbl-validations-pass.csv diff --git a/src/tests/test_check_functions.py b/tests/test_check_functions.py similarity index 100% rename from src/tests/test_check_functions.py rename to tests/test_check_functions.py diff --git a/src/tests/test_checks.py b/tests/test_checks.py similarity index 100% rename from src/tests/test_checks.py rename to tests/test_checks.py diff --git a/src/tests/test_global_data.py b/tests/test_global_data.py similarity index 100% rename from src/tests/test_global_data.py rename to tests/test_global_data.py diff --git a/src/tests/test_sample_data.py b/tests/test_sample_data.py similarity index 100% rename from src/tests/test_sample_data.py rename to tests/test_sample_data.py diff --git a/src/tests/test_schema_functions.py b/tests/test_schema_functions.py similarity index 100% rename from src/tests/test_schema_functions.py rename to tests/test_schema_functions.py From d4529ecb175a2692e35ca9376d18240ee6644d3c Mon Sep 17 00:00:00 2001 From: Hans Keeler Date: Tue, 17 Oct 2023 01:08:56 -0400 Subject: [PATCH 02/33] Fix issues related to repo restructure - Fixed all imports - Fixed test and coverage settings in pyproject.toml - Removed all Python path magic in __init.py__ files - Moved data files into the repo, and used `importlib` to load files by package name instead of path. This is more portable, especially once we turn this into a distributable package. 
- Refactored global_data to only load data once at module load time --- pyproject.toml | 8 ++-- regtech_data_validator/__init__.py | 5 --- regtech_data_validator/create_schemas.py | 7 +-- .../data/census}/Census2022.processed.csv | 0 .../data/census/__init__.py | 0 .../data/naics}/2022_codes.csv | 0 regtech_data_validator/data/naics/__init__.py | 0 regtech_data_validator/global_data.py | 44 ++++++------------- regtech_data_validator/main.py | 3 +- regtech_data_validator/phase_validations.py | 9 ++-- tests/__init__.py | 6 --- tests/test_check_functions.py | 8 +--- tests/test_checks.py | 2 +- tests/test_global_data.py | 16 +------ tests/test_sample_data.py | 12 ++--- tests/test_schema_functions.py | 6 ++- 16 files changed, 39 insertions(+), 87 deletions(-) rename {data/census/processed => regtech_data_validator/data/census}/Census2022.processed.csv (100%) rename __init__.py => regtech_data_validator/data/census/__init__.py (100%) rename {data/naics/processed => regtech_data_validator/data/naics}/2022_codes.csv (100%) create mode 100644 regtech_data_validator/data/naics/__init__.py diff --git a/pyproject.toml b/pyproject.toml index b84d616d..4e13e24c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -49,18 +49,18 @@ addopts = [ "--cov-branch", "--cov-report=xml", "--cov-report=term", - "--cov=src", + "--cov=regtech_data_validator", "-vv", "--strict-markers", "-rfE", ] testpaths = [ - "src/tests", + "tests", ] [tool.coverage.run] relative_files = true -source = ["src"] +source = ["regtech_data_validator"] [tool.coverage.report] -skip_empty = true \ No newline at end of file +skip_empty = true diff --git a/regtech_data_validator/__init__.py b/regtech_data_validator/__init__.py index 836099bf..e69de29b 100644 --- a/regtech_data_validator/__init__.py +++ b/regtech_data_validator/__init__.py @@ -1,5 +0,0 @@ -import os -import sys - -ROOT_DIR = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -sys.path.append(ROOT_DIR) diff --git 
a/regtech_data_validator/create_schemas.py b/regtech_data_validator/create_schemas.py index cc3cf8d7..76784af3 100644 --- a/regtech_data_validator/create_schemas.py +++ b/regtech_data_validator/create_schemas.py @@ -2,11 +2,12 @@ with validations listed in phase 1 and phase 2.""" import pandas as pd -from checks import SBLCheck from pandera import DataFrameSchema from pandera.errors import SchemaErrors -from phase_validations import get_phase_1_and_2_validations_for_lei -from schema_template import get_template + +from regtech_data_validator.checks import SBLCheck +from regtech_data_validator.phase_validations import get_phase_1_and_2_validations_for_lei +from regtech_data_validator.schema_template import get_template # Get separate schema templates for phase 1 and 2 diff --git a/data/census/processed/Census2022.processed.csv b/regtech_data_validator/data/census/Census2022.processed.csv similarity index 100% rename from data/census/processed/Census2022.processed.csv rename to regtech_data_validator/data/census/Census2022.processed.csv diff --git a/__init__.py b/regtech_data_validator/data/census/__init__.py similarity index 100% rename from __init__.py rename to regtech_data_validator/data/census/__init__.py diff --git a/data/naics/processed/2022_codes.csv b/regtech_data_validator/data/naics/2022_codes.csv similarity index 100% rename from data/naics/processed/2022_codes.csv rename to regtech_data_validator/data/naics/2022_codes.csv diff --git a/regtech_data_validator/data/naics/__init__.py b/regtech_data_validator/data/naics/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/regtech_data_validator/global_data.py b/regtech_data_validator/global_data.py index a9c54f04..b3364354 100644 --- a/regtech_data_validator/global_data.py +++ b/regtech_data_validator/global_data.py @@ -1,36 +1,20 @@ -import os -import sys +import csv +from importlib.resources import files -import pandas as pd -ROOT_DIR = 
os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) # noqa: E402 -sys.path.append(ROOT_DIR) # noqa: E402 +# global variable for NAICS codes +naics_codes: dict[str,str] = {} +naics_file_path = files('regtech_data_validator.data.naics').joinpath('2022_codes.csv') -from config import CENSUS_PROCESSED_CSV_PATH, NAICS_CSV_PATH # noqa: E402 +with naics_file_path.open('r') as f: + for row in csv.DictReader(f): + naics_codes[row['code']] = row['title'] -naics_codes = {} -# global variable for geoids -census_geoids = {} +# global variable for Census GEOIDs +census_geoids: set[str] = set() +census_file_path = files('regtech_data_validator.data.census').joinpath('Census2022.processed.csv') - -def read_naics_codes(csv_path: str = NAICS_CSV_PATH): - """ - read NAICS CSV file with this format: (code, description) - and populate global value: naics_codes - """ - naics_codes.clear() - df = pd.read_csv(csv_path, dtype=str, na_filter=False) - for _, row in df.iterrows(): - naics_codes.update({row.iloc[0]: row.iloc[1]}) - - -def read_geoids(csv_path: str = CENSUS_PROCESSED_CSV_PATH): - """ - read geoids CSV file with this format: (code) - and populate global value: census_geoids - """ - census_geoids.clear() - df = pd.read_csv(csv_path, dtype=str, na_filter=False) - for _, row in df.iterrows(): - census_geoids.update({row.iloc[0]: None}) +with census_file_path.open('r') as f: + for row in csv.DictReader(f): + census_geoids.add(row['geoid']) diff --git a/regtech_data_validator/main.py b/regtech_data_validator/main.py index 20972759..e7df5b11 100644 --- a/regtech_data_validator/main.py +++ b/regtech_data_validator/main.py @@ -9,7 +9,8 @@ import sys import pandas as pd -from create_schemas import validate_phases + +from regtech_data_validator.create_schemas import validate_phases def csv_to_df(path: str) -> pd.DataFrame: diff --git a/regtech_data_validator/phase_validations.py b/regtech_data_validator/phase_validations.py index fc7b0a22..fc2ad5ea 100644 --- 
a/regtech_data_validator/phase_validations.py +++ b/regtech_data_validator/phase_validations.py @@ -4,8 +4,8 @@ an instance of a PanderaSchema object for phase 1 and phase 2.""" -import global_data -from check_functions import ( +from regtech_data_validator import global_data +from regtech_data_validator.check_functions import ( has_correct_length, has_no_conditional_field_conflict, has_valid_enum_pair, @@ -28,10 +28,7 @@ meets_multi_value_field_restriction, string_contains, ) -from checks import SBLCheck - -# read and populate global naics code (this should be called only once) -global_data.read_naics_codes() +from regtech_data_validator.checks import SBLCheck def get_phase_1_and_2_validations_for_lei(lei: str = None): diff --git a/tests/__init__.py b/tests/__init__.py index 238d07e8..e69de29b 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1,6 +0,0 @@ -import os -import sys - -ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) - -sys.path.append(os.path.join(ROOT_DIR, "validator")) diff --git a/tests/test_check_functions.py b/tests/test_check_functions.py index 65175eff..c93b5613 100644 --- a/tests/test_check_functions.py +++ b/tests/test_check_functions.py @@ -1,7 +1,7 @@ import pandas as pd -from validator import global_data -from validator.check_functions import ( +from regtech_data_validator import global_data +from regtech_data_validator.check_functions import ( has_correct_length, has_no_conditional_field_conflict, has_valid_enum_pair, @@ -474,28 +474,24 @@ def test_with_incorrect_length(self): class TestIsValidCode: def test_with_valid_code(self): - global_data.read_naics_codes() result = is_valid_code("111", False, global_data.naics_codes) assert result is True result = is_valid_code("111", True, global_data.naics_codes) assert result is True def test_with_invalid_code(self): - global_data.read_naics_codes() result = is_valid_code("101", False, global_data.naics_codes) assert result is False result = is_valid_code("101", 
True, global_data.naics_codes) assert result is False def test_with_accepted_blank(self): - global_data.read_naics_codes() result = is_valid_code("", True, global_data.naics_codes) assert result is True result = is_valid_code(" ", True, global_data.naics_codes) assert result is True def test_with_invalid_blank(self): - global_data.read_naics_codes() result = is_valid_code("", False, global_data.naics_codes) assert result is False result = is_valid_code(" ", False, global_data.naics_codes) diff --git a/tests/test_checks.py b/tests/test_checks.py index bac4cc75..dafaf512 100644 --- a/tests/test_checks.py +++ b/tests/test_checks.py @@ -1,6 +1,6 @@ import pytest -from validator.checks import SBLCheck +from regtech_data_validator.checks import SBLCheck class TestSBLCheck: diff --git a/tests/test_global_data.py b/tests/test_global_data.py index 6e8fc13f..f939cacf 100644 --- a/tests/test_global_data.py +++ b/tests/test_global_data.py @@ -1,25 +1,11 @@ import pytest -from validator import global_data +from regtech_data_validator import global_data class TestGlobalData: def test_valid_naics_codes(self): - global_data.read_naics_codes() assert len(global_data.naics_codes) == 96 def test_valid_geoids(self): - global_data.read_geoids() assert len(global_data.census_geoids) == 87275 - - def test_invalid_naics_file(self): - failed_fpath = "./data/naics/processed/2022_codes.csv1" - with pytest.raises(Exception) as exc: - global_data.read_naics_codes(failed_fpath) - assert exc.type == FileNotFoundError - - def test_invalid_geoids_file(self): - failed_fpath = "./data/census/processed/Census2022.processed.csv2" - with pytest.raises(Exception) as exc: - global_data.read_geoids(failed_fpath) - assert exc.type == FileNotFoundError diff --git a/tests/test_sample_data.py b/tests/test_sample_data.py index e1d6de07..35c1fe42 100644 --- a/tests/test_sample_data.py +++ b/tests/test_sample_data.py @@ -1,16 +1,10 @@ -import os -import sys - import pandas as pd import pytest -from 
validator.create_schemas import validate_phases - -ROOT_DIR = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) # noqa: E402 -sys.path.append(ROOT_DIR) # noqa: E402 +from regtech_data_validator.create_schemas import validate_phases -GOOD_FILE_PATH = "./src/tests/data/sbl-validations-pass.csv" -BAD_FILE_PATH = "./src/tests/data/sbl-validations-fail.csv" +GOOD_FILE_PATH = "./tests/data/sbl-validations-pass.csv" +BAD_FILE_PATH = "./tests/data/sbl-validations-fail.csv" class TestValidatingSampleData: diff --git a/tests/test_schema_functions.py b/tests/test_schema_functions.py index 911dc673..0b8e0c1b 100644 --- a/tests/test_schema_functions.py +++ b/tests/test_schema_functions.py @@ -1,6 +1,10 @@ import pandas as pd -from validator.create_schemas import get_phase_1_schema_for_lei, get_phase_2_schema_for_lei, validate, validate_phases +from regtech_data_validator.create_schemas import ( + get_phase_1_schema_for_lei, + get_phase_2_schema_for_lei, + validate, validate_phases +) class TestUtil: From ccee7383df0c4e3b9bd76d514743aefc7e1bc2e3 Mon Sep 17 00:00:00 2001 From: Hans Keeler Date: Tue, 17 Oct 2023 01:22:43 -0400 Subject: [PATCH 03/33] Move data-related code and config under `data` dir --- {tools => data/census}/process_census.py | 0 config.py => data/config.py | 0 {tools => data/naics}/process_naics.py | 0 3 files changed, 0 insertions(+), 0 deletions(-) rename {tools => data/census}/process_census.py (100%) rename config.py => data/config.py (100%) rename {tools => data/naics}/process_naics.py (100%) diff --git a/tools/process_census.py b/data/census/process_census.py similarity index 100% rename from tools/process_census.py rename to data/census/process_census.py diff --git a/config.py b/data/config.py similarity index 100% rename from config.py rename to data/config.py diff --git a/tools/process_naics.py b/data/naics/process_naics.py similarity index 100% rename from tools/process_naics.py rename to data/naics/process_naics.py From 
baeb814355a359b51fea7cd3e35bd4c2b8ad7b33 Mon Sep 17 00:00:00 2001 From: Hans Keeler Date: Tue, 17 Oct 2023 02:11:55 -0400 Subject: [PATCH 04/33] Merge config.py and tools under data dir - Move file format settings from config.py into respective data transform scripts - Move src/dest file settings from config.py to CLI args - Use consistent CLI arg and file exists handling - Add openpyxl dependency for handling NAICS Excel reading --- data/census/process_census.py | 65 ++++++++++++++++++++--------------- data/config.py | 19 ---------- data/naics/process_naics.py | 44 ++++++++++++++---------- poetry.lock | 29 ++++++++++++++-- pyproject.toml | 3 ++ tools/__init__.py | 0 6 files changed, 93 insertions(+), 67 deletions(-) delete mode 100644 data/config.py delete mode 100644 tools/__init__.py diff --git a/data/census/process_census.py b/data/census/process_census.py index 0686b9c5..95a1445d 100644 --- a/data/census/process_census.py +++ b/data/census/process_census.py @@ -5,10 +5,12 @@ import pandas as pd -ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) # noqa: E402 -sys.path.append(ROOT_DIR) # noqa: E402 +# census file col indexes +CENSUS_STATE_COL_INDEX = 2 +CENSUS_COUNTY_COL_INDEX = 3 +CENSUS_TRACT_COL_INDEX = 4 -import config # noqa: E402 +CENSUS_GEOID_COL = "geoid" # helper function to check number (float/int/negative) @@ -21,24 +23,22 @@ def _is_number(s): # helper function to unzip census file and extract CSV file -def _extract_census_zip_file(): - CENSUS_TMP_CSV_PATH = config.CENSUS_RAW_ZIP_PATH + ".tmp.csv" +def _extract_census_zip_file(raw_src): + census_tmp_csv_path = raw_src + ".tmp.csv" # unzip and extract csv files - with zipfile.ZipFile(config.CENSUS_RAW_ZIP_PATH, "r") as zip_ref: + with zipfile.ZipFile(raw_src, "r") as zip_ref: for file in zip_ref.namelist(): # iterate over files in archive if file[-4:] == ".csv": - print("Extracting CSV to {}".format(CENSUS_TMP_CSV_PATH)) - with open(CENSUS_TMP_CSV_PATH, "wb") as outfile: + 
print("Extracting CSV to {}".format(census_tmp_csv_path)) + with open(census_tmp_csv_path, "wb") as outfile: outfile.write(zip_ref.read(file)) - # it should only have one csv file - return CENSUS_TMP_CSV_PATH + # it should only have one csv file + + return census_tmp_csv_path # helper function to read extracted csv file and filter only geo-tract-id -def _read_census_csv(src_path: str, csv_path: str): - STATE_COL = config.CENSUS_STATE_COL_INDEX - COUNTY_COL = config.CENSUS_COUNTY_COL_INDEX - TRACT_COL = config.CENSUS_TRACT_COL_INDEX +def _process_census_csv(src_path: str, csv_path: str): # check paths if not os.path.isfile(src_path): @@ -52,14 +52,14 @@ def _read_census_csv(src_path: str, csv_path: str): ) # add header - result = [[config.CENSUS_GEOID_COL]] + result = [[CENSUS_GEOID_COL]] # read excel file # and create csv data list for index, row in df.iterrows(): - state_value = str(row[STATE_COL]) - county_value = str(row[COUNTY_COL]) - tract_value = str(row[TRACT_COL]) + state_value = str(row[CENSUS_STATE_COL_INDEX]) + county_value = str(row[CENSUS_COUNTY_COL_INDEX]) + tract_value = str(row[CENSUS_TRACT_COL_INDEX]) if ( _is_number(state_value) and _is_number(county_value) @@ -84,14 +84,23 @@ def _read_census_csv(src_path: str, csv_path: str): - output to defined output file """ if __name__ == "__main__": - CSV_PATH = config.CENSUS_PROCESSED_CSV_PATH - - if os.path.isfile(CSV_PATH): - error_msg = "Output {} csv file existed".format(CSV_PATH) - raise FileExistsError(error_msg) - - tmp_census_csv_file = _extract_census_zip_file() - print("Reading extracted CSV File . 
{}".format(tmp_census_csv_file)) - _read_census_csv(tmp_census_csv_file, CSV_PATH) - print("Removing extracted CSV File") + if len(sys.argv) != 3: + print(f"Usage: {sys.argv[0]} ") + exit(1) + + raw_src = sys.argv[1] + csv_dest = sys.argv[2] + + if not os.path.isfile(raw_src): + print(f"source file not existed: {raw_src}") + exit(2) + + if os.path.isfile(csv_dest): + print("destination file already existed: {csv_dest}") + exit(3) + + tmp_census_csv_file = _extract_census_zip_file(raw_src) + print(f"Reading extracted CSV file: {tmp_census_csv_file}") + _process_census_csv(tmp_census_csv_file, csv_dest) + print("Removing extracted CSV file") os.remove(tmp_census_csv_file) diff --git a/data/config.py b/data/config.py deleted file mode 100644 index 00a125f2..00000000 --- a/data/config.py +++ /dev/null @@ -1,19 +0,0 @@ -# path to original/raw NAICS excel file -NAICS_EXCEL_PATH = "./data/naics/raw/2-6 digit_2022_Codes.xlsx" -# path to parsed/filtered naics codes file -NAICS_CSV_PATH = "./data/naics/processed/2022_codes.csv" -# column header text containing naics code -NAICS_CODE_COL = "2022 NAICS US Code" -# column header text containing naics title/description -NAICS_TITLE_COL = "2022 NAICS US Title" - -# path to original/raw NAICS zip file -CENSUS_RAW_ZIP_PATH = "./data/census/raw/CensusFlatFile2022.zip" -# path to parsed/filtered naics codes file -CENSUS_PROCESSED_CSV_PATH = "./data/census/processed/Census2022.processed.csv" -# census file col indexes -CENSUS_STATE_COL_INDEX = 2 -CENSUS_COUNTY_COL_INDEX = 3 -CENSUS_TRACT_COL_INDEX = 4 - -CENSUS_GEOID_COL = "geoid" diff --git a/data/naics/process_naics.py b/data/naics/process_naics.py index b202407c..f6e1a251 100644 --- a/data/naics/process_naics.py +++ b/data/naics/process_naics.py @@ -4,10 +4,12 @@ import pandas as pd -ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) # noqa: E402 -sys.path.append(ROOT_DIR) # noqa: E402 -import config # noqa: E402 +# column header text containing naics code 
+NAICS_CODE_COL = "2022 NAICS US Code" +# column header text containing naics title/description +NAICS_TITLE_COL = "2022 NAICS US Title" + """ filter NAICS data with only 3 digit codes @@ -17,20 +19,24 @@ FileExistsError: when output csv file existed """ if __name__ == "__main__": - EXCEL_PATH = config.NAICS_EXCEL_PATH - CSV_PATH = config.NAICS_CSV_PATH - CODE_COL = config.NAICS_CODE_COL - TITLE_COL = config.NAICS_TITLE_COL + if len(sys.argv) != 3: + print(f"Usage: {sys.argv[0]} ") + exit(1) + + raw_src = sys.argv[1] + csv_dest = sys.argv[2] + + if not os.path.isfile(raw_src): + print(f"source file not existed: {raw_src}") + exit(2) - # check for paths - if not os.path.isfile(EXCEL_PATH): - error_msg = "Input excel file not existed" - raise FileNotFoundError(error_msg) - if os.path.isfile(CSV_PATH): - error_msg = "Output csv file existed" - raise FileExistsError(error_msg) + if os.path.isfile(csv_dest): + print("destination file already existed: {csv_dest}") + exit(3) - df = pd.read_excel(EXCEL_PATH, dtype=str, na_filter=False) + df = pd.read_excel(raw_src, dtype=str, na_filter=False) + + print(f'source file successfully read: {raw_src}') # add header result = [["code", "title"]] @@ -38,12 +44,14 @@ # read excel file # and create csv data list for index, row in df.iterrows(): - code = str(row[CODE_COL]) + code = str(row[NAICS_CODE_COL]) if len(code) == 3: - a_row = [code, str(row[TITLE_COL])] + a_row = [code, str(row[NAICS_TITLE_COL])] result.append(a_row) # output data to csv file - with open(CSV_PATH, "w") as f: + with open(csv_dest, "w") as f: writer = csv.writer(f) writer.writerows(result) + + print(f'destination file successfully written: {csv_dest}') diff --git a/poetry.lock b/poetry.lock index 7fcbe376..600691d0 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.5.1 and should not be changed by hand. 
[[package]] name = "black" @@ -136,6 +136,17 @@ files = [ [package.extras] toml = ["tomli"] +[[package]] +name = "et-xmlfile" +version = "1.1.0" +description = "An implementation of lxml.xmlfile for the standard library" +optional = false +python-versions = ">=3.6" +files = [ + {file = "et_xmlfile-1.1.0-py3-none-any.whl", hash = "sha256:a2ba85d1d6a74ef63837eed693bcb89c3f752169b0e3e7ae5b16ca5e1b3deada"}, + {file = "et_xmlfile-1.1.0.tar.gz", hash = "sha256:8eb9e2bc2f8c97e37a2dc85a09ecdcdec9d8a396530a6d5a33b30b9a92da0c5c"}, +] + [[package]] name = "iniconfig" version = "2.0.0" @@ -203,6 +214,20 @@ files = [ {file = "numpy-1.25.2.tar.gz", hash = "sha256:fd608e19c8d7c55021dffd43bfe5492fab8cc105cc8986f813f8c3c048b38760"}, ] +[[package]] +name = "openpyxl" +version = "3.1.2" +description = "A Python library to read/write Excel 2010 xlsx/xlsm files" +optional = false +python-versions = ">=3.6" +files = [ + {file = "openpyxl-3.1.2-py2.py3-none-any.whl", hash = "sha256:f91456ead12ab3c6c2e9491cf33ba6d08357d802192379bb482f1033ade496f5"}, + {file = "openpyxl-3.1.2.tar.gz", hash = "sha256:a6f5977418eff3b2d5500d54d9db50c8277a368436f4e4f8ddb1be3422870184"}, +] + +[package.dependencies] +et-xmlfile = "*" + [[package]] name = "packaging" version = "23.1" @@ -642,4 +667,4 @@ files = [ [metadata] lock-version = "2.0" python-versions = "^3.11" -content-hash = "03e6adb7dcecd12194f8c44033d68666019c5bb52f8fd4bccd7301067832c9e1" +content-hash = "ac6360d9068e34f6bbad74a6c3339a85dd1968267f7272b48b8a99dfc5702812" diff --git a/pyproject.toml b/pyproject.toml index 4e13e24c..e959153d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,6 +16,9 @@ pytest-cov = "4.1.0" black = "23.3.0" ruff = "0.0.259" +[tool.poetry.group.data.dependencies] +openpyxl = "^3.1.2" + [build-system] requires = ["poetry-core"] build-backend = "poetry.core.masonry.api" diff --git a/tools/__init__.py b/tools/__init__.py deleted file mode 100644 index e69de29b..00000000 From 9044db81c1efac13f3fe94083ae21cb398f25ff3 Mon 
Sep 17 00:00:00 2001 From: Hans Keeler Date: Tue, 17 Oct 2023 02:30:02 -0400 Subject: [PATCH 05/33] Add README for external data sources --- data/census/README.md | 3 +++ data/naics/README.md | 3 +++ 2 files changed, 6 insertions(+) create mode 100644 data/census/README.md create mode 100644 data/naics/README.md diff --git a/data/census/README.md b/data/census/README.md new file mode 100644 index 00000000..0b57af2e --- /dev/null +++ b/data/census/README.md @@ -0,0 +1,3 @@ +# FFIEC's Census Flat File + +- https://www.ffiec.gov/censusapp.htm diff --git a/data/naics/README.md b/data/naics/README.md new file mode 100644 index 00000000..fce44290 --- /dev/null +++ b/data/naics/README.md @@ -0,0 +1,3 @@ +# North American Industry Classification System (NAICS) codes + +- https://www.census.gov/naics/?48967 From 8bc7ab3e98fd78e90999a57da98e3c96f67712dc Mon Sep 17 00:00:00 2001 From: Hans Keeler Date: Tue, 17 Oct 2023 03:19:42 -0400 Subject: [PATCH 06/33] Improve SBLCheck constructor args - Use required enum-based `severity` arg over boolean `warning` with default to false. This default is likely partially the cause of several warning-level validations being set to error. - Remove `test_checks.py` as those tests are no longer needed with refactored `SBLCheck`. - Refactor JSON output to use `severity` enum value - Refactor exception handling for Pandera's `SchemaErrors` --- regtech_data_validator/checks.py | 86 +++----- regtech_data_validator/create_schemas.py | 67 +++--- regtech_data_validator/phase_validations.py | 232 ++++++++++++++++---- tests/test_checks.py | 27 --- 4 files changed, 261 insertions(+), 151 deletions(-) delete mode 100644 tests/test_checks.py diff --git a/regtech_data_validator/checks.py b/regtech_data_validator/checks.py index 29677a18..f9b933e4 100644 --- a/regtech_data_validator/checks.py +++ b/regtech_data_validator/checks.py @@ -1,73 +1,57 @@ -"""Custom subclass for warnings and errors. 
- -The class SBLCheck is a subclass of the standard Pandera Check class -that requires the `name` kwarg to be supplied. Errors and warnings are -distinguised based on the value of the warning attribute. It defaults -to false but can be set to True during init to indicate the validation -should be handled as a warning rather than an error. - -Examples: - - warning_check = SBLCheck( - lambda: True, - warning=True, - name="Just a Warning" - ) - - error_check_implied = SBLCheck(lambda: True, name="Error Check") - - error_check_explicit = SBLCheck( - lambda: True, - warning=False, - name="Also an Error" - ) +""" +Subclasses of Pandera's `Check` class """ - +from enum import StrEnum from typing import Any, Callable, Type from pandera import Check from pandera.backends.base import BaseCheckBackend from pandera.backends.pandas.checks import PandasCheckBackend +class Severity(StrEnum): + ERROR = 'error' + WARNING = 'warning' class SBLCheck(Check): - """A custom Pandera.Check subclasss that requires a `name` and an `id` be + """ + A Pandera.Check subclasss that requires a `name` and an `id` be specified. Additionally, an attribute named `warning` is added to the class to enable distinction between warnings and errors. The default value of warning is `False` which corresponds to an error. - Don't use this class directly. Make use of the SBLErrorCheck and - SBLWarningCheck subclasses below.""" - - def __init__(self, check_fn: Callable, id: str = None, warning=False, *args, **kwargs): - """Custom init method that verifies the presence of `name` and `id` in - kwargs creates a custom class attribute called `warning`. All - other initializaiton is handled by the parent Check class. - + SBLWarningCheck subclasses below. 
+ """ + + def __init__(self, + check_fn: Callable, + id: str, + name: str, + description: str, + severity: Severity, + **check_kwargs): + """ + Subclass of Pandera's `Check`, with special handling for severity level Args: - check_fn (Callable): A function which evaluates the validity - of the column(s) being tested. - id (str, required): Each check mut have an id. - warning (bool, optional): Boolean specifying whether to - treat the check as a warning rather than an error. - - Raises: - ValueError: Raised if `name` not supplied in kwargs and if id is not - supplied or None. + check_fn (Callable): A function which evaluates the validity of the column(s) being tested. + id (str, required): Unique identifier for a check + name (str, required): Unique name for a check + description (str, required): Long-form description of a check + severity (Severity, required): The severity of a check (error or warning) + check_kwargs (Any, optional): Parameters passed to `check_fn` function """ - self.id = id - - if "name" not in kwargs or id is None: - raise ValueError("Each check must be assigned a `name` and an `id`.") - - # if warning==False treat check as an error check - self.warning = warning + self.severity = severity - super().__init__(check_fn=check_fn, *args, **kwargs) + super().__init__( + check_fn, + title=id, + name=name, + description=description, + **check_kwargs + ) @classmethod def get_backend(cls, check_obj: Any) -> Type[BaseCheckBackend]: """Assume Pandas DataFrame and return PandasCheckBackend""" - return PandasCheckBackend + return PandasCheckBackend \ No newline at end of file diff --git a/regtech_data_validator/create_schemas.py b/regtech_data_validator/create_schemas.py index 76784af3..e547a953 100644 --- a/regtech_data_validator/create_schemas.py +++ b/regtech_data_validator/create_schemas.py @@ -3,12 +3,13 @@ import pandas as pd from pandera import DataFrameSchema -from pandera.errors import SchemaErrors +from pandera.errors import SchemaErrors, 
SchemaError from regtech_data_validator.checks import SBLCheck from regtech_data_validator.phase_validations import get_phase_1_and_2_validations_for_lei from regtech_data_validator.schema_template import get_template + # Get separate schema templates for phase 1 and 2 @@ -16,58 +17,60 @@ phase_2_template = get_template() -def get_schema_by_phase_for_lei(template: dict, phase: str, lei: str = None): +def get_schema_by_phase_for_lei(template: dict, phase: str, lei: str|None = None): for column in get_phase_1_and_2_validations_for_lei(lei): validations = get_phase_1_and_2_validations_for_lei(lei)[column] template[column].checks = validations[phase] return DataFrameSchema(template) -def get_phase_1_schema_for_lei(lei: str = None): +def get_phase_1_schema_for_lei(lei: str|None = None): return get_schema_by_phase_for_lei(phase_1_template, "phase_1", lei) -def get_phase_2_schema_for_lei(lei: str = None): +def get_phase_2_schema_for_lei(lei: str|None = None): return get_schema_by_phase_for_lei(phase_2_template, "phase_2", lei) -def validate(schema: DataFrameSchema, df: pd.DataFrame): +def validate(schema: DataFrameSchema, df: pd.DataFrame) -> list[dict]: """ validate received dataframe with schema and return list of schema errors - Args: schema (DataFrameSchema): schema to be used for validation df (pd.DataFrame): data parsed into dataframe - Returns: - list of schema error + list of validation findings (warnings and errors) """ findings = [] try: schema(df, lazy=True) - except SchemaErrors as errors: - for error in errors.schema_errors: - check: SBLCheck = error.check - column_name = error.schema.name - check_id = "n/a" - + except SchemaErrors as err: + + # WARN: SchemaErrors.schema_errors is supposed to be of type + # list[dict[str,Any]], but it's actually of type SchemaError + schema_error: SchemaError + for schema_error in err.schema_errors: # type: ignore + check = schema_error.check + column_name = schema_error.schema.name + + if not check: + raise RuntimeError( + 
f'SchemaError occurred with no associated Check for {column_name} column' + ) from schema_error + + if not isinstance(check, SBLCheck): + raise RuntimeError( + f'Check {check} type on {column_name} column not supported. Must be of type {SBLCheck}' + ) from schema_error + fields: list[str] = [column_name] - if hasattr(check, "name") and hasattr(check, "id"): - check_name: str = check.name - check_id: str = check.id - - if check.groupby: - fields += check.groupby # type: ignore + if check.groupby: + fields += check.groupby # type: ignore - # This will either be a boolean series or a single bool - check_output = error.check_output - else: - # This means this check's column has unique set to True. - # we shouldn't be using Unique flag as it doesn't return series of - # validation result . it returns just a printout result string/txt - raise AttributeError(f"{str(check)}") + # This will either be a boolean series or a single bool + check_output = schema_error.check_output # Remove duplicates, but keep as `list` for JSON-friendliness fields = list(set(fields)) @@ -93,11 +96,11 @@ def validate(schema: DataFrameSchema, df: pd.DataFrame): validation_findings = { "validation": { - "id": check_id, - "name": check_name, + "id": check.title, + "name": check.name, "description": check.description, + "severity": check.severity, "fields": fields, - "severity": "warning" if check.warning else "error", }, "records": records, } @@ -107,7 +110,7 @@ def validate(schema: DataFrameSchema, df: pd.DataFrame): return findings -def validate_phases(df: pd.DataFrame, lei: str = None) -> list: +def validate_phases(df: pd.DataFrame, lei: str|None = None) -> list: phase1_findings = validate(get_phase_1_schema_for_lei(lei), df) if phase1_findings: return phase1_findings @@ -116,4 +119,4 @@ def validate_phases(df: pd.DataFrame, lei: str = None) -> list: if phase2_findings: return phase2_findings else: - return [{"response": "No validations errors or warnings"}] + return [{"response": "No 
validations errors or warnings"}] \ No newline at end of file diff --git a/regtech_data_validator/phase_validations.py b/regtech_data_validator/phase_validations.py index fc2ad5ea..6d65aef5 100644 --- a/regtech_data_validator/phase_validations.py +++ b/regtech_data_validator/phase_validations.py @@ -28,10 +28,10 @@ meets_multi_value_field_restriction, string_contains, ) -from regtech_data_validator.checks import SBLCheck +from regtech_data_validator.checks import SBLCheck, Severity -def get_phase_1_and_2_validations_for_lei(lei: str = None): +def get_phase_1_and_2_validations_for_lei(lei: str|None = None): return { "uid": { "phase_1": [ @@ -43,6 +43,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): "Any 'unique identifier' may not be used in more than one " "record within a small business lending application register." ), + severity=Severity.ERROR, groupby="uid", ), SBLCheck.str_length( @@ -54,6 +55,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): "'Unique identifier' must be at least 21 characters " "in length and at most 45 characters in length." ), + severity=Severity.ERROR, ), SBLCheck( has_valid_format, @@ -64,6 +66,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): "numbers and/or uppercase letters (i.e., 0-9 and A-Z), " "and must not contain any other characters." ), + severity=Severity.ERROR, element_wise=True, regex="^[A-Z0-9]+$", ), @@ -76,6 +79,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " match the Legal Entity Identifier (LEI) for the financial" " institution." 
), + severity=Severity.WARNING, element_wise=True, containing_value=lei, end_idx=20, @@ -90,6 +94,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): id="E0020", name="app_date.invalid_date_format", description="'Application date' must be a real calendar date using YYYYMMDD format.", + severity=Severity.ERROR, element_wise=True, ), ], @@ -102,6 +107,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): id="E0040", name="app_method.invalid_enum_value", description="'Application method' must equal 1, 2, 3, or 4.", + severity=Severity.ERROR, element_wise=True, accepted_values=[ "1", @@ -120,6 +126,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): id="E0060", name="app_recipient.invalid_enum_value", description="'Application recipient' must equal 1 or 2", + severity=Severity.ERROR, element_wise=True, accepted_values=[ "1", @@ -136,6 +143,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): id="E0080", name="ct_credit_product.invalid_enum_value", description="'Credit product' must equal 1, 2, 3, 4, 5, 6, 7, 8, 977, or 988.", + severity=Severity.ERROR, element_wise=True, accepted_values=[ "1", @@ -155,6 +163,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): }, "ct_credit_product_ff": { "phase_1": [ + # FIXME: built-in Pandera checks do not support add'l params like `severity` SBLCheck.str_length( 0, 300, @@ -163,6 +172,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): description=( "'Free-form text field for other credit products' must not exceed 300 characters in length." ), + severity=Severity.ERROR, ) ], "phase_2": [ @@ -176,6 +186,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): "When 'credit product' equals 977, 'free-form text field " "for other credit products' must not be blank." 
), + severity=Severity.ERROR, groupby="ct_credit_product", condition_values={"977"}, ), @@ -192,6 +203,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " semicolons) must equal 1, 2, 3, 4, 5, 6, 7, 8," " 9, 10, 11, 977, or 999." ), + severity=Severity.ERROR, element_wise=True, accepted_values=[ "1", @@ -219,6 +231,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): "'Type of guarantee' must contain at least one and at" " most five values, separated by semicolons." ), + severity=Severity.ERROR, element_wise=True, min_length=1, max_length=5, @@ -226,21 +239,21 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): SBLCheck( is_unique_in_field, id="W0123", - warning=True, name="ct_guarantee.duplicates_in_field", description="'Type of guarantee' should not contain duplicated values.", + severity=Severity.WARNING, element_wise=True, ), SBLCheck( meets_multi_value_field_restriction, id="W0122", - warning=True, name="ct_guarantee.multi_value_field_restriction", description=( "When 'type of guarantee' contains 999 (no guarantee)," " 'type of guarantee' should not contain more than one" " value." ), + severity=Severity.WARNING, element_wise=True, single_values={"999"}, ), @@ -254,6 +267,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): id="E0140", name="ct_guarantee_ff.invalid_text_length", description="'Free-form text field for other guarantee' must not exceed 300 characters in length", + severity=Severity.ERROR, ), ], "phase_2": [ @@ -267,13 +281,13 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): "When 'type of guarantee' contains 977, 'free-form text field" " for other guarantee' must not be blank." 
), + severity=Severity.ERROR, groupby="ct_guarantee", condition_values={"977"}, ), SBLCheck( has_valid_multi_field_value_count, id="W2006", - warning=True, name="ct_guarantee_ff.multi_invalid_number_of_values", description=( "'Type of guarantee' and 'free-form text field for other " @@ -282,6 +296,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): "toward the maximum number of values for the purpose of this " "validation check." ), + severity=Severity.WARNING, groupby="ct_guarantee", ignored_values={"977"}, max_length=5, @@ -297,6 +312,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): description=( "Each value in 'Loan term: NA/NP flag' (separated by semicolons) must equal 900, 988, or 999." ), + severity=Severity.ERROR, element_wise=True, accepted_values=[ "900", @@ -318,6 +334,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): "and otherwise undetermined), 'loan term: NA/NP flag' must" "equal 999." ), + severity=Severity.ERROR, groupby="ct_credit_product", conditions=[ { @@ -343,6 +360,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): id="E0180", name="ct_loan_term.invalid_numeric_format", description="When present, 'loan term' must be a whole number.", + severity=Severity.ERROR, element_wise=True, accept_blank=True, ), @@ -357,6 +375,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): "and reported), 'loan term' must be blank. When 'loan term:" "NA/NP flag' equals 900, 'loan term' must not be blank." 
), + severity=Severity.ERROR, groupby="ct_loan_term_flag", condition_values={"900"}, ), @@ -365,6 +384,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): id="E0181", name="ct_loan_term.invalid_numeric_value", description="When present, 'loan term' must be greater than or equal to 1.", + severity=Severity.ERROR, element_wise=True, min_value="1", accept_blank=True, @@ -374,6 +394,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): id="W0182", name="ct_loan_term.unreasonable_numeric_value", description="When present, 'loan term' should be less than 1200 (100 years).", + severity=Severity.WARNING, element_wise=True, max_value="1200", accept_blank=True, @@ -391,6 +412,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " semicolons) must equal 1, 2, 3, 4, 5, 6, 7, 8," " 9, 10, 11, 977, 988, or 999." ), + severity=Severity.ERROR, element_wise=True, accepted_values=[ "1", @@ -418,6 +440,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): description=( "'Credit purpose' must contain at least one and at most three values, separated by semicolons." ), + severity=Severity.ERROR, element_wise=True, min_length=1, max_length=3, @@ -425,13 +448,13 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): SBLCheck( meets_multi_value_field_restriction, id="W0202", - warning=True, name="credit_purpose.multi_value_field_restriction", description=( "When 'credit purpose' contains 988 or 999," " 'credit purpose' should not contain more than one" " value." 
), + severity=Severity.WARNING, element_wise=True, single_values={ "988", @@ -441,9 +464,9 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): SBLCheck( is_unique_in_field, id="W0203", - warning=True, name="credit_purpose.duplicates_in_field", description="'Credit purpose' should not contain duplicated values.", + severity=Severity.WARNING, element_wise=True, ), ], @@ -458,6 +481,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): description=( "'Free-form text field for other credit purpose' must not exceed 300 characters in length" ), + severity=Severity.ERROR, ), ], "phase_2": [ @@ -471,13 +495,13 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): "When 'credit purpose' contains 977, 'free-form text field for" "other credit purpose' must not be blank." ), + severity=Severity.ERROR, groupby="credit_purpose", condition_values={"977"}, ), SBLCheck( has_valid_multi_field_value_count, id="W2006", - warning=True, name="credit_purpose_ff.multi_invalid_number_of_values", description=( "'Credit purpose' and 'free-form text field for other credit " @@ -486,6 +510,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): "toward the maximum number of values for the purpose of " "this validation check." 
), + severity=Severity.WARNING, groupby="credit_purpose", ignored_values={"977"}, max_length=3, @@ -499,6 +524,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): id="E0240", name="amount_applied_for_flag.invalid_enum_value", description="'Amount applied For: NA/NP flag' must equal 900, 988, or 999.", + severity=Severity.ERROR, element_wise=True, accepted_values=[ "900", @@ -516,6 +542,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): id="E0260", name="amount_applied_for.invalid_numeric_format", description="When present, 'amount applied for' must be a numeric value.", + severity=Severity.ERROR, element_wise=True, accept_blank=True, ), @@ -531,6 +558,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): "When 'amount applied for: NA/NP flag' equals 900, " "'amount applied for' must not be blank." ), + severity=Severity.ERROR, groupby="amount_applied_for_flag", condition_values={"900"}, ), @@ -539,6 +567,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): id="E0261", name="amount_applied_for.invalid_numeric_value", description="When present, 'amount applied for' must be greater than 0.", + severity=Severity.ERROR, element_wise=True, min_value="0", accept_blank=True, @@ -552,6 +581,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): id="E0280", name="amount_approved.invalid_numeric_format", description="When present, 'amount approved or originated' must be a numeric value.", + severity=Severity.ERROR, element_wise=True, accept_blank=True, ), @@ -562,6 +592,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): id="E0281", name="amount_approved.invalid_numeric_value", description="When present, 'amount approved or originated' must be greater than 0.", + severity=Severity.ERROR, element_wise=True, min_value="0", accept_blank=True, @@ -577,6 +608,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): "equals 1 or 2, 'amount approved or originated' must " "not be blank." 
), + severity=Severity.ERROR, groupby="action_taken", condition_values={"1", "2"}, ), @@ -589,6 +621,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): id="E0300", name="action_taken.invalid_enum_value", description="'Action taken' must equal 1, 2, 3, 4, or 5.", + severity=Severity.ERROR, element_wise=True, accepted_values=[ "1", @@ -618,6 +651,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): "'Total origination charges', 'Amount of " "total broker fees', 'Initial annual charges'" ), + severity=Severity.ERROR, groupby=[ "pricing_interest_rate_type", "pricing_mca_addcost_flag", @@ -653,6 +687,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): "penalty could be imposed', 'Prepayment " "penalty exists'" ), + severity=Severity.ERROR, groupby=[ "pricing_origination_charges", "pricing_broker_fees", @@ -678,6 +713,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): id="E0320", name="action_taken_date.invalid_date_format", description="'Action taken date' must be a real calendar date using YYYYMMDD format.", + severity=Severity.ERROR, element_wise=True, ), ], @@ -691,6 +727,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " within the current reporting period:" " October 1, 2024 to December 31, 2024." ), + severity=Severity.ERROR, element_wise=True, start_date_value="20241001", end_date_value="20241231", @@ -700,6 +737,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): id="E2009", name="action_taken_date.date_value_conflict", description="The date indicated by 'action taken date' must occur on or after 'application date'.", + severity=Severity.ERROR, groupby="app_date", ), SBLCheck( @@ -711,6 +749,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " generally be less than two years (730 days) before" " 'action taken date'." 
), + severity=Severity.WARNING, groupby="app_date", days_value=730, ), @@ -726,6 +765,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): "Each value in 'denial reason(s)' (separated by semicolons)" "must equal 1, 2, 3, 4, 5, 6, 7, 8, 9, 977, or 999." ), + severity=Severity.ERROR, element_wise=True, accepted_values=[ "1", @@ -750,6 +790,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): description=( "'Denial reason(s)' must contain at least one and at most fourvalues, separated by semicolons." ), + severity=Severity.ERROR, element_wise=True, min_length=1, max_length=4, @@ -763,6 +804,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): "contain 999. When 'action taken' does not equal 3, 'denial" "reason(s)' must equal 999." ), + severity=Severity.ERROR, groupby="action_taken", conditions=[ { @@ -782,21 +824,21 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): SBLCheck( meets_multi_value_field_restriction, id="W0340", - warning=True, name="denial_reasons.multi_value_field_restriction", description=( "When 'denial reason(s)' contains 999 (not applicable)," "'denial reason(s)' should not contain more than one value." ), + severity=Severity.WARNING, element_wise=True, single_values={"999"}, ), SBLCheck( is_unique_in_field, id="W0341", - warning=True, name="denial_reasons.duplicates_in_field", description="'Denial reason(s)' should not contain duplicated values.", + severity=Severity.WARNING, element_wise=True, ), ], @@ -811,6 +853,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): description=( "'Free-form text field for other denial reason(s)'must not exceed 300 characters in length." ), + severity=Severity.ERROR, ), ], "phase_2": [ @@ -824,13 +867,13 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): "blank. When 'denial reason(s)' contains 977, 'free-form text" "field for other denial reason(s)' must not be blank." 
), + severity=Severity.ERROR, groupby="denial_reasons", condition_values={"977"}, ), SBLCheck( has_valid_multi_field_value_count, id="W2013", - warning=True, name="denial_reasons_ff.multi_invalid_number_of_values", description=( "'Denial reason(s)' and 'free-form text field for other " @@ -839,6 +882,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): "does not count toward the maximum number of values for " "the purpose of this validation check." ), + severity=Severity.WARNING, groupby="denial_reasons", ignored_values={"977"}, max_length=4, @@ -855,6 +899,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): "Each value in 'Interest rate type' (separated by " " semicolons) Must equal 1, 2, 3, 4, 5, 6, or 999" ), + severity=Severity.ERROR, element_wise=True, accepted_values=[ "1", @@ -875,7 +920,8 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): is_number, id="E0400", name="pricing_init_rate_period.invalid_numeric_format", - description=("When present, 'initial rate period' must be a whole number.",), + description="When present, 'initial rate period' must be a whole number.", + severity=Severity.ERROR, element_wise=True, accept_blank=True, ), @@ -894,6 +940,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): "be blank. 
When 'interest rate type' equals 3, 4, 5, or 6, " "'initial rate period' must not be blank" ), + severity=Severity.ERROR, groupby="pricing_interest_rate_type", condition_values={"3", "4", "5", "6"}, ), @@ -901,7 +948,8 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): is_greater_than, id="E0401", name="pricing_init_rate_period.invalid_numeric_value", - description=("When present, 'initial rate period' must be greater than 0",), + description="When present, 'initial rate period' must be greater than 0", + severity=Severity.ERROR, element_wise=True, min_value="0", accept_blank=True, @@ -915,6 +963,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): id="E0420", name="pricing_fixed_rate.invalid_numeric_format", description="When present, 'fixed rate: interest rate' must be a numeric value.", + severity=Severity.ERROR, element_wise=True, accept_blank=True, ), @@ -933,6 +982,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " blank. When 'interest rate type' equals 2, 4, or 6," " 'fixed rate: interest rate' must not be blank." ), + severity=Severity.ERROR, groupby="pricing_interest_rate_type", condition_values={"2", "4", "6"}, ), @@ -941,6 +991,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): id="W0420", name="pricing_fixed_rate.unreasonable_numeric_value", description="When present, 'fixed rate: interest rate' should generally be greater than 0.1.", + severity=Severity.WARNING, element_wise=True, min_value="0.1", accept_blank=True, @@ -954,6 +1005,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): id="E0440", name="pricing_adj_margin.invalid_numeric_format", description="When present, 'adjustable rate transaction: margin' must be a numeric value.", + severity=Severity.ERROR, element_wise=True, accept_blank=True, ), @@ -972,6 +1024,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): "be blank. 
When 'interest rate type' equals 1, 3, or 5, " "'variable rate transaction: margin' must not be blank." ), + severity=Severity.ERROR, groupby="pricing_interest_rate_type", condition_values={"1", "3", "5"}, ), @@ -982,6 +1035,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): description=( "When present, 'adjustable rate transaction: margin' should generally be greater than 0.1." ), + severity=Severity.ERROR, element_wise=True, min_value="0.1", accept_blank=True, @@ -998,6 +1052,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): "'Adjustable rate transaction: index name' must equal " "1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 977, or 999." ), + severity=Severity.ERROR, element_wise=True, accepted_values=[ "1", @@ -1029,6 +1084,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): "When 'interest rate type' equals 1, 3, or 5, 'adjustable rate" "transaction: index name' must not equal 999." ), + severity=Severity.ERROR, groupby="pricing_interest_rate_type", conditions=[ { @@ -1057,6 +1113,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): description=( "'Adjustable rate transaction: index name: other' must not exceed 300 characters in length." ), + severity=Severity.ERROR, ), ], "phase_2": [ @@ -1072,6 +1129,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): "'adjustable rate transaction: index name: other' must not be" "blank." ), + severity=Severity.ERROR, groupby="pricing_adj_index_name", condition_values={"977"}, ), @@ -1084,6 +1142,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): id="E0500", name="pricing_adj_index_value.invalid_numeric_format", description="When present, 'adjustable rate transaction: index value' must be a numeric value.", + severity=Severity.ERROR, element_wise=True, accept_blank=True, ), @@ -1101,6 +1160,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " blank. 
When 'interest rate type' equals 1 or 3," " 'adjustable rate transaction: index value' must not be blank." ), + severity=Severity.ERROR, groupby="pricing_interest_rate_type", condition_values={"1", "3"}, ), @@ -1112,10 +1172,8 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): is_number, id="E0520", name="pricing_origination_charges.invalid_numeric_format", - description=( - "When present, 'total origination charges' must be a numeric", - "value.", - ), + description="When present, 'total origination charges' must be a numeric value.", + severity=Severity.ERROR, element_wise=True, accept_blank=True, ), @@ -1128,10 +1186,8 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): is_number, id="E0540", name="pricing_broker_fees.invalid_numeric_format", - description=( - "When present, 'amount of total broker fees' must be a", - "numeric value.", - ), + description="When present, 'amount of total broker fees' must be a numeric value.", + severity=Severity.ERROR, element_wise=True, accept_blank=True, ), @@ -1145,6 +1201,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): id="E0560", name="pricing_initial_charges.invalid_numeric_format", description="When present, 'initial annual charges' must be anumeric value.", + severity=Severity.ERROR, element_wise=True, accept_blank=True, ), @@ -1162,6 +1219,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): "advances or other sales-based financing: NA flag' " "must equal 900 or 999." ), + severity=Severity.ERROR, element_wise=True, accepted_values=[ "900", @@ -1181,6 +1239,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): "merchant cash advances or other sales-based financing: " "NA flag' must be 999 (not applicable)." 
), + severity=Severity.ERROR, groupby="ct_credit_product", conditions=[ { @@ -1204,6 +1263,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): "merchant cash advances or other sales-based financing' " "must be a numeric value" ), + severity=Severity.ERROR, element_wise=True, accept_blank=True, ), @@ -1224,6 +1284,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): "additional cost for merchant cash advances or other " "sales-based financing’ must not be blank." ), + severity=Severity.ERROR, groupby="pricing_mca_addcost_flag", condition_values={"900"}, ), @@ -1236,6 +1297,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): id="E0620", name="pricing_prepenalty_allowed.invalid_enum_value", description="'Prepayment penalty could be imposed' must equal 1, 2, or 999.", + severity=Severity.ERROR, element_wise=True, accepted_values=[ "1", @@ -1253,6 +1315,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): id="E0640", name="pricing_prepenalty_exists.invalid_enum_value", description="'Prepayment penalty exists' must equal 1, 2, or 999.", + severity=Severity.ERROR, element_wise=True, accepted_values=[ "1", @@ -1270,6 +1333,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): id="E0640", name="census_tract_adr_type.invalid_enum_value", description="'Census tract: type of address' must equal 1, 2, 3, or 988.", + severity=Severity.ERROR, element_wise=True, accepted_values=[ "1", @@ -1288,6 +1352,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): id="E0680", name="census_tract_number.invalid_text_length", description="When present, 'census tract: tract number' must be a GEOID with exactly 11 digits.", + severity=Severity.ERROR, element_wise=True, accepted_length=11, accept_blank=True, @@ -1309,6 +1374,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): "location associated with the applicant), 'census tract:" " tract number' must not be blank." 
), + severity=Severity.ERROR, groupby="census_tract_adr_type", conditions=[ { @@ -1334,6 +1400,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): id="E0700", name="gross_annual_revenue_flag.invalid_enum_value", description="'Gross annual revenue: NP flag' must equal 900 or 988.", + severity=Severity.ERROR, element_wise=True, accepted_values=[ "900", @@ -1350,6 +1417,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): id="E0720", name="gross_annual_revenue.invalid_numeric_format", description="When present, 'gross annual revenue' must be a numeric value.", + severity=Severity.ERROR, element_wise=True, accept_blank=True, ), @@ -1365,6 +1433,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): "'gross annual revenue: NP flag' equals 900, " "'gross annual revenue' must not be blank." ), + severity=Severity.ERROR, groupby="gross_annual_revenue_flag", condition_values={"900"}, ), @@ -1377,8 +1446,10 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): id="E0720", name="naics_code_flag.invalid_enum_value", description=( - "'North American Industry Classification System (NAICS) code: NP flag' must equal 900 or 988." + "'North American Industry Classification System (NAICS) code: NP flag'" + "must equal 900 or 988." ), + severity=Severity.ERROR, element_wise=True, accepted_values=[ "900", @@ -1398,6 +1469,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): "'North American Industry Classification System " "(NAICS) code' may only contain numeric characters." ), + severity=Severity.ERROR, element_wise=True, accept_blank=True, ), @@ -1411,6 +1483,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): "When present, 'North American Industry Classification System " "(NAICS) code' must be three digits in length." 
), + severity=Severity.ERROR, element_wise=True, accepted_length=3, accept_blank=True, @@ -1423,6 +1496,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): "When present, 'North American Industry Classification System " "(NAICS) code' should be a valid NAICS code." ), + severity=Severity.WARNING, element_wise=True, accept_blank=True, codes=global_data.naics_codes, @@ -1437,6 +1511,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): "When 'type of guarantee' contains 977, 'free-form text field" " for other guarantee' must not be blank." ), + severity=Severity.ERROR, groupby="naics_code_flag", condition_values={"900"}, ), @@ -1449,6 +1524,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): id="E0780", name="number_of_workers.invalid_enum_value", description="'Number of workers' must equal 1, 2, 3, 4, 5, 6, 7, 8, 9, or 988.", + severity=Severity.ERROR, element_wise=True, accepted_values=[ "1", @@ -1473,6 +1549,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): id="E0800", name="time_in_business_type.invalid_enum_value", description="'Time in business: type of response' must equal 1, 2, 3, or 988.", + severity=Severity.ERROR, element_wise=True, accepted_values=[ "1", @@ -1491,6 +1568,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): id="E0820", name="time_in_business.invalid_numeric_format", description="When present, 'time in business' must be a whole number.", + severity=Severity.ERROR, element_wise=True, accept_blank=True, ), @@ -1501,6 +1579,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): id="E0821", name="time_in_business.invalid_numeric_value", description="When present, 'time in business' must be greater than or equal to 0.", + severity=Severity.ERROR, element_wise=True, min_value="0", accept_blank=True, @@ -1517,6 +1596,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " 'time in business: type of response' equals 1," " 'time in business' must not be blank." 
), + severity=Severity.ERROR, groupby="time_in_business_type", condition_values={"1"}, ), @@ -1533,6 +1613,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " (separated by semicolons) must equal 1, 2, 3," " 955, 966, or 988." ), + severity=Severity.ERROR, element_wise=True, accepted_values=[ "1", @@ -1550,21 +1631,21 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): id="E0841", name="business_ownership_status.invalid_number_of_values", description="'Business ownership status' must contain at least one value.", + severity=Severity.ERROR, element_wise=True, min_length=1, ), SBLCheck( is_unique_in_field, id="W0842", - warning=True, name="business_ownership_status.duplicates_in_field", description="'Business ownership status' should not contain duplicated values.", + severity=Severity.WARNING, element_wise=True, ), SBLCheck( meets_multi_value_field_restriction, id="W0843", - warning=True, name="business_ownership_status.multi_value_field_restriction", description=( "When 'business ownership status' contains 966" @@ -1573,6 +1654,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " by applicant), 'business ownership status' should" " not contain more than one value." ), + severity=Severity.WARNING, element_wise=True, single_values={"966", "988"}, ), @@ -1585,6 +1667,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): id="E0860", name="num_principal_owners_flag.invalid_enum_value", description="'Number of principal owners: NP flag' must equal 900 or 988.", + severity=Severity.ERROR, element_wise=True, accepted_values=[ "900", @@ -1602,6 +1685,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): "demographic fields for principal owners 1, 2, 3, and 4 " "should be blank." ), + severity=Severity.WARNING, groupby=[ "po_1_ethnicity", "po_1_race", @@ -1643,6 +1727,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " blank. Demographic fields for principal owners 2, 3, and 4 " "should be blank." 
), + severity=Severity.WARNING, groupby=[ "po_1_ethnicity", "po_1_race", @@ -1683,6 +1768,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): "owner 1 and 2', and 'sex/gender of principal owner 1 and 2: " "NP flag' should not be blank." ), + severity=Severity.WARNING, groupby=[ "po_1_ethnicity", "po_1_race", @@ -1724,6 +1810,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): "and 3: NP flag' should not be blank. Demographic fields for " "principal owner 4 should be blank." ), + severity=Severity.WARNING, groupby=[ "po_1_ethnicity", "po_1_race", @@ -1765,6 +1852,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): "and 'sex/gender of principal owner 1, 2, 3, and 4: NP flag'" " should not be blank." ), + severity=Severity.WARNING, groupby=[ "po_1_ethnicity", "po_1_race", @@ -1804,6 +1892,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): id="E0880", name="num_principal_owners.invalid_enum_value", description="When present, 'number of principal owners' must equal 0, 1, 2, 3, or 4.", + severity=Severity.ERROR, element_wise=True, accepted_values=["0", "1", "2", "3", "4"], accept_blank=True, @@ -1820,6 +1909,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): "When 'number of principal owners: NP flag' equals 900, " "'number of principal owners' must not be blank." ), + severity=Severity.ERROR, groupby="num_principal_owners_flag", condition_values={"900"}, ), @@ -1837,6 +1927,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " semicolons) must equal 1, 11, 12," " 13, 14, 2, 966, 977, or 988." 
), + severity=Severity.ERROR, element_wise=True, accepted_values=[ "1", @@ -1856,15 +1947,14 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): SBLCheck( is_unique_in_field, id="W0901", - warning=True, name="po_1_ethnicity.duplicates_in_field", description="'Ethnicity of principal owner 1' should not contain duplicated values.", + severity=Severity.WARNING, element_wise=True, ), SBLCheck( meets_multi_value_field_restriction, id="W0902", - warning=True, name="po_1_ethnicity.multi_value_field_restriction", description=( "When 'ethnicity of principal owner 1' contains" @@ -1873,6 +1963,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " (not provided by applicant), 'ethnicity of" " principal owner 1' should not contain more than one value." ), + severity=Severity.WARNING, element_wise=True, single_values={"966", "988"}, ), @@ -1890,6 +1981,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " text field for other Hispanic or Latino'" " must not exceed 300 characters in length." ), + severity=Severity.ERROR, ), ], "phase_2": [ @@ -1907,6 +1999,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " owner 1: free-form text field for other Hispanic" " or Latino' must not be blank." ), + severity=Severity.ERROR, groupby="po_1_ethnicity", condition_values={"977"}, ), @@ -1926,6 +2019,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " 34, 35, 36, 37, 4, 41, 42, 43, 44," " 5, 966, 971, 972, 973, 974, or 988." 
), + severity=Severity.ERROR, element_wise=True, accepted_values=[ "1", @@ -1965,15 +2059,14 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): SBLCheck( is_unique_in_field, id="W0941", - warning=True, name="po_1_race.duplicates_in_field", description="'Race of principal owner 1' should not contain duplicated values.", + severity=Severity.WARNING, element_wise=True, ), SBLCheck( meets_multi_value_field_restriction, id="W0942", - warning=True, name="po_1_race.multi_value_field_restriction", description=( "When 'race of principal owner 1' contains" @@ -1983,6 +2076,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " 'race of principal owner 1' should not" " contain more than one value." ), + severity=Severity.WARNING, element_wise=True, single_values={"966", "988"}, ), @@ -2001,6 +2095,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " Native Enrolled or Principal Tribe' must" " not exceed 300 characters in length." ), + severity=Severity.ERROR, ), ], "phase_2": [ @@ -2021,6 +2116,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " for American Indian or Alaska Native Enrolled or" " Principal Tribe' must not be blank." ), + severity=Severity.ERROR, groupby="po_1_race", condition_values={"971"}, ), @@ -2038,6 +2134,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " field for other Asian' must not exceed 300" " characters in length." ), + severity=Severity.ERROR, ), ], "phase_2": [ @@ -2054,6 +2151,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " 972, 'race of principal owner 1: free-form text field" " for other Asian' must not be blank." ), + severity=Severity.ERROR, groupby="po_1_race", condition_values={"972"}, ), @@ -2071,6 +2169,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " field for other Black or African American'" " must not exceed 300 characters in length." 
), + severity=Severity.ERROR, ), ], "phase_2": [ @@ -2087,6 +2186,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " contains 973, 'race of principal owner 1: free-form text" " field for other Black or African American' must not be blank." ), + severity=Severity.ERROR, groupby="po_1_race", condition_values={"973"}, ), @@ -2104,6 +2204,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " field for other Pacific Islander race' must" " not exceed 300 characters in length." ), + severity=Severity.ERROR, ), ], "phase_2": [ @@ -2120,6 +2221,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " contains 974, 'race of principal owner 1: free-form text" " field for other Pacific Islander race' must not be blank." ), + severity=Severity.ERROR, groupby="po_1_race", condition_values={"974"}, ), @@ -2132,6 +2234,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): id="E1040", name="po_1_gender_flag.invalid_enum_value", description="When present, 'sex/gender of principal owner 1: NP flag' must equal 1, 966, or 988.", + severity=Severity.ERROR, element_wise=True, accepted_values=[ "1", @@ -2155,6 +2258,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " text field for self-identified sex/gender'" " must not exceed 300 characters in length." ), + severity=Severity.ERROR, ), ], "phase_2": [ @@ -2172,6 +2276,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " of principal owner 1: free-form text field for" " self-identified sex/gender' must not be blank." ), + severity=Severity.ERROR, groupby="po_1_gender_flag", condition_values={"1"}, ), @@ -2189,6 +2294,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " semicolons) must equal 1, 11, 12," " 13, 14, 2, 966, 977, or 988." 
), + severity=Severity.ERROR, element_wise=True, accepted_values=[ "1", @@ -2208,15 +2314,14 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): SBLCheck( is_unique_in_field, id="W0901", - warning=True, name="po_2_ethnicity.duplicates_in_field", description="'Ethnicity of principal owner 2' should not contain duplicated values.", + severity=Severity.WARNING, element_wise=True, ), SBLCheck( meets_multi_value_field_restriction, id="W0902", - warning=True, name="po_2_ethnicity.multi_value_field_restriction", description=( "When 'ethnicity of principal owner 2' contains" @@ -2225,6 +2330,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " (not provided by applicant), 'ethnicity of" " principal owner 2' should not contain more than one value." ), + severity=Severity.WARNING, element_wise=True, single_values={"966", "988"}, ), @@ -2242,6 +2348,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " text field for other Hispanic or Latino'" " must not exceed 300 characters in length." ), + severity=Severity.ERROR, ), ], "phase_2": [ @@ -2259,6 +2366,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " owner 2: free-form text field for other Hispanic" " or Latino' must not be blank." ), + severity=Severity.ERROR, groupby="po_2_ethnicity", condition_values={"977"}, ), @@ -2278,6 +2386,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " 34, 35, 36, 37, 4, 41, 42, 43, 44," " 5, 966, 971, 972, 973, 974, or 988." 
), + severity=Severity.ERROR, element_wise=True, accepted_values=[ "1", @@ -2317,15 +2426,14 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): SBLCheck( is_unique_in_field, id="W0941", - warning=True, name="po_2_race.duplicates_in_field", description="'Race of principal owner 2' should not contain duplicated values.", + severity=Severity.WARNING, element_wise=True, ), SBLCheck( meets_multi_value_field_restriction, id="W0942", - warning=True, name="po_2_race.multi_value_field_restriction", description=( "When 'race of principal owner 2' contains" @@ -2335,6 +2443,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " 'race of principal owner 2' should not" " contain more than one value." ), + severity=Severity.WARNING, element_wise=True, single_values={"966", "988"}, ), @@ -2353,6 +2462,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " Native Enrolled or Principal Tribe' must" " not exceed 300 characters in length." ), + severity=Severity.ERROR, ), ], "phase_2": [ @@ -2373,6 +2483,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " for American Indian or Alaska Native Enrolled or" " Principal Tribe' must not be blank." ), + severity=Severity.ERROR, groupby="po_2_race", condition_values={"971"}, ), @@ -2390,6 +2501,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " field for other Asian' must not exceed 300" " characters in length." ), + severity=Severity.ERROR, ), ], "phase_2": [ @@ -2406,6 +2518,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " 972, 'race of principal owner 2: free-form text field" " for other Asian' must not be blank." ), + severity=Severity.ERROR, groupby="po_2_race", condition_values={"972"}, ), @@ -2423,6 +2536,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " field for other Black or African American'" " must not exceed 300 characters in length." 
), + severity=Severity.ERROR, ), ], "phase_2": [ @@ -2439,6 +2553,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " contains 973, 'race of principal owner 2: free-form text" " field for other Black or African American' must not be blank." ), + severity=Severity.ERROR, groupby="po_2_race", condition_values={"973"}, ), @@ -2456,6 +2571,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " field for other Pacific Islander race' must" " not exceed 300 characters in length." ), + severity=Severity.ERROR, ), ], "phase_2": [ @@ -2472,6 +2588,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " contains 974, 'race of principal owner 2: free-form text" " field for other Pacific Islander race' must not be blank." ), + severity=Severity.ERROR, groupby="po_2_race", condition_values={"974"}, ), @@ -2484,6 +2601,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): id="E1040", name="po_2_gender_flag.invalid_enum_value", description="When present, 'sex/gender of principal owner 2: NP flag' must equal 1, 966, or 988.", + severity=Severity.ERROR, element_wise=True, accepted_values=[ "1", @@ -2507,6 +2625,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " text field for self-identified sex/gender'" " must not exceed 300 characters in length." ), + severity=Severity.ERROR, ), ], "phase_2": [ @@ -2524,6 +2643,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " of principal owner 2: free-form text field for" " self-identified sex/gender' must not be blank." ), + severity=Severity.ERROR, groupby="po_2_gender_flag", condition_values={"1"}, ), @@ -2541,6 +2661,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " semicolons) must equal 1, 11, 12," " 13, 14, 2, 966, 977, or 988." 
), + severity=Severity.ERROR, element_wise=True, accepted_values=[ "1", @@ -2560,15 +2681,14 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): SBLCheck( is_unique_in_field, id="W0901", - warning=True, name="po_3_ethnicity.duplicates_in_field", description="'Ethnicity of principal owner 3' should not contain duplicated values.", + severity=Severity.WARNING, element_wise=True, ), SBLCheck( meets_multi_value_field_restriction, id="W0902", - warning=True, name="po_3_ethnicity.multi_value_field_restriction", description=( "When 'ethnicity of principal owner 3' contains" @@ -2577,6 +2697,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " (not provided by applicant), 'ethnicity of" " principal owner 3' should not contain more than one value." ), + severity=Severity.WARNING, element_wise=True, single_values={"966", "988"}, ), @@ -2594,6 +2715,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " text field for other Hispanic or Latino'" " must not exceed 300 characters in length." ), + severity=Severity.ERROR, ), ], "phase_2": [ @@ -2611,6 +2733,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " owner 3: free-form text field for other Hispanic" " or Latino' must not be blank." ), + severity=Severity.ERROR, groupby="po_3_ethnicity", condition_values={"977"}, ), @@ -2630,6 +2753,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " 34, 35, 36, 37, 4, 41, 42, 43, 44," " 5, 966, 971, 972, 973, 974, or 988." 
), + severity=Severity.ERROR, element_wise=True, accepted_values=[ "1", @@ -2669,15 +2793,14 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): SBLCheck( is_unique_in_field, id="W0941", - warning=True, name="po_3_race.duplicates_in_field", description="'Race of principal owner 3' should not contain duplicated values.", + severity=Severity.WARNING, element_wise=True, ), SBLCheck( meets_multi_value_field_restriction, id="W0942", - warning=True, name="po_3_race.multi_value_field_restriction", description=( "When 'race of principal owner 3' contains" @@ -2687,6 +2810,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " 'race of principal owner 3' should not" " contain more than one value." ), + severity=Severity.WARNING, element_wise=True, single_values={"966", "988"}, ), @@ -2705,6 +2829,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " Native Enrolled or Principal Tribe' must" " not exceed 300 characters in length." ), + severity=Severity.ERROR, ), ], "phase_2": [ @@ -2725,6 +2850,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " for American Indian or Alaska Native Enrolled or" " Principal Tribe' must not be blank." ), + severity=Severity.ERROR, groupby="po_3_race", condition_values={"971"}, ), @@ -2742,6 +2868,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " field for other Asian' must not exceed 300" " characters in length." ), + severity=Severity.ERROR, ), ], "phase_2": [ @@ -2758,6 +2885,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " 972, 'race of principal owner 3: free-form text field" " for other Asian' must not be blank." ), + severity=Severity.ERROR, groupby="po_3_race", condition_values={"972"}, ), @@ -2775,6 +2903,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " field for other Black or African American'" " must not exceed 300 characters in length." 
), + severity=Severity.ERROR, ), ], "phase_2": [ @@ -2791,6 +2920,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " contains 973, 'race of principal owner 3: free-form text" " field for other Black or African American' must not be blank." ), + severity=Severity.ERROR, groupby="po_3_race", condition_values={"973"}, ), @@ -2808,6 +2938,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " field for other Pacific Islander race' must" " not exceed 300 characters in length." ), + severity=Severity.ERROR, ), ], "phase_2": [ @@ -2824,6 +2955,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " contains 974, 'race of principal owner 3: free-form text" " field for other Pacific Islander race' must not be blank." ), + severity=Severity.ERROR, groupby="po_3_race", condition_values={"974"}, ), @@ -2836,6 +2968,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): id="E1040", name="po_3_gender_flag.invalid_enum_value", description="When present, 'sex/gender of principal owner 3: NP flag' must equal 1, 966, or 988.", + severity=Severity.ERROR, element_wise=True, accepted_values=[ "1", @@ -2859,6 +2992,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " text field for self-identified sex/gender'" " must not exceed 300 characters in length." ), + severity=Severity.ERROR, ), ], "phase_2": [ @@ -2876,6 +3010,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " of principal owner 3: free-form text field for" " self-identified sex/gender' must not be blank." ), + severity=Severity.ERROR, groupby="po_3_gender_flag", condition_values={"1"}, ), @@ -2893,6 +3028,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " semicolons) must equal 1, 11, 12," " 13, 14, 2, 966, 977, or 988." 
), + severity=Severity.ERROR, element_wise=True, accepted_values=[ "1", @@ -2912,15 +3048,14 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): SBLCheck( is_unique_in_field, id="W0901", - warning=True, name="po_4_ethnicity.duplicates_in_field", description="'Ethnicity of principal owner 4' should not contain duplicated values.", + severity=Severity.WARNING, element_wise=True, ), SBLCheck( meets_multi_value_field_restriction, id="W0902", - warning=True, name="po_4_ethnicity.multi_value_field_restriction", description=( "When 'ethnicity of principal owner 4' contains" @@ -2929,6 +3064,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " (not provided by applicant), 'ethnicity of" " principal owner 4' should not contain more than one value." ), + severity=Severity.WARNING, element_wise=True, single_values={"966", "988"}, ), @@ -2946,6 +3082,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " text field for other Hispanic or Latino'" " must not exceed 300 characters in length." ), + severity=Severity.ERROR, ), ], "phase_2": [ @@ -2963,6 +3100,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " owner 4: free-form text field for other Hispanic" " or Latino' must not be blank." ), + severity=Severity.ERROR, groupby="po_4_ethnicity", condition_values={"977"}, ), @@ -2982,6 +3120,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " 34, 35, 36, 37, 4, 41, 42, 43, 44," " 5, 966, 971, 972, 973, 974, or 988." 
), + severity=Severity.ERROR, element_wise=True, accepted_values=[ "1", @@ -3021,15 +3160,14 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): SBLCheck( is_unique_in_field, id="W0941", - warning=True, name="po_4_race.duplicates_in_field", description="'Race of principal owner 4' should not contain duplicated values.", + severity=Severity.WARNING, element_wise=True, ), SBLCheck( meets_multi_value_field_restriction, id="W0942", - warning=True, name="po_4_race.multi_value_field_restriction", description=( "When 'race of principal owner 4' contains" @@ -3039,6 +3177,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " 'race of principal owner 4' should not" " contain more than one value." ), + severity=Severity.WARNING, element_wise=True, single_values={"966", "988"}, ), @@ -3057,6 +3196,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " Native Enrolled or Principal Tribe' must" " not exceed 300 characters in length." ), + severity=Severity.ERROR, ), ], "phase_2": [ @@ -3077,6 +3217,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " for American Indian or Alaska Native Enrolled or" " Principal Tribe' must not be blank." ), + severity=Severity.ERROR, groupby="po_4_race", condition_values={"971"}, ), @@ -3094,6 +3235,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " field for other Asian' must not exceed 300" " characters in length." ), + severity=Severity.ERROR, ), ], "phase_2": [ @@ -3110,6 +3252,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " 972, 'race of principal owner 4: free-form text field" " for other Asian' must not be blank." ), + severity=Severity.ERROR, groupby="po_4_race", condition_values={"972"}, ), @@ -3127,6 +3270,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " field for other Black or African American'" " must not exceed 300 characters in length." 
), + severity=Severity.ERROR, ), ], "phase_2": [ @@ -3143,6 +3287,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " contains 973, 'race of principal owner 4: free-form text" " field for other Black or African American' must not be blank." ), + severity=Severity.ERROR, groupby="po_4_race", condition_values={"973"}, ), @@ -3160,6 +3305,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " field for other Pacific Islander race' must" " not exceed 300 characters in length." ), + severity=Severity.ERROR ), ], "phase_2": [ @@ -3176,6 +3322,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " contains 974, 'race of principal owner 4: free-form text" " field for other Pacific Islander race' must not be blank." ), + severity=Severity.ERROR, groupby="po_4_race", condition_values={"974"}, ), @@ -3188,6 +3335,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): id="E1040", name="po_4_gender_flag.invalid_enum_value", description="When present, 'sex/gender of principal owner 4: NP flag' must equal 1, 966, or 988.", + severity=Severity.ERROR, element_wise=True, accepted_values=[ "1", @@ -3211,6 +3359,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " text field for self-identified sex/gender'" " must not exceed 300 characters in length." ), + severity=Severity.ERROR, ), ], "phase_2": [ @@ -3228,9 +3377,10 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): " of principal owner 4: free-form text field for" " self-identified sex/gender' must not be blank." 
), + severity=Severity.ERROR, groupby="po_4_gender_flag", condition_values={"1"}, ), ], }, - } + } \ No newline at end of file diff --git a/tests/test_checks.py b/tests/test_checks.py deleted file mode 100644 index dafaf512..00000000 --- a/tests/test_checks.py +++ /dev/null @@ -1,27 +0,0 @@ -import pytest - -from regtech_data_validator.checks import SBLCheck - - -class TestSBLCheck: - def test_no_id_check(self): - with pytest.raises(Exception) as exc: - SBLCheck(lambda: True, warning=True, name="Just a Warning") - - assert "Each check must be assigned a `name` and an `id`." in str(exc.value) - assert exc.type == ValueError - - def test_no_name_check(self): - with pytest.raises(Exception) as exc: - SBLCheck(lambda: True, id="00000", warning=True) - - assert "Each check must be assigned a `name` and an `id`." in str(exc.value) - assert exc.type == ValueError - - def test_name_and_id_check(self): - raised = False - try: - SBLCheck(lambda: True, id="00000", warning=True, name="Just a Warning") - except ValueError: - raised = True - assert raised is False From 58873bcbe40c793b6d51e56604d014f49d405b75 Mon Sep 17 00:00:00 2001 From: Hans Keeler Date: Tue, 17 Oct 2023 03:20:28 -0400 Subject: [PATCH 07/33] Fix multi-line string that was setup as a tuple --- regtech_data_validator/schema_template.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/regtech_data_validator/schema_template.py b/regtech_data_validator/schema_template.py index 2aada648..0c473523 100644 --- a/regtech_data_validator/schema_template.py +++ b/regtech_data_validator/schema_template.py @@ -1,4 +1,4 @@ -"""This is a 'blank' Pandera template for SBLAR. All columns in the fig are present, +"""This is a 'blank' Pandera template for SBLAR. All columns in the FIG are present, but the checks need to be populated. Do not import _schema_template from this module directly. 
Instead, make use of the @@ -171,8 +171,8 @@ "pricing_mca_addcost": Column( str, title=( - "Field 31: MCA/sales-based: additional cost for merchant cash ", - "advances or other sales-based financing", + "Field 31: MCA/sales-based: additional cost for merchant cash " + "advances or other sales-based financing" ), checks=[], ), @@ -449,4 +449,4 @@ def get_template() -> Dict: cause absolute havoc in a program and it's proactically impossible to debug.""" - return deepcopy(_schema_template) + return deepcopy(_schema_template) \ No newline at end of file From b732419bf0d077a49685539f247747ac8bc088aa Mon Sep 17 00:00:00 2001 From: Hans Keeler Date: Tue, 17 Oct 2023 03:21:12 -0400 Subject: [PATCH 08/33] Print CLI output as JSON instead of Python dict. --- regtech_data_validator/main.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/regtech_data_validator/main.py b/regtech_data_validator/main.py index e7df5b11..d00a61ed 100644 --- a/regtech_data_validator/main.py +++ b/regtech_data_validator/main.py @@ -5,7 +5,7 @@ Run from the terminal to see the generated output. """ -import pprint +import json import sys import pandas as pd @@ -17,18 +17,21 @@ def csv_to_df(path: str) -> pd.DataFrame: return pd.read_csv(path, dtype=str, na_filter=False) -def run_validation_on_df(df: pd.DataFrame, lei: str) -> None: +def run_validation_on_df(df: pd.DataFrame, lei: str|None) -> None: """ Run validation on the supplied dataframe and print a report to the terminal. 
""" - pprint.pprint(validate_phases(df, lei)) + validation_dict = validate_phases(df, lei) + validation_json = json.dumps(validation_dict, indent=4) + print(validation_json) -if __name__ == "__main__": + +def main(): csv_path = None - lei: str = None + lei: str|None = None if len(sys.argv) == 1: raise ValueError("csv_path arg not provided") elif len(sys.argv) == 2: @@ -41,3 +44,6 @@ def run_validation_on_df(df: pd.DataFrame, lei: str) -> None: df = csv_to_df(csv_path) run_validation_on_df(df, lei) + +if __name__ == "__main__": + main() \ No newline at end of file From 3b18289ae05bf8b82c07b4162ef112ae8f47b291 Mon Sep 17 00:00:00 2001 From: Hans Keeler Date: Tue, 17 Oct 2023 03:35:03 -0400 Subject: [PATCH 09/33] black and ruff fixups --- regtech_data_validator/checks.py | 20 +++++--------------- regtech_data_validator/create_schemas.py | 15 +++++++-------- regtech_data_validator/global_data.py | 2 +- regtech_data_validator/main.py | 7 ++++--- regtech_data_validator/phase_validations.py | 9 ++++----- regtech_data_validator/schema_template.py | 7 ++----- tests/test_global_data.py | 2 -- tests/test_schema_functions.py | 3 ++- 8 files changed, 25 insertions(+), 40 deletions(-) diff --git a/regtech_data_validator/checks.py b/regtech_data_validator/checks.py index f9b933e4..802dcbc7 100644 --- a/regtech_data_validator/checks.py +++ b/regtech_data_validator/checks.py @@ -9,10 +9,12 @@ from pandera.backends.base import BaseCheckBackend from pandera.backends.pandas.checks import PandasCheckBackend + class Severity(StrEnum): ERROR = 'error' WARNING = 'warning' + class SBLCheck(Check): """ A Pandera.Check subclasss that requires a `name` and an `id` be @@ -23,13 +25,7 @@ class SBLCheck(Check): SBLWarningCheck subclasses below. 
""" - def __init__(self, - check_fn: Callable, - id: str, - name: str, - description: str, - severity: Severity, - **check_kwargs): + def __init__(self, check_fn: Callable, id: str, name: str, description: str, severity: Severity, **check_kwargs): """ Subclass of Pandera's `Check`, with special handling for severity level Args: @@ -43,15 +39,9 @@ def __init__(self, self.severity = severity - super().__init__( - check_fn, - title=id, - name=name, - description=description, - **check_kwargs - ) + super().__init__(check_fn, title=id, name=name, description=description, **check_kwargs) @classmethod def get_backend(cls, check_obj: Any) -> Type[BaseCheckBackend]: """Assume Pandas DataFrame and return PandasCheckBackend""" - return PandasCheckBackend \ No newline at end of file + return PandasCheckBackend diff --git a/regtech_data_validator/create_schemas.py b/regtech_data_validator/create_schemas.py index e547a953..bbb5e99d 100644 --- a/regtech_data_validator/create_schemas.py +++ b/regtech_data_validator/create_schemas.py @@ -17,18 +17,18 @@ phase_2_template = get_template() -def get_schema_by_phase_for_lei(template: dict, phase: str, lei: str|None = None): +def get_schema_by_phase_for_lei(template: dict, phase: str, lei: str | None = None): for column in get_phase_1_and_2_validations_for_lei(lei): validations = get_phase_1_and_2_validations_for_lei(lei)[column] template[column].checks = validations[phase] return DataFrameSchema(template) -def get_phase_1_schema_for_lei(lei: str|None = None): +def get_phase_1_schema_for_lei(lei: str | None = None): return get_schema_by_phase_for_lei(phase_1_template, "phase_1", lei) -def get_phase_2_schema_for_lei(lei: str|None = None): +def get_phase_2_schema_for_lei(lei: str | None = None): return get_schema_by_phase_for_lei(phase_2_template, "phase_2", lei) @@ -46,11 +46,10 @@ def validate(schema: DataFrameSchema, df: pd.DataFrame) -> list[dict]: try: schema(df, lazy=True) except SchemaErrors as err: - # WARN: 
SchemaErrors.schema_errors is supposed to be of type # list[dict[str,Any]], but it's actually of type SchemaError schema_error: SchemaError - for schema_error in err.schema_errors: # type: ignore + for schema_error in err.schema_errors: # type: ignore check = schema_error.check column_name = schema_error.schema.name @@ -63,7 +62,7 @@ def validate(schema: DataFrameSchema, df: pd.DataFrame) -> list[dict]: raise RuntimeError( f'Check {check} type on {column_name} column not supported. Must be of type {SBLCheck}' ) from schema_error - + fields: list[str] = [column_name] if check.groupby: @@ -110,7 +109,7 @@ def validate(schema: DataFrameSchema, df: pd.DataFrame) -> list[dict]: return findings -def validate_phases(df: pd.DataFrame, lei: str|None = None) -> list: +def validate_phases(df: pd.DataFrame, lei: str | None = None) -> list: phase1_findings = validate(get_phase_1_schema_for_lei(lei), df) if phase1_findings: return phase1_findings @@ -119,4 +118,4 @@ def validate_phases(df: pd.DataFrame, lei: str|None = None) -> list: if phase2_findings: return phase2_findings else: - return [{"response": "No validations errors or warnings"}] \ No newline at end of file + return [{"response": "No validations errors or warnings"}] diff --git a/regtech_data_validator/global_data.py b/regtech_data_validator/global_data.py index b3364354..d02d4fc7 100644 --- a/regtech_data_validator/global_data.py +++ b/regtech_data_validator/global_data.py @@ -3,7 +3,7 @@ # global variable for NAICS codes -naics_codes: dict[str,str] = {} +naics_codes: dict[str, str] = {} naics_file_path = files('regtech_data_validator.data.naics').joinpath('2022_codes.csv') with naics_file_path.open('r') as f: diff --git a/regtech_data_validator/main.py b/regtech_data_validator/main.py index d00a61ed..bed8bb3a 100644 --- a/regtech_data_validator/main.py +++ b/regtech_data_validator/main.py @@ -17,7 +17,7 @@ def csv_to_df(path: str) -> pd.DataFrame: return pd.read_csv(path, dtype=str, na_filter=False) -def 
run_validation_on_df(df: pd.DataFrame, lei: str|None) -> None: +def run_validation_on_df(df: pd.DataFrame, lei: str | None) -> None: """ Run validation on the supplied dataframe and print a report to the terminal. @@ -31,7 +31,7 @@ def run_validation_on_df(df: pd.DataFrame, lei: str|None) -> None: def main(): csv_path = None - lei: str|None = None + lei: str | None = None if len(sys.argv) == 1: raise ValueError("csv_path arg not provided") elif len(sys.argv) == 2: @@ -45,5 +45,6 @@ def main(): df = csv_to_df(csv_path) run_validation_on_df(df, lei) + if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/regtech_data_validator/phase_validations.py b/regtech_data_validator/phase_validations.py index 6d65aef5..20b23c06 100644 --- a/regtech_data_validator/phase_validations.py +++ b/regtech_data_validator/phase_validations.py @@ -31,7 +31,7 @@ from regtech_data_validator.checks import SBLCheck, Severity -def get_phase_1_and_2_validations_for_lei(lei: str|None = None): +def get_phase_1_and_2_validations_for_lei(lei: str | None = None): return { "uid": { "phase_1": [ @@ -1446,8 +1446,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str|None = None): id="E0720", name="naics_code_flag.invalid_enum_value", description=( - "'North American Industry Classification System (NAICS) code: NP flag'" - "must equal 900 or 988." + "'North American Industry Classification System (NAICS) code: NP flag'must equal 900 or 988." ), severity=Severity.ERROR, element_wise=True, @@ -3305,7 +3304,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str|None = None): " field for other Pacific Islander race' must" " not exceed 300 characters in length." 
), - severity=Severity.ERROR + severity=Severity.ERROR, ), ], "phase_2": [ @@ -3383,4 +3382,4 @@ def get_phase_1_and_2_validations_for_lei(lei: str|None = None): ), ], }, - } \ No newline at end of file + } diff --git a/regtech_data_validator/schema_template.py b/regtech_data_validator/schema_template.py index 0c473523..a2229cdd 100644 --- a/regtech_data_validator/schema_template.py +++ b/regtech_data_validator/schema_template.py @@ -170,10 +170,7 @@ ), "pricing_mca_addcost": Column( str, - title=( - "Field 31: MCA/sales-based: additional cost for merchant cash " - "advances or other sales-based financing" - ), + title="Field 31: MCA/sales-based: additional cost for merchant cash advances or other sales-based financing", checks=[], ), "pricing_prepenalty_allowed": Column( @@ -449,4 +446,4 @@ def get_template() -> Dict: cause absolute havoc in a program and it's proactically impossible to debug.""" - return deepcopy(_schema_template) \ No newline at end of file + return deepcopy(_schema_template) diff --git a/tests/test_global_data.py b/tests/test_global_data.py index f939cacf..20f84c40 100644 --- a/tests/test_global_data.py +++ b/tests/test_global_data.py @@ -1,5 +1,3 @@ -import pytest - from regtech_data_validator import global_data diff --git a/tests/test_schema_functions.py b/tests/test_schema_functions.py index 0b8e0c1b..7c141dee 100644 --- a/tests/test_schema_functions.py +++ b/tests/test_schema_functions.py @@ -3,7 +3,8 @@ from regtech_data_validator.create_schemas import ( get_phase_1_schema_for_lei, get_phase_2_schema_for_lei, - validate, validate_phases + validate, + validate_phases, ) From 5a515e9627fbc3f135485294ca25cf5dba0185f2 Mon Sep 17 00:00:00 2001 From: Hans Keeler Date: Wed, 18 Oct 2023 18:45:39 -0400 Subject: [PATCH 10/33] Fix path to `tests` dir in DevContainer setup --- .devcontainer/devcontainer.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 
6fc52aad..9656043b 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -49,7 +49,7 @@ "python.testing.unittestEnabled": false, "python.testing.pytestArgs": [ "--rootdir", - "${workspaceFolder}/src/tests" + "${workspaceFolder}/tests" ] } } From e4d7681e6c1ce6057e62c48b5a480556b2c98c78 Mon Sep 17 00:00:00 2001 From: Hans Keeler Date: Fri, 20 Oct 2023 02:33:37 -0400 Subject: [PATCH 11/33] Remove deprecated python formatting VSCode setting --- .devcontainer/devcontainer.json | 1 - 1 file changed, 1 deletion(-) diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 9656043b..1dc2c1ad 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -40,7 +40,6 @@ ], "editor.tabSize": 4, "editor.formatOnSave": true, - "python.formatting.provider": "none", "python.envFile": "${workspaceFolder}/.env", "editor.codeActionsOnSave": { "source.organizeImports": true From 1bb7c947fba56b48e147037d504c45ec97dd8003 Mon Sep 17 00:00:00 2001 From: Hans Keeler Date: Fri, 20 Oct 2023 02:35:38 -0400 Subject: [PATCH 12/33] Add `typer` package for building better CLI --- poetry.lock | 23 ++++++++++++++++++++++- pyproject.toml | 1 + 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/poetry.lock b/poetry.lock index 600691d0..9cdd0373 100644 --- a/poetry.lock +++ b/poetry.lock @@ -543,6 +543,27 @@ typing-extensions = {version = ">=4.7.0", markers = "python_version < \"3.12\""} doc = ["Sphinx (>=7)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)"] test = ["coverage[toml] (>=7)", "mypy (>=1.2.0)", "pytest (>=7)"] +[[package]] +name = "typer" +version = "0.9.0" +description = "Typer, build great CLIs. Easy to code. Based on Python type hints." 
+optional = false +python-versions = ">=3.6" +files = [ + {file = "typer-0.9.0-py3-none-any.whl", hash = "sha256:5d96d986a21493606a358cae4461bd8cdf83cbf33a5aa950ae629ca3b51467ee"}, + {file = "typer-0.9.0.tar.gz", hash = "sha256:50922fd79aea2f4751a8e0408ff10d2662bd0c8bbfa84755a699f3bada2978b2"}, +] + +[package.dependencies] +click = ">=7.1.1,<9.0.0" +typing-extensions = ">=3.7.4.3" + +[package.extras] +all = ["colorama (>=0.4.3,<0.5.0)", "rich (>=10.11.0,<14.0.0)", "shellingham (>=1.3.0,<2.0.0)"] +dev = ["autoflake (>=1.3.1,<2.0.0)", "flake8 (>=3.8.3,<4.0.0)", "pre-commit (>=2.17.0,<3.0.0)"] +doc = ["cairosvg (>=2.5.2,<3.0.0)", "mdx-include (>=1.4.1,<2.0.0)", "mkdocs (>=1.1.2,<2.0.0)", "mkdocs-material (>=8.1.4,<9.0.0)", "pillow (>=9.3.0,<10.0.0)"] +test = ["black (>=22.3.0,<23.0.0)", "coverage (>=6.2,<7.0)", "isort (>=5.0.6,<6.0.0)", "mypy (==0.910)", "pytest (>=4.4.0,<8.0.0)", "pytest-cov (>=2.10.0,<5.0.0)", "pytest-sugar (>=0.9.4,<0.10.0)", "pytest-xdist (>=1.32.0,<4.0.0)", "rich (>=10.11.0,<14.0.0)", "shellingham (>=1.3.0,<2.0.0)"] + [[package]] name = "typing-extensions" version = "4.7.1" @@ -667,4 +688,4 @@ files = [ [metadata] lock-version = "2.0" python-versions = "^3.11" -content-hash = "ac6360d9068e34f6bbad74a6c3339a85dd1968267f7272b48b8a99dfc5702812" +content-hash = "df661a7d55c274c9a68fd0c4a3967bb4f1c04f286e5aafbb619216290a39eab4" diff --git a/pyproject.toml b/pyproject.toml index e959153d..eb244b06 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,6 +9,7 @@ readme = "README.md" python = "^3.11" pandas = "2.1.0" pandera = "0.16.1" +typer = "^0.9.0" [tool.poetry.group.dev.dependencies] pytest = "7.4.0" From 8497a9a5f2f509d8f29edb3f9e1526216029e244 Mon Sep 17 00:00:00 2001 From: Hans Keeler Date: Fri, 20 Oct 2023 02:37:41 -0400 Subject: [PATCH 13/33] Return df of validation findings instead of dict --- regtech_data_validator/create_schemas.py | 68 ++++++++++++++---------- regtech_data_validator/main.py | 2 +- 2 files changed, 41 insertions(+), 29 
deletions(-) diff --git a/regtech_data_validator/create_schemas.py b/regtech_data_validator/create_schemas.py index bbb5e99d..2aba2af5 100644 --- a/regtech_data_validator/create_schemas.py +++ b/regtech_data_validator/create_schemas.py @@ -32,7 +32,7 @@ def get_phase_2_schema_for_lei(lei: str | None = None): return get_schema_by_phase_for_lei(phase_2_template, "phase_2", lei) -def validate(schema: DataFrameSchema, df: pd.DataFrame) -> list[dict]: +def validate(schema: DataFrameSchema, df: pd.DataFrame) -> pd.DataFrame: """ validate received dataframe with schema and return list of schema errors @@ -40,9 +40,10 @@ def validate(schema: DataFrameSchema, df: pd.DataFrame) -> list[dict]: schema (DataFrameSchema): schema to be used for validation df (pd.DataFrame): data parsed into dataframe Returns: - list of validation findings (warnings and errors) + pd.DataFrame containing validation results data """ - findings = [] + findings_df: pd.DataFrame = pd.DataFrame() + try: schema(df, lazy=True) except SchemaErrors as err: @@ -69,12 +70,14 @@ def validate(schema: DataFrameSchema, df: pd.DataFrame) -> list[dict]: fields += check.groupby # type: ignore # This will either be a boolean series or a single bool - check_output = schema_error.check_output + # Q: Is the scenario where it returns a single bool even with the above error checking? + check_output: pd.Series = schema_error.check_output # type: ignore # Remove duplicates, but keep as `list` for JSON-friendliness fields = list(set(fields)) - if check_output is not None: + # Q: What's the scenario where `check_output` is empty? 
+ if not check_output.empty: # `check_output` must be sorted so its index lines up with `df`'s index check_output.sort_index(inplace=True) @@ -84,29 +87,38 @@ def validate(schema: DataFrameSchema, df: pd.DataFrame) -> list[dict]: # http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#boolean-indexing failed_check_fields_df = df[~check_output][fields].fillna("") - # Create list of dicts representing the failed validations and the - # associated field data for each invalid record. - records = [] - for idx, row in failed_check_fields_df.iterrows(): - record = {"number": idx + 1, "field_values": {}} - for field in fields: - record["field_values"][field] = row[field] - records.append(record) - - validation_findings = { - "validation": { - "id": check.title, - "name": check.name, - "description": check.description, - "severity": check.severity, - "fields": fields, - }, - "records": records, - } - - findings.append(validation_findings) - - return findings + # Melts a DataFrame with the line number as the index columns for the validations's fields' values + # into one with the validation_id, line_no, and field_name as a multiindex, and all of the validation + # metadata merged in as well. + # + # from... + # + # ct_loan_term_flag ct_credit_product + # 0 999 1 + # 1 999 2 + # + # ...to... + # field_value v_sev v_name v_desc + # v_id line_no field_name + # E2003 0 ct_credit_product 1 error ct_loan_term_flag.enum_value_conflict When 'credit product' equals 1 (term loan - un... + # ct_loan_term_flag 999 error ct_loan_term_flag.enum_value_conflict When 'credit product' equals 1 (term loan - un... + # 1 ct_credit_product 2 error ct_loan_term_flag.enum_value_conflict When 'credit product' equals 1 (term loan - un... + # ct_loan_term_flag 999 error ct_loan_term_flag.enum_value_conflict When 'credit product' equals 1 (term loan - un... 
+ failed_check_fields_melt_df = ( + failed_check_fields_df.reset_index(names='line_no') + .melt(var_name='field_name', value_name='field_value', id_vars='line_no') + .assign(v_id=check.title) + .assign(v_sev=check.severity) + .assign(v_name=check.name) + .assign(v_desc=check.description) + .set_index(['v_id', 'line_no', 'field_name']) + .sort_index + ) + print(failed_check_fields_melt_df) + + findings_df = pd.concat([findings_df, failed_check_fields_melt_df]) + + return findings_df def validate_phases(df: pd.DataFrame, lei: str | None = None) -> list: diff --git a/regtech_data_validator/main.py b/regtech_data_validator/main.py index bed8bb3a..b39e57cd 100644 --- a/regtech_data_validator/main.py +++ b/regtech_data_validator/main.py @@ -26,7 +26,7 @@ def run_validation_on_df(df: pd.DataFrame, lei: str | None) -> None: validation_dict = validate_phases(df, lei) validation_json = json.dumps(validation_dict, indent=4) - print(validation_json) + #print(validation_json) def main(): From 15e2e02e2f7e52aeb32f024bef62ebf4354095f1 Mon Sep 17 00:00:00 2001 From: Hans Keeler Date: Tue, 24 Oct 2023 04:17:59 -0400 Subject: [PATCH 14/33] Add typer and tabulate in CLI poetry dep group --- poetry.lock | 16 +++++++++++++++- pyproject.toml | 14 ++++++++++---- 2 files changed, 25 insertions(+), 5 deletions(-) diff --git a/poetry.lock b/poetry.lock index 9cdd0373..60b45865 100644 --- a/poetry.lock +++ b/poetry.lock @@ -525,6 +525,20 @@ files = [ {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, ] +[[package]] +name = "tabulate" +version = "0.9.0" +description = "Pretty-print tabular data" +optional = false +python-versions = ">=3.7" +files = [ + {file = "tabulate-0.9.0-py3-none-any.whl", hash = "sha256:024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f"}, + {file = "tabulate-0.9.0.tar.gz", hash = "sha256:0095b12bf5966de529c0feb1fa08671671b3368eec77d7ef7ab114be2c068b3c"}, +] + +[package.extras] +widechars = 
["wcwidth"] + [[package]] name = "typeguard" version = "4.1.3" @@ -688,4 +702,4 @@ files = [ [metadata] lock-version = "2.0" python-versions = "^3.11" -content-hash = "df661a7d55c274c9a68fd0c4a3967bb4f1c04f286e5aafbb619216290a39eab4" +content-hash = "9740a71e98802af3624303af7136fe5020e2d705435cbbf2ef214b988c39dfe2" diff --git a/pyproject.toml b/pyproject.toml index eb244b06..b4c92ab6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,3 +1,7 @@ +[build-system] +requires = ["poetry-core"] +build-backend = "poetry.core.masonry.api" + [tool.poetry] name = "regtech-data-validator" version = "0.1.0" @@ -9,7 +13,6 @@ readme = "README.md" python = "^3.11" pandas = "2.1.0" pandera = "0.16.1" -typer = "^0.9.0" [tool.poetry.group.dev.dependencies] pytest = "7.4.0" @@ -20,9 +23,12 @@ ruff = "0.0.259" [tool.poetry.group.data.dependencies] openpyxl = "^3.1.2" -[build-system] -requires = ["poetry-core"] -build-backend = "poetry.core.masonry.api" +[tool.poetry.group.cli.dependencies] +tabulate = "^0.9.0" +typer = "^0.9.0" + +[tool.poetry.scripts] +cfpb-comply = 'regtech_data_validator.cli:app' # Black formatting [tool.black] From 4324a15d910eb627b7275a2403de178a144619c3 Mon Sep 17 00:00:00 2001 From: Hans Keeler Date: Wed, 25 Oct 2023 03:14:00 -0400 Subject: [PATCH 15/33] Add project setup and CLI usage to README.md --- README.md | 1357 +++++++++-------------------------------------------- 1 file changed, 212 insertions(+), 1145 deletions(-) diff --git a/README.md b/README.md index becd0ac5..3c30a777 100644 --- a/README.md +++ b/README.md @@ -13,46 +13,135 @@ We are currently focused on implementing the SBL (Small Business Lending) data submission. 
For details on this dataset and its validations, please see: [Filing instructions guide for small business lending data collected in 2024](https://www.consumerfinance.gov/data-research/small-business-lending/filing-instructions-guide/2024-guide/) -## Pre-requisites +## Setup + +The following setup must be completed to running the CLI utilities or doing any +development on the project. + +### Pre-requisites The following software packages are pre-requisites to installing this software. - [Python](https://www.python.org/downloads/) version 3.11 or greater. - [Poetry](https://python-poetry.org/docs/#installation) for Python package management. -## Dependencies +### Install -All packages and libraries used in this repository can be found in [`pyproject.toml`](https://github.com/cfpb/regtech-data-validator/blob/main/pyproject.toml) +1. Checkout this project -## Contributing + ```sh + git clone https://github.com/cfpb/regtech-data-validator.git + cd regtech-data-validator + ``` -[CFPB](https://www.consumerfinance.gov/) is developing the -`RegTech Data Validator` in the open to maximize transparency and encourage -third party contributions. If you want to contribute, please read and abide by -the terms of the [License](./LICENSE) for this project. Pull Requests are always -welcome. +1. Install Python packages via Poetry -## Contact Us + ```sh + poetry install + ``` -If you have an inquiry or suggestion for the validator or any SBL related code -please reach out to us at +1. Activate Poetry's virtual environment -## Development + ```sh + poetry shell + ``` -There are few files in `src/validator` that will be of interest. +**Note:** All Python packages used in project can be found in +[`pyproject.toml`](https://github.com/cfpb/regtech-data-validator/blob/main/pyproject.toml) -- `checks.py` defines custom Pandera Check class called `SBLCheck`. -- `global_data.py` defines functions to parse NAICS and GEOIDs. 
-- `phase_validations.py` defines phase 1 and phase 2 Pandera schema/checks used - for validating the SBLAR data. -- `check_functions.py` contains a collection of functions to be run against the - data that are a bit too complex to be implemented directly within the schema - as Lambda functions. -- Lastly, the file `main.py` pulls everything together and illustrates how the - schema can catch the various validation errors present in our mock, invalid - dataset and different LEI values. +## Usage -Unit tests that can be located under [`src/tests`](https://github.com/cfpb/regtech-data-validator/tree/main/src/tests). +This project includes the `cfpb-val` CLI utility for validating CFPB's RegTech-related +data collection file formats. It currently supports the small business lending (SBL) data +collected for 2024, but may support more formats in the future. This tool is intended for +testing purposes, allowing a quick way to check the validity of a file without having +to submit it through the full CFPB-hosted filing systems. + +### Validating data + +``` +$ cfpb-val validate --help + + Usage: cfpb-val validate [OPTIONS] PATH + +╭─ Arguments ─────────────────────────────────────────────────────────────────────────────────────────╮ +│ * path FILE [default: None] [required] │ +╰─────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Options ───────────────────────────────────────────────────────────────────────────────────────────╮ +│ --context = [example: lei=12345678901234567890] │ +│ --output [csv|json|pandas|table] [default: table] │ +│ --help Show this message and exit. │ +╰─────────────────────────────────────────────────────────────────────────────────────────────────────╯ +``` + +#### Examples + +1. Validate file with no findings + + $ cfpb-val validate tests/data/sbl-validations-pass.csv + +1. 
Validate file with findings, passing in LEI as context + + $ cfpb-val validate tests/data/sbl-validations-fail.csv --context lei=000TESTFIUIDDONOTUSE + + ╭────────────┬───────────┬──────────────────┬────────────────────────────────────────────────────┬─────────────────────┬───────────────┬──────────────────────────────────────╮ + │ finding_no │ record_no │ field_name │ field_value │ validation_severity │ validation_id │ validation_name │ + ├────────────┼───────────┼──────────────────┼────────────────────────────────────────────────────┼─────────────────────┼───────────────┼──────────────────────────────────────┤ + │ 1 │ 4 │ uid │ 000TESTFIUIDDONOTUSEXBXVID13XTC1 │ error │ E3000 │ uid.duplicates_in_dataset │ + │ 2 │ 5 │ uid │ 000TESTFIUIDDONOTUSEXBXVID13XTC1 │ error │ E3000 │ uid.duplicates_in_dataset │ + │ 3 │ 0 │ uid │ │ error │ E0001 │ uid.invalid_text_length │ + │ 4 │ 1 │ uid │ BXUIDXVID11XTC2 │ error │ E0001 │ uid.invalid_text_length │ + │ 5 │ 2 │ uid │ BXUIDXVID11XTC31234567890123456789012345678901 │ error │ E0001 │ uid.invalid_text_length │ + │ ... │ ... │ ... │ ... │ ... │ ... │ ... │ + │ 115 │ 278 │ po_4_race_baa_ff │ 12345678901234567890123456789012345678901234567890 │ error │ E1000 │ po_4_race_baa_ff.invalid_text_length │ + │ 116 │ 290 │ po_4_race_pi_ff │ 12345678901234567890123456789012345678901234567890 │ error │ E1020 │ po_4_race_pi_ff.invalid_text_length │ + │ 117 │ 302 │ po_4_gender_flag │ 9001 │ error │ E1040 │ po_4_gender_flag.invalid_enum_value │ + │ 118 │ 306 │ po_4_gender_ff │ 12345678901234567890123456789012345678901234567890 │ error │ E1060 │ po_4_gender_ff.invalid_text_length │ + ╰────────────┴───────────┴──────────────────┴────────────────────────────────────────────────────┴─────────────────────┴───────────────┴──────────────────────────────────────╯ + +1. 
Validate file with findings with JSON output + + $ cfpb-val validate tests/data/sbl-validations-fail.csv --output json + + [ + { + "validation": { + "id": "E0001", + "name": "uid.invalid_text_length", + "description": "'Unique identifier' must be at least 21 characters in length and at most 45 characters in length.", + "severity": "error" + }, + "records": [ + { + "record_no": 0, + "fields": [ + { + "name": "uid", + "value": "" + } + ] + }, + { + "record_no": 1, + "fields": [ + { + "name": "uid", + "value": "BXUIDXVID11XTC2" + } + ] + }, + { + "record_no": 2, + "fields": [ + { + "name": "uid", + "value": "BXUIDXVID11XTC31234567890123456789012345678901" + } + ] + } + ] + }, + ... ## Test Data @@ -60,1121 +149,93 @@ This repo includes 2 test datasets, one with all valid data, and one where each line represents a different failed validation, or different permutation of the same failed validation. -- [`sbl-validations-pass.csv`](src/tests/data/sbl-validations-pass.csv) -- [`sbl-validations-fail.csv`](src/tests/data/sbl-validations-fail.csv) +- [`sbl-validations-pass.csv`](tests/data/sbl-validations-pass.csv) +- [`sbl-validations-fail.csv`](tests/data/sbl-validations-fail.csv) We use these test files in for automated test, but can also be passed in via the -CLI for manual testing. +`cfpb-val` CLI utility for manual testing. -## Development Process and Standard -Development Process -Below are the steps the development team follows to fix issues, develop new features, -etc. +## Development -1. Work in a branch -2. Create a PR to merge into main -3. The PR is automatically built, tested, and linted using github actions with PyTest, - Black, Ruff, and Coverage. -4. Manual review is performed in addition to ensuring the above automatic scans - are positive -5. The PR is deployed to development servers to be checked -6. 
The PR is merged only by a separate member in the dev team +### Best practices -Development standard practice +#### `Check` functions - Check functions should focus on reuse. - - Most of the validations share logic with other validations. + - Most of the validations share logic with other validations. - Avoid using lambdas for Check functions. - - They do not promote reuse. - - They are harder to debug. - - They are harder to test. + - They do not promote reuse. + - They are harder to debug. + - They are harder to test. - Check function signatures should reflect the functionality. - Check functions should have corresponding unit tests. - - [Unit Test](./src/tests/test_check_functions.py) + - [Unit Test](./src/tests/test_check_functions.py) - Check definitions' name should be set to validation ID. - - Example: "denial_reasons. enum_value_conflict" - ![Validation ID](images/validation_id.png) + - Example: "denial_reasons. enum_value_conflict" + ![Validation ID](images/validation_id.png) - Check new added lines are formatted correctly. -## Installing Dependencies - -Run `poetry install` to install dependencies defined in `pyproject.toml` - -
- See Terminal Output - -```sh - -$ cd ~/Projects/regtech-data-validator - -$ poetry install - -Installing dependencies from lock file - -Package operations: 25 installs, 0 updates, 0 removals - - • Installing six (1.16.0) - • Installing iniconfig (2.0.0) - • Installing mypy-extensions (1.0.0) - • Installing numpy (1.25.2) - • Installing packaging (23.1) - • Installing pluggy (1.3.0) - • Installing python-dateutil (2.8.2) - • Installing pytz (2023.3.post1) - • Installing typing-extensions (4.7.1) - • Installing tzdata (2023.3) - • Installing click (8.1.7): Pending... - • Installing coverage (7.3.1): Pending... - • Installing coverage (7.3.1): Pending... - • Installing click (8.1.7): Downloading... 0% - • Installing coverage (7.3.1): Pending... - • Installing coverage (7.3.1): Downloading... 0% - • Installing coverage (7.3.1): Downloading... 0% - • Installing click (8.1.7): Downloading... 20% - • Installing coverage (7.3.1): Downloading... 0% - • Installing coverage (7.3.1): Downloading... 10% - • Installing coverage (7.3.1): Downloading... 10% - • Installing click (8.1.7): Downloading... 62% - • Installing coverage (7.3.1): Downloading... 10% - • Installing coverage (7.3.1): Downloading... 30% - • Installing coverage (7.3.1): Downloading... 30% - • Installing click (8.1.7): Downloading... 100% - • Installing coverage (7.3.1): Downloading... 30% - • Installing coverage (7.3.1): Downloading... 30% - • Installing click (8.1.7): Installing... - • Installing coverage (7.3.1): Downloading... 30% - • Installing coverage (7.3.1): Downloading... 30% - • Installing click (8.1.7) - • Installing coverage (7.3.1): Downloading... 30% - • Installing coverage (7.3.1): Downloading... 61% - • Installing coverage (7.3.1): Downloading... 91% - • Installing coverage (7.3.1): Downloading... 100% - • Installing coverage (7.3.1): Installing... 
- • Installing coverage (7.3.1) - • Installing multimethod (1.9.1) - • Installing pandas (2.1.0) - • Installing pathspec (0.11.2) - • Installing platformdirs (3.10.0) - • Installing pydantic (1.10.12) - • Installing pytest (7.4.0) - • Installing typeguard (4.1.3) - • Installing typing-inspect (0.9.0) - • Installing wrapt (1.15.0) - • Installing black (23.3.0) - • Installing pandera (0.16.1) - • Installing pytest-cov (4.1.0) - • Installing ruff (0.0.259) - -``` - -
- -## Running Validator - -`main.py` allows user to test csv file with and without LEI number - -```sh -# Running validator using LEI and CSV file -main.py - -# Running validator using just CSV file -main.py -``` - -When all validations passed, it prints out : - -```sh -[{'response': 'No validations errors or warnings'}] -``` - -When validation(s) failed, it prints out JSON data containing failed validation(s) - -```sh -# Example of JSON response containing failed validation -[ - { - 'validation': - { - 'id': 'E3000', - 'name': 'uid.duplicates_in_dataset', - 'description': "Any 'unique identifier' may not be used in more than one - record within a small business lending application register.", - 'fields': ['uid'], - 'severity': 'error' - }, - 'records': [ - { - 'number': 5, - 'field_values': {'uid': '000TESTFIUIDDONOTUSEXBXVID13XTC1'} - }, - { - 'number': 6, - 'field_values': {'uid': '000TESTFIUIDDONOTUSEXBXVID13XTC1'} - } - ] - } -] - -``` -To run `main.py` in terminal, you can use these commands. +## Testing -```sh -# Test validating the "good" file -# If passing lei value, pass lei as first arg and csv_path as second arg -$ poetry run python src/validator/main.py 000TESTFIUIDDONOTUSE src/tests/data/sbl-validations-pass.csv -# else just pass the csv_path as arg -$ poetry run python src/validator/main.py src/tests/data/sbl-validations-pass.csv - -# Test validating the "bad" file -$ poetry run python src/validator/main.py 000TESTFIUIDDONOTUSE src/tests/data/sbl-validations-fail.csv -# or -$ poetry run python src/validator/main.py src/tests/data/sbl-validations-fail.csv -``` +This project uses [pytest](https://docs.pytest.org/) for automated testing. -
- Example of Validator with Valid Data +### Running tests -```sh -$ poetry run python src/validator/main.py src/tests/data/sbl-validations-pass.csv -[{'response': 'No validations errors or warnings'}] ``` - - -
- -
- Example of Validator with Incorrect Data - -```sh - -$ poetry run python src/validator/main.py src/tests/data/sbl-validations-fail.csv -[{'records': [{'field_values': {'uid': '000TESTFIUIDDONOTUSEXBXVID13XTC1'}, - 'number': 5}, - {'field_values': {'uid': '000TESTFIUIDDONOTUSEXBXVID13XTC1'}, - 'number': 6}], - 'validation': {'description': "Any 'unique identifier' may not be used in " - 'more than one record within a small business ' - 'lending application register.', - 'fields': ['uid'], - 'id': 'E3000', - 'name': 'uid.duplicates_in_dataset', - 'severity': 'error'}}, - {'records': [{'field_values': {'uid': ''}, 'number': 1}, - {'field_values': {'uid': 'BXUIDXVID11XTC2'}, 'number': 2}, - {'field_values': {'uid': 'BXUIDXVID11XTC31234567890123456789012345678901'}, - 'number': 3}], - 'validation': {'description': "'Unique identifier' must be at least 21 " - 'characters in length and at most 45 ' - 'characters in length.', - 'fields': ['uid'], - 'id': 'E0001', - 'name': 'uid.invalid_text_length', - 'severity': 'error'}}, - {'records': [{'field_values': {'uid': ''}, 'number': 1}, - {'field_values': {'uid': 'BXUIDXVID12XTC1abcdef'}, 'number': 4}], - 'validation': {'description': "'Unique identifier' may contain any " - 'combination of numbers and/or uppercase ' - 'letters (i.e., 0-9 and A-Z), and must not ' - 'contain any other characters.', - 'fields': ['uid'], - 'id': 'E0002', - 'name': 'uid.invalid_text_pattern', - 'severity': 'error'}}, - {'records': [{'field_values': {'app_date': ''}, 'number': 8}, - {'field_values': {'app_date': '12012024'}, 'number': 9}], - 'validation': {'description': "'Application date' must be a real calendar " - 'date using YYYYMMDD format.', - 'fields': ['app_date'], - 'id': 'E0020', - 'name': 'app_date.invalid_date_format', - 'severity': 'error'}}, - {'records': [{'field_values': {'app_method': ''}, 'number': 10}, - {'field_values': {'app_method': '9001'}, 'number': 11}], - 'validation': {'description': "'Application method' must equal 1, 
2, 3, or " - '4.', - 'fields': ['app_method'], - 'id': 'E0040', - 'name': 'app_method.invalid_enum_value', - 'severity': 'error'}}, - {'records': [{'field_values': {'app_recipient': ''}, 'number': 12}, - {'field_values': {'app_recipient': '9001'}, 'number': 13}], - 'validation': {'description': "'Application recipient' must equal 1 or 2", - 'fields': ['app_recipient'], - 'id': 'E0060', - 'name': 'app_recipient.invalid_enum_value', - 'severity': 'error'}}, - {'records': [{'field_values': {'ct_credit_product': ''}, 'number': 14}, - {'field_values': {'ct_credit_product': '9001'}, 'number': 15}], - 'validation': {'description': "'Credit product' must equal 1, 2, 3, 4, 5, 6, " - '7, 8, 977, or 988.', - 'fields': ['ct_credit_product'], - 'id': 'E0080', - 'name': 'ct_credit_product.invalid_enum_value', - 'severity': 'error'}}, - {'records': [{'field_values': {'ct_credit_product_ff': '123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890XXX'}, - 'number': 16}], - 'validation': {'description': "'Free-form text field for other credit " - "products' must not exceed 300 characters in " - 'length.', - 'fields': ['ct_credit_product_ff'], - 'id': 'E0100', - 'name': 'ct_credit_product_ff.invalid_text_length', - 'severity': 'error'}}, - {'records': [{'field_values': {'ct_guarantee': '9001'}, 'number': 19}, - {'field_values': {'ct_guarantee': ''}, 'number': 20}], - 'validation': {'description': "Each value in 'type of guarantee' (separated " - 'by semicolons) must equal 1, 2, 3, 4, 5, 6, ' - '7, 8, 9, 10, 11, 977, or 999.', - 'fields': ['ct_guarantee'], - 'id': 'E0120', - 'name': 'ct_guarantee.invalid_enum_value', - 'severity': 'error'}}, - {'records': [{'field_values': {'ct_guarantee_ff': 
'123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890XXX'}, - 'number': 24}], - 'validation': {'description': "'Free-form text field for other guarantee' " - 'must not exceed 300 characters in length', - 'fields': ['ct_guarantee_ff'], - 'id': 'E0140', - 'name': 'ct_guarantee_ff.invalid_text_length', - 'severity': 'error'}}, - {'records': [{'field_values': {'ct_loan_term_flag': ''}, 'number': 29}, - {'field_values': {'ct_loan_term_flag': '9001'}, 'number': 30}, - {'field_values': {'ct_loan_term_flag': '1'}, 'number': 33}], - 'validation': {'description': "Each value in 'Loan term: NA/NP flag' " - '(separated by semicolons) must equal 900, ' - '988, or 999.', - 'fields': ['ct_loan_term_flag'], - 'id': 'E0160', - 'name': 'ct_loan_term_flag.invalid_enum_value', - 'severity': 'error'}}, - {'records': [{'field_values': {'ct_loan_term': 'must be blank'}, - 'number': 36}], - 'validation': {'description': "When present, 'loan term' must be a whole " - 'number.', - 'fields': ['ct_loan_term'], - 'id': 'E0180', - 'name': 'ct_loan_term.invalid_numeric_format', - 'severity': 'error'}}, - {'records': [{'field_values': {'credit_purpose': '1;2;9001'}, 'number': 39}, - {'field_values': {'credit_purpose': ''}, 'number': 40}], - 'validation': {'description': "Each value in 'credit purpose' (separated by " - 'semicolons) must equal 1, 2, 3, 4, 5, 6, 7, ' - '8, 9, 10, 11, 977, 988, or 999.', - 'fields': ['credit_purpose'], - 'id': 'E0200', - 'name': 'credit_purpose.invalid_enum_value', - 'severity': 'error'}}, - {'records': [{'field_values': {'credit_purpose_ff': 
'123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890XXX'}, - 'number': 45}], - 'validation': {'description': "'Free-form text field for other credit " - "purpose' must not exceed 300 characters in " - 'length', - 'fields': ['credit_purpose_ff'], - 'id': 'E0220', - 'name': 'credit_purpose_ff.invalid_text_length', - 'severity': 'error'}}, - {'records': [{'field_values': {'amount_applied_for_flag': ''}, 'number': 50}, - {'field_values': {'amount_applied_for_flag': '9001'}, - 'number': 51}], - 'validation': {'description': "'Amount applied For: NA/NP flag' must equal " - '900, 988, or 999.', - 'fields': ['amount_applied_for_flag'], - 'id': 'E0240', - 'name': 'amount_applied_for_flag.invalid_enum_value', - 'severity': 'error'}}, - {'records': [{'field_values': {'amount_applied_for': 'nonNumericValue'}, - 'number': 52}, - {'field_values': {'amount_applied_for': 'must be blank'}, - 'number': 55}], - 'validation': {'description': "When present, 'amount applied for' must be a " - 'numeric value.', - 'fields': ['amount_applied_for'], - 'id': 'E0260', - 'name': 'amount_applied_for.invalid_numeric_format', - 'severity': 'error'}}, - {'records': [{'field_values': {'amount_approved': 'nonNumericValue'}, - 'number': 56}], - 'validation': {'description': "When present, 'amount approved or originated' " - 'must be a numeric value.', - 'fields': ['amount_approved'], - 'id': 'E0280', - 'name': 'amount_approved.invalid_numeric_format', - 'severity': 'error'}}, - {'records': [{'field_values': {'action_taken': ''}, 'number': 63}, - {'field_values': {'action_taken': '9001'}, 'number': 64}], - 'validation': {'description': "'Action taken' must equal 1, 2, 3, 4, or 5.", - 'fields': ['action_taken'], - 'id': 'E0300', - 'name': 
'action_taken.invalid_enum_value', - 'severity': 'error'}}, - {'records': [{'field_values': {'action_taken_date': '12312024'}, - 'number': 65}], - 'validation': {'description': "'Action taken date' must be a real calendar " - 'date using YYYYMMDD format.', - 'fields': ['action_taken_date'], - 'id': 'E0320', - 'name': 'action_taken_date.invalid_date_format', - 'severity': 'error'}}, - {'records': [{'field_values': {'denial_reasons': '9001'}, 'number': 70}, - {'field_values': {'denial_reasons': ''}, 'number': 71}, - {'field_values': {'denial_reasons': '999;1; 2'}, 'number': 78}], - 'validation': {'description': "Each value in 'denial reason(s)' (separated " - 'by semicolons)must equal 1, 2, 3, 4, 5, 6, 7, ' - '8, 9, 977, or 999.', - 'fields': ['denial_reasons'], - 'id': 'E0001', - 'name': 'denial_reasons.invalid_enum_value', - 'severity': 'error'}}, - {'records': [{'field_values': {'denial_reasons_ff': '123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890XXX'}, - 'number': 80}], - 'validation': {'description': "'Free-form text field for other denial " - "reason(s)'must not exceed 300 characters in " - 'length.', - 'fields': ['denial_reasons_ff'], - 'id': 'E0360', - 'name': 'denial_reasons_ff.invalid_text_length', - 'severity': 'error'}}, - {'records': [{'field_values': {'pricing_interest_rate_type': ''}, - 'number': 85}, - {'field_values': {'pricing_interest_rate_type': '9001'}, - 'number': 86}, - {'field_values': {'pricing_interest_rate_type': '900'}, - 'number': 87}, - {'field_values': {'pricing_interest_rate_type': '900'}, - 'number': 94}, - {'field_values': {'pricing_interest_rate_type': '900'}, - 'number': 101}], - 'validation': {'description': "Each value in 'Interest rate type' (separated " - 'by semicolons) Must 
equal 1, 2, 3, 4, 5, 6, ' - 'or 999', - 'fields': ['pricing_interest_rate_type'], - 'id': 'E0380', - 'name': 'pricing_interest_rate_type.invalid_enum_value', - 'severity': 'error'}}, - {'records': [{'field_values': {'pricing_init_rate_period': 'nonNumericValue'}, - 'number': 118}], - 'validation': {'description': ("When present, 'initial rate period' must be " - 'a whole number.',), - 'fields': ['pricing_init_rate_period'], - 'id': 'E0400', - 'name': 'pricing_init_rate_period.invalid_numeric_format', - 'severity': 'error'}}, - {'records': [{'field_values': {'pricing_fixed_rate': 'nonNumericValue'}, - 'number': 127}], - 'validation': {'description': "When present, 'fixed rate: interest rate' " - 'must be a numeric value.', - 'fields': ['pricing_fixed_rate'], - 'id': 'E0420', - 'name': 'pricing_fixed_rate.invalid_numeric_format', - 'severity': 'error'}}, - {'records': [{'field_values': {'pricing_adj_index_name': ''}, 'number': 145}, - {'field_values': {'pricing_adj_index_name': '9001'}, - 'number': 146}], - 'validation': {'description': "'Adjustable rate transaction: index name' " - 'must equal 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, ' - '977, or 999.', - 'fields': ['pricing_adj_index_name'], - 'id': 'E0460', - 'name': 'pricing_adj_index_name.invalid_enum_value', - 'severity': 'error'}}, - {'records': [{'field_values': {'pricing_adj_index_name_ff': '123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890XXX'}, - 'number': 154}], - 'validation': {'description': "'Adjustable rate transaction: index name: " - "other' must not exceed 300 characters in " - 'length.', - 'fields': ['pricing_adj_index_name_ff'], - 'id': 'E0480', - 'name': 'pricing_adj_index_name_ff.invalid_text_length', - 'severity': 'error'}}, - {'records': [{'field_values': 
{'pricing_adj_index_value': 'nonNumericValue'}, - 'number': 157}], - 'validation': {'description': "When present, 'adjustable rate transaction: " - "index value' must be a numeric value.", - 'fields': ['pricing_adj_index_value'], - 'id': 'E0500', - 'name': 'pricing_adj_index_value.invalid_numeric_format', - 'severity': 'error'}}, - {'records': [{'field_values': {'pricing_origination_charges': 'nonNumericValue'}, - 'number': 165}], - 'validation': {'description': ("When present, 'total origination charges' " - 'must be a numeric', - 'value.'), - 'fields': ['pricing_origination_charges'], - 'id': 'E0520', - 'name': 'pricing_origination_charges.invalid_numeric_format', - 'severity': 'error'}}, - {'records': [{'field_values': {'pricing_broker_fees': 'nonNumericValue'}, - 'number': 166}], - 'validation': {'description': ("When present, 'amount of total broker fees' " - 'must be a', - 'numeric value.'), - 'fields': ['pricing_broker_fees'], - 'id': 'E0540', - 'name': 'pricing_broker_fees.invalid_numeric_format', - 'severity': 'error'}}, - {'records': [{'field_values': {'pricing_initial_charges': 'nonNumericValue'}, - 'number': 167}], - 'validation': {'description': "When present, 'initial annual charges' must " - 'be anumeric value.', - 'fields': ['pricing_initial_charges'], - 'id': 'E0560', - 'name': 'pricing_initial_charges.invalid_numeric_format', - 'severity': 'error'}}, - {'records': [{'field_values': {'pricing_mca_addcost_flag': ''}, 'number': 168}, - {'field_values': {'pricing_mca_addcost_flag': '99009001'}, - 'number': 169}], - 'validation': {'description': "'MCA/sales-based: additional cost for " - 'merchant cash advances or other sales-based ' - "financing: NA flag' must equal 900 or 999.", - 'fields': ['pricing_mca_addcost_flag'], - 'id': 'E0580', - 'name': 'pricing_mca_addcost_flag.invalid_enum_value', - 'severity': 'error'}}, - {'records': [{'field_values': {'pricing_mca_addcost': 'nonNumericValue'}, - 'number': 171}, - {'field_values': 
{'pricing_mca_addcost': 'must be blank'}, - 'number': 172}], - 'validation': {'description': "When present, 'MCA/sales-based: additional " - 'cost for merchant cash advances or other ' - "sales-based financing' must be a numeric " - 'value', - 'fields': ['pricing_mca_addcost'], - 'id': 'E0600', - 'name': 'pricing_mca_addcost.invalid_numeric_format', - 'severity': 'error'}}, - {'records': [{'field_values': {'pricing_prepenalty_allowed': ''}, - 'number': 174}, - {'field_values': {'pricing_prepenalty_allowed': '9001'}, - 'number': 175}], - 'validation': {'description': "'Prepayment penalty could be imposed' must " - 'equal 1, 2, or 999.', - 'fields': ['pricing_prepenalty_allowed'], - 'id': 'E0620', - 'name': 'pricing_prepenalty_allowed.invalid_enum_value', - 'severity': 'error'}}, - {'records': [{'field_values': {'pricing_prepenalty_exists': ''}, - 'number': 176}, - {'field_values': {'pricing_prepenalty_exists': '9001'}, - 'number': 177}], - 'validation': {'description': "'Prepayment penalty exists' must equal 1, 2, " - 'or 999.', - 'fields': ['pricing_prepenalty_exists'], - 'id': 'E0640', - 'name': 'pricing_prepenalty_exists.invalid_enum_value', - 'severity': 'error'}}, - {'records': [{'field_values': {'census_tract_adr_type': ''}, 'number': 178}, - {'field_values': {'census_tract_adr_type': '9001'}, - 'number': 179}], - 'validation': {'description': "'Census tract: type of address' must equal 1, " - '2, 3, or 988.', - 'fields': ['census_tract_adr_type'], - 'id': 'E0640', - 'name': 'census_tract_adr_type.invalid_enum_value', - 'severity': 'error'}}, - {'records': [{'field_values': {'census_tract_number': '1234567890'}, - 'number': 181}, - {'field_values': {'census_tract_number': 'must be blank'}, - 'number': 182}], - 'validation': {'description': "When present, 'census tract: tract number' " - 'must be a GEOID with exactly 11 digits.', - 'fields': ['census_tract_number'], - 'id': 'E0680', - 'name': 'census_tract_number.invalid_text_length', - 'severity': 'error'}}, - 
{'records': [{'field_values': {'gross_annual_revenue_flag': ''}, - 'number': 187}, - {'field_values': {'gross_annual_revenue_flag': '99009001'}, - 'number': 188}], - 'validation': {'description': "'Gross annual revenue: NP flag' must equal " - '900 or 988.', - 'fields': ['gross_annual_revenue_flag'], - 'id': 'E0700', - 'name': 'gross_annual_revenue_flag.invalid_enum_value', - 'severity': 'error'}}, - {'records': [{'field_values': {'gross_annual_revenue': 'nonNumericValue'}, - 'number': 189}, - {'field_values': {'gross_annual_revenue': 'must be blank'}, - 'number': 190}], - 'validation': {'description': "When present, 'gross annual revenue' must be " - 'a numeric value.', - 'fields': ['gross_annual_revenue'], - 'id': 'E0720', - 'name': 'gross_annual_revenue.invalid_numeric_format', - 'severity': 'error'}}, - {'records': [{'field_values': {'naics_code_flag': ''}, 'number': 192}, - {'field_values': {'naics_code_flag': '9001'}, 'number': 193}], - 'validation': {'description': "'North American Industry Classification " - "System (NAICS) code: NP flag' must equal 900 " - 'or 988.', - 'fields': ['naics_code_flag'], - 'id': 'E0720', - 'name': 'naics_code_flag.invalid_enum_value', - 'severity': 'error'}}, - {'records': [{'field_values': {'naics_code': 'notDigits'}, 'number': 196}], - 'validation': {'description': "'North American Industry Classification " - "System (NAICS) code' may only contain numeric " - 'characters.', - 'fields': ['naics_code'], - 'id': 'E0761', - 'name': 'naics_code.invalid_naics_format', - 'severity': 'error'}}, - {'records': [{'field_values': {'number_of_workers': ''}, 'number': 199}, - {'field_values': {'number_of_workers': '9001'}, 'number': 200}], - 'validation': {'description': "'Number of workers' must equal 1, 2, 3, 4, 5, " - '6, 7, 8, 9, or 988.', - 'fields': ['number_of_workers'], - 'id': 'E0780', - 'name': 'number_of_workers.invalid_enum_value', - 'severity': 'error'}}, - {'records': [{'field_values': {'time_in_business_type': ''}, 'number': 
201}, - {'field_values': {'time_in_business_type': '9001'}, - 'number': 202}], - 'validation': {'description': "'Time in business: type of response' must " - 'equal 1, 2, 3, or 988.', - 'fields': ['time_in_business_type'], - 'id': 'E0800', - 'name': 'time_in_business_type.invalid_enum_value', - 'severity': 'error'}}, - {'records': [{'field_values': {'time_in_business': 'must be blank'}, - 'number': 205}], - 'validation': {'description': "When present, 'time in business' must be a " - 'whole number.', - 'fields': ['time_in_business'], - 'id': 'E0820', - 'name': 'time_in_business.invalid_numeric_format', - 'severity': 'error'}}, - {'records': [{'field_values': {'business_ownership_status': '1;2; 9001'}, - 'number': 207}, - {'field_values': {'business_ownership_status': ''}, - 'number': 208}], - 'validation': {'description': "Each value in 'business ownership status' " - '(separated by semicolons) must equal 1, 2, 3, ' - '955, 966, or 988.', - 'fields': ['business_ownership_status'], - 'id': 'E0840', - 'name': 'business_ownership_status.invalid_enum_value', - 'severity': 'error'}}, - {'records': [{'field_values': {'num_principal_owners_flag': ''}, - 'number': 211}, - {'field_values': {'num_principal_owners_flag': '9001'}, - 'number': 212}], - 'validation': {'description': "'Number of principal owners: NP flag' must " - 'equal 900 or 988.', - 'fields': ['num_principal_owners_flag'], - 'id': 'E0860', - 'name': 'num_principal_owners_flag.invalid_enum_value', - 'severity': 'error'}}, - {'records': [{'field_values': {'num_principal_owners': '9001'}, 'number': 213}, - {'field_values': {'num_principal_owners': 'must be blank'}, - 'number': 214}], - 'validation': {'description': "When present, 'number of principal owners' " - 'must equal 0, 1, 2, 3, or 4.', - 'fields': ['num_principal_owners'], - 'id': 'E0880', - 'name': 'num_principal_owners.invalid_enum_value', - 'severity': 'error'}}, - {'records': [{'field_values': {'po_1_ethnicity': '9001;1'}, 'number': 216}], - 
'validation': {'description': "When present, each value in 'ethnicity of " - "principal owner 1' (separated by semicolons) " - 'must equal 1, 11, 12, 13, 14, 2, 966, 977, or ' - '988.', - 'fields': ['po_1_ethnicity'], - 'id': 'E0900', - 'name': 'po_1_ethnicity.invalid_enum_value', - 'severity': 'error'}}, - {'records': [{'field_values': {'po_1_ethnicity_ff': '123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890XXX'}, - 'number': 228}], - 'validation': {'description': "'Ethnicity of principal owner 1: free-form " - "text field for other Hispanic or Latino' must " - 'not exceed 300 characters in length.', - 'fields': ['po_1_ethnicity_ff'], - 'id': 'E0920', - 'name': 'po_1_ethnicity_ff.invalid_text_length', - 'severity': 'error'}}, - {'records': [{'field_values': {'po_1_race': '9001;1'}, 'number': 240}], - 'validation': {'description': "When present, each value in 'race of " - "principal owner 1' (separated by semicolons) " - 'must equal 1, 2, 21, 22, 23, 24, 25, 26, 27, ' - '3, 31, 32, 33, 34, 35, 36, 37, 4, 41, 42, 43, ' - '44, 5, 966, 971, 972, 973, 974, or 988.', - 'fields': ['po_1_race'], - 'id': 'E0940', - 'name': 'po_1_race.invalid_enum_value', - 'severity': 'error'}}, - {'records': [{'field_values': {'po_1_race_anai_ff': '123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890XXX'}, - 'number': 252}], - 'validation': {'description': "'Race of principal owner 1: free-form text " - 'field for American Indian or Alaska Native ' - "Enrolled or Principal Tribe' must not exceed " - 
'300 characters in length.', - 'fields': ['po_1_race_anai_ff'], - 'id': 'E0960', - 'name': 'po_1_race_anai_ff.invalid_text_length', - 'severity': 'error'}}, - {'records': [{'field_values': {'po_1_race_asian_ff': '123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890XXX'}, - 'number': 264}], - 'validation': {'description': "'Race of principal owner 1: free-form text " - "field for other Asian' must not exceed 300 " - 'characters in length.', - 'fields': ['po_1_race_asian_ff'], - 'id': 'E0980', - 'name': 'po_1_race_asian_ff.invalid_text_length', - 'severity': 'error'}}, - {'records': [{'field_values': {'po_1_race_baa_ff': '123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890XXX'}, - 'number': 276}], - 'validation': {'description': "'Race of principal owner 1: free-form text " - "field for other Black or African American' " - 'must not exceed 300 characters in length.', - 'fields': ['po_1_race_baa_ff'], - 'id': 'E1000', - 'name': 'po_1_race_baa_ff.invalid_text_length', - 'severity': 'error'}}, - {'records': [{'field_values': {'po_1_race_pi_ff': '123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890XXX'}, - 'number': 288}], - 'validation': {'description': "'Race of principal owner 1: free-form text " - "field for other Pacific Islander race' 
must " - 'not exceed 300 characters in length.', - 'fields': ['po_1_race_pi_ff'], - 'id': 'E1020', - 'name': 'po_1_race_pi_ff.invalid_text_length', - 'severity': 'error'}}, - {'records': [{'field_values': {'po_1_gender_flag': '9001'}, 'number': 300}], - 'validation': {'description': "When present, 'sex/gender of principal owner " - "1: NP flag' must equal 1, 966, or 988.", - 'fields': ['po_1_gender_flag'], - 'id': 'E1040', - 'name': 'po_1_gender_flag.invalid_enum_value', - 'severity': 'error'}}, - {'records': [{'field_values': {'po_1_gender_ff': '123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890XXX'}, - 'number': 304}], - 'validation': {'description': "'Sex/gender of principal owner 1: free-form " - "text field for self-identified sex/gender' " - 'must not exceed 300 characters in length.', - 'fields': ['po_1_gender_ff'], - 'id': 'E1060', - 'name': 'po_1_gender_ff.invalid_text_length', - 'severity': 'error'}}, - {'records': [{'field_values': {'po_2_ethnicity': '9001;1'}, 'number': 217}], - 'validation': {'description': "When present, each value in 'ethnicity of " - "principal owner 2' (separated by semicolons) " - 'must equal 1, 11, 12, 13, 14, 2, 966, 977, or ' - '988.', - 'fields': ['po_2_ethnicity'], - 'id': 'E0900', - 'name': 'po_2_ethnicity.invalid_enum_value', - 'severity': 'error'}}, - {'records': [{'field_values': {'po_2_ethnicity_ff': '123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890XXX'}, - 'number': 229}], - 'validation': {'description': "'Ethnicity of 
principal owner 2: free-form " - "text field for other Hispanic or Latino' must " - 'not exceed 300 characters in length.', - 'fields': ['po_2_ethnicity_ff'], - 'id': 'E0920', - 'name': 'po_2_ethnicity_ff.invalid_text_length', - 'severity': 'error'}}, - {'records': [{'field_values': {'po_2_race': '9001;1'}, 'number': 241}], - 'validation': {'description': "When present, each value in 'race of " - "principal owner 2' (separated by semicolons) " - 'must equal 1, 2, 21, 22, 23, 24, 25, 26, 27, ' - '3, 31, 32, 33, 34, 35, 36, 37, 4, 41, 42, 43, ' - '44, 5, 966, 971, 972, 973, 974, or 988.', - 'fields': ['po_2_race'], - 'id': 'E0940', - 'name': 'po_2_race.invalid_enum_value', - 'severity': 'error'}}, - {'records': [{'field_values': {'po_2_race_anai_ff': '123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890XXX'}, - 'number': 253}], - 'validation': {'description': "'Race of principal owner 2: free-form text " - 'field for American Indian or Alaska Native ' - "Enrolled or Principal Tribe' must not exceed " - '300 characters in length.', - 'fields': ['po_2_race_anai_ff'], - 'id': 'E0960', - 'name': 'po_2_race_anai_ff.invalid_text_length', - 'severity': 'error'}}, - {'records': [{'field_values': {'po_2_race_asian_ff': '123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890XXX'}, - 'number': 265}], - 'validation': {'description': "'Race of principal owner 2: free-form text " - "field for other Asian' must not exceed 300 " - 'characters in length.', - 'fields': ['po_2_race_asian_ff'], - 'id': 
'E0980', - 'name': 'po_2_race_asian_ff.invalid_text_length', - 'severity': 'error'}}, - {'records': [{'field_values': {'po_2_race_baa_ff': '123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890XXX'}, - 'number': 277}], - 'validation': {'description': "'Race of principal owner 2: free-form text " - "field for other Black or African American' " - 'must not exceed 300 characters in length.', - 'fields': ['po_2_race_baa_ff'], - 'id': 'E1000', - 'name': 'po_2_race_baa_ff.invalid_text_length', - 'severity': 'error'}}, - {'records': [{'field_values': {'po_2_race_pi_ff': '123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890XXX'}, - 'number': 289}], - 'validation': {'description': "'Race of principal owner 2: free-form text " - "field for other Pacific Islander race' must " - 'not exceed 300 characters in length.', - 'fields': ['po_2_race_pi_ff'], - 'id': 'E1020', - 'name': 'po_2_race_pi_ff.invalid_text_length', - 'severity': 'error'}}, - {'records': [{'field_values': {'po_2_gender_flag': '9001'}, 'number': 301}], - 'validation': {'description': "When present, 'sex/gender of principal owner " - "2: NP flag' must equal 1, 966, or 988.", - 'fields': ['po_2_gender_flag'], - 'id': 'E1040', - 'name': 'po_2_gender_flag.invalid_enum_value', - 'severity': 'error'}}, - {'records': [{'field_values': {'po_2_gender_ff': 
'123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890XXX'}, - 'number': 305}], - 'validation': {'description': "'Sex/gender of principal owner 2: free-form " - "text field for self-identified sex/gender' " - 'must not exceed 300 characters in length.', - 'fields': ['po_2_gender_ff'], - 'id': 'E1060', - 'name': 'po_2_gender_ff.invalid_text_length', - 'severity': 'error'}}, - {'records': [{'field_values': {'po_3_ethnicity': '9001;1'}, 'number': 218}], - 'validation': {'description': "When present, each value in 'ethnicity of " - "principal owner 3' (separated by semicolons) " - 'must equal 1, 11, 12, 13, 14, 2, 966, 977, or ' - '988.', - 'fields': ['po_3_ethnicity'], - 'id': 'E0900', - 'name': 'po_3_ethnicity.invalid_enum_value', - 'severity': 'error'}}, - {'records': [{'field_values': {'po_3_ethnicity_ff': '123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890XXX'}, - 'number': 230}], - 'validation': {'description': "'Ethnicity of principal owner 3: free-form " - "text field for other Hispanic or Latino' must " - 'not exceed 300 characters in length.', - 'fields': ['po_3_ethnicity_ff'], - 'id': 'E0920', - 'name': 'po_3_ethnicity_ff.invalid_text_length', - 'severity': 'error'}}, - {'records': [{'field_values': {'po_3_race': '9001;1'}, 'number': 242}], - 'validation': {'description': "When present, each value in 'race of " - "principal owner 3' (separated by semicolons) " - 'must equal 1, 2, 21, 22, 23, 24, 25, 26, 27, ' - '3, 31, 32, 33, 34, 35, 36, 37, 4, 41, 42, 43, ' - '44, 5, 
966, 971, 972, 973, 974, or 988.', - 'fields': ['po_3_race'], - 'id': 'E0940', - 'name': 'po_3_race.invalid_enum_value', - 'severity': 'error'}}, - {'records': [{'field_values': {'po_3_race_anai_ff': '123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890XXX'}, - 'number': 254}], - 'validation': {'description': "'Race of principal owner 3: free-form text " - 'field for American Indian or Alaska Native ' - "Enrolled or Principal Tribe' must not exceed " - '300 characters in length.', - 'fields': ['po_3_race_anai_ff'], - 'id': 'E0960', - 'name': 'po_3_race_anai_ff.invalid_text_length', - 'severity': 'error'}}, - {'records': [{'field_values': {'po_3_race_asian_ff': '123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890XXX'}, - 'number': 266}], - 'validation': {'description': "'Race of principal owner 3: free-form text " - "field for other Asian' must not exceed 300 " - 'characters in length.', - 'fields': ['po_3_race_asian_ff'], - 'id': 'E0980', - 'name': 'po_3_race_asian_ff.invalid_text_length', - 'severity': 'error'}}, - {'records': [{'field_values': {'po_3_race_baa_ff': '123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890XXX'}, - 'number': 278}], - 'validation': {'description': "'Race of principal owner 3: free-form text " - "field for 
other Black or African American' " - 'must not exceed 300 characters in length.', - 'fields': ['po_3_race_baa_ff'], - 'id': 'E1000', - 'name': 'po_3_race_baa_ff.invalid_text_length', - 'severity': 'error'}}, - {'records': [{'field_values': {'po_3_race_pi_ff': '123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890XXX'}, - 'number': 290}], - 'validation': {'description': "'Race of principal owner 3: free-form text " - "field for other Pacific Islander race' must " - 'not exceed 300 characters in length.', - 'fields': ['po_3_race_pi_ff'], - 'id': 'E1020', - 'name': 'po_3_race_pi_ff.invalid_text_length', - 'severity': 'error'}}, - {'records': [{'field_values': {'po_3_gender_flag': '9001'}, 'number': 302}], - 'validation': {'description': "When present, 'sex/gender of principal owner " - "3: NP flag' must equal 1, 966, or 988.", - 'fields': ['po_3_gender_flag'], - 'id': 'E1040', - 'name': 'po_3_gender_flag.invalid_enum_value', - 'severity': 'error'}}, - {'records': [{'field_values': {'po_3_gender_ff': '123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890XXX'}, - 'number': 306}], - 'validation': {'description': "'Sex/gender of principal owner 3: free-form " - "text field for self-identified sex/gender' " - 'must not exceed 300 characters in length.', - 'fields': ['po_3_gender_ff'], - 'id': 'E1060', - 'name': 'po_3_gender_ff.invalid_text_length', - 'severity': 'error'}}, - {'records': [{'field_values': {'po_4_ethnicity': '9001;1'}, 'number': 219}], - 'validation': {'description': "When 
present, each value in 'ethnicity of " - "principal owner 4' (separated by semicolons) " - 'must equal 1, 11, 12, 13, 14, 2, 966, 977, or ' - '988.', - 'fields': ['po_4_ethnicity'], - 'id': 'E0900', - 'name': 'po_4_ethnicity.invalid_enum_value', - 'severity': 'error'}}, - {'records': [{'field_values': {'po_4_ethnicity_ff': '123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890XXX'}, - 'number': 231}], - 'validation': {'description': "'Ethnicity of principal owner 4: free-form " - "text field for other Hispanic or Latino' must " - 'not exceed 300 characters in length.', - 'fields': ['po_4_ethnicity_ff'], - 'id': 'E0920', - 'name': 'po_4_ethnicity_ff.invalid_text_length', - 'severity': 'error'}}, - {'records': [{'field_values': {'po_4_race': '9001;1'}, 'number': 243}], - 'validation': {'description': "When present, each value in 'race of " - "principal owner 4' (separated by semicolons) " - 'must equal 1, 2, 21, 22, 23, 24, 25, 26, 27, ' - '3, 31, 32, 33, 34, 35, 36, 37, 4, 41, 42, 43, ' - '44, 5, 966, 971, 972, 973, 974, or 988.', - 'fields': ['po_4_race'], - 'id': 'E0940', - 'name': 'po_4_race.invalid_enum_value', - 'severity': 'error'}}, - {'records': [{'field_values': {'po_4_race_anai_ff': '123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890XXX'}, - 'number': 255}], - 'validation': {'description': "'Race of principal owner 4: free-form text " - 'field for American Indian or Alaska Native ' - "Enrolled or Principal Tribe' must not exceed " - '300 characters in length.', - 
'fields': ['po_4_race_anai_ff'], - 'id': 'E0960', - 'name': 'po_4_race_anai_ff.invalid_text_length', - 'severity': 'error'}}, - {'records': [{'field_values': {'po_4_race_asian_ff': '123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890XXX'}, - 'number': 267}], - 'validation': {'description': "'Race of principal owner 4: free-form text " - "field for other Asian' must not exceed 300 " - 'characters in length.', - 'fields': ['po_4_race_asian_ff'], - 'id': 'E0980', - 'name': 'po_4_race_asian_ff.invalid_text_length', - 'severity': 'error'}}, - {'records': [{'field_values': {'po_4_race_baa_ff': '123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890XXX'}, - 'number': 279}], - 'validation': {'description': "'Race of principal owner 4: free-form text " - "field for other Black or African American' " - 'must not exceed 300 characters in length.', - 'fields': ['po_4_race_baa_ff'], - 'id': 'E1000', - 'name': 'po_4_race_baa_ff.invalid_text_length', - 'severity': 'error'}}, - {'records': [{'field_values': {'po_4_race_pi_ff': '123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890XXX'}, - 'number': 291}], - 'validation': {'description': "'Race of principal owner 4: free-form text " - "field for other Pacific Islander race' must " - 'not exceed 300 
characters in length.', - 'fields': ['po_4_race_pi_ff'], - 'id': 'E1020', - 'name': 'po_4_race_pi_ff.invalid_text_length', - 'severity': 'error'}}, - {'records': [{'field_values': {'po_4_gender_flag': '9001'}, 'number': 303}], - 'validation': {'description': "When present, 'sex/gender of principal owner " - "4: NP flag' must equal 1, 966, or 988.", - 'fields': ['po_4_gender_flag'], - 'id': 'E1040', - 'name': 'po_4_gender_flag.invalid_enum_value', - 'severity': 'error'}}, - {'records': [{'field_values': {'po_4_gender_ff': '123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890XXX'}, - 'number': 307}], - 'validation': {'description': "'Sex/gender of principal owner 4: free-form " - "text field for self-identified sex/gender' " - 'must not exceed 300 characters in length.', - 'fields': ['po_4_gender_ff'], - 'id': 'E1060', - 'name': 'po_4_gender_ff.invalid_text_length', - 'severity': 'error'}}] - -``` - -
- -## Running Test - -This repository is using `pytest`. If using VS Code, tests can be completed within -a Dev Container. If using local terminal or console, you can use this command -`poetry run pytest` in the root directory - -
- See Example of Pytest Output - -```sh - -$ poetry run pytest -================================================================================== -test session starts ================================================================================== -platform darwin -- Python 3.11.5, pytest-7.4.0, pluggy-1.3.0 -- /Library/Caches/pypoetry/virtualenvs/regtech-data-validator-uJQWmvcM-py3.11/bin/python +$ pytest +======================================================= test session starts ======================================================= +platform darwin -- Python 3.11.2, pytest-7.4.0, pluggy-1.3.0 -- /Users/keelerh/Library/Caches/pypoetry/virtualenvs/regtech-data-validator-Sa0Sf38s-py3.11/bin/python cachedir: .pytest_cache -rootdir: /Projects/regtech-data-validator +rootdir: /Users/keelerh/Projects/regtech-data-validator configfile: pyproject.toml -testpaths: src/tests +testpaths: tests plugins: cov-4.1.0, typeguard-4.1.3 -collected 117 items - -src/tests/test_check_functions.py::TestInvalidDateFormat::test_with_valid_date <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 0%] -src/tests/test_check_functions.py::TestInvalidDateFormat::test_with_invalid_date <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 1%] -src/tests/test_check_functions.py::TestInvalidDateFormat::test_with_invalid_day <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 2%] -src/tests/test_check_functions.py::TestInvalidDateFormat::test_with_invalid_month <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 3%] -src/tests/test_check_functions.py::TestInvalidDateFormat::test_with_invalid_year <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 4%] -src/tests/test_check_functions.py::TestInvalidDateFormat::test_with_invalid_format <- 
../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 5%] -src/tests/test_check_functions.py::TestInvalidDateFormat::test_with_invalid_type <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 5%] -src/tests/test_check_functions.py::TestDuplicatesInField::test_with_blank <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 6%] -src/tests/test_check_functions.py::TestDuplicatesInField::test_with_no_duplicates <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 7%] -src/tests/test_check_functions.py::TestDuplicatesInField::test_with_duplicates <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 8%] -src/tests/test_check_functions.py::TestInvalidNumberOfValues::test_with_in_range <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 9%] -src/tests/test_check_functions.py::TestInvalidNumberOfValues::test_with_lower_range_value <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 10%] -src/tests/test_check_functions.py::TestInvalidNumberOfValues::test_with_invalid_lower_range_value <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 11%] -src/tests/test_check_functions.py::TestInvalidNumberOfValues::test_with_upper_range_value <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 11%] -src/tests/test_check_functions.py::TestInvalidNumberOfValues::test_with_invalid_upper_range_value <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 12%] -src/tests/test_check_functions.py::TestInvalidNumberOfValues::test_valid_with_no_upper_bound <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 13%] 
-src/tests/test_check_functions.py::TestInvalidNumberOfValues::test_invalid_with_no_upper_bound <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 14%] -src/tests/test_check_functions.py::TestMultiValueFieldRestriction::test_with_invalid_values <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 15%] -src/tests/test_check_functions.py::TestMultiValueFieldRestriction::test_with_valid_length <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 16%] -src/tests/test_check_functions.py::TestMultiValueFieldRestriction::test_with_valid_values <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 17%] -src/tests/test_check_functions.py::TestMultiInvalidNumberOfValues::test_inside_maxlength <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 17%] -src/tests/test_check_functions.py::TestMultiInvalidNumberOfValues::test_on_maxlength <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 18%] -src/tests/test_check_functions.py::TestMultiInvalidNumberOfValues::test_with_blank <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 19%] -src/tests/test_check_functions.py::TestMultiInvalidNumberOfValues::test_invalid_length_with_blank <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 20%] -src/tests/test_check_functions.py::TestMultiInvalidNumberOfValues::test_invalid_length_with_blank_and_ignored_values <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 21%] -src/tests/test_check_functions.py::TestMultiInvalidNumberOfValues::test_valid_length_with_blank_and_ignored_values <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 22%] 
-src/tests/test_check_functions.py::TestMultiInvalidNumberOfValues::test_outside_maxlength <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 23%] -src/tests/test_check_functions.py::TestMultiInvalidNumberOfValues::test_valid_length_with_non_blank <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 23%] -src/tests/test_check_functions.py::TestMultiInvalidNumberOfValues::test_invalid_length_with_non_blank <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 24%] -src/tests/test_check_functions.py::TestMultiInvalidNumberOfValues::test_valid_length_with_ignored_values <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 25%] -src/tests/test_check_functions.py::TestMultiInvalidNumberOfValues::test_invalid_length_with_ignored_values <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 26%] -src/tests/test_check_functions.py::TestMultiInvalidNumberOfValues::test_valid_length_with_blank_values <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 27%] -src/tests/test_check_functions.py::TestMultiInvalidNumberOfValues::test_invalid_length_with_blank_values <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 28%] -src/tests/test_check_functions.py::TestInvalidEnumValue::test_with_valid_enum_values <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 29%] -src/tests/test_check_functions.py::TestInvalidEnumValue::test_with_is_valid_enums <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 29%] -src/tests/test_check_functions.py::TestInvalidEnumValue::test_with_valid_blank <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 30%] 
-src/tests/test_check_functions.py::TestInvalidEnumValue::test_with_invalid_blank <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 31%] -src/tests/test_check_functions.py::TestIsNumber::test_number_value <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 32%] -src/tests/test_check_functions.py::TestIsNumber::test_non_number_value <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 33%] -src/tests/test_check_functions.py::TestIsNumber::test_decimal_numeric_value <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 34%] -src/tests/test_check_functions.py::TestIsNumber::test_alphanumeric_value <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 35%] -src/tests/test_check_functions.py::TestIsNumber::test_negative_numeric_value <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 35%] -src/tests/test_check_functions.py::TestIsNumber::test_negative_decimal_value <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 36%] -src/tests/test_check_functions.py::TestIsNumber::test_valid_blank <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 37%] -src/tests/test_check_functions.py::TestIsNumber::test_invalid_blank <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 38%] -src/tests/test_check_functions.py::TestConditionalFieldConflict::test_conditional_field_conflict_correct <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 39%] -src/tests/test_check_functions.py::TestConditionalFieldConflict::test_conditional_field_conflict_incorrect <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 40%] 
-src/tests/test_check_functions.py::TestEnumValueConflict::test_enum_value_confict_correct <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 41%] -src/tests/test_check_functions.py::TestEnumValueConflict::test_enum_value_confict_incorrect <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 41%] -src/tests/test_check_functions.py::TestHasCorrectLength::test_with_accept_blank_value <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 42%] -src/tests/test_check_functions.py::TestHasCorrectLength::test_with_invalid_blank_value <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 43%] -src/tests/test_check_functions.py::TestHasCorrectLength::test_with_correct_length <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 44%] -src/tests/test_check_functions.py::TestHasCorrectLength::test_with_incorrect_length <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 45%] -src/tests/test_check_functions.py::TestIsValidCode::test_with_valid_code <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 46%] -src/tests/test_check_functions.py::TestIsValidCode::test_with_invalid_code <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 47%] -src/tests/test_check_functions.py::TestIsValidCode::test_with_accepted_blank <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 47%] -src/tests/test_check_functions.py::TestIsValidCode::test_with_invalid_blank <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 48%] -src/tests/test_check_functions.py::TestIsGreaterThan::test_with_greater_min_value <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 49%] 
-src/tests/test_check_functions.py::TestIsGreaterThan::test_with_smaller_min_value <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 50%] -src/tests/test_check_functions.py::TestIsGreaterThan::test_with_equal_value <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 51%] -src/tests/test_check_functions.py::TestIsGreaterThan::test_with_valid_blank_value <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 52%] -src/tests/test_check_functions.py::TestIsGreaterThan::test_with_invalid_blank_value <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 52%] -src/tests/test_check_functions.py::TestIsGreaterThanOrEqualTo::test_with_greater_min_value <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 53%] -src/tests/test_check_functions.py::TestIsGreaterThanOrEqualTo::test_with_smaller_min_value <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 54%] -src/tests/test_check_functions.py::TestIsGreaterThanOrEqualTo::test_with_equal_value <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 55%] -src/tests/test_check_functions.py::TestIsGreaterThanOrEqualTo::test_with_valid_blank_value <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 56%] -src/tests/test_check_functions.py::TestIsGreaterThanOrEqualTo::test_with_invalid_blank_value <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 57%] -src/tests/test_check_functions.py::TestIsLessThan::test_with_greater_max_value <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 58%] -src/tests/test_check_functions.py::TestIsLessThan::test_with_less_max_value <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py 
PASSED [ 58%] -src/tests/test_check_functions.py::TestIsLessThan::test_with_equal_max_value <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 59%] -src/tests/test_check_functions.py::TestIsLessThan::test_with_valid_blank_space <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 60%] -src/tests/test_check_functions.py::TestIsLessThan::test_with_invalid_blank_space <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 61%] -src/tests/test_check_functions.py::TestHasValidFormat::test_with_valid_data_alphanumeric <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 62%] -src/tests/test_check_functions.py::TestHasValidFormat::test_with_invalid_data_alphanumeric <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 63%] -src/tests/test_check_functions.py::TestHasValidFormat::test_with_accepting_blank <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 64%] -src/tests/test_check_functions.py::TestHasValidFormat::test_with_not_accepting_blank <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 64%] -src/tests/test_check_functions.py::TestHasValidFormat::test_with_valid_data_ip <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 65%] -src/tests/test_check_functions.py::TestHasValidFormat::test_with_invalid_data_ip <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 66%] -src/tests/test_check_functions.py::TestIsUniqueColumn::test_with_valid_series <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 67%] -src/tests/test_check_functions.py::TestIsUniqueColumn::test_with_multiple_valid_series <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 68%] 
-src/tests/test_check_functions.py::TestIsUniqueColumn::test_with_invalid_series <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 69%] -src/tests/test_check_functions.py::TestIsUniqueColumn::test_with_multiple_items_series <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 70%] -src/tests/test_check_functions.py::TestIsUniqueColumn::test_with_multiple_invalid_series <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 70%] -src/tests/test_check_functions.py::TestIsUniqueColumn::test_with_multiple_mix_series <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 71%] -src/tests/test_check_functions.py::TestIsUniqueColumn::test_with_blank_value_series <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 72%] -src/tests/test_check_functions.py::TestHasValidFieldsetPair::test_with_correct_is_not_equal_condition <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 73%] -src/tests/test_check_functions.py::TestHasValidFieldsetPair::test_with_correct_is_equal_condition <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 74%] -src/tests/test_check_functions.py::TestHasValidFieldsetPair::test_with_correct_is_equal_and_not_equal_conditions <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 75%] -src/tests/test_check_functions.py::TestHasValidFieldsetPair::test_with_value_not_in_condition_values <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 76%] -src/tests/test_check_functions.py::TestHasValidFieldsetPair::test_with_incorrect_is_not_equal_condition <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 76%] 
-src/tests/test_check_functions.py::TestHasValidFieldsetPair::test_with_incorrect_is_equal_condition <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 77%] -src/tests/test_check_functions.py::TestHasValidFieldsetPair::test_with_incorrect_is_equal_and_not_equal_conditions <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 78%] -src/tests/test_check_functions.py::TestIsValidId::test_with_correct_values <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 79%] -src/tests/test_check_functions.py::TestIsValidId::test_with_incorrect_values <- ../../../../workspaces/regtech-data-validator/src/tests/test_check_functions.py PASSED [ 80%] -src/tests/test_checks.py::TestSBLCheck::test_no_id_check <- ../../../../workspaces/regtech-data-validator/src/tests/test_checks.py PASSED [ 81%] -src/tests/test_checks.py::TestSBLCheck::test_no_name_check <- ../../../../workspaces/regtech-data-validator/src/tests/test_checks.py PASSED [ 82%] -src/tests/test_checks.py::TestSBLCheck::test_name_and_id_check <- ../../../../workspaces/regtech-data-validator/src/tests/test_checks.py PASSED [ 82%] -src/tests/test_global_data.py::TestGlobalData::test_valid_naics_codes <- ../../../../workspaces/regtech-data-validator/src/tests/test_global_data.py PASSED [ 83%] -src/tests/test_global_data.py::TestGlobalData::test_valid_geoids <- ../../../../workspaces/regtech-data-validator/src/tests/test_global_data.py PASSED [ 84%] -src/tests/test_global_data.py::TestGlobalData::test_invalid_naics_file <- ../../../../workspaces/regtech-data-validator/src/tests/test_global_data.py PASSED [ 85%] -src/tests/test_global_data.py::TestGlobalData::test_invalid_geoids_file <- ../../../../workspaces/regtech-data-validator/src/tests/test_global_data.py PASSED [ 86%] -src/tests/test_sample_data.py::TestValidatingSampleData::test_invalid_data_file PASSED [ 87%] 
-src/tests/test_sample_data.py::TestValidatingSampleData::test_run_validation_on_good_data_invalid_lei PASSED [ 88%] -src/tests/test_sample_data.py::TestValidatingSampleData::test_run_validation_on_good_data_valid_lei PASSED [ 88%] -src/tests/test_sample_data.py::TestValidatingSampleData::test_run_validation_on_bad_data_invalid_lei PASSED [ 89%] -src/tests/test_sample_data.py::TestValidatingSampleData::test_run_validation_on_bad_data_valid_lei PASSED [ 90%] -src/tests/test_schema_functions.py::TestValidate::test_with_valid_dataframe <- ../../../../workspaces/regtech-data-validator/src/tests/test_schema_functions.py PASSED [ 91%] -src/tests/test_schema_functions.py::TestValidate::test_with_valid_lei <- ../../../../workspaces/regtech-data-validator/src/tests/test_schema_functions.py PASSED [ 92%] -src/tests/test_schema_functions.py::TestValidate::test_with_invalid_dataframe <- ../../../../workspaces/regtech-data-validator/src/tests/test_schema_functions.py PASSED [ 93%] -src/tests/test_schema_functions.py::TestValidate::test_with_multi_invalid_dataframe <- ../../../../workspaces/regtech-data-validator/src/tests/test_schema_functions.py PASSED [ 94%] -src/tests/test_schema_functions.py::TestValidate::test_with_invalid_lei <- ../../../../workspaces/regtech-data-validator/src/tests/test_schema_functions.py PASSED [ 94%] -src/tests/test_schema_functions.py::TestValidatePhases::test_with_valid_data <- ../../../../workspaces/regtech-data-validator/src/tests/test_schema_functions.py PASSED [ 95%] -src/tests/test_schema_functions.py::TestValidatePhases::test_with_valid_lei <- ../../../../workspaces/regtech-data-validator/src/tests/test_schema_functions.py PASSED [ 96%] -src/tests/test_schema_functions.py::TestValidatePhases::test_with_invalid_data <- ../../../../workspaces/regtech-data-validator/src/tests/test_schema_functions.py PASSED [ 97%] -src/tests/test_schema_functions.py::TestValidatePhases::test_with_multi_invalid_data_with_phase1 <- 
../../../../workspaces/regtech-data-validator/src/tests/test_schema_functions.py PASSED [ 98%] -src/tests/test_schema_functions.py::TestValidatePhases::test_with_multi_invalid_data_with_phase2 <- ../../../../workspaces/regtech-data-validator/src/tests/test_schema_functions.py PASSED [ 99%] -src/tests/test_schema_functions.py::TestValidatePhases::test_with_invalid_lei <- ../../../../workspaces/regtech-data-validator/src/tests/test_schema_functions.py PASSED [100%] - ----------- coverage: platform darwin, python 3.11.5-final-0 ---------- -Name Stmts Miss Branch BrPart Cover Missing --------------------------------------------------------------------------------- -src/tests/__init__.py 4 0 0 0 100% -src/tests/test_check_functions.py 418 0 0 0 100% -src/tests/test_checks.py 20 2 10 3 83% 9->exit, 16->exit, 24->exit, 25-26 -src/tests/test_global_data.py 19 0 4 0 100% -src/tests/test_sample_data.py 38 0 2 0 100% -src/tests/test_schema_functions.py 78 0 0 0 100% -src/validator/__init__.py 4 0 0 0 100% -src/validator/check_functions.py 184 14 78 0 91% 55-59, 111-121, 275-276, 297-298, 420-421 -src/validator/checks.py 14 0 4 0 100% -src/validator/create_schemas.py 55 1 18 2 96% 69, 74->49 -src/validator/global_data.py 18 0 4 0 100% -src/validator/main.py 25 25 8 0 0% 8-47 -src/validator/phase_validations.py 6 0 0 0 100% -src/validator/schema_template.py 6 0 0 0 100% --------------------------------------------------------------------------------- -TOTAL 889 42 128 5 94% +collected 112 items + +tests/test_check_functions.py::TestInvalidDateFormat::test_with_valid_date PASSED [ 0%] +tests/test_check_functions.py::TestInvalidDateFormat::test_with_invalid_date PASSED [ 1%] +tests/test_check_functions.py::TestInvalidDateFormat::test_with_invalid_day PASSED [ 2%] +tests/test_check_functions.py::TestInvalidDateFormat::test_with_invalid_month PASSED [ 3%] +tests/test_check_functions.py::TestInvalidDateFormat::test_with_invalid_year PASSED [ 4%] +... 
+tests/test_schema_functions.py::TestValidatePhases::test_with_valid_lei PASSED [ 96%] +tests/test_schema_functions.py::TestValidatePhases::test_with_invalid_data PASSED [ 97%] +tests/test_schema_functions.py::TestValidatePhases::test_with_multi_invalid_data_with_phase1 PASSED [ 98%] +tests/test_schema_functions.py::TestValidatePhases::test_with_multi_invalid_data_with_phase2 PASSED [ 99%] +tests/test_schema_functions.py::TestValidatePhases::test_with_invalid_lei PASSED [100%] + + +---------- coverage: platform darwin, python 3.11.2-final-0 ---------- +Name Stmts Miss Branch BrPart Cover Missing +----------------------------------------------------------------------------------------- +regtech_data_validator/check_functions.py 184 14 78 0 91% 55-59, 111-121, 275-276, 297-298, 420-421 +regtech_data_validator/checks.py 15 0 2 0 100% +regtech_data_validator/cli.py 62 62 30 0 0% 1-126 +regtech_data_validator/create_schemas.py 61 3 18 3 92% 94, 99, 133 +regtech_data_validator/global_data.py 12 0 8 0 100% +regtech_data_validator/phase_validations.py 6 0 0 0 100% +regtech_data_validator/schema_template.py 6 0 0 0 100% +----------------------------------------------------------------------------------------- +TOTAL 346 79 136 3 75% + +3 empty files skipped. Coverage XML written to file coverage.xml +``` -================================================================================= 117 passed in 25.14s ================================================================================== -``` +### Test Coverage + +Test coverage details can be found in this project's +[`python-coverage-comment-action-data`](https://github.com/cfpb/regtech-data-validator/tree/python-coverage-comment-action-data) +branch. -
-## Running Linter +## Linting This repository utilizing `black` and `ruff` libraries to check and fix any -formatting issues +formatting issues. ```sh # Example of Ruff with an error @@ -1191,47 +252,53 @@ All done! ✨ 🍰 ✨ 1 file reformatted, 13 files left unchanged. ``` -## (Optional) Using Dev Container and Visual Studio Code Development Setup +## (Optional) Visual Studio Code (VS Code) with Dev Containers setup -Requirements when using Visual Studio Code for Development: +This setup uses [VS Code's Dev Containers extension](https://code.visualstudio.com/docs/devcontainers/containers) +to create a standardized development environment, allowing for a quick setup, and a greater guarantee +that you'll have all the tools you need to start developing. It does rely heavily on +[Docker](https://docs.docker.com/), so some familiarity there is highly recommended. -- Visual Studio Code with Dev Containers extension -- Docker. +### Prerequisites -These instructions will not work if using an alternative editor such as Vim or - Emacs: +- [Docker](https://docs.docker.com/desktop/) +- [Visual Studio Code](https://code.visualstudio.com/Download) + - [Dev Containers extension](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-containers) -- Open this repository within VS Code and press `COMMAND + SHIFT + p` on your - keyboard. This will open the command bar at the top of your window. -- Enter `Dev Containers: Rebuild and Reopen in Container`. VS Code will open a - new window and you'll see a status message towards the bottom right of your - screen that the container is building and attaching. -- This will take a few minutes the first time because Docker needs to build the - container without a build cache. -- You may receive a notification that VS Code wants to perform a reload because - some extensions could not load. Sometimes this happens because extensions are - loaded in conflicting orders and dependencies are not satisfied. 
-- Unit tests can be run through VS Code Test. - ![VS Code Test](images/vscode_test.png) -- Validator can be executed by running `main.py` within a Dev Container. To run - `main.py`, you can run these commands in VSCode terminal. +### Setup instructions -```sh -# Test validating the "good" file -# If passing lei value, pass lei as first arg and csv_path as second arg -python src/validator/main.py 000TESTFIUIDDONOTUSE src/tests/data/sbl-validations-pass.csv -# else just pass the csv_path as arg -python src/validator/main.py src/tests/data/sbl-validations-pass.csv - -# Test validating the "bad" file -python src/validator/main.py 000TESTFIUIDDONOTUSE src/tests/data/sbl-validations-fail.csv -# or -python src/validator/main.py src/tests/data/sbl-validations-fail.csv -``` +1. Open this repository within VS Code and press `COMMAND + SHIFT + p` on your + keyboard. This will open the command bar at the top of your window. +1. Enter `Dev Containers: Rebuild and Reopen in Container`. VS Code will open a + new window and you'll see a status message towards the bottom right of your + screen that the container is building and attaching. +1. This will take a few minutes the first time because Docker needs to build the + container without a build cache. +1. You may receive a notification that VS Code wants to perform a reload because + some extensions could not load. Sometimes this happens because extensions are + loaded in conflicting orders and dependencies are not satisfied. -## Code Coverage -Complete coverage details can be found in [(`python-coverage-comment-action-data`)](https://github.com/cfpb/regtech-data-validator/tree/python-coverage-comment-action-data) +### Running unit tests + +Unit tests can be run through VS Code Test. + +![VS Code Test](images/vscode_test.png) + + +## Contributing + +[CFPB](https://www.consumerfinance.gov/) is developing the +`RegTech Data Validator` in the open to maximize transparency and encourage +third party contributions. 
If you want to contribute, please read and abide by +the terms of the [License](./LICENSE) for this project. Pull Requests are always +welcome. + +## Contact Us + +If you have an inquiry or suggestion for the validator or any SBL related code +please reach out to us at + ## Open source licensing info From 5d85a333f1bfefbf446a45a34b807e8b95cad3fe Mon Sep 17 00:00:00 2001 From: Hans Keeler Date: Wed, 25 Oct 2023 03:15:08 -0400 Subject: [PATCH 16/33] Cleanup badges section of README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 3c30a777..35b47719 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # RegTech Data Validator -Current overall coverage: [![Coverage badge](https://github.com/cfpb/regtech-data-validator/raw/python-coverage-comment-action-data/badge.svg)](https://github.com/cfpb/regtech-data-validator/tree/python-coverage-comment-action-data) +[![Coverage badge](https://github.com/cfpb/regtech-data-validator/raw/python-coverage-comment-action-data/badge.svg)](https://github.com/cfpb/regtech-data-validator/tree/python-coverage-comment-action-data) Python-based tool for parsing and validating CFPB's RegTech-related data submissions. It uses the [Pandera](https://pandera.readthedocs.io/en/stable/) data testing From d2288f4be74e6743310d381e2b7c9f200fa1ee89 Mon Sep 17 00:00:00 2001 From: Hans Keeler Date: Wed, 25 Oct 2023 03:20:47 -0400 Subject: [PATCH 17/33] Moar README.md fix 'em ups --- README.md | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 35b47719..527dfa1f 100644 --- a/README.md +++ b/README.md @@ -15,12 +15,12 @@ submission. For details on this dataset and its validations, please see: ## Setup -The following setup must be completed to running the CLI utilities or doing any +The following setup must be completed prior to running the CLI utilities or doing any development on the project. 
-### Pre-requisites +### Prerequisites -The following software packages are pre-requisites to installing this software. +The following software packages are prerequisites to installing this software. - [Python](https://www.python.org/downloads/) version 3.11 or greater. - [Poetry](https://python-poetry.org/docs/#installation) for Python package management. @@ -80,6 +80,8 @@ $ cfpb-val validate --help $ cfpb-val validate tests/data/sbl-validations-pass.csv + **Note:** No output is returned if the file contains no validations errors or warnings. + 1. Validate file with findings, passing in LEI as context $ cfpb-val validate tests/data/sbl-validations-fail.csv --context lei=000TESTFIUIDDONOTUSE From 2128f694de02d74d29f0100db013076a5d1bbdc0 Mon Sep 17 00:00:00 2001 From: Hans Keeler Date: Mon, 30 Oct 2023 19:57:25 -0400 Subject: [PATCH 18/33] Remove old Makefile --- Makefile | 8 -------- 1 file changed, 8 deletions(-) delete mode 100644 Makefile diff --git a/Makefile b/Makefile deleted file mode 100644 index 99afd87e..00000000 --- a/Makefile +++ /dev/null @@ -1,8 +0,0 @@ -lint: - ruff check src; \ - exit 0; - -lint_and_fix: - ruff check src --fix; \ - black src; \ - exit 0; \ No newline at end of file From 821aae883957fbd4f215b40cccd86073c659cfea Mon Sep 17 00:00:00 2001 From: Hans Keeler Date: Wed, 1 Nov 2023 21:09:55 -0400 Subject: [PATCH 19/33] Change CLI command to `cfpb-val` --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index b4c92ab6..71633b40 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,7 +28,7 @@ tabulate = "^0.9.0" typer = "^0.9.0" [tool.poetry.scripts] -cfpb-comply = 'regtech_data_validator.cli:app' +cfpb-val = 'regtech_data_validator.cli:app' # Black formatting [tool.black] From 4d136baeba403f8ccee29a153e359a16923e1d2d Mon Sep 17 00:00:00 2001 From: Hans Keeler Date: Wed, 1 Nov 2023 21:11:57 -0400 Subject: [PATCH 20/33] Fix 3 misconfigured validations --- 
regtech_data_validator/phase_validations.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/regtech_data_validator/phase_validations.py b/regtech_data_validator/phase_validations.py index 20b23c06..8900480b 100644 --- a/regtech_data_validator/phase_validations.py +++ b/regtech_data_validator/phase_validations.py @@ -31,7 +31,9 @@ from regtech_data_validator.checks import SBLCheck, Severity -def get_phase_1_and_2_validations_for_lei(lei: str | None = None): +def get_phase_1_and_2_validations_for_lei(context: dict[str, str] | None = None): + lei: str | None = context.get('lei', None) if context else None + return { "uid": { "phase_1": [ @@ -70,6 +72,8 @@ def get_phase_1_and_2_validations_for_lei(lei: str | None = None): element_wise=True, regex="^[A-Z0-9]+$", ), + ], + "phase_2": [ SBLCheck( string_contains, id="W0003", @@ -85,7 +89,6 @@ def get_phase_1_and_2_validations_for_lei(lei: str | None = None): end_idx=20, ), ], - "phase_2": [], }, "app_date": { "phase_1": [ @@ -759,7 +762,7 @@ def get_phase_1_and_2_validations_for_lei(lei: str | None = None): "phase_1": [ SBLCheck( is_valid_enum, - id="E0001", + id="E0340", name="denial_reasons.invalid_enum_value", description=( "Each value in 'denial reason(s)' (separated by semicolons)" @@ -1030,12 +1033,12 @@ def get_phase_1_and_2_validations_for_lei(lei: str | None = None): ), SBLCheck( is_greater_than, - id="E0001", + id="W0441", name="pricing_adj_margin.unreasonable_numeric_value", description=( "When present, 'adjustable rate transaction: margin' should generally be greater than 0.1." 
), - severity=Severity.ERROR, + severity=Severity.WARNING, element_wise=True, min_value="0.1", accept_blank=True, From a20fc228e69bb4a476f33379af55bb10e1940ac8 Mon Sep 17 00:00:00 2001 From: Hans Keeler Date: Wed, 1 Nov 2023 21:14:47 -0400 Subject: [PATCH 21/33] Return DataFrame for validation results --- regtech_data_validator/create_schemas.py | 195 +++++++++++++---------- tests/test_sample_data.py | 34 ++-- tests/test_schema_functions.py | 105 +++++++----- 3 files changed, 194 insertions(+), 140 deletions(-) diff --git a/regtech_data_validator/create_schemas.py b/regtech_data_validator/create_schemas.py index 2aba2af5..fe03afdf 100644 --- a/regtech_data_validator/create_schemas.py +++ b/regtech_data_validator/create_schemas.py @@ -2,7 +2,7 @@ with validations listed in phase 1 and phase 2.""" import pandas as pd -from pandera import DataFrameSchema +from pandera import Check, DataFrameSchema from pandera.errors import SchemaErrors, SchemaError from regtech_data_validator.checks import SBLCheck @@ -11,44 +11,114 @@ # Get separate schema templates for phase 1 and 2 - - phase_1_template = get_template() phase_2_template = get_template() -def get_schema_by_phase_for_lei(template: dict, phase: str, lei: str | None = None): - for column in get_phase_1_and_2_validations_for_lei(lei): - validations = get_phase_1_and_2_validations_for_lei(lei)[column] +def get_schema_by_phase_for_lei(template: dict, phase: str, context: dict[str, str] | None = None): + for column in get_phase_1_and_2_validations_for_lei(context): + validations = get_phase_1_and_2_validations_for_lei(context)[column] template[column].checks = validations[phase] + return DataFrameSchema(template) -def get_phase_1_schema_for_lei(lei: str | None = None): - return get_schema_by_phase_for_lei(phase_1_template, "phase_1", lei) +def get_phase_1_schema_for_lei(context: dict[str, str] | None = None): + return get_schema_by_phase_for_lei(phase_1_template, "phase_1", context) + + +def 
get_phase_2_schema_for_lei(context: dict[str, str] | None = None): + return get_schema_by_phase_for_lei(phase_2_template, "phase_2", context) + + +@staticmethod +def _get_check_fields(check: Check, primary_column: str) -> list[str]: + """ + Retrieves unique sorted list of fields associated with a given Check + """ + + field_set: set[str] = {primary_column} + + if check.groupby: + field_set.update(check.groupby) # type: ignore + + fields = sorted(list(field_set)) + + return fields + + +@staticmethod +def _filter_valid_records(df: pd.DataFrame, check_output: pd.Series, fields: list[str]) -> pd.DataFrame: + """ + Return only records and fields associated with a given `Check`'s + """ + + # `check_output` must be sorted so its index lines up with `df`'s index + sorted_check_output: pd.Series = check_output.sort_index() + + # Filter records using Pandas's boolean indexing, where all False values get filtered out. + # The `~` does the inverse since it's actually the False values we want to keep. + # http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#boolean-indexing + failed_records_df = df[~sorted_check_output][fields].reset_index(names='record_no') + failed_records_df.index.rename('finding_no', inplace=True) + + return failed_records_df -def get_phase_2_schema_for_lei(lei: str | None = None): - return get_schema_by_phase_for_lei(phase_2_template, "phase_2", lei) +@staticmethod +def _records_to_fields(failed_records_df: pd.DataFrame) -> pd.DataFrame: + """ + Transforms a DataFrame with columns per Check field to DataFrame with a row per field + """ + + # Melts a DataFrame with the line number as the index columns for the validations's fields' values + # into one with the validation_id, record_no, and field_name as a multiindex, and all of the validation + # metadata merged in as well. 
+ failed_record_fields_df = failed_records_df.melt( + var_name='field_name', value_name='field_value', id_vars='record_no', ignore_index=False + ) + + return failed_record_fields_df + + +@staticmethod +def _add_validation_metadata(failed_check_fields_df: pd.DataFrame, check: SBLCheck): + """ + Add SBLCheck metadata (id, name, description, severity) + """ + + validation_fields_df = ( + failed_check_fields_df.assign(validation_severity=check.severity) + .assign(validation_id=check.title) + .assign(validation_name=check.name) + .assign(validation_desc=check.description) + ) + + return validation_fields_df -def validate(schema: DataFrameSchema, df: pd.DataFrame) -> pd.DataFrame: +def validate(schema: DataFrameSchema, submission_df: pd.DataFrame) -> tuple[bool, pd.DataFrame]: """ validate received dataframe with schema and return list of schema errors Args: schema (DataFrameSchema): schema to be used for validation - df (pd.DataFrame): data parsed into dataframe + submission_df (pd.DataFrame): data to be validated against the schema Returns: + bool whether the given submission was valid or not pd.DataFrame containing validation results data """ + is_valid = True findings_df: pd.DataFrame = pd.DataFrame() + next_finding_no: int = 1 try: - schema(df, lazy=True) + schema(submission_df, lazy=True) except SchemaErrors as err: - # WARN: SchemaErrors.schema_errors is supposed to be of type - # list[dict[str,Any]], but it's actually of type SchemaError + is_valid = False + + # NOTE: `type: ignore` because SchemaErrors.schema_errors is supposed to be + # `list[dict[str,Any]]`, but it's actually of type `SchemaError` schema_error: SchemaError for schema_error in err.schema_errors: # type: ignore check = schema_error.check @@ -64,70 +134,31 @@ def validate(schema: DataFrameSchema, df: pd.DataFrame) -> pd.DataFrame: f'Check {check} type on {column_name} column not supported. 
Must be of type {SBLCheck}' ) from schema_error - fields: list[str] = [column_name] - - if check.groupby: - fields += check.groupby # type: ignore - - # This will either be a boolean series or a single bool - # Q: Is the scenario where it returns a single bool even with the above error checking? - check_output: pd.Series = schema_error.check_output # type: ignore - - # Remove duplicates, but keep as `list` for JSON-friendliness - fields = list(set(fields)) - - # Q: What's the scenario where `check_output` is empty? - if not check_output.empty: - # `check_output` must be sorted so its index lines up with `df`'s index - check_output.sort_index(inplace=True) - - # Filter records using Pandas's boolean indexing, where all False values - # get filtered out. The `~` does the inverse since it's actually the - # False values we want to keep. - # http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#boolean-indexing - failed_check_fields_df = df[~check_output][fields].fillna("") - - # Melts a DataFrame with the line number as the index columns for the validations's fields' values - # into one with the validation_id, line_no, and field_name as a multiindex, and all of the validation - # metadata merged in as well. - # - # from... - # - # ct_loan_term_flag ct_credit_product - # 0 999 1 - # 1 999 2 - # - # ...to... - # field_value v_sev v_name v_desc - # v_id line_no field_name - # E2003 0 ct_credit_product 1 error ct_loan_term_flag.enum_value_conflict When 'credit product' equals 1 (term loan - un... - # ct_loan_term_flag 999 error ct_loan_term_flag.enum_value_conflict When 'credit product' equals 1 (term loan - un... - # 1 ct_credit_product 2 error ct_loan_term_flag.enum_value_conflict When 'credit product' equals 1 (term loan - un... - # ct_loan_term_flag 999 error ct_loan_term_flag.enum_value_conflict When 'credit product' equals 1 (term loan - un... 
- failed_check_fields_melt_df = ( - failed_check_fields_df.reset_index(names='line_no') - .melt(var_name='field_name', value_name='field_value', id_vars='line_no') - .assign(v_id=check.title) - .assign(v_sev=check.severity) - .assign(v_name=check.name) - .assign(v_desc=check.description) - .set_index(['v_id', 'line_no', 'field_name']) - .sort_index - ) - print(failed_check_fields_melt_df) - - findings_df = pd.concat([findings_df, failed_check_fields_melt_df]) - - return findings_df - - - def validate_phases(df: pd.DataFrame, lei: str | None = None) -> list: - phase1_findings = validate(get_phase_1_schema_for_lei(lei), df) - if phase1_findings: - return phase1_findings - else: - phase2_findings = validate(get_phase_2_schema_for_lei((lei)), df) - if phase2_findings: - return phase2_findings - else: - return [{"response": "No validations errors or warnings"}] + fields = _get_check_fields(check, column_name) + + check_output: pd.Series | None = schema_error.check_output + + if check_output is not None: + # Filter data not associated with failed Check, and update index for merging with findings_df + failed_records_df = _filter_valid_records(submission_df, check_output, fields) + failed_records_df.index += next_finding_no + next_finding_no = failed_records_df.tail(1).index + 1 # type: ignore + + failed_record_fields_df = _records_to_fields(failed_records_df) + check_findings_df = _add_validation_metadata(failed_record_fields_df, check) + + findings_df = pd.concat([findings_df, check_findings_df]) + else: + # The above exception handling _should_ prevent this from ever happening, but...just in case. + raise RuntimeError(f'No check output for "{check.name}" check. 
Pandera SchemaError: {schema_error}') + + return is_valid, findings_df.sort_index() + + +def validate_phases(df: pd.DataFrame, context: dict[str, str] | None = None) -> tuple[bool, pd.DataFrame]: + p1_is_valid, p1_findings = validate(get_phase_1_schema_for_lei(context), df) + + if not p1_is_valid: + return p1_is_valid, p1_findings + + return validate(get_phase_2_schema_for_lei(context), df) diff --git a/tests/test_sample_data.py b/tests/test_sample_data.py index 35c1fe42..4a760e4a 100644 --- a/tests/test_sample_data.py +++ b/tests/test_sample_data.py @@ -8,8 +8,6 @@ class TestValidatingSampleData: - valid_response = {"response": "No validations errors or warnings"} - good_file_df = pd.read_csv(GOOD_FILE_PATH, dtype=str, na_filter=False) bad_file_df = pd.read_csv(BAD_FILE_PATH, dtype=str, na_filter=False) @@ -21,28 +19,36 @@ def test_invalid_data_file(self): def test_run_validation_on_good_data_invalid_lei(self): lei = "000TESTFIUIDDONOTUS1" - validation_result = validate_phases(self.good_file_df, lei) + is_valid, findings_df = validate_phases(self.good_file_df, {'lei': lei}) + + assert not is_valid - assert len(validation_result) == 1 - assert validation_result[0] != self.valid_response + # Only 'uid.invalid_uid_lei' validation returned + assert len(findings_df['validation_name'].unique()) == 1 + assert len(findings_df['validation_name'] == 'uid.invalid_uid_lei') > 0 def test_run_validation_on_good_data_valid_lei(self): lei = "000TESTFIUIDDONOTUSE" - validation_result = validate_phases(self.good_file_df, lei) + is_valid, findings_df = validate_phases(self.good_file_df, {'lei': lei}) - assert len(validation_result) == 1 - assert validation_result[0] == self.valid_response + assert is_valid + assert findings_df.empty def test_run_validation_on_bad_data_invalid_lei(self): lei = "000TESTFIUIDDONOTUS1" - validation_result = validate_phases(self.bad_file_df, lei) + is_valid, findings_df = validate_phases(self.bad_file_df, {'lei': lei}) - assert len(validation_result) >= 
1 - assert validation_result[0] != self.valid_response + assert not is_valid + + # 'uid.invalid_uid_lei' and other validations returned + assert len(findings_df['validation_name'].unique()) > 1 + assert len(findings_df['validation_name'] == 'uid.invalid_uid_lei') > 0 def test_run_validation_on_bad_data_valid_lei(self): lei = "000TESTFIUIDDONOTUSE" - validation_result = validate_phases(self.bad_file_df, lei) + is_valid, findings_df = validate_phases(self.bad_file_df, {'lei': lei}) + + assert not is_valid - assert len(validation_result) >= 1 - assert validation_result[0] != self.valid_response + # 'uid.invalid_uid_lei' and other validations returned + assert len(findings_df['validation_name'].unique()) > 1 diff --git a/tests/test_schema_functions.py b/tests/test_schema_functions.py index 7c141dee..bc8ae363 100644 --- a/tests/test_schema_functions.py +++ b/tests/test_schema_functions.py @@ -9,8 +9,6 @@ class TestUtil: - valid_response = {"response": "No validations errors or warnings"} - def get_data(self, update_data: dict[str, list[str]] = {}) -> dict[str, list[str]]: default = { "uid": ["000TESTFIUIDDONOTUSEXGXVID11XTC1"], @@ -106,27 +104,34 @@ class TestValidate: def test_with_valid_dataframe(self): df = pd.DataFrame(data=self.util.get_data()) - result = validate(self.phase1_schema, df) - ph2_result = validate(self.phase2_schema, df) - assert len(result) == 0 - assert len(ph2_result) == 0 + p1_is_valid, p1_findings_df = validate(self.phase1_schema, df) + p2_is_valid, p2_findings_df = validate(self.phase2_schema, df) + + assert p1_is_valid + assert p2_is_valid def test_with_valid_lei(self): lei = "000TESTFIUIDDONOTUSE" - phase1_schema_by_lei = get_phase_1_schema_for_lei(lei) - phase2_schema_by_lei = get_phase_2_schema_for_lei(lei) + phase1_schema_by_lei = get_phase_1_schema_for_lei({'lei': lei}) + phase2_schema_by_lei = get_phase_2_schema_for_lei({'lei': lei}) + df = pd.DataFrame(data=self.util.get_data()) - result = validate(phase1_schema_by_lei, df) - ph2_result 
= validate(phase2_schema_by_lei, df) - assert len(result) == 0 - assert len(ph2_result) == 0 + + p1_is_valid, p1_findings_df = validate(phase1_schema_by_lei, df) + p2_is_valid, p2_findings_df = validate(phase2_schema_by_lei, df) + + assert p1_is_valid + assert p2_is_valid def test_with_invalid_dataframe(self): df = pd.DataFrame(data=self.util.get_data({"ct_credit_product": ["989"]})) - result = validate(self.phase1_schema, df) - ph2_result = validate(self.phase2_schema, df) - assert len(result) == 1 - assert len(ph2_result) == 0 + + p1_is_valid, p1_findings_df = validate(self.phase1_schema, df) + p2_is_valid, p2_findings_df = validate(self.phase2_schema, df) + + assert not p1_is_valid + assert len(p1_findings_df) == 1 + assert p2_is_valid def test_with_multi_invalid_dataframe(self): df = pd.DataFrame( @@ -138,47 +143,57 @@ def test_with_multi_invalid_dataframe(self): } ) ) - result = validate(self.phase1_schema, df) - assert len(result) == 1 + p1_is_valid, p1_findings_df = validate(self.phase1_schema, df) + assert not p1_is_valid + assert len(p1_findings_df) == 1 - ph2_result = validate(self.phase2_schema, df) - assert len(ph2_result) == 3 + p2_is_valid, p2_findings_df = validate(self.phase2_schema, df) + # 3 unique findings raised + assert len(p2_findings_df.index.unique()) == 3 def test_with_invalid_lei(self): lei = "000TESTFIUIDDONOTUS1" - phase1_schema_by_lei = get_phase_1_schema_for_lei(lei) - phase2_schema_by_lei = get_phase_2_schema_for_lei(lei) + + phase1_schema_by_lei = get_phase_1_schema_for_lei({'lei': lei}) + phase2_schema_by_lei = get_phase_2_schema_for_lei({'lei': lei}) + df = pd.DataFrame(data=self.util.get_data({"ct_credit_product": ["989"]})) - result = validate(phase1_schema_by_lei, df) - ph2_result = validate(phase2_schema_by_lei, df) - assert len(result) == 2 - assert len(ph2_result) == 0 + + p1_is_valid, p1_findings_df = validate(phase1_schema_by_lei, df) + p2_is_valid, p2_findings_df = validate(phase2_schema_by_lei, df) + + # 1 unique findings 
raised in phase 1 + assert not p1_is_valid + assert len(p1_findings_df.index.unique()) == 1 + + # 1 unique finding raised in phase 2 + assert not p2_is_valid + assert len(p2_findings_df.index.unique()) == 1 class TestValidatePhases: util = TestUtil() def test_with_valid_data(self): - result = validate_phases(pd.DataFrame(data=self.util.get_data())) + is_valid, findings_df = validate_phases(pd.DataFrame(data=self.util.get_data())) - assert len(result) == 1 - assert result[0] == self.util.valid_response + assert is_valid def test_with_valid_lei(self): lei = "000TESTFIUIDDONOTUSE" df = pd.DataFrame(data=self.util.get_data()) - result = validate_phases(df, lei) - assert len(result) == 1 - assert result[0] == self.util.valid_response + is_valid, findings_df = validate_phases(df, {'lei': lei}) + + assert is_valid def test_with_invalid_data(self): - result = validate_phases(pd.DataFrame(data=self.util.get_data({"ct_credit_product": ["989"]}))) + is_valid, findings_df = validate_phases(pd.DataFrame(data=self.util.get_data({"ct_credit_product": ["989"]}))) - assert len(result) == 1 - assert result[0] != self.util.valid_response + assert not is_valid + assert len(findings_df) == 1 def test_with_multi_invalid_data_with_phase1(self): - result = validate_phases( + is_valid, findings_df = validate_phases( pd.DataFrame( data=self.util.get_data( { @@ -189,12 +204,13 @@ def test_with_multi_invalid_data_with_phase1(self): ) ) ) + # should only return phase 1 validation result since phase1 failed - assert len(result) == 1 - assert result[0] != self.util.valid_response + assert not is_valid + assert len(findings_df) == 1 def test_with_multi_invalid_data_with_phase2(self): - result = validate_phases( + is_valid, findings_df = validate_phases( pd.DataFrame( data=self.util.get_data( { @@ -206,12 +222,13 @@ def test_with_multi_invalid_data_with_phase2(self): ) # since the data passed phase 1 validations # this should return phase 2 validations - assert len(result) == 3 - assert result[0] 
!= self.util.valid_response + assert not is_valid + assert len(findings_df.index.unique()) == 3 def test_with_invalid_lei(self): lei = "000TESTFIUIDDONOTUS1" df = pd.DataFrame(data=self.util.get_data()) - result = validate_phases(df, lei) - assert len(result) == 1 - assert result[0] != self.util.valid_response + is_valid, findings_df = validate_phases(df, {'lei': lei}) + + assert not is_valid + assert len(findings_df['validation_name'] == 'uid.invalid_uid_lei') > 0 From cef30d419d7a6e3c6cceb8422c4de3ae703edc46 Mon Sep 17 00:00:00 2001 From: Hans Keeler Date: Wed, 1 Nov 2023 21:15:15 -0400 Subject: [PATCH 22/33] Replace `main.py` with Typer-based CLI app --- regtech_data_validator/cli.py | 121 +++++++++++++++++++++++++++++++++ regtech_data_validator/main.py | 50 -------------- 2 files changed, 121 insertions(+), 50 deletions(-) create mode 100644 regtech_data_validator/cli.py delete mode 100644 regtech_data_validator/main.py diff --git a/regtech_data_validator/cli.py b/regtech_data_validator/cli.py new file mode 100644 index 00000000..2aa3c1cf --- /dev/null +++ b/regtech_data_validator/cli.py @@ -0,0 +1,121 @@ +from dataclasses import dataclass +from enum import StrEnum +import json +from pathlib import Path +from typing import Annotated, Optional + +import pandas as pd +from tabulate import tabulate +import typer + +from regtech_data_validator.create_schemas import validate_phases + + +app = typer.Typer(no_args_is_help=True, pretty_exceptions_enable=False) + + +@dataclass +class KeyValueOpt: + key: str + value: str + + +def parse_key_value(kv_str: str) -> KeyValueOpt: + split_str = kv_str.split('=') + + if len(split_str) != 2: + raise ValueError(f'Invalid key/value pair: {kv_str}') + + return KeyValueOpt(split_str[0], split_str[1]) + + +class OutputFormat(StrEnum): + CSV = 'csv' + JSON = 'json' + PANDAS = 'pandas' + TABLE = 'table' + + +@app.command(no_args_is_help=True) +def validate( + path: Annotated[ + Path, + typer.Argument( + exists=True, + dir_okay=False, + 
readable=True, + resolve_path=True, + show_default=False, + help='Path of file to be validated', + ), + ], + context: Annotated[ + Optional[list[KeyValueOpt]], + typer.Option( + parser=parse_key_value, + metavar='=', + help='[example: lei=12345678901234567890]', + show_default=False, + ), + ] = None, + output: Annotated[Optional[OutputFormat], typer.Option()] = OutputFormat.TABLE, +): + """ + CFPB's RegTech data validation utility. + """ + context_dict = {x.key: x.value for x in context} if context else {} + + # FIXME: Handle ParserError + input_df = pd.read_csv(path, dtype=str, na_filter=False) + is_valid, findings_df = validate_phases(input_df, context_dict) + + if not is_valid: + match output: + case OutputFormat.PANDAS: + with pd.option_context('display.width', None, 'display.max_rows', None): + print(findings_df) + case OutputFormat.CSV: + print(findings_df.to_csv()) + case OutputFormat.JSON: + findings_json = [] + findings_by_v_id_df = findings_df.reset_index().set_index(['validation_id', 'record_no', 'field_name']) + + for v_id_idx, v_id_df in findings_by_v_id_df.groupby(by='validation_id'): + v_head = v_id_df.iloc[0] + + finding_json = { + 'validation': { + 'id': v_id_idx, + 'name': v_head.at['validation_name'], + 'description': v_head.at['validation_desc'], + 'severity': v_head.at['validation_severity'], + }, + 'records': [], + } + findings_json.append(finding_json) + + for rec_idx, rec_df in v_id_df.groupby(by='record_no'): + record_json = {'record_no': rec_idx, 'fields': []} + finding_json['records'].append(record_json) + + for field_idx, field_df in rec_df.groupby(by='field_name'): + field_head = field_df.iloc[0] + record_json['fields'].append({'name': field_idx, 'value': field_head.at['field_value']}) + + print() + print(json.dumps(findings_json, indent=4)) + + case OutputFormat.TABLE: + # trim field_value field to just 50 chars, similar to DataFrame default + table_df = findings_df.drop(columns='validation_desc').sort_index() + table_df['field_value'] 
= table_df['field_value'].str[0:50] + + # NOTE: `type: ignore` because tabulate package typing does not include Pandas + # DataFrame as input, but the library itself does support it. ¯\_(ツ)_/¯ + print(tabulate(table_df, headers='keys', showindex=True, tablefmt='rounded_outline')) # type: ignore + case _: + raise ValueError(f'output format "{output}" not supported') + + +if __name__ == '__main__': + app() diff --git a/regtech_data_validator/main.py b/regtech_data_validator/main.py deleted file mode 100644 index b39e57cd..00000000 --- a/regtech_data_validator/main.py +++ /dev/null @@ -1,50 +0,0 @@ -""" -This script loads a given CSV into a Pandas DataFrame, and then validates it against -the SBL Pandera schema. - -Run from the terminal to see the generated output. -""" - -import json -import sys - -import pandas as pd - -from regtech_data_validator.create_schemas import validate_phases - - -def csv_to_df(path: str) -> pd.DataFrame: - return pd.read_csv(path, dtype=str, na_filter=False) - - -def run_validation_on_df(df: pd.DataFrame, lei: str | None) -> None: - """ - Run validation on the supplied dataframe and print a report to - the terminal. 
- """ - - validation_dict = validate_phases(df, lei) - validation_json = json.dumps(validation_dict, indent=4) - - #print(validation_json) - - -def main(): - csv_path = None - lei: str | None = None - if len(sys.argv) == 1: - raise ValueError("csv_path arg not provided") - elif len(sys.argv) == 2: - csv_path = sys.argv[1] - elif len(sys.argv) == 3: - lei = sys.argv[1] - csv_path = sys.argv[2] - else: - raise ValueError("correct number of args not provided") - - df = csv_to_df(csv_path) - run_validation_on_df(df, lei) - - -if __name__ == "__main__": - main() From b27b749ee15eb8b0a615ee1c27b380c37d1bf22f Mon Sep 17 00:00:00 2001 From: Hans Keeler Date: Wed, 1 Nov 2023 21:15:52 -0400 Subject: [PATCH 23/33] Fix paths in README.md --- README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 527dfa1f..13b2e436 100644 --- a/README.md +++ b/README.md @@ -172,7 +172,7 @@ We use these test files in for automated test, but can also be passed in via the - They are harder to test. - Check function signatures should reflect the functionality. - Check functions should have corresponding unit tests. - - [Unit Test](./src/tests/test_check_functions.py) + - [Unit Test](.tests/test_check_functions.py) - Check definitions' name should be set to validation ID. - Example: "denial_reasons. enum_value_conflict" ![Validation ID](images/validation_id.png) @@ -241,14 +241,14 @@ formatting issues. ```sh # Example of Ruff with an error -$ poetry run ruff src/ -src/tests/test_check_functions.py:205:26: E712 [*] Comparison to `False` should be `cond is False` +$ ruff . +tests/test_check_functions.py:205:26: E712 [*] Comparison to `False` should be `cond is False` Found 1 error. [*] 1 potentially fixable with the --fix option. # Example of black with reformatted line -$ poetry run black src/ -reformatted /Projects/regtech-data-validator/src/validator/main.py +$ black . +reformatted regtech_data_validator/cli.py All done! 
✨ 🍰 ✨ 1 file reformatted, 13 files left unchanged. From 4e4c18e69e137c6de084a038605faf16e46df4af Mon Sep 17 00:00:00 2001 From: Hans Keeler Date: Thu, 2 Nov 2023 01:59:16 -0400 Subject: [PATCH 24/33] Bring back `describe` sub-command When you only have a single sub-command, `validate` in this case, Typer seems to automatically move that functionality to the top-level command. To get around that strangeness, I've re-added the `describe` command that's intended to print out the file format and the validation list, but isn't implemented yet. For now if you call it, it simply reports _Feature coming soon..._ We'll implement this for real in a follow-up PR sometime soon. --- regtech_data_validator/cli.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/regtech_data_validator/cli.py b/regtech_data_validator/cli.py index 2aa3c1cf..6dbefc10 100644 --- a/regtech_data_validator/cli.py +++ b/regtech_data_validator/cli.py @@ -36,6 +36,15 @@ class OutputFormat(StrEnum): TABLE = 'table' +@app.command() +def describe() -> None: + """ + Describe CFPB data submission formats and validations + """ + + print('Feature coming soon...') + + @app.command(no_args_is_help=True) def validate( path: Annotated[ @@ -61,11 +70,9 @@ def validate( output: Annotated[Optional[OutputFormat], typer.Option()] = OutputFormat.TABLE, ): """ - CFPB's RegTech data validation utility. 
+ Validate CFPB data submission """ context_dict = {x.key: x.value for x in context} if context else {} - - # FIXME: Handle ParserError input_df = pd.read_csv(path, dtype=str, na_filter=False) is_valid, findings_df = validate_phases(input_df, context_dict) From 87b0c3a3573f242353fce4658e38f407e4451094 Mon Sep 17 00:00:00 2001 From: Hans Keeler Date: Thu, 2 Nov 2023 02:20:13 -0400 Subject: [PATCH 25/33] Make `black` happy --- regtech_data_validator/cli.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/regtech_data_validator/cli.py b/regtech_data_validator/cli.py index 6dbefc10..d2abdfe1 100644 --- a/regtech_data_validator/cli.py +++ b/regtech_data_validator/cli.py @@ -20,6 +20,7 @@ class KeyValueOpt: value: str +@staticmethod def parse_key_value(kv_str: str) -> KeyValueOpt: split_str = kv_str.split('=') @@ -41,7 +42,7 @@ def describe() -> None: """ Describe CFPB data submission formats and validations """ - + print('Feature coming soon...') From 46e2b43b03fecb31ca18fbd41476d57a3574d536 Mon Sep 17 00:00:00 2001 From: Hans Keeler Date: Mon, 6 Nov 2023 18:40:45 -0500 Subject: [PATCH 26/33] Fix path to unit tests in README.md Co-authored-by: lchen-2101 <73617864+lchen-2101@users.noreply.github.com> --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 13b2e436..18edda07 100644 --- a/README.md +++ b/README.md @@ -172,7 +172,7 @@ We use these test files in for automated test, but can also be passed in via the - They are harder to test. - Check function signatures should reflect the functionality. - Check functions should have corresponding unit tests. - - [Unit Test](.tests/test_check_functions.py) + - [Unit Test](tests/test_check_functions.py) - Check definitions' name should be set to validation ID. - Example: "denial_reasons. 
enum_value_conflict" ![Validation ID](images/validation_id.png) From c4abc6db9a768a17e7cb786e9ddecc8380250346 Mon Sep 17 00:00:00 2001 From: Hans Keeler Date: Mon, 6 Nov 2023 23:00:25 -0500 Subject: [PATCH 27/33] Improve test cov by splitting up CLI output logic --- regtech_data_validator/cli.py | 96 +++++++++------ tests/test_cli.py | 218 ++++++++++++++++++++++++++++++++++ 2 files changed, 275 insertions(+), 39 deletions(-) create mode 100644 tests/test_cli.py diff --git a/regtech_data_validator/cli.py b/regtech_data_validator/cli.py index d2abdfe1..9e89f094 100644 --- a/regtech_data_validator/cli.py +++ b/regtech_data_validator/cli.py @@ -37,6 +37,56 @@ class OutputFormat(StrEnum): TABLE = 'table' +def df_to_str(df: pd.DataFrame) -> str: + with pd.option_context('display.width', None, 'display.max_rows', None): + return str(df) + + +def df_to_csv(df: pd.DataFrame) -> str: + return df.to_csv() + + +def df_to_table(df: pd.DataFrame) -> str: + # trim field_value field to just 50 chars, similar to DataFrame default + table_df = df.drop(columns='validation_desc').sort_index() + table_df['field_value'] = table_df['field_value'].str[0:50] + + # NOTE: `type: ignore` because tabulate package typing does not include Pandas + # DataFrame as input, but the library itself does support it. 
¯\_(ツ)_/¯ + return tabulate(table_df, headers='keys', showindex=True, tablefmt='rounded_outline') # type: ignore + + +def df_to_json(df: pd.DataFrame) -> str: + findings_json = [] + findings_by_v_id_df = df.reset_index().set_index(['validation_id', 'record_no', 'field_name']) + + for v_id_idx, v_id_df in findings_by_v_id_df.groupby(by='validation_id'): + v_head = v_id_df.iloc[0] + + finding_json = { + 'validation': { + 'id': v_id_idx, + 'name': v_head.at['validation_name'], + 'description': v_head.at['validation_desc'], + 'severity': v_head.at['validation_severity'], + }, + 'records': [], + } + findings_json.append(finding_json) + + for rec_idx, rec_df in v_id_df.groupby(by='record_no'): + record_json = {'record_no': rec_idx, 'fields': []} + finding_json['records'].append(record_json) + + for field_idx, field_df in rec_df.groupby(by='field_name'): + field_head = field_df.iloc[0] + record_json['fields'].append({'name': field_idx, 'value': field_head.at['field_value']}) + + json_str = json.dumps(findings_json, indent=4) + + return json_str + + @app.command() def describe() -> None: """ @@ -69,7 +119,7 @@ def validate( ), ] = None, output: Annotated[Optional[OutputFormat], typer.Option()] = OutputFormat.TABLE, -): +) -> tuple[bool, pd.DataFrame]: """ Validate CFPB data submission """ @@ -80,50 +130,18 @@ def validate( if not is_valid: match output: case OutputFormat.PANDAS: - with pd.option_context('display.width', None, 'display.max_rows', None): - print(findings_df) + print(df_to_str(findings_df)) case OutputFormat.CSV: - print(findings_df.to_csv()) + print(df_to_csv(findings_df)) case OutputFormat.JSON: - findings_json = [] - findings_by_v_id_df = findings_df.reset_index().set_index(['validation_id', 'record_no', 'field_name']) - - for v_id_idx, v_id_df in findings_by_v_id_df.groupby(by='validation_id'): - v_head = v_id_df.iloc[0] - - finding_json = { - 'validation': { - 'id': v_id_idx, - 'name': v_head.at['validation_name'], - 'description': 
v_head.at['validation_desc'], - 'severity': v_head.at['validation_severity'], - }, - 'records': [], - } - findings_json.append(finding_json) - - for rec_idx, rec_df in v_id_df.groupby(by='record_no'): - record_json = {'record_no': rec_idx, 'fields': []} - finding_json['records'].append(record_json) - - for field_idx, field_df in rec_df.groupby(by='field_name'): - field_head = field_df.iloc[0] - record_json['fields'].append({'name': field_idx, 'value': field_head.at['field_value']}) - - print() - print(json.dumps(findings_json, indent=4)) - + print(df_to_json(findings_df)) case OutputFormat.TABLE: - # trim field_value field to just 50 chars, similar to DataFrame default - table_df = findings_df.drop(columns='validation_desc').sort_index() - table_df['field_value'] = table_df['field_value'].str[0:50] - - # NOTE: `type: ignore` because tabulate package typing does not include Pandas - # DataFrame as input, but the library itself does support it. ¯\_(ツ)_/¯ - print(tabulate(table_df, headers='keys', showindex=True, tablefmt='rounded_outline')) # type: ignore + print(df_to_table(findings_df)) case _: raise ValueError(f'output format "{output}" not supported') + # returned values are only used in unit tests + return is_valid, findings_df if __name__ == '__main__': app() diff --git a/tests/test_cli.py b/tests/test_cli.py new file mode 100644 index 00000000..56d83625 --- /dev/null +++ b/tests/test_cli.py @@ -0,0 +1,218 @@ +from pathlib import Path +from textwrap import dedent +import os + +import pandas as pd +import pytest +from typer.testing import CliRunner + +from regtech_data_validator import cli + +cli_runner = CliRunner() +data_dir = f'{os.path.dirname(os.path.realpath(__file__))}/data' +pass_file = f'{data_dir}/sbl-validations-pass.csv' +fail_file = f'{data_dir}/sbl-validations-fail.csv' + + +class TestParseKeyValue: + + def test_parse_success(self): + test_str = "fruit=apple" + key_val: cli.KeyValueOpt = cli.parse_key_value(test_str) + + assert key_val.key == 
'fruit' + assert key_val.value == 'apple' + + + def test_parse_fail_wrong_delimiter(self): + test_str = "fruit:apple" + + with pytest.raises(ValueError): + cli.parse_key_value(test_str) + + def test_parse_fail_no_delimiter(self): + test_str = "fruitapple" + + with pytest.raises(ValueError): + cli.parse_key_value(test_str) + + def test_parse_fail_multiple_delimiters(self): + test_str = "fruit=apple=orange" + + with pytest.raises(ValueError): + cli.parse_key_value(test_str) + + +class TestOutputFormat: + + # TODO: Figure out why uid.duplicates_in_dataset returns different findings for matched records + input_df = pd.DataFrame( + data=[ + (1, 'uid', '12345678901234567890', 'error', 'E3000', 'uid.duplicates_in_dataset', "Any 'unique identifier' may not be used in mor..."), + (2, 'uid', '12345678901234567890', 'error', 'E3000', 'uid.duplicates_in_dataset', "Any 'unique identifier' may not be used in mor...") + ], + columns=['record_no', 'field_name', 'field_value', 'validation_severity', 'validation_id', 'validation_name', 'validation_desc'], + ) + input_df.index.name = 'finding_no' + input_df.index += 1 + + def test_output_pandas(self): + expected_output = dedent(""" + record_no field_name field_value validation_severity validation_id validation_name validation_desc + finding_no + 1 1 uid 12345678901234567890 error E3000 uid.duplicates_in_dataset Any 'unique identifier' may not be used in mor... + 2 2 uid 12345678901234567890 error E3000 uid.duplicates_in_dataset Any 'unique identifier' may not be used in mor... 
+ """).strip('\n') + + actual_output = cli.df_to_str(self.input_df) + + assert actual_output == expected_output + + + def test_output_table(self): + expected_output = dedent(""" + ╭──────────────┬─────────────┬──────────────┬──────────────────────┬───────────────────────┬─────────────────┬───────────────────────────╮ + │ finding_no │ record_no │ field_name │ field_value │ validation_severity │ validation_id │ validation_name │ + ├──────────────┼─────────────┼──────────────┼──────────────────────┼───────────────────────┼─────────────────┼───────────────────────────┤ + │ 1 │ 1 │ uid │ 12345678901234567890 │ error │ E3000 │ uid.duplicates_in_dataset │ + │ 2 │ 2 │ uid │ 12345678901234567890 │ error │ E3000 │ uid.duplicates_in_dataset │ + ╰──────────────┴─────────────┴──────────────┴──────────────────────┴───────────────────────┴─────────────────┴───────────────────────────╯ + """).strip('\n') + + actual_output = cli.df_to_table(self.input_df) + + assert actual_output == expected_output + + + def test_output_csv(self): + expected_output = dedent(""" + finding_no,record_no,field_name,field_value,validation_severity,validation_id,validation_name,validation_desc + 1,1,uid,12345678901234567890,error,E3000,uid.duplicates_in_dataset,Any 'unique identifier' may not be used in mor... + 2,2,uid,12345678901234567890,error,E3000,uid.duplicates_in_dataset,Any 'unique identifier' may not be used in mor... 
+ """).strip('\n') + + actual_output = cli.df_to_csv(self.input_df) + + assert actual_output.strip('\n') == expected_output + + + def test_output_csv(self): + expected_output = dedent(""" + [ + { + "validation": { + "id": "E3000", + "name": "uid.duplicates_in_dataset", + "description": "Any 'unique identifier' may not be used in mor...", + "severity": "error" + }, + "records": [ + { + "record_no": 1, + "fields": [ + { + "name": "uid", + "value": "12345678901234567890" + } + ] + }, + { + "record_no": 2, + "fields": [ + { + "name": "uid", + "value": "12345678901234567890" + } + ] + } + ] + } + ] + """).strip('\n') + + actual_output = cli.df_to_json(self.input_df) + + assert actual_output == expected_output + + +class TestDescribeCommand: + + def test_defaults(self): + cli.describe() + + +class TestValidateCommand: + + valid_lei_context = cli.KeyValueOpt('lei','000TESTFIUIDDONOTUSE') + invalid_lei_context = cli.KeyValueOpt('lei','XXXXXXXXXXXXXXXXXXXX') + + pass_path = Path(pass_file) + fail_path = Path(fail_file) + + def test_pass_file_defaults(self): + is_valid, findings_df = cli.validate(path=self.pass_path) + + assert is_valid + + def test_pass_file_with_valid_context(self): + is_valid, findings_df = cli.validate(path=self.pass_path, context=[self.valid_lei_context]) + + assert is_valid + + def test_pass_file_with_invalid_context(self): + is_valid, findings_df = cli.validate(path=self.pass_path, context=[self.invalid_lei_context]) + + assert not is_valid + + def test_fail_file_csv_output(self): + is_valid, findings_df = cli.validate(path=self.fail_path, output=cli.OutputFormat.CSV) + + assert not is_valid + + def test_fail_file_json_output(self): + is_valid, findings_df = cli.validate(path=self.fail_path, output=cli.OutputFormat.JSON) + + assert not is_valid + + def test_fail_file_pandas_output(self): + is_valid, findings_df = cli.validate(path=self.fail_path, output=cli.OutputFormat.PANDAS) + + assert not is_valid + + def test_fail_file_table_output(self): + 
is_valid, findings_df = cli.validate(path=self.fail_path, output=cli.OutputFormat.TABLE) + + assert not is_valid + + + +class TestDescribeCli: + """ + Test `describe` command with Typer's CLI test runner + """ + + def test_defaults(self): + result = cli_runner.invoke(cli.app, ['describe']) + + assert result.exit_code == 0 + assert result.stdout == 'Feature coming soon...\n' + +class TestValidateCli: + """ + Test `validate` command with Typer's CLI test runner + """ + + def test_pass_file_defaults(self): + result = cli_runner.invoke(cli.app, ['validate', pass_file]) + + assert result.exit_code == 0 + assert result.stdout == '' + + + def test_fail_file_output_arg_value(self): + result = cli_runner.invoke(cli.app, ['validate', pass_file, '--output', 'pdf']) + + print(result.stdout) + + assert result.exit_code == 2 + assert "Invalid value for '--output': 'pdf' is not one of" in result.stdout From 253f8156184f09ff7fbbcae1374d4c1e6c40ec84 Mon Sep 17 00:00:00 2001 From: Hans Keeler Date: Mon, 6 Nov 2023 23:01:43 -0500 Subject: [PATCH 28/33] Remove @staticmethod used outside of a class --- regtech_data_validator/cli.py | 1 - 1 file changed, 1 deletion(-) diff --git a/regtech_data_validator/cli.py b/regtech_data_validator/cli.py index 9e89f094..e0e7559b 100644 --- a/regtech_data_validator/cli.py +++ b/regtech_data_validator/cli.py @@ -20,7 +20,6 @@ class KeyValueOpt: value: str -@staticmethod def parse_key_value(kv_str: str) -> KeyValueOpt: split_str = kv_str.split('=') From 18ff0d1da94df240a3f5ab7c6932c8c52daf782e Mon Sep 17 00:00:00 2001 From: Hans Keeler Date: Mon, 6 Nov 2023 23:20:42 -0500 Subject: [PATCH 29/33] Make black and ruff happy --- regtech_data_validator/cli.py | 1 + tests/test_cli.py | 52 +++++++++++++++++++---------------- 2 files changed, 30 insertions(+), 23 deletions(-) diff --git a/regtech_data_validator/cli.py b/regtech_data_validator/cli.py index e0e7559b..417eddfd 100644 --- a/regtech_data_validator/cli.py +++ b/regtech_data_validator/cli.py @@ -142,5 
+142,6 @@ def validate( # returned values are only used in unit tests return is_valid, findings_df + if __name__ == '__main__': app() diff --git a/tests/test_cli.py b/tests/test_cli.py index 56d83625..cc5fc889 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -15,7 +15,6 @@ class TestParseKeyValue: - def test_parse_success(self): test_str = "fruit=apple" key_val: cli.KeyValueOpt = cli.parse_key_value(test_str) @@ -23,7 +22,6 @@ def test_parse_success(self): assert key_val.key == 'fruit' assert key_val.value == 'apple' - def test_parse_fail_wrong_delimiter(self): test_str = "fruit:apple" @@ -44,14 +42,28 @@ def test_parse_fail_multiple_delimiters(self): class TestOutputFormat: - # TODO: Figure out why uid.duplicates_in_dataset returns different findings for matched records input_df = pd.DataFrame( data=[ - (1, 'uid', '12345678901234567890', 'error', 'E3000', 'uid.duplicates_in_dataset', "Any 'unique identifier' may not be used in mor..."), - (2, 'uid', '12345678901234567890', 'error', 'E3000', 'uid.duplicates_in_dataset', "Any 'unique identifier' may not be used in mor...") + { + 'record_no': 1, + 'field_name': 'uid', + 'field_value': '12345678901234567890', + 'validation_severity': 'error', + 'validation_id': 'E3000', + 'validation_name': 'uid.duplicates_in_dataset', + 'validation_desc': "Any 'unique identifier' may not be used in mor...", + }, + { + 'record_no': 2, + 'field_name': 'uid', + 'field_value': '12345678901234567890', + 'validation_severity': 'error', + 'validation_id': 'E3000', + 'validation_name': 'uid.duplicates_in_dataset', + 'validation_desc': "Any 'unique identifier' may not be used in mor...", + }, ], - columns=['record_no', 'field_name', 'field_value', 'validation_severity', 'validation_id', 'validation_name', 'validation_desc'], ) input_df.index.name = 'finding_no' input_df.index += 1 @@ -62,13 +74,12 @@ def test_output_pandas(self): finding_no 1 1 uid 12345678901234567890 error E3000 uid.duplicates_in_dataset Any 'unique identifier' may 
not be used in mor... 2 2 uid 12345678901234567890 error E3000 uid.duplicates_in_dataset Any 'unique identifier' may not be used in mor... - """).strip('\n') + """).strip('\n') # noqa: E501 actual_output = cli.df_to_str(self.input_df) assert actual_output == expected_output - def test_output_table(self): expected_output = dedent(""" ╭──────────────┬─────────────┬──────────────┬──────────────────────┬───────────────────────┬─────────────────┬───────────────────────────╮ @@ -77,26 +88,24 @@ def test_output_table(self): │ 1 │ 1 │ uid │ 12345678901234567890 │ error │ E3000 │ uid.duplicates_in_dataset │ │ 2 │ 2 │ uid │ 12345678901234567890 │ error │ E3000 │ uid.duplicates_in_dataset │ ╰──────────────┴─────────────┴──────────────┴──────────────────────┴───────────────────────┴─────────────────┴───────────────────────────╯ - """).strip('\n') + """).strip('\n') # noqa: E501 actual_output = cli.df_to_table(self.input_df) assert actual_output == expected_output - def test_output_csv(self): expected_output = dedent(""" finding_no,record_no,field_name,field_value,validation_severity,validation_id,validation_name,validation_desc 1,1,uid,12345678901234567890,error,E3000,uid.duplicates_in_dataset,Any 'unique identifier' may not be used in mor... 2,2,uid,12345678901234567890,error,E3000,uid.duplicates_in_dataset,Any 'unique identifier' may not be used in mor... 
- """).strip('\n') + """).strip('\n') # noqa: E501 actual_output = cli.df_to_csv(self.input_df) assert actual_output.strip('\n') == expected_output - - def test_output_csv(self): + def test_output_json(self): expected_output = dedent(""" [ { @@ -136,15 +145,13 @@ def test_output_csv(self): class TestDescribeCommand: - def test_defaults(self): - cli.describe() + cli.describe() class TestValidateCommand: - - valid_lei_context = cli.KeyValueOpt('lei','000TESTFIUIDDONOTUSE') - invalid_lei_context = cli.KeyValueOpt('lei','XXXXXXXXXXXXXXXXXXXX') + valid_lei_context = cli.KeyValueOpt('lei', '000TESTFIUIDDONOTUSE') + invalid_lei_context = cli.KeyValueOpt('lei', 'XXXXXXXXXXXXXXXXXXXX') pass_path = Path(pass_file) fail_path = Path(fail_file) @@ -185,30 +192,29 @@ def test_fail_file_table_output(self): assert not is_valid - class TestDescribeCli: """ Test `describe` command with Typer's CLI test runner """ - + def test_defaults(self): result = cli_runner.invoke(cli.app, ['describe']) assert result.exit_code == 0 assert result.stdout == 'Feature coming soon...\n' - + + class TestValidateCli: """ Test `validate` command with Typer's CLI test runner """ - + def test_pass_file_defaults(self): result = cli_runner.invoke(cli.app, ['validate', pass_file]) assert result.exit_code == 0 assert result.stdout == '' - def test_fail_file_output_arg_value(self): result = cli_runner.invoke(cli.app, ['validate', pass_file, '--output', 'pdf']) From ddb66825630e6a7e9329a440c5c249db09597482 Mon Sep 17 00:00:00 2001 From: Hans Keeler Date: Tue, 7 Nov 2023 00:52:17 -0500 Subject: [PATCH 30/33] Revert pinned `black` Action; sync Poetry versions --- .github/workflows/linters.yml | 1 - poetry.lock | 303 ++++++++++---------- pyproject.toml | 4 +- regtech_data_validator/check_functions.py | 1 - regtech_data_validator/phase_validations.py | 1 - 5 files changed, 150 insertions(+), 160 deletions(-) diff --git a/.github/workflows/linters.yml b/.github/workflows/linters.yml index db76efdf..1c672781 100644 
--- a/.github/workflows/linters.yml +++ b/.github/workflows/linters.yml @@ -10,7 +10,6 @@ jobs: - uses: psf/black@stable with: options: "--check --diff --verbose" - version: "~= 22.0" ruff: runs-on: ubuntu-latest steps: diff --git a/poetry.lock b/poetry.lock index 60b45865..5d1b552e 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2,36 +2,29 @@ [[package]] name = "black" -version = "23.3.0" +version = "23.10.1" description = "The uncompromising code formatter." optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "black-23.3.0-cp310-cp310-macosx_10_16_arm64.whl", hash = "sha256:0945e13506be58bf7db93ee5853243eb368ace1c08a24c65ce108986eac65915"}, - {file = "black-23.3.0-cp310-cp310-macosx_10_16_universal2.whl", hash = "sha256:67de8d0c209eb5b330cce2469503de11bca4085880d62f1628bd9972cc3366b9"}, - {file = "black-23.3.0-cp310-cp310-macosx_10_16_x86_64.whl", hash = "sha256:7c3eb7cea23904399866c55826b31c1f55bbcd3890ce22ff70466b907b6775c2"}, - {file = "black-23.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:32daa9783106c28815d05b724238e30718f34155653d4d6e125dc7daec8e260c"}, - {file = "black-23.3.0-cp310-cp310-win_amd64.whl", hash = "sha256:35d1381d7a22cc5b2be2f72c7dfdae4072a3336060635718cc7e1ede24221d6c"}, - {file = "black-23.3.0-cp311-cp311-macosx_10_16_arm64.whl", hash = "sha256:a8a968125d0a6a404842fa1bf0b349a568634f856aa08ffaff40ae0dfa52e7c6"}, - {file = "black-23.3.0-cp311-cp311-macosx_10_16_universal2.whl", hash = "sha256:c7ab5790333c448903c4b721b59c0d80b11fe5e9803d8703e84dcb8da56fec1b"}, - {file = "black-23.3.0-cp311-cp311-macosx_10_16_x86_64.whl", hash = "sha256:a6f6886c9869d4daae2d1715ce34a19bbc4b95006d20ed785ca00fa03cba312d"}, - {file = "black-23.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6f3c333ea1dd6771b2d3777482429864f8e258899f6ff05826c3a4fcc5ce3f70"}, - {file = "black-23.3.0-cp311-cp311-win_amd64.whl", hash = 
"sha256:11c410f71b876f961d1de77b9699ad19f939094c3a677323f43d7a29855fe326"}, - {file = "black-23.3.0-cp37-cp37m-macosx_10_16_x86_64.whl", hash = "sha256:1d06691f1eb8de91cd1b322f21e3bfc9efe0c7ca1f0e1eb1db44ea367dff656b"}, - {file = "black-23.3.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:50cb33cac881766a5cd9913e10ff75b1e8eb71babf4c7104f2e9c52da1fb7de2"}, - {file = "black-23.3.0-cp37-cp37m-win_amd64.whl", hash = "sha256:e114420bf26b90d4b9daa597351337762b63039752bdf72bf361364c1aa05925"}, - {file = "black-23.3.0-cp38-cp38-macosx_10_16_arm64.whl", hash = "sha256:48f9d345675bb7fbc3dd85821b12487e1b9a75242028adad0333ce36ed2a6d27"}, - {file = "black-23.3.0-cp38-cp38-macosx_10_16_universal2.whl", hash = "sha256:714290490c18fb0126baa0fca0a54ee795f7502b44177e1ce7624ba1c00f2331"}, - {file = "black-23.3.0-cp38-cp38-macosx_10_16_x86_64.whl", hash = "sha256:064101748afa12ad2291c2b91c960be28b817c0c7eaa35bec09cc63aa56493c5"}, - {file = "black-23.3.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:562bd3a70495facf56814293149e51aa1be9931567474993c7942ff7d3533961"}, - {file = "black-23.3.0-cp38-cp38-win_amd64.whl", hash = "sha256:e198cf27888ad6f4ff331ca1c48ffc038848ea9f031a3b40ba36aced7e22f2c8"}, - {file = "black-23.3.0-cp39-cp39-macosx_10_16_arm64.whl", hash = "sha256:3238f2aacf827d18d26db07524e44741233ae09a584273aa059066d644ca7b30"}, - {file = "black-23.3.0-cp39-cp39-macosx_10_16_universal2.whl", hash = "sha256:f0bd2f4a58d6666500542b26354978218a9babcdc972722f4bf90779524515f3"}, - {file = "black-23.3.0-cp39-cp39-macosx_10_16_x86_64.whl", hash = "sha256:92c543f6854c28a3c7f39f4d9b7694f9a6eb9d3c5e2ece488c327b6e7ea9b266"}, - {file = "black-23.3.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3a150542a204124ed00683f0db1f5cf1c2aaaa9cc3495b7a3b5976fb136090ab"}, - {file = "black-23.3.0-cp39-cp39-win_amd64.whl", hash = "sha256:6b39abdfb402002b8a7d030ccc85cf5afff64ee90fa4c5aebc531e3ad0175ddb"}, - {file = 
"black-23.3.0-py3-none-any.whl", hash = "sha256:ec751418022185b0c1bb7d7736e6933d40bbb14c14a0abcf9123d1b159f98dd4"}, - {file = "black-23.3.0.tar.gz", hash = "sha256:1c7b8d606e728a41ea1ccbd7264677e494e87cf630e399262ced92d4a8dac940"}, + {file = "black-23.10.1-cp310-cp310-macosx_10_16_arm64.whl", hash = "sha256:ec3f8e6234c4e46ff9e16d9ae96f4ef69fa328bb4ad08198c8cee45bb1f08c69"}, + {file = "black-23.10.1-cp310-cp310-macosx_10_16_x86_64.whl", hash = "sha256:1b917a2aa020ca600483a7b340c165970b26e9029067f019e3755b56e8dd5916"}, + {file = "black-23.10.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9c74de4c77b849e6359c6f01987e94873c707098322b91490d24296f66d067dc"}, + {file = "black-23.10.1-cp310-cp310-win_amd64.whl", hash = "sha256:7b4d10b0f016616a0d93d24a448100adf1699712fb7a4efd0e2c32bbb219b173"}, + {file = "black-23.10.1-cp311-cp311-macosx_10_16_arm64.whl", hash = "sha256:b15b75fc53a2fbcac8a87d3e20f69874d161beef13954747e053bca7a1ce53a0"}, + {file = "black-23.10.1-cp311-cp311-macosx_10_16_x86_64.whl", hash = "sha256:e293e4c2f4a992b980032bbd62df07c1bcff82d6964d6c9496f2cd726e246ace"}, + {file = "black-23.10.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7d56124b7a61d092cb52cce34182a5280e160e6aff3137172a68c2c2c4b76bcb"}, + {file = "black-23.10.1-cp311-cp311-win_amd64.whl", hash = "sha256:3f157a8945a7b2d424da3335f7ace89c14a3b0625e6593d21139c2d8214d55ce"}, + {file = "black-23.10.1-cp38-cp38-macosx_10_16_arm64.whl", hash = "sha256:cfcce6f0a384d0da692119f2d72d79ed07c7159879d0bb1bb32d2e443382bf3a"}, + {file = "black-23.10.1-cp38-cp38-macosx_10_16_x86_64.whl", hash = "sha256:33d40f5b06be80c1bbce17b173cda17994fbad096ce60eb22054da021bf933d1"}, + {file = "black-23.10.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:840015166dbdfbc47992871325799fd2dc0dcf9395e401ada6d88fe11498abad"}, + {file = "black-23.10.1-cp38-cp38-win_amd64.whl", hash = 
"sha256:037e9b4664cafda5f025a1728c50a9e9aedb99a759c89f760bd83730e76ba884"}, + {file = "black-23.10.1-cp39-cp39-macosx_10_16_arm64.whl", hash = "sha256:7cb5936e686e782fddb1c73f8aa6f459e1ad38a6a7b0e54b403f1f05a1507ee9"}, + {file = "black-23.10.1-cp39-cp39-macosx_10_16_x86_64.whl", hash = "sha256:7670242e90dc129c539e9ca17665e39a146a761e681805c54fbd86015c7c84f7"}, + {file = "black-23.10.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5ed45ac9a613fb52dad3b61c8dea2ec9510bf3108d4db88422bacc7d1ba1243d"}, + {file = "black-23.10.1-cp39-cp39-win_amd64.whl", hash = "sha256:6d23d7822140e3fef190734216cefb262521789367fbdc0b3f22af6744058982"}, + {file = "black-23.10.1-py3-none-any.whl", hash = "sha256:d431e6739f727bb2e0495df64a6c7a5310758e87505f5f8cde9ff6c0f2d7e4fe"}, + {file = "black-23.10.1.tar.gz", hash = "sha256:1f8ce316753428ff68749c65a5f7844631aa18c8679dfd3ca9dc1a289979c258"}, ] [package.dependencies] @@ -74,63 +67,63 @@ files = [ [[package]] name = "coverage" -version = "7.3.1" +version = "7.3.2" description = "Code coverage measurement for Python" optional = false python-versions = ">=3.8" files = [ - {file = "coverage-7.3.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:cd0f7429ecfd1ff597389907045ff209c8fdb5b013d38cfa7c60728cb484b6e3"}, - {file = "coverage-7.3.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:966f10df9b2b2115da87f50f6a248e313c72a668248be1b9060ce935c871f276"}, - {file = "coverage-7.3.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0575c37e207bb9b98b6cf72fdaaa18ac909fb3d153083400c2d48e2e6d28bd8e"}, - {file = "coverage-7.3.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:245c5a99254e83875c7fed8b8b2536f040997a9b76ac4c1da5bff398c06e860f"}, - {file = "coverage-7.3.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:4c96dd7798d83b960afc6c1feb9e5af537fc4908852ef025600374ff1a017392"}, - {file = "coverage-7.3.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:de30c1aa80f30af0f6b2058a91505ea6e36d6535d437520067f525f7df123887"}, - {file = "coverage-7.3.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:50dd1e2dd13dbbd856ffef69196781edff26c800a74f070d3b3e3389cab2600d"}, - {file = "coverage-7.3.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:b9c0c19f70d30219113b18fe07e372b244fb2a773d4afde29d5a2f7930765136"}, - {file = "coverage-7.3.1-cp310-cp310-win32.whl", hash = "sha256:770f143980cc16eb601ccfd571846e89a5fe4c03b4193f2e485268f224ab602f"}, - {file = "coverage-7.3.1-cp310-cp310-win_amd64.whl", hash = "sha256:cdd088c00c39a27cfa5329349cc763a48761fdc785879220d54eb785c8a38520"}, - {file = "coverage-7.3.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:74bb470399dc1989b535cb41f5ca7ab2af561e40def22d7e188e0a445e7639e3"}, - {file = "coverage-7.3.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:025ded371f1ca280c035d91b43252adbb04d2aea4c7105252d3cbc227f03b375"}, - {file = "coverage-7.3.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a6191b3a6ad3e09b6cfd75b45c6aeeffe7e3b0ad46b268345d159b8df8d835f9"}, - {file = "coverage-7.3.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7eb0b188f30e41ddd659a529e385470aa6782f3b412f860ce22b2491c89b8593"}, - {file = "coverage-7.3.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:75c8f0df9dfd8ff745bccff75867d63ef336e57cc22b2908ee725cc552689ec8"}, - {file = "coverage-7.3.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:7eb3cd48d54b9bd0e73026dedce44773214064be93611deab0b6a43158c3d5a0"}, - {file = "coverage-7.3.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:ac3c5b7e75acac31e490b7851595212ed951889918d398b7afa12736c85e13ce"}, - {file = 
"coverage-7.3.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5b4ee7080878077af0afa7238df1b967f00dc10763f6e1b66f5cced4abebb0a3"}, - {file = "coverage-7.3.1-cp311-cp311-win32.whl", hash = "sha256:229c0dd2ccf956bf5aeede7e3131ca48b65beacde2029f0361b54bf93d36f45a"}, - {file = "coverage-7.3.1-cp311-cp311-win_amd64.whl", hash = "sha256:c6f55d38818ca9596dc9019eae19a47410d5322408140d9a0076001a3dcb938c"}, - {file = "coverage-7.3.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:5289490dd1c3bb86de4730a92261ae66ea8d44b79ed3cc26464f4c2cde581fbc"}, - {file = "coverage-7.3.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ca833941ec701fda15414be400c3259479bfde7ae6d806b69e63b3dc423b1832"}, - {file = "coverage-7.3.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cd694e19c031733e446c8024dedd12a00cda87e1c10bd7b8539a87963685e969"}, - {file = "coverage-7.3.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:aab8e9464c00da5cb9c536150b7fbcd8850d376d1151741dd0d16dfe1ba4fd26"}, - {file = "coverage-7.3.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:87d38444efffd5b056fcc026c1e8d862191881143c3aa80bb11fcf9dca9ae204"}, - {file = "coverage-7.3.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:8a07b692129b8a14ad7a37941a3029c291254feb7a4237f245cfae2de78de037"}, - {file = "coverage-7.3.1-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:2829c65c8faaf55b868ed7af3c7477b76b1c6ebeee99a28f59a2cb5907a45760"}, - {file = "coverage-7.3.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:1f111a7d85658ea52ffad7084088277135ec5f368457275fc57f11cebb15607f"}, - {file = "coverage-7.3.1-cp312-cp312-win32.whl", hash = "sha256:c397c70cd20f6df7d2a52283857af622d5f23300c4ca8e5bd8c7a543825baa5a"}, - {file = "coverage-7.3.1-cp312-cp312-win_amd64.whl", hash = "sha256:5ae4c6da8b3d123500f9525b50bf0168023313963e0e2e814badf9000dd6ef92"}, - 
{file = "coverage-7.3.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ca70466ca3a17460e8fc9cea7123c8cbef5ada4be3140a1ef8f7b63f2f37108f"}, - {file = "coverage-7.3.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:f2781fd3cabc28278dc982a352f50c81c09a1a500cc2086dc4249853ea96b981"}, - {file = "coverage-7.3.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6407424621f40205bbe6325686417e5e552f6b2dba3535dd1f90afc88a61d465"}, - {file = "coverage-7.3.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:04312b036580ec505f2b77cbbdfb15137d5efdfade09156961f5277149f5e344"}, - {file = "coverage-7.3.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ac9ad38204887349853d7c313f53a7b1c210ce138c73859e925bc4e5d8fc18e7"}, - {file = "coverage-7.3.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:53669b79f3d599da95a0afbef039ac0fadbb236532feb042c534fbb81b1a4e40"}, - {file = "coverage-7.3.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:614f1f98b84eb256e4f35e726bfe5ca82349f8dfa576faabf8a49ca09e630086"}, - {file = "coverage-7.3.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:f1a317fdf5c122ad642db8a97964733ab7c3cf6009e1a8ae8821089993f175ff"}, - {file = "coverage-7.3.1-cp38-cp38-win32.whl", hash = "sha256:defbbb51121189722420a208957e26e49809feafca6afeef325df66c39c4fdb3"}, - {file = "coverage-7.3.1-cp38-cp38-win_amd64.whl", hash = "sha256:f4f456590eefb6e1b3c9ea6328c1e9fa0f1006e7481179d749b3376fc793478e"}, - {file = "coverage-7.3.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:f12d8b11a54f32688b165fd1a788c408f927b0960984b899be7e4c190ae758f1"}, - {file = "coverage-7.3.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f09195dda68d94a53123883de75bb97b0e35f5f6f9f3aa5bf6e496da718f0cb6"}, - {file = "coverage-7.3.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:c6601a60318f9c3945be6ea0f2a80571f4299b6801716f8a6e4846892737ebe4"}, - {file = "coverage-7.3.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:07d156269718670d00a3b06db2288b48527fc5f36859425ff7cec07c6b367745"}, - {file = "coverage-7.3.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:636a8ac0b044cfeccae76a36f3b18264edcc810a76a49884b96dd744613ec0b7"}, - {file = "coverage-7.3.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:5d991e13ad2ed3aced177f524e4d670f304c8233edad3210e02c465351f785a0"}, - {file = "coverage-7.3.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:586649ada7cf139445da386ab6f8ef00e6172f11a939fc3b2b7e7c9082052fa0"}, - {file = "coverage-7.3.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:4aba512a15a3e1e4fdbfed2f5392ec221434a614cc68100ca99dcad7af29f3f8"}, - {file = "coverage-7.3.1-cp39-cp39-win32.whl", hash = "sha256:6bc6f3f4692d806831c136c5acad5ccedd0262aa44c087c46b7101c77e139140"}, - {file = "coverage-7.3.1-cp39-cp39-win_amd64.whl", hash = "sha256:553d7094cb27db58ea91332e8b5681bac107e7242c23f7629ab1316ee73c4981"}, - {file = "coverage-7.3.1-pp38.pp39.pp310-none-any.whl", hash = "sha256:220eb51f5fb38dfdb7e5d54284ca4d0cd70ddac047d750111a68ab1798945194"}, - {file = "coverage-7.3.1.tar.gz", hash = "sha256:6cb7fe1581deb67b782c153136541e20901aa312ceedaf1467dcb35255787952"}, + {file = "coverage-7.3.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d872145f3a3231a5f20fd48500274d7df222e291d90baa2026cc5152b7ce86bf"}, + {file = "coverage-7.3.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:310b3bb9c91ea66d59c53fa4989f57d2436e08f18fb2f421a1b0b6b8cc7fffda"}, + {file = "coverage-7.3.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f47d39359e2c3779c5331fc740cf4bce6d9d680a7b4b4ead97056a0ae07cb49a"}, + {file = 
"coverage-7.3.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:aa72dbaf2c2068404b9870d93436e6d23addd8bbe9295f49cbca83f6e278179c"}, + {file = "coverage-7.3.2-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:beaa5c1b4777f03fc63dfd2a6bd820f73f036bfb10e925fce067b00a340d0f3f"}, + {file = "coverage-7.3.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:dbc1b46b92186cc8074fee9d9fbb97a9dd06c6cbbef391c2f59d80eabdf0faa6"}, + {file = "coverage-7.3.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:315a989e861031334d7bee1f9113c8770472db2ac484e5b8c3173428360a9148"}, + {file = "coverage-7.3.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:d1bc430677773397f64a5c88cb522ea43175ff16f8bfcc89d467d974cb2274f9"}, + {file = "coverage-7.3.2-cp310-cp310-win32.whl", hash = "sha256:a889ae02f43aa45032afe364c8ae84ad3c54828c2faa44f3bfcafecb5c96b02f"}, + {file = "coverage-7.3.2-cp310-cp310-win_amd64.whl", hash = "sha256:c0ba320de3fb8c6ec16e0be17ee1d3d69adcda99406c43c0409cb5c41788a611"}, + {file = "coverage-7.3.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ac8c802fa29843a72d32ec56d0ca792ad15a302b28ca6203389afe21f8fa062c"}, + {file = "coverage-7.3.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:89a937174104339e3a3ffcf9f446c00e3a806c28b1841c63edb2b369310fd074"}, + {file = "coverage-7.3.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e267e9e2b574a176ddb983399dec325a80dbe161f1a32715c780b5d14b5f583a"}, + {file = "coverage-7.3.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2443cbda35df0d35dcfb9bf8f3c02c57c1d6111169e3c85fc1fcc05e0c9f39a3"}, + {file = "coverage-7.3.2-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4175e10cc8dda0265653e8714b3174430b07c1dca8957f4966cbd6c2b1b8065a"}, + {file = 
"coverage-7.3.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:0cbf38419fb1a347aaf63481c00f0bdc86889d9fbf3f25109cf96c26b403fda1"}, + {file = "coverage-7.3.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:5c913b556a116b8d5f6ef834038ba983834d887d82187c8f73dec21049abd65c"}, + {file = "coverage-7.3.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:1981f785239e4e39e6444c63a98da3a1db8e971cb9ceb50a945ba6296b43f312"}, + {file = "coverage-7.3.2-cp311-cp311-win32.whl", hash = "sha256:43668cabd5ca8258f5954f27a3aaf78757e6acf13c17604d89648ecc0cc66640"}, + {file = "coverage-7.3.2-cp311-cp311-win_amd64.whl", hash = "sha256:e10c39c0452bf6e694511c901426d6b5ac005acc0f78ff265dbe36bf81f808a2"}, + {file = "coverage-7.3.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:4cbae1051ab791debecc4a5dcc4a1ff45fc27b91b9aee165c8a27514dd160836"}, + {file = "coverage-7.3.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:12d15ab5833a997716d76f2ac1e4b4d536814fc213c85ca72756c19e5a6b3d63"}, + {file = "coverage-7.3.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3c7bba973ebee5e56fe9251300c00f1579652587a9f4a5ed8404b15a0471f216"}, + {file = "coverage-7.3.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fe494faa90ce6381770746077243231e0b83ff3f17069d748f645617cefe19d4"}, + {file = "coverage-7.3.2-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f6e9589bd04d0461a417562649522575d8752904d35c12907d8c9dfeba588faf"}, + {file = "coverage-7.3.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:d51ac2a26f71da1b57f2dc81d0e108b6ab177e7d30e774db90675467c847bbdf"}, + {file = "coverage-7.3.2-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:99b89d9f76070237975b315b3d5f4d6956ae354a4c92ac2388a5695516e47c84"}, + {file = "coverage-7.3.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = 
"sha256:fa28e909776dc69efb6ed975a63691bc8172b64ff357e663a1bb06ff3c9b589a"}, + {file = "coverage-7.3.2-cp312-cp312-win32.whl", hash = "sha256:289fe43bf45a575e3ab10b26d7b6f2ddb9ee2dba447499f5401cfb5ecb8196bb"}, + {file = "coverage-7.3.2-cp312-cp312-win_amd64.whl", hash = "sha256:7dbc3ed60e8659bc59b6b304b43ff9c3ed858da2839c78b804973f613d3e92ed"}, + {file = "coverage-7.3.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:f94b734214ea6a36fe16e96a70d941af80ff3bfd716c141300d95ebc85339738"}, + {file = "coverage-7.3.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:af3d828d2c1cbae52d34bdbb22fcd94d1ce715d95f1a012354a75e5913f1bda2"}, + {file = "coverage-7.3.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:630b13e3036e13c7adc480ca42fa7afc2a5d938081d28e20903cf7fd687872e2"}, + {file = "coverage-7.3.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c9eacf273e885b02a0273bb3a2170f30e2d53a6d53b72dbe02d6701b5296101c"}, + {file = "coverage-7.3.2-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d8f17966e861ff97305e0801134e69db33b143bbfb36436efb9cfff6ec7b2fd9"}, + {file = "coverage-7.3.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:b4275802d16882cf9c8b3d057a0839acb07ee9379fa2749eca54efbce1535b82"}, + {file = "coverage-7.3.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:72c0cfa5250f483181e677ebc97133ea1ab3eb68645e494775deb6a7f6f83901"}, + {file = "coverage-7.3.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:cb536f0dcd14149425996821a168f6e269d7dcd2c273a8bff8201e79f5104e76"}, + {file = "coverage-7.3.2-cp38-cp38-win32.whl", hash = "sha256:307adb8bd3abe389a471e649038a71b4eb13bfd6b7dd9a129fa856f5c695cf92"}, + {file = "coverage-7.3.2-cp38-cp38-win_amd64.whl", hash = "sha256:88ed2c30a49ea81ea3b7f172e0269c182a44c236eb394718f976239892c0a27a"}, + {file = "coverage-7.3.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = 
"sha256:b631c92dfe601adf8f5ebc7fc13ced6bb6e9609b19d9a8cd59fa47c4186ad1ce"}, + {file = "coverage-7.3.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:d3d9df4051c4a7d13036524b66ecf7a7537d14c18a384043f30a303b146164e9"}, + {file = "coverage-7.3.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5f7363d3b6a1119ef05015959ca24a9afc0ea8a02c687fe7e2d557705375c01f"}, + {file = "coverage-7.3.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2f11cc3c967a09d3695d2a6f03fb3e6236622b93be7a4b5dc09166a861be6d25"}, + {file = "coverage-7.3.2-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:149de1d2401ae4655c436a3dced6dd153f4c3309f599c3d4bd97ab172eaf02d9"}, + {file = "coverage-7.3.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:3a4006916aa6fee7cd38db3bfc95aa9c54ebb4ffbfc47c677c8bba949ceba0a6"}, + {file = "coverage-7.3.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:9028a3871280110d6e1aa2df1afd5ef003bab5fb1ef421d6dc748ae1c8ef2ebc"}, + {file = "coverage-7.3.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:9f805d62aec8eb92bab5b61c0f07329275b6f41c97d80e847b03eb894f38d083"}, + {file = "coverage-7.3.2-cp39-cp39-win32.whl", hash = "sha256:d1c88ec1a7ff4ebca0219f5b1ef863451d828cccf889c173e1253aa84b1e07ce"}, + {file = "coverage-7.3.2-cp39-cp39-win_amd64.whl", hash = "sha256:b4767da59464bb593c07afceaddea61b154136300881844768037fd5e859353f"}, + {file = "coverage-7.3.2-pp38.pp39.pp310-none-any.whl", hash = "sha256:ae97af89f0fbf373400970c0a21eef5aa941ffeed90aee43650b81f7d7f47637"}, + {file = "coverage-7.3.2.tar.gz", hash = "sha256:be32ad29341b0170e795ca590e1c07e81fc061cb5b10c74ce7203491484404ef"}, ] [package.extras] @@ -160,13 +153,13 @@ files = [ [[package]] name = "multimethod" -version = "1.9.1" +version = "1.10" description = "Multiple argument dispatching." 
optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "multimethod-1.9.1-py3-none-any.whl", hash = "sha256:52f8f1f2b9d5a4c7adfdcc114dbeeebe3245a4420801e8807e26522a79fb6bc2"}, - {file = "multimethod-1.9.1.tar.gz", hash = "sha256:1589bf52ca294667fd15527ea830127c763f5bfc38562e3642591ffd0fd9d56f"}, + {file = "multimethod-1.10-py3-none-any.whl", hash = "sha256:afd84da9c3d0445c84f827e4d63ad42d17c6d29b122427c6dee9032ac2d2a0d4"}, + {file = "multimethod-1.10.tar.gz", hash = "sha256:daa45af3fe257f73abb69673fd54ddeaf31df0eb7363ad6e1251b7c9b192d8c5"}, ] [[package]] @@ -230,13 +223,13 @@ et-xmlfile = "*" [[package]] name = "packaging" -version = "23.1" +version = "23.2" description = "Core utilities for Python packages" optional = false python-versions = ">=3.7" files = [ - {file = "packaging-23.1-py3-none-any.whl", hash = "sha256:994793af429502c4ea2ebf6bf664629d07c1a9fe974af92966e4b8d2df7edc61"}, - {file = "packaging-23.1.tar.gz", hash = "sha256:a392980d2b6cffa644431898be54b0045151319d1e7ec34f0cfed48767dd334f"}, + {file = "packaging-23.2-py3-none-any.whl", hash = "sha256:8c491190033a9af7e1d931d0b5dacc2ef47509b34dd0de67ed209b5203fc88c7"}, + {file = "packaging-23.2.tar.gz", hash = "sha256:048fb0e9405036518eaaf48a55953c750c11e1a1b68e0dd1a9d62ed0c092cfc5"}, ] [[package]] @@ -345,13 +338,13 @@ files = [ [[package]] name = "platformdirs" -version = "3.10.0" +version = "3.11.0" description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." 
optional = false python-versions = ">=3.7" files = [ - {file = "platformdirs-3.10.0-py3-none-any.whl", hash = "sha256:d7c24979f292f916dc9cbf8648319032f551ea8c49a4c9bf2fb556a02070ec1d"}, - {file = "platformdirs-3.10.0.tar.gz", hash = "sha256:b45696dab2d7cc691a3226759c0d3b00c47c8b6e293d96f6436f733303f77f6d"}, + {file = "platformdirs-3.11.0-py3-none-any.whl", hash = "sha256:e9d171d00af68be50e9202731309c4e658fd8bc76f55c11c7dd760d023bda68e"}, + {file = "platformdirs-3.11.0.tar.gz", hash = "sha256:cf8ee52a3afdb965072dcc652433e0c7e3e40cf5ea1477cd4b3b1d2eb75495b3"}, ] [package.extras] @@ -375,47 +368,47 @@ testing = ["pytest", "pytest-benchmark"] [[package]] name = "pydantic" -version = "1.10.12" +version = "1.10.13" description = "Data validation and settings management using python type hints" optional = false python-versions = ">=3.7" files = [ - {file = "pydantic-1.10.12-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a1fcb59f2f355ec350073af41d927bf83a63b50e640f4dbaa01053a28b7a7718"}, - {file = "pydantic-1.10.12-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b7ccf02d7eb340b216ec33e53a3a629856afe1c6e0ef91d84a4e6f2fb2ca70fe"}, - {file = "pydantic-1.10.12-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8fb2aa3ab3728d950bcc885a2e9eff6c8fc40bc0b7bb434e555c215491bcf48b"}, - {file = "pydantic-1.10.12-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:771735dc43cf8383959dc9b90aa281f0b6092321ca98677c5fb6125a6f56d58d"}, - {file = "pydantic-1.10.12-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:ca48477862372ac3770969b9d75f1bf66131d386dba79506c46d75e6b48c1e09"}, - {file = "pydantic-1.10.12-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:a5e7add47a5b5a40c49b3036d464e3c7802f8ae0d1e66035ea16aa5b7a3923ed"}, - {file = "pydantic-1.10.12-cp310-cp310-win_amd64.whl", hash = "sha256:e4129b528c6baa99a429f97ce733fff478ec955513630e61b49804b6cf9b224a"}, - {file = 
"pydantic-1.10.12-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b0d191db0f92dfcb1dec210ca244fdae5cbe918c6050b342d619c09d31eea0cc"}, - {file = "pydantic-1.10.12-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:795e34e6cc065f8f498c89b894a3c6da294a936ee71e644e4bd44de048af1405"}, - {file = "pydantic-1.10.12-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:69328e15cfda2c392da4e713443c7dbffa1505bc9d566e71e55abe14c97ddc62"}, - {file = "pydantic-1.10.12-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2031de0967c279df0d8a1c72b4ffc411ecd06bac607a212892757db7462fc494"}, - {file = "pydantic-1.10.12-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:ba5b2e6fe6ca2b7e013398bc7d7b170e21cce322d266ffcd57cca313e54fb246"}, - {file = "pydantic-1.10.12-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:2a7bac939fa326db1ab741c9d7f44c565a1d1e80908b3797f7f81a4f86bc8d33"}, - {file = "pydantic-1.10.12-cp311-cp311-win_amd64.whl", hash = "sha256:87afda5539d5140cb8ba9e8b8c8865cb5b1463924d38490d73d3ccfd80896b3f"}, - {file = "pydantic-1.10.12-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:549a8e3d81df0a85226963611950b12d2d334f214436a19537b2efed61b7639a"}, - {file = "pydantic-1.10.12-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:598da88dfa127b666852bef6d0d796573a8cf5009ffd62104094a4fe39599565"}, - {file = "pydantic-1.10.12-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ba5c4a8552bff16c61882db58544116d021d0b31ee7c66958d14cf386a5b5350"}, - {file = "pydantic-1.10.12-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:c79e6a11a07da7374f46970410b41d5e266f7f38f6a17a9c4823db80dadf4303"}, - {file = "pydantic-1.10.12-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:ab26038b8375581dc832a63c948f261ae0aa21f1d34c1293469f135fa92972a5"}, - {file = "pydantic-1.10.12-cp37-cp37m-win_amd64.whl", hash = 
"sha256:e0a16d274b588767602b7646fa05af2782576a6cf1022f4ba74cbb4db66f6ca8"}, - {file = "pydantic-1.10.12-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:6a9dfa722316f4acf4460afdf5d41d5246a80e249c7ff475c43a3a1e9d75cf62"}, - {file = "pydantic-1.10.12-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:a73f489aebd0c2121ed974054cb2759af8a9f747de120acd2c3394cf84176ccb"}, - {file = "pydantic-1.10.12-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6b30bcb8cbfccfcf02acb8f1a261143fab622831d9c0989707e0e659f77a18e0"}, - {file = "pydantic-1.10.12-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2fcfb5296d7877af406ba1547dfde9943b1256d8928732267e2653c26938cd9c"}, - {file = "pydantic-1.10.12-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:2f9a6fab5f82ada41d56b0602606a5506aab165ca54e52bc4545028382ef1c5d"}, - {file = "pydantic-1.10.12-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:dea7adcc33d5d105896401a1f37d56b47d443a2b2605ff8a969a0ed5543f7e33"}, - {file = "pydantic-1.10.12-cp38-cp38-win_amd64.whl", hash = "sha256:1eb2085c13bce1612da8537b2d90f549c8cbb05c67e8f22854e201bde5d98a47"}, - {file = "pydantic-1.10.12-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:ef6c96b2baa2100ec91a4b428f80d8f28a3c9e53568219b6c298c1125572ebc6"}, - {file = "pydantic-1.10.12-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:6c076be61cd0177a8433c0adcb03475baf4ee91edf5a4e550161ad57fc90f523"}, - {file = "pydantic-1.10.12-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2d5a58feb9a39f481eda4d5ca220aa8b9d4f21a41274760b9bc66bfd72595b86"}, - {file = "pydantic-1.10.12-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e5f805d2d5d0a41633651a73fa4ecdd0b3d7a49de4ec3fadf062fe16501ddbf1"}, - {file = "pydantic-1.10.12-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:1289c180abd4bd4555bb927c42ee42abc3aee02b0fb2d1223fb7c6e5bef87dbe"}, - {file = 
"pydantic-1.10.12-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5d1197e462e0364906cbc19681605cb7c036f2475c899b6f296104ad42b9f5fb"}, - {file = "pydantic-1.10.12-cp39-cp39-win_amd64.whl", hash = "sha256:fdbdd1d630195689f325c9ef1a12900524dceb503b00a987663ff4f58669b93d"}, - {file = "pydantic-1.10.12-py3-none-any.whl", hash = "sha256:b749a43aa51e32839c9d71dc67eb1e4221bb04af1033a32e3923d46f9effa942"}, - {file = "pydantic-1.10.12.tar.gz", hash = "sha256:0fe8a415cea8f340e7a9af9c54fc71a649b43e8ca3cc732986116b3cb135d303"}, + {file = "pydantic-1.10.13-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:efff03cc7a4f29d9009d1c96ceb1e7a70a65cfe86e89d34e4a5f2ab1e5693737"}, + {file = "pydantic-1.10.13-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3ecea2b9d80e5333303eeb77e180b90e95eea8f765d08c3d278cd56b00345d01"}, + {file = "pydantic-1.10.13-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1740068fd8e2ef6eb27a20e5651df000978edce6da6803c2bef0bc74540f9548"}, + {file = "pydantic-1.10.13-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:84bafe2e60b5e78bc64a2941b4c071a4b7404c5c907f5f5a99b0139781e69ed8"}, + {file = "pydantic-1.10.13-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:bc0898c12f8e9c97f6cd44c0ed70d55749eaf783716896960b4ecce2edfd2d69"}, + {file = "pydantic-1.10.13-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:654db58ae399fe6434e55325a2c3e959836bd17a6f6a0b6ca8107ea0571d2e17"}, + {file = "pydantic-1.10.13-cp310-cp310-win_amd64.whl", hash = "sha256:75ac15385a3534d887a99c713aa3da88a30fbd6204a5cd0dc4dab3d770b9bd2f"}, + {file = "pydantic-1.10.13-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c553f6a156deb868ba38a23cf0df886c63492e9257f60a79c0fd8e7173537653"}, + {file = "pydantic-1.10.13-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5e08865bc6464df8c7d61439ef4439829e3ab62ab1669cddea8dd00cd74b9ffe"}, + {file = 
"pydantic-1.10.13-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e31647d85a2013d926ce60b84f9dd5300d44535a9941fe825dc349ae1f760df9"}, + {file = "pydantic-1.10.13-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:210ce042e8f6f7c01168b2d84d4c9eb2b009fe7bf572c2266e235edf14bacd80"}, + {file = "pydantic-1.10.13-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:8ae5dd6b721459bfa30805f4c25880e0dd78fc5b5879f9f7a692196ddcb5a580"}, + {file = "pydantic-1.10.13-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:f8e81fc5fb17dae698f52bdd1c4f18b6ca674d7068242b2aff075f588301bbb0"}, + {file = "pydantic-1.10.13-cp311-cp311-win_amd64.whl", hash = "sha256:61d9dce220447fb74f45e73d7ff3b530e25db30192ad8d425166d43c5deb6df0"}, + {file = "pydantic-1.10.13-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:4b03e42ec20286f052490423682016fd80fda830d8e4119f8ab13ec7464c0132"}, + {file = "pydantic-1.10.13-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f59ef915cac80275245824e9d771ee939133be38215555e9dc90c6cb148aaeb5"}, + {file = "pydantic-1.10.13-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5a1f9f747851338933942db7af7b6ee8268568ef2ed86c4185c6ef4402e80ba8"}, + {file = "pydantic-1.10.13-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:97cce3ae7341f7620a0ba5ef6cf043975cd9d2b81f3aa5f4ea37928269bc1b87"}, + {file = "pydantic-1.10.13-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:854223752ba81e3abf663d685f105c64150873cc6f5d0c01d3e3220bcff7d36f"}, + {file = "pydantic-1.10.13-cp37-cp37m-win_amd64.whl", hash = "sha256:b97c1fac8c49be29486df85968682b0afa77e1b809aff74b83081cc115e52f33"}, + {file = "pydantic-1.10.13-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:c958d053453a1c4b1c2062b05cd42d9d5c8eb67537b8d5a7e3c3032943ecd261"}, + {file = "pydantic-1.10.13-cp38-cp38-macosx_11_0_arm64.whl", hash = 
"sha256:4c5370a7edaac06daee3af1c8b1192e305bc102abcbf2a92374b5bc793818599"}, + {file = "pydantic-1.10.13-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7d6f6e7305244bddb4414ba7094ce910560c907bdfa3501e9db1a7fd7eaea127"}, + {file = "pydantic-1.10.13-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d3a3c792a58e1622667a2837512099eac62490cdfd63bd407993aaf200a4cf1f"}, + {file = "pydantic-1.10.13-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:c636925f38b8db208e09d344c7aa4f29a86bb9947495dd6b6d376ad10334fb78"}, + {file = "pydantic-1.10.13-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:678bcf5591b63cc917100dc50ab6caebe597ac67e8c9ccb75e698f66038ea953"}, + {file = "pydantic-1.10.13-cp38-cp38-win_amd64.whl", hash = "sha256:6cf25c1a65c27923a17b3da28a0bdb99f62ee04230c931d83e888012851f4e7f"}, + {file = "pydantic-1.10.13-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:8ef467901d7a41fa0ca6db9ae3ec0021e3f657ce2c208e98cd511f3161c762c6"}, + {file = "pydantic-1.10.13-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:968ac42970f57b8344ee08837b62f6ee6f53c33f603547a55571c954a4225691"}, + {file = "pydantic-1.10.13-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9849f031cf8a2f0a928fe885e5a04b08006d6d41876b8bbd2fc68a18f9f2e3fd"}, + {file = "pydantic-1.10.13-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:56e3ff861c3b9c6857579de282ce8baabf443f42ffba355bf070770ed63e11e1"}, + {file = "pydantic-1.10.13-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:9f00790179497767aae6bcdc36355792c79e7bbb20b145ff449700eb076c5f96"}, + {file = "pydantic-1.10.13-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:75b297827b59bc229cac1a23a2f7a4ac0031068e5be0ce385be1462e7e17a35d"}, + {file = "pydantic-1.10.13-cp39-cp39-win_amd64.whl", hash = "sha256:e70ca129d2053fb8b728ee7d1af8e553a928d7e301a311094b8a0501adc8763d"}, + {file = 
"pydantic-1.10.13-py3-none-any.whl", hash = "sha256:b87326822e71bd5f313e7d3bfdc77ac3247035ac10b0c0618bd99dcf95b1e687"}, + {file = "pydantic-1.10.13.tar.gz", hash = "sha256:32c8b48dcd3b2ac4e78b0ba4af3a2c2eb6048cb75202f0ea7b34feb740efc340"}, ] [package.dependencies] @@ -490,28 +483,28 @@ files = [ [[package]] name = "ruff" -version = "0.0.259" -description = "An extremely fast Python linter, written in Rust." +version = "0.1.4" +description = "An extremely fast Python linter and code formatter, written in Rust." optional = false python-versions = ">=3.7" files = [ - {file = "ruff-0.0.259-py3-none-macosx_10_7_x86_64.whl", hash = "sha256:f3938dc45e2a3f818e9cbd53007265c22246fbfded8837b2c563bf0ebde1a226"}, - {file = "ruff-0.0.259-py3-none-macosx_10_9_x86_64.macosx_11_0_arm64.macosx_10_9_universal2.whl", hash = "sha256:22e1e35bf5f12072cd644d22afd9203641ccf258bc14ff91aa1c43dc14f6047d"}, - {file = "ruff-0.0.259-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d2fb20e89e85d147c85caa807707a1488bccc1f3854dc3d53533e89b52a0c5ff"}, - {file = "ruff-0.0.259-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:49e903bcda19f6bb0725a962c058eb5d61f40d84ef52ed53b61939b69402ab4e"}, - {file = "ruff-0.0.259-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:71f0ef1985e9a6696fa97da8459917fa34bdaa2c16bd33bd5edead585b7d44f7"}, - {file = "ruff-0.0.259-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:7cfef26619cba184d59aa7fa17b48af5891d51fc0b755a9bc533478a10d4d066"}, - {file = "ruff-0.0.259-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:79b02fa17ec1fd8d306ae302cb47fb614b71e1f539997858243769bcbe78c6d9"}, - {file = "ruff-0.0.259-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:428507fb321b386dda70d66cd1a8aa0abf51d7c197983d83bb9e4fa5ee60300b"}, - {file = "ruff-0.0.259-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:c5fbaea9167f1852757f02133e5daacdb8c75b3431343205395da5b10499927a"}, - {file = "ruff-0.0.259-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:40ae87f2638484b7e8a7567b04a7af719f1c484c5bf132038b702bb32e1f6577"}, - {file = "ruff-0.0.259-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:29e2b77b7d5da6a7dd5cf9b738b511355c5734ece56f78e500d4b5bffd58c1a0"}, - {file = "ruff-0.0.259-py3-none-musllinux_1_2_i686.whl", hash = "sha256:5b3c1beacf6037e7f0781d4699d9a2dd4ba2462f475be5b1f45cf84c4ba3c69d"}, - {file = "ruff-0.0.259-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:daaea322e7e85f4c13d82be9536309e1c4b8b9851bb0cbc7eeb15d490fd46bf9"}, - {file = "ruff-0.0.259-py3-none-win32.whl", hash = "sha256:38704f151323aa5858370a2f792e122cc25e5d1aabe7d42ceeab83da18f0b456"}, - {file = "ruff-0.0.259-py3-none-win_amd64.whl", hash = "sha256:aa9449b898287e621942cc71b9327eceb8f0c357e4065fecefb707ef2d978df8"}, - {file = "ruff-0.0.259-py3-none-win_arm64.whl", hash = "sha256:e4f39e18702de69faaaee3969934b92d7467285627f99a5b6ecd55a7d9f5d086"}, - {file = "ruff-0.0.259.tar.gz", hash = "sha256:8b56496063ab3bfdf72339a5fbebb8bd46e5c5fee25ef11a9f03b208fa0562ec"}, + {file = "ruff-0.1.4-py3-none-macosx_10_7_x86_64.whl", hash = "sha256:864958706b669cce31d629902175138ad8a069d99ca53514611521f532d91495"}, + {file = "ruff-0.1.4-py3-none-macosx_10_9_x86_64.macosx_11_0_arm64.macosx_10_9_universal2.whl", hash = "sha256:9fdd61883bb34317c788af87f4cd75dfee3a73f5ded714b77ba928e418d6e39e"}, + {file = "ruff-0.1.4-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b4eaca8c9cc39aa7f0f0d7b8fe24ecb51232d1bb620fc4441a61161be4a17539"}, + {file = "ruff-0.1.4-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a9a1301dc43cbf633fb603242bccd0aaa34834750a14a4c1817e2e5c8d60de17"}, + {file = "ruff-0.1.4-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:78e8db8ab6f100f02e28b3d713270c857d370b8d61871d5c7d1702ae411df683"}, + {file = 
"ruff-0.1.4-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:80fea754eaae06335784b8ea053d6eb8e9aac75359ebddd6fee0858e87c8d510"}, + {file = "ruff-0.1.4-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6bc02a480d4bfffd163a723698da15d1a9aec2fced4c06f2a753f87f4ce6969c"}, + {file = "ruff-0.1.4-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9862811b403063765b03e716dac0fda8fdbe78b675cd947ed5873506448acea4"}, + {file = "ruff-0.1.4-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:58826efb8b3efbb59bb306f4b19640b7e366967a31c049d49311d9eb3a4c60cb"}, + {file = "ruff-0.1.4-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:fdfd453fc91d9d86d6aaa33b1bafa69d114cf7421057868f0b79104079d3e66e"}, + {file = "ruff-0.1.4-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:e8791482d508bd0b36c76481ad3117987301b86072158bdb69d796503e1c84a8"}, + {file = "ruff-0.1.4-py3-none-musllinux_1_2_i686.whl", hash = "sha256:01206e361021426e3c1b7fba06ddcb20dbc5037d64f6841e5f2b21084dc51800"}, + {file = "ruff-0.1.4-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:645591a613a42cb7e5c2b667cbefd3877b21e0252b59272ba7212c3d35a5819f"}, + {file = "ruff-0.1.4-py3-none-win32.whl", hash = "sha256:99908ca2b3b85bffe7e1414275d004917d1e0dfc99d497ccd2ecd19ad115fd0d"}, + {file = "ruff-0.1.4-py3-none-win_amd64.whl", hash = "sha256:1dfd6bf8f6ad0a4ac99333f437e0ec168989adc5d837ecd38ddb2cc4a2e3db8a"}, + {file = "ruff-0.1.4-py3-none-win_arm64.whl", hash = "sha256:d98ae9ebf56444e18a3e3652b3383204748f73e247dea6caaf8b52d37e6b32da"}, + {file = "ruff-0.1.4.tar.gz", hash = "sha256:21520ecca4cc555162068d87c747b8f95e1e95f8ecfcbbe59e8dd00710586315"}, ] [[package]] @@ -541,13 +534,13 @@ widechars = ["wcwidth"] [[package]] name = "typeguard" -version = "4.1.3" +version = "4.1.5" description = "Run-time type checker for Python" optional = false python-versions = ">=3.8" files = [ - {file = "typeguard-4.1.3-py3-none-any.whl", hash = 
"sha256:5b7453b1e3b35fcfe2d62fa4ec500d05e6f2f2eb46f4126ae964677fcc384fff"}, - {file = "typeguard-4.1.3.tar.gz", hash = "sha256:7d4264cd631ac1157c5bb5ec992281b4f1e2ba7a35db91bc15f442235e244803"}, + {file = "typeguard-4.1.5-py3-none-any.whl", hash = "sha256:8923e55f8873caec136c892c3bed1f676eae7be57cdb94819281b3d3bc9c0953"}, + {file = "typeguard-4.1.5.tar.gz", hash = "sha256:ea0a113bbc111bcffc90789ebb215625c963411f7096a7e9062d4e4630c155fd"}, ] [package.dependencies] @@ -580,13 +573,13 @@ test = ["black (>=22.3.0,<23.0.0)", "coverage (>=6.2,<7.0)", "isort (>=5.0.6,<6. [[package]] name = "typing-extensions" -version = "4.7.1" -description = "Backported and Experimental Type Hints for Python 3.7+" +version = "4.8.0" +description = "Backported and Experimental Type Hints for Python 3.8+" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "typing_extensions-4.7.1-py3-none-any.whl", hash = "sha256:440d5dd3af93b060174bf433bccd69b0babc3b15b1a8dca43789fd7f61514b36"}, - {file = "typing_extensions-4.7.1.tar.gz", hash = "sha256:b75ddc264f0ba5615db7ba217daeb99701ad295353c45f9e95963337ceeeffb2"}, + {file = "typing_extensions-4.8.0-py3-none-any.whl", hash = "sha256:8f92fc8806f9a6b641eaa5318da32b44d401efaac0f6678c9bc448ba3605faa0"}, + {file = "typing_extensions-4.8.0.tar.gz", hash = "sha256:df8e4339e9cb77357558cbdbceca33c303714cf861d1eef15e1070055ae8b7ef"}, ] [[package]] @@ -702,4 +695,4 @@ files = [ [metadata] lock-version = "2.0" python-versions = "^3.11" -content-hash = "9740a71e98802af3624303af7136fe5020e2d705435cbbf2ef214b988c39dfe2" +content-hash = "0d2735ed16d5b0a5c4cd2acee15f27f33614a589da94a7442084f88644b65e33" diff --git a/pyproject.toml b/pyproject.toml index c2ddaa8d..ef85dd3d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,8 +17,8 @@ pandera = "0.16.1" [tool.poetry.group.dev.dependencies] pytest = "7.4.0" pytest-cov = "4.1.0" -black = "23.3.0" -ruff = "0.0.259" +black = "23.10.1" +ruff = "0.1.4" 
[tool.poetry.group.data.dependencies] openpyxl = "^3.1.2" diff --git a/regtech_data_validator/check_functions.py b/regtech_data_validator/check_functions.py index dbbc6470..3e6157ad 100644 --- a/regtech_data_validator/check_functions.py +++ b/regtech_data_validator/check_functions.py @@ -11,7 +11,6 @@ the function. This may or may not align with the name of the validation in the fig.""" - import re from datetime import datetime, timedelta from typing import Dict diff --git a/regtech_data_validator/phase_validations.py b/regtech_data_validator/phase_validations.py index 8900480b..e809259c 100644 --- a/regtech_data_validator/phase_validations.py +++ b/regtech_data_validator/phase_validations.py @@ -3,7 +3,6 @@ This mapping is used to populate the schema template object and create an instance of a PanderaSchema object for phase 1 and phase 2.""" - from regtech_data_validator import global_data from regtech_data_validator.check_functions import ( has_correct_length, From 6e30b5f2aa524eccf0bd1245ed7a70254316e9a2 Mon Sep 17 00:00:00 2001 From: Hans Keeler Date: Tue, 7 Nov 2023 10:42:12 -0500 Subject: [PATCH 31/33] Remove remaining non-class `@staticmethod` usage Co-authored-by: lchen-2101 <73617864+lchen-2101@users.noreply.github.com> --- regtech_data_validator/create_schemas.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/regtech_data_validator/create_schemas.py b/regtech_data_validator/create_schemas.py index fe03afdf..1c03e0cc 100644 --- a/regtech_data_validator/create_schemas.py +++ b/regtech_data_validator/create_schemas.py @@ -31,7 +31,6 @@ def get_phase_2_schema_for_lei(context: dict[str, str] | None = None): return get_schema_by_phase_for_lei(phase_2_template, "phase_2", context) -@staticmethod def _get_check_fields(check: Check, primary_column: str) -> list[str]: """ Retrieves unique sorted list of fields associated with a given Check @@ -47,7 +46,6 @@ def _get_check_fields(check: Check, primary_column: str) -> list[str]: return fields -@staticmethod def 
_filter_valid_records(df: pd.DataFrame, check_output: pd.Series, fields: list[str]) -> pd.DataFrame: """ Return only records and fields associated with a given `Check`'s @@ -65,7 +63,6 @@ def _filter_valid_records(df: pd.DataFrame, check_output: pd.Series, fields: lis return failed_records_df -@staticmethod def _records_to_fields(failed_records_df: pd.DataFrame) -> pd.DataFrame: """ Transforms a DataFrame with columns per Check field to DataFrame with a row per field @@ -81,7 +78,6 @@ def _records_to_fields(failed_records_df: pd.DataFrame) -> pd.DataFrame: return failed_record_fields_df -@staticmethod def _add_validation_metadata(failed_check_fields_df: pd.DataFrame, check: SBLCheck): """ Add SBLCheck metadata (id, name, description, severity) From 8770b038b8e1cba61f093573175d133dbb0759a7 Mon Sep 17 00:00:00 2001 From: Hans Keeler Date: Tue, 7 Nov 2023 20:18:37 -0500 Subject: [PATCH 32/33] Write status and findings count to stderr --- regtech_data_validator/cli.py | 8 ++++++++ tests/test_cli.py | 16 +++++++++++----- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/regtech_data_validator/cli.py b/regtech_data_validator/cli.py index 417eddfd..976dc67c 100644 --- a/regtech_data_validator/cli.py +++ b/regtech_data_validator/cli.py @@ -126,7 +126,13 @@ def validate( input_df = pd.read_csv(path, dtype=str, na_filter=False) is_valid, findings_df = validate_phases(input_df, context_dict) + status = 'SUCCESS' + no_of_findings = 0 + if not is_valid: + status = 'FAILURE' + no_of_findings = len(findings_df.index.unique()) + match output: case OutputFormat.PANDAS: print(df_to_str(findings_df)) @@ -139,6 +145,8 @@ def validate( case _: raise ValueError(f'output format "{output}" not supported') + typer.echo(f"status: {status}, findings: {no_of_findings}", err=True) + # returned values are only used in unit tests return is_valid, findings_df diff --git a/tests/test_cli.py b/tests/test_cli.py index cc5fc889..72759363 100644 --- a/tests/test_cli.py +++ 
b/tests/test_cli.py @@ -8,7 +8,7 @@ from regtech_data_validator import cli -cli_runner = CliRunner() +cli_runner = CliRunner(mix_stderr=False) data_dir = f'{os.path.dirname(os.path.realpath(__file__))}/data' pass_file = f'{data_dir}/sbl-validations-pass.csv' fail_file = f'{data_dir}/sbl-validations-fail.csv' @@ -214,11 +214,17 @@ def test_pass_file_defaults(self): assert result.exit_code == 0 assert result.stdout == '' + assert result.stderr == 'status: SUCCESS, findings: 0\n' - def test_fail_file_output_arg_value(self): + def test_pass_file_invalid_output_arg_value(self): result = cli_runner.invoke(cli.app, ['validate', pass_file, '--output', 'pdf']) - print(result.stdout) - assert result.exit_code == 2 - assert "Invalid value for '--output': 'pdf' is not one of" in result.stdout + assert "Invalid value for '--output': 'pdf' is not one of" in result.stderr + + def test_fail_file_defaults(self): + result = cli_runner.invoke(cli.app, ['validate', fail_file]) + + assert result.exit_code == 0 + assert result.stdout != '' + assert 'status: FAILURE, findings:' in result.stderr From ee765b0bb7f1d47cfe6fbffe13871b3d9afe92c6 Mon Sep 17 00:00:00 2001 From: Hans Keeler Date: Wed, 8 Nov 2023 02:04:06 -0500 Subject: [PATCH 33/33] Fix mis-id'd validations: E0660, E0740 --- regtech_data_validator/phase_validations.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/regtech_data_validator/phase_validations.py b/regtech_data_validator/phase_validations.py index e809259c..c9271887 100644 --- a/regtech_data_validator/phase_validations.py +++ b/regtech_data_validator/phase_validations.py @@ -1332,7 +1332,7 @@ def get_phase_1_and_2_validations_for_lei(context: dict[str, str] | None = None) "phase_1": [ SBLCheck( is_valid_enum, - id="E0640", + id="E0660", name="census_tract_adr_type.invalid_enum_value", description="'Census tract: type of address' must equal 1, 2, 3, or 988.", severity=Severity.ERROR, @@ -1445,7 +1445,7 @@ def 
get_phase_1_and_2_validations_for_lei(context: dict[str, str] | None = None) "phase_1": [ SBLCheck( is_valid_enum, - id="E0720", + id="E0740", name="naics_code_flag.invalid_enum_value", description=( "'North American Industry Classification System (NAICS) code: NP flag'must equal 900 or 988."