Skip to content

Commit

Permalink
Ticket 11, added invalid_uid_lei validation and unit tests (#30)
Browse files Browse the repository at this point in the history
* Ticket 11, added invalid_uid_lei validation and unit tests

* changed function name

* addressed Hans suggestions to have lei passed through command line and have a way to skip this validation

* addressed the comments

* modified comments

* addressed the comment and modified the phase validation

* modified function name

* fixed typo

* addressed the comment

* Addressed the comment

---------

Co-authored-by: Nargis Sultani <[email protected]>
  • Loading branch information
nargis-sultani and Nargis Sultani authored Aug 16, 2023
1 parent cf37142 commit 30e4650
Show file tree
Hide file tree
Showing 8 changed files with 6,518 additions and 6,360 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ failed validation.

```sh
# Test validating the "good" file
# To validate against a specific LEI, pass the lei as the first arg and csv_path as the second arg;
# otherwise pass only csv_path
python src/validator/main.py SBL_Validations_SampleData_GoodFile_03312023.csv

# Test validating the "bad" file
Expand Down
2 changes: 1 addition & 1 deletion SBL_Validations_SampleData_GoodFile_03312023.csv
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ uid,app_date,app_method,app_recipient,ct_credit_product,ct_credit_product_ff,ct_
000TESTFIUIDDONOTUSEXGXVID302XTC7,20241201,1,1,988,,999,,999,,999,,999,,,5,20241231,999,,999,,,,999,,,,,,999,,999,999,988,,988,,988,,988,988,,988,988,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
000TESTFIUIDDONOTUSEXGXVID311XTC1,20241201,1,1,988,,999,,999,,999,,999,,,5,20241231,999,,999,,,,999,,,,,,999,,999,999,988,,988,,988,,988,988,,988,988,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
000TESTFIUIDDONOTUSEXGXVID312XTC1,20241201,1,1,988,,999,,999,,999,,999,,,5,20241231,999,,999,,,,999,,,,,,999,,999,999,988,,988,,988,,988,988,,988,988,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
0TESTFIUIDDONOTUSEXGXVID312XTC2,20241201,1,1,988,,999,,999,,999,,999,,,5,20241231,999,,999,,,,999,,,,,,999,,999,999,988,,988,,988,,988,988,,988,988,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
000TESTFIUIDDONOTUSEXGXVID312XTC2,20241201,1,1,988,,999,,999,,999,,999,,,5,20241231,999,,999,,,,999,,,,,,999,,999,999,988,,988,,988,,988,988,,988,988,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
000TESTFIUIDDONOTUSEXGXVID321XTC1,20241201,1,1,988,,999,,999,,999,,999,,7777,1,20241231,999,,999,,,,999,,,0,0,0,999,,1,1,988,,988,,988,,988,988,,988,988,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
000TESTFIUIDDONOTUSEXGXVID321XTC2,20241201,1,1,988,,999,,999,,999,,999,,7777,1,20241231,999,,999,,,,999,,,0,0,0,999,,2,2,988,,988,,988,,988,988,,988,988,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
000TESTFIUIDDONOTUSEXGXVID321XTC3,20241201,1,1,988,,999,,999,,999,,999,,7777,1,20241231,999,,999,,,,999,,,0,0,0,999,,1,1,988,,988,,988,,988,988,,988,988,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Expand Down
72 changes: 72 additions & 0 deletions src/tests/test_check_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
is_valid_code,
is_valid_enum,
meets_multi_value_field_restriction,
string_contains,
)


Expand Down Expand Up @@ -803,3 +804,74 @@ def test_with_incorrect_is_equal_and_not_equal_conditions(self):
{groupby_values: series}, condition_values, should_fieldset_key_equal_to
)
assert result1.values == [False]


class TestIsValidId:
    """Tests for string_contains with and without slice bounds."""

    def test_with_correct_values(self):
        """string_contains must return True when the (sliced) value
        equals containing_value."""
        # start_idx and end_idx not set: the whole value is compared
        assert string_contains("000TESTFIUIDDONOTUSE", "000TESTFIUIDDONOTUSE") is True

        # start_idx and end_idx both set: value[start_idx:end_idx] is compared
        assert (
            string_contains(
                "000TESTFIUIDDONOTUSEXGXVID11XTC1",
                "TEST",
                start_idx=3,
                end_idx=7,
            )
            is True
        )

        # only start_idx set: value[start_idx:] is compared
        assert (
            string_contains(
                "000TESTFIUIDDONOTUSEXGXVID11XTC1",
                "TESTFIUIDDONOTUSEXGXVID11XTC1",
                start_idx=3,
            )
            is True
        )

        # only end_idx set: value[:end_idx] is compared
        assert (
            string_contains(
                "000TESTFIUIDDONOTUSEXGXVID11XTC1",
                "000TESTFIUIDDONOTUSE",
                end_idx=20,
            )
            is True
        )

    def test_with_incorrect_values(self):
        """string_contains must return False when the (sliced) value
        differs from containing_value."""
        # start_idx and end_idx not set: the whole value is compared
        assert string_contains("000TESTFIUIDDONOTUSE", "TESTFIUIDDONOTUSE") is False

        # start_idx and end_idx both set: value[start_idx:end_idx] is compared
        assert (
            string_contains(
                "000FIUIDDONOTUSEXGXVID11XTC1", "TEST", start_idx=4, end_idx=7
            )
            is False
        )

        # only start_idx set: value[start_idx:] is compared
        assert (
            string_contains(
                "000TESTFIUIDDONOTUSEXGXVID11XTC1",
                "0TESTFIUIDDONOTUSEXGXVID11XTC1",
                start_idx=4,
            )
            is False
        )

        # only end_idx set: value[:end_idx] is compared
        assert (
            string_contains(
                "000TESTFIUIDDONOTUSEXGXVID11XTC1",
                "000TESTFIUIDDONOTUSEXGX",
                end_idx=20,
            )
            is False
        )
30 changes: 30 additions & 0 deletions src/validator/check_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -716,3 +716,33 @@ def has_valid_fieldset_pair(
)
)
return pd.concat(validation_holder)


def string_contains(
    value: str,
    containing_value: str = None,
    start_idx: int = None,
    end_idx: int = None,
) -> bool:
    """
    Check whether value, or a slice of it, equals containing_value.

    Note that this is an equality comparison, not a substring search.

    Args:
        value (str): parsed value
        containing_value (str): value that value (or its slice) is compared
            to; when None no comparison is made and True is returned
        start_idx (int): start index if value needs to be sliced
        end_idx (int): end index (exclusive) if value needs to be sliced
    Returns:
        bool: True if containing_value is None or the (sliced) value
            equals containing_value
    """
    # Nothing to compare against: the check passes.
    if containing_value is None:
        return True

    # No bounds given: compare the value as-is, without slicing it.
    if start_idx is None and end_idx is None:
        return value == containing_value

    # A None bound leaves that side of the slice open, so this one
    # expression covers "start only", "end only" and "both".
    return value[start_idx:end_idx] == containing_value
21 changes: 15 additions & 6 deletions src/validator/create_schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,25 @@
with validations listed in phase 1 and phase 2."""

from pandera import DataFrameSchema
from phase_validations import phase_1_and_2_validations
from phase_validations import get_phase_1_and_2_validations_for_lei
from schema_template import get_template

# Get separate schema templates for phase 1 and 2


phase_1_template = get_template()
phase_2_template = get_template()

for column, validations in phase_1_and_2_validations:
phase_1_template[column].checks = validations["phase_1"]
phase_2_template[column].checks = validations["phase_2"]

phase_1_schema = DataFrameSchema(phase_1_template)
phase_2_schema = DataFrameSchema(phase_2_template)
def get_schema_by_phase_for_lei(template: dict, phase: str, lei: str = None):
    """Set the checks for one phase on a template and build a schema from it.

    Args:
        template (dict): mapping of column name to column object; the
            `checks` attribute of its columns is overwritten in place
        phase (str): key used to pick the checks for each column,
            e.g. "phase_1" or "phase_2"
        lei (str): passed through to get_phase_1_and_2_validations_for_lei
    Returns:
        DataFrameSchema: schema built from the updated template
    """
    validations_for_lei = get_phase_1_and_2_validations_for_lei(lei)
    for column_name, checks_by_phase in validations_for_lei:
        # NOTE: this mutates the caller's template rather than a copy.
        template[column_name].checks = checks_by_phase[phase]
    return DataFrameSchema(template)


def get_phase_1_schema_for_lei(lei: str = None):
    """Build the schema from phase_1_template using the "phase_1" checks for lei."""
    return get_schema_by_phase_for_lei(
        template=phase_1_template,
        phase="phase_1",
        lei=lei,
    )


def get_phase_2_schema_for_lei(lei: str = None):
    """Build the schema from phase_2_template using the "phase_2" checks for lei."""
    return get_schema_by_phase_for_lei(
        template=phase_2_template,
        phase="phase_2",
        lei=lei,
    )
22 changes: 15 additions & 7 deletions src/validator/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,14 @@

import pandas as pd
from pandera.errors import SchemaErrors
from schema import sblar_schema
from schema import get_schema_for_lei


def csv_to_df(path: str) -> pd.DataFrame:
    """Read the CSV at path into a DataFrame of strings.

    Every column is read as str and NA detection is turned off, so empty
    cells come back as empty strings.
    """
    read_options = {"dtype": str, "na_filter": False}
    return pd.read_csv(path, **read_options)


def run_validation_on_df(df: pd.DataFrame) -> None:
def run_validation_on_df(df: pd.DataFrame, lei: str) -> None:
"""
Run validation on the supplied dataframe and print a report to
the terminal.
Expand All @@ -28,6 +28,8 @@ def run_validation_on_df(df: pd.DataFrame) -> None:
print(df)
print("")

sblar_schema = get_schema_for_lei(lei)

try:
sblar_schema(df, lazy=True)
except SchemaErrors as errors:
Expand All @@ -53,10 +55,16 @@ def run_validation_on_df(df: pd.DataFrame) -> None:

if __name__ == "__main__":
csv_path = None
try:
csv_path = sys.argv[1]
except IndexError:
lei: str = None
if len(sys.argv) == 1:
raise ValueError("csv_path arg not provided")

elif len(sys.argv) == 2:
csv_path = sys.argv[1]
elif len(sys.argv) == 3:
lei = sys.argv[1]
csv_path = sys.argv[2]
else:
raise ValueError("correct number of args not provided")

df = csv_to_df(csv_path)
run_validation_on_df(df)
run_validation_on_df(df, lei)
Loading

0 comments on commit 30e4650

Please sign in to comment.