Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Task 33, adding validation id #44

Merged
merged 11 commits into from
Sep 12, 2023
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions src/tests/test_check_functions.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import pandas as pd
import pytest

from validator import global_data
from validator.check_functions import (
Expand All @@ -21,6 +22,7 @@
meets_multi_value_field_restriction,
string_contains,
)
from validator.checks import SBLCheck


class TestInvalidDateFormat:
Expand Down Expand Up @@ -868,3 +870,27 @@ def test_with_incorrect_values(self):
)
is False
)


class TestSBLCheck:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you move this into its own file?
Following the "test_check_functions.py" naming, maybe call the new file "test_checks.py".

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh yeah, that makes sense. Sure.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you remove these from test_check_functions since they are moved into test_checks file?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought I did. Let me check

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I hadn't committed the test_check_functions changes.

def test_no_id_check(self):
    """An SBLCheck built with a `name` but no `id` must raise ValueError."""
    # Expect ValueError directly rather than catching any Exception and
    # comparing the type afterwards: an unrelated exception then surfaces
    # with its own traceback instead of a failed type assertion.
    with pytest.raises(ValueError) as exc:
        SBLCheck(lambda: True, warning=True, name="Just a Warning")

    assert "Each check must be assigned a `name` and an `id`." in str(exc.value)

def test_no_name_check(self):
    """An SBLCheck built with an `id` but no `name` must raise ValueError."""
    # Expect ValueError directly rather than catching any Exception and
    # comparing the type afterwards: an unrelated exception then surfaces
    # with its own traceback instead of a failed type assertion.
    with pytest.raises(ValueError) as exc:
        SBLCheck(lambda: True, id="00000", warning=True)

    assert "Each check must be assigned a `name` and an `id`." in str(exc.value)

def test_name_and_id_check(self):
    """An SBLCheck built with both `name` and `id` must not raise."""
    # No try/except flag is needed: if the constructor raises, pytest fails
    # this test and reports the original traceback, which is more useful
    # than a bare `assert raised is False`.
    SBLCheck(lambda: True, id="00000", warning=True, name="Just a Warning")
27 changes: 13 additions & 14 deletions src/validator/checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
name="Just a Warning"
)

error_check_implied = SBLCheck(lambda: Truename="Error Check")
error_check_implied = SBLCheck(lambda: True name="Error Check")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think this change is needed. Can you revert it?


error_check_explicit = SBLCheck(
lambda: True,
Expand All @@ -32,31 +32,37 @@


class SBLCheck(Check):
"""A custom Pandera.Check subclasss that requires a `name` be
"""A custom Pandera.Check subclasss that requires a `name` and an `id` be
specified. Additionally, an attribute named `warning` is added to
the class to enable distinction between warnings and errors. The
default value of warning is `False` which corresponds to an error.

Don't use this class directly. Make use of the SBLErrorCheck and
SBLWarningCheck subclasses below."""

def __init__(self, check_fn: Callable, warning=False, *args, **kwargs):
"""Custom init method that verifies the presence of `name` in
def __init__(
self, check_fn: Callable, id: str = None, warning=False, *args, **kwargs
):
"""Custom init method that verifies the presence of `name` and `id` in
kwargs creates a custom class attribute called `warning`. All
other initializaiton is handled by the parent Check class.

Args:
check_fn (Callable): A function which evaluates the validity
of the column(s) being tested.
id (str, required): Each check mut have an id.
warning (bool, optional): Boolean specifying whether to
treat the check as a warning rather than an error.

Raises:
ValueError: Raised if `name` not supplied in kwargs.
ValueError: Raised if `name` not supplied in kwargs and if id is not
supplied or None.
"""

if "name" not in kwargs:
raise ValueError("Each check must be assigned a `name`.")
self.id = id

if "name" not in kwargs or id is None:
raise ValueError("Each check must be assigned a `name` and an `id`.")

# if warning==False treat check as an error check
self.warning = warning
Expand All @@ -67,10 +73,3 @@ def __init__(self, check_fn: Callable, warning=False, *args, **kwargs):
def get_backend(cls, check_obj: Any) -> Type[BaseCheckBackend]:
"""Assume Pandas DataFrame and return PandasCheckBackend"""
return PandasCheckBackend


if __name__ == "__main__":
warning_check = SBLCheck(lambda: True, warning=True, name="Just a Warning")

error_check_implied = SBLCheck(lambda: True, name="Error Check")
error_check_explicit = SBLCheck(lambda: True, warning=False, name="Also an Error")
5 changes: 4 additions & 1 deletion src/validator/create_schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,19 +25,22 @@ def print_schema_errors(errors: SchemaErrors, phase: str):
# Name of the column in the dataframe being checked
schema_error = error["error"]
column_name = schema_error.schema.name
check_id = "n/a"

# built in checks such as unique=True are different than custom
# checks unfortunately so the name needs to be accessed differently
try:
check_name = schema_error.check.name
check_id = schema_error.check.id
# This will either be a boolean series or a single bool
check_output = schema_error.check_output
except AttributeError:
check_name = schema_error.check
# this is just a string that we'd need to parse manually
check_output = schema_error.args[0]

print(f"{phase} Validation `{check_name}` failed for column `{column_name}`")
f"{phase} Validation `{check_name}` with id: `{check_id}` \
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is missing the `print(` call.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

        print(
            f"{phase} Validation `{check_name}` with id: `{check_id}` failed for column `{column_name}`"
        )

failed for column `{column_name}`"
print(check_output)
print("")

Expand Down
Loading