Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Replace main.py with Typer-based CLI app #63

Merged
merged 35 commits into from
Nov 8, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
722b981
Move files into more standard Python project layout
hkeeler Oct 16, 2023
d4529ec
Fix issues related to repo restructure
hkeeler Oct 17, 2023
ccee738
Move data-related code and config under `data` dir
hkeeler Oct 17, 2023
baeb814
Merge config.py and tools under data dir
hkeeler Oct 17, 2023
9044db8
Add README for external data sources
hkeeler Oct 17, 2023
8bc7ab3
Improve SBLCheck constructor args
hkeeler Oct 17, 2023
58873bc
Fix multi-line string that was setup as a tuple
hkeeler Oct 17, 2023
b732419
Print CLI output as JSON instead of Python dict.
hkeeler Oct 17, 2023
3b18289
black and ruff fixups
hkeeler Oct 17, 2023
5a515e9
Fix path to `tests` dir in DevContainer setup
hkeeler Oct 18, 2023
e4d7681
Remove deprecated python formatting VSCode setting
hkeeler Oct 20, 2023
1bb7c94
Add `typer` package for building better CLI
hkeeler Oct 20, 2023
8497a9a
Return df of validation findings instead of dict
hkeeler Oct 20, 2023
15e2e02
Add typer and tabulate in CLI poetry dep group
hkeeler Oct 24, 2023
4324a15
Add project setup and CLI usage to README.md
hkeeler Oct 25, 2023
5d85a33
Cleanup badges section of README.md
hkeeler Oct 25, 2023
d2288f4
Moar README.md fix 'em ups
hkeeler Oct 25, 2023
2128f69
Remove old Makefile
hkeeler Oct 30, 2023
821aae8
Change CLI command to `cfpb-val`
hkeeler Nov 2, 2023
4d136ba
Fix 3 misconfigured validations
hkeeler Nov 2, 2023
a20fc22
Return DataFrame for validation results
hkeeler Nov 2, 2023
cef30d4
Replace `main.py` with Typer-based CLI app
hkeeler Nov 2, 2023
b27b749
Fix paths in README.md
hkeeler Nov 2, 2023
6f36b99
Merge branch 'main' of https://github.com/cfpb/regtech-data-validator…
hkeeler Nov 2, 2023
4e4c18e
Bring back `describe` sub-command
hkeeler Nov 2, 2023
87b0c3a
Make `black` happy
hkeeler Nov 2, 2023
46e2b43
Fix path to unit tests in README.md
hkeeler Nov 6, 2023
c4abc6d
Improve test cov by splitting up CLI output logic
hkeeler Nov 7, 2023
253f815
Remove @staticmethod used outside of a class
hkeeler Nov 7, 2023
18ff0d1
Make black and ruff happy
hkeeler Nov 7, 2023
ddb6682
Revert pinned `black` Action; sync Poetry versions
hkeeler Nov 7, 2023
6e30b5f
Remove remaining non-class `@staticmethod` usage
hkeeler Nov 7, 2023
8770b03
Write status and findings count to stderr
hkeeler Nov 8, 2023
23bf8ef
Merge branch 'add-typer-cli' of https://github.com/cfpb/regtech-data-…
hkeeler Nov 8, 2023
ee765b0
Fix mis-id'd validations: E0660, E0740
hkeeler Nov 8, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@
],
"editor.tabSize": 4,
"editor.formatOnSave": true,
"python.formatting.provider": "none",
"python.envFile": "${workspaceFolder}/.env",
"editor.codeActionsOnSave": {
"source.organizeImports": true
Expand Down
1 change: 0 additions & 1 deletion .github/workflows/linters.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ jobs:
- uses: psf/black@stable
with:
options: "--check --diff --verbose"
version: "~= 22.0"
ruff:
runs-on: ubuntu-latest
steps:
Expand Down
8 changes: 0 additions & 8 deletions Makefile

This file was deleted.

1,371 changes: 220 additions & 1,151 deletions README.md

Large diffs are not rendered by default.

338 changes: 183 additions & 155 deletions poetry.lock

Large diffs are not rendered by default.

17 changes: 12 additions & 5 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"

[tool.poetry]
name = "regtech-data-validator"
version = "0.1.0"
Expand All @@ -13,15 +17,18 @@ pandera = "0.16.1"
[tool.poetry.group.dev.dependencies]
pytest = "7.4.0"
pytest-cov = "4.1.0"
black = "23.3.0"
ruff = "0.0.259"
black = "23.10.1"
ruff = "0.1.4"

[tool.poetry.group.data.dependencies]
openpyxl = "^3.1.2"

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
[tool.poetry.group.cli.dependencies]
tabulate = "^0.9.0"
typer = "^0.9.0"

[tool.poetry.scripts]
cfpb-val = 'regtech_data_validator.cli:app'

# Black formatting
[tool.black]
Expand Down
1 change: 0 additions & 1 deletion regtech_data_validator/check_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
the function. This may or may not align with the name of the validation
in the fig."""


import re
from datetime import datetime, timedelta
from typing import Dict
Expand Down
155 changes: 155 additions & 0 deletions regtech_data_validator/cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
from dataclasses import dataclass
from enum import StrEnum
import json
from pathlib import Path
from typing import Annotated, Optional

import pandas as pd
from tabulate import tabulate
import typer

from regtech_data_validator.create_schemas import validate_phases


# Root Typer application; the `describe` and `validate` sub-commands register
# themselves on it via `@app.command(...)`. With `no_args_is_help=True` the help
# text is shown when the CLI is invoked without arguments, and
# `pretty_exceptions_enable=False` turns off Typer's formatted tracebacks.
app = typer.Typer(no_args_is_help=True, pretty_exceptions_enable=False)


@dataclass
class KeyValueOpt:
    """A single `<key>=<value>` command-line option, split into its two parts."""

    # Text found to the left of the `=` separator.
    key: str
    # Text found to the right of the `=` separator.
    value: str


def parse_key_value(kv_str: str) -> KeyValueOpt:
    """Parse a `<key>=<value>` string into a `KeyValueOpt`.

    Args:
        kv_str: Raw option text; it must contain exactly one `=`.

    Returns:
        A `KeyValueOpt` holding the text before and after the `=`.

    Raises:
        ValueError: If splitting on `=` does not yield exactly two parts.
    """
    parts = kv_str.split('=')

    # Exactly one separator produces exactly two parts; anything else is rejected.
    if len(parts) == 2:
        key, value = parts
        return KeyValueOpt(key, value)

    raise ValueError(f'Invalid key/value pair: {kv_str}')


class OutputFormat(StrEnum):
CSV = 'csv'
JSON = 'json'
PANDAS = 'pandas'
TABLE = 'table'


def df_to_str(df: pd.DataFrame) -> str:
    """Render a DataFrame with pandas' own string formatting.

    The `display.width` and `display.max_rows` options are set to `None` only
    for the duration of the rendering, so no rows are elided from the output.

    Args:
        df: Frame to render.

    Returns:
        The text produced by `str(df)` under those display options.
    """
    display_opts = ('display.width', None, 'display.max_rows', None)

    with pd.option_context(*display_opts):
        rendered = str(df)

    return rendered


def df_to_csv(df: pd.DataFrame) -> str:
    """Serialize a DataFrame, index included, to CSV text.

    Args:
        df: Frame to serialize.

    Returns:
        The CSV document as a string.
    """
    # Without a target path, `to_csv` hands back the CSV text instead of writing a file.
    csv_text = df.to_csv()
    return csv_text


def df_to_table(df: pd.DataFrame) -> str:
    """Render findings as a text table with rounded borders.

    The `validation_desc` column is left out, rows are ordered by index, and
    `field_value` is cut to its first 50 characters.

    Args:
        df: Findings frame; it must have `validation_desc` and `field_value`
            columns.

    Returns:
        The table text produced by `tabulate`.
    """
    without_desc = df.drop(columns='validation_desc')
    ordered = without_desc.sort_index()

    # trim field_value field to just 50 chars, similar to DataFrame default
    table_df = ordered.assign(field_value=ordered['field_value'].str.slice(0, 50))

    # NOTE: `type: ignore` because tabulate package typing does not include Pandas
    #       DataFrame as input, but the library itself does support it. ¯\_(ツ)_/¯
    return tabulate(  # type: ignore
        table_df,
        headers='keys',
        showindex=True,
        tablefmt='rounded_outline',
    )


def df_to_json(df: pd.DataFrame) -> str:
    """Serialize findings to a nested JSON document.

    The output is a JSON array with one object per `validation_id`. Each object
    has a `validation` header (id, name, description, severity, taken from the
    first row of that group) and a `records` list. Every record carries its
    `record_no` and a `fields` list of `{name, value}` pairs, where the value is
    the `field_value` of the first row for that field.

    Args:
        df: Findings frame providing `validation_id`, `record_no`, `field_name`,
            `field_value`, `validation_name`, `validation_desc` and
            `validation_severity`, either as columns or as index levels.

    Returns:
        The JSON text, indented by 4 spaces.
    """
    # Re-index so each nesting level of the output can be grouped by name.
    keyed_df = df.reset_index().set_index(['validation_id', 'record_no', 'field_name'])

    payload = []
    for validation_id, validation_rows in keyed_df.groupby(by='validation_id'):
        # Validation metadata repeats on every row of the group; read it once.
        header_row = validation_rows.iloc[0]

        records = [
            {
                'record_no': record_no,
                'fields': [
                    {'name': field_name, 'value': field_rows.iloc[0].at['field_value']}
                    for field_name, field_rows in record_rows.groupby(by='field_name')
                ],
            }
            for record_no, record_rows in validation_rows.groupby(by='record_no')
        ]

        payload.append(
            {
                'validation': {
                    'id': validation_id,
                    'name': header_row.at['validation_name'],
                    'description': header_row.at['validation_desc'],
                    'severity': header_row.at['validation_severity'],
                },
                'records': records,
            }
        )

    return json.dumps(payload, indent=4)


@app.command()
def describe() -> None:
    """
    Describe CFPB data submission formats and validations
    """
    # Placeholder sub-command: it only prints a notice for now.
    notice = 'Feature coming soon...'
    print(notice)


@app.command(no_args_is_help=True)
def validate(
path: Annotated[
Path,
typer.Argument(
exists=True,
dir_okay=False,
readable=True,
resolve_path=True,
show_default=False,
help='Path of file to be validated',
),
],
context: Annotated[
Optional[list[KeyValueOpt]],
typer.Option(
parser=parse_key_value,
metavar='<key>=<value>',
help='[example: lei=12345678901234567890]',
show_default=False,
),
] = None,
output: Annotated[Optional[OutputFormat], typer.Option()] = OutputFormat.TABLE,
) -> tuple[bool, pd.DataFrame]:
"""
Validate CFPB data submission
"""
context_dict = {x.key: x.value for x in context} if context else {}
input_df = pd.read_csv(path, dtype=str, na_filter=False)
is_valid, findings_df = validate_phases(input_df, context_dict)

status = 'SUCCESS'
no_of_findings = 0

if not is_valid:
lchen-2101 marked this conversation as resolved.
Show resolved Hide resolved
status = 'FAILURE'
no_of_findings = len(findings_df.index.unique())

match output:
case OutputFormat.PANDAS:
print(df_to_str(findings_df))
case OutputFormat.CSV:
print(df_to_csv(findings_df))
case OutputFormat.JSON:
print(df_to_json(findings_df))
case OutputFormat.TABLE:
print(df_to_table(findings_df))
case _:
raise ValueError(f'output format "{output}" not supported')

typer.echo(f"status: {status}, findings: {no_of_findings}", err=True)

# returned values are only used in unit tests
return is_valid, findings_df


# Run the CLI only when this module is executed as a script, not when imported.
if __name__ == '__main__':
    app()
Loading
Loading