Skip to content

Commit

Permalink
allow for parquet directory
Browse files Browse the repository at this point in the history
  • Loading branch information
lchen-2101 committed Oct 29, 2024
1 parent 24f528a commit c9c5b6b
Showing 1 changed file with 2 additions and 2 deletions.
4 changes: 2 additions & 2 deletions src/regtech_data_validator/validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ def validate_batch(
batch_count: int = 1,
max_errors=1000000,
):
- validate_func = validate_batch_parquet if str(path).endswith(".parquet") else validate_batch_csv
+ validate_func = validate_batch_csv if str(path).endswith(".csv") else validate_batch_parquet
for validation_results in validate_func(path, context, batch_size, batch_count, max_errors):
yield validation_results

Expand Down Expand Up @@ -201,7 +201,7 @@ def validate_batch_parquet(
'aws_region': 'us-east-1',
}

- lf = pl.scan_parquet(path, allow_missing_columns=True, storage_options=storage_options).fill_null('')
+ lf = pl.scan_parquet(path, storage_options=storage_options).fill_null('')

for validation_results, uids in validate_lazy_chunks(
syntax_schema, lf, batch_size, batch_count, max_errors, syntax_checks
Expand Down

0 comments on commit c9c5b6b

Please sign in to comment.