Skip to content

Commit

Permalink
docs: csv validation results.
Browse files: browse the repository at this point in the history
  • Loading branch information
ratheesh-kr committed Jun 12, 2024
1 parent 483e020 commit 297820d
Show file tree
Hide file tree
Showing 11 changed files with 2,010 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
MPI_ID|PAT_MRN_ID|FACILITY_ID|CONSENT|FIRST_NAME|MIDDLE_NAME|LAST_NAME|ADMINISTRATIVE_SEX_CODE|ADMINISTRATIVE_SEX_CODE_DESCRIPTION|ADMINISTRATIVE_SEX_CODE_SYSTEM|SEX_AT_BIRTH_CODE|SEX_AT_BIRTH_CODE_DESCRIPTION|SEX_AT_BIRTH_CODE_SYSTEM|PAT_BIRTH_DATE|ADDRESS1|ADDRESS2|CITY|STATE|ZIP|GENDER_IDENTITY_CODE|GENDER_IDENTITY_CODE_DESCRIPTION|GENDER_IDENTITY_CODE_SYSTEM_NAME|SEXUAL_ORIENTATION_CODE|SEXUAL_ORIENTATION_CODE_DESCRIPTION|SEXUAL_ORIENTATION_CODE_SYSTEM_NAME|PREFERRED_LANGUAGE_CODE|PREFERRED_LANGUAGE_CODE_DESCRIPTION|PREFERRED_LANGUAGE_CODE_SYSTEM_NAME|RACE_CODE|RACE_CODE_DESCRIPTION|RACE_CODE_SYSTEM_NAME|ETHNICITY_CODE|ETHNICITY_CODE_DESCRIPTION|ETHNICITY_CODE_SYSTEM_NAME|MEDICAID_CIN
qcsHFT7WEQGXZ|qcs-20240530-testcase500-MRN|CNYSCN|Yes|Jamie|Adam|Serrano|M|Male|http://terminology.hl7.org/CodeSystem/v3-AdministrativeGender|ASKU|asked but unknown|http://terminology.hl7.org/CodeSystem/v3-NullFlavor|1971-07-25|3090 Tracey Street|Apt 356|Elmira|NY|14901|446141000124107|Female|http://snomed.info/sct|UNK|Unknown|http://snomed.info/sct|gem|Germanic languages|ISO|1186-6|Coushatta|urn:oid:2.16.840.1.113883.6.238|2175-8|South American Indian|urn:oid:2.16.840.1.113883.6.238|EP68884S
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
MPI_ID|PAT_MRN_ID|FACILITY_ID|CONSENT|FIRST_NAME|MIDDLE_NAME|LAST_NAME|ADMINISTRATIVE_SEX_CODE|ADMINISTRATIVE_SEX_CODE_DESCRIPTION|ADMINISTRATIVE_SEX_CODE_SYSTEM|SEX_AT_BIRTH_CODE|SEX_AT_BIRTH_CODE_DESCRIPTION|SEX_AT_BIRTH_CODE_SYSTEM|PAT_BIRTH_DATE|ADDRESS1|ADDRESS2|CITY|STATE|ZIP|PHONE|SSN|GENDER_IDENTITY_CODE|GENDER_IDENTITY_CODE_DESCRIPTION|GENDER_IDENTITY_CODE_SYSTEM_NAME|SEXUAL_ORIENTATION_CODE|SEXUAL_ORIENTATION_CODE_DESCRIPTION|SEXUAL_ORIENTATION_CODE_SYSTEM_NAME|PREFERRED_LANGUAGE_CODE|PREFERRED_LANGUAGE_CODE_DESCRIPTION|PREFERRED_LANGUAGE_CODE_SYSTEM_NAME|RACE_CODE|RACE_CODE_DESCRIPTION|RACE_CODE_SYSTEM_NAME|ETHNICITY_CODE|ETHNICITY_CODE_DESCRIPTION|ETHNICITY_CODE_SYSTEM_NAME|MEDICAID_CIN
|qcs-test-20240603-testcase4-MRN|CNYSCN|No|Amanda|Christina|Guerrero|X|Undifferentiated|http://terminology.hl7.org/CodeSystem/v3-AdministrativeGender|ASKU|asked but unknown|http://terminology.hl7.org/CodeSystem/v3-NullFlavor|1974-04-16|28007 Joanna Tunnel|Suite 11|Jamestown|NY|14701|(831)800-7041x27084|862-37-7633|446151000124109|Male|http://snomed.info/sct|UNK|Unknown|http://snomed.info/sct|mkd|Macedonian|ISO|1262-5|Grand Ronde|urn:oid:2.16.840.1.113883.6.238|2174-1|Venezuelan|urn:oid:2.16.840.1.113883.6.238|HM65654G
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
PAT_MRN_ID|FACILITY_ID|FACILITY_LONG_NAME|ORGANIZATION_TYPE|FACILITY_ADDRESS1|FACILITY_ADDRESS2|FACILITY_CITY|FACILITY_STATE|FACILITY_ZIP|VISIT_PART_2_FLAG|VISIT_OMH_FLAG|VISIT_OPWDD_FLAG
qcs-20240530-testcase500-MRN|CNYSCN|Crossroads NY Social Care Network|SCN|25 W 45th st|Suite 16|New York|New York|10036|No|No|No
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
PAT_MRN_ID|FACILITY_ID|FACILITY_LONG_NAME|ORGANIZATION_TYPE|FACILITY_ADDRESS1|FACILITY_ADDRESS2|FACILITY_CITY|FACILITY_STATE|FACILITY_ZIP|VISIT_PART_2_FLAG|VISIT_OMH_FLAG|VISIT_OPWDD_FLAG
qcs-test-20240603-testcase4-MRN|CNYSCN|Crossroads NY Social Care Network|SCN|25 W 45th st|Suite 16|New York|New York|10036|No|No|No

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
{
"name": "my-dataset",
"resources": [
{
"name": "qe_admin_data",
"path": "data/QE_ADMIN_DATA_qcs-test-20240603-testcase4.csv",
"schema": {
"fields": [
{"name": "PAT_MRN_ID", "type": "string", "constraints": {"required": true}},
{"name": "FACILITY_ID", "type": "string", "constraints": {"required": true}},
{"name": "FACILITY_LONG_NAME", "type": "string", "constraints": {"required": true, "pattern": "^[a-zA-Z\\s]+$"}},
{"name": "ORGANIZATION_TYPE", "type": "string", "constraints": {"required": true, "enum":["prov","dept","team","govt","ins","pay","edu","reli","crs","cg","bus","other","laboratory","imaging","pharmacy","health-information-network","health-data-aggregator"]}},
{"name": "FACILITY_ADDRESS1", "type": "string", "constraints": {"required": true}},
{"name": "FACILITY_ADDRESS2", "type": "string"},
{"name": "FACILITY_CITY", "type": "string"},
{"name": "FACILITY_STATE", "type": "string", "constraints": {"enum":["FACILITY_STATE", "NY", "New York"]}},
{"name": "FACILITY_ZIP", "type": "string", "constraints": {"required": true}},
{"name": "VISIT_PART_2_FLAG", "type": "string", "constraints": {"required": true, "enum":["Yes", "No"]}},
{"name": "VISIT_OMH_FLAG", "type": "string", "constraints": {"required": true, "enum":["Yes", "No"]}},
{"name": "VISIT_OPWDD_FLAG", "type": "string", "constraints": {"required": true, "enum":["Yes", "No"]}}
],
"primaryKey": ["PAT_MRN_ID"]
},
"dialect": {
"delimiter": "|"
}
},
{
"name": "screening_data",
"path": "data/SCREENING_qcs-test-20240603-testcase4.csv",
"schema": {
"fields": [
{"name": "PAT_MRN_ID", "type": "string", "constraints": {"required": true}},
{"name": "FACILITY_ID", "type": "string", "constraints": {"required": true}},
{"name": "ENCOUNTER_ID", "type": "string"},
{"name": "ENCOUNTER_CLASS_CODE", "type": "string", "constraints": {"required": true}},
{"name": "ENCOUNTER_CLASS_CODE_DESCRIPTION", "type": "string"},
{"name": "ENCOUNTER_CLASS_CODE_SYSTEM", "type": "string", "constraints": {"required": true, "enum": ["http://terminology.hl7.org/CodeSystem/v3-ActCode"]}},
{"name": "ENCOUNTER_STATUS_CODE", "type": "string", "constraints": {"required": true}},
{"name": "ENCOUNTER_STATUS_CODE_DESCRIPTION", "type": "string"},
{"name": "ENCOUNTER_STATUS_CODE_SYSTEM", "type": "string", "constraints": {"required": true, "enum": ["http://hl7.org/fhir/encounter-status"]}},
{"name": "ENCOUNTER_TYPE_CODE", "type": "string"},
{"name": "ENCOUNTER_TYPE_CODE_DESCRIPTION", "type": "string"},
{"name": "ENCOUNTER_TYPE_CODE_SYSTEM", "type": "string", "constraints": {"enum": ["SNOMED-CT", "snomed-ct", "Snomed-ct", "SNOMED", "snomed", "Snomed", "http://snomed.info/sct"]} },
{"name": "SCREENING_STATUS_CODE", "type": "string", "constraints": {"required": true}},
{"name": "SCREENING_STATUS_CODE_DESCRIPTION", "type": "string"},
{"name": "SCREENING_STATUS_CODE_SYSTEM", "type": "string", "constraints": {"required": true, "enum": ["http://hl7.org/fhir/observation-status"]}},
{"name": "SCREENING_CODE", "type": "string", "constraints": {"required": true, "enum": ["96777-8", "97023-6"]}},
{"name": "SCREENING_CODE_DESCRIPTION", "type": "string", "constraints": {"required": true, "enum": ["Accountable health communities (AHC) health-related social needs (HRSN) supplemental questions","accountable health communities (AHC) health-related social needs (HRSN) supplemental questions","Accountable health communities (AHC) health-related social needs screening (HRSN) tool","accountable health communities (AHC) health-related social needs screening (HRSN) tool","NYS AHC HRSN screening"]}},
{"name": "SCREENING_CODE_SYSTEM_NAME", "type": "string", "constraints": {"required": true, "enum": ["LN", "ln", "LOINC", "loinc", "http://loinc.org", "NYS standard","NYS Standard"]}},
{"name": "RECORDED_TIME", "type": "datetime", "constraints": {"required": true}},
{"name": "QUESTION_CODE", "type": "string", "constraints": {"required": true}},
{"name": "QUESTION_CODE_DESCRIPTION", "type": "string", "constraints": {"required": true}},
{"name": "QUESTION_CODE_SYSTEM_NAME", "type": "string", "constraints": {"required": true, "enum": ["LN","LOINC","http://loinc.org"]}},
{"name": "UCUM_UNITS", "type": "string"},
{"name": "SDOH_DOMAIN", "type": "string", "constraints": {"required": true}},
{"name": "PARENT_QUESTION_CODE", "type": "string"},
{"name": "ANSWER_CODE", "type": "string", "constraints": {"required": true}},
{"name": "ANSWER_CODE_DESCRIPTION", "type": "string", "constraints": {"required": true}},
{"name": "ANSWER_CODE_SYSTEM_NAME", "type": "string", "constraints": {"required": true, "enum": ["LN","LOINC","http://loinc.org"]}},
{"name": "POTENTIAL_NEED_INDICATED", "type": "string", "constraints": {"required": true, "enum": ["Yes","No","NA","yes","no","na"]}}
],
"foreignKeys": [
{
"fields": ["PAT_MRN_ID"],
"reference": {
"resource": "qe_admin_data",
"fields": ["PAT_MRN_ID"]
}
}
]
},
"dialect": {
"delimiter": "|"
}
},
{
"name": "demographic_data",
"path": "data/DEMOGRAPHIC_DATA_qcs-test-20240603-testcase4.csv",
"schema": {
"fields": [
{"name": "MPI_ID", "type": "string", "constraints": {"required": true}},
{"name": "PAT_MRN_ID", "type": "string", "constraints": {"required": true}},
{"name": "FACILITY_ID", "type": "string", "constraints": {"required": true}},
{"name": "CONSENT", "type": "string", "constraints": {"required": true, "enum": ["Yes", "YES", "yes", "Y", "y", "No", "NO", "no","N", "n","Unknown", "UNKNOWN", "unknown","UNK", "Unk", "unk"]}},
{"name": "FIRST_NAME", "type": "string", "constraints": {"required": true, "pattern":"^[A-Za-z]+$"}},
{"name": "MIDDLE_NAME", "type": "string", "constraints": {"pattern":"^[A-Za-z]+$"}},
{"name": "LAST_NAME", "type": "string", "constraints": {"required": true, "pattern":"^[A-Za-z]+$"}},
{"name": "ADMINISTRATIVE_SEX_CODE", "type": "string", "constraints": {"required": true}},
{"name": "ADMINISTRATIVE_SEX_CODE_DESCRIPTION", "type": "string"},
{"name": "ADMINISTRATIVE_SEX_CODE_SYSTEM", "type": "string", "constraints": {"required": true}},
{"name": "SEX_AT_BIRTH_CODE", "type": "string", "constraints": {"required": true}},
{"name": "SEX_AT_BIRTH_CODE_DESCRIPTION", "type": "string"},
{"name": "SEX_AT_BIRTH_CODE_SYSTEM", "type": "string"},
{"name": "PAT_BIRTH_DATE", "type": "date"},
{"name": "ADDRESS1", "type": "string"},
{"name": "ADDRESS2", "type": "string"},
{"name": "CITY", "type": "string", "constraints": {"required": true}},
{"name": "STATE", "type": "string", "constraints": {"required": true, "enum":["NY", "ny", "New York","new york", "NEW YORK"]}},
{"name": "ZIP", "type": "string", "constraints": {"required": true, "pattern": "^\\d{5}(\\d{4})?$"}},
{"name": "PHONE", "type": "string"},
{"name": "SSN", "type": "string"},
{"name": "GENDER_IDENTITY_CODE", "type": "string"},
{"name": "GENDER_IDENTITY_CODE_DESCRIPTION", "type": "string"},
{"name": "GENDER_IDENTITY_CODE_SYSTEM_NAME", "type": "string", "constraints": {"required": true, "enum":["SNOMED-CT","SNOMED","http://snomed.info/sct"]}},
{"name": "SEXUAL_ORIENTATION_CODE", "type": "string"},
{"name": "SEXUAL_ORIENTATION_CODE_DESCRIPTION", "type": "string"},
{"name": "SEXUAL_ORIENTATION_CODE_SYSTEM_NAME", "type": "string", "constraints": {"required": true, "enum":["SNOMED-CT","SNOMED","http://snomed.info/sct"]}},
{"name": "PREFERRED_LANGUAGE_CODE", "type": "string"},
{"name": "PREFERRED_LANGUAGE_CODE_DESCRIPTION", "type": "string"},
{"name": "PREFERRED_LANGUAGE_CODE_SYSTEM_NAME", "type": "string", "constraints": {"required": true, "enum":["ISO","ISO 639-2","http://hl7.org/fhir/us/core/ValueSet/simple-language"]}},
{"name": "RACE_CODE", "type": "string"},
{"name": "RACE_CODE_DESCRIPTION", "type": "string"},
{"name": "RACE_CODE_SYSTEM_NAME", "type": "string", "constraints": {"required": true, "enum":["CDC","CDCRE","urn:oid:2.16.840.1.113883.6.238"]}},
{"name": "ETHNICITY_CODE", "type": "string"},
{"name": "ETHNICITY_CODE_DESCRIPTION", "type": "string"},
{"name": "ETHNICITY_CODE_SYSTEM_NAME", "type": "string", "constraints": {"required": true, "enum":["CDC","CDCRE","urn:oid:2.16.840.1.113883.6.238"]}},
{"name": "MEDICAID_CIN", "type": "string", "constraints": {"pattern": "^[A-Za-z]{2}\\d{5}[A-Za-z]$"}}
],
"foreignKeys": [
{
"fields": ["PAT_MRN_ID"],
"reference": {
"resource": "qe_admin_data",
"fields": ["PAT_MRN_ID"]
}
}
]
},
"dialect": {
"delimiter": "|"
}
}
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import json
from frictionless import Package, Report

def validate_package(package_path, output_path):
    """Validate a Frictionless data package and save the outcome as JSON.

    Args:
        package_path: Location of the data package descriptor, handed
            directly to ``Package``.
        output_path: File that receives the JSON results. It is opened in
            write mode, so any existing content is replaced.

    Returns:
        The dictionary that was written to ``output_path``. It has two keys:
        ``"report"`` (the validation report converted with ``to_dict()``)
        and ``"errorsummary"`` (one dict per flattened error; an empty list
        when the package is valid).
    """
    # Single source of truth for the error attributes: this list drives both
    # the flatten() call and the keys of every summary entry, so the two can
    # never drift apart.
    error_fields = ["rowNumber", "fieldNumber", "fieldName", "message", "type"]

    # Load the data package
    package = Package(package_path)

    # Validate the package
    report = package.validate()
    print(report)

    # Prepare the results dictionary
    results = {
        "report": report.to_dict(),
        "errorsummary": [],
    }

    # Check if validation passed
    if report.valid:
        print("Data package is valid.")
    else:
        print("Data package has validation errors.")
        for error in report.flatten(error_fields):
            row_number, field_number, field_name, message, error_type = error
            print(
                f"Row: {row_number}, fieldNumber: {field_number}, "
                f"fieldName: {field_name}, Message: {message}, Type: {error_type}"
            )
            # Pair each requested attribute name with its flattened value.
            results["errorsummary"].append(dict(zip(error_fields, error)))

    # Write the results to a JSON file. The encoding is pinned so the file
    # does not depend on the platform's default locale.
    with open(output_path, "w", encoding="utf-8") as json_file:
        json.dump(results, json_file, indent=4)

    return results

if __name__ == "__main__":
    # Script entry point: validate the descriptor in the current working
    # directory and write the findings alongside it.
    validate_package(
        package_path="datapackage.json",
        output_path="validation_report.json",
    )
Loading

0 comments on commit 297820d

Please sign in to comment.