Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Standardised col_name check #133

Merged
merged 7 commits into from
Sep 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions R/knownVariables.r
Original file line number Diff line number Diff line change
Expand Up @@ -117,3 +117,5 @@ acceptable_indicatorunits <- c("%", "pp", "£", "£m")

# Harmonised values ===================================================================================================
# Reference tables loaded from CSV files under data/. suppressMessages() silences any messages read_csv() emits
# while reading, so nothing is printed when these objects are created.
ethnicity_standard_values <- suppressMessages(read_csv("data/ethnicity.csv"))

# Lookup pairing each col_name_search_string with an accepted col_name_harmonised value.
# standard_filter_headers() in R/mainTests.r pulls both columns: the search strings are collapsed into a single
# regex, and any matching col_name that is not one of the harmonised names is reported as a failure.
harmonised_col_names <- suppressMessages(read_csv("data/harmonised_col_names.csv"))
53 changes: 31 additions & 22 deletions R/mainTests.r
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ mainTests <- function(data_character, meta_character, datafile, metafile) {
indicator_dp(metafile), # active test
indicator_dp_validation(metafile), # active test
indicator_dp_completed(metafile), # active test
ethnicity_headers(metafile), # active test
standard_filter_headers(metafile), # active test
ethnicity_values(datafile), # active test
ethnicity_characteristic_group(datafile), # active test
ethnicity_characteristic_values(datafile), # active test
Expand Down Expand Up @@ -2556,36 +2556,45 @@ indicator_dp_completed <- function(meta) {
return(output)
}

ethnicity_headers <- function(meta) {
# First find any ethnicity type columns that don't have the standard col_names
ethnicity_standard_headers <- c("ethnicity_major", "ethnicity_minor", "ethnicity_detailed", "minority_ethnic")
ethnicity_columns <- meta %>%
standard_filter_headers <- function(meta) {
# Collapse search terms for bad column names into regex term
search_string <- harmonised_col_names %>%
pull(col_name_search_string) %>%
cjrace marked this conversation as resolved.
Show resolved Hide resolved
unique() %>%
paste(
collapse = "|"
)
# Pivot meta data to arrange col_name and filter_grouping_column together and
# then filter for possible non-standard filter names.
standard_col_names <- harmonised_col_names %>%
pull(col_name_harmonised) %>%
unique()
bad_col_names <- meta %>%
select(col_name, filter_grouping_column) %>%
pivot_longer(
c(col_name, filter_grouping_column),
values_to = "col_name"
) %>%
filter(
grepl("ethnic", tolower(col_name)),
!(col_name %in% ethnicity_standard_headers)
grepl(search_string, tolower(col_name)),
!(col_name %in% standard_col_names)
) %>%
pull(col_name)
if (length(ethnicity_columns) == 0) {
if (length(bad_col_names) == 0) {
output <- list(
"message" = "No ethnicity header issues found.",
"message" = "No standardised col_name issues found.",
"result" = "PASS"
)
} else if (length(ethnicity_columns) == 1) {
output <- list(
"message" = paste0(
paste(ethnicity_columns, collapse = "', '"), " appears to relate to ethnicity data, but does not conform to the standard col_name conventions: ",
paste(ethnicity_standard_headers, collapse = ", "),
"."
),
"result" = "FAIL"
)
} else {
output <- list(
"message" = paste0(
"The following columns appear to relate to ethnicity data, but do not conform to the standard col_name conventions: <br> - '",
paste(ethnicity_columns, collapse = "', '"), "'. <br> - These should take the form of one of the following: ",
paste(ethnicity_standard_headers, collapse = ", "),
"."
"The column(s) '",
paste(bad_col_names, collapse = "', '"), "' appear to relate to ",
"contexts that fall under the harmonised data standards. Please verify",
" your column headers against the data standards in the <a href=",
"'https://dfe-analytical-services.github.io/analysts-guide/",
"statistics-production/ud.html#common-harmonised-variables'",
">DfE harmonised data guidance</a>."
),
"result" = "FAIL"
)
Expand Down
17 changes: 17 additions & 0 deletions data/harmonised_col_names.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
col_name_search_string,col_name_harmonised
sen_,sen_status
cjrace marked this conversation as resolved.
Show resolved Hide resolved
sen_,sen_primary_need
sen_,sen_secondary_need
sen_,sen_provision
primary_need,sen_primary_need
secondary_need,sen_secondary_need
establishment_type,establishment_type
establishment_type,establishment_type_group
provider_type,provider_type
school_type,establishment_type
school_type,establishment_type_group
phase,education_phase
ethnic,ethnicity_major
ethnic,ethnicity_minor
ethnic,ethnicity_detailed
ethnic,minority_ethnic
4 changes: 2 additions & 2 deletions data/sen.csv
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ sen_provision,SEN provision,SEN support / SEN without an EHC plan,sen_status,Any
sen_provision,SEN provision,No SEN provision,sen_status,No identified special educational need,
sen_provision,SEN provision,Total,sen_status,Total,
sen_primary_need,SEN primary need,All primary need,,,
sen_primary_need,SEN primary need,"Autistic spectrum disorder ",,,ASD
sen_primary_need,SEN primary need,"Autistic spectrum disorder",,,ASD
sen_primary_need,SEN primary need,Hearing impairment,,,HI
sen_primary_need,SEN primary need,Not reported,,,
sen_primary_need,SEN primary need,Moderate learning difficulty,,,MLD
Expand All @@ -24,7 +24,7 @@ sen_primary_need,SEN primary need,Vision impairment,,,VI
sen_primary_need,SEN primary need,No primary need,,,
sen_primary_need,SEN primary need,Unknown,,,
sen_secondary_need,SEN secondary need,All secondary need,,,
sen_secondary_need,SEN secondary need,"Autistic spectrum disorder ",,,ASD
sen_secondary_need,SEN secondary need,"Autistic spectrum disorder",,,ASD
sen_secondary_need,SEN secondary need,Hearing impairment,,,HI
sen_secondary_need,SEN secondary need,Not reported,,,
sen_secondary_need,SEN secondary need,Moderate learning difficulty,,,MLD
Expand Down
Loading