Skip to content

Commit

Permalink
Merge pull request #133 from dfe-analytical-services/sen-standards-check
Browse files: browse the repository at this point in the history
Standardised col_name check
  • Loading branch information
rmbielby authored Sep 6, 2024
2 parents a9570b1 + 20042cb commit 0d23027
Show file tree
Hide file tree
Showing 43 changed files with 633 additions and 1,842 deletions.
2 changes: 2 additions & 0 deletions R/knownVariables.r
Original file line number Diff line number Diff line change
Expand Up @@ -117,3 +117,5 @@ acceptable_indicatorunits <- c("%", "pp", "£", "£m")

# Harmonised values ===================================================================================================
ethnicity_standard_values <- suppressMessages(read_csv("data/ethnicity.csv"))

harmonised_col_names <- suppressMessages(read_csv("data/harmonised_col_names.csv"))
53 changes: 31 additions & 22 deletions R/mainTests.r
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ mainTests <- function(data_character, meta_character, datafile, metafile) {
indicator_dp(metafile), # active test
indicator_dp_validation(metafile), # active test
indicator_dp_completed(metafile), # active test
ethnicity_headers(metafile), # active test
standard_filter_headers(metafile), # active test
ethnicity_values(datafile), # active test
ethnicity_characteristic_group(datafile), # active test
ethnicity_characteristic_values(datafile), # active test
Expand Down Expand Up @@ -2556,36 +2556,45 @@ indicator_dp_completed <- function(meta) {
return(output)
}

ethnicity_headers <- function(meta) {
# First find any ethnicity type columns that don't have the standard col_names
ethnicity_standard_headers <- c("ethnicity_major", "ethnicity_minor", "ethnicity_detailed", "minority_ethnic")
ethnicity_columns <- meta %>%
standard_filter_headers <- function(meta) {
# Collapse search terms for bad column names into regex term
search_string <- harmonised_col_names %>%
pull(col_name_search_string) %>%
unique() %>%
paste(
collapse = "|"
)
# Pivot meta data to arrange col_name and filter_grouping_column together and
# then filter for possible non-standard filter names.
standard_col_names <- harmonised_col_names %>%
pull(col_name_harmonised) %>%
unique()
bad_col_names <- meta %>%
select(col_name, filter_grouping_column) %>%
pivot_longer(
c(col_name, filter_grouping_column),
values_to = "col_name"
) %>%
filter(
grepl("ethnic", tolower(col_name)),
!(col_name %in% ethnicity_standard_headers)
grepl(search_string, tolower(col_name)),
!(col_name %in% standard_col_names)
) %>%
pull(col_name)
if (length(ethnicity_columns) == 0) {
if (length(bad_col_names) == 0) {
output <- list(
"message" = "No ethnicity header issues found.",
"message" = "No standardised col_name issues found.",
"result" = "PASS"
)
} else if (length(ethnicity_columns) == 1) {
output <- list(
"message" = paste0(
paste(ethnicity_columns, collapse = "', '"), " appears to relate to ethnicity data, but does not conform to the standard col_name conventions: ",
paste(ethnicity_standard_headers, collapse = ", "),
"."
),
"result" = "FAIL"
)
} else {
output <- list(
"message" = paste0(
"The following columns appear to relate to ethnicity data, but do not conform to the standard col_name conventions: <br> - '",
paste(ethnicity_columns, collapse = "', '"), "'. <br> - These should take the form of one of the following: ",
paste(ethnicity_standard_headers, collapse = ", "),
"."
"The column(s) '",
paste(bad_col_names, collapse = "', '"), "' appear to relate to ",
"contexts that fall under the harmonised data standards. Please verify",
" your column headers against the data standards in the <a href=",
"'https://dfe-analytical-services.github.io/analysts-guide/",
"statistics-production/ud.html#common-harmonised-variables'",
">DfE harmonised data guidance</a>."
),
"result" = "FAIL"
)
Expand Down
17 changes: 17 additions & 0 deletions data/harmonised_col_names.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
col_name_search_string,col_name_harmonised
sen_,sen_status
sen_,sen_primary_need
sen_,sen_secondary_need
sen_,sen_provision
primary_need,sen_primary_need
secondary_need,sen_secondary_need
establishment_type,establishment_type
establishment_type,establishment_type_group
provider_type,provider_type
school_type,establishment_type
school_type,establishment_type_group
phase,education_phase
ethnic,ethnicity_major
ethnic,ethnicity_minor
ethnic,ethnicity_detailed
ethnic,minority_ethnic
4 changes: 2 additions & 2 deletions data/sen.csv
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ sen_provision,SEN provision,SEN support / SEN without an EHC plan,sen_status,Any
sen_provision,SEN provision,No SEN provision,sen_status,No identified special educational need,
sen_provision,SEN provision,Total,sen_status,Total,
sen_primary_need,SEN primary need,All primary need,,,
sen_primary_need,SEN primary need,"Autistic spectrum disorder ",,,ASD
sen_primary_need,SEN primary need,"Autistic spectrum disorder",,,ASD
sen_primary_need,SEN primary need,Hearing impairment,,,HI
sen_primary_need,SEN primary need,Not reported,,,
sen_primary_need,SEN primary need,Moderate learning difficulty,,,MLD
Expand All @@ -24,7 +24,7 @@ sen_primary_need,SEN primary need,Vision impairment,,,VI
sen_primary_need,SEN primary need,No primary need,,,
sen_primary_need,SEN primary need,Unknown,,,
sen_secondary_need,SEN secondary need,All secondary need,,,
sen_secondary_need,SEN secondary need,"Autistic spectrum disorder ",,,ASD
sen_secondary_need,SEN secondary need,"Autistic spectrum disorder",,,ASD
sen_secondary_need,SEN secondary need,Hearing impairment,,,HI
sen_secondary_need,SEN secondary need,Not reported,,,
sen_secondary_need,SEN secondary need,Moderate learning difficulty,,,MLD
Expand Down
Loading

0 comments on commit 0d23027

Please sign in to comment.