Quarto render (#69)

* Added zero response handling to professions filter
* Fixed typo in summarise_coding_tools
* Fixed typos on ci & dep management freq table functions
* Added dept and workplace data cleaning. Fixed summarise_rap_champ_status
* Updated tests
* Update docs

---------

Co-authored-by: ldavies99 <[email protected]>
best-practice-and-impact · Dec 20, 2023 · 711e839 · 711e839
1 parent d672e73
commit 711e839
Show file tree

Hide file tree

Showing 15 changed files with 137 additions and 88 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -6,6 +6,7 @@ export(break_q_names)
 export(calculate_freqs)
 export(check_skip_logic)
 export(clean_departments)
+export(clean_workplace)
 export(compare_models)
 export(create_filtered_pages)
 export(create_y_lab)

diff --git a/R/data_cleaning.R b/R/data_cleaning.R
@@ -144,6 +144,20 @@ clean_departments <- function(data) {
 
   data$department[data$workplace == "NHS"] <- "NHS"
 
+  data$department[data$other_department_name == "Office for National Statistics"] <- "Office for National Statistics"
+
+  data$department[data$other_department_name == "Data Science Campus"] <- "Office for National Statistics"
+
+  data$department[data$other_department_name == "Welsh Revenue Authority"] <- "Welsh Government"
+
+  data$department[data$other_department_name == "Equality Hub, Cabinet Office"] <- "Cabinet Office (excl. agencies)"
+
+  data$department[data$other_department_name == "Natural England"] <- "Natural England"
+
+  data$department[data$other_department_name == "Department for Communities"] <- "Northern Ireland Executive"
+
+  data$department[data$other_department_name == "Department of Education Northern Ireland"] <- "Northern Ireland Executive"
+
   defra_orgs <- c(
     "Department for Environment, Food and Rural Affairs (excl. agencies)",
     "Forestry Commission",
@@ -163,3 +177,33 @@ clean_departments <- function(data) {
 
 }
 
+#' @title Clean workplace data
+#'
+#' @description Recodes workplace values that exactly match one of "MOD", "HMRC", "The Pensions Regulator", "Scottish Funding Council", "Office for Students", "Office for students", "OfS" or "Dstl" (presumably free-text 'other' responses) to "Civil service, including devolved administrations". Matching is case-sensitive and all other values are left unchanged; no value is reassigned to NHS by this function.
+#'
+#' @param data CARS dataset containing a workplace column (the pipelines in this commit call this after apply_skip_logic() and before clean_departments())
+#'
+#' @return CARS dataset: the input data with the listed workplace values recoded
+#' @export
+
+clean_workplace <- function(data) {
+
+  data$workplace[data$workplace == "MOD"] <- "Civil service, including devolved administrations"
+
+  data$workplace[data$workplace == "HMRC"] <- "Civil service, including devolved administrations"
+
+  data$workplace[data$workplace == "The Pensions Regulator"] <- "Civil service, including devolved administrations"
+
+  data$workplace[data$workplace == "Scottish Funding Council"] <- "Civil service, including devolved administrations"
+
+  data$workplace[data$workplace == "Office for Students"] <- "Civil service, including devolved administrations"
+
+  data$workplace[data$workplace == "Office for students"] <- "Civil service, including devolved administrations"
+
+  data$workplace[data$workplace == "OfS"] <- "Civil service, including devolved administrations"
+
+  data$workplace[data$workplace == "Dstl"] <- "Civil service, including devolved administrations"
+
+  return(data)
+
+}
diff --git a/R/derive_vars.R b/R/derive_vars.R
@@ -200,7 +200,7 @@ derive_rap_champ_status <- function(data){
                                               have_RAP_champ == "Yes" & know_RAP_champ == "Yes" ~ "Yes, and I know who the RAP Champion is",
                                               have_RAP_champ == "Yes" & know_RAP_champ == "No" ~ "Yes, but I don't know who the RAP Champion is",
                                               have_RAP_champ == "No" ~ "No",
-                                              have_RAP_champ == "I don't know" ~ "I don't know"))
+                                              have_RAP_champ == "Don't know" ~ "I don't know"))
 
 }
 

diff --git a/R/frequency-tables.R b/R/frequency-tables.R
@@ -21,6 +21,7 @@ summarise_all <- function(data, all_tables = FALSE) {
     coding_practices = summarise_coding_practices(data),
     doc = summarise_doc(data),
     rap_knowledge = summarise_rap_knowledge(data),
+    rap_champ_status = summarise_rap_champ_status(data),
     rap_opinions = summarise_rap_opinions(data),
     basic_rap_scores = summarise_rap_basic(data),
     advanced_rap_scores = summarise_rap_advanced(data),
@@ -122,7 +123,7 @@ summarise_coding_tools <- function(data, type = list("knowledge", "access"), pro
                  "access_SPSS", "knowledge_stata", "access_stata",
                  "knowledge_matlab", "access_matlab")
 
-  levels <- c("Yes", "Don't Know", "No")
+  levels <- c("Yes", "Don't know", "No")
 
   labels <- c("R", "SQL", "SAS", "VBA", "Python", "SPSS", "Stata", "Matlab")
 
@@ -483,7 +484,7 @@ summarise_ci <- function(data) {
 
   levels <- c("Yes",
               "No",
-              "I don't know")
+              "I don't know what continuous integration is")
 
   frequencies <- calculate_freqs(data, questions, levels)
 
@@ -511,7 +512,7 @@ summarise_dep_man <- function(data) {
 
   levels <- c("Yes",
               "No",
-              "I don't know")
+              "I don't know what dependency management is")
 
   frequencies <- calculate_freqs(data, questions, levels)
 
@@ -539,7 +540,7 @@ summarise_rep_workflow <- function(data) {
 
   levels <- c("Yes",
               "No",
-              "I don't know")
+              "I don't know what reproducible workflows are")
 
   frequencies <- calculate_freqs(data, questions, levels)
 

diff --git a/R/render.R b/R/render.R
@@ -40,41 +40,39 @@ create_filtered_pages <- function(data, type = c("professions", "departments"),
   dir.create(filtered_pages_path)
 
   if (type == "professions") {
-    prof_cols <- c(
-      "prof_DE",
-      "prof_DS",
-      "prof_DDAT",
-      "prof_GAD",
-      "prof_GES",
-      "prof_geog",
-      "prof_GORS",
-      "prof_GSR",
-      "prof_GSG"
+    prof_ref <- data.frame(prof_cols =  grep("prof", colnames(data), value = TRUE),
+                           prof_names = c("government data engineers",
+                                          "government data scientists",
+                                          "digital and data profession (DDAT)",
+                                          "government actuary's department (GAD)",
+                                          "government economic service (GES)",
+                                          "government geography profession",
+                                          "government operational research (GORS)",
+                                          "government social research (GSR)",
+                                          "government statistician group (GSG)",
+                                          "no government profession",
+                                          "other government profession"),
+                           filenames = c("data-engineers.qmd",
+                                         "data-scientists.qmd",
+                                         "digital-and-data.qmd",
+                                         "government-actuarys-department.qmd",
+                                         "government-economic-service.qmd",
+                                         "government-geography.qmd",
+                                         "government-operational-research.qmd",
+                                         "government-social-research.qmd",
+                                         "government-statistician-group.qmd",
+                                         "no-government-profession.qmd",
+                                         "other-government-profession.qmd"
+                           )
     )
 
-    prof_names <- c(
-      "government data engineers",
-      "government data scientists",
-      "digital and data profession (DDAT)",
-      "government actuary's department (GAD)",
-      "government economic service (GES)",
-      "government geography profession",
-      "government operational research (GORS)",
-      "government social research (GSR)",
-      "government statistician group (GSG)"
-    )
+    prof_cols <- data %>%
+      dplyr::select(dplyr::contains("prof") & !dplyr::contains("none")) %>%
+      dplyr::select_if(~ any(. == "Yes")) %>%
+      colnames()
 
-    filenames <- c(
-      "data-engineers.qmd",
-      "data-scientists.qmd",
-      "digital-and-data.qmd",
-      "government-actuarys-department.qmd",
-      "government-economic-service.qmd",
-      "government-geography.qmd",
-      "government-operational-research.qmd",
-      "government-social-research.qmd",
-      "government-statician-group.qmd"
-    )
+    prof_names <- prof_ref$prof_names[prof_ref$prof_cols %in% prof_cols]
+    filenames <- prof_ref$filenames[prof_ref$prof_cols %in% prof_cols]
 
     n_pages <- length(prof_cols)
   } else if (type == "departments") {
@@ -115,7 +113,7 @@ create_filtered_pages <- function(data, type = c("professions", "departments"),
       title <- paste0("Department summary: ", dep_list[i])
     }
 
-     # Custom open and close tags are used here to avoid clashes with quarto syntax
+    # Custom open and close tags are used here to avoid clashes with quarto syntax
     contents <- glue::glue(template, .open = "{{{", .close = "}}}") %>% as.character()
 
     path <- paste0(filtered_pages_path, "/", filenames[[i]])
@@ -146,7 +144,6 @@ create_filtered_pages <- function(data, type = c("professions", "departments"),
 
 }
 
-
 #' @title Display programming languages filtered by profession
 #'
 #' @param table frequency table (languages_by_prof, see frequency table functions).

diff --git a/main.R b/main.R
@@ -3,9 +3,10 @@ library(magrittr)
 data <- CARS::get_tidy_data_file("2023_data.csv") %>%
   CARS::rename_cols() %>%
   CARS::apply_skip_logic() %>%
+  CARS::clean_workplace() %>%
   CARS::clean_departments() %>%
   CARS::derive_vars()
 
 CARS::create_filtered_pages(data, type = "departments")
-CARS::create_filtered_pages(type = "professions")
+CARS::create_filtered_pages(data, type = "professions")
 CARS::render_site()
diff --git a/man/clean_workplace.Rd b/man/clean_workplace.Rd
diff --git a/quarto/main/data_collection.qmd b/quarto/main/data_collection.qmd
@@ -8,9 +8,10 @@ library(magrittr)
 # Setup
 all_wave_data <- CARS::get_all_waves(mode = "file")
 
-data <- CARS::get_tidy_data_file("2022_data.csv") %>%
+data <- CARS::get_tidy_data_file("2023_data.csv") %>%
   CARS::rename_cols() %>%
   CARS::apply_skip_logic() %>%
+  CARS::clean_workplace() %>%
   CARS::clean_departments() %>%
   CARS::derive_vars()
 

diff --git a/quarto/main/summary.qmd b/quarto/main/summary.qmd
@@ -9,9 +9,10 @@ output:
 
 library(magrittr)
 
-data <- CARS::get_tidy_data_file("2022_data.csv") %>%
+data <- CARS::get_tidy_data_file("2023_data.csv") %>%
   CARS::rename_cols() %>%
   CARS::apply_skip_logic() %>%
+  CARS::clean_workplace() %>%
   CARS::clean_departments() %>%
   CARS::derive_vars()
 

diff --git a/quarto/templates/summary.qmd b/quarto/templates/summary.qmd
@@ -11,9 +11,10 @@ output:
 
 library(magrittr)
 
-data <- CARS::get_tidy_data_file("2022_data.csv") %>%
+data <- CARS::get_tidy_data_file("2023_data.csv") %>%
   CARS::rename_cols() %>%
   CARS::apply_skip_logic() %>%
+  CARS::clean_workplace() %>%
   CARS::clean_departments() %>%
   CARS::derive_vars()
 
@@ -47,21 +48,6 @@ CARS::wrap_outputs("coding-freq", plot, table)
 ```
 
 
-### What code is being used for
-
-We asked respondents what data operations they carry out in their work, and whether they use code to do them. Please note, we did not ask how much of each data operation is done with code or how often.
-
-Respondents who don't do the operation at all have been removed.
-
-```{r}
-
-plot <- CARS::plot_stacked(tables$operations, xlab = "Operation", font_size = 14)
-table <- CARS::df_to_table(tables$operations, column_headers = c("Operation", "I do some or all of this by coding (%)", "I do this without coding (%)"), crosstab = TRUE)
-
-CARS::wrap_outputs("operations", plot, table)
-
-```
-
 ### Access to and knowledge of programming languages
 
 Given a list of programming tools, we asked respondents to answer "Yes", "No" or "Don't know" for the following questions;  

diff --git a/tests/testthat/test-summarise_ci.R b/tests/testthat/test-summarise_ci.R
@@ -2,7 +2,7 @@
 dummy_data <- data.frame(CI = c(NA,
                                 rep("Yes", 2),
                                 rep("No", 3),
-                                rep("I don't know", 4)))
+                                rep("I don't know what continuous integration is", 4)))
 
 test_that("summarise_ci validation works", {
 
@@ -26,10 +26,10 @@ test_that("summarise_ci output is as expected", {
 
   expected <- data.frame(value = factor(c("Yes",
                                           "No",
-                                          "I don't know"),
+                                          "I don't know what continuous integration is"),
                                         levels = c("Yes",
                                                    "No",
-                                                   "I don't know")),
+                                                   "I don't know what continuous integration is")),
                          n = c(2/9, 1/3, 4/9))
 
   expect_equal(got, expected)

diff --git a/tests/testthat/test-summarise_coding_tools.R b/tests/testthat/test-summarise_coding_tools.R
@@ -1,22 +1,22 @@
 # Coding tools frequency tables (access or knowledge)
 
 dummy_data <- data.frame(
-  knowledge_R = c("Yes", rep("No", 2), rep("Don't Know", 3)),
-  access_R = c(rep("Yes", 2), "No", rep("Don't Know", 3)),
-  knowledge_SQL = c(rep("Yes", 3), rep("No", 2), "Don't Know"),
-  access_SQL = c("Yes", rep("No", 3), rep("Don't Know", 2)),
-  knowledge_SAS = c(rep("Yes", 2), rep("No", 3), "Don't Know"),
-  access_SAS = c(rep("Yes", 3), "No", rep("Don't Know", 2)),
-  knowledge_VBA = c("Yes", rep("No", 2), rep("Don't Know", 3)),
-  access_VBA = c(rep("Yes", 2), "No", rep("Don't Know", 3)),
-  knowledge_python = c(rep("Yes", 3), rep("No", 2), "Don't Know"),
-  access_python = c("Yes", rep("No", 3), rep("Don't Know", 2)),
-  knowledge_SPSS = c(rep("Yes", 2), rep("No", 3), "Don't Know"),
-  access_SPSS = c(rep("Yes", 3), "No", rep("Don't Know", 2)),
-  knowledge_stata = c("Yes", rep("No", 2), rep("Don't Know", 3)),
-  access_stata = c(rep("Yes", 2), "No", rep("Don't Know", 3)),
-  knowledge_matlab = c(rep("Yes", 3), rep("No", 2), "Don't Know"),
-  access_matlab = c("Yes", rep("No", 5), rep("Don't Know", 0)) # Used to check zero counts aren't missing
+  knowledge_R = c("Yes", rep("No", 2), rep("Don't know", 3)),
+  access_R = c(rep("Yes", 2), "No", rep("Don't know", 3)),
+  knowledge_SQL = c(rep("Yes", 3), rep("No", 2), "Don't know"),
+  access_SQL = c("Yes", rep("No", 3), rep("Don't know", 2)),
+  knowledge_SAS = c(rep("Yes", 2), rep("No", 3), "Don't know"),
+  access_SAS = c(rep("Yes", 3), "No", rep("Don't know", 2)),
+  knowledge_VBA = c("Yes", rep("No", 2), rep("Don't know", 3)),
+  access_VBA = c(rep("Yes", 2), "No", rep("Don't know", 3)),
+  knowledge_python = c(rep("Yes", 3), rep("No", 2), "Don't know"),
+  access_python = c("Yes", rep("No", 3), rep("Don't know", 2)),
+  knowledge_SPSS = c(rep("Yes", 2), rep("No", 3), "Don't know"),
+  access_SPSS = c(rep("Yes", 3), "No", rep("Don't know", 2)),
+  knowledge_stata = c("Yes", rep("No", 2), rep("Don't know", 3)),
+  access_stata = c(rep("Yes", 2), "No", rep("Don't know", 3)),
+  knowledge_matlab = c(rep("Yes", 3), rep("No", 2), "Don't know"),
+  access_matlab = c("Yes", rep("No", 5), rep("Don't know", 0)) # Used to check zero counts aren't missing
 )
 
 test_that("summarise_coding_tools missing data is handled correctly", {
@@ -41,8 +41,8 @@ test_that("summarise_coding_tools knowledge output is as expected", {
                                                   "SQL",
                                                   "Stata",
                                                   "VBA"), each=3),
-                                   "value" = factor(rep(c("Yes", "Don't Know", "No"), 8),
-                                                    levels = c("Yes", "Don't Know", "No")),
+                                   "value" = factor(rep(c("Yes", "Don't know", "No"), 8),
+                                                    levels = c("Yes", "Don't know", "No")),
                                    "n" = c(1/2, 1/6, 1/3, 1/2, 1/6, 1/3, 1/6, 1/2,
                                            1/3, 1/3, 1/6, 1/2, 1/3, 1/6, 1/2, 1/2,
                                            1/6, 1/3, 1/6, 1/2, 1/3, 1/6, 1/2, 1/3))
@@ -63,8 +63,8 @@ test_that("summarise_coding_tools access output is as expected", {
                                                "SQL",
                                                "Stata",
                                                "VBA"), each=3),
-                                "value" = factor(rep(c("Yes", "Don't Know", "No"), 8),
-                                                 levels = c("Yes", "Don't Know", "No")),
+                                "value" = factor(rep(c("Yes", "Don't know", "No"), 8),
+                                                 levels = c("Yes", "Don't know", "No")),
                                 "n" = c(1/6, 0, 5/6, 1/6, 1/3, 1/2, 1/3, 1/2,
                                         1/6, 1/2, 1/3, 1/6, 1/2, 1/3, 1/6, 1/6,
                                         1/3, 1/2, 1/3, 1/2, 1/6, 1/3, 1/2, 1/6))

diff --git a/tests/testthat/test-summarise_dep_man.R b/tests/testthat/test-summarise_dep_man.R
@@ -2,7 +2,7 @@
 dummy_data <- data.frame(dep_management = c(NA,
                                             rep("Yes", 2),
                                             rep("No", 3),
-                                            rep("I don't know", 4)))
+                                            rep("I don't know what dependency management is", 4)))
 
 test_that("summarise_dep_man validation works", {
 
@@ -26,10 +26,10 @@ test_that("summarise_dep_man output is as expected", {
 
   expected <- data.frame(value = factor(c("Yes",
                                           "No",
-                                          "I don't know"),
+                                          "I don't know what dependency management is"),
                                         levels = c("Yes",
                                                    "No",
-                                                   "I don't know")),
+                                                   "I don't know what dependency management is")),
                          n = c(2/9, 1/3, 4/9))
 
   expect_equal(got, expected)