Skip to content

Commit

Permalink
Quarto render (#69)
Browse files Browse the repository at this point in the history
* added zero response handling to professions filter

* Fixed typo in summarise_coding_tools

* Fixed typos on ci & dep management freq table functions

* Added dept and workplace data cleaning. Fixed summarise_rap_champ_status

* Updated tests

* Update docs

---------

Co-authored-by: ldavies99 <[email protected]>
  • Loading branch information
CHCRowley and ldavies99 authored Dec 20, 2023
1 parent d672e73 commit 711e839
Show file tree
Hide file tree
Showing 15 changed files with 137 additions and 88 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ export(break_q_names)
export(calculate_freqs)
export(check_skip_logic)
export(clean_departments)
export(clean_workplace)
export(compare_models)
export(create_filtered_pages)
export(create_y_lab)
Expand Down
44 changes: 44 additions & 0 deletions R/data_cleaning.R
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,20 @@ clean_departments <- function(data) {

data$department[data$workplace == "NHS"] <- "NHS"

data$department[data$other_department_name == "Office for National Statistics"] <- "Office for National Statistics"

data$department[data$other_department_name == "Data Science Campus"] <- "Office for National Statistics"

data$department[data$other_department_name == "Welsh Revenue Authority"] <- "Welsh Government"

data$department[data$other_department_name == "Equality Hub, Cabinet Office"] <- "Cabinet Office (excl. agencies)"

data$department[data$other_department_name == "Natural England"] <- "Natural England"

data$department[data$other_department_name == "Department for Communities"] <- "Northern Ireland Executive"

data$department[data$other_department_name == "Department of Education Northern Ireland"] <- "Northern Ireland Executive"

defra_orgs <- c(
"Department for Environment, Food and Rural Affairs (excl. agencies)",
"Forestry Commission",
Expand All @@ -163,3 +177,33 @@ clean_departments <- function(data) {

}

#' @title Clean workplace data
#'
#' @description Reclassify selected 'other' text responses in the workplace
#' column as the standard civil service category. Values that are not in the
#' recode list, including missing values, are left unchanged.
#'
#' @param data cleaned CARS dataset
#'
#' @return CARS dataset with the workplace column recoded
#' @export

clean_workplace <- function(data) {

  civil_service <- "Civil service, including devolved administrations"

  # Responses to recode. Matching is exact and case-sensitive, which is why
  # both capitalisations of "Office for Students" are listed.
  civil_service_responses <- c(
    "MOD",
    "HMRC",
    "The Pensions Regulator",
    "Scottish Funding Council",
    "Office for Students",
    "Office for students",
    "OfS",
    "Dstl"
  )

  # %in% yields FALSE for NA, so missing workplaces are never overwritten
  is_civil_service <- data$workplace %in% civil_service_responses

  data$workplace[is_civil_service] <- civil_service

  data

}
2 changes: 1 addition & 1 deletion R/derive_vars.R
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,7 @@ derive_rap_champ_status <- function(data){
have_RAP_champ == "Yes" & know_RAP_champ == "Yes" ~ "Yes, and I know who the RAP Champion is",
have_RAP_champ == "Yes" & know_RAP_champ == "No" ~ "Yes, but I don't know who the RAP Champion is",
have_RAP_champ == "No" ~ "No",
have_RAP_champ == "I don't know" ~ "I don't know"))
have_RAP_champ == "Don't know" ~ "I don't know"))

}

Expand Down
9 changes: 5 additions & 4 deletions R/frequency-tables.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ summarise_all <- function(data, all_tables = FALSE) {
coding_practices = summarise_coding_practices(data),
doc = summarise_doc(data),
rap_knowledge = summarise_rap_knowledge(data),
rap_champ_status = summarise_rap_champ_status(data),
rap_opinions = summarise_rap_opinions(data),
basic_rap_scores = summarise_rap_basic(data),
advanced_rap_scores = summarise_rap_advanced(data),
Expand Down Expand Up @@ -122,7 +123,7 @@ summarise_coding_tools <- function(data, type = list("knowledge", "access"), pro
"access_SPSS", "knowledge_stata", "access_stata",
"knowledge_matlab", "access_matlab")

levels <- c("Yes", "Don't Know", "No")
levels <- c("Yes", "Don't know", "No")

labels <- c("R", "SQL", "SAS", "VBA", "Python", "SPSS", "Stata", "Matlab")

Expand Down Expand Up @@ -483,7 +484,7 @@ summarise_ci <- function(data) {

levels <- c("Yes",
"No",
"I don't know")
"I don't know what continuous integration is")

frequencies <- calculate_freqs(data, questions, levels)

Expand Down Expand Up @@ -511,7 +512,7 @@ summarise_dep_man <- function(data) {

levels <- c("Yes",
"No",
"I don't know")
"I don't know what dependency management is")

frequencies <- calculate_freqs(data, questions, levels)

Expand Down Expand Up @@ -539,7 +540,7 @@ summarise_rep_workflow <- function(data) {

levels <- c("Yes",
"No",
"I don't know")
"I don't know what reproducible workflows are")

frequencies <- calculate_freqs(data, questions, levels)

Expand Down
65 changes: 31 additions & 34 deletions R/render.R
Original file line number Diff line number Diff line change
Expand Up @@ -40,41 +40,39 @@ create_filtered_pages <- function(data, type = c("professions", "departments"),
dir.create(filtered_pages_path)

if (type == "professions") {
prof_cols <- c(
"prof_DE",
"prof_DS",
"prof_DDAT",
"prof_GAD",
"prof_GES",
"prof_geog",
"prof_GORS",
"prof_GSR",
"prof_GSG"
prof_ref <- data.frame(prof_cols = grep("prof", colnames(data), value = TRUE),
prof_names = c("government data engineers",
"government data scientists",
"digital and data profession (DDAT)",
"government actuary's department (GAD)",
"government economic service (GES)",
"government geography profession",
"government operational research (GORS)",
"government social research (GSR)",
"government statistician group (GSG)",
"no government profession",
"other government profession"),
filenames = c("data-engineers.qmd",
"data-scientists.qmd",
"digital-and-data.qmd",
"government-actuarys-department.qmd",
"government-economic-service.qmd",
"government-geography.qmd",
"government-operational-research.qmd",
"government-social-research.qmd",
"government-statistician-group.qmd",
"no-government-profession.qmd",
"other-government-profession.qmd"
)
)

prof_names <- c(
"government data engineers",
"government data scientists",
"digital and data profession (DDAT)",
"government actuary's department (GAD)",
"government economic service (GES)",
"government geography profession",
"government operational research (GORS)",
"government social research (GSR)",
"government statistician group (GSG)"
)
prof_cols <- data %>%
dplyr::select(dplyr::contains("prof") & !dplyr::contains("none")) %>%
dplyr::select_if(~ any(. == "Yes")) %>%
colnames()

filenames <- c(
"data-engineers.qmd",
"data-scientists.qmd",
"digital-and-data.qmd",
"government-actuarys-department.qmd",
"government-economic-service.qmd",
"government-geography.qmd",
"government-operational-research.qmd",
"government-social-research.qmd",
"government-statician-group.qmd"
)
prof_names <- prof_ref$prof_names[prof_ref$prof_cols %in% prof_cols]
filenames <- prof_ref$filenames[prof_ref$prof_cols %in% prof_cols]

n_pages <- length(prof_cols)
} else if (type == "departments") {
Expand Down Expand Up @@ -115,7 +113,7 @@ create_filtered_pages <- function(data, type = c("professions", "departments"),
title <- paste0("Department summary: ", dep_list[i])
}

# Custom open and close tags are used here to avoid clashes with quarto syntax
# Custom open and close tags are used here to avoid clashes with quarto syntax
contents <- glue::glue(template, .open = "{{{", .close = "}}}") %>% as.character()

path <- paste0(filtered_pages_path, "/", filenames[[i]])
Expand Down Expand Up @@ -146,7 +144,6 @@ create_filtered_pages <- function(data, type = c("professions", "departments"),

}


#' @title Display programming languages filtered by profession
#'
#' @param table frequency table (languages_by_prof, see frequency table functions).
Expand Down
3 changes: 2 additions & 1 deletion main.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,10 @@ library(magrittr)
data <- CARS::get_tidy_data_file("2023_data.csv") %>%
CARS::rename_cols() %>%
CARS::apply_skip_logic() %>%
CARS::clean_workplace() %>%
CARS::clean_departments() %>%
CARS::derive_vars()

CARS::create_filtered_pages(data, type = "departments")
CARS::create_filtered_pages(type = "professions")
CARS::create_filtered_pages(data, type = "professions")
CARS::render_site()
17 changes: 17 additions & 0 deletions man/clean_workplace.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion quarto/main/data_collection.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,10 @@ library(magrittr)
# Setup
all_wave_data <- CARS::get_all_waves(mode = "file")
data <- CARS::get_tidy_data_file("2022_data.csv") %>%
data <- CARS::get_tidy_data_file("2023_data.csv") %>%
CARS::rename_cols() %>%
CARS::apply_skip_logic() %>%
CARS::clean_workplace() %>%
CARS::clean_departments() %>%
CARS::derive_vars()
Expand Down
3 changes: 2 additions & 1 deletion quarto/main/summary.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,10 @@ output:
library(magrittr)
data <- CARS::get_tidy_data_file("2022_data.csv") %>%
data <- CARS::get_tidy_data_file("2023_data.csv") %>%
CARS::rename_cols() %>%
CARS::apply_skip_logic() %>%
CARS::clean_workplace() %>%
CARS::clean_departments() %>%
CARS::derive_vars()
Expand Down
18 changes: 2 additions & 16 deletions quarto/templates/summary.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,10 @@ output:
library(magrittr)
data <- CARS::get_tidy_data_file("2022_data.csv") %>%
data <- CARS::get_tidy_data_file("2023_data.csv") %>%
CARS::rename_cols() %>%
CARS::apply_skip_logic() %>%
CARS::clean_workplace() %>%
CARS::clean_departments() %>%
CARS::derive_vars()
Expand Down Expand Up @@ -47,21 +48,6 @@ CARS::wrap_outputs("coding-freq", plot, table)
```


### What code is being used for

We asked respondents what data operations they carry out in their work, and whether they use code to do them. Please note, we did not ask how much of each data operation is done with code or how often.

Respondents who don't do the operation at all have been removed.

```{r}
plot <- CARS::plot_stacked(tables$operations, xlab = "Operation", font_size = 14)
table <- CARS::df_to_table(tables$operations, column_headers = c("Operation", "I do some or all of this by coding (%)", "I do this without coding (%)"), crosstab = TRUE)
CARS::wrap_outputs("operations", plot, table)
```

### Access to and knowledge of programming languages

Given a list of programming tools, we asked respondents to answer "Yes", "No" or "Don't know" for the following questions:
Expand Down
6 changes: 3 additions & 3 deletions tests/testthat/test-summarise_ci.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
dummy_data <- data.frame(CI = c(NA,
rep("Yes", 2),
rep("No", 3),
rep("I don't know", 4)))
rep("I don't know what continuous integration is", 4)))

test_that("summarise_ci validation works", {

Expand All @@ -26,10 +26,10 @@ test_that("summarise_ci output is as expected", {

expected <- data.frame(value = factor(c("Yes",
"No",
"I don't know"),
"I don't know what continuous integration is"),
levels = c("Yes",
"No",
"I don't know")),
"I don't know what continuous integration is")),
n = c(2/9, 1/3, 4/9))

expect_equal(got, expected)
Expand Down
40 changes: 20 additions & 20 deletions tests/testthat/test-summarise_coding_tools.R
Original file line number Diff line number Diff line change
@@ -1,22 +1,22 @@
# Coding tools frequency tables (access or knowledge)

dummy_data <- data.frame(
knowledge_R = c("Yes", rep("No", 2), rep("Don't Know", 3)),
access_R = c(rep("Yes", 2), "No", rep("Don't Know", 3)),
knowledge_SQL = c(rep("Yes", 3), rep("No", 2), "Don't Know"),
access_SQL = c("Yes", rep("No", 3), rep("Don't Know", 2)),
knowledge_SAS = c(rep("Yes", 2), rep("No", 3), "Don't Know"),
access_SAS = c(rep("Yes", 3), "No", rep("Don't Know", 2)),
knowledge_VBA = c("Yes", rep("No", 2), rep("Don't Know", 3)),
access_VBA = c(rep("Yes", 2), "No", rep("Don't Know", 3)),
knowledge_python = c(rep("Yes", 3), rep("No", 2), "Don't Know"),
access_python = c("Yes", rep("No", 3), rep("Don't Know", 2)),
knowledge_SPSS = c(rep("Yes", 2), rep("No", 3), "Don't Know"),
access_SPSS = c(rep("Yes", 3), "No", rep("Don't Know", 2)),
knowledge_stata = c("Yes", rep("No", 2), rep("Don't Know", 3)),
access_stata = c(rep("Yes", 2), "No", rep("Don't Know", 3)),
knowledge_matlab = c(rep("Yes", 3), rep("No", 2), "Don't Know"),
access_matlab = c("Yes", rep("No", 5), rep("Don't Know", 0)) # Used to check zero counts aren't missing
knowledge_R = c("Yes", rep("No", 2), rep("Don't know", 3)),
access_R = c(rep("Yes", 2), "No", rep("Don't know", 3)),
knowledge_SQL = c(rep("Yes", 3), rep("No", 2), "Don't know"),
access_SQL = c("Yes", rep("No", 3), rep("Don't know", 2)),
knowledge_SAS = c(rep("Yes", 2), rep("No", 3), "Don't know"),
access_SAS = c(rep("Yes", 3), "No", rep("Don't know", 2)),
knowledge_VBA = c("Yes", rep("No", 2), rep("Don't know", 3)),
access_VBA = c(rep("Yes", 2), "No", rep("Don't know", 3)),
knowledge_python = c(rep("Yes", 3), rep("No", 2), "Don't know"),
access_python = c("Yes", rep("No", 3), rep("Don't know", 2)),
knowledge_SPSS = c(rep("Yes", 2), rep("No", 3), "Don't know"),
access_SPSS = c(rep("Yes", 3), "No", rep("Don't know", 2)),
knowledge_stata = c("Yes", rep("No", 2), rep("Don't know", 3)),
access_stata = c(rep("Yes", 2), "No", rep("Don't know", 3)),
knowledge_matlab = c(rep("Yes", 3), rep("No", 2), "Don't know"),
access_matlab = c("Yes", rep("No", 5), rep("Don't know", 0)) # Used to check zero counts aren't missing
)

test_that("summarise_coding_tools missing data is handled correctly", {
Expand All @@ -41,8 +41,8 @@ test_that("summarise_coding_tools knowledge output is as expected", {
"SQL",
"Stata",
"VBA"), each=3),
"value" = factor(rep(c("Yes", "Don't Know", "No"), 8),
levels = c("Yes", "Don't Know", "No")),
"value" = factor(rep(c("Yes", "Don't know", "No"), 8),
levels = c("Yes", "Don't know", "No")),
"n" = c(1/2, 1/6, 1/3, 1/2, 1/6, 1/3, 1/6, 1/2,
1/3, 1/3, 1/6, 1/2, 1/3, 1/6, 1/2, 1/2,
1/6, 1/3, 1/6, 1/2, 1/3, 1/6, 1/2, 1/3))
Expand All @@ -63,8 +63,8 @@ test_that("summarise_coding_tools access output is as expected", {
"SQL",
"Stata",
"VBA"), each=3),
"value" = factor(rep(c("Yes", "Don't Know", "No"), 8),
levels = c("Yes", "Don't Know", "No")),
"value" = factor(rep(c("Yes", "Don't know", "No"), 8),
levels = c("Yes", "Don't know", "No")),
"n" = c(1/6, 0, 5/6, 1/6, 1/3, 1/2, 1/3, 1/2,
1/6, 1/2, 1/3, 1/6, 1/2, 1/3, 1/6, 1/6,
1/3, 1/2, 1/3, 1/2, 1/6, 1/3, 1/2, 1/6))
Expand Down
6 changes: 3 additions & 3 deletions tests/testthat/test-summarise_dep_man.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
dummy_data <- data.frame(dep_management = c(NA,
rep("Yes", 2),
rep("No", 3),
rep("I don't know", 4)))
rep("I don't know what dependency management is", 4)))

test_that("summarise_dep_man validation works", {

Expand All @@ -26,10 +26,10 @@ test_that("summarise_dep_man output is as expected", {

expected <- data.frame(value = factor(c("Yes",
"No",
"I don't know"),
"I don't know what dependency management is"),
levels = c("Yes",
"No",
"I don't know")),
"I don't know what dependency management is")),
n = c(2/9, 1/3, 4/9))

expect_equal(got, expected)
Expand Down
Loading

0 comments on commit 711e839

Please sign in to comment.