From c753f2ec0b15b5c69f796085be8577b51134ba57 Mon Sep 17 00:00:00 2001 From: daviel9 Date: Thu, 4 Jan 2024 15:59:46 +0000 Subject: [PATCH 1/3] Change levels to match new survey responses and reorder the levels --- R/frequency-tables.R | 12 ++- tests/testthat/test-summarise_coding_tools.R | 82 ++++++++++---------- 2 files changed, 48 insertions(+), 46 deletions(-) diff --git a/R/frequency-tables.R b/R/frequency-tables.R index a314847..c9628e7 100644 --- a/R/frequency-tables.R +++ b/R/frequency-tables.R @@ -123,7 +123,11 @@ summarise_coding_tools <- function(data, type = list("knowledge", "access"), pro "access_SPSS", "knowledge_stata", "access_stata", "knowledge_matlab", "access_matlab") - levels <- c("Yes", "Don't know", "No") + if (type == "knowledge") { + levels <- c("Yes", "No", "Not required for my work") + } else { + levels <- c("Yes", "No", "Don't know") + } labels <- c("R", "SQL", "SAS", "VBA", "Python", "SPSS", "Stata", "Matlab") @@ -1142,7 +1146,7 @@ summarise_os_vs_prop <- function(data) { data.frame %>% get_ci(freq_col = 2, n_col = 3) - os_freqs <- cbind(lang_type = "open source", os_freqs) + os_freqs <- cbind(lang_type = "Open Source", os_freqs) prop_freqs <- data %>% dplyr::group_by(year) %>% @@ -1150,11 +1154,11 @@ summarise_os_vs_prop <- function(data) { data.frame %>% get_ci(freq_col = 2, n_col = 3) - prop_freqs <- cbind(lang_type = "proprietary", prop_freqs) + prop_freqs <- cbind(lang_type = "Proprietary", prop_freqs) grouped_lang_freqs <- rbind(os_freqs, prop_freqs) grouped_lang_freqs$year <- as.character(grouped_lang_freqs$year) - grouped_lang_freqs$lang_type <- factor(grouped_lang_freqs$lang_type, levels = c("open source", "proprietary")) + grouped_lang_freqs$lang_type <- factor(grouped_lang_freqs$lang_type, levels = c("Open Source", "Proprietary")) return(grouped_lang_freqs) } diff --git a/tests/testthat/test-summarise_coding_tools.R b/tests/testthat/test-summarise_coding_tools.R index ac12fab..477a8ff 100644 --- 
a/tests/testthat/test-summarise_coding_tools.R +++ b/tests/testthat/test-summarise_coding_tools.R @@ -1,22 +1,22 @@ # Coding tools frequency tables (access or knowledge) dummy_data <- data.frame( - knowledge_R = c("Yes", rep("No", 2), rep("Don't know", 3)), - access_R = c(rep("Yes", 2), "No", rep("Don't know", 3)), - knowledge_SQL = c(rep("Yes", 3), rep("No", 2), "Don't know"), - access_SQL = c("Yes", rep("No", 3), rep("Don't know", 2)), - knowledge_SAS = c(rep("Yes", 2), rep("No", 3), "Don't know"), - access_SAS = c(rep("Yes", 3), "No", rep("Don't know", 2)), - knowledge_VBA = c("Yes", rep("No", 2), rep("Don't know", 3)), - access_VBA = c(rep("Yes", 2), "No", rep("Don't know", 3)), - knowledge_python = c(rep("Yes", 3), rep("No", 2), "Don't know"), - access_python = c("Yes", rep("No", 3), rep("Don't know", 2)), - knowledge_SPSS = c(rep("Yes", 2), rep("No", 3), "Don't know"), - access_SPSS = c(rep("Yes", 3), "No", rep("Don't know", 2)), - knowledge_stata = c("Yes", rep("No", 2), rep("Don't know", 3)), - access_stata = c(rep("Yes", 2), "No", rep("Don't know", 3)), - knowledge_matlab = c(rep("Yes", 3), rep("No", 2), "Don't know"), - access_matlab = c("Yes", rep("No", 5), rep("Don't know", 0)) # Used to check zero counts aren't missing + knowledge_R = c("Yes", "No", "Not required for my work"), + access_R = c("Yes", "No", "Don't know"), + knowledge_SQL = c("Yes", "No", "Not required for my work"), + access_SQL = c("Yes", "No", "Don't know"), + knowledge_SAS = c("Yes", "No", "Not required for my work"), + access_SAS = c("Yes", "No", "Don't know"), + knowledge_VBA = c("Yes", "No", "Not required for my work"), + access_VBA = c("Yes", "No", "Don't know"), + knowledge_python = c("Yes", "No", "Not required for my work"), + access_python = c("Yes", "No", "Don't know"), + knowledge_SPSS = c("Yes", "No", "Not required for my work"), + access_SPSS = c("Yes", "No", "Don't know"), + knowledge_stata = c("Yes", "No", "Not required for my work"), + access_stata = c("Yes", "No", 
"Don't know"), + knowledge_matlab = c("Yes", "No", "No"), + access_matlab = c("Yes", "No", "No") # Used to check zero counts aren't missing ) test_that("summarise_coding_tools missing data is handled correctly", { @@ -33,19 +33,18 @@ test_that("summarise_coding_tools knowledge output is as expected", { got_knowledge <- summarise_coding_tools(dummy_data, "knowledge") - expected_knowledge <- data.frame("name" = rep(c("Matlab", - "Python", - "R", - "SAS", - "SPSS", - "SQL", - "Stata", - "VBA"), each=3), - "value" = factor(rep(c("Yes", "Don't know", "No"), 8), - levels = c("Yes", "Don't know", "No")), - "n" = c(1/2, 1/6, 1/3, 1/2, 1/6, 1/3, 1/6, 1/2, - 1/3, 1/3, 1/6, 1/2, 1/3, 1/6, 1/2, 1/2, - 1/6, 1/3, 1/6, 1/2, 1/3, 1/6, 1/2, 1/3)) + expected_knowledge <- data.frame(name = rep(c("Matlab", + "Python", + "R", + "SAS", + "SPSS", + "SQL", + "Stata", + "VBA"), each=3), + value = factor(rep(c("Yes", "No", "Not required for my work"), 8), + levels = c("Yes", "No", "Not required for my work")), + n = c(1/3, 2/3, 0, rep(1/3, times=21)) + ) expect_equal(got_knowledge, expected_knowledge) @@ -55,19 +54,18 @@ test_that("summarise_coding_tools access output is as expected", { got_access <- summarise_coding_tools(dummy_data, "access") - expected_access <- data.frame("name" = rep(c("Matlab", - "Python", - "R", - "SAS", - "SPSS", - "SQL", - "Stata", - "VBA"), each=3), - "value" = factor(rep(c("Yes", "Don't know", "No"), 8), - levels = c("Yes", "Don't know", "No")), - "n" = c(1/6, 0, 5/6, 1/6, 1/3, 1/2, 1/3, 1/2, - 1/6, 1/2, 1/3, 1/6, 1/2, 1/3, 1/6, 1/6, - 1/3, 1/2, 1/3, 1/2, 1/6, 1/3, 1/2, 1/6)) + expected_access <- data.frame(name = rep(c("Matlab", + "Python", + "R", + "SAS", + "SPSS", + "SQL", + "Stata", + "VBA"), each=3), + value = factor(rep(c("Yes", "No", "Don't know"), 8), + levels = c("Yes", "No", "Don't know")), + n = c(1/3, 2/3, 0, rep(1/3, times=21)) + ) expect_equal(got_access, expected_access) From 78370a940ea456f674fc09f79dced8e999e7b575 Mon Sep 17 00:00:00 2001 From: 
daviel9 Date: Fri, 5 Jan 2024 12:14:08 +0000 Subject: [PATCH 2/3] Rearrange programming languages --- R/frequency-tables.R | 9 +++++---- tests/testthat/test-summarise_coding_tools.R | 16 ++++++++-------- .../testthat/test-summarise_languages_by_prof.R | 6 +++--- 3 files changed, 16 insertions(+), 15 deletions(-) diff --git a/R/frequency-tables.R b/R/frequency-tables.R index 2fd33bb..e6178df 100644 --- a/R/frequency-tables.R +++ b/R/frequency-tables.R @@ -134,7 +134,8 @@ summarise_coding_tools <- function(data, type = list("knowledge", "access"), pro questions <- questions[grepl(paste0(type, "_"), questions)] - frequencies <- calculate_freqs(data, questions, levels, labels, prop = prop) + frequencies <- calculate_freqs(data, questions, levels, labels, prop = prop) %>% + dplyr::arrange(match(name, c("Python", "R", "SQL", "Matlab", "SAS", "SPSS", "Stata", "VBA"))) return(frequencies) } @@ -1152,7 +1153,7 @@ summarise_os_vs_prop <- function(data) { data.frame %>% get_ci(freq_col = 2, n_col = 3) - os_freqs <- cbind(lang_type = "Open Source", os_freqs) + os_freqs <- cbind(lang_type = "open source", os_freqs) prop_freqs <- data %>% dplyr::group_by(year) %>% @@ -1160,11 +1161,11 @@ summarise_os_vs_prop <- function(data) { data.frame %>% get_ci(freq_col = 2, n_col = 3) - prop_freqs <- cbind(lang_type = "Proprietary", prop_freqs) + prop_freqs <- cbind(lang_type = "proprietary", prop_freqs) grouped_lang_freqs <- rbind(os_freqs, prop_freqs) grouped_lang_freqs$year <- as.character(grouped_lang_freqs$year) - grouped_lang_freqs$lang_type <- factor(grouped_lang_freqs$lang_type, levels = c("Open Source", "Proprietary")) + grouped_lang_freqs$lang_type <- factor(grouped_lang_freqs$lang_type, levels = c("open source", "proprietary")) return(grouped_lang_freqs) } diff --git a/tests/testthat/test-summarise_coding_tools.R b/tests/testthat/test-summarise_coding_tools.R index 477a8ff..4288e4b 100644 --- a/tests/testthat/test-summarise_coding_tools.R +++ 
b/tests/testthat/test-summarise_coding_tools.R @@ -33,17 +33,17 @@ test_that("summarise_coding_tools knowledge output is as expected", { got_knowledge <- summarise_coding_tools(dummy_data, "knowledge") - expected_knowledge <- data.frame(name = rep(c("Matlab", - "Python", + expected_knowledge <- data.frame(name = rep(c("Python", "R", + "SQL", + "Matlab", "SAS", "SPSS", - "SQL", "Stata", "VBA"), each=3), value = factor(rep(c("Yes", "No", "Not required for my work"), 8), levels = c("Yes", "No", "Not required for my work")), - n = c(1/3, 2/3, 0, rep(1/3, times=21)) + n = c(rep(1/3, times=9), 1/3, 2/3, 0, rep(1/3, times=12)) ) expect_equal(got_knowledge, expected_knowledge) @@ -54,17 +54,17 @@ test_that("summarise_coding_tools access output is as expected", { got_access <- summarise_coding_tools(dummy_data, "access") - expected_access <- data.frame(name = rep(c("Matlab", - "Python", + expected_access <- data.frame(name = rep(c("Python", "R", + "SQL", + "Matlab", "SAS", "SPSS", - "SQL", "Stata", "VBA"), each=3), value = factor(rep(c("Yes", "No", "Don't know"), 8), levels = c("Yes", "No", "Don't know")), - n = c(1/3, 2/3, 0, rep(1/3, times=21)) + n = c(rep(1/3, times=9), 1/3, 2/3, 0, rep(1/3, times=12)) ) expect_equal(got_access, expected_access) diff --git a/tests/testthat/test-summarise_languages_by_prof.R b/tests/testthat/test-summarise_languages_by_prof.R index 74b0045..41393c4 100644 --- a/tests/testthat/test-summarise_languages_by_prof.R +++ b/tests/testthat/test-summarise_languages_by_prof.R @@ -4,7 +4,7 @@ knowledge_response <- rep(c( NA, "Yes", "No", - "Don't know"), + "Not required for my work"), each = 3, times = 6) prof_response <- rep(c( @@ -61,12 +61,12 @@ test_that("summarise_languages_by_prof output is as expected", { expected <- data.frame( lang = rep(c( - "Matlab", "Python", "R", + "SQL", + "Matlab", "SAS", "SPSS", - "SQL", "Stata", "VBA"), times = 9), From 2e00d5b7e6841174ccd7a0858dfeb480a6cae99d Mon Sep 17 00:00:00 2001 From: daviel9 Date: Fri, 5 Jan 
2024 12:48:26 +0000 Subject: [PATCH 3/3] Fix rap comp table --- R/frequency-tables.R | 4 ++-- tests/testthat/test-summarise_rap_comp.R | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/R/frequency-tables.R b/R/frequency-tables.R index e6178df..d926a4f 100644 --- a/R/frequency-tables.R +++ b/R/frequency-tables.R @@ -429,7 +429,7 @@ summarise_rap_comp <- function(data) { "Team open source code", "Version control", "Peer review", - "Development QA", + "Proportionate QA", "Documentation", "Functions", "Unit testing", @@ -443,7 +443,7 @@ summarise_rap_comp <- function(data) { "open_code_score", "version_control_score", "peer_review_score", - "development_QA_score", + "proportionate_QA_score", "doc_score", "function_score", "unit_test_score", diff --git a/tests/testthat/test-summarise_rap_comp.R b/tests/testthat/test-summarise_rap_comp.R index f77bde3..5355423 100644 --- a/tests/testthat/test-summarise_rap_comp.R +++ b/tests/testthat/test-summarise_rap_comp.R @@ -13,7 +13,7 @@ dummy_data <- data.frame( open_code_score = rep(c(NA, 1, 0), times = 5), version_control_score = rep(c(NA, 1, 0), times = 5), peer_review_score = rep(c(NA, 1, 0), times = 5), - development_QA_score = rep(c(NA, 1, 0), times = 5), + proportionate_QA_score = rep(c(NA, 1, 0), times = 5), doc_score = rep(c(NA, 1, 0), times = 5), basic_rap_score = rep(c(NA, 1, 0), times = 5), function_score = rep(c(NA, 1, 0), times = 5), @@ -48,7 +48,7 @@ test_that("summarise_rap_comp output is as expected", { "Team open source code", "Version control", "Peer review", - "Development QA", + "Proportionate QA", "Documentation", "Functions", "Unit testing", @@ -62,7 +62,7 @@ test_that("summarise_rap_comp output is as expected", { "Team open source code", "Version control", "Peer review", - "Development QA", + "Proportionate QA", "Documentation", "Functions", "Unit testing",