From c753f2ec0b15b5c69f796085be8577b51134ba57 Mon Sep 17 00:00:00 2001
From: daviel9 <Luke.Davies@ons.gov.uk>
Date: Thu, 4 Jan 2024 15:59:46 +0000
Subject: [PATCH 1/3] Change levels to match new survey responses and reorder
 the levels

---
 R/frequency-tables.R                         | 12 ++-
 tests/testthat/test-summarise_coding_tools.R | 82 ++++++++++----------
 2 files changed, 48 insertions(+), 46 deletions(-)

diff --git a/R/frequency-tables.R b/R/frequency-tables.R
index a314847..c9628e7 100644
--- a/R/frequency-tables.R
+++ b/R/frequency-tables.R
@@ -123,7 +123,11 @@ summarise_coding_tools <- function(data, type = list("knowledge", "access"), pro
                  "access_SPSS", "knowledge_stata", "access_stata",
                  "knowledge_matlab", "access_matlab")
 
-  levels <- c("Yes", "Don't know", "No")
+  if (type == "knowledge") {
+    levels <- c("Yes", "No", "Not required for my work")
+  } else {
+    levels <- c("Yes", "No", "Don't know")
+  }
 
   labels <- c("R", "SQL", "SAS", "VBA", "Python", "SPSS", "Stata", "Matlab")
 
@@ -1142,7 +1146,7 @@ summarise_os_vs_prop <- function(data) {
     data.frame %>%
     get_ci(freq_col = 2, n_col = 3)
 
-  os_freqs <- cbind(lang_type = "open source", os_freqs)
+  os_freqs <- cbind(lang_type = "Open Source", os_freqs)
 
   prop_freqs <- data %>%
     dplyr::group_by(year) %>%
@@ -1150,11 +1154,11 @@ summarise_os_vs_prop <- function(data) {
     data.frame %>%
     get_ci(freq_col = 2, n_col = 3)
 
-  prop_freqs <- cbind(lang_type = "proprietary", prop_freqs)
+  prop_freqs <- cbind(lang_type = "Proprietary", prop_freqs)
 
   grouped_lang_freqs <- rbind(os_freqs, prop_freqs)
   grouped_lang_freqs$year <- as.character(grouped_lang_freqs$year)
-  grouped_lang_freqs$lang_type <- factor(grouped_lang_freqs$lang_type, levels = c("open source", "proprietary"))
+  grouped_lang_freqs$lang_type <- factor(grouped_lang_freqs$lang_type, levels = c("Open Source", "Proprietary"))
 
   return(grouped_lang_freqs)
 }
diff --git a/tests/testthat/test-summarise_coding_tools.R b/tests/testthat/test-summarise_coding_tools.R
index ac12fab..477a8ff 100644
--- a/tests/testthat/test-summarise_coding_tools.R
+++ b/tests/testthat/test-summarise_coding_tools.R
@@ -1,22 +1,22 @@
 # Coding tools frequency tables (access or knowledge)
 
 dummy_data <- data.frame(
-  knowledge_R = c("Yes", rep("No", 2), rep("Don't know", 3)),
-  access_R = c(rep("Yes", 2), "No", rep("Don't know", 3)),
-  knowledge_SQL = c(rep("Yes", 3), rep("No", 2), "Don't know"),
-  access_SQL = c("Yes", rep("No", 3), rep("Don't know", 2)),
-  knowledge_SAS = c(rep("Yes", 2), rep("No", 3), "Don't know"),
-  access_SAS = c(rep("Yes", 3), "No", rep("Don't know", 2)),
-  knowledge_VBA = c("Yes", rep("No", 2), rep("Don't know", 3)),
-  access_VBA = c(rep("Yes", 2), "No", rep("Don't know", 3)),
-  knowledge_python = c(rep("Yes", 3), rep("No", 2), "Don't know"),
-  access_python = c("Yes", rep("No", 3), rep("Don't know", 2)),
-  knowledge_SPSS = c(rep("Yes", 2), rep("No", 3), "Don't know"),
-  access_SPSS = c(rep("Yes", 3), "No", rep("Don't know", 2)),
-  knowledge_stata = c("Yes", rep("No", 2), rep("Don't know", 3)),
-  access_stata = c(rep("Yes", 2), "No", rep("Don't know", 3)),
-  knowledge_matlab = c(rep("Yes", 3), rep("No", 2), "Don't know"),
-  access_matlab = c("Yes", rep("No", 5), rep("Don't know", 0)) # Used to check zero counts aren't missing
+  knowledge_R = c("Yes", "No", "Not required for my work"),
+  access_R = c("Yes", "No", "Don't know"),
+  knowledge_SQL = c("Yes", "No", "Not required for my work"),
+  access_SQL = c("Yes", "No", "Don't know"),
+  knowledge_SAS = c("Yes", "No", "Not required for my work"),
+  access_SAS = c("Yes", "No", "Don't know"),
+  knowledge_VBA = c("Yes", "No", "Not required for my work"),
+  access_VBA = c("Yes", "No", "Don't know"),
+  knowledge_python = c("Yes", "No", "Not required for my work"),
+  access_python = c("Yes", "No", "Don't know"),
+  knowledge_SPSS = c("Yes", "No", "Not required for my work"),
+  access_SPSS = c("Yes", "No", "Don't know"),
+  knowledge_stata = c("Yes", "No", "Not required for my work"),
+  access_stata = c("Yes", "No", "Don't know"),
+  knowledge_matlab = c("Yes", "No", "No"),
+  access_matlab = c("Yes", "No", "No") # Used to check zero counts aren't missing
 )
 
 test_that("summarise_coding_tools missing data is handled correctly", {
@@ -33,19 +33,18 @@ test_that("summarise_coding_tools knowledge output is as expected", {
 
   got_knowledge <- summarise_coding_tools(dummy_data, "knowledge")
 
-  expected_knowledge <- data.frame("name" = rep(c("Matlab",
-                                                  "Python",
-                                                  "R",
-                                                  "SAS",
-                                                  "SPSS",
-                                                  "SQL",
-                                                  "Stata",
-                                                  "VBA"), each=3),
-                                   "value" = factor(rep(c("Yes", "Don't know", "No"), 8),
-                                                    levels = c("Yes", "Don't know", "No")),
-                                   "n" = c(1/2, 1/6, 1/3, 1/2, 1/6, 1/3, 1/6, 1/2,
-                                           1/3, 1/3, 1/6, 1/2, 1/3, 1/6, 1/2, 1/2,
-                                           1/6, 1/3, 1/6, 1/2, 1/3, 1/6, 1/2, 1/3))
+  expected_knowledge <- data.frame(name = rep(c("Matlab",
+                                                "Python",
+                                                "R",
+                                                "SAS",
+                                                "SPSS",
+                                                "SQL",
+                                                "Stata",
+                                                "VBA"), each=3),
+                                   value = factor(rep(c("Yes", "No", "Not required for my work"), 8),
+                                                  levels = c("Yes", "No", "Not required for my work")),
+                                   n = c(1/3, 2/3, 0, rep(1/3, times=21))
+  )
 
   expect_equal(got_knowledge, expected_knowledge)
 
@@ -55,19 +54,18 @@ test_that("summarise_coding_tools access output is as expected", {
 
   got_access <- summarise_coding_tools(dummy_data, "access")
 
-  expected_access <- data.frame("name" = rep(c("Matlab",
-                                               "Python",
-                                               "R",
-                                               "SAS",
-                                               "SPSS",
-                                               "SQL",
-                                               "Stata",
-                                               "VBA"), each=3),
-                                "value" = factor(rep(c("Yes", "Don't know", "No"), 8),
-                                                 levels = c("Yes", "Don't know", "No")),
-                                "n" = c(1/6, 0, 5/6, 1/6, 1/3, 1/2, 1/3, 1/2,
-                                        1/6, 1/2, 1/3, 1/6, 1/2, 1/3, 1/6, 1/6,
-                                        1/3, 1/2, 1/3, 1/2, 1/6, 1/3, 1/2, 1/6))
+  expected_access <- data.frame(name = rep(c("Matlab",
+                                             "Python",
+                                             "R",
+                                             "SAS",
+                                             "SPSS",
+                                             "SQL",
+                                             "Stata",
+                                             "VBA"), each=3),
+                                value = factor(rep(c("Yes", "No", "Don't know"), 8),
+                                               levels = c("Yes", "No", "Don't know")),
+                                n = c(1/3, 2/3, 0, rep(1/3, times=21))
+  )
 
   expect_equal(got_access, expected_access)
 

From 78370a940ea456f674fc09f79dced8e999e7b575 Mon Sep 17 00:00:00 2001
From: daviel9 <Luke.Davies@ons.gov.uk>
Date: Fri, 5 Jan 2024 12:14:08 +0000
Subject: [PATCH 2/3] Rearrange programming languages

---
 R/frequency-tables.R                             |  9 +++++----
 tests/testthat/test-summarise_coding_tools.R     | 16 ++++++++--------
 .../testthat/test-summarise_languages_by_prof.R  |  6 +++---
 3 files changed, 16 insertions(+), 15 deletions(-)

diff --git a/R/frequency-tables.R b/R/frequency-tables.R
index 2fd33bb..e6178df 100644
--- a/R/frequency-tables.R
+++ b/R/frequency-tables.R
@@ -134,7 +134,8 @@ summarise_coding_tools <- function(data, type = list("knowledge", "access"), pro
 
   questions <- questions[grepl(paste0(type, "_"), questions)]
 
-  frequencies <- calculate_freqs(data, questions, levels, labels, prop = prop)
+  frequencies <- calculate_freqs(data, questions, levels, labels, prop = prop) %>%
+    dplyr::arrange(match(name, c("Python", "R", "SQL", "Matlab", "SAS", "SPSS", "Stata", "VBA")))
 
   return(frequencies)
 }
@@ -1152,7 +1153,7 @@ summarise_os_vs_prop <- function(data) {
     data.frame %>%
     get_ci(freq_col = 2, n_col = 3)
 
-  os_freqs <- cbind(lang_type = "Open Source", os_freqs)
+  os_freqs <- cbind(lang_type = "open source", os_freqs)
 
   prop_freqs <- data %>%
     dplyr::group_by(year) %>%
@@ -1160,11 +1161,11 @@ summarise_os_vs_prop <- function(data) {
     data.frame %>%
     get_ci(freq_col = 2, n_col = 3)
 
-  prop_freqs <- cbind(lang_type = "Proprietary", prop_freqs)
+  prop_freqs <- cbind(lang_type = "proprietary", prop_freqs)
 
   grouped_lang_freqs <- rbind(os_freqs, prop_freqs)
   grouped_lang_freqs$year <- as.character(grouped_lang_freqs$year)
-  grouped_lang_freqs$lang_type <- factor(grouped_lang_freqs$lang_type, levels = c("Open Source", "Proprietary"))
+  grouped_lang_freqs$lang_type <- factor(grouped_lang_freqs$lang_type, levels = c("open source", "proprietary"))
 
   return(grouped_lang_freqs)
 }
diff --git a/tests/testthat/test-summarise_coding_tools.R b/tests/testthat/test-summarise_coding_tools.R
index 477a8ff..4288e4b 100644
--- a/tests/testthat/test-summarise_coding_tools.R
+++ b/tests/testthat/test-summarise_coding_tools.R
@@ -33,17 +33,17 @@ test_that("summarise_coding_tools knowledge output is as expected", {
 
   got_knowledge <- summarise_coding_tools(dummy_data, "knowledge")
 
-  expected_knowledge <- data.frame(name = rep(c("Matlab",
-                                                "Python",
+  expected_knowledge <- data.frame(name = rep(c("Python",
                                                 "R",
+                                                "SQL",
+                                                "Matlab",
                                                 "SAS",
                                                 "SPSS",
-                                                "SQL",
                                                 "Stata",
                                                 "VBA"), each=3),
                                    value = factor(rep(c("Yes", "No", "Not required for my work"), 8),
                                                   levels = c("Yes", "No", "Not required for my work")),
-                                   n = c(1/3, 2/3, 0, rep(1/3, times=21))
+                                   n = c(rep(1/3, times=9), 1/3, 2/3, 0, rep(1/3, times=12))
   )
 
   expect_equal(got_knowledge, expected_knowledge)
@@ -54,17 +54,17 @@ test_that("summarise_coding_tools access output is as expected", {
 
   got_access <- summarise_coding_tools(dummy_data, "access")
 
-  expected_access <- data.frame(name = rep(c("Matlab",
-                                             "Python",
+  expected_access <- data.frame(name = rep(c("Python",
                                              "R",
+                                             "SQL",
+                                             "Matlab",
                                              "SAS",
                                              "SPSS",
-                                             "SQL",
                                              "Stata",
                                              "VBA"), each=3),
                                 value = factor(rep(c("Yes", "No", "Don't know"), 8),
                                                levels = c("Yes", "No", "Don't know")),
-                                n = c(1/3, 2/3, 0, rep(1/3, times=21))
+                                n = c(rep(1/3, times=9), 1/3, 2/3, 0, rep(1/3, times=12))
   )
 
   expect_equal(got_access, expected_access)
diff --git a/tests/testthat/test-summarise_languages_by_prof.R b/tests/testthat/test-summarise_languages_by_prof.R
index 74b0045..41393c4 100644
--- a/tests/testthat/test-summarise_languages_by_prof.R
+++ b/tests/testthat/test-summarise_languages_by_prof.R
@@ -4,7 +4,7 @@ knowledge_response <- rep(c(
   NA,
   "Yes",
   "No",
-  "Don't know"),
+  "Not required for my work"),
   each = 3, times = 6)
 
 prof_response <- rep(c(
@@ -61,12 +61,12 @@ test_that("summarise_languages_by_prof output is as expected", {
   expected <- data.frame(
 
     lang = rep(c(
-      "Matlab",
       "Python",
       "R",
+      "SQL",
+      "Matlab",
       "SAS",
       "SPSS",
-      "SQL",
       "Stata",
       "VBA"),
       times = 9),

From 2e00d5b7e6841174ccd7a0858dfeb480a6cae99d Mon Sep 17 00:00:00 2001
From: daviel9 <Luke.Davies@ons.gov.uk>
Date: Fri, 5 Jan 2024 12:48:26 +0000
Subject: [PATCH 3/3] Fix rap comp table

---
 R/frequency-tables.R                     | 4 ++--
 tests/testthat/test-summarise_rap_comp.R | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/R/frequency-tables.R b/R/frequency-tables.R
index e6178df..d926a4f 100644
--- a/R/frequency-tables.R
+++ b/R/frequency-tables.R
@@ -429,7 +429,7 @@ summarise_rap_comp <- function(data) {
               "Team open source code",
               "Version control",
               "Peer review",
-              "Development QA",
+              "Proportionate QA",
               "Documentation",
               "Functions",
               "Unit testing",
@@ -443,7 +443,7 @@ summarise_rap_comp <- function(data) {
                  "open_code_score",
                  "version_control_score",
                  "peer_review_score",
-                 "development_QA_score",
+                 "proportionate_QA_score",
                  "doc_score",
                  "function_score",
                  "unit_test_score",
diff --git a/tests/testthat/test-summarise_rap_comp.R b/tests/testthat/test-summarise_rap_comp.R
index f77bde3..5355423 100644
--- a/tests/testthat/test-summarise_rap_comp.R
+++ b/tests/testthat/test-summarise_rap_comp.R
@@ -13,7 +13,7 @@ dummy_data <- data.frame(
   open_code_score = rep(c(NA, 1, 0), times = 5),
   version_control_score = rep(c(NA, 1, 0), times = 5),
   peer_review_score = rep(c(NA, 1, 0), times = 5),
-  development_QA_score = rep(c(NA, 1, 0), times = 5),
+  proportionate_QA_score = rep(c(NA, 1, 0), times = 5),
   doc_score = rep(c(NA, 1, 0), times = 5),
   basic_rap_score = rep(c(NA, 1, 0), times = 5),
   function_score = rep(c(NA, 1, 0), times = 5),
@@ -48,7 +48,7 @@ test_that("summarise_rap_comp output is as expected", {
       "Team open source code",
       "Version control",
       "Peer review",
-      "Development QA",
+      "Proportionate QA",
       "Documentation",
       "Functions",
       "Unit testing",
@@ -62,7 +62,7 @@ test_that("summarise_rap_comp output is as expected", {
          "Team open source code",
          "Version control",
          "Peer review",
-         "Development QA",
+         "Proportionate QA",
          "Documentation",
          "Functions",
          "Unit testing",