UBC-MDS · wenyunie · Dec 8, 2023 · Dec 8, 2023 · Dec 8, 2023
diff --git a/R/clean_speed_dating_dat.R b/R/clean_speed_dating_dat.R
@@ -15,24 +15,48 @@
 #' clean_speed_dating_dat()
 clean_speed_dating_dat <- function(dat){
   # check that the correct data frame is being used
-  if (sum(c("attr", "sinc", "intel", "fun", "amb",
-          "attr5_1", "sinc5_1", "intel5_1", "fun5_1", "amb5_1") %in% names(dat))!=10){
+  # (the id columns are checked too: both rating tables below are keyed
+  # on them, so a missing id should raise this error as well)
+  required_cols <- c(
+    "iid", "pid",
+    "attr", "sinc", "intel", "fun", "amb",
+    "attr5_1", "sinc5_1", "intel5_1", "fun5_1", "amb5_1"
+  )
+  if (!all(required_cols %in% names(dat))) {
     stop("Check that you are using the correct data set!")
   }
+  # check that there is more than 1 observation in the data frame
+  # (an empty data frame is rejected by the same guard)
+  if (nrow(dat) <= 1) {
+    stop("Your data frame only has 1 row of data")
+  }
   # obtaining other rating of each individual by aggregating
   # across their partner's ratings of them
-  dat |> 
+  other_rating <- dat |>
     dplyr::select(pid, attr:amb) |> 
     dplyr::group_by(pid) |> 
-    dplyr::summarise(dplyr::across(c(attr:amb), ~mean(., na.rm = TRUE))) |> 
-    dplyr::rename_at(dplyr::vars(attr:amb), ~ paste0(., "_other_rating")) -> other_rating
+    dplyr::summarise(
+      dplyr::across(
+        attr:amb,
+        ~ mean(., na.rm = TRUE),
+        # name the averaged columns in the same step
+        .names = "{.col}_other_rating"
+      )
+    )
 
   # obtaining self rating  
-  dat |> 
+  self_rating <- dat |>
     dplyr::select(iid, attr5_1:amb5_1) |> 
     unique() |> 
-    dplyr::filter_at(vars(attr5_1:amb5_1), ~ !is.na(.)) -> self_rating
+    dplyr::filter_at(dplyr::vars(attr5_1:amb5_1), ~ !is.na(.))
 
-  cleaned_data <- merge(other_rating, self_rating, by.x = "pid", by.y = "iid")
+  # pair each person's averaged partner ratings with their own self ratings;
+  # merge() is the final expression so the result is returned visibly
+  merge(
+    other_rating,
+    self_rating,
+    by.x = "pid",
+    by.y = "iid"
+  )
 
 }
diff --git a/tests/testthat/helper_clean_speed_dating_dat.R b/tests/testthat/helper_clean_speed_dating_dat.R
@@ -30,4 +30,17 @@ wrong_df <- tribble(
   2,6,6,9,9,9,9,3,8,7,5,5,7
 ) 
 
+# single-observation fixture: it has every column the cleaning function
+# checks for, so it gets past the column check and reaches the row-count guard
+short_df <- tribble(
+  ~iid, ~pid,
+  ~attr5_1, ~sinc5_1, ~intel5_1, ~fun5_1, ~amb5_1,
+  ~attr, ~sinc, ~intel, ~fun, ~amb,
+  ~like,
+  2, 6,
+  6, 9, 9, 9, 9,
+  3, 8, 7, 5, 5,
+  7
+)
+
 df <- clean_speed_dating_dat(raw_df)
diff --git a/tests/testthat/test_clean_speed_dating_dat.R b/tests/testthat/test_clean_speed_dating_dat.R
@@ -8,18 +8,42 @@ test_that("Test that the raw data frame has the relevant input", {
                "Check that you are using the correct data set!")
 })
 
+# a data frame holding a single observation must be rejected
+test_that("Test that the data has more than 1 row", {
+  expect_error(clean_speed_dating_dat(short_df),
+               "Your data frame only has 1 row of data")
+})
+
 # examine output
 test_that("Examine that there are 11 columns in the cleaned data set", {
   expect_equal(ncol(df), 11)
-  expect_equal(names(df), c("pid", "attr_other_rating", "sinc_other_rating", 
-                            "intel_other_rating", "fun_other_rating", "amb_other_rating",  
-                            "attr5_1", "sinc5_1", "intel5_1", "fun5_1", "amb5_1"))
+  # the id column, then the averaged partner ratings, then the self ratings
+  expect_named(
+    df,
+    c(
+      "pid",
+      "attr_other_rating",
+      "sinc_other_rating",
+      "intel_other_rating",
+      "fun_other_rating",
+      "amb_other_rating",
+      "attr5_1",
+      "sinc5_1",
+      "intel5_1",
+      "fun5_1",
+      "amb5_1"
+    )
+  )
 })
 
 test_that("There should be 2 of each attribute in the cleaned data set", {
-  expect_true(sum(str_detect(colnames(df), '^attr')) == 2)
-  expect_true(sum(str_detect(colnames(df), '^sinc')) == 2)
-  expect_true(sum(str_detect(colnames(df), '^intel')) == 2)
-  expect_true(sum(str_detect(colnames(df), '^fun')) == 2)
-  expect_true(sum(str_detect(colnames(df), '^amb')) == 2)
+  # every attribute shows up twice: one "_other_rating" and one "5_1" column
+  for (prefix in c("attr", "sinc", "intel", "fun", "amb")) {
+    # expect_equal() reports the actual count when the expectation fails
+    expect_equal(
+      sum(str_detect(colnames(df), paste0("^", prefix))),
+      2,
+      label = paste0("number of columns starting with '", prefix, "'")
+    )
+  }
 })