From a9090df5082f84083fdef39fc7e49dafa75c00b4 Mon Sep 17 00:00:00 2001
From: jacobvjk
Date: Mon, 2 Sep 2024 12:39:39 +0200
Subject: [PATCH] add tests

---
Review notes (kept below the "---" marker so they stay out of the commit
message):

* R/run_calculate_match_success_rate.R: the cleaned loan id now overwrites
  `id_loan` directly instead of being stored in a temporary
  `id_loan_matched` column, and the column list in the second hunk refers to
  plain "id_loan" instead of renaming `id_loan_matched`.
* The group_id pattern is built from unique(group_id). Without unique() the
  alternation holds one "_<group_id>" entry per row of matched_prioritized;
  duplicate alternatives do not change what gsub() removes, they only make
  the pattern longer.
* Neither pattern adds an anchor, so "_<group_id>" and "_<sector>" are
  removed wherever they occur in id_loan, not only at its end.
* The placeholder "multiplication works" test is replaced by three tests for
  combine_raw_and_matched_loan_books(): matched/unmatched classification,
  removal of a group_id suffix, and removal of a sector_abcd suffix.
* Leading whitespace of the diff lines was reconstructed; if the patch does
  not apply cleanly, retry with `git apply --ignore-whitespace`.

 R/run_calculate_match_success_rate.R          |   6 +-
 .../test-run_calculate_match_success_rate.R   | 100 +++++++++++++++++-
 2 files changed, 101 insertions(+), 5 deletions(-)

diff --git a/R/run_calculate_match_success_rate.R b/R/run_calculate_match_success_rate.R
index 18b6e4fb..8fa646a1 100644
--- a/R/run_calculate_match_success_rate.R
+++ b/R/run_calculate_match_success_rate.R
@@ -422,10 +422,10 @@ combine_raw_and_matched_loan_books <- function(raw_lbk_with_sectors,
   matched_prioritized <- matched_prioritized %>%
     dplyr::select(-"sector") %>%
     dplyr::mutate(
-      id_loan_matched = gsub(paste0("_", .data[["group_id"]], collapse = "|"), "", .data[["id_loan"]])
+      id_loan = gsub(paste0("_", unique(.data[["group_id"]]), collapse = "|"), "", .data[["id_loan"]])
     ) %>%
     dplyr::mutate(
-      id_loan_matched = gsub(paste0("_", .env$all_sectors, collapse="|"), "", .data[["id_loan_matched"]])
+      id_loan = gsub(paste0("_", .env$all_sectors, collapse="|"), "", .data[["id_loan"]])
     )
 
   # use left_join so that unmatched loans are properly accounted for
@@ -445,7 +445,7 @@ combine_raw_and_matched_loan_books <- function(raw_lbk_with_sectors,
       "sector_classification_direct_loantaker",
       "lei_direct_loantaker",
       "isin_direct_loantaker",
-      "id_loan" = "id_loan_matched",
+      "id_loan",
       "group_id",
       "sector" = "sector_abcd",
       "borderline"
diff --git a/tests/testthat/test-run_calculate_match_success_rate.R b/tests/testthat/test-run_calculate_match_success_rate.R
index 8849056e..8a5460ca 100644
--- a/tests/testthat/test-run_calculate_match_success_rate.R
+++ b/tests/testthat/test-run_calculate_match_success_rate.R
@@ -1,3 +1,99 @@
-test_that("multiplication works", {
-  expect_equal(2 * 2, 4)
+# combine_raw_and_matched_loan_books
+test_that("combine_raw_and_matched_loan_books identifies correct matched and unmatched loans", {
+  test_raw <- r2dii.data::loanbook_demo %>%
+    dplyr::mutate(group_id = "test")
+
+  possible_matches_direct <- test_raw %>%
+    dplyr::distinct(.data[["id_loan"]], .data[["name_direct_loantaker"]]) %>%
+    dplyr::semi_join(r2dii.data::abcd_demo, by = c("name_direct_loantaker" = "name_company"))
+
+  non_matches_direct <- test_raw %>%
+    dplyr::distinct(.data[["id_loan"]], .data[["name_direct_loantaker"]]) %>%
+    dplyr::anti_join(r2dii.data::abcd_demo, by = c("name_direct_loantaker" = "name_company"))
+
+  test_raw <- test_raw %>%
+    dplyr::filter(
+      .data[["id_loan"]] %in% possible_matches_direct[["id_loan"]][1] |
+        .data[["id_loan"]] %in% non_matches_direct[["id_loan"]][1]
+    )
+
+  nace_sectors <- r2dii.data::sector_classifications %>%
+    dplyr::filter(.data$code_system == "NACE")
+
+  test_raw_lbk_with_sectors <- add_sectors_to_raw_lbk(
+    raw_lbk = test_raw,
+    sector_classification_system = nace_sectors
+  )
+
+  test_matched_prio <- test_raw %>%
+    r2dii.match::match_name(abcd = r2dii.data::abcd_demo, by_sector = TRUE, min_score = 1)
+
+  test_lbk_match_success <- combine_raw_and_matched_loan_books(
+    raw_lbk_with_sectors = test_raw_lbk_with_sectors,
+    matched_prioritized = test_matched_prio
+  )
+
+  matched <- test_lbk_match_success %>%
+    dplyr::filter(.data[["matched"]] == "Matched") %>%
+    dplyr::distinct(.data[["id_loan"]], .data[["name_direct_loantaker"]])
+
+  testthat::expect_equal(matched, possible_matches_direct[1, ])
+
+  not_matched <- test_lbk_match_success %>%
+    dplyr::filter(.data[["matched"]] == "Not matched") %>%
+    dplyr::distinct(.data[["id_loan"]], .data[["name_direct_loantaker"]])
+
+  testthat::expect_equal(not_matched, non_matches_direct[1, ])
+})
+
+test_that("combine_raw_and_matched_loan_books removes group_id from id_loan where needed", {
+  test_raw <- r2dii.data::loanbook_demo %>%
+    dplyr::mutate(group_id = "test")
+
+  nace_sectors <- r2dii.data::sector_classifications %>%
+    dplyr::filter(.data$code_system == "NACE")
+
+  test_raw_lbk_with_sectors <- add_sectors_to_raw_lbk(
+    raw_lbk = test_raw,
+    sector_classification_system = nace_sectors
+  )
+
+  test_matched_prio <- test_raw %>%
+    r2dii.match::match_name(abcd = r2dii.data::abcd_demo, by_sector = TRUE, min_score = 1) %>%
+    dplyr::mutate(id_loan = paste(id_loan, group_id, sep = "_"))
+
+  test_lbk_match_success <- combine_raw_and_matched_loan_books(
+    raw_lbk_with_sectors = test_raw_lbk_with_sectors,
+    matched_prioritized = test_matched_prio
+  )
+
+  testthat::expect_equal(test_raw$id_loan, unique(test_lbk_match_success$id_loan))
+
+  testthat::expect_contains(test_raw$id_loan, gsub("_test", "", test_matched_prio$id_loan))
+})
+
+test_that("combine_raw_and_matched_loan_books removes sector_abcd from id_loan where needed", {
+  test_raw <- r2dii.data::loanbook_demo %>%
+    dplyr::mutate(group_id = "test")
+
+  nace_sectors <- r2dii.data::sector_classifications %>%
+    dplyr::filter(.data$code_system == "NACE")
+
+  test_raw_lbk_with_sectors <- add_sectors_to_raw_lbk(
+    raw_lbk = test_raw,
+    sector_classification_system = nace_sectors
+  )
+
+  test_matched_prio <- test_raw %>%
+    r2dii.match::match_name(abcd = r2dii.data::abcd_demo, by_sector = TRUE, min_score = 1) %>%
+    dplyr::mutate(id_loan = paste(id_loan, sector_abcd, sep = "_"))
+
+  test_lbk_match_success <- combine_raw_and_matched_loan_books(
+    raw_lbk_with_sectors = test_raw_lbk_with_sectors,
+    matched_prioritized = test_matched_prio
+  )
+
+  testthat::expect_equal(test_raw$id_loan, unique(test_lbk_match_success$id_loan))
+
+  testthat::expect_contains(unique(r2dii.data::sector_classifications$sector), gsub(paste0(test_raw$id_loan, "_", collapse = "|"), "", test_matched_prio$id_loan))
 })