From e32fee6d257bff782d331893b004044d36030a92 Mon Sep 17 00:00:00 2001
From: Jackson Hoffart <jackson.hoffart@gmail.com>
Date: Thu, 7 Mar 2024 08:33:41 +0100
Subject: [PATCH 01/28] add ID join option preserving old functionality

---
 R/match_name.R | 74 ++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 66 insertions(+), 8 deletions(-)

diff --git a/R/match_name.R b/R/match_name.R
index a2910b81..62ab22ec 100644
--- a/R/match_name.R
+++ b/R/match_name.R
@@ -31,6 +31,9 @@
 #'   only `sector`, the value in the `name` column should be `NA` and
 #'   vice-versa. This file can be used to manually match loanbook companies to
 #'   abcd.
+#' @param join_id A character string naming an ID column used to left join
+#'   `abcd` to the `loanbook` (e.g. "lei"). ID column must be present, as named,
+#'   in both input datasets.
 #' @param ... Arguments passed on to [stringdist::stringsim()].
 #' @param ald `r lifecycle::badge('superseded')` `ald` has been superseded by
 #'   `abcd`.
@@ -111,6 +114,7 @@ match_name <- function(loanbook,
                        method = "jw",
                        p = 0.1,
                        overwrite = NULL,
+                       join_id = NULL,
                        ald = deprecated(),
                        ...) {
   restore <- options(datatable.allow.cartesian = TRUE)
@@ -125,9 +129,28 @@ match_name <- function(loanbook,
     abcd <- ald
   }
 
-  match_name_impl(
+  prep_abcd <- restructure_abcd(abcd)
+
+  if (!is.null(join_id)) {
+    check_join_id(join_id, loanbook, prep_abcd)
+
+    join_matched <- dplyr::inner_join(
+      loanbook, prep_abcd, by = join_id
+      )
+    join_matched <- dplyr::mutate(
+      join_matched,
+      !!join_id := NULL,
+      score = 1
+      )
+
+    loanbook <- dplyr::filter(
+      loanbook, !.data[[join_id]] %in% join_matched[[join_id]]
+      )
+  }
+
+  out <- match_name_impl(
     loanbook = loanbook,
-    abcd = abcd,
+    prep_abcd = prep_abcd,
     by_sector = by_sector,
     min_score = min_score,
     method = method,
@@ -135,16 +158,30 @@ match_name <- function(loanbook,
     overwrite = overwrite,
     ...
   )
+
+  if (nrow(out) != 0 && exists("join_matched")) {
+    out <- dplyr::bind_rows(join_matched, out)
+  } else if (nrow(out) == 0 && exists("join_matched")) {
+    out <- join_matched
+  }
+
+  if (identical(nrow(out), 0L)) {
+    rlang::warn("Found no match.")
+    return(empty_loanbook_tibble(loanbook, dplyr::groups(loanbook)))
+  }
+
+  out
 }
 
 match_name_impl <- function(loanbook,
-                            abcd,
+                            prep_abcd,
                             by_sector = TRUE,
                             min_score = 0.8,
                             method = "jw",
                             p = 0.1,
                             overwrite = NULL,
                             ...) {
+
   old_groups <- dplyr::groups(loanbook)
   loanbook <- ungroup(loanbook)
 
@@ -153,7 +190,6 @@ match_name_impl <- function(loanbook,
   loanbook_rowid <- tibble::rowid_to_column(loanbook)
 
   prep_lbk <- restructure_loanbook(loanbook_rowid, overwrite = overwrite)
-  prep_abcd <- restructure_abcd(abcd)
 
   if (by_sector) {
     a <- expand_alias(prep_lbk, prep_abcd)
@@ -164,8 +200,8 @@ match_name_impl <- function(loanbook,
   setDT(a)
 
   if (identical(nrow(a), 0L)) {
-    rlang::warn("Found no match.")
-    return(empty_loanbook_tibble(loanbook, old_groups))
+    rlang::inform("Found no match via fuzzy matching.")
+    return(a)
   }
 
   a <- unique(a)[
@@ -179,8 +215,8 @@ match_name_impl <- function(loanbook,
   a <- a[score >= min_score, ]
 
   if (identical(nrow(a), 0L)) {
-    rlang::warn("Found no match.")
-    return(empty_loanbook_tibble(loanbook, old_groups))
+    rlang::inform("Found no match via fuzzy matching.")
+    return(a)
   }
 
   l <- rename(prep_lbk, alias_lbk = "alias")
@@ -349,3 +385,25 @@ names_added_by_match_name <- function() {
     "borderline"
   )
 }
+
+check_join_id <- function(join_id, loanbook, abcd) {
+  stopifnot(is.character(join_id))
+
+  if (!rlang::has_name(loanbook, join_id)) {
+    rlang::abort(
+      "join_id_not_in_loanbook",
+      message = glue(
+        "The join_id `{join_id}` must be present in both `loanbook` and `abcd`. It's not present in `loanbook`."
+      )
+    )
+  } else if (!rlang::has_name(abcd, join_id)) {
+    rlang::abort(
+      "join_id_not_in_abcd",
+      message = glue(
+        "The join_id `{join_id}` must be present in both `loanbook` and `abcd`. It's not present in `abcd`."
+      )
+    )
+  }
+
+  invisible(join_id)
+}

From d7e91c0c011c2a6749eef384fae795035a1fbe36 Mon Sep 17 00:00:00 2001
From: Jackson Hoffart <jackson.hoffart@gmail.com>
Date: Thu, 7 Mar 2024 08:56:28 +0100
Subject: [PATCH 02/28] restructure_abcd gains join_id col

---
 R/match_name.R       |  2 +-
 R/restructure_abcd.R | 28 ++++++++++++++++++++++------
 2 files changed, 23 insertions(+), 7 deletions(-)

diff --git a/R/match_name.R b/R/match_name.R
index 62ab22ec..57985196 100644
--- a/R/match_name.R
+++ b/R/match_name.R
@@ -129,7 +129,7 @@ match_name <- function(loanbook,
     abcd <- ald
   }
 
-  prep_abcd <- restructure_abcd(abcd)
+  prep_abcd <- restructure_abcd(abcd, join_id)
 
   if (!is.null(join_id)) {
     check_join_id(join_id, loanbook, prep_abcd)
diff --git a/R/restructure_abcd.R b/R/restructure_abcd.R
index 06c8bd6a..095181d8 100644
--- a/R/restructure_abcd.R
+++ b/R/restructure_abcd.R
@@ -5,6 +5,8 @@
 #' from values in the `name_company` column.
 #'
 #' @param data A data frame. Should be an asset-level dataset.
+#' @param join_id (Optional) A string giving the name of an `ID` column to
+#'   preserve in the restructuring
 #'
 #' @seealso [r2dii.data::abcd_demo] `to_alias()`.
 #'
@@ -14,13 +16,27 @@
 #' @examples
 #' restructure_abcd(r2dii.data::abcd_demo)
 #' @noRd
-restructure_abcd <- function(data) {
-  check_crucial_names(data, c("name_company", "sector"))
+restructure_abcd <- function(data, join_id = NULL) {
+  crucial_names <- c("name_company", "sector")
+  if (!is.null(join_id)) {
+    check_crucial_names(data, c(join_id, crucial_names))
+
+    out <- dplyr::transmute(
+      data,
+      name = .data$name_company,
+      sector = tolower(.data$sector),
+      !!join_id := .data[[join_id]]
+    )
+  } else {
+    check_crucial_names(data, crucial_names)
+
+    out <- dplyr::transmute(
+      data,
+      name = .data$name_company,
+      sector = tolower(.data$sector)
+    )
+  }
 
-  out <- dplyr::transmute(
-    data,
-    name = .data$name_company, sector = tolower(.data$sector)
-  )
   out <- distinct(out)
   add_alias(out)
 }

From a23be0870462b3deec0ba1ed966b7e0005061281 Mon Sep 17 00:00:00 2001
From: Jackson Hoffart <jackson.hoffart@gmail.com>
Date: Thu, 7 Mar 2024 09:04:05 +0100
Subject: [PATCH 03/28] new argument gains some tests

---
 tests/testthat/test-match_name.R | 33 ++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/tests/testthat/test-match_name.R b/tests/testthat/test-match_name.R
index e5b28bb6..6878ec23 100644
--- a/tests/testthat/test-match_name.R
+++ b/tests/testthat/test-match_name.R
@@ -723,3 +723,36 @@ test_that("allows custom `sector_classifications` via options() (#354)", {
   expect_equal(nrow(out), 1L)
   options(old)
 })
+
+test_that("`join_id` optionally joins as expected (#135)", {
+
+  loanbook <- tibble(
+    sector_classification_system = "NACE",
+    sector_classification_direct_loantaker = "100", # this generally shouldn't match to anything
+    id_ultimate_parent = c("UP15", "UP16"),
+    name_ultimate_parent = c("Foo", "Bar"),
+    id_direct_loantaker = c("C294", "C295"),
+    name_direct_loantaker = "Baz",
+    id_col = c("LEI123", NA_character_)
+  )
+
+  abcd <- tibble(
+    name_company = "alpine knits india pvt. limited",
+    sector = "power",
+    id_col = "LEI123"
+  )
+
+  # expect no match here, the company names are completely different
+  expect_warning(match_name(loanbook, abcd, join_id = NULL), "no match")
+
+  # expect exactly one match here, based on input ID
+  out_with_join_id <- match_name(loanbook, abcd, join_id = "id_col")
+  expect_equal(nrow(out_with_join_id), 1L)
+
+  # should work for id_col with any name, e.g. `lei`
+  loanbook_lei <- rename(loanbook, lei = id_col)
+  abcd_lei <- rename(abcd, lei = id_col)
+  out_lei <- match_name(loanbook_lei, abcd_lei, join_id = "lei")
+  expect_equal(nrow(out_lei), 1L)
+
+})

From 4af13abcf6020f549aee3dfe2049e1d9fb940af1 Mon Sep 17 00:00:00 2001
From: Jackson Hoffart <jackson.hoffart@gmail.com>
Date: Thu, 7 Mar 2024 09:06:05 +0100
Subject: [PATCH 04/28] update NEWS.md

---
 NEWS.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/NEWS.md b/NEWS.md
index 309f1189..01fc90f6 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,5 +1,7 @@
 # r2dii.match (development version)
 
+* `match_name` gains argument `join_id` allowing an optional perfect join based on a mutual ID column between `loanbook` and `abcd` inputs, prior to attempting fuzzy matching (#135).
+
 # r2dii.match 0.1.4
 
 * `to_alias` can now handle strange encodings without error (#425, @kalashsinghal @Tilmon).

From 841794c80e91fc749c9f55bed2c6e483e6335484 Mon Sep 17 00:00:00 2001
From: Jackson Hoffart <jackson.hoffart@gmail.com>
Date: Thu, 7 Mar 2024 09:06:21 +0100
Subject: [PATCH 05/28] document

---
 man/match_name.Rd | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/man/match_name.Rd b/man/match_name.Rd
index bf4bebaf..d78733c6 100644
--- a/man/match_name.Rd
+++ b/man/match_name.Rd
@@ -12,6 +12,7 @@ match_name(
   method = "jw",
   p = 0.1,
   overwrite = NULL,
+  join_id = NULL,
   ald = deprecated(),
   ...
 )
@@ -39,6 +40,10 @@ only \code{sector}, the value in the \code{name} column should be \code{NA} and
 vice-versa. This file can be used to manually match loanbook companies to
 abcd.}
 
+\item{join_id}{A character string naming an ID column used to left join
+\code{abcd} to the \code{loanbook} (e.g. "lei"). ID column must be present, as named,
+in both input datasets.}
+
 \item{ald}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#superseded}{\figure{lifecycle-superseded.svg}{options: alt='[Superseded]'}}}{\strong{[Superseded]}} \code{ald} has been superseded by
 \code{abcd}.}
 

From a44a868731e86041f98f48ae1e6c45a329ace637 Mon Sep 17 00:00:00 2001
From: Jackson Hoffart <jackson.hoffart@gmail.com>
Date: Thu, 7 Mar 2024 17:03:20 +0100
Subject: [PATCH 06/28] expect output contains optional join_by col

---
 tests/testthat/test-match_name.R | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/tests/testthat/test-match_name.R b/tests/testthat/test-match_name.R
index 6878ec23..befad7c1 100644
--- a/tests/testthat/test-match_name.R
+++ b/tests/testthat/test-match_name.R
@@ -724,6 +724,8 @@ test_that("allows custom `sector_classifications` via options() (#354)", {
   options(old)
 })
 
+
+
 test_that("`join_id` optionally joins as expected (#135)", {
 
   loanbook <- tibble(
@@ -756,3 +758,15 @@ test_that("`join_id` optionally joins as expected (#135)", {
   expect_equal(nrow(out_lei), 1L)
 
 })
+
+test_that("with `join_id`, outputs data with join column (#135)", {
+
+  out <- match_name(
+    fake_lbk(foo = "1"),
+    fake_abcd(foo = "1"),
+    join_id = "foo"
+    )
+
+  expect_contains(names(out), "foo")
+
+})

From 08f6b3ab9fe1ac265a43306c00595a3918aabdc7 Mon Sep 17 00:00:00 2001
From: Jackson Hoffart <jackson.hoffart@gmail.com>
Date: Thu, 7 Mar 2024 17:03:27 +0100
Subject: [PATCH 07/28] dont remove join_by column

---
 R/match_name.R | 42 ++++++++++++++++++++++--------------------
 1 file changed, 22 insertions(+), 20 deletions(-)

diff --git a/R/match_name.R b/R/match_name.R
index 57985196..b8b86b54 100644
--- a/R/match_name.R
+++ b/R/match_name.R
@@ -137,31 +137,33 @@ match_name <- function(loanbook,
     join_matched <- dplyr::inner_join(
       loanbook, prep_abcd, by = join_id
       )
-    join_matched <- dplyr::mutate(
-      join_matched,
-      !!join_id := NULL,
-      score = 1
-      )
+
+    join_matched <- dplyr::mutate(join_matched, score = 1)
 
     loanbook <- dplyr::filter(
-      loanbook, !.data[[join_id]] %in% join_matched[[join_id]]
-      )
+      loanbook,
+      !.data[[join_id]] %in% join_matched[[join_id]]
+    )
   }
 
-  out <- match_name_impl(
-    loanbook = loanbook,
-    prep_abcd = prep_abcd,
-    by_sector = by_sector,
-    min_score = min_score,
-    method = method,
-    p = p,
-    overwrite = overwrite,
-    ...
-  )
+  if (nrow(loanbook) != 0) {
+    fuzzy_matched <- match_name_impl(
+      loanbook = loanbook,
+      prep_abcd = prep_abcd,
+      by_sector = by_sector,
+      min_score = min_score,
+      method = method,
+      p = p,
+      overwrite = overwrite,
+      ...
+    )
+  } else {
+    fuzzy_matched <- tibble()
+  }
 
-  if (nrow(out) != 0 && exists("join_matched")) {
-    out <- dplyr::bind_rows(join_matched, out)
-  } else if (nrow(out) == 0 && exists("join_matched")) {
+  if (exists("join_matched")) {
+    out <- dplyr::bind_rows(join_matched, fuzzy_matched)
+  } else if (nrow(fuzzy_matched) == 0 && exists("join_matched")) {
     out <- join_matched
   }
 

From 0a674331e8b158c55f091ff5da937c1cd63dc363 Mon Sep 17 00:00:00 2001
From: Jackson Hoffart <jackson.hoffart@gmail.com>
Date: Mon, 11 Mar 2024 09:38:10 +0100
Subject: [PATCH 08/28] gains as_join_by (from dplyr)

---
 R/match_name.R | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/R/match_name.R b/R/match_name.R
index b8b86b54..6d1bb20b 100644
--- a/R/match_name.R
+++ b/R/match_name.R
@@ -409,3 +409,35 @@ check_join_id <- function(join_id, loanbook, abcd) {
 
   invisible(join_id)
 }
+
+as_join_by <- function(x) {
+
+  if (rlang::is_list(x)) {
+    if (length(x) != 1L) {
+      rlang::abort("`join_id` must be a list of length 1.")
+    }
+    x_name <- names(x) %||% x
+    y_name <- unname(x)
+  } else if (rlang::is_character(x)) {
+    x_name <- names(x) %||% x
+    y_name <- unname(x)
+
+    # If x partially named, assume unnamed are the same in both tables
+    x_name[x_name == ""] <- y_name[x_name == ""]
+  } else {
+    rlang::abort("`by` must be a list or a character vector.")
+  }
+
+  if (!rlang::is_character(x_name)) {
+    rlang::abort("`by$x` must evaluate to a character vector.")
+  }
+  if (!rlang::is_character(y_name)) {
+    rlang::abort("`by$y` must evaluate to a character vector.")
+  }
+
+  c(x_name, y_name)
+}
+
+`%||%` <- function(x, y) {
+  if (is.null(x)) y else x
+}

From 4490e634e902da6b8fc0bcc5dfc4a90fcc9097fa Mon Sep 17 00:00:00 2001
From: Jackson Hoffart <jackson.hoffart@gmail.com>
Date: Mon, 11 Mar 2024 10:00:56 +0100
Subject: [PATCH 09/28] accept named list input

---
 R/match_name.R | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/R/match_name.R b/R/match_name.R
index 6d1bb20b..8c37fb09 100644
--- a/R/match_name.R
+++ b/R/match_name.R
@@ -140,9 +140,12 @@ match_name <- function(loanbook,
 
     join_matched <- dplyr::mutate(join_matched, score = 1)
 
+    join_by_list <- as_join_by(join_id)
+    loanbook_join_id <- join_by_list[[1]]
+
     loanbook <- dplyr::filter(
       loanbook,
-      !.data[[join_id]] %in% join_matched[[join_id]]
+      !.data[[loanbook_join_id]] %in% join_matched[[loanbook_join_id]]
     )
   }
 
@@ -165,6 +168,8 @@ match_name <- function(loanbook,
     out <- dplyr::bind_rows(join_matched, fuzzy_matched)
   } else if (nrow(fuzzy_matched) == 0 && exists("join_matched")) {
     out <- join_matched
+  } else {
+    out <- fuzzy_matched
   }
 
   if (identical(nrow(out), 0L)) {
@@ -389,16 +394,17 @@ names_added_by_match_name <- function() {
 }
 
 check_join_id <- function(join_id, loanbook, abcd) {
-  stopifnot(is.character(join_id))
 
-  if (!rlang::has_name(loanbook, join_id)) {
+  join_id_list <- as_join_by(join_id)
+
+  if (!rlang::has_name(loanbook, join_id_list[[1]])) {
     rlang::abort(
       "join_id_not_in_loanbook",
       message = glue(
         "The join_id `{join_id}` must be present in both `loanbook` and `abcd`. It's not present in `loanbook`."
       )
     )
-  } else if (!rlang::has_name(abcd, join_id)) {
+  } else if (!rlang::has_name(abcd, join_id_list[[2]])) {
     rlang::abort(
       "join_id_not_in_abcd",
       message = glue(

From e3d71edd5ff948b5a20a388fa1c512fbab3c3dd8 Mon Sep 17 00:00:00 2001
From: Jackson Hoffart <jackson.hoffart@gmail.com>
Date: Mon, 11 Mar 2024 10:00:59 +0100
Subject: [PATCH 10/28] refactor tests

---
 tests/testthat/test-match_name.R | 57 +++++++++++++++++++++-----------
 1 file changed, 37 insertions(+), 20 deletions(-)

diff --git a/tests/testthat/test-match_name.R b/tests/testthat/test-match_name.R
index befad7c1..e9417d8b 100644
--- a/tests/testthat/test-match_name.R
+++ b/tests/testthat/test-match_name.R
@@ -724,23 +724,15 @@ test_that("allows custom `sector_classifications` via options() (#354)", {
   options(old)
 })
 
-
-
 test_that("`join_id` optionally joins as expected (#135)", {
 
-  loanbook <- tibble(
-    sector_classification_system = "NACE",
-    sector_classification_direct_loantaker = "100", # this generally shouldn't match to anything
-    id_ultimate_parent = c("UP15", "UP16"),
-    name_ultimate_parent = c("Foo", "Bar"),
-    id_direct_loantaker = c("C294", "C295"),
-    name_direct_loantaker = "Baz",
-    id_col = c("LEI123", NA_character_)
-  )
+  loanbook <- fake_lbk(
+    name_direct_loantaker = "DL won't fuzzy match",
+    name_ultimate_parent = "UP won't fuzzy match",
+    id_col = "LEI123"
+    )
 
-  abcd <- tibble(
-    name_company = "alpine knits india pvt. limited",
-    sector = "power",
+  abcd <- fake_abcd(
     id_col = "LEI123"
   )
 
@@ -750,17 +742,33 @@ test_that("`join_id` optionally joins as expected (#135)", {
   # expect exactly one match here, based on input ID
   out_with_join_id <- match_name(loanbook, abcd, join_id = "id_col")
   expect_equal(nrow(out_with_join_id), 1L)
+})
+
+test_that("with `join_id` accepts list input indicating different cols", {
 
-  # should work for id_col with any name, e.g. `lei`
-  loanbook_lei <- rename(loanbook, lei = id_col)
-  abcd_lei <- rename(abcd, lei = id_col)
-  out_lei <- match_name(loanbook_lei, abcd_lei, join_id = "lei")
-  expect_equal(nrow(out_lei), 1L)
+  loanbook <- fake_lbk(
+    name_direct_loantaker = "DL won't fuzzy match",
+    name_ultimate_parent = "UP won't fuzzy match",
+    lei_direct_loantaker = "LEI123"
+  )
+
+  abcd <- fake_abcd(
+    lei = "LEI123"
+  )
+
+  out_with_join_id <- match_name(
+    loanbook,
+    abcd,
+    join_id = c(lei_direct_loantaker = "lei")
+    )
+
+  expect_equal(nrow(out_with_join_id), 1L)
 
 })
 
-test_that("with `join_id`, outputs data with join column (#135)", {
+test_that("with `join_id`, outputs data with loanbook join column (#135)", {
 
+  # with `join_id` as character
   out <- match_name(
     fake_lbk(foo = "1"),
     fake_abcd(foo = "1"),
@@ -769,4 +777,13 @@ test_that("with `join_id`, outputs data with join column (#135)", {
 
   expect_contains(names(out), "foo")
 
+  # with `join_id` as named list
+  out <- match_name(
+    fake_lbk(foo_lbk = "1"),
+    fake_abcd(foo_abcd = "1"),
+    join_id = c(foo_lbk = "foo_abcd")
+  )
+
+  expect_contains(names(out), "foo_lbk")
+
 })

From 2d3e62fedc71d169a799f4d888423031a55748e7 Mon Sep 17 00:00:00 2001
From: Jackson Hoffart <jackson.hoffart@gmail.com>
Date: Mon, 11 Mar 2024 10:04:40 +0100
Subject: [PATCH 11/28] update docs

---
 R/match_name.R    | 7 ++++---
 man/match_name.Rd | 7 ++++---
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/R/match_name.R b/R/match_name.R
index 8c37fb09..f617c8ee 100644
--- a/R/match_name.R
+++ b/R/match_name.R
@@ -31,9 +31,10 @@
 #'   only `sector`, the value in the `name` column should be `NA` and
 #'   vice-versa. This file can be used to manually match loanbook companies to
 #'   abcd.
-#' @param join_id A character string naming an ID column used to left join
-#'   `abcd` to the `loanbook` (e.g. "lei"). ID column must be present, as named,
-#'   in both input datasets.
+#' @param join_id A join specification passe to [dplyr::inner_join()]. If a
+#'   character string, it assumes identical join columns between `loanbook` and
+#'   `abcd`. If a named list, uses the name as the join column of `loanbook` and
+#'   the value as the join column of `abcd`.
 #' @param ... Arguments passed on to [stringdist::stringsim()].
 #' @param ald `r lifecycle::badge('superseded')` `ald` has been superseded by
 #'   `abcd`.
diff --git a/man/match_name.Rd b/man/match_name.Rd
index d78733c6..25d2ad7e 100644
--- a/man/match_name.Rd
+++ b/man/match_name.Rd
@@ -40,9 +40,10 @@ only \code{sector}, the value in the \code{name} column should be \code{NA} and
 vice-versa. This file can be used to manually match loanbook companies to
 abcd.}
 
-\item{join_id}{A character string naming an ID column used to left join
-\code{abcd} to the \code{loanbook} (e.g. "lei"). ID column must be present, as named,
-in both input datasets.}
+\item{join_id}{A join specification passe to \code{\link[dplyr:mutate-joins]{dplyr::inner_join()}}. If a
+character string, it assumes identical join columns between \code{loanbook} and
+\code{abcd}. If a named list, uses the name as the join column of \code{loanbook} and
+the value as the join column of \code{abcd}.}
 
 \item{ald}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#superseded}{\figure{lifecycle-superseded.svg}{options: alt='[Superseded]'}}}{\strong{[Superseded]}} \code{ald} has been superseded by
 \code{abcd}.}

From b1f2044723bd70c29f8db3acb2260730be3284f0 Mon Sep 17 00:00:00 2001
From: Jackson Hoffart <jackson.hoffart@gmail.com>
Date: Mon, 11 Mar 2024 10:06:23 +0100
Subject: [PATCH 12/28] add example

---
 R/match_name.R    | 19 +++++++++++++++++++
 man/match_name.Rd | 19 +++++++++++++++++++
 2 files changed, 38 insertions(+)

diff --git a/R/match_name.R b/R/match_name.R
index f617c8ee..86b09edd 100644
--- a/R/match_name.R
+++ b/R/match_name.R
@@ -87,6 +87,25 @@
 #'   code_system = "XYZ"
 #' )
 #'
+#' # match on LEI
+#' loanbook <- tibble(
+#'   sector_classification_system = "XYZ",
+#'   sector_classification_direct_loantaker = "D35.11",
+#'   id_ultimate_parent = "UP15",
+#'   name_ultimate_parent = "Won't fuzzy match",
+#'   id_direct_loantaker = "C294",
+#'   name_direct_loantaker = "Won't fuzzy match",
+#'   lei_direct_loantaker = "LEI123"
+#' )
+#'
+#' abcd <- tibble(
+#'   name_company = "alpine knits india pvt. limited",
+#'   sector = "power",
+#'   lei = "LEI123"
+#' )
+#'
+#' match_name(loanbook, abcd, join_by = c(lei_direct_loantaker = "lei"))
+#'
 #' restore <- options(r2dii.match.sector_classifications = your_classifications)
 #'
 #' loanbook <- tibble(
diff --git a/man/match_name.Rd b/man/match_name.Rd
index 25d2ad7e..a5aaf06a 100644
--- a/man/match_name.Rd
+++ b/man/match_name.Rd
@@ -130,6 +130,25 @@ your_classifications <- tibble(
   code_system = "XYZ"
 )
 
+# match on LEI
+loanbook <- tibble(
+  sector_classification_system = "XYZ",
+  sector_classification_direct_loantaker = "D35.11",
+  id_ultimate_parent = "UP15",
+  name_ultimate_parent = "Won't fuzzy match",
+  id_direct_loantaker = "C294",
+  name_direct_loantaker = "Won't fuzzy match",
+  lei_direct_loantaker = "LEI123"
+)
+
+abcd <- tibble(
+  name_company = "alpine knits india pvt. limited",
+  sector = "power",
+  lei = "LEI123"
+)
+
+match_name(loanbook, abcd, join_by = c(lei_direct_loantaker = "lei"))
+
 restore <- options(r2dii.match.sector_classifications = your_classifications)
 
 loanbook <- tibble(

From c60ad21b4e815bc11cf58d7b16c0510c3ba32605 Mon Sep 17 00:00:00 2001
From: Jackson Hoffart <jackson.hoffart@gmail.com>
Date: Tue, 12 Mar 2024 09:02:13 +0100
Subject: [PATCH 13/28] update test name

---
 tests/testthat/test-match_name.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/testthat/test-match_name.R b/tests/testthat/test-match_name.R
index e9417d8b..607299ef 100644
--- a/tests/testthat/test-match_name.R
+++ b/tests/testthat/test-match_name.R
@@ -724,7 +724,7 @@ test_that("allows custom `sector_classifications` via options() (#354)", {
   options(old)
 })
 
-test_that("`join_id` optionally joins as expected (#135)", {
+test_that("with `join_id`, joins as expected (#135)", {
 
   loanbook <- fake_lbk(
     name_direct_loantaker = "DL won't fuzzy match",

From acb0ef3929361edc52175db2196790b67d7de15d Mon Sep 17 00:00:00 2001
From: Jackson Hoffart <jackson.hoffart@gmail.com>
Date: Tue, 12 Mar 2024 09:15:31 +0100
Subject: [PATCH 14/28] add more complex tests

---
 tests/testthat/test-match_name.R | 60 +++++++++++++++++++++++++++++++-
 1 file changed, 59 insertions(+), 1 deletion(-)

diff --git a/tests/testthat/test-match_name.R b/tests/testthat/test-match_name.R
index 607299ef..536e3925 100644
--- a/tests/testthat/test-match_name.R
+++ b/tests/testthat/test-match_name.R
@@ -742,9 +742,12 @@ test_that("with `join_id`, joins as expected (#135)", {
   # expect exactly one match here, based on input ID
   out_with_join_id <- match_name(loanbook, abcd, join_id = "id_col")
   expect_equal(nrow(out_with_join_id), 1L)
+  expect_contains(names(out_with_join_id), "id_col")
+  expect_equal(unique(out_with_join_id$id_col), "LEI123")
+
 })
 
-test_that("with `join_id` accepts list input indicating different cols", {
+test_that("with `join_id` accepts list input indicating different cols (#135)", {
 
   loanbook <- fake_lbk(
     name_direct_loantaker = "DL won't fuzzy match",
@@ -763,6 +766,8 @@ test_that("with `join_id` accepts list input indicating different cols", {
     )
 
   expect_equal(nrow(out_with_join_id), 1L)
+  expect_contains(names(out_with_join_id), "lei_direct_loantaker")
+  expect_equal(unique(out_with_join_id$lei_direct_loantaker), "LEI123")
 
 })
 
@@ -787,3 +792,56 @@ test_that("with `join_id`, outputs data with loanbook join column (#135)", {
   expect_contains(names(out), "foo_lbk")
 
 })
+
+test_that("with `join_id` and multiple matches, prefers ID (#135)", {
+
+  # loanbook might match at UP
+  loanbook <- fake_lbk(
+    name_direct_loantaker = "DL won't fuzzy match",
+    name_ultimate_parent = "UP will fuzzy match",
+    lei_direct_loantaker = "LEI123"
+  )
+
+  abcd <- fake_abcd(
+    lei = "LEI123",
+    name_company = "UP will fuzzy match"
+  )
+
+  # with `join_id` as named list
+  out <- match_name(
+    loanbook,
+    abcd,
+    join_id = c(lei_direct_loantaker = "lei")
+  )
+
+  expect_equal(unique(out_with_join_id$lei_direct_loantaker), "LEI123")
+
+})
+
+test_that("with `join_id` and one ID match, one fuzzy match, outputs as expected (#135)", {
+
+
+  loanbook <- fake_lbk(
+    id_loan = c("L1", "L2"), # L1 should ID match, L2 should fuzzy match
+    name_direct_loantaker = "DL won't fuzzy match",
+    name_ultimate_parent = c("UP won't fuzzy match", "UP will fuzzy match"),
+    lei_direct_loantaker = c("LEI123", NA_character_)
+  )
+
+  abcd <- fake_abcd(
+    name_company = c("a power company", "UP will fuzzy match"),
+    lei = c("LEI123", NA_character_)
+  )
+
+  out <- match_name(
+    loanbook,
+    abcd,
+    join_id = c(lei_direct_loantaker = "lei")
+  )
+
+  out <- split(out, out$id_loan)
+
+  expect_equal(out$L1$lei_direct_loantaker, "LEI123")
+  expect_equal(out$L2$name, "UP will fuzzy match")
+
+})

From d930aa6051d9000a15df52734cbb0ceea440b21f Mon Sep 17 00:00:00 2001
From: Jackson Hoffart <jackson.hoffart@gmail.com>
Date: Tue, 12 Mar 2024 09:16:07 +0100
Subject: [PATCH 15/28] update test

---
 tests/testthat/test-match_name.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/testthat/test-match_name.R b/tests/testthat/test-match_name.R
index 536e3925..c15922bd 100644
--- a/tests/testthat/test-match_name.R
+++ b/tests/testthat/test-match_name.R
@@ -814,7 +814,7 @@ test_that("with `join_id` and multiple matches, prefers ID (#135)", {
     join_id = c(lei_direct_loantaker = "lei")
   )
 
-  expect_equal(unique(out_with_join_id$lei_direct_loantaker), "LEI123")
+  expect_equal(unique(out$lei_direct_loantaker), "LEI123")
 
 })
 

From 89716c63ee65e141234d2f83ac13c5e89b490a66 Mon Sep 17 00:00:00 2001
From: Jackson Hoffart <jackson.hoffart@gmail.com>
Date: Tue, 12 Mar 2024 09:46:11 +0100
Subject: [PATCH 16/28] restructure abcd properly

---
 R/match_name.R                   | 31 ++++++++++++++++++++++---------
 R/restructure_abcd.R             | 25 +++++++------------------
 tests/testthat/test-match_name.R |  2 +-
 3 files changed, 30 insertions(+), 28 deletions(-)

diff --git a/R/match_name.R b/R/match_name.R
index 86b09edd..ae1648cd 100644
--- a/R/match_name.R
+++ b/R/match_name.R
@@ -149,16 +149,28 @@ match_name <- function(loanbook,
     abcd <- ald
   }
 
-  prep_abcd <- restructure_abcd(abcd, join_id)
-
   if (!is.null(join_id)) {
-    check_join_id(join_id, loanbook, prep_abcd)
+    check_join_id(join_id, loanbook, abcd)
 
-    join_matched <- dplyr::inner_join(
-      loanbook, prep_abcd, by = join_id
-      )
+    crucial_names <- c("name_company", "sector", join_id)
+    check_crucial_names(abcd, crucial_names)
+
+    prep_abcd <- dplyr::transmute(
+      abcd,
+      name_abcd = .data[["name_company"]],
+      sector_abcd = tolower(.data[["sector"]]),
+      !!join_id := .data[[join_id]]
+    )
 
-    join_matched <- dplyr::mutate(join_matched, score = 1)
+    prep_abcd <- dplyr::distinct(prep_abcd)
+
+    join_matched <- dplyr::inner_join(loanbook, prep_abcd, by = join_id)
+
+    join_matched <- dplyr::mutate(
+      join_matched,
+      score = 1,
+      source = "id joined"
+      )
 
     join_by_list <- as_join_by(join_id)
     loanbook_join_id <- join_by_list[[1]]
@@ -172,7 +184,7 @@ match_name <- function(loanbook,
   if (nrow(loanbook) != 0) {
     fuzzy_matched <- match_name_impl(
       loanbook = loanbook,
-      prep_abcd = prep_abcd,
+      abcd = abcd,
       by_sector = by_sector,
       min_score = min_score,
       method = method,
@@ -201,7 +213,7 @@ match_name <- function(loanbook,
 }
 
 match_name_impl <- function(loanbook,
-                            prep_abcd,
+                            abcd,
                             by_sector = TRUE,
                             min_score = 0.8,
                             method = "jw",
@@ -217,6 +229,7 @@ match_name_impl <- function(loanbook,
   loanbook_rowid <- tibble::rowid_to_column(loanbook)
 
   prep_lbk <- restructure_loanbook(loanbook_rowid, overwrite = overwrite)
+  prep_abcd <- restructure_abcd(abcd)
 
   if (by_sector) {
     a <- expand_alias(prep_lbk, prep_abcd)
diff --git a/R/restructure_abcd.R b/R/restructure_abcd.R
index 095181d8..14a273b7 100644
--- a/R/restructure_abcd.R
+++ b/R/restructure_abcd.R
@@ -16,26 +16,15 @@
 #' @examples
 #' restructure_abcd(r2dii.data::abcd_demo)
 #' @noRd
-restructure_abcd <- function(data, join_id = NULL) {
+restructure_abcd <- function(data) {
   crucial_names <- c("name_company", "sector")
-  if (!is.null(join_id)) {
-    check_crucial_names(data, c(join_id, crucial_names))
-
-    out <- dplyr::transmute(
-      data,
-      name = .data$name_company,
-      sector = tolower(.data$sector),
-      !!join_id := .data[[join_id]]
-    )
-  } else {
-    check_crucial_names(data, crucial_names)
+  check_crucial_names(data, crucial_names)
 
-    out <- dplyr::transmute(
-      data,
-      name = .data$name_company,
-      sector = tolower(.data$sector)
-    )
-  }
+  out <- dplyr::transmute(
+    data,
+    name = .data$name_company,
+    sector = tolower(.data$sector)
+  )
 
   out <- distinct(out)
   add_alias(out)
diff --git a/tests/testthat/test-match_name.R b/tests/testthat/test-match_name.R
index c15922bd..032485f9 100644
--- a/tests/testthat/test-match_name.R
+++ b/tests/testthat/test-match_name.R
@@ -842,6 +842,6 @@ test_that("with `join_id` and one ID match, one fuzzy match, outputs as expected
   out <- split(out, out$id_loan)
 
   expect_equal(out$L1$lei_direct_loantaker, "LEI123")
-  expect_equal(out$L2$name, "UP will fuzzy match")
+  expect_equal(out$L2$name_abcd, "UP will fuzzy match")
 
 })

From 4a815ba7bdbc6ca43f71104b9185eef3a7408fcd Mon Sep 17 00:00:00 2001
From: Jackson Hoffart <jackson.hoffart@gmail.com>
Date: Tue, 12 Mar 2024 10:59:18 +0100
Subject: [PATCH 17/28] test that new arg works with prioritize

---
 tests/testthat/test-prioritize.R | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/tests/testthat/test-prioritize.R b/tests/testthat/test-prioritize.R
index bd90d2c6..5186777a 100644
--- a/tests/testthat/test-prioritize.R
+++ b/tests/testthat/test-prioritize.R
@@ -235,3 +235,12 @@ test_that("with 0-row input returns 0-row input", {
 
   expect_no_error(prioritize(zero_row))
 })
+
+test_that("with `join_id`, outputs as expected", {
+  lbk <- fake_lbk(id_loan = "L1", id_col = "1")
+  abcd <- fake_abcd(id_col = "1")
+  matched <- match_name(lbk, abcd, join_id = "id_col")
+
+  out <- prioritize(matched)
+  expect_equal(nrow(out), 1L)
+})

From f16334d1d538cf3824d0330e0f76a84741f14ed6 Mon Sep 17 00:00:00 2001
From: Jackson Hoffart <jackson.hoffart@gmail.com>
Date: Tue, 12 Mar 2024 10:59:24 +0100
Subject: [PATCH 18/28] solve failing test

---
 R/match_name.R | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/R/match_name.R b/R/match_name.R
index ae1648cd..aa5068d6 100644
--- a/R/match_name.R
+++ b/R/match_name.R
@@ -164,17 +164,27 @@ match_name <- function(loanbook,
 
     prep_abcd <- dplyr::distinct(prep_abcd)
 
-    join_matched <- dplyr::inner_join(loanbook, prep_abcd, by = join_id)
-
-    join_matched <- dplyr::mutate(
-      join_matched,
-      score = 1,
-      source = "id joined"
+    prep_lbk <- may_add_sector_and_borderline(loanbook)
+    prep_lbk <- distinct(prep_lbk)
+
+    join_matched <- dplyr::inner_join(
+      prep_lbk,
+      prep_abcd,
+      by = join_id,
+      na_matches = "never"
       )
 
     join_by_list <- as_join_by(join_id)
     loanbook_join_id <- join_by_list[[1]]
 
+    join_matched <- dplyr::mutate(
+      join_matched,
+      score = 1,
+      source = "id joined",
+      level = loanbook_join_id,
+      name = .data[["name_abcd"]]
+    )
+
     loanbook <- dplyr::filter(
       loanbook,
       !.data[[loanbook_join_id]] %in% join_matched[[loanbook_join_id]]

From e8d98d2afd088aaeca833e4c3ecfa8d2c663fec5 Mon Sep 17 00:00:00 2001
From: Jackson Hoffart <jackson.hoffart@gmail.com>
Date: Tue, 12 Mar 2024 11:09:34 +0100
Subject: [PATCH 19/28] skip tests if r2dii.data outdated

---
 tests/testthat/test-match_name.R | 11 +++++------
 tests/testthat/test-prioritize.R |  3 ++-
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/tests/testthat/test-match_name.R b/tests/testthat/test-match_name.R
index 032485f9..59b11441 100644
--- a/tests/testthat/test-match_name.R
+++ b/tests/testthat/test-match_name.R
@@ -725,7 +725,7 @@ test_that("allows custom `sector_classifications` via options() (#354)", {
 })
 
 test_that("with `join_id`, joins as expected (#135)", {
-
+  skip_if_r2dii_data_outdated()
   loanbook <- fake_lbk(
     name_direct_loantaker = "DL won't fuzzy match",
     name_ultimate_parent = "UP won't fuzzy match",
@@ -748,7 +748,7 @@ test_that("with `join_id`, joins as expected (#135)", {
 })
 
 test_that("with `join_id` accepts list input indicating different cols (#135)", {
-
+  skip_if_r2dii_data_outdated()
   loanbook <- fake_lbk(
     name_direct_loantaker = "DL won't fuzzy match",
     name_ultimate_parent = "UP won't fuzzy match",
@@ -772,7 +772,7 @@ test_that("with `join_id` accepts list input indicating different cols (#135)",
 })
 
 test_that("with `join_id`, outputs data with loanbook join column (#135)", {
-
+  skip_if_r2dii_data_outdated()
   # with `join_id` as character
   out <- match_name(
     fake_lbk(foo = "1"),
@@ -794,7 +794,7 @@ test_that("with `join_id`, outputs data with loanbook join column (#135)", {
 })
 
 test_that("with `join_id` and multiple matches, prefers ID (#135)", {
-
+  skip_if_r2dii_data_outdated()
   # loanbook might match at UP
   loanbook <- fake_lbk(
     name_direct_loantaker = "DL won't fuzzy match",
@@ -819,8 +819,7 @@ test_that("with `join_id` and multiple matches, prefers ID (#135)", {
 })
 
 test_that("with `join_id` and one ID match, one fuzzy match, outputs as expected (#135)", {
-
-
+  skip_if_r2dii_data_outdated()
   loanbook <- fake_lbk(
     id_loan = c("L1", "L2"), # L1 should ID match, L2 should fuzzy match
     name_direct_loantaker = "DL won't fuzzy match",
diff --git a/tests/testthat/test-prioritize.R b/tests/testthat/test-prioritize.R
index 5186777a..33a6d6f1 100644
--- a/tests/testthat/test-prioritize.R
+++ b/tests/testthat/test-prioritize.R
@@ -236,7 +236,8 @@ test_that("with 0-row input returns 0-row input", {
   expect_no_error(prioritize(zero_row))
 })
 
-test_that("with `join_id`, outputs as expected", {
+test_that("with `match_name` with `join_id`, outputs as expected (#135)", {
+  skip_if_r2dii_data_outdated()
   lbk <- fake_lbk(id_loan = "L1", id_col = "1")
   abcd <- fake_abcd(id_col = "1")
   matched <- match_name(lbk, abcd, join_id = "id_col")

From 9814c6c03b064315b1010ef7239c5cb112a03908 Mon Sep 17 00:00:00 2001
From: Jackson Hoffart <jackson.hoffart@gmail.com>
Date: Thu, 14 Mar 2024 11:17:20 +0100
Subject: [PATCH 20/28] fix incorrect error messages

---
 R/match_name.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/match_name.R b/R/match_name.R
index aa5068d6..8e47d6f9 100644
--- a/R/match_name.R
+++ b/R/match_name.R
@@ -444,14 +444,14 @@ check_join_id <- function(join_id, loanbook, abcd) {
     rlang::abort(
       "join_id_not_in_loanbook",
       message = glue(
-        "The join_id `{join_id}` must be present in both `loanbook` and `abcd`. It's not present in `loanbook`."
+        "The join_id `{join_id_list[[1]]}` must be present in `loanbook` input."
       )
     )
   } else if (!rlang::has_name(abcd, join_id_list[[2]])) {
     rlang::abort(
       "join_id_not_in_abcd",
       message = glue(
-        "The join_id `{join_id}` must be present in both `loanbook` and `abcd`. It's not present in `abcd`."
+        "The join_id `{join_id_list[[2]]}` must be present in `abcd` input."
       )
     )
   }

From 08f989e84dc85d3ed5d57577d0fdc64ae4717c00 Mon Sep 17 00:00:00 2001
From: Jackson Hoffart <jackson.hoffart@gmail.com>
Date: Thu, 14 Mar 2024 11:17:29 +0100
Subject: [PATCH 21/28] fix comment in test

---
 tests/testthat/test-match_name.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/testthat/test-match_name.R b/tests/testthat/test-match_name.R
index 59b11441..15c1bf33 100644
--- a/tests/testthat/test-match_name.R
+++ b/tests/testthat/test-match_name.R
@@ -782,7 +782,7 @@ test_that("with `join_id`, outputs data with loanbook join column (#135)", {
 
   expect_contains(names(out), "foo")
 
-  # with `join_id` as named list
+  # with `join_id` as a named vector
   out <- match_name(
     fake_lbk(foo_lbk = "1"),
     fake_abcd(foo_abcd = "1"),

From 704fd061abd0523d4dbd51be7c55cb0e7bb6a932 Mon Sep 17 00:00:00 2001
From: Jackson Hoffart <jackson.hoffart@gmail.com>
Date: Thu, 14 Mar 2024 11:18:11 +0100
Subject: [PATCH 22/28] fix wording of test

---
 tests/testthat/test-match_name.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/testthat/test-match_name.R b/tests/testthat/test-match_name.R
index 15c1bf33..79ea4812 100644
--- a/tests/testthat/test-match_name.R
+++ b/tests/testthat/test-match_name.R
@@ -807,7 +807,7 @@ test_that("with `join_id` and multiple matches, prefers ID (#135)", {
     name_company = "UP will fuzzy match"
   )
 
-  # with `join_id` as named list
+  # with `join_id` as named vector
   out <- match_name(
     loanbook,
     abcd,

From 79382c11b7e3081a1a1fefac53e9cac11a22df0a Mon Sep 17 00:00:00 2001
From: Jackson Hoffart <jackson.hoffart@gmail.com>
Date: Thu, 14 Mar 2024 11:20:10 +0100
Subject: [PATCH 23/28] Update R/match_name.R

Co-authored-by: CJ Yetman <cyetman@rmi.org>
---
 R/match_name.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/match_name.R b/R/match_name.R
index 8e47d6f9..879939a2 100644
--- a/R/match_name.R
+++ b/R/match_name.R
@@ -31,7 +31,7 @@
 #'   only `sector`, the value in the `name` column should be `NA` and
 #'   vice-versa. This file can be used to manually match loanbook companies to
 #'   abcd.
-#' @param join_id A join specification passe to [dplyr::inner_join()]. If a
+#' @param join_id A join specification passed to [dplyr::inner_join()]. If a
 #'   character string, it assumes identical join columns between `loanbook` and
 #'   `abcd`. If a named list, uses the name as the join column of `loanbook` and
 #'   the value as the join column of `abcd`.

From 7b13092ade827f19b3d4b0562024c59252f6c3bf Mon Sep 17 00:00:00 2001
From: Jackson Hoffart <jackson.hoffart@gmail.com>
Date: Thu, 14 Mar 2024 11:20:26 +0100
Subject: [PATCH 24/28] document

---
 man/match_name.Rd | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/man/match_name.Rd b/man/match_name.Rd
index a5aaf06a..76181339 100644
--- a/man/match_name.Rd
+++ b/man/match_name.Rd
@@ -40,7 +40,7 @@ only \code{sector}, the value in the \code{name} column should be \code{NA} and
 vice-versa. This file can be used to manually match loanbook companies to
 abcd.}
 
-\item{join_id}{A join specification passe to \code{\link[dplyr:mutate-joins]{dplyr::inner_join()}}. If a
+\item{join_id}{A join specification passed to \code{\link[dplyr:mutate-joins]{dplyr::inner_join()}}. If a
 character string, it assumes identical join columns between \code{loanbook} and
 \code{abcd}. If a named list, uses the name as the join column of \code{loanbook} and
 the value as the join column of \code{abcd}.}

From 0fe36c62abeb9735e7ec3b0af2f00c9e96c44990 Mon Sep 17 00:00:00 2001
From: Jackson Hoffart <jackson.hoffart@gmail.com>
Date: Thu, 14 Mar 2024 12:14:00 +0100
Subject: [PATCH 25/28] Update R/match_name.R

Co-authored-by: Jacob Kastl <60064070+jacobvjk@users.noreply.github.com>
---
 R/match_name.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/match_name.R b/R/match_name.R
index 879939a2..6aac4078 100644
--- a/R/match_name.R
+++ b/R/match_name.R
@@ -33,7 +33,7 @@
 #'   abcd.
 #' @param join_id A join specification passed to [dplyr::inner_join()]. If a
 #'   character string, it assumes identical join columns between `loanbook` and
-#'   `abcd`. If a named list, uses the name as the join column of `loanbook` and
+#'   `abcd`. If a named character vector, it uses the name as the join column of `loanbook` and
 #'   the value as the join column of `abcd`.
 #' @param ... Arguments passed on to [stringdist::stringsim()].
 #' @param ald `r lifecycle::badge('superseded')` `ald` has been superseded by

From 8106fe77055be46378bd45794fbfd98c19b39645 Mon Sep 17 00:00:00 2001
From: Jackson Hoffart <jackson.hoffart@gmail.com>
Date: Thu, 14 Mar 2024 12:14:08 +0100
Subject: [PATCH 26/28] Update tests/testthat/test-match_name.R

Co-authored-by: Jacob Kastl <60064070+jacobvjk@users.noreply.github.com>
---
 tests/testthat/test-match_name.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/testthat/test-match_name.R b/tests/testthat/test-match_name.R
index 79ea4812..136fb950 100644
--- a/tests/testthat/test-match_name.R
+++ b/tests/testthat/test-match_name.R
@@ -747,7 +747,7 @@ test_that("with `join_id`, joins as expected (#135)", {
 
 })
 
-test_that("with `join_id` accepts list input indicating different cols (#135)", {
+test_that("with `join_id` accepts named character vector input indicating different cols (#135)", {
   skip_if_r2dii_data_outdated()
   loanbook <- fake_lbk(
     name_direct_loantaker = "DL won't fuzzy match",

From 59e61966ba59c4a1920ccc512641271f686d3c4c Mon Sep 17 00:00:00 2001
From: Jackson Hoffart <jackson.hoffart@gmail.com>
Date: Thu, 14 Mar 2024 12:15:11 +0100
Subject: [PATCH 27/28] remove more mentions of list

---
 R/match_name.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/match_name.R b/R/match_name.R
index 6aac4078..3a4f3c80 100644
--- a/R/match_name.R
+++ b/R/match_name.R
@@ -463,7 +463,7 @@ as_join_by <- function(x) {
 
   if (rlang::is_list(x)) {
     if (length(x) != 1L) {
-      rlang::abort("`join_id` must be a list of length 1.")
+      rlang::abort("`join_id` must be a vector of length 1.")
     }
     x_name <- names(x) %||% x
     y_name <- unname(x)
@@ -474,7 +474,7 @@ as_join_by <- function(x) {
     # If x partially named, assume unnamed are the same in both tables
     x_name[x_name == ""] <- y_name[x_name == ""]
   } else {
-    rlang::abort("`by` must be a list or a character vector.")
+    rlang::abort("`by` must be a string or a character vector.")
   }
 
   if (!rlang::is_character(x_name)) {

From afb28c67ba2b332229bfaa35d96fc5fd2d28bcf7 Mon Sep 17 00:00:00 2001
From: Jackson Hoffart <jackson.hoffart@gmail.com>
Date: Thu, 14 Mar 2024 12:15:13 +0100
Subject: [PATCH 28/28] document

---
 man/match_name.Rd | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/man/match_name.Rd b/man/match_name.Rd
index 76181339..a54945ee 100644
--- a/man/match_name.Rd
+++ b/man/match_name.Rd
@@ -42,7 +42,7 @@ abcd.}
 
 \item{join_id}{A join specification passed to \code{\link[dplyr:mutate-joins]{dplyr::inner_join()}}. If a
 character string, it assumes identical join columns between \code{loanbook} and
-\code{abcd}. If a named list, uses the name as the join column of \code{loanbook} and
+\code{abcd}. If a named character vector, it uses the name as the join column of \code{loanbook} and
 the value as the join column of \code{abcd}.}
 
 \item{ald}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#superseded}{\figure{lifecycle-superseded.svg}{options: alt='[Superseded]'}}}{\strong{[Superseded]}} \code{ald} has been superseded by