-
Notifications
You must be signed in to change notification settings - Fork 5
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: match_name
gains functionality to automatically join by LEI/ISIN (or other specified identifier)
#135
Comments
|
jdhoffa
changed the title
Add matching by LEI/ISIN
feat: Mar 6, 2024
match_name
gains functionality to automatically join by LEI/ISIN (or other specified identifier)
This seems to do it:
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
library(r2dii.data)
library(r2dii.match)
# Toy loanbook with two loans. Both share the same direct loantaker name and
# sector code; only the first loan carries an LEI.
loanbook <- tibble(
  sector_classification_system = "NACE",
  # this generally shouldn't match to anything
  sector_classification_direct_loantaker = "100",
  id_ultimate_parent = c("UP15", "UP16"),
  name_ultimate_parent = c(
    "Alpine Knits India Pvt. Limited",
    "Alpine Knits No Match"
  ),
  id_direct_loantaker = c("C294", "C295"),
  name_direct_loantaker = "Yuamen Xinneng Thermal Power Co Ltd",
  lei = c("LEI123", NA_character_)
)

# Toy asset-level data: a single company whose LEI equals the LEI of the
# first loan above.
abcd <- tibble(
  name_company = "alpine knits india pvt. limited",
  sector = "power",
  lei = "LEI123"
)
# finds no match using only fuzzy matching
match_name(loanbook, abcd)
#> Warning: Found no match.
#> # A tibble: 0 × 16
#> # ℹ 16 variables: sector_classification_system <chr>,
#> # sector_classification_direct_loantaker <chr>, id_ultimate_parent <chr>,
#> # name_ultimate_parent <chr>, id_direct_loantaker <chr>,
#> # name_direct_loantaker <chr>, lei <chr>, id_2dii <lgl>, level <lgl>,
#> # sector <lgl>, sector_abcd <lgl>, name <lgl>, name_abcd <lgl>, score <lgl>,
#> # source <lgl>, borderline <lgl>
# accepts the match using a direct LEI join
# Match a loanbook against ABCD data, optionally accepting exact matches on a
# shared identifier column (e.g. LEI or ISIN) before falling back to
# `match_name()` for the remaining loans.
#
# @param loanbook A data frame of loans, as passed on to `match_name()`.
# @param abcd A data frame of asset-level data, as passed on to
#   `match_name()`.
# @param join_id `NULL` (the default) to rely on `match_name()` only, or a
#   single string naming an identifier column present in both `loanbook` and
#   `abcd`.
#
# @return When `join_id` is `NULL`, the output of `match_name()`. Otherwise the
#   rows of `loanbook` inner-joined to `abcd` on the identifier (which is
#   exposed as a column named `join_id`), followed by the `match_name()`
#   output for the loans that were not matched by identifier.
match_name_ <- function(loanbook, abcd, join_id = NULL) {
  # `NULL` means "no identifier join was attempted". An explicit sentinel
  # replaces `exists("loanbook_joined")`, which would also find an object of
  # that name in an enclosing or the global environment.
  loanbook_joined <- NULL

  if (!is.null(join_id)) {
    is_string <- is.character(join_id) &&
      length(join_id) == 1L &&
      !is.na(join_id)
    if (!is_string) {
      stop("`join_id` must be `NULL` or a single string.", call. = FALSE)
    }
    if (!join_id %in% names(loanbook) || !join_id %in% names(abcd)) {
      stop(
        "`join_id` column \"", join_id,
        "\" must be present in both `loanbook` and `abcd`.",
        call. = FALSE
      )
    }

    # Move the identifier into a common `join_id` column. `!!join_id` injects
    # the string so the lookup cannot be shadowed by a column that happens to
    # be called `join_id`.
    as_join_id <- function(data) {
      if (identical(join_id, "join_id")) {
        # Already under the common name; dropping it would lose the key.
        return(data)
      }
      dplyr::mutate(data, join_id = .data[[!!join_id]], !!join_id := NULL)
    }
    loanbook <- as_join_id(loanbook)
    abcd <- as_join_id(abcd)

    # A missing identifier is not evidence of a match: never join NA to NA.
    loanbook_joined <- dplyr::inner_join(
      loanbook,
      abcd,
      by = "join_id",
      na_matches = "never"
    )

    # Only loans without an identifier match go on to `match_name()`.
    loanbook <- dplyr::filter(
      loanbook,
      !.data[["join_id"]] %in% loanbook_joined[["join_id"]]
    )
  }

  # NOTE(review): this silences every warning from `match_name()`, including
  # "Found no match." -- consider re-signalling it when the final result is
  # empty.
  out <- suppressWarnings(match_name(loanbook, abcd))

  if (is.null(loanbook_joined)) {
    return(out)
  }
  if (nrow(out) == 0L) {
    return(loanbook_joined)
  }
  dplyr::bind_rows(loanbook_joined, out)
}
match_name_(loanbook, abcd, join_id = "lei")
#> # A tibble: 1 × 9
#> sector_classification_system sector_classification_direct…¹ id_ultimate_parent
#> <chr> <chr> <chr>
#> 1 NACE 100 UP15
#> # ℹ abbreviated name: ¹sector_classification_direct_loantaker
#> # ℹ 6 more variables: name_ultimate_parent <chr>, id_direct_loantaker <chr>,
#> # name_direct_loantaker <chr>, join_id <chr>, name_company <chr>,
#> # sector <chr>
Created on 2024-03-06 with reprex v2.1.0 |
3 tasks
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
AB#10175
The text was updated successfully, but these errors were encountered: