Skip to content

Commit

Permalink
mfdb_helpers: Add single_matches_only option to mfdb_find_species()
Browse files Browse the repository at this point in the history
...and use it to simplify example
  • Loading branch information
lentinj committed Apr 7, 2020
1 parent 331a5cd commit 2d22db7
Show file tree
Hide file tree
Showing 4 changed files with 54 additions and 15 deletions.
1 change: 1 addition & 0 deletions ChangeLog
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
* Add single_matches_only option to mfdb_find_species()
* Turn down logging level

2020-03-02 Jamie Lentin <[email protected]>
Expand Down
14 changes: 11 additions & 3 deletions R/mfdb_helpers.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,18 @@ mfdb_concatenate_results <- function (...) {
}

# Guess at species based on a partial species name
mfdb_find_species <- Vectorize(function(partial_name) {
matches <- mfdb::species[grepl(
mfdb_find_species <- Vectorize(function(partial_name, single_matches_only = FALSE) {
utils::data('species', package = 'mfdb', envir = environment())
matches <- species[grepl(
paste(strsplit(partial_name, "\\s+")[[1]], collapse = ".*\\s+"),
mfdb::species$description, ignore.case = TRUE),]
species$description, ignore.case = TRUE),]
if (single_matches_only && nrow(matches) != 1) {
return(data.frame(
id = NA,
name = NA,
description = NA,
stringsAsFactors = FALSE))
}
data.frame(
id = matches$id,
name = as.character(matches$name),
Expand Down
38 changes: 26 additions & 12 deletions man/mfdb_helpers.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,17 @@

\usage{
# Find species from abbreviated names
mfdb_find_species(partial_name)
mfdb_find_species(partial_name, single_matches_only = FALSE)
}
\arguments{
\item{partial_name}{
Vector of partial species names, e.g. "Gad Mor", "gad. Mor.",
"Gadus Mor", will all match "Cod (Gadus Morhua)".
}
\item{single_matches_only}{
Logical, default FALSE. If TRUE, any partial_name that matches zero
or more than one species yields a single row of NA values (id, name
and description) instead of its matches.
}
}

\value{
Expand All @@ -30,15 +34,25 @@ mfdb_find_species(c("gad mor", "tube worms"))
# description "Cod (Gadus Morhua)" "Tube Worms (Tubeworms)"

# Can also generate a map to help insert a data.frame of foreign data
\dontrun{
# Example stomach-content records with free-text species names.
# stringsAsFactors = TRUE is given explicitly: since R 4.0 read.csv() leaves
# text columns as character, and this example relies on levels() of the
# species column to remap the names.
stomachs <- read.csv(text = '
stomach_name,species,digestion_stage,length,weight,count
A,Palaemon Elegans,1,1,10,5
A,Palaemon Elegans,1,4,40,1
B,Palaemon Elegans,1,1,10,5
B,Palaemon Elegans,4,1,10,5
B,Palaemon Elegans,5,1,10,NA
B,Palaemon Elegans,5,1,10,NA
C,Crangon Crangon,2,3.5,9.5,3
D,Palaemon Elegans,1,1.4,10,1
D,Crangon Crangon,5,4,40,1
E,Worms,1,1.4,10,1
', stringsAsFactors = TRUE)

# Work out a map from all Prey_Species_Name values to MFDB species codes
species_map <- mfdb_find_species(levels(stomachs$Prey_Species_Name))['name',]
# Only include data where there is one possible solution
species_map <- vapply(
species_map,
function (names) if(length(names) == 1) names else as.character(NA),
"")

# Can then remap the species column with:
plyr::revalue(stomachs$Prey_Species_Name, species_map)
}}
species_map <- mfdb_find_species(levels(stomachs$species), single_matches_only = TRUE)['name',]

# Put the new levels back onto the species column
levels(stomachs$species) <- unlist(species_map)

stomachs
}
16 changes: 16 additions & 0 deletions tests/test-mfdb_helpers.R
Original file line number Diff line number Diff line change
Expand Up @@ -73,3 +73,19 @@ ok_group("mfdb_find_species", {
name = c("")[c()],
description = c("")[c()])), "No matches produces empty vector")
})

# With single_matches_only = TRUE every input must map to exactly one value
# per field: the unique match when there is one, NA otherwise.
ok_group("mfdb_find_species:single_matches_only", {
    # 'gadus mor' is expected to match a single species; 'camel' and 'gadus'
    # are expected to collapse to NA (zero or multiple matches).
    # TRUE is spelled out because T is an ordinary, reassignable variable.
    out <- mfdb_find_species(c('camel', 'gadus mor', 'gadus'), single_matches_only = TRUE)

    # Row 'id' of the result
    ok(ut_cmp_identical(out['id',], list(
        'camel' = NA,
        'gadus mor' = 8791030402,
        'gadus' = NA)), "IDs either single value, or NA (0 or multiple)")

    # Row 'name' of the result
    ok(ut_cmp_identical(out['name',], list(
        'camel' = NA,
        'gadus mor' = 'COD',
        'gadus' = NA)), "names either single value, or NA (0 or multiple)")

    # Row 'description' of the result
    ok(ut_cmp_identical(out['description',], list(
        'camel' = NA,
        'gadus mor' = 'Cod (Gadus Morhua)',
        'gadus' = NA)), "Descriptions either single value, or NA (0 or multiple)")
})

0 comments on commit 2d22db7

Please sign in to comment.