Skip to content

Commit

Permalink
Resolved Issue #46; updated and organized package dependencies (and r…
Browse files Browse the repository at this point in the history
…emoved tibble::, added tidyr:: dependency); fixed various formatting/style/workflow errors in lookup*() functions; added warning to lookup_syn() for more than one substance; added tests.
  • Loading branch information
gabrielodom committed Jul 13, 2021
1 parent 59b5c5c commit f164c6e
Show file tree
Hide file tree
Showing 8 changed files with 158 additions and 55 deletions.
10 changes: 5 additions & 5 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -38,21 +38,21 @@ Depends:
Imports:
dplyr,
magrittr,
purrr,
stats,
stringr,
tibble,
tidyr,
tidytext,
utils
Suggests:
purrr,
knitr,
rmarkdown,
conflicted,
knitr,
readr,
rmarkdown,
rvest,
sqldf,
tidyr,
testthat,
tibble,
usethis,
xml2
VignetteBuilder: knitr
5 changes: 4 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,13 @@ importFrom(dplyr,anti_join)
importFrom(dplyr,case_when)
importFrom(dplyr,filter)
importFrom(dplyr,mutate)
importFrom(dplyr,rename)
importFrom(dplyr,select)
importFrom(magrittr,"%>%")
importFrom(purrr,map_df)
importFrom(stats,na.omit)
importFrom(stringr,str_detect)
importFrom(stringr,str_replace_all)
importFrom(tibble,tibble)
importFrom(tidyr,drop_na)
importFrom(tidytext,unnest_tokens)
importFrom(utils,data)
46 changes: 36 additions & 10 deletions R/lookup.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,15 @@
#' for in column \code{category}? Defaults to TRUE.
#' @param searchSynonym Should the substances listed in \code{...} be searched
#' for in column \code{synonym}? Defaults to TRUE.
#' @param dropUnmatched Should words in \code{drug_vec} which do not match
#' substances in our database be dropped from the table? Defaults to FALSE.
#'
#' @return A lookup table with category \code{data.frame} having four columns:
#' original search term, drug class, drug category, and drug street name.
#'
#' @importFrom purrr map_df
#' @importFrom tidyr drop_na
#'
#' @export
#'
#' @examples
Expand All @@ -23,23 +29,34 @@
lookup <- function(drug_vec = NULL, ...,
searchClass = TRUE,
searchCategory = TRUE,
searchSynonym = TRUE) {
searchSynonym = TRUE,
dropUnmatched = FALSE) {
# browser()

thingy <- c(drug_vec, as.character(list(...)))
thingy_char <- vapply(
X = as.character(thingy),
FUN = tolower,
FUN.VALUE = character(1),
USE.NAMES = FALSE)
USE.NAMES = FALSE
)

# lookup individual words

answer <- purrr::map_df(thingy_char, .lookup,
searchClass,
searchCategory,
searchSynonym)
answer <- map_df(
.x = thingy_char,
.f = .lookup,
searchClass,
searchCategory,
searchSynonym
)

row.names(answer) <- NULL

if (dropUnmatched) {
answer <- drop_na(answer, "class":"synonym")
}

answer

}
Expand Down Expand Up @@ -68,13 +85,22 @@ lookup <- function(drug_vec = NULL, ...,
# Combine row match logic (use OR for base FALSE layer)
matches_lgl <- classRowMatches | categoryRowMatches | synonymRowMatches

answer <- DOPE::lookup_df[matches_lgl,, drop = FALSE]
answer <- DOPE::lookup_df[matches_lgl, , drop = FALSE]

if (nrow(answer) == 0){
answer <- data.frame(original_word = x, class = NA_character_, category = NA_character_,
synonym = NA_character_)
answer <- data.frame(
original_word = x,
class = NA_character_,
category = NA_character_,
synonym = NA_character_
)
} else{
answer <- data.frame(original_word = x, answer)
answer <- data.frame(
original_word = x,
answer
)
}

answer

}
73 changes: 52 additions & 21 deletions R/lookup_syn.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,58 +7,89 @@
#'
#' @return A lookup table with category \code{data.frame} having three columns:
#' drug class, drug category match, and synonym name.
#'
#' @importFrom magrittr %>%
#' @importFrom dplyr rename select filter
#'
#' @export
#'
#' @examples
#' lookup_syn("zip")

lookup_syn <- function(drug_name) {
# browser()

#binding vars to function
category <- original_word <- synonym <- NULL

# Make sure only a single word is supplied (this function is not designed to
# be vectorised)
if (length(drug_name) > 1L) {
drug_name <- drug_name[1]
warning("Only the first element of drug_name will be used.
If you have multiple substances to query, please use lookup() instead.")
}

# Make sure drug_name is a string
if (is.character(drug_name)){

# make lowercase and remove leading and trailing whitespace
drug_name <- tolower(drug_name) %>% trimws()
drug_name <-
tolower(drug_name) %>%
trimws()

} else {
stop("drug_name should be a string of one drug name")
}

# lookup individual words
matches <- DOPE::lookup(drug_name, searchClass=TRUE,
searchCategory=TRUE,
searchSynonym=TRUE)
matches_df <- DOPE::lookup(
drug_name,
searchClass = TRUE,
searchCategory = TRUE,
searchSynonym = TRUE
)

cat_matches <- unique(matches[,c("category")])
drugCateg_char <- matches_df[ , "category", drop = TRUE]
cat_matches <- unique(drugCateg_char)

if (length(cat_matches) == 1) {

answer <- lookup(
matches_df$category,
searchClass = TRUE,
searchCategory = TRUE,
searchSynonym = FALSE
)

if (length(cat_matches) == 1){
answer <- lookup(matches$category, searchClass=TRUE,
searchCategory=TRUE,
searchSynonym = FALSE)
answer <- answer %>%
dplyr::rename("category_match" = category) %>%
dplyr::select(-original_word)
answer <- subset(answer, synonym != drug_name)
} else if(drug_name %in% matches[,c("category")] ){
answer <- matches[matches$category == drug_name, ]
answer <- answer %>%
dplyr::rename("category_match" = category) %>%
dplyr::select(-original_word)
answer <- subset(answer, synonym != drug_name)
rename("category_match" = category) %>%
select(-original_word) %>%
filter(synonym != drug_name)

} else if (drug_name %in% drugCateg_char) {

answer <- matches_df %>%
filter(category == drug_name) %>%
rename("category_match" = category) %>%
select(-original_word) %>%
filter(synonym != drug_name)

} else {

message(
"Your search matched multiple categories. Please choose one \n",
"from the following list and refine your search. Example: \n",
"lookup_syn('", unique(matches[c("category")])[1,1], "') or ",
"lookup_syn('", unique(matches[c("category")])[2,1], "')"
"lookup_syn('", cat_matches[1], "') or ",
"lookup_syn('", cat_matches[2], "')"
)

answer <- unique(matches[c("category")])
answer <- unique(matches_df["category"])

}

answer

}


Expand Down
4 changes: 2 additions & 2 deletions R/parse.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
#' @importFrom stringr str_detect str_replace_all
#' @importFrom tidytext unnest_tokens
#' @importFrom dplyr mutate filter anti_join case_when
#' @importFrom tibble tibble
#'
#' @export
#'
#' @examples
Expand All @@ -37,7 +37,7 @@ parse <- function(drug_vec){
#ex. and combination of "speedball" also preserved

unnestedDrugs_df <-
tibble(drug_vec) %>%
data.frame(drug_vec) %>%
unnest_tokens(
word, drug_vec, token = "regex", pattern = "[,|-|+|&| ]", to_lower = TRUE
) %>%
Expand Down
6 changes: 5 additions & 1 deletion man/lookup.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 16 additions & 0 deletions tests/testthat/test-lookup.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,15 @@ test_that("lookup with dots works", {
expect_equal(testOut2_df, lookup("zip", "shrooms"))
})


### Test Vector input ###
test_vector <- c("zip", "shrooms")

test_that("lookup with vector as first argument works", {
expect_equal(testOut2_df, lookup(test_vector))
})


### Test return of original word ###
testOut_df3 <- data.frame(
original_word = c("zip"),
Expand All @@ -53,3 +55,17 @@ testOut_df3 <- data.frame(
test_that("the original lookup word is returned in the dataframe", {
expect_equal(testOut_df3, lookup("zip"))
})


### Test Dropping Rows with No Classification ###
# See https://github.com/CTN-0094/DOPE/issues/46
testSentence_char <-
"Uh, there is a lot of sexual, sex in the gay community while on MDMA"
parsedSentence_char <- parse(testSentence_char)

test_that("we can drop unclassified words", {
expect_equal(
lookup(parsedSentence_char, dropUnmatched = TRUE),
lookup("mdma")
)
})
53 changes: 38 additions & 15 deletions tests/testthat/test-lookup_syn.R
Original file line number Diff line number Diff line change
@@ -1,30 +1,53 @@
### Test for single category match ###
testOut_df <- data.frame(
class = rep(c("cannabis"),each=5),
category_match = rep(c("marijuana"), each= 5),
class = rep("cannabis", each = 5),
category_match = rep("marijuana", each = 5),
synonym = c("420", "a-bomb", "acapulco gold", "acapulco red", "ace")
)

test_that("single category match works", {
expect_equal(testOut_df, head(lookup_syn("draf weed"),5))
expect_equal(
testOut_df,
head(lookup_syn("draf weed"), 5)
)
})


### Test that Multiple Entries are Discarded ###
test_that("Drug vector only uses first entry and throws warning", {
expect_warning(
lookup_syn(c("ritalin", "opium")),
"Only the first element"
)
})


### Test Drug Category Search ###
# This is the else if() part
test_that("Drug category matches multiple times handled correctly", {
expect_equal(
data.frame(
class = "stimulant",
category_match = "ritalin",
synonym = "kibbles and bits"
),
lookup_syn("ritalin")
)
})


### Test with multiple category matches ###
testOut_df2 <- data.frame(
category =c(
"amphetamine"
,"cocaine"
,"crack"
,"crack cocaine"
,"khat"
,"methamphetamine"
,"methcathinone"
,"methylphenidate"
,"ritalin"
,"synthetic cathinone"
category = c(
"amphetamine", "cocaine", "crack", "crack cocaine", "khat",
"methamphetamine", "methcathinone", "methylphenidate", "ritalin",
"synthetic cathinone"
)
)

test_that("multiple category match works", {
expect_equal(testOut_df2$category, lookup_syn("stimulant")$category)
expect_equal(
testOut_df2$category,
lookup_syn("stimulant")$category
)
})

1 comment on commit f164c6e

@gabrielodom
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@labouz review please

Please sign in to comment.