From 0b128224c17094f3a71cd1e660085ec309e3ebba Mon Sep 17 00:00:00 2001 From: colin millar Date: Tue, 6 Dec 2016 09:56:18 +0100 Subject: [PATCH 01/19] switch to simple url reader --- DESCRIPTION | 3 +-- NAMESPACE | 3 +-- R/utilities.R | 41 ++++++++++++++++++++++++++++++----------- 3 files changed, 32 insertions(+), 15 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 8a3786b..3b9efd3 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -4,8 +4,7 @@ Date: 2016-12-05 Title: ICES Vocabularies Database Web Services Authors@R: c(person("Colin", "Millar", role=c("aut","cre"), email="colin.millar@ices.dk"), person("Arni", "Magnusson", role="aut")) -Imports: RCurl, - utils, +Imports: utils, XML Suggests: knitr, testthat diff --git a/NAMESPACE b/NAMESPACE index ee80519..6fddb10 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,10 +4,9 @@ export(findAphia) export(getCodeDetail) export(getCodeList) export(getCodeTypeList) -importFrom(RCurl,basicTextGatherer) -importFrom(RCurl,curlPerform) importFrom(XML,getChildrenStrings) importFrom(XML,removeNodes) importFrom(XML,xmlParse) importFrom(XML,xmlRoot) importFrom(XML,xmlSize) +importFrom(utils,download.file) diff --git a/R/utilities.R b/R/utilities.R index f91b0c0..7307525 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -1,16 +1,24 @@ -#' @importFrom RCurl basicTextGatherer -#' @importFrom RCurl curlPerform -curlVocab <- function(url) { - # read only XML table and return as string - reader <- basicTextGatherer() - curlPerform(url = url, - httpheader = c('Content-Type' = "text/xml; charset=utf-8", SOAPAction=""), - writefunction = reader$update, - verbose = FALSE) - # return - reader$value() + +#' @importFrom utils download.file +readVocab <- function(url) { + # create file name + tmp <- tempfile() + # download file + if (os.type("windows")) { + download.file(url, destfile = tmp, quiet = TRUE) + } else if (os.type("unix")) { + download.file(url, destfile = tmp, quiet = TRUE, method = "wget") + } else if (os.type("other")) { + warning("Untested downloading in this platform") + download.file(url, destfile = tmp, quiet = TRUE) + } + on.exit(unlink(tmp)) + + # scan lines + scan(tmp, what = "", sep = "\n", quiet = TRUE) } + #' @importFrom XML xmlParse #' @importFrom XML xmlRoot #' @importFrom XML xmlSize @@ -105,3 +113,14 @@ simplify <- function(x) { } x } + +# returns TRUE if correct operating system is passed as an argument +os.type <- function (type = c("unix", "windows", "other")) +{ + type <- match.arg(type) + if (type %in% c("windows", "unix")) { + .Platform$OS.type == type + } else { + TRUE + } +} From c710472d2c48cb5a56c1e3103620c5354e81e94a Mon Sep 17 00:00:00 2001 From: colin millar Date: Tue, 6 Dec 2016 10:01:34 +0100 Subject: [PATCH 02/19] tidying files prior to changes --- R/getCodeDetail.R | 10 ++++++---- R/getCodeList.R | 7 +++---- R/getCodeTypeList.R | 9 +++------ R/utilities.R | 2 +- man/getCodeDetail.Rd | 3 --- man/getCodeList.Rd | 3 --- man/getCodeTypeList.Rd | 3 --- 7 files changed, 13 insertions(+), 24 deletions(-) diff --git a/R/getCodeDetail.R b/R/getCodeDetail.R index b26532b..43e8378 100644 --- a/R/getCodeDetail.R +++ b/R/getCodeDetail.R @@ -13,8 +13,6 @@ #' #' \code{\link{icesVocab-package}} gives an overview of the package. #' -#' @author Colin Millar. -#' #' @examples #' # Species code 101170 #' getCodeDetail("SpecWoRMS", 101170) @@ -22,10 +20,14 @@ #' @export getCodeDetail <- function(code_type, code) { - # read XML string and parse to data frame + + # form url url <- sprintf("http://vocab.ices.dk/services/pox/GetCodeDetail/%s/%s", code_type, code) - out <- curlVocab(url) + + # read url contents + out <- readVocab(url) + # parse the text string returning a dataframe out <- parseVocab(out) # for now, drop parent and child relations diff --git a/R/getCodeList.R b/R/getCodeList.R index 6dc8e0b..698e62f 100644 --- a/R/getCodeList.R +++ b/R/getCodeList.R @@ -14,8 +14,6 @@ #' #' \code{\link{icesVocab-package}} gives an overview of the package. #' -#' @author Colin Millar. -#' #' @examples #' # Species codes #' getCodeList("SpecWoRMS") @@ -32,8 +30,9 @@ getCodeList <- function(code_type, date = NULL) { url <- sprintf(paste0(url, "/%s"), date) } - # read XML string and parse to data frame - out <- curlVocab(url) + # read url contents + out <- readVocab(url) + # parse the text string returning a dataframe out <- parseVocab(out) out diff --git a/R/getCodeTypeList.R b/R/getCodeTypeList.R index 4c2b813..197de95 100644 --- a/R/getCodeTypeList.R +++ b/R/getCodeTypeList.R @@ -14,8 +14,6 @@ #' #' \code{\link{icesVocab-package}} gives an overview of the package. #' -#' @author Colin Millar. -#' #' @examples #' # Find code type for World Register of Marine Species (WoRMS) #' types <- getCodeTypeList() @@ -35,10 +33,9 @@ getCodeTypeList <- function(date = NULL) { url <- sprintf(paste0(url, "/%s"), date) } - # read and parse XML from API - out <- curlVocab(url = url) - # parse the xml text string suppplied by the Datras webservice - # returning a dataframe + # read url contents + out <- readVocab(url) + # parse the text string returning a dataframe out <- parseVocab(out) # return diff --git a/R/utilities.R b/R/utilities.R index 7307525..4430b02 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -59,7 +59,7 @@ parseVocab <- function(x) { checkVocabWebserviceOK <- function() { # return TRUE if webservice server is good, FALSE otherwise - out <- curlVocab(url = "http://vocab.ices.dk/services/pox/GetCodeDetail/SpecWoRMS/101170") + out <- readVocab(url = "http://vocab.ices.dk/services/pox/GetCodeDetail/SpecWoRMS/101170") # Check the server is not down by insepcting the XML response for internal server error message. if (grepl("Internal Server Error", out)) { diff --git a/man/getCodeDetail.Rd b/man/getCodeDetail.Rd index 354053f..5dd32c3 100644 --- a/man/getCodeDetail.Rd +++ b/man/getCodeDetail.Rd @@ -21,9 +21,6 @@ Get details for a given code. # Species code 101170 getCodeDetail("SpecWoRMS", 101170) -} -\author{ -Colin Millar. } \seealso{ \code{\link{getCodeTypeList}} and \code{\link{getCodeList}} get code types diff --git a/man/getCodeList.Rd b/man/getCodeList.Rd index 0cc95d8..a1b80d3 100644 --- a/man/getCodeList.Rd +++ b/man/getCodeList.Rd @@ -22,9 +22,6 @@ Get codes of a given code type. # Species codes getCodeList("SpecWoRMS") -} -\author{ -Colin Millar. } \seealso{ \code{\link{getCodeTypeList}} and \code{\link{getCodeDetail}} get code types diff --git a/man/getCodeTypeList.Rd b/man/getCodeTypeList.Rd index ff1b3b6..0d23c37 100644 --- a/man/getCodeTypeList.Rd +++ b/man/getCodeTypeList.Rd @@ -24,9 +24,6 @@ types[grep("worms", tolower(types$Description)),] # This code type "SpecWoRMS" can be used when calling getCodeList() -} -\author{ -Colin Millar. } \seealso{ \code{\link{getCodeList}} and \code{\link{getCodeDetail}} get codes of a From 5c2f2835b4b34cfab1ea19a94c0510f9d7cac1ff Mon Sep 17 00:00:00 2001 From: colin millar Date: Tue, 6 Dec 2016 10:35:13 +0100 Subject: [PATCH 03/19] added findCodeType search function --- NAMESPACE | 1 + R/findCodeType.R | 41 +++++++++++++++++++++++++++++++++++++++++ R/getCodeTypeList.R | 6 +++++- man/findCodeType.Rd | 33 +++++++++++++++++++++++++++++++++ man/getCodeTypeList.Rd | 6 +++++- 5 files changed, 85 insertions(+), 2 deletions(-) create mode 100644 R/findCodeType.R create mode 100644 man/findCodeType.Rd diff --git a/NAMESPACE b/NAMESPACE index 6fddb10..f6f3c6c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,6 +1,7 @@ # Generated by roxygen2: do not edit by hand export(findAphia) +export(findCodeType) export(getCodeDetail) export(getCodeList) export(getCodeTypeList) diff --git a/R/findCodeType.R b/R/findCodeType.R new file mode 100644 index 0000000..e3cae57 --- /dev/null +++ b/R/findCodeType.R @@ -0,0 +1,41 @@ +#' Find a Key +#' +#' Find a lookup key corresponding to a code type. +#' +#' @param code a code name, e.g. Gear Type, or DATRAS to find all DATRAS related codes. +#' @param regex whether to match the stock name as a regular expression. +#' @param full whether to return a data frame with all stock list columns. +#' +#' @return A vector of keys (default) or a data frame if full is TRUE. +#' +#' @seealso +#' \code{\link{getCodeTypeList}} gets a list of code types. +#' +#' \code{\link{icesVocab-package}} gives an overview of the package. +#' +#' @examples +#' findCodeType("worms") +#' +#' findCodeType("DATRAS", full = TRUE) +#' +#' @export + +findCodeType <- function(code, regex = TRUE, full = FALSE) +{ + + # get code list + types <- getCodeTypeList() + + # apply filters + code <- tolower(code) + if (!regex) code <- paste0("^", code, "$") + select <- unlist(lapply(code, grep, tolower(types$Description))) + types <- types[select,] + + # return + if (full) { + types + } else { + types$Key + } +} diff --git a/R/getCodeTypeList.R b/R/getCodeTypeList.R index 197de95..e0c74b5 100644 --- a/R/getCodeTypeList.R +++ b/R/getCodeTypeList.R @@ -12,6 +12,8 @@ #' \code{\link{getCodeList}} and \code{\link{getCodeDetail}} get codes of a #' given type and code details. #' +#' \code{\link{findCodeType}} searches for a code types based on a search string. +#' #' \code{\link{icesVocab-package}} gives an overview of the package. #' #' @examples @@ -19,7 +21,9 @@ #' types <- getCodeTypeList() #' types[grep("worms", tolower(types$Description)),] #' -#' # This code type "SpecWoRMS" can be used when calling getCodeList() +#' findCodeType("worms", full = TRUE) +#' +#' # The code type "SpecWoRMS" can be used when calling getCodeList() #' #' @export diff --git a/man/findCodeType.Rd b/man/findCodeType.Rd new file mode 100644 index 0000000..cc290be --- /dev/null +++ b/man/findCodeType.Rd @@ -0,0 +1,33 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/findCodeType.R +\name{findCodeType} +\alias{findCodeType} +\title{Find a Key} +\usage{ +findCodeType(code, regex = TRUE, full = FALSE) +} +\arguments{ +\item{code}{a code name, e.g. Gear Type, or DATRAS to find all DATRAS related codes.} + +\item{regex}{whether to match the stock name as a regular expression.} + +\item{full}{whether to return a data frame with all stock list columns.} +} +\value{ +A vector of keys (default) or a data frame if full is TRUE. +} +\description{ +Find a lookup key corresponding to a code type. +} +\examples{ +findCodeType("worms") + +findCodeType("DATRAS", full = TRUE) + +} +\seealso{ +\code{\link{getCodeTypeList}} gets a list of code types. + +\code{\link{icesVocab-package}} gives an overview of the package. +} + diff --git a/man/getCodeTypeList.Rd b/man/getCodeTypeList.Rd index 0d23c37..3ca417f 100644 --- a/man/getCodeTypeList.Rd +++ b/man/getCodeTypeList.Rd @@ -22,13 +22,17 @@ available. types <- getCodeTypeList() types[grep("worms", tolower(types$Description)),] -# This code type "SpecWoRMS" can be used when calling getCodeList() +findCodeType("worms", full = TRUE) + +# The code type "SpecWoRMS" can be used when calling getCodeList() } \seealso{ \code{\link{getCodeList}} and \code{\link{getCodeDetail}} get codes of a given type and code details. +\code{\link{findCodeType}} searches for a code types based on a search string. + \code{\link{icesVocab-package}} gives an overview of the package. } From fdfca15b05873ff1abd811cedf5171c0f2c17625 Mon Sep 17 00:00:00 2001 From: colin millar Date: Tue, 6 Dec 2016 10:35:13 +0100 Subject: [PATCH 04/19] added findCodeType search function --- NAMESPACE | 1 + R/findCodeType.R | 45 ++++++++++++++++++++++++++++++++++++++++++ R/getCodeTypeList.R | 6 +++++- man/findCodeType.Rd | 38 +++++++++++++++++++++++++++++++++++ man/getCodeTypeList.Rd | 6 +++++- 5 files changed, 94 insertions(+), 2 deletions(-) create mode 100644 R/findCodeType.R create mode 100644 man/findCodeType.Rd diff --git a/NAMESPACE b/NAMESPACE index 6fddb10..f6f3c6c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,6 +1,7 @@ # Generated by roxygen2: do not edit by hand export(findAphia) +export(findCodeType) export(getCodeDetail) export(getCodeList) export(getCodeTypeList) diff --git a/R/findCodeType.R b/R/findCodeType.R new file mode 100644 index 0000000..5d22328 --- /dev/null +++ b/R/findCodeType.R @@ -0,0 +1,45 @@ +#' Find a Key +#' +#' Find a lookup key corresponding to a code type. +#' +#' @param code a code name, e.g. Gear Type, or DATRAS to find all DATRAS related codes. +#' @param date restrict output to code types modified after a given date in +#' yyyy-mm-dd format, e.g. "2010-12-01" +#' @param regex whether to match the stock name as a regular expression. +#' @param full whether to return a data frame with all stock list columns. +#' +#' @return A vector of keys (default) or a data frame if full is TRUE. +#' +#' @seealso +#' \code{\link{getCodeTypeList}} gets a list of code types. +#' +#' \code{\link{icesVocab-package}} gives an overview of the package. +#' +#' @examples +#' findCodeType("worms") +#' +#' findCodeType("DATRAS", full = TRUE) +#' +#' findCodeType("DATRAS", full = TRUE, date = "2010-01-01") +#' +#' @export + +findCodeType <- function(code, date = NULL, regex = TRUE, full = FALSE) +{ + + # get code list + types <- getCodeTypeList(date) + + # apply filters + code <- tolower(code) + if (!regex) code <- paste0("^", code, "$") + select <- unlist(lapply(code, grep, tolower(types$Description))) + types <- types[select,] + + # return + if (full) { + types + } else { + types$Key + } +} diff --git a/R/getCodeTypeList.R b/R/getCodeTypeList.R index 197de95..e0c74b5 100644 --- a/R/getCodeTypeList.R +++ b/R/getCodeTypeList.R @@ -12,6 +12,8 @@ #' \code{\link{getCodeList}} and \code{\link{getCodeDetail}} get codes of a #' given type and code details. #' +#' \code{\link{findCodeType}} searches for a code types based on a search string. +#' #' \code{\link{icesVocab-package}} gives an overview of the package. #' #' @examples @@ -19,7 +21,9 @@ #' types <- getCodeTypeList() #' types[grep("worms", tolower(types$Description)),] #' -#' # This code type "SpecWoRMS" can be used when calling getCodeList() +#' findCodeType("worms", full = TRUE) +#' +#' # The code type "SpecWoRMS" can be used when calling getCodeList() #' #' @export diff --git a/man/findCodeType.Rd b/man/findCodeType.Rd new file mode 100644 index 0000000..f93eb15 --- /dev/null +++ b/man/findCodeType.Rd @@ -0,0 +1,38 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/findCodeType.R +\name{findCodeType} +\alias{findCodeType} +\title{Find a Key} +\usage{ +findCodeType(code, date = NULL, regex = TRUE, full = FALSE) +} +\arguments{ +\item{code}{a code name, e.g. Gear Type, or DATRAS to find all DATRAS related codes.} + +\item{date}{restrict output to code types modified after a given date in +yyyy-mm-dd format, e.g. "2010-12-01"} + +\item{regex}{whether to match the stock name as a regular expression.} + +\item{full}{whether to return a data frame with all stock list columns.} +} +\value{ +A vector of keys (default) or a data frame if full is TRUE. +} +\description{ +Find a lookup key corresponding to a code type. +} +\examples{ +findCodeType("worms") + +findCodeType("DATRAS", full = TRUE) + +findCodeType("DATRAS", full = TRUE, date = "2010-01-01") + +} +\seealso{ +\code{\link{getCodeTypeList}} gets a list of code types. + +\code{\link{icesVocab-package}} gives an overview of the package. +} + diff --git a/man/getCodeTypeList.Rd b/man/getCodeTypeList.Rd index 0d23c37..3ca417f 100644 --- a/man/getCodeTypeList.Rd +++ b/man/getCodeTypeList.Rd @@ -22,13 +22,17 @@ available. types <- getCodeTypeList() types[grep("worms", tolower(types$Description)),] -# This code type "SpecWoRMS" can be used when calling getCodeList() +findCodeType("worms", full = TRUE) + +# The code type "SpecWoRMS" can be used when calling getCodeList() } \seealso{ \code{\link{getCodeList}} and \code{\link{getCodeDetail}} get codes of a given type and code details. +\code{\link{findCodeType}} searches for a code types based on a search string. + \code{\link{icesVocab-package}} gives an overview of the package. } From 5dfc380d8773580c21ca5d0bbdd79d8f54b6d4c7 Mon Sep 17 00:00:00 2001 From: colin millar Date: Tue, 6 Dec 2016 10:42:41 +0100 Subject: [PATCH 05/19] Add search functions to Rd file --- R/getCodeList.R | 5 ++++- man/getCodeList.Rd | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/R/getCodeList.R b/R/getCodeList.R index 698e62f..69b3952 100644 --- a/R/getCodeList.R +++ b/R/getCodeList.R @@ -12,10 +12,13 @@ #' \code{\link{getCodeTypeList}} and \code{\link{getCodeDetail}} get code types #' and code details. #' +#' \code{\link{findCodeType}} and \code{\link{findAphia}} provide support for searching the code type and code lists. +#' #' \code{\link{icesVocab-package}} gives an overview of the package. #' #' @examples -#' # Species codes +#' # Aphia Species codes +#' findCodeType("aphia", full = TRUE) #' getCodeList("SpecWoRMS") #' #' @export diff --git a/man/getCodeList.Rd b/man/getCodeList.Rd index a1b80d3..ad321a1 100644 --- a/man/getCodeList.Rd +++ b/man/getCodeList.Rd @@ -19,7 +19,8 @@ A data frame. Get codes of a given code type. } \examples{ -# Species codes +# Aphia Species codes +findCodeType("aphia", full = TRUE) getCodeList("SpecWoRMS") } @@ -27,6 +28,8 @@ getCodeList("SpecWoRMS") \code{\link{getCodeTypeList}} and \code{\link{getCodeDetail}} get code types and code details. +\code{\link{findCodeType}} and \code{\link{findAphia}} provide support for searching the code type and code lists. + \code{\link{icesVocab-package}} gives an overview of the package. } From f2d0c00e6f5697c03775b9a59a467aebe0f8aa6b Mon Sep 17 00:00:00 2001 From: colin millar Date: Tue, 6 Dec 2016 10:46:29 +0100 Subject: [PATCH 06/19] add findAphia to Rd file --- R/getCodeList.R | 2 ++ man/getCodeList.Rd | 2 ++ 2 files changed, 4 insertions(+) diff --git a/R/getCodeList.R b/R/getCodeList.R index 69b3952..75c13a3 100644 --- a/R/getCodeList.R +++ b/R/getCodeList.R @@ -21,6 +21,8 @@ #' findCodeType("aphia", full = TRUE) #' getCodeList("SpecWoRMS") #' +#' findAphia("cod", full = TRUE) +#' #' @export getCodeList <- function(code_type, date = NULL) { diff --git a/man/getCodeList.Rd b/man/getCodeList.Rd index ad321a1..9e87830 100644 --- a/man/getCodeList.Rd +++ b/man/getCodeList.Rd @@ -23,6 +23,8 @@ Get codes of a given code type. findCodeType("aphia", full = TRUE) getCodeList("SpecWoRMS") +findAphia("cod", full = TRUE) + } \seealso{ \code{\link{getCodeTypeList}} and \code{\link{getCodeDetail}} get code types From feb9d5da6a77d2eeb182dd67aaeb6752b12674a8 Mon Sep 17 00:00:00 2001 From: colin millar Date: Tue, 6 Dec 2016 11:38:54 +0100 Subject: [PATCH 07/19] Add findCode function --- NAMESPACE | 1 + R/findCode.R | 68 ++++++++++++++++++++++++++++++++++++++++++++ R/getCodeDetail.R | 6 ++++ man/findCode.Rd | 44 ++++++++++++++++++++++++++++ man/getCodeDetail.Rd | 6 ++++ 5 files changed, 125 insertions(+) create mode 100644 R/findCode.R create mode 100644 man/findCode.Rd diff --git a/NAMESPACE b/NAMESPACE index f6f3c6c..47aff6b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,6 +1,7 @@ # Generated by roxygen2: do not edit by hand export(findAphia) +export(findCode) export(findCodeType) export(getCodeDetail) export(getCodeList) diff --git a/R/findCode.R b/R/findCode.R new file mode 100644 index 0000000..109f59b --- /dev/null +++ b/R/findCode.R @@ -0,0 +1,68 @@ +#' Find a Key +#' +#' Look up a key for a given code and code type. +#' +#' @param code_type a search string for a code type, e.g. SpecWorms, or simply worms. +#' @param code a search string for a code, e.g. a species name, cod, or ship name, Clupea. +#' @param regex whether to match as a regular expression. +#' @param full whether to return a data frame. +#' +#' @details Matches are case-insensitive. +#' +#' @return A vector of keys (default) or a data frame if full is TRUE. +#' +#' @seealso +#' \code{\link{getCodeList}} can be used to get all code types; see example on +#' that help page. +#' +#' \code{\link{icesVocab-package}} gives an overview of the package. +#' +#' @examples +#' findCode("aphia", "cod") +#' +#' # Multiple matches +#' findCode("aphia", c("cod", "haddock", "saithe"), full = TRUE) +#' +#' findCodeType("ship", full = TRUE) +#' findCode("ship", "clupea", full = TRUE) +#' +#' +#' @export + +findCode <- function(code_type, code, regex = TRUE, full = FALSE) { + + # get code types + type <- findCodeType(code_type, regex = regex) + # if multiple types - don't combine + if (length(type) == 0) { + return(NULL) # ? + } + codes <- lapply(type, getCodeList) + + # apply filters (to each list element) + code <- tolower(code) + if (!regex) code <- paste0("^", code, "$") + codes <- + lapply(codes, + function(x) { + select <- c(unlist(lapply(code, grep, tolower(x$LongDescription))), + unlist(lapply(code, grep, tolower(x$Description)))) + select <- sort(unique(select)) + x <- x[select,] + + if (full) { + x + } else { + x$Key + } + }) + + # add type as name + names(codes) <- type + + # drop empty types + codes <- codes[sapply(codes, nrow) > 0] + + # return + codes +} diff --git a/R/getCodeDetail.R b/R/getCodeDetail.R index 43e8378..a2f3e04 100644 --- a/R/getCodeDetail.R +++ b/R/getCodeDetail.R @@ -17,6 +17,12 @@ #' # Species code 101170 #' getCodeDetail("SpecWoRMS", 101170) #' +#' # find details of Haddock using the aphia ID +#' findCodeType("aphia", full = TRUE) +#' findCode("species", "haddock", full = TRUE) +#' +#' findAphia("cod", full = TRUE) +#' #' @export getCodeDetail <- function(code_type, code) { diff --git a/man/findCode.Rd b/man/findCode.Rd new file mode 100644 index 0000000..00a0863 --- /dev/null +++ b/man/findCode.Rd @@ -0,0 +1,44 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/findCode.R +\name{findCode} +\alias{findCode} +\title{Find a Key} +\usage{ +findCode(code_type, code, regex = TRUE, full = FALSE) +} +\arguments{ +\item{code_type}{a search string for a code type, e.g. SpecWorms, or simply worms.} + +\item{code}{a search string for a code, e.g. a species name, cod, or ship name, Clupea.} + +\item{regex}{whether to match as a regular expression.} + +\item{full}{whether to return a data frame.} +} +\value{ +A vector of keys (default) or a data frame if full is TRUE. +} +\description{ +Look up a key for a given code and code type. +} +\details{ +Matches are case-insensitive. +} +\examples{ +findCode("aphia", "cod") + +# Multiple matches +findCode("aphia", c("cod", "haddock", "saithe"), full = TRUE) + +findCodeType("ship", full = TRUE) +findCode("ship", "clupea", full = TRUE) + + +} +\seealso{ +\code{\link{getCodeList}} can be used to get all code types; see example on +that help page. + +\code{\link{icesVocab-package}} gives an overview of the package. +} + diff --git a/man/getCodeDetail.Rd b/man/getCodeDetail.Rd index 5dd32c3..3ac7025 100644 --- a/man/getCodeDetail.Rd +++ b/man/getCodeDetail.Rd @@ -21,6 +21,12 @@ Get details for a given code. # Species code 101170 getCodeDetail("SpecWoRMS", 101170) +# find details of Haddock using the aphia ID +findCodeType("aphia", full = TRUE) +findCode("species", "haddock", full = TRUE) + +findAphia("cod", full = TRUE) + } \seealso{ \code{\link{getCodeTypeList}} and \code{\link{getCodeList}} get code types From d0512ff2db4bdcbf981f745a651e0b10d5f7d068 Mon Sep 17 00:00:00 2001 From: colin millar Date: Tue, 6 Dec 2016 11:39:13 +0100 Subject: [PATCH 08/19] widen search for findCodeType --- R/findCodeType.R | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/R/findCodeType.R b/R/findCodeType.R index 5d22328..770e5d9 100644 --- a/R/findCodeType.R +++ b/R/findCodeType.R @@ -33,7 +33,10 @@ findCodeType <- function(code, date = NULL, regex = TRUE, full = FALSE) # apply filters code <- tolower(code) if (!regex) code <- paste0("^", code, "$") - select <- unlist(lapply(code, grep, tolower(types$Description))) + select <- c(unlist(lapply(code, grep, tolower(types$LongDescription))), + unlist(lapply(code, grep, tolower(types$Description))), + unlist(lapply(code, grep, tolower(types$Key)))) + select <- sort(unique(select)) types <- types[select,] # return From a7a3e9caf419b2402a7eb0ce4ca5f2c4605a5fb0 Mon Sep 17 00:00:00 2001 From: colin millar Date: Tue, 6 Dec 2016 11:45:41 +0100 Subject: [PATCH 09/19] small bug, plus extend code search to Key field --- R/findCode.R | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/R/findCode.R b/R/findCode.R index 109f59b..1f06ec9 100644 --- a/R/findCode.R +++ b/R/findCode.R @@ -40,13 +40,14 @@ findCode <- function(code_type, code, regex = TRUE, full = FALSE) { codes <- lapply(type, getCodeList) # apply filters (to each list element) - code <- tolower(code) + code <- tolower(as.character(code)) if (!regex) code <- paste0("^", code, "$") codes <- lapply(codes, function(x) { select <- c(unlist(lapply(code, grep, tolower(x$LongDescription))), - unlist(lapply(code, grep, tolower(x$Description)))) + unlist(lapply(code, grep, tolower(x$Description))), + unlist(lapply(code, grep, tolower(x$Key)))) select <- sort(unique(select)) x <- x[select,] @@ -61,7 +62,7 @@ findCode <- function(code_type, code, regex = TRUE, full = FALSE) { names(codes) <- type # drop empty types - codes <- codes[sapply(codes, nrow) > 0] + codes <- codes[sapply(codes, length) > 0] # return codes From 6441a9ed747f3bc48b5fb5556b5b1a44c62c56da Mon Sep 17 00:00:00 2001 From: colin millar Date: Tue, 6 Dec 2016 11:52:21 +0100 Subject: [PATCH 10/19] bug fix --- R/findCode.R | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/R/findCode.R b/R/findCode.R index 1f06ec9..1e2021e 100644 --- a/R/findCode.R +++ b/R/findCode.R @@ -62,7 +62,11 @@ findCode <- function(code_type, code, regex = TRUE, full = FALSE) { names(codes) <- type # drop empty types - codes <- codes[sapply(codes, length) > 0] + if (full) { + codes <- codes[sapply(codes, nrow) > 0] + } else { + codes <- codes[sapply(codes, length) > 0] + } # return codes From a52807ed977b91b4adc071be1242ccef186bbd99 Mon Sep 17 00:00:00 2001 From: colin millar Date: Tue, 6 Dec 2016 12:03:20 +0100 Subject: [PATCH 11/19] small alteration to Rd --- R/getCodeDetail.R | 3 +-- man/getCodeDetail.Rd | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/R/getCodeDetail.R b/R/getCodeDetail.R index a2f3e04..bbe242a 100644 --- a/R/getCodeDetail.R +++ b/R/getCodeDetail.R @@ -18,10 +18,9 @@ #' getCodeDetail("SpecWoRMS", 101170) #' #' # find details of Haddock using the aphia ID -#' findCodeType("aphia", full = TRUE) #' findCode("species", "haddock", full = TRUE) #' -#' findAphia("cod", full = TRUE) +#' getCodeDetail("SpecWoRMS", 126437) #' #' @export diff --git a/man/getCodeDetail.Rd b/man/getCodeDetail.Rd index 3ac7025..a71b6e3 100644 --- a/man/getCodeDetail.Rd +++ b/man/getCodeDetail.Rd @@ -22,10 +22,9 @@ Get details for a given code. getCodeDetail("SpecWoRMS", 101170) # find details of Haddock using the aphia ID -findCodeType("aphia", full = TRUE) findCode("species", "haddock", full = TRUE) -findAphia("cod", full = TRUE) +getCodeDetail("SpecWoRMS", 126437) } \seealso{ From 3695e31a033e74b28b87b0da9ed431440315d38f Mon Sep 17 00:00:00 2001 From: colin millar Date: Tue, 6 Dec 2016 12:41:33 +0100 Subject: [PATCH 12/19] parse vocab detail function developed --- R/getCodeDetail.R | 6 +---- R/utilities.R | 60 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 61 insertions(+), 5 deletions(-) diff --git a/R/getCodeDetail.R b/R/getCodeDetail.R index bbe242a..e7a8057 100644 --- a/R/getCodeDetail.R +++ b/R/getCodeDetail.R @@ -33,11 +33,7 @@ getCodeDetail <- function(code_type, code) { # read url contents out <- readVocab(url) # parse the text string returning a dataframe - out <- parseVocab(out) - - # for now, drop parent and child relations - out <- out[!grepl("ParentRelation", names(out))] - out <- out[!grepl("ChildRelation", names(out))] + out <- parseVocabDetail(out) out } diff --git a/R/utilities.R b/R/utilities.R index 4430b02..9973288 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -56,6 +56,66 @@ parseVocab <- function(x) { x } +#' @importFrom XML xlmToList +parseVocabDetail <- function(x) { + # parse the xml text string suppplied by the Datras webservice + # returning a dataframe + x <- out + x <- xmlParse(x) + + # convet to list + x <- xmlToList(x)[[1]] + + # get top row + header <- as.data.frame(x[1:5]) + + # get parents + parents <- x[names(x) == "ParentRelation"] + parent_code <- + do.call(rbind, + lapply(unname(parents), + function(y) { + code <- y$Code + code[sapply(code, is.null)] <- NA + as.data.frame(code) + })) + parent_code_type <- + do.call(rbind, + lapply(unname(parents), + function(y) { + code <- y$CodeType + code[sapply(code, is.null)] <- NA + as.data.frame(code) + })) + + # get children + children <- x[names(x) == "ChildRelation"] + child_code <- + do.call(rbind, + lapply(unname(children), + function(y) { + code <- y$Code + code[sapply(code, is.null)] <- NA + as.data.frame(code) + })) + child_code_type <- + do.call(rbind, + lapply(unname(children), + function(y) { + code <- y$CodeType + code[sapply(code, is.null)] <- NA + as.data.frame(code) + })) + + # restructure + out <- list(detail = header, + parents = list(code_types = parent_code_type, codes = parent_code), + children = list(code_types = child_code_type, codes = child_code)) + + # return + out +} + checkVocabWebserviceOK <- function() { # return TRUE if webservice server is good, FALSE otherwise From f5d3e5610fbbaa96de0a60e85d3de94f0fb0e954 Mon Sep 17 00:00:00 2001 From: colin millar Date: Tue, 6 Dec 2016 12:52:43 +0100 Subject: [PATCH 13/19] Improved CodeDetail function --- NAMESPACE | 1 + R/getCodeDetail.R | 6 +++++- R/utilities.R | 31 +++++++++---------------------- man/getCodeDetail.Rd | 5 ++++- 4 files changed, 19 insertions(+), 24 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 47aff6b..d8abd33 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -8,6 +8,7 @@ export(getCodeList) export(getCodeTypeList) importFrom(XML,getChildrenStrings) importFrom(XML,removeNodes) +importFrom(XML,xlmToList) importFrom(XML,xmlParse) importFrom(XML,xmlRoot) importFrom(XML,xmlSize) diff --git a/R/getCodeDetail.R b/R/getCodeDetail.R index e7a8057..0a6e5bb 100644 --- a/R/getCodeDetail.R +++ b/R/getCodeDetail.R @@ -14,7 +14,7 @@ #' \code{\link{icesVocab-package}} gives an overview of the package. #' #' @examples -#' # Species code 101170 +#' # Species code 101170 - Myxine glutinosa #' getCodeDetail("SpecWoRMS", 101170) #' #' # find details of Haddock using the aphia ID @@ -22,9 +22,13 @@ #' #' getCodeDetail("SpecWoRMS", 126437) #' +#' # get info for had-43 +#' getCodeDetail("ICES_StockCode", "had-34") +#' #' @export getCodeDetail <- function(code_type, code) { + message("The output from this function is developing. please do not rely on the current output format") # form url url <- sprintf("http://vocab.ices.dk/services/pox/GetCodeDetail/%s/%s", diff --git a/R/utilities.R b/R/utilities.R index 9973288..b4abb00 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -60,52 +60,39 @@ parseVocab <- function(x) { parseVocabDetail <- function(x) { # parse the xml text string suppplied by the Datras webservice # returning a dataframe - x <- out x <- xmlParse(x) # convet to list x <- xmlToList(x)[[1]] # get top row - header <- as.data.frame(x[1:5]) + todf <- function(y) { + y[sapply(y, is.null)] <- NA + as.data.frame(y) + } + header <- todf(x[1:5]) # get parents parents <- x[names(x) == "ParentRelation"] parent_code <- do.call(rbind, lapply(unname(parents), - function(y) { - code <- y$Code - code[sapply(code, is.null)] <- NA - as.data.frame(code) - })) + function(y) todf(y$Code))) parent_code_type <- do.call(rbind, lapply(unname(parents), - function(y) { - code <- y$CodeType - code[sapply(code, is.null)] <- NA - as.data.frame(code) - })) + function(y) todf(y$CodeType))) # get children children <- x[names(x) == "ChildRelation"] child_code <- do.call(rbind, lapply(unname(children), - function(y) { - code <- y$Code - code[sapply(code, is.null)] <- NA - as.data.frame(code) - })) + function(y) todf(y$Code))) child_code_type <- do.call(rbind, lapply(unname(children), - function(y) { - code <- y$CodeType - code[sapply(code, is.null)] <- NA - as.data.frame(code) - })) + function(y) todf(y$CodeType))) # restructure out <- list(detail = header, diff --git a/man/getCodeDetail.Rd b/man/getCodeDetail.Rd index a71b6e3..9b8d8c9 100644 --- a/man/getCodeDetail.Rd +++ b/man/getCodeDetail.Rd @@ -18,7 +18,7 @@ A data frame. Get details for a given code. } \examples{ -# Species code 101170 +# Species code 101170 - Myxine glutinosa getCodeDetail("SpecWoRMS", 101170) # find details of Haddock using the aphia ID @@ -26,6 +26,9 @@ findCode("species", "haddock", full = TRUE) getCodeDetail("SpecWoRMS", 126437) +# get info for had-43 +getCodeDetail("ICES_StockCode", "had-34") + } \seealso{ \code{\link{getCodeTypeList}} and \code{\link{getCodeList}} get code types From a649bead51323aa3652cd44e2417926f3ca5b99e Mon Sep 17 00:00:00 2001 From: colin millar Date: Tue, 6 Dec 2016 12:57:03 +0100 Subject: [PATCH 14/19] misspelled imported function name --- NAMESPACE | 2 +- R/utilities.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index d8abd33..8f38abf 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -8,8 +8,8 @@ export(getCodeList) export(getCodeTypeList) importFrom(XML,getChildrenStrings) importFrom(XML,removeNodes) -importFrom(XML,xlmToList) importFrom(XML,xmlParse) importFrom(XML,xmlRoot) importFrom(XML,xmlSize) +importFrom(XML,xmlToList) importFrom(utils,download.file) diff --git a/R/utilities.R b/R/utilities.R index b4abb00..482e222 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -56,7 +56,7 @@ parseVocab <- function(x) { x } -#' @importFrom XML xlmToList +#' @importFrom XML xmlToList parseVocabDetail <- function(x) { # parse the xml text string suppplied by the Datras webservice # returning a dataframe From 989e810304a9f16cd7ebf88508a1ae28cf06117c Mon Sep 17 00:00:00 2001 From: colin millar Date: Tue, 6 Dec 2016 13:00:56 +0100 Subject: [PATCH 15/19] adding web service defintion to read me references --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index 15a1ffd..5c64cb1 100644 --- a/README.md +++ b/README.md @@ -45,6 +45,10 @@ ICES Vocabularies database: [http://vocab.ices.dk/](http://vocab.ices.dk/) +ICES Vocabularies POX web service definition: + +[http://vocab.ices.dk/services/POX.aspx](http://vocab.ices.dk/services/POX.aspx) + Development ----------- From c2b124b0dc267c189eb16dcf0d2cc4d7cab5c0c7 Mon Sep 17 00:00:00 2001 From: colin millar Date: Tue, 6 Dec 2016 13:11:13 +0100 Subject: [PATCH 16/19] added NEWS file --- NEWS | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 NEWS diff --git a/NEWS b/NEWS new file mode 100644 index 0000000..5354ddf --- /dev/null +++ b/NEWS @@ -0,0 +1,22 @@ +-------------------------------------------------------------------------------- +icesVocab 1.1-0 (2016-12-08) +-------------------------------------------------------------------------------- +o Added function findAphia() to look up species codes. + +o Added function findCode() to look up codes given a code type. + +o Added function findCodeType() to look up code types. + +o Improved XML parsing, so both leading and trailing white space is removed. + +o Improved XML parsing, so "" and "NA" is converted to NA. + +o Improved XML parsing of code details so that child and parent relations are returned. + +o Improved url reading to avoid the use of RCurl. + + +-------------------------------------------------------------------------------- +icesVocab 1.0-0 (2016-09-15) +-------------------------------------------------------------------------------- +o Initial release. From 794d49344e47ed1df67b64159dbf565e62a4eeda Mon Sep 17 00:00:00 2001 From: colin millar Date: Tue, 6 Dec 2016 13:12:20 +0100 Subject: [PATCH 17/19] removed knitr from DESCRIPTION --- DESCRIPTION | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 3b9efd3..b257c2c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,13 +1,12 @@ Package: icesVocab Version: 1.1-0 -Date: 2016-12-05 +Date: 2016-12-08 Title: ICES Vocabularies Database Web Services Authors@R: c(person("Colin", "Millar", role=c("aut","cre"), email="colin.millar@ices.dk"), person("Arni", "Magnusson", role="aut")) Imports: utils, XML -Suggests: knitr, - testthat +Suggests: testthat Description: R interface to access the RECO POX web services of the ICES (International Council for the Exploration of the Sea) Vocabularies database . License: GPL (>= 2) From 9a88ed06e3c54ed2e24ddcfd27ae8849996e13da Mon Sep 17 00:00:00 2001 From: colin millar Date: Tue, 6 Dec 2016 13:13:08 +0100 Subject: [PATCH 18/19] added cran-comments file --- cran-comments.md | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 cran-comments.md diff --git a/cran-comments.md b/cran-comments.md new file mode 100644 index 0000000..bad69db --- /dev/null +++ b/cran-comments.md @@ -0,0 +1,23 @@ +## Test environments +* local OS X install, R 3.3.2 +* ubuntu 12.04 (on travis-ci), R 3.3.1 +* win-builder (devel and release) + +## R CMD check results + +0 errors | 0 warnings | 1 note + +* This is a new release. + +## Reverse dependencies + +This is a new release, so there are no reverse dependencies. + +--- + +* I have run R CMD check on the NUMBER downstream dependencies. + (Summary at ...). + +* FAILURE SUMMARY + +* All revdep maintainers were notified of the release on RELEASE DATE. From 70c961e97ddf42a774c62560452765fd5e88fe58 Mon Sep 17 00:00:00 2001 From: colin millar Date: Tue, 6 Dec 2016 13:20:37 +0100 Subject: [PATCH 19/19] getting cran ready --- R/getCodeList.R | 3 ++- cran-comments.md | 8 -------- man/getCodeList.Rd | 3 ++- 3 files changed, 4 insertions(+), 10 deletions(-) diff --git a/R/getCodeList.R b/R/getCodeList.R index 75c13a3..0b85bc5 100644 --- a/R/getCodeList.R +++ b/R/getCodeList.R @@ -19,7 +19,8 @@ #' @examples #' # Aphia Species codes #' findCodeType("aphia", full = TRUE) -#' getCodeList("SpecWoRMS") +#' codes <- getCodeList("SpecWoRMS") +#' head(codes) #' #' findAphia("cod", full = TRUE) #' diff --git a/cran-comments.md b/cran-comments.md index bad69db..f1aedc2 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -13,11 +13,3 @@ This is a new release, so there are no reverse dependencies. ---- - -* I have run R CMD check on the NUMBER downstream dependencies. - (Summary at ...). - -* FAILURE SUMMARY - -* All revdep maintainers were notified of the release on RELEASE DATE. diff --git a/man/getCodeList.Rd b/man/getCodeList.Rd index 9e87830..73283c9 100644 --- a/man/getCodeList.Rd +++ b/man/getCodeList.Rd @@ -21,7 +21,8 @@ Get codes of a given code type. \examples{ # Aphia Species codes findCodeType("aphia", full = TRUE) -getCodeList("SpecWoRMS") +codes <- getCodeList("SpecWoRMS") +head(codes) findAphia("cod", full = TRUE)