updated endpoint list (ropensci#37)

* updated endpoint list
* endpoints are now singular
* removed to_singular and to_plural
* simplified get_endpoints()
* put back to_plural()
* generated files
* restored group handling
* generated files
* endpoint name changes
* endpoint name changes
* generated files
* endpoint name changes
* generated files
* removed to_plural() again
* back to regular group checking
* bumped endpoint count and RoxygenNote version
* generated files
mustberuss · Dec 10, 2024 · e0ebd97 · e0ebd97
1 parent e1ca36b
commit e0ebd97
Show file tree

Hide file tree

Showing 19 changed files with 622 additions and 812 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -10,7 +10,7 @@ Authors@R: c(
 Encoding: UTF-8
 Description: Provides functions to simplify the 'PatentsView' API
     (<https://patentsview.org/apis/purpose>) query language,
-    send GET and POST requests to the API's seven endpoints, and parse the data
+    send GET and POST requests to the API's twenty-seven endpoints, and parse the data
     that comes back.
 URL: https://docs.ropensci.org/patentsview/index.html
 BugReports: https://github.com/ropensci/patentsview/issues
@@ -28,5 +28,5 @@ Suggests:
     rmarkdown,
     testthat,
     tidyr
-RoxygenNote: 7.1.2
+RoxygenNote: 7.3.2
 Roxygen: list(markdown = TRUE)
diff --git a/R/get-fields.R b/R/get-fields.R
@@ -7,21 +7,21 @@
 #' possible fields for each endpoint).
 #'
 #' @param endpoint The API endpoint whose field list you want to get. See
-#'   \code{\link{get_endpoints}} for a list of the 7 endpoints.
+#'   \code{\link{get_endpoints}} for a list of the 27 endpoints.
 #' @param groups A character vector giving the group(s) whose fields you want
 #'   returned. A value of \code{NULL} indicates that you want all of the
 #'   endpoint's fields (i.e., do not filter the field list based on group
 #'   membership). See the field tables located online to see which groups you
 #'   can specify for a given endpoint (e.g., the
-#'   \href{https://patentsview.org/apis/api-endpoints/patents}{patents
+#'   \href{https://search.patentsview.org/docs/docs/Search%20API/SearchAPIReference/#patent}{patent
 #'   endpoint table}), or use the \code{fieldsdf} table
-#'   (e.g., \code{unique(fieldsdf[fieldsdf$endpoint == "patents", "group"])}).
+#'   (e.g., \code{unique(fieldsdf[fieldsdf$endpoint == "patent", "group"])}).
 #'
 #' @return A character vector with field names.
 #'
 #' @examples
-#' # Get all assignee-level fields for the patents endpoint:
-#' fields <- get_fields(endpoint = "patents", groups = "assignees_at_grant")
+#' # Get all assignee-level fields for the patent endpoint:
+#' fields <- get_fields(endpoint = "patent", groups = "assignees")
 #'
 #' # ...Then pass to search_pv:
 #' \dontrun{
@@ -31,8 +31,8 @@
 #'   fields = fields
 #' )
 #' }
-#' # Get all patent and assignee-level fields for the patents endpoint:
-#' fields <- get_fields(endpoint = "patents", groups = c("assignees_at_grant", "patents"))
+#' # Get all patent and assignee-level fields for the patent endpoint:
+#' fields <- get_fields(endpoint = "patent", groups = c("assignees", "patents"))
 #'
 #' \dontrun{
 #' # ...Then pass to search_pv:
@@ -48,7 +48,7 @@ get_fields <- function(endpoint, groups = NULL) {
   if (is.null(groups)) {
     fieldsdf[fieldsdf$endpoint == endpoint, "field"]
   } else {
-    validate_groups(groups = groups)
+    validate_groups(endpoint, groups = groups)
     fieldsdf[fieldsdf$endpoint == endpoint & fieldsdf$group %in% groups, "field"]
   }
 }
@@ -61,10 +61,5 @@ get_fields <- function(endpoint, groups = NULL) {
 #' @return A character vector with the names of each endpoint.
 #' @export
 get_endpoints <- function() {
-  c(
-    "application_citations", "assignees", "cpc_groups", "cpc_subgroups",
-    "cpc_subsections", "inventors", "nber_categories",
-    "nber_subcategories", "patent_citations", "patents",
-    "uspc_subclasses", "uspc_mainclasses"
-  )
+  unique(fieldsdf$endpoint)
 }
diff --git a/R/process-error.R b/R/process-error.R
@@ -11,18 +11,19 @@ throw_if_loc_error <- function(resp) {
     if (num_grps > 2) {
       stop2(
         "Your request resulted in a 500 error, likely because you have ",
-        "requested too many fields in your request (the locations endpoint ",
+        "requested too many fields in your request (the location endpoint ",
         "currently has restrictions on the number of fields/groups you can ",
         "request). Try slimming down your field list and trying again."
       )
     }
   }
 }
 
+# TODO: confirm whether this location-endpoint 500-error check still applies to the new API
 #' @noRd
 hit_locations_ep <- function(url) {
   grepl(
-    "^https://api.patentsview.org/locations/",
+    "^https://search.patentsview.org/api/v1/location/",
     url,
     ignore.case = TRUE
   )
@@ -32,7 +33,7 @@ hit_locations_ep <- function(url) {
 get_num_groups <- function(url) {
   prsd_json_filds <- gsub(".*&f=([^&]*).*", "\\1", utils::URLdecode(url))
   fields <- jsonlite::fromJSON(prsd_json_filds)
-  grps <- fieldsdf[fieldsdf$endpoint == "locations" &
+  grps <- fieldsdf[fieldsdf$endpoint == "location" &
                      fieldsdf$field %in% fields, "group"]
   length(unique(grps))
 }
@@ -52,5 +53,5 @@ xheader_er_or_status <- function(resp) {
 #' @noRd
 get_x_status <- function(resp) {
   headers <- httr::headers(resp)
-  headers[grepl("x-status-reason", names(headers), ignore.case = TRUE)]
+  headers[grepl("x-status-reason$", names(headers), ignore.case = TRUE)]
 }
diff --git a/R/search-pv.R b/R/search-pv.R
@@ -1,6 +1,6 @@
 #' @noRd
 get_base <- function(endpoint) {
-  sprintf("https://search.patentsview.org/api/v1/%s/", to_singular(endpoint))
+  sprintf("https://search.patentsview.org/api/v1/%s/", endpoint)
 }
 
 #' @noRd
@@ -201,7 +201,7 @@ request_apply <- function(ex_res, method, query, base_url, arg_list, api_key, ..
 #'
 #' search_pv(
 #'   query = qry_funs$gt(patent_year = 2010),
-#'   fields = get_fields("patents", c("patents", "assignees_at_grant"))
+#'   fields = get_fields("patent", c("patents", "assignees"))
 #' )
 #'
 #' search_pv(
@@ -212,27 +212,27 @@ request_apply <- function(ex_res, method, query, base_url, arg_list, api_key, ..
 #' )
 #'
 #' search_pv(
-#'   query = qry_funs$eq(name_last = "crew"),
-#'   endpoint = "inventors",
+#'   query = qry_funs$eq(inventor_name_last = "Crew"),
+#'   endpoint = "inventor",
 #'   all_pages = TRUE
 #' )
 #'
 #' search_pv(
-#'   query = qry_funs$contains(name_last = "smith"),
-#'   endpoint = "assignees"
+#'   query = qry_funs$contains(assignee_individual_name_last = "Smith"),
+#'   endpoint = "assignee"
 #' )
 #'
 #' search_pv(
-#'   query = qry_funs$contains(inventors_at_grant.name_last = "smith"),
-#'   endpoint = "patents",
+#'   query = qry_funs$contains(inventors_at_grant.name_last = "Smith"),
+#'   endpoint = "patent",
 #'   config = httr::timeout(40)
 #' )
 #' }
 #'
 #' @export
 search_pv <- function(query,
                       fields = NULL,
-                      endpoint = "patents",
+                      endpoint = "patent",
                       subent_cnts = FALSE,
                       mtchd_subent_only = lifecycle::deprecated(),
                       page = 1,

diff --git a/R/unnest-pv-data.R b/R/unnest-pv-data.R
@@ -3,8 +3,8 @@
 #' This function suggests a value that you could use for the \code{pk} argument
 #' in \code{\link{unnest_pv_data}}, based on the endpoint you searched.
 #' It will return a potential unique identifier for a given entity (i.e., a
-#' given endpoint). For example, it will return "patent_number" when
-#' \code{endpoint = "patents"}.
+#' given endpoint). For example, it will return "patent_id" when
+#' \code{endpoint = "patent"}.
 #'
 #' @param endpoint The endpoint which you would like to know a potential primary
 #'   key for.
@@ -13,21 +13,15 @@
 #'   \code{\link{unnest_pv_data}}.
 #'
 #' @examples
-#' get_ok_pk(endpoint = "inventors") # Returns "inventor_id"
-#' get_ok_pk(endpoint = "cpc_subsections") # Returns "cpc_subsection_id"
+#' get_ok_pk(endpoint = "inventor")
+#' get_ok_pk(endpoint = "cpc_subclass")
+#' get_ok_pk("publication/rel_app_text")
 #'
 #' @export
 get_ok_pk <- function(endpoint) {
-  es_eps <- c(
-    "uspc_mainclasses" = "uspc_mainclass_id",
-    "nber_subcategories" = "nber_subcategory_id",
-    "patents" = "patent_number"
-  )
-  ifelse(
-    endpoint %in% names(es_eps),
-    es_eps[[endpoint]],
-    gsub("s$", "_id", endpoint)
-  )
+  unnested_endpoint <- sub("^(patent|publication)/", "", endpoint)
+  possible_pks <- c("patent_id", "document_number", paste0(unnested_endpoint, "_id"))
+  fieldsdf[fieldsdf$endpoint == endpoint & fieldsdf$field %in% possible_pks, "field"]
 }
 
 #' Unnest PatentsView data
@@ -58,9 +52,9 @@ get_ok_pk <- function(endpoint) {
 #' @examples
 #' \dontrun{
 #'
-#' fields <- c("patent_number", "patent_title", "inventor_city", "inventor_country")
+#' fields <- c("patent_id", "patent_title", "inventors.inventor_city", "inventors.inventor_country")
 #' res <- search_pv(query = '{"_gte":{"patent_year":2015}}', fields = fields)
-#' unnest_pv_data(data = res$data, pk = "patent_number")
+#' unnest_pv_data(data = res$data, pk = "patent_id")
 #' }
 #'
 #' @export

diff --git a/R/utils.R b/R/utils.R
@@ -11,29 +11,3 @@ format_num <- function(x) {
     big.mark = ",", scientific = FALSE, trim = TRUE
   )
 }
-
-#' @noRd
-to_singular <- function(plural) {
-  if (endsWith(plural, "ees")) {
-    sub("ees$", "ee", plural)
-  } else if (endsWith(plural, "ies")) {
-    sub("ies$", "y", plural)
-  } else if (endsWith(plural, "es")) {
-    sub("es$", "", plural)
-  } else if (endsWith(plural, "s")) {
-    sub("s$", "", plural)
-  } else {
-    plural
-  }
-}
-
-#' @noRd
-to_plural <- function(singular) {
-  if (endsWith(singular, "y")) {
-    sub("y$", "ies", singular)
-  } else if (endsWith(singular, "s")) {
-    paste0(singular, "es")
-  } else {
-    paste0(singular, "s")
-  }
-}
diff --git a/R/validate-args.R b/R/validate-args.R
@@ -46,11 +46,12 @@ validate_args <- function(api_key, fields, endpoint, method, page, per_page,
 }
 
 #' @noRd
-validate_groups <- function(groups) {
-  ok_grps <- unique(fieldsdf$group)
+validate_groups <- function(endpoint, groups) {
+  ok_grps <- unique(fieldsdf[fieldsdf$endpoint == endpoint, "group"])
   asrt(
     all(groups %in% ok_grps),
-    "group must be one of the following: ", paste(ok_grps, collapse = ", ")
+    "for the ", endpoint, " endpoint, group must be one of the following: ",
+    paste(ok_grps, collapse = ", ")
   )
 }