diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml
new file mode 100644
index 0000000..960234c
--- /dev/null
+++ b/.github/workflows/test-coverage.yaml
@@ -0,0 +1,50 @@
+# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
+# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
+on:
+ push:
+ branches: [main, master]
+ pull_request:
+ branches: [main, master]
+
+name: test-coverage
+
+jobs:
+ test-coverage:
+ runs-on: ubuntu-latest
+ env:
+ GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
+
+ steps:
+ - uses: actions/checkout@v4
+
+ - uses: r-lib/actions/setup-r@v2
+ with:
+ use-public-rspm: true
+
+ - uses: r-lib/actions/setup-r-dependencies@v2
+ with:
+ extra-packages: any::covr
+ needs: coverage
+
+ - name: Test coverage
+ run: |
+ covr::codecov(
+ quiet = FALSE,
+ clean = FALSE,
+ install_path = file.path(normalizePath(Sys.getenv("RUNNER_TEMP"), winslash = "/"), "package")
+ )
+ shell: Rscript {0}
+
+ - name: Show testthat output
+ if: always()
+ run: |
+ ## --------------------------------------------------------------------
+ find ${{ runner.temp }}/package -name 'testthat.Rout*' -exec cat '{}' \; || true
+ shell: bash
+
+ - name: Upload test results
+ if: failure()
+ uses: actions/upload-artifact@v4
+ with:
+ name: coverage-test-failures
+ path: ${{ runner.temp }}/package
diff --git a/DESCRIPTION b/DESCRIPTION
index 2f30ac8..53166c0 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
Package: QCkit
Type: Package
Title: NPS Inventory and Monitoring Quality Control Toolkit
-Version: 0.1.3
+Version: 0.1.4
Authors@R: c(
person(given = "Robert",
family = "Baker",
@@ -22,12 +22,12 @@ Authors@R: c(
comment = c(ORCID = "0000-0003-0129-981X")),
person(given = "Amy",
family = "Sherman",
- role = "aut",
+ role = "ctb",
comment = c(ORCID = "0000-0002-9276-0087"))
)
Maintainer: Rob Baker
Description: This package contains a set of useful functions for data munging, quality control, and data flagging. Functions are contributed by multiple U.S. National Park Service staff, contractors, partners and others. These functions will likely be most useful for quality control of NPS data but may have utility beyond their intended functions.
-License: MIT + file LICENSE
+License: CC0
Encoding: UTF-8
LazyData: true
Imports:
@@ -47,15 +47,15 @@ Imports:
data.table,
jsonlite,
here,
- wk,
tibble,
- tidyselect,
- rlang
+ tidyselect
RoxygenNote: 7.2.3
Suggests:
knitr,
rmarkdown,
- testthat (>= 3.0.0)
+ testthat (>= 3.0.0),
+ rlang,
+ wk
VignetteBuilder: knitr
URL: https://github.com/nationalparkservice/QCkit/
BugReports: https://nationalparkservice.github.io/QCkit/
diff --git a/LICENSE.md b/LICENSE.md
index 42cc132..139c68e 100644
--- a/LICENSE.md
+++ b/LICENSE.md
@@ -1,21 +1,43 @@
-# MIT License
-
-Copyright (c) 2022 QCkit authors
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
+## creative commons
+
+# CC0 1.0 Universal
+
+CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED HEREUNDER.
+
+### Statement of Purpose
+
+The laws of most jurisdictions throughout the world automatically confer exclusive Copyright and Related Rights (defined below) upon the creator and subsequent owner(s) (each and all, an "owner") of an original work of authorship and/or a database (each, a "Work").
+
+Certain owners wish to permanently relinquish those rights to a Work for the purpose of contributing to a commons of creative, cultural and scientific works ("Commons") that the public can reliably and without fear of later claims of infringement build upon, modify, incorporate in other works, reuse and redistribute as freely as possible in any form whatsoever and for any purposes, including without limitation commercial purposes. These owners may contribute to the Commons to promote the ideal of a free culture and the further production of creative, cultural and scientific works, or to gain reputation or greater distribution for their Work in part through the use and efforts of others.
+
+For these and/or other purposes and motivations, and without any expectation of additional consideration or compensation, the person associating CC0 with a Work (the "Affirmer"), to the extent that he or she is an owner of Copyright and Related Rights in the Work, voluntarily elects to apply CC0 to the Work and publicly distribute the Work under its terms, with knowledge of his or her Copyright and Related Rights in the Work and the meaning and intended legal effect of CC0 on those rights.
+
+1. __Copyright and Related Rights.__ A Work made available under CC0 may be protected by copyright and related or neighboring rights ("Copyright and Related Rights"). Copyright and Related Rights include, but are not limited to, the following:
+
+ i. the right to reproduce, adapt, distribute, perform, display, communicate, and translate a Work;
+
+ ii. moral rights retained by the original author(s) and/or performer(s);
+
+ iii. publicity and privacy rights pertaining to a person's image or likeness depicted in a Work;
+
+ iv. rights protecting against unfair competition in regards to a Work, subject to the limitations in paragraph 4(a), below;
+
+ v. rights protecting the extraction, dissemination, use and reuse of data in a Work;
+
+ vi. database rights (such as those arising under Directive 96/9/EC of the European Parliament and of the Council of 11 March 1996 on the legal protection of databases, and under any national implementation thereof, including any amended or successor version of such directive); and
+
+ vii. other similar, equivalent or corresponding rights throughout the world based on applicable law or treaty, and any national implementations thereof.
+
+2. __Waiver.__ To the greatest extent permitted by, but not in contravention of, applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and unconditionally waives, abandons, and surrenders all of Affirmer's Copyright and Related Rights and associated claims and causes of action, whether now known or unknown (including existing as well as future claims and causes of action), in the Work (i) in all territories worldwide, (ii) for the maximum duration provided by applicable law or treaty (including future time extensions), (iii) in any current or future medium and for any number of copies, and (iv) for any purpose whatsoever, including without limitation commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each member of the public at large and to the detriment of Affirmer's heirs and successors, fully intending that such Waiver shall not be subject to revocation, rescission, cancellation, termination, or any other legal or equitable action to disrupt the quiet enjoyment of the Work by the public as contemplated by Affirmer's express Statement of Purpose.
+
+3. __Public License Fallback.__ Should any part of the Waiver for any reason be judged legally invalid or ineffective under applicable law, then the Waiver shall be preserved to the maximum extent permitted taking into account Affirmer's express Statement of Purpose. In addition, to the extent the Waiver is so judged Affirmer hereby grants to each affected person a royalty-free, non transferable, non sublicensable, non exclusive, irrevocable and unconditional license to exercise Affirmer's Copyright and Related Rights in the Work (i) in all territories worldwide, (ii) for the maximum duration provided by applicable law or treaty (including future time extensions), (iii) in any current or future medium and for any number of copies, and (iv) for any purpose whatsoever, including without limitation commercial, advertising or promotional purposes (the "License"). The License shall be deemed effective as of the date CC0 was applied by Affirmer to the Work. Should any part of the License for any reason be judged legally invalid or ineffective under applicable law, such partial invalidity or ineffectiveness shall not invalidate the remainder of the License, and in such case Affirmer hereby affirms that he or she will not (i) exercise any of his or her remaining Copyright and Related Rights in the Work or (ii) assert any associated claims and causes of action with respect to the Work, in either case contrary to Affirmer's express Statement of Purpose.
+
+4. __Limitations and Disclaimers.__
+
+ a. No trademark or patent rights held by Affirmer are waived, abandoned, surrendered, licensed or otherwise affected by this document.
+
+ b. Affirmer offers the Work as-is and makes no representations or warranties of any kind concerning the Work, express, implied, statutory or otherwise, including without limitation warranties of title, merchantability, fitness for a particular purpose, non infringement, or the absence of latent or other defects, accuracy, or the present or absence of errors, whether or not discoverable, all to the greatest extent permissible under applicable law.
+
+ c. Affirmer disclaims responsibility for clearing rights of other persons that may apply to the Work or any use thereof, including without limitation any person's Copyright and Related Rights in the Work. Further, Affirmer disclaims responsibility for obtaining any necessary consents, permissions or other rights required for any use of the Work.
+
+ d. Affirmer understands and acknowledges that Creative Commons is not a party to this document and has no duty or obligation with respect to this CC0 or use of the Work.
diff --git a/NAMESPACE b/NAMESPACE
index d7fb7f4..30c91f1 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -19,8 +19,10 @@ export(get_taxon_rank)
export(get_utm_zone)
export(long2UTM)
export(order_cols)
+export(replace_blanks)
export(te_check)
export(utm_to_ll)
+export(validate_coord)
importFrom(lifecycle,deprecated)
importFrom(magrittr,"%>%")
importFrom(stats,sd)
diff --git a/NEWS.md b/NEWS.md
index a31acc8..43f027b 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,8 @@
+# QCkit v0.1.4
+2024-01-23
+* Maintenance on `get_custom_flags()` to align with updated DRR requirements
+* Added function `replace_blanks()` to ingest a directory of .csv files and write them back out to .csv (overwriting the original files) with blanks converted to NA. The exception is a file with NO data: it remains blank and needs to be dealt with manually.
+
# QC kit v0.1.3
2023-12-18
* Bugfix for `create_datastore_script()`
@@ -5,7 +10,7 @@
* Code cleanup via linter suggests
* Added warning about turning off VPN for the `covert_utm_to_ll()` documentation
* Moved `convert_datetime_format()` from DPchecker to QCkit
-* Added funtion `fix_utc_offset()`
+* Added function `fix_utc_offset()`
# QCkit 0.1.2 "Automated Shuttle Pilot"
2023-11-20
diff --git a/R/DC_col_check.R b/R/check_dc_cols.R
similarity index 100%
rename from R/DC_col_check.R
rename to R/check_dc_cols.R
diff --git a/R/dates_and_times.R b/R/dates_and_times.R
index 257fa54..bd17182 100644
--- a/R/dates_and_times.R
+++ b/R/dates_and_times.R
@@ -1,6 +1,6 @@
#' Fix UTC offset strings
#'
-#' UTC offsets can be formatted in multiple ways (e.g. -07, -07:00, -0700) and R often struggles to parse these offsets. This function takes date/time strings with valid UTC offsets, and formats them so that they are consistent and readable by R.
+#' UTC offsets can be formatted in multiple ways (e.g. -07, -07:00, -0700) and R often struggles to parse these offsets. This function takes date/time strings with valid UTC offsets and reformats them so that they are consistent and readable by R. Supply a vector of dates in ISO 8601 format and they will be returned in a consistent format. Date strings with missing or invalid UTC offsets will result in a warning.
#'
#' @param datetime_strings Character vector of dates in ISO 8601 format
#'
@@ -8,8 +8,9 @@
#' @export
#'
#' @examples
-#' datetimes <- c("2023-11-16T03:32:49+07:00","2023-11-16T03:32:49-07","2023-11-16T03:32:49","2023-11-16T03:32:49Z")
-#' fix_utc_offset(datetimes) # returns c("2023-11-16T03:32:49+0700", "2023-11-16T03:32:49-0700", "2023-11-16T03:32:49", "2023-11-16T03:32:49+0000") and warns about missing offset (see third element)
+#' datetimes <- c("2023-11-16T03:32:49+07:00", "2023-11-16T03:32:49-07",
+#' "2023-11-16T03:32:49","2023-11-16T03:32:49Z")
+#' fix_utc_offset(datetimes)
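+#' # returns c("2023-11-16T03:32:49+0700", "2023-11-16T03:32:49-0700",
+#' #   "2023-11-16T03:32:49", "2023-11-16T03:32:49+0000")
+#' # and warns about the missing offset in the third element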
#'
fix_utc_offset <- function(datetime_strings) {
datetime_strings <- stringr::str_replace_all(datetime_strings, "[\u2212\u2010\u2011\u2012\u2013\u2014\u2015\ufe58\ufe63\uff0d]", "-") # replace every possible type of dash with a regular minus sign
@@ -19,8 +20,11 @@ fix_utc_offset <- function(datetime_strings) {
stringr::str_extract("[Zz]|((?<=[+-])[0-9]{1,2}:?[0-9]{0,2})$") %>%
stringr::str_remove(":") %>%
stringr::str_replace("[Zz]", "0000")
- new_offsets <- dplyr::case_when(nchar(new_offsets) == 1 ~ paste0("0", new_offsets, "00"),
- nchar(new_offsets) == 2 ~ paste0(new_offsets, "00"),
+ new_offsets <- dplyr::case_when(nchar(new_offsets) == 1 ~ paste0("0",
+ new_offsets,
+ "00"),
+ nchar(new_offsets) == 2 ~ paste0(new_offsets,
+ "00"),
nchar(new_offsets) == 4 ~ new_offsets,
.default = "")
if (any(new_offsets == "")) {
diff --git a/R/geography.R b/R/geography.R
index 3ac1801..d612f9e 100644
--- a/R/geography.R
+++ b/R/geography.R
@@ -1,6 +1,7 @@
#' Retrieve the polygon information for the park unit from NPS REST services
#'
-#' @description `get_park_polygon()` retrieves a geoJSON string for a polygon of a park unit. This is not the official boundary.
+#' @description `get_park_polygon()` retrieves a geoJSON string for a polygon of
+#' a park unit. This is not the official boundary.
#' #'
#' @param unit_code is the four-character unit code as designated by NPS.
#'
@@ -11,10 +12,10 @@
#' }
get_park_polygon <- function(unit_code) {
# get geography from NPS Rest Services
- UnitsURL <- paste0("https://irmaservices.nps.gov/v2/rest/unit/",
- unit_code,
- "/geography")
- xml <- httr::content(httr::GET(UnitsURL))
+ units_url <- paste0("https://irmaservices.nps.gov/v2/rest/unit/",
+ unit_code,
+ "/geography")
+ xml <- httr::content(httr::GET(units_url))
# Create spatial feature from polygon info returned from NPS
park_polygon <- sf::st_as_sfc(xml[[1]]$Geography, geoJSON = TRUE)
@@ -24,17 +25,20 @@ get_park_polygon <- function(unit_code) {
#' Check whether a coordinate pair is within the polygon of a park unit
#'
-#' @description `validate_coord()` compares a coordinate pair (in decimal degrees) to the polygon for a park unit as provided through the NPS
+#' @description `validate_coord()` compares a coordinate pair (in decimal
+#' degrees) to the polygon for a park unit as provided through the NPS
#' Units rest services. The function returns a value of TRUE or FALSE.
#'
#'
#' @param unit_code is the four-character unit code as designated by NPS.
-#' @param lat latitude, in decimal degrees
+#' @param lat latitude, in decimal degrees.
#' @param lon longitude, in decimal degrees.
#'
+#' @export
+#'
#' @examples
#' \dontrun{
-#' qc_ValidateCoordinates("OBRI", 36.07951, -84.65610)
+#' validate_coord("OBRI", 36.07951, -84.65610)
#' }
validate_coord <- function(unit_code, lat, lon) {
# get geography from NPS Rest Services
@@ -51,9 +55,12 @@ validate_coord <- function(unit_code, lat, lon) {
#' Return UTM Zone
#'
-#' @description `get_utm_zone()` replaces `convert_long_2_utm()` as this function name is more descriptive. `get_utm_zone()` takes a longitude coordinate and returns the corresponding UTM zone.
+#' @description `get_utm_zone()` replaces `convert_long_2_utm()` as this
+#' function name is more descriptive. `get_utm_zone()` takes a longitude
+#' coordinate and returns the corresponding UTM zone.
#'
-#' @details Input a longitude (decimal degree) coordinate and this simple function returns the number of the UTM zone where that point falls.
+#' @details Input a longitude (decimal degree) coordinate and this simple
+#' function returns the number of the UTM zone where that point falls.
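+#' For example, a longitude of -84.6 gives
+#' `(floor((-84.6 + 180) / 6) %% 60) + 1 = 16`, i.e. UTM zone 16.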
#'
#' @param lon - Decimal degree longitude value
#'
@@ -71,16 +78,22 @@ get_utm_zone <- function(lon) {
#' Return UTM Zone
#'
#' @description `r lifecycle::badge("deprecated")`
-#' `convert_long_2_utm()` was deprecated in favor of `get_utm_zone()` as the new funciton name more accurately reflects what the function does.`convert_long_to_utm()` take a longitude coordinate and returns the corresponding UTM zone.
+#' `convert_long_2_utm()` was deprecated in favor of `get_utm_zone()` as the
+#' new function name more accurately reflects what the function does.
+#' `convert_long_to_utm()` takes a longitude coordinate and returns the
+#' corresponding UTM zone.
#'
-#' @details Input a longitude (decimal degree) coordinate and this simple function returns the number of the UTM zone where that point falls.
+#' @details Input a longitude (decimal degree) coordinate and this simple
+#' function returns the number of the UTM zone where that point falls.
#'
#' @param lon - Decimal degree longitude value
#'
#' @return The function returns a numeric UTM zone (between 1 and 60).
#' @export
convert_long_to_utm <- function(lon) {
- lifecycle::deprecate_soft(when = "0.1.4", "convert_long_to_utm()", "get_utm_zone()")
+ lifecycle::deprecate_soft(when = "0.1.4",
+ "convert_long_to_utm()",
+ "get_utm_zone()")
## Function to get the UTM zone for a given longitude
return((floor((lon + 180) / 6) %% 60) + 1)
}
@@ -89,11 +102,15 @@ convert_long_to_utm <- function(lon) {
#'
#' @description
#' `r lifecycle::badge("deprecated")`
-#' `long2UTM` was deprecated in favor of `convert_long_to_utm()` to enforce a consistent function naming pattern across the package and to conform to the tidyverse style guide.
+#' `long2UTM` was deprecated in favor of `convert_long_to_utm()` to enforce a
+#' consistent function naming pattern across the package and to conform to the
+#' tidyverse style guide.
#'
-#' `long2UTM()` take a longitude coordinate and returns the corresponding UTM zone.
+#' `long2UTM()` takes a longitude coordinate and returns the corresponding UTM
+#' zone.
#'
-#' @details Input a longitude (decimal degree) coordinate and this simple function returns the number of the UTM zone where that point falls.
+#' @details Input a longitude (decimal degree) coordinate and this simple
+#' function returns the number of the UTM zone where that point falls.
#'
#' @param lon - Decimal degree longitude value
#'
@@ -108,13 +125,22 @@ long2UTM <- function(lon) {
#' Convert Coordinates Into a Polygon to Obscure Specific Location
#'
-#' @description `fuzz_location()` "fuzzes" a specific location to something less precise prior to public release of information about sensitive resources for which data are not to be released to the public. This function takes coordinates in either UTM or decimal degrees, converts to UTM (if in decimal degrees), creates a bounding box based on rounding of UTM coordinates, and then creates a polygon from the resultant points. The function returns a string in Well-Known-Text format.
+#' @description `fuzz_location()` "fuzzes" a specific location to something less
+#' precise prior to public release of information about sensitive resources for
+#' which data are not to be released to the public. This function takes
+#' coordinates in either UTM or decimal degrees, converts to UTM (if in decimal
+#' degrees), creates a bounding box based on rounding of UTM coordinates, and
+#' then creates a polygon from the resultant points. The function returns a
+#' string in Well-Known-Text format.
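+#'
+#' For example (hypothetical coordinates in decimal degrees, WGS84):
+#' `fuzz_location(36.07951, -84.65610, 4326, "Fuzzed - 1km")` should return
+#' a Well-Known-Text POLYGON covering roughly a 1 km box around the point.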
#'
#' @details Details will be defined later.
#'
#' @param lat - The latitude in either UTMs or decimal degrees.
#' @param lon - The longitude in either UTMs or decimal degrees
-#' @param coord_ref_sys - The EPSG coordinate system of the latitude and longitude coordinates. Either 4326 for decimal degrees/WGS84 datum, 4269 for decimal degrees/NAD83, or 326xx for UTM/WGS84 datum (where the xx is the northern UTM zone). For example 32616 is for UTM zone 16N.
+#' @param coord_ref_sys - The EPSG coordinate system of the latitude and
+#' longitude coordinates. Either 4326 for decimal degrees/WGS84 datum, 4269 for
+#' decimal degrees/NAD83, or 326xx for UTM/WGS84 datum (where the xx is the
+#' northern UTM zone). For example 32616 is for UTM zone 16N.
#' @param fuzz_level - Use "Fuzzed - 10km", "Fuzzed - 1km", or "Fuzzed - 100m"
#'
#' @export
@@ -135,7 +161,8 @@ fuzz_location <- function(lat,
}
#for decimal degrees, convert to UTM locations and identify proper CRS
if (coord_ref_sys == 4326 || coord_ref_sys == 4269) {
- #coordinates are in decimal degrees WGS84 or NAD83 and we need to convert to UTM; find the appropriate UTM EPSG code
+ #coordinates are in decimal degrees WGS84 or NAD83 and we need to convert to
+ #UTM; find the appropriate UTM EPSG code
if (lat > 0) {
#northern hemisphere (N) UTM zones start at 32601 and go to 32660
tempcrs <- get_utm_zone(lon) + 32600
@@ -144,7 +171,7 @@ fuzz_location <- function(lat,
tempcrs <- get_utm_zone(lon) + 32700
}
- #convert the points to UTM given their existing CRS (decimal degree WGS84 or NAD83)
+ #convert points to UTM given their CRS (decimal degree WGS84 or NAD83)
point <- sf::st_point(c(lon, lat))
point <- sf::st_sfc(point, crs = coord_ref_sys)
pointutm <- sf::st_transform(x = point, crs = tempcrs)
@@ -206,17 +233,33 @@ fuzz_location <- function(lat,
#' Coordinate Conversion from UTM to Latitude and Longitude
#'
-#' @description `convert_utm_to_ll()` takes your dataframe with UTM coordinates in separate Easting and Northing columns, and adds on an additional two columns with the converted decimalLatitude and decimalLongitude coordinates using the reference coordinate system WGS84. You may need to turn the VPN OFF for this function to work properly.
-#'
-#' @details Define the name of your dataframe, the easting and northing columns within it, the UTM zone within which those coordinates are located, and the reference coordinate system (datum). UTM Northing and Easting columns must be in separate columns prior to running the function. If a datum is not defined, the function will default to "WGS84". If there are missing coordinates in your dataframe they will be preserved, however they will be moved to the end of your dataframe. Note that some parameter names are not in snake_case but instead reflect DarwinCore naming conventions.
-#'
-#' @param df - The dataframe with UTM coordinates you would like to convert. Input the name of your dataframe.
-#' @param EastingCol - The name of your Easting UTM column. Input the name in quotations, ie. "EastingCol".
-#' @param NorthingCol - The name of your Northing UTM column. Input the name in quotations, ie. "NorthingCol".
+#' @description `convert_utm_to_ll()` takes your dataframe with UTM coordinates
+#' in separate Easting and Northing columns, and adds on an additional two
+#' columns with the converted decimalLatitude and decimalLongitude coordinates
+#' using the reference coordinate system WGS84. You may need to turn the VPN OFF
+#' for this function to work properly.
+#'
+#' @details Define the name of your dataframe, the easting and northing columns
+#' within it, the UTM zone within which those coordinates are located, and the
+#' reference coordinate system (datum). UTM Northing and Easting columns must be
+#' in separate columns prior to running the function. If a datum is not defined,
+#' the function will default to "WGS84". If there are missing coordinates in
+#' your dataframe they will be preserved, however they will be moved to the end
+#' of your dataframe. Note that some parameter names are not in snake_case but
+#' instead reflect DarwinCore naming conventions.
+#'
+#' @param df - The dataframe with UTM coordinates you would like to convert.
+#' Input the name of your dataframe.
+#' @param EastingCol - The name of your Easting UTM column. Input the name in
+#' quotation marks, e.g. "EastingCol".
+#' @param NorthingCol - The name of your Northing UTM column. Input the name in
+#' quotation marks, e.g. "NorthingCol".
#' @param zone - The UTM Zone. Input the zone number in quotations, ie. "17".
-#' @param datum - The datum used in the coordinate reference system of your coordinates. Input in quotations, ie. "WGS84"
+#' @param datum - The datum used in the coordinate reference system of your
+#' coordinates. Input in quotation marks, e.g. "WGS84".
#'
-#' @return The function returns your dataframe, mutated with an additional two columns of decimal Longitude and decimal Latitude.
+#' @return The function returns your dataframe, mutated with an additional two
+#' columns of decimal Longitude and decimal Latitude.
#' @export
#'
#' @examples
diff --git a/R/replace_blanks.R b/R/replace_blanks.R
new file mode 100644
index 0000000..8ee3783
--- /dev/null
+++ b/R/replace_blanks.R
@@ -0,0 +1,45 @@
+#' Replaces all blank cells with NA
+#'
+#' @details `replace_blanks()` is particularly useful for exporting data from a
+#' database (such as Microsoft Access) and converting it to a data package with
+#' metadata.
+#'
+#' `replace_blanks()` will import all .csv files in the specified directory
+#' (defaulting to the current working directory). The files are then written
+#' back out to the same directory, overwriting the old .csv files. Any blank
+#' cells in the original .csv files will be replaced with NA.
+#'
+#' One exception is if a .csv contains NO data (i.e. just column names and no
+#' data in any of the cells). In this case, the blanks will not be replaced with
+#' NA (as the function cannot determine how many NAs to include).
+#'
+#' @param directory String. Path to the file(s) to have blanks replaced with
+#' NAs. Defaults to the working directory of the project (here::here())
+#'
+#' @return list of data frames (invisibly)
+#'
+#' @export
+#'
+#' @examples
+#' \dontrun{
+#' replace_blanks()
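+#' # hypothetical path; any directory containing .csv files works:
+#' replace_blanks("~/my_data_package_directory")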
+#' }
+replace_blanks <- function(directory = here::here()) {
+ #get list of .csv file names
+  my_path <- list.files(path = directory, pattern = "\\.csv$",
+                        full.names = TRUE)
+  #import .csvs as dataframes; each dataframe is an item in the list "my_data"
+  my_data <- lapply(my_path, function(x) readr::read_csv(x,
+                                                show_col_types = FALSE))
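+  #(readr::read_csv() reads blank cells as NA by default, which performs the
+  #actual blank-to-NA conversion)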
+
+ #extract just the file name
+ my_path <- basename(my_path)
+
+  #give each dataframe a name based on the filename where it originated
+  names(my_data) <- gsub("\\.csv$", "", my_path)
+ #write each dataframe back to .csv
+ for (i in seq_along(my_data)) {
+ readr::write_csv(my_data[[i]], file = paste0(directory, "/",
+ names(my_data)[[i]], ".csv"))
+ }
+  return(invisible(my_data))
+}
diff --git a/R/summarize_qc_flags.R b/R/summarize_qc_flags.R
index 13549b0..9093547 100644
--- a/R/summarize_qc_flags.R
+++ b/R/summarize_qc_flags.R
@@ -1,3 +1,236 @@
+#' Creates dataframe(s) summarizing data quality
+#'
+#' @description
+#' `r lifecycle::badge("experimental")`
+#' `get_custom_flags()` returns data frames that summarize data quality
+#' control flags: one summarizing at the data file level and one at the
+#' column level. The summaries include all data with quality control flagging
+#' (a column name that ends in "_flag") and optionally any additional custom
+#' columns the user specifies, either by column name or number.
+#'
+#' The user can specify which of the two data frames (or both, as a list of
+#' data frames) should be returned.
+#'
+#' The number of each flag type (A, AE, R, P) in each column is reported.
+#' Unflagged columns are assumed to contain only accepted (or missing) data.
+#' The total number of data points in the specified columns (and the data
+#' flagging columns) for each .csv is also reported. NAs are considered
+#' missing data. An Unweighted Relative Response (RRU) is calculated as the
+#' total number of accepted data points (A, AE, and data that are not flagged)
+#' divided by the total number of data points (excluding missing values) in
+#' all specified columns (and the flagged columns).
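+#'
+#' For example, a flagged column with 10 non-missing values coded 7 A, 1 AE,
+#' and 2 R has RRU = (7 + 1) / 10 = 80%.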
+#'
+#' @details Flagged columns must have names ending in "_flag". Missing values
+#' must be specified as NA. The function counts cells within "*_flag" columns
+#' that start with one of the flagging characters (A, AE, R, P) and ignores
+#' trailing characters and white spaces. For custom columns that do not include
+#' a specific flagging column, all non-missing (non-NA) values are considered
+#' Accepted (A).
+#'
+#' `get_custom_flags()` is intended for integration into reports on data
+#' quality, such as Data Release Reports (DRRs).
+#'
+#' @param directory is the path to the data package .csv files (defaults to the
+#' current working directory).
+#' @param cols A character vector of column names. If left unspecified,
+#' defaults to just the flagged columns.
+#' @param output A string indicating what output should be provided. "columns"
+#' returns a summary table of QC flags and RRU values in each specified column
+#' for every data file. "files" returns a summary table of total QC flags and
+#' mean across each data file. "all" will return all three
+#' data frames in a single list.
+#'
+#' @return A data frame (or list of data frames) with quality control summary
+#' information at the specified level(s).
+#'
+#' @importFrom stats sd
+#' @importFrom utils read.csv
+#'
+#' @export
+#'
+#' @examples
+#' \dontrun{
+#'
+#' get_custom_flags("~/my_data_package_directory", cols = c("scientificName",
+#' "locality"),
+#' output="all")
+#' cols <- colnames(read.csv("mydata.csv"))[c(1:4, 7, 10)]
+#' get_custom_flags(cols = cols, output="files")
+#' }
+get_custom_flags <- function(directory = here::here(),
+ cols = (""),
+ output = c("all",
+ "files",
+                             "columns")) {
+  #resolve output to a single choice ("all" by default) so the if() checks
+  #at the end of the function behave correctly:
+  output <- match.arg(output)
+
+ fileList <- list.files(path = directory, pattern = "\\.csv$",
+ full.names = TRUE)
+
+
+ dfList <- suppressMessages(lapply(fileList, readr::read_csv))
+
+ names(dfList) <- base::basename(fileList)
+
+
+
+ cust_flags <- NULL
+
+ for (i in seq_along(dfList)) {
+ # get custom columns:
+ cust_cols <- dfList[[i]] %>%
+ dplyr::select(any_of(cols) & !contains("_flag"))
+ if (ncol(cust_cols) > 0) {
+ for (j in seq_along(cust_cols)) {
+ A_flag <- sum(!is.na(cust_cols[j]))
+ AE_flag <- 0
+ R_flag <- 0
+ P_flag <- 0
+ RRU <- A_flag / (nrow(cust_cols[j]))
+ Cell_count <- nrow(cust_cols[j])
+ filename <- names(dfList)[i]
+ column <- colnames(cust_cols)[j]
+ flags <- assign(
+ paste0(names(dfList)[i]),
+ tibble::tibble(
+ filename = names(dfList[i]),
+ column,
+ Cell_count,
+ A_flag,
+ AE_flag,
+ R_flag,
+ P_flag,
+ RRU
+ )
+ )
+
+ # add to df_flags dataframe:
+ cust_flags <- rbind(cust_flags, flags)
+ }
+ }
+
+ # get just flagging columns:
+ flags_only <- dfList[[i]] %>% dplyr::select(contains("_flag"))
+
+ if (ncol(flags_only) > 0) {
+ # for each column in data and each data flags:
+ for (j in seq_along(flags_only)) {
+ # count each flag type; don't count NAs. Should count all cells that
+ # start with the flagging letter and ignore anything (i.e. Quality
+ # Assessment codes)
+ A_flag <- suppressWarnings(sum(stringr::str_count(
+ flags_only[j],
+ "\\bA"
+ ), na.rm = TRUE))
+ AE_flag <- suppressWarnings(sum(stringr::str_count(
+ flags_only[j],
+ "\\bAE"
+ ), na.rm = TRUE))
+ R_flag <- suppressWarnings(sum(stringr::str_count(
+ flags_only[j],
+ "\\bR"
+ ), na.rm = TRUE))
+ P_flag <- suppressWarnings(sum(stringr::str_count(
+ flags_only[j],
+ "\\bP"
+ ), na.rm = TRUE))
+ Cell_count <- nrow(flags_only[j])
+
+ RRU <- ((A_flag + AE_flag) / Cell_count)
+
+ filename <- names(dfList)[i]
+ column <- colnames(flags_only)[j]
+
+ # make a dataframe with data:
+ flags <- assign(
+ paste0(names(dfList)[i]),
+ tibble::tibble(
+ filename = names(dfList)[i],
+ column,
+ Cell_count,
+ A_flag,
+ AE_flag,
+ R_flag,
+ P_flag,
+ RRU
+ )
+ )
+
+ # add to df_flags dataframe:
+ cust_flags <- rbind(cust_flags, flags)
+ }
+ }
+
+ # if there are no flagging columns:
+ if (ncol(flags_only) == 0 && ncol(cust_cols) == 0) {
+ filename <- names(dfList)[i]
+ column <- NA
+ A_flag <- NA
+ AE_flag <- NA
+ R_flag <- NA
+ P_flag <- NA
+ Cell_count <- NA
+ RRU <- NA
+
+ flags <- data.frame(
+ filename = names(dfList)[i],
+ column,
+ Cell_count,
+ A_flag,
+ AE_flag,
+ R_flag,
+ P_flag,
+ RRU
+ )
+
+ cust_flags <- rbind(cust_flags, flags)
+ }
+ }
+
+ #generate summary statistics for each column:
+ data_file_summaries <- cust_flags %>%
+ dplyr::group_by(filename) %>%
+ dplyr::summarize("A" = sum(A_flag),
+ "AE" = sum(AE_flag),
+ "P" = sum(P_flag),
+ "R" = sum(R_flag),
+ "% Accepted" = mean(RRU)) %>%
+ dplyr::rename("File Name" = filename) %>%
+ dplyr::mutate(`% Accepted` = paste0(formatC(100 * `% Accepted`,
+ format = "f", digits = 1),
+ "%"))
+
+ cust_flags <- cust_flags %>%
+ dplyr::mutate(
+ column = stringr::str_remove(column, "_flag"),
+ RRU = paste0(formatC(100 * RRU, format = "f", digits = 1), "%")) %>%
+ dplyr::select(
+ "File Name" = filename,
+ "Measure" = column,
+ "Number of Records" = Cell_count,
+ "A" = A_flag,
+ "AE" = AE_flag,
+ "R" = R_flag,
+ "P" = P_flag,
+ "% Accepted" = RRU)
+
+
+ qc_summary <- list(cust_flags,
+ data_file_summaries)
+
+ names(qc_summary) <- c("Column Level QC Summaries",
+ "Data File Level QC Summaries")
+
+ if (output == "files") {
+ return(qc_summary[[2]])
+ }
+ if (output == "columns") {
+ return(qc_summary[[1]])
+ }
+ if (output == "all") {
+ return(qc_summary)
+ }
+}
+
#' Create Table of Data Quality Flags Found in a Data Package
#'
#' @description
@@ -39,7 +272,7 @@
#'
get_dp_flags <- function(directory = here::here()) {
- lifecycle::deprecate_warn("1.0.1", "get_dp_flags()", "get_custom_flags()")
+ lifecycle::deprecate_soft(when = "0.1.1", "get_dp_flags()", "get_custom_flags()")
fileList <- list.files(path = directory, pattern = "\\.csv$",
full.names = TRUE)
@@ -139,7 +372,9 @@ get_dp_flags <- function(directory = here::here()) {
#'
get_df_flags <- function(directory = here::here()) {
- lifecycle::deprecate_warn("1.0.1", "get_dp_flags()", "get_custom_flags()")
+ lifecycle::deprecate_soft(when = "0.1.1",
+ "get_df_flags()",
+ "get_custom_flags()")
# get list of .csv files in the specified directory
fileList <- list.files(path = directory, pattern = "\\.csv$",
@@ -249,7 +484,7 @@ get_df_flags <- function(directory = here::here()) {
#'
get_dc_flags <- function(directory = here::here()) {
- lifecycle::deprecate_warn("1.0.1", "get_dp_flags()", "get_custom_flags()")
+ lifecycle::deprecate_soft(when = "0.1.1", "get_dc_flags()", "get_custom_flags()")
fileList <- list.files(path = directory, pattern = "\\.csv$",
full.names = TRUE)
@@ -260,7 +495,6 @@ get_dc_flags <- function(directory = here::here()) {
dc_flags <- NULL
for (i in seq_along(dfList)) {
- print(paste0("i=", i))
# get just flagging columns:
flags_only <- dfList[[i]] %>% dplyr::select(ends_with("_flag"))
@@ -269,7 +503,6 @@ get_dc_flags <- function(directory = here::here()) {
if (ncol(flags_only) > 0) {
# for each column in data each data file with flags:
for (j in seq_along(flags_only)) {
- print(paste0("j=", j))
# count each flag type; don't count NAs. Should count all cells that
# start with the flagging letter and ignore anything (i.e. Quality
# Assessment codes)
@@ -339,233 +572,3 @@ get_dc_flags <- function(directory = here::here()) {
return(dc_flags)
}
-
-#' Creates dataframe(s) summarizing data quality
-#'
-#' @description
-#' `r lifecycle::badge("experimental")`
-#' get_custom_flags returns data frames that that summarize data
-#' quality control flags (one that summarizes at the data file level and one for each column). The summaries include all data
-#' with quality control flagging (a column name that ends in "_flag") and
-#' optionally any additional custom columns the user specifies, either by column
-#' name or number.
-#'
-#' The use can specify which of the 2 data frames (or all as a list of
-#' dataframes) should be returned.
-#'
-#' The number of each flag type for each column (A, AE, R, P) is reported.
-#' Unflagged columns are assumed to have only accepted (or missing) data. The
-#' total number of data points in the specified columns (and data flagging
-#' columns for) each .csv are also reported. NAs considered missign data. An
-#' Unweighted Relative Response (RRU) is calculated as the total number of
-#' accepted data points (A, AE, and data that are not flagged) divided by the
-#'total number of data points (excluding missing values) in all specified
-#' columns (and the flagged columns).
-#'
-#' @details Flagged columns must have names ending in "_flag". Missing values
-#' must be specified as NA. The function counts cells within "*_flag" columns
-#' that start with one of the flagging characters (A, AE, R, P) and ignores
-#' trailing characters and white spaces. For custom columns that do not include
-#' a specific flagging column, all non-missing (non-NA) values are considered
-#' Accepted (A).
-#'
-#' The intent of get_custom_flags is for integration into reports on data
-#' quality, such as Data Release Reports (DRRs).
-#'
-#' @param directory is the path to the data package .csv files (defaults to the
-#' current working directory).
-#' @param cols A comma delimited list of column names. If left unspecified,
-#' defaults to just flagged columns.
-#' @param output A string indicating what output should be provided. "columns"
-#' returns a summary table of QC flags and RRU values in each specified column
-#' for every data file. "files" returns a summary table of total QC flags and
-#' mean across each data file. "all" will return all three
-#' data frames in a single list.
-#'
-#' @return a dataframe with quality control summary information summarized at
-#' the specified level(s).
-#'
-#' @importFrom stats sd
-#' @importFrom utils read.csv
-#'
-#' @export
-#'
-#' @examples
-#' \dontrun{
-#'
-#' get_custom_flags("~/my_data_package_directory", cols = c("scientificName",
-#' "locality"),
-#' output="all")
-#' cols <- colnames(read.csv("mydata.csv"))[c(1:4, 7, 10)]
-#' get_custom_flags(cols = cols, output="files")
-#' }
-get_custom_flags <- function(directory = here::here(),
- cols = (""),
- output = c("all",
- "files",
- "columns")) {
-
- fileList <- list.files(path = directory, pattern = "\\.csv$",
- full.names = TRUE)
-
-
- dfList <- suppressMessages(lapply(fileList, readr::read_csv))
-
- names(dfList) <- base::basename(fileList)
-
-
-
- cust_flags <- NULL
-
- for (i in seq_along(dfList)) {
- # get custom columns:
- cust_cols <- dfList[[i]] %>% dplyr::select(any_of(cols) & !contains("_flag"))
- if (ncol(cust_cols) > 0) {
- for (j in seq_along(cust_cols)) {
- A_flag <- sum(!is.na(cust_cols[j]))
- AE_flag <- 0
- R_flag <- 0
- P_flag <- 0
- RRU <- A_flag / (nrow(cust_cols[j]))
- Cell_count <- nrow(cust_cols[j])
- filename <- names(dfList)[i]
- column <- colnames(cust_cols)[j]
- flags <- assign(
- paste0(names(dfList)[i]),
- tibble::tibble(
- filename = names(dfList[i]),
- column,
- Cell_count,
- A_flag,
- AE_flag,
- R_flag,
- P_flag,
- RRU
- )
- )
-
- # add to df_flags dataframe:
- cust_flags <- rbind(cust_flags, flags)
- }
- }
-
- # get just flagging columns:
- flags_only <- dfList[[i]] %>% dplyr::select(contains("_flag"))
-
- if (ncol(flags_only) > 0) {
- # for each column in data and each data flags:
- for (j in seq_along(flags_only)) {
- # count each flag type; don't count NAs. Should count all cells that
- # start with the flagging letter and ignore anything (i.e. Quality
- # Assessment codes)
- A_flag <- suppressWarnings(sum(stringr::str_count(
- flags_only[j],
- "\\bA"
- ), na.rm = TRUE))
- AE_flag <- suppressWarnings(sum(stringr::str_count(
- flags_only[j],
- "\\bAE"
- ), na.rm = TRUE))
- R_flag <- suppressWarnings(sum(stringr::str_count(
- flags_only[j],
- "\\bR"
- ), na.rm = TRUE))
- P_flag <- suppressWarnings(sum(stringr::str_count(
- flags_only[j],
- "\\bP"
- ), na.rm = TRUE))
- Cell_count <- nrow(flags_only[j])
-
- RRU <- ((A_flag + AE_flag) / Cell_count)
-
- filename <- names(dfList)[i]
- column <- colnames(flags_only)[j]
-
- # make a dataframe with data:
- flags <- assign(
- paste0(names(dfList)[i]),
- tibble::tibble(
- filename = names(dfList)[i],
- column,
- Cell_count,
- A_flag,
- AE_flag,
- R_flag,
- P_flag,
- RRU
- )
- )
-
- # add to df_flags dataframe:
- cust_flags <- rbind(cust_flags, flags)
- }
- }
-
- # if there are no flagging columns:
- if (ncol(flags_only) == 0 && ncol(cust_cols) == 0) {
- filename <- names(dfList)[i]
- column <- NA
- A_flag <- NA
- AE_flag <- NA
- R_flag <- NA
- P_flag <- NA
- Cell_count <- NA
- RRU <- NA
-
- flags <- data.frame(
- filename = names(dfList)[i],
- column,
- Cell_count,
- A_flag,
- AE_flag,
- R_flag,
- P_flag,
- RRU
- )
-
- cust_flags <- rbind(cust_flags, flags)
- }
- }
-
- #generate summary statistics for each column:
- data_file_summaries <- cust_flags %>%
- dplyr::group_by(filename) %>%
- dplyr::summarize("A" = sum(A_flag),
- "AE" = sum(AE_flag),
- "P" = sum(P_flag),
- "R" = sum(R_flag),
- "% Accepted" = mean(RRU)) %>%
- dplyr::rename("File Name" = filename) %>%
- dplyr::mutate(`% Accepted` = paste0(formatC(100 * `% Accepted`, format = "f", digits = 1), "%"))
-
- cust_flags <- cust_flags %>%
- dplyr::mutate(
- column = stringr::str_remove(column, "_flag"),
- RRU = paste0(formatC(100 * RRU, format = "f", digits = 1), "%")) %>%
- dplyr::select(
- "File Name" = filename,
- "Measure" = column,
- "Number of Records" = Cell_count,
- "A" = A_flag,
- "AE" = AE_flag,
- "R" = R_flag,
- "P" = P_flag,
- "% Accepted" = RRU)
-
-
- qc_summary <- list(cust_flags,
- data_file_summaries)
-
- names(qc_summary) <- c("Column Level QC Summaries",
- "Data File Level QC Summaries")
-
- if (output == "files") {
- return(qc_summary[[2]])
- }
- if (output == "columns") {
- return(qc_summary[[1]])
- }
- if (output == "all") {
- return(qc_summary)
- }
-}
diff --git a/R/utils.R b/R/utils.R
index e9af64f..2ecc8c3 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -22,4 +22,17 @@ assign("QC_ds_dev_api", "https://irmadevservices.nps.gov/datastore-secure/v6/res
.QC_ds_dev_api <- function(x){
get("QC_ds_dev_api", envir = .pkgglobalenv)
-}
\ No newline at end of file
+}
+
+# this silences the "no visible binding for global variable" notes in R CMD check:
+globalVariables(c("any_of",
+ "contains",
+ "ends_with",
+ "filter",
+ "species_col.y",
+ "species_col.x",
+ "x",
+ "y",
+ "capture.output",
+ "title",
+ "% Accepted"))
\ No newline at end of file
diff --git a/docs/404.html b/docs/404.html
index 87bd7bb..5be2d8c 100644
--- a/docs/404.html
+++ b/docs/404.html
@@ -32,7 +32,7 @@
QCkit
- 0.1.3
+ 0.1.4
@@ -86,7 +86,7 @@
-long2UTM was deprecated in favor of convert_long_to_utm() to enforce a consistent function naming pattern across the package and to conform to the tidyverse style guide.
-
long2UTM() take a longitude coordinate and returns the corresponding UTM zone.
+long2UTM was deprecated in favor of convert_long_to_utm() to enforce a
+consistent function naming pattern across the package and to conform to the
+tidyverse style guide.
+
long2UTM() takes a longitude coordinate and returns the corresponding UTM
+zone.
@@ -78,7 +84,8 @@
Value
Details
-
Input a longitude (decimal degree) coordinate and this simple function returns the number of the UTM zone where that point falls.
+
Input a longitude (decimal degree) coordinate and this simple
+function returns the number of the UTM zone where that point falls.