From 1c7fdd68c4509e05c7b4a0caa3e4867a0675f589 Mon Sep 17 00:00:00 2001 From: Rob Baker Date: Mon, 12 Feb 2024 14:23:02 -0700 Subject: [PATCH 1/4] update rmd to standardize code chunk names. --- .../templates/NPS_DRR/skeleton/skeleton.Rmd | 29 ++++++++++++------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/inst/rmarkdown/templates/NPS_DRR/skeleton/skeleton.Rmd b/inst/rmarkdown/templates/NPS_DRR/skeleton/skeleton.Rmd index 86b263a..bdcc8d9 100644 --- a/inst/rmarkdown/templates/NPS_DRR/skeleton/skeleton.Rmd +++ b/inst/rmarkdown/templates/NPS_DRR/skeleton/skeleton.Rmd @@ -21,7 +21,6 @@ Specific Instructions on filling out this template: https://nationalparkservice.github.io/QCkit/articles/Using-the-DRR-Template.html ---> ``` - ```{r user_edited_parameterss, include=FALSE} # The title of your DRR. Should all DRR start with "Data Release Report:"? Should we enforce titles specifically referencing the data package(s) the Report is about? title <- "DRR Title" @@ -308,7 +307,7 @@ The Data Records section should be used to explain each data record associated w This DRR describes the data package *`r dataPackageTitle`* which contains a metadata file and `r length(dataPackage_fileNames)` data files. These data were compiled and processed for dissemination by the National Park Service Inventory and Monitoring Division (IMD) and are available at `r dataPackageDOI` (see Table 1). -```{r FileTable, echo=FALSE} +```{r file_table, echo=FALSE} filelist <- data.frame(dataPackage_fileNames, dataPackage_fileSizes, dataPackage_fileDescript) knitr::kable(filelist, caption = paste0("**Table 1. 
", dataPackageTitle, ": List of data files.**"), col.names = c("**File Name**", "**Size**", "**Description**"), format = "pandoc") @@ -324,13 +323,13 @@ The Data Quality Evaluation section should present any analyses that are needed *Required Table* -```{r dataAcceptanceCriteria, echo=FALSE, eval=TRUE} +```{r data_acceptance_criteria, echo = FALSE, eval = TRUE} # To turn off, set eval=FALSE. # Generates a table of acceptance criteria for each of the data quality fields in your data package. Mitigations taken when data did not meet the acceptance criteria should be described textually in the Data Quality Evaluation section. -# Specify which columns in your data package are data quality fields in the dataQualityFields variable. In the example below, data quality fields/columns in the data package are listed in the format [FieldName]_flag. These data quality fields relate to the respective temporal, taxonomic, and geographic data. +# Specify which columns in your data package are data quality fields in the data_quality_fields variable. In the example below, data quality fields/columns in the data package are listed in the format [FieldName]_flag. These data quality fields relate to the respective temporal, taxonomic, and geographic data. -dataQualityFields <- c( +data_quality_fields <- c( "eventDate_flag", "scientificName_flag", "coordinate_flag" @@ -338,17 +337,23 @@ dataQualityFields <- c( # Brief description of the acceptance criteria for each respective data quality field. The order of the acceptance criteria must match the order of the data quality fields. 
-dataQualityAcceptanceCriteria <- c( +data_quality_acceptance_criteria <- c( "Sampling event date within the start and end dates of the project", "Taxon exists within Integrated Taxonomic Information System and GBIF", "Sampling location is within the park unit boundaries" ) -data_criteria<-data.frame(dataQualityFields = str_remove(dataQualityFields, "_flag"), dataQualityAcceptanceCriteria) +data_criteria <- data.frame(data_quality_fields = + str_remove(data_quality_fields, "_flag"), + data_quality_acceptance_criteria) data_criteria %>% NMFSReports::format_cells(1:3, 1, "bold") %>% - knitr::kable(caption = "**Table 2. Acceptance criteria for data evaluated.**", col.names=c("**Field**", "**Acceptance Criteria**"), format="pandoc", align = 'c') + knitr::kable(caption = "**Table 2. Acceptance criteria for data evaluated.**", + col.names=c("**Field**", + "**Acceptance Criteria**"), + format="pandoc", + align = 'c') ``` @@ -357,7 +362,9 @@ data_criteria %>% # Generates a table summarizing QC at the column level within each file. All flagged columns are included. To add additional non-flagged columns, specify them with column names: cols=("my_unflagged_data1", "my_unflagged_data2)" or numbers: cols=c(1:4). All non-missing data in unflagged columns is assumed accepted. If a file has no flagged columns and no specified custom columns, all values for that data file will be listed as "NA". #set directory to the location of your data package: -dc_flags <- QCkit::get_custom_flags(here::here("Untitled", "BICY_Example"), output="columns") +dc_flags <- QCkit::get_custom_flags(here::here("Untitled", + "BICY_Example"), + output="columns") dc_flags$`File Name` <- gsub(".csv", "", dc_flags$`File Name`) @@ -554,7 +561,7 @@ United States Geologic Survey (USGS). 2016. BioData - Aquatic Bioassessment Data In most cases, Code listing is not required. 
If all QA/QC and data manipulations were performed elsewhere, you should cite that code in the methods (and leave the "Listing" code chunk as the default settings: eval=FALSE and echo=FALSE). If you have developed custom scripts, you can add those to DataStore with the reference type "Script" and cite them in the DRR. Some people have developed code to perform QA/QC or data manipulation within the DRR itself. In that case, you must set the "Listing" code chunk to eval=TRUE and echo=TRUE to fully document the QA/QC process. -```{r Listing, ref.label=knitr::all_labels(), echo=TRUE, eval=TRUE} +```{r listing, ref.label=knitr::all_labels(), echo=TRUE, eval=TRUE} ``` @@ -564,7 +571,7 @@ In most cases, Code listing is not required. If all QA/QC and data manipulations In most cases you do not need to report session info (leave the "session-info" code chunk parameters in their default state: eval=FALSE). Session and version information is only necessary if you have set the "Listing" code chunk to eval=TRUE in appendix A. In that case, change the "session-info" code chunk parameters to eval=TRUE. -```{r session-info, eval=TRUE, echo=FALSE, cache=FALSE} +```{r session_info, eval=TRUE, echo=FALSE, cache=FALSE} sessionInfo() Sys.time() ``` From a4ae531a7c0ac89c3ae87249b11608e5202b7954 Mon Sep 17 00:00:00 2001 From: Rob Baker Date: Mon, 12 Feb 2024 14:23:15 -0700 Subject: [PATCH 2/4] update documentation --- vignettes/articles/Starting-a-DRR.Rmd | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/vignettes/articles/Starting-a-DRR.Rmd b/vignettes/articles/Starting-a-DRR.Rmd index ff3dd09..fc1f056 100644 --- a/vignettes/articles/Starting-a-DRR.Rmd +++ b/vignettes/articles/Starting-a-DRR.Rmd @@ -20,15 +20,17 @@ knitr::opts_chunk$set(echo = FALSE) [Purpose and Scope of Data Release Reports](DRR_Purpose_and_Scope.html) -This is a template is for use when drafting Data Release Reports. 
DRRs are created by the National Park Service and provide detailed descriptions of valuable research datasets, including the methods used to collect the data and technical analyses supporting the quality of the measurements. Data Release Reports focus on helping others reuse data rather than presenting results, testing hypotheses, or presenting new interpretations and in-depth analyses. +DRRs are created by the National Park Service and provide detailed descriptions of valuable research datasets, including the methods used to collect the data and technical analyses supporting the quality of the measurements. Data Release Reports focus on helping others reuse data rather than presenting results, testing hypotheses, or presenting new interpretations and in-depth analyses. -# About this Template +# About the DRR Template -This template contains an rmarkdown template file, default folder structure for project files, and all the necessary template files to generate an unformatted .docx file. Upon submission for publication, the .docx file will be ingested by EXstyles, converted to an .xml file and fully formatted according to NPS branding upon final publication. The goal of this process is to relieve data producers, managers, and scientists from the burden of formatting and allow them to focus primarily on content. Consequently, the .docx generated for the publication process may not be visually appealing. The content, however, should focus on the production, quality, and utility of NPS data packages. +Opening a new NPS DRR Template will write a folder to the current working directory that contains an rmarkdown (.rmd) file that is the DRR Template, a references.bib file for bibtex references, a national-park-service-DRR.csl file for formatting references, and a sub-folder, BICY_Example, with an example data package that can be used to knit an example DRR to .docx. 
+ +Upon submission for publication, the .docx file will be ingested by EXstyles, converted to an .xml file and fully formatted according to NPS branding and in compliance with 508 accessibility requirements upon final publication. The goal of this process is to relieve data producers, managers, and scientists from the burden of formatting and allow them to focus primarily on content. Consequently, the .docx generated for the publication process may not be visually appealing. The content, however, should focus on the production, quality, and utility of NPS data packages. # How to Start a DRR -1. **To start your DRR you will need all of your data in flat .csv files**. All quality assurance, quality control, and quality flagging should be completed. Ideally you have already created or are in the process of creating a data package. All of the .csv files you want to describe in the DRR should be in a single folder with **no additional .csv files** (other files such as .txt and .xml will be ignored). This folder can be the same folder you used/are using to create a data package. +1. **To start your DRR you will need all of your data in flat .csv files**. All quality assurance, quality control, and quality flagging should be completed. Ideally you have already created or are in the process of creating a data package (see the [documentation](https://nationalparkservice.github.io/EMLeditor/articles/a02_EML_creation_script.html) associated with the R package [EMLeditor](https://nationalparkservice.github.io/EMLeditor/index.html) for data package creation). All of the .csv files you want to describe in the DRR should be in a single folder with **no additional .csv files** (other files such as .txt and .xml will be ignored). This folder can be the same folder you used/are using to create a data package. 2. **Using Rstudio, open an R project** (Select: File \> New Project...) in the same folder as your .csv files. 
If you already have an R project (.Rproj) initiated from creating a data package, you can use that same R project. @@ -51,7 +53,7 @@ devtools::install_github("nationalparkservice/QCkit") library(QCkit) ``` -4. **Open a new DRR Template**. From within Rstudio, select the "File" drop down menu. Choose "New File" and then "R markdown...". This will open up a pop-up dialog box. Select "From Template" on in the right-hand list and choose the template labelled "NPS_DRR {QCkit}". You can change the file and folder name to something other than "Untitled", but if you do so the example DRR will not render properly. Click OK. A new folder will be generated in your current working directory titled, "Untitled" (or whatever name you have opted to call it). +4. **Open a new DRR Template**. From within Rstudio, select the "File" drop down menu. Choose "New File >" and then "R markdown...". This will open up a pop-up dialog box. Select "From Template" on in the right-hand list and choose the template labelled "NPS_DRR {QCkit}". You can change the file and folder name to something other than "Untitled", but if you do so the example DRR will not render properly. Click OK. A new folder will be generated in your current working directory titled, "Untitled" (or whatever name you have opted to call it). ```{r echo = FALSE, out.width = "40%", out.height = "20%", fig.caption = "Opening a new DRR Template"} knitr::include_graphics(c("new_rmd.png", "new_rmd_template.png")) @@ -59,11 +61,11 @@ knitr::include_graphics(c("new_rmd.png", "new_rmd_template.png")) 5. After selecting "OK" two things will happen: First, you the DRR Template file will open up. It is called "Untitled.Rmd" by default. Second, a new folder will be created called "Untitled" (Unless you opted to change the default "Name:" in the "New R Markdown" pop up, then these will have whatever name you gave them). -6. 
**Edit the DRR Template** according to your specifications and the instructions in the ["Using the DRR Template"](Using-the-DRR-Template.html) guide. +6. **Edit the DRR Template** to reflect the data you would like to describe and according to the instructions in the ["Using the DRR Template"](Using-the-DRR-Template.html) guide. 7. **"knit" the .rmd file to Word** when you are done editing it. Submit the resulting .docx file for publication (via a yet-to-be-determined process). # Examples -**Knit your own example DRR:** Assuming you left the "Name:" as the default "untitled", you should be able to knit the DRR template in to an example .docx that could be submitted for publication. If you opted to change the Name, you will need to update the the file paths before kniting. +**Knit your own example DRR:** Assuming you left the "Name:" as the default "Untitled", you should be able to knit the DRR template into an example .docx that could be submitted for publication. If you opted to change the Name, you will need to update the file paths before knitting. From ce213f7fecfd6a5bbc578c04163f80f8e0bbe655 Mon Sep 17 00:00:00 2001 From: Rob Baker Date: Mon, 12 Feb 2024 14:23:30 -0700 Subject: [PATCH 3/4] update documentation to reflect updated DRR template --- vignettes/articles/Using-the-DRR-Template.Rmd | 85 ++++++++++++------- 1 file changed, 53 insertions(+), 32 deletions(-) diff --git a/vignettes/articles/Using-the-DRR-Template.Rmd b/vignettes/articles/Using-the-DRR-Template.Rmd index 7097cae..df12084 100644 --- a/vignettes/articles/Using-the-DRR-Template.Rmd +++ b/vignettes/articles/Using-the-DRR-Template.Rmd @@ -89,23 +89,25 @@ the YAML header. if publishing in the semi-official DRR series. Set to NULL if there is no reportNumber. - `DRR_DSRefID`. This is the DataStore reference ID for the - report. - - `authorNames`. A list of the author's names. + report. It should be 7 digits long. + - `authorNames`. A list of the author's names. 
If an author has multiple + institutional affiliations, the author should be listed once for each + affiliation. - `authorAffiliations`. A list of the author's affiliations. The order of author affiliations must match the order of the authors in the `authorNames` list. Note that the entirety of each - affiliation is enclosed in a single set of quotations. Line - breaks are indicated with the
tag. Do not worry about - indentation or word wrapping. If two authors have the same + affiliation is enclosed in a single set of quotations. Do not worry + about indentation or word wrapping. If two authors have the same affiliation, list the affiliation twice. - `authorORCID`. A list of ORCID iDs for each author in the format - "xxxx-xxxx-xxxx-xxxx". If an author does not have an ORCID iD, + "(xxxx-xxxx-xxxx-xxxx)". If an author does not have an ORCID iD, specify NA (no quotes). The order of ORCID iDs (and NAs) must - correspond to the order of authors in the `authorNames` list. - Future iterations of the DRR Template will pull ORCID iDs from - metadata and eventually from Active Directory. See - [ORCID](https://www.orcid.org/) for more information about ORCID - iDs or to register an ORCID iD. + correspond to the order of authors in the `authorNames` list. If an + author was listed more than once in the `authorNames` list, the + corresponding ORCID (or NA) should also be listed more than once. Future + iterations of the DRR Template will pull ORCID iDs from metadata and + eventually from Active Directory. See [ORCID](https://www.orcid.org/) + for more information about ORCID iDs or to register an ORCID iD. - `DRRabstract`. The abstract for the DRR (which may be distinct from the data package abstract). Pay careful attention to non-standard characters, line breaks, carriage returns, and @@ -133,7 +135,9 @@ the YAML header. reference number. - `dataPackage_fileNames`. List the file names in your data package. Do NOT include metadata files. For example, include - "my_data.csv" but do NOT include "my_metadata.xml". + "my_data.csv" but do NOT include "my_metadata.xml". Note: Because data + packages contain only .csv and .xml files, all data files should be + .csv. - `dataPackage_fileSizes`. List the approximate size of each data file. Make sure the order of the file sizes corresponds to the order of file names in `dataPackage_fileNames`. 
@@ -144,15 +148,13 @@ the YAML header. have already created metadata for your data package in EML format, this should be the same text as found in the "entityDescription" element for each data file. - -- `setup`. Most users will not need to edit this code chunk. There is - one code snippet for loading packages; the `RRpackages` section is a +- `setup_do_not_edit`. Most users will not need to edit this code chunk. There + is one code snippet for loading packages; the `RRpackages` section is a suite of packages that are used to assist with reproducible reporting. You may not need these for your report, but we have - included them as part of the base recommended packages. If you plan - to perform you QC as part of the DRR construction process, you can - add a second code snipped to import necessary packages for your QC - process here. + included them as part of the base recommended packages. If you plan to + perform you QC as part of the DRR construction process, you can add a second + code snipped to import necessary packages for your QC process here. - `title_do_not_edit`. These parameters are auto-generated based on either the EML you supplied (when that becomes an option) or the @@ -163,19 +165,38 @@ the YAML header. writes the author names, ORCID iDs, and affiliations to the .docx document based on information supplied in user-edited-parameters. -- `LoadData`. Any datasets you need to load can go here. For most - people these datasets are used to generate summary statistics on - proportions of data that were flagged as accepted (A) accepted, - estimated (AE) and rejected (R) during the quality control process. - -- `FileTable`. Do not edit. Generates a table of file names, sizes, +- `file_table`. Do not edit. Generates a table of file names, sizes, and descriptions in the data package being described by the DRR. - -- `dataFlaggingTable`. This sample code provides a summary table - defining the suggested data flagging codes. There is no need to edit - this table. 
- -- `Listing`. Appendix A, by default is the code listing. This will + +- `data_acceptance_criteria`. If you did not use the standard data quality + assurance flags (A = accepted, AE = Accepted (estimated), R = Rejected, P = + Provisional), set this code chunk to `eval = FALSE` and generate your own + custom code chunk to summarize your custom data flagging procedures. If you + did use the standard QA flags, indicate which fields in your data files + contain flagged data. Assuming your column names are unique, you do not need + to specify which file the columns are in. If your column names are not + unique, you will need to design your own summary table. Briefly describe the + acceptance criteria for each data quality flagged column in the same order + as you specified the columns. + +- `data_column_flagging`. Uses the input from `data_acceptance_criteria` to + generate a table summarizing the data quality flagging in the data package. + If you set `data_acceptance_criteria` parameter `eval = FALSE`, also set + `data_column_flagging` parameter to `eval = FALSE`. Update the first line + (which in the example points to BICY_Example) to point to the directory + where your data are. + +- `data_package_flagging`. If you used standard QA flags in your data package, + leave the parameter `eval = TRUE`. If you did not use standard QA flags, set + `eval = FALSE` and design your own custom summary table to handle your + custom flagging protocols. If you set `eval = TRUE`, update the file path + to point to your data files (in the example, the path points to the + directory "BICY_Example"). + +- `figure1`. This is an example code chunk for inserting figures. Edit and + re-deploy as necessary to include as many or as few figures as you require. + +- `listing`. Appendix A, by default is the code listing. This will generate all code used in generating the report and data packages. In most cases, code listing is not required. 
If all QA/QC processes and data manipulations were performed elsewhere, you should cite @@ -184,7 +205,7 @@ the YAML header. If you have developed custom scripts, you can add those to DataStore with the reference "Script" and cite them in the DRR. -- `session-info` is the information about the versions of R and +- `session_info` is the information about the versions of R and packages used in generating the report. In most cases, you do not need to report session info (leave the session-info code chunk parameters in their default state: eval=FALSE). Session and version From 2c9d05ff144ebfdf02ec99b0c73de2d4f9dd0c07 Mon Sep 17 00:00:00 2001 From: Rob Baker Date: Mon, 12 Feb 2024 14:27:23 -0700 Subject: [PATCH 4/4] update via pkgdown and devtools::document --- docs/articles/Starting-a-DRR.html | 53 ++++++++-------- docs/articles/Using-the-DRR-Template.html | 74 +++++++++++++++-------- docs/pkgdown.yml | 2 +- 3 files changed, 80 insertions(+), 49 deletions(-) diff --git a/docs/articles/Starting-a-DRR.html b/docs/articles/Starting-a-DRR.html index 733b133..a0220b4 100644 --- a/docs/articles/Starting-a-DRR.html +++ b/docs/articles/Starting-a-DRR.html @@ -102,8 +102,7 @@

DRRs: Background

Purpose and Scope of Data Release Reports

-

This is a template is for use when drafting Data Release Reports. -DRRs are created by the National Park Service and provide detailed +

DRRs are created by the National Park Service and provide detailed descriptions of valuable research datasets, including the methods used to collect the data and technical analyses supporting the quality of the measurements. Data Release Reports focus on helping others reuse data @@ -111,19 +110,23 @@

DRRs: Background
-

About this Template +

About the DRR Template

-

This template contains an rmarkdown template file, default folder -structure for project files, and all the necessary template files to -generate an unformatted .docx file. Upon submission for publication, the -.docx file will be ingested by EXstyles, converted to an .xml file and -fully formatted according to NPS branding upon final publication. The -goal of this process is to relieve data producers, managers, and -scientists from the burden of formatting and allow them to focus -primarily on content. Consequently, the .docx generated for the -publication process may not be visually appealing. The content, however, -should focus on the production, quality, and utility of NPS data -packages.

+

Opening a new NPS DRR Template will write a folder to the current +working directory that contains an rmarkdown (.rmd) file that is the +DRR Template, a references.bib file for bibtex references, a +national-park-service-DRR.csl file for formatting references, and a +sub-folder, BICY_Example, with an example data package that can be used +to knit an example DRR to .docx.

+

Upon submission for publication, the .docx file will be ingested by +EXstyles, converted to an .xml file and fully formatted according to NPS +branding and in compliance with 508 accessibility requirements upon +final publication. The goal of this process is to relieve data +producers, managers, and scientists from the burden of formatting and +allow them to focus primarily on content. Consequently, the .docx +generated for the publication process may not be visually appealing. The +content, however, should focus on the production, quality, and utility +of NPS data packages.

How to Start a DRR @@ -132,11 +135,13 @@

How to Start a DRRdocumentation +associated with the R package EMLeditor +for data package creation). All of the .csv files you want to describe +in the DRR should be in a single folder with no additional .csv +files (other files such as .txt and .xml will be ignored). This +folder can be the same folder you used/are using to create a data +package.

  • Using Rstudio, open an R project (Select: File > New Project…) in the same folder as your .csv files. If you already have an R project (.Rproj) initiated from creating a data package, you @@ -166,7 +171,7 @@

    How to Start a DRR
  • Open a new DRR Template. From within Rstudio, -select the “File” drop down menu. Choose “New File” and then “R +select the “File” drop down menu. Choose “New File >” and then “R markdown…”. This will open up a pop-up dialog box. Select “From Template” on in the right-hand list and choose the template labelled “NPS_DRR {QCkit}”. You can change the file and folder name to something @@ -182,8 +187,8 @@

    How to Start a DRR“Using the DRR Template” +
  • Edit the DRR Template to reflect the data you +would like to descibe and according to the instructions in the “Using the DRR Template” guide.

  • “knit” the .rmd file to Word when you are done editing it. Submit the resulting .docx file for publication (via a @@ -194,10 +199,10 @@

    How to Start a DRRExamples

    Knit your own example DRR: Assuming you left the -“Name:” as the default “untitled”, you should be able to knit the DRR +“Name:” as the default “Untitled”, you should be able to knit the DRR template in to an example .docx that could be submitted for publication. If you opted to change the Name, you will need to update the the file -paths before kniting.

    +paths before knitting.

  • diff --git a/docs/articles/Using-the-DRR-Template.html b/docs/articles/Using-the-DRR-Template.html index c605d0f..451e9c7 100644 --- a/docs/articles/Using-the-DRR-Template.html +++ b/docs/articles/Using-the-DRR-Template.html @@ -158,24 +158,27 @@

    Code ChunksORCID for more information about ORCID +to the order of authors in the authorNames list. If an +author was listed more than once in the authorNames list, +the corresponding ORCID (or NA) should also be listed more than once. +Future iterations of the DRR Template will pull ORCID iDs from metadata +and eventually from Active Directory. See ORCID for more information about ORCID iDs or to register an ORCID iD.
  • DRRabstract. The abstract for the DRR (which may be @@ -209,7 +212,9 @@

    Code ChunksCode ChunksCode ChunksCode Chunks