diff --git a/.github/workflows/check-templates.yml b/.github/workflows/check-templates.yml
index de5e9aac..e1992c23 100644
--- a/.github/workflows/check-templates.yml
+++ b/.github/workflows/check-templates.yml
@@ -7,6 +7,11 @@ on:
         default: '4.1'
         required: false
         type: string
+      exclude-templates-data:
+        description: "List of data generated by templates to exclude (templates script will run, but results won't be pushed to pharmaverseadam repo) - comma separated list"
+        default: 'adrs_basic_onco'
+        required: false
+        type: string
   push:
     branches:
       - main
@@ -189,7 +194,7 @@ jobs:
           }
         shell: Rscript {0}
 
-      - name: Add other data formats and suffixes
+      - name: Add other data formats and suffixes, exclude data
         run: |
           library(readxl)
           library(zip)
@@ -202,34 +207,80 @@ jobs:
           folder_path <- file.path(Sys.getenv("GITHUB_WORKSPACE"), "tmp")
           rda_files <- list.files(path = folder_path, pattern = "\\.rda$")
 
+          ex_files <- sub("\\.rda$", "", trimws(unlist(strsplit("${{ inputs.exclude-templates-data }}", ",")))) # datasets to exclude, normalised to names without the .rda extension
+
           for (rda_file in rda_files) {
-            print(sprintf("converting file %s", rda_file))
-            data <- loadRData(file.path(folder_path, rda_file))
-            suffix <- gsub("pharmaverse/admiral", "", "${{ github.repository }}")
-            if (nchar(suffix) > 0) {
-              rda_file_w_suffix <- gsub("\\.rda$", sprintf("_%s.rda", suffix), rda_file)
-              csv_file <- gsub("\\.rda$", sprintf("_%s.csv", suffix), rda_file)
-              file.rename(file.path(folder_path, rda_file), file.path(folder_path, rda_file_w_suffix))
+            # delete file if part of inputs.exclude-templates-data (compared without the .rda extension)
+            if (sub("\\.rda$", "", rda_file) %in% ex_files) {
+              file.remove(file.path(folder_path, rda_file))
+              cat("Deleted:", rda_file, "\n")
             } else {
-              csv_file <- gsub("\\.rda$", ".csv", rda_file)
+              print(sprintf("converting file %s", rda_file))
+              data <- loadRData(file.path(folder_path, rda_file))
+              suffix <- gsub("pharmaverse/admiral", "", "${{ github.repository }}")
+              if (nchar(suffix) > 0) {
+                rda_file_renamed <- gsub("\\.rda$", sprintf("_%s.rda", suffix), rda_file)
+                csv_file <- gsub("\\.rda$", sprintf("_%s.csv", suffix), rda_file)
+                file.rename(file.path(folder_path, rda_file), file.path(folder_path, rda_file_renamed))
+              } else {
+                # empty suffix: the .rda file keeps its original name
+                csv_file <- gsub("\\.rda$", ".csv", rda_file)
+                rda_file_renamed <- rda_file
+              }
+              write.csv(data, file = file.path(folder_path, csv_file), row.names = FALSE)
+
+              # create associated documentation inside R folder (NOTE(review): roxygen2 only reads "#'" lines - confirm plain "#" headers are intended)
+              dataset_name <- gsub("\\.rda$", "", rda_file_renamed)
+              doc_string <- paste(
+                c(
+                  sprintf('# Dataset %s', dataset_name),
+                  '#',
+                  sprintf('# %s dataset', dataset_name),
+                  '#',
+                  sprintf('# @name %s', dataset_name),
+                  '# @docType data',
+                  sprintf('# @format A data frame with %d columns:', ncol(data)),
+                  '# \\describe{',
+                  sprintf('#   \\item{%s}{Description goes here}', names(data)),
+                  '# }',
+                  '# @source Generated from ${{ github.repository }}.',
+                  '# @references None',
+                  sprintf('# @examples\n# load(%s)', dataset_name),
+                  ''
+                ),
+                collapse = '\n'
+              )
+
+              doc_dir <- file.path(Sys.getenv("GITHUB_WORKSPACE"), "datasets_doc")
+              if (!dir.exists(doc_dir)) {
+                dir.create(doc_dir, recursive = TRUE)
+              }
+              writeLines(doc_string, con = file.path(doc_dir, paste0(dataset_name, ".R")))
             }
-            write.csv(data, file = file.path(folder_path, csv_file), row.names = FALSE)
           }
         shell: Rscript {0}
 
       # zip templates data
-      - name: zip templates data
+      - name: zip artifacts data
         run: |
           find "$GITHUB_WORKSPACE/tmp" -type f \( -name "*.rda" -o -name "*.csv" \) -exec zip -j "$GITHUB_WORKSPACE/data.zip" {} \;
+          find "$GITHUB_WORKSPACE/datasets_doc" -type f -name "*.R" -exec zip -j "$GITHUB_WORKSPACE/doc.zip" {} \;
 
       # store templates data as artifacts
-      - name: Archive production artifacts
+      - name: Archive templates data
         uses: actions/upload-artifact@v3
         with:
           name: data_templates
           path: ${{ github.workspace }}/data.zip
 
+      # store templates documentation as artifacts
+      - name: Archive doc
+        uses: actions/upload-artifact@v3
+        with:
+          name: doc_templates
+          path: ${{ github.workspace }}/doc.zip
+
       - name: Checkout repo (PR) 🛎
         uses: actions/checkout@v3
         with:
@@ -255,6 +306,7 @@ jobs:
         run: |
           head_data_folder=$(git ls-tree -r HEAD data) # check if data folder exists on remote main branch
           unzip "${GITHUB_WORKSPACE}/data.zip" -d "${GITHUB_WORKSPACE}/inst/extdata/"
+          unzip "${GITHUB_WORKSPACE}/doc.zip" -d "${GITHUB_WORKSPACE}/R"
           # move every rda files inside data folder
           mkdir -p data
           for rda_file in inst/extdata/*.rda; do
@@ -296,7 +348,7 @@ jobs:
         uses: stefanzweifel/git-auto-commit-action@v4
         with:
           commit_message: templates data updates from ${{ github.repository }}
-          file_pattern: 'inst/extdata/* data/*'
+          file_pattern: 'inst/extdata/* data/* R/*'
           commit_user_name: pharmaverse-bot
           commit_user_email: 113703390+pharmaverse-bot@users.noreply.github.com
           branch: "${{ steps.branch.outputs.source-branch }}"