From a258cd3b557e3cd3ba221cefcfcbcd8ceb5f3060 Mon Sep 17 00:00:00 2001 From: Alex Axthelm Date: Wed, 10 Apr 2024 12:19:40 +0200 Subject: [PATCH] feat: add more checks on repo status (#13) --- .Rbuildignore | 1 + .github/{workflows => }/CODEOWNERS | 0 .gitignore | 3 + .lintr | 9 +++ DESCRIPTION | 10 ++- R/codeowners.R | 51 +++++++++++++ R/dependencies.R | 63 ++++++++++++++++ R/generate_package_table.R | 39 ++-------- R/get_gh_file.R | 61 ++++++++++++++++ R/get_repos.R | 20 ++++++ R/prod_checks.R | 53 ++++++++++++++ R/repo_lists.R | 23 ++++++ R/rulesets.R | 98 +++++++++++++++++++++++++ R/utils.R | 12 ++++ R/workflows.R | 112 +++++++++++++++++++++++++++++ inst/extdata/github-repos.Rmd | 99 +++++++++++++++++++++++++ 16 files changed, 617 insertions(+), 37 deletions(-) rename .github/{workflows => }/CODEOWNERS (100%) create mode 100644 .lintr create mode 100644 R/codeowners.R create mode 100644 R/dependencies.R create mode 100644 R/get_gh_file.R create mode 100644 R/get_repos.R create mode 100644 R/prod_checks.R create mode 100644 R/repo_lists.R create mode 100644 R/rulesets.R create mode 100644 R/utils.R create mode 100644 R/workflows.R create mode 100644 inst/extdata/github-repos.Rmd diff --git a/.Rbuildignore b/.Rbuildignore index 6bea544..e472355 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -4,3 +4,4 @@ ^assets$ ^README\.Rmd$ ^\.github$ +^\.lintr$ diff --git a/.github/workflows/CODEOWNERS b/.github/CODEOWNERS similarity index 100% rename from .github/workflows/CODEOWNERS rename to .github/CODEOWNERS diff --git a/.gitignore b/.gitignore index 5b6a065..2064942 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,6 @@ .Rhistory .RData .Ruserdata +inst/extdata/*_cache +inst/extdata/*.html +!inst/extdata/*.[Rr][Mm][Dd] diff --git a/.lintr b/.lintr new file mode 100644 index 0000000..90bd541 --- /dev/null +++ b/.lintr @@ -0,0 +1,9 @@ +linters: all_linters() +exclusions: list( + "tests/testthat.R", + "tests/testthat/test-generate_package_table.R", + "R/generate_package_table.R", + "R/repo_lists.R" = c( + nonportable_path_linter = Inf + ) + ) diff --git a/DESCRIPTION b/DESCRIPTION index bd1ad80..95934b8 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: pacta.sit.rep Title: What the Package Does (One Line, Title Case) -Version: 0.0.0.9002 +Version: 0.0.0.9003 Authors@R: person("Jackson", "Hoffart", , "jackson.hoffart@gmail.com", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-8600-5042")) @@ -10,12 +10,18 @@ Encoding: UTF-8 Roxygen: list(markdown = TRUE) RoxygenNote: 7.3.1 Suggests: + gt, + memoise, testthat (>= 3.0.0) Config/testthat/edition: 3 Imports: + base64enc, dplyr, gh, glue, + jsonlite, + pak, purrr, rlang, - tibble + tibble, + yaml diff --git a/R/codeowners.R b/R/codeowners.R new file mode 100644 index 0000000..596aa9f --- /dev/null +++ b/R/codeowners.R @@ -0,0 +1,51 @@ +get_codeowner <- function( + repo_fullname, + path = ".github/CODEOWNERS", # nolint: non_portable_path + format = TRUE +) { + content <- get_gh_text_file(repo_fullname, file_path = path) + if (is.null(content)) { + return(NULL) + } + default_owner <- grep( + pattern = "^\\s*\\*\\s+@\\S+", + x = content, + value = TRUE + ) + default_owner_clean <- gsub( + pattern = "^\\s*\\*\\s+@|\\s*$", + replacement = "", + x = default_owner + ) + if (format) { + default_owner_clean <- paste0("@", default_owner_clean) + } + return(default_owner_clean) +} +if (requireNamespace("memoise")) { + get_codeowner <- memoise::memoise(get_codeowner) +} + + +get_codeowner_errors <- function(repo_fullname) { + raw_content <- tryCatch( + expr = { + raw_content <- gh::gh( + "GET /repos/{repo}/codeowners/errors", # nolint: non_portable_path + repo = repo_fullname + ) + }, + error = function(e) { + NA_character_ + } + ) + if (identical(raw_content, NA_character_)) { + # Early return + return(NA_integer_) + } + errors <- raw_content[["errors"]] + return(length(errors)) +} +if (requireNamespace("memoise")) { + get_codeowner_errors <- memoise::memoise(get_codeowner_errors) +} diff --git a/R/dependencies.R b/R/dependencies.R new file mode 100644 index 0000000..d0191a7 --- /dev/null +++ b/R/dependencies.R @@ -0,0 +1,63 @@ +identify_gh_deps <- function(repo_fullname) { + deps <- tryCatch( + expr = { + pak::pkg_deps(repo_fullname) + }, + error = function(e) { + NULL + } + ) + if (is.null(deps)) { + return(NA_character_) + } + gh_deps <- deps[["type"]] == "github" + gh_dep_refs <- deps[gh_deps, "ref"] + return(gh_dep_refs) +} +if (requireNamespace("memoise")) { + identify_gh_deps <- memoise::memoise(identify_gh_deps) +} + +identify_if_dep <- function( + repo_fullname, + repos_to_check = list( + prod = c(prod_workflows, r2dii.repos), + experimental = experimental_workflows + ), + return_max = TRUE +) { + + dep_lists <- lapply(repos_to_check, function(repos) { + deps <- lapply( + X = repos, + FUN = identify_gh_deps + ) + clean_deps <- unique(unlist(deps)) + return(clean_deps) + }) + + dep_levels <- NULL + for (i in seq_along(dep_lists)) { + if ( + repo_fullname %in% dep_lists[[i]] || + repo_fullname %in% repos_to_check[[i]] + ) { + dep_levels <- c(names(dep_lists)[i], dep_levels) + } + } + if (is.null(dep_levels)) { + dep_levels <- NA_character_ + } + + dep_levels_factor <- factor( + x = dep_levels, + levels = c("experimental", "prod"), + ordered = TRUE + ) + + if (return_max) { + return(max(dep_levels_factor)) + } else { + return(dep_levels_factor) + } +} diff --git a/R/generate_package_table.R b/R/generate_package_table.R index eb36ebe..f03e369 100644 --- a/R/generate_package_table.R +++ b/R/generate_package_table.R @@ -98,15 +98,15 @@ format_maintainer <- function(maintainer) { format_maintainer_v <- Vectorize(format_maintainer) table_lifecycle <- function(repo_path) { - readme <- fetch_readme(repo_path) + readme <- get_gh_text_file(repo_path, file_path = "README.md") if (is.null(readme)) { return(NA_character_) } - pattern <- "https://img.shields.io/badge/lifecycle-.*.svg" + pattern <- "https://img.shields.io/badge/lifecycle-\\S+.svg" - lifecycle_badge <- readme[grepl(pattern, readme)] + lifecycle_badge <- readme[grepl(pattern, readme)][1] lifecycle_badge <- gsub(".*(https[^)]*\\.svg).*", "\\1", lifecycle_badge) if (length(lifecycle_badge) == 0) { @@ -136,7 +136,7 @@ table_latest_sha <- function(repo_path) { } table_maintainer <- function(repo_path) { - codeowners <- fetch_codeowners(repo_path) + codeowners <- get_gh_text_file(repo_path, file_path = ".github/CODEOWNERS") if (is.null(codeowners)) { return(NA_character_) } @@ -153,22 +153,6 @@ table_maintainer <- function(repo_path) { return(maintainer) } -fetch_codeowners <- function(repo_path) { - response <- tryCatch( - gh::gh( - "/repos/{repo}/contents/.github/CODEOWNERS", - repo = repo_path, - ref = "main" - ), - error = function(cond) return(NULL) - ) - - if (!is.null(response)) { - return(readLines(response$download_url)) - } - return(NULL) -} - fetch_main_sha <- function(repo_path) { response <- tryCatch( gh::gh( @@ -183,18 +167,3 @@ fetch_main_sha <- function(repo_path) { } return(NULL) } - -fetch_readme <- function(repo_path) { - response <- tryCatch( - gh::gh( - "/repos/{repo}/contents/README.md", - repo = repo_path - ), - error = function(cond) return(NULL) - ) - - if (!is.null(response)) { - return(readLines(response$download_url)) - } - return(NULL) -} diff --git a/R/get_gh_file.R b/R/get_gh_file.R new file mode 100644 index 0000000..a8028c0 --- /dev/null +++ b/R/get_gh_file.R @@ -0,0 +1,61 @@ +get_gh_text_file <- function(repo_fullname, file_path) { + response <- tryCatch( + expr = { + gh::gh( + "/repos/{repo_fullname}/contents/{file_path}", # nolint: non_portable_path + repo_fullname = repo_fullname, + file_path = file_path + ) + }, + error = function(cond) return(NULL) + ) + if (is.null(response)) { + out <- NULL + } else if (response[["content"]] == "") { + # files larger than 1MB are not embedded in content + out <- readLines( + response[["download_url"]] + ) + } else { + out <- strsplit( + x = rawToChar( + base64enc::base64decode( + response[["content"]] + ) + ), + split = "\n", + fixed = TRUE + )[[1L]] + } + return(out) +} +if (requireNamespace("memoise")) { + get_gh_text_file <- memoise::memoise(get_gh_text_file) +} + +get_gh_dir_listing <- function(repo_fullname, dir_path) { + response <- tryCatch( + expr = { + gh::gh( + "/repos/{repo_fullname}/contents/{dir_path}", # nolint: non_portable_path + repo_fullname = repo_fullname, + dir_path = dir_path + ) + }, + error = function(cond) return(NULL) + ) + if (is.null(response)) { + out <- NULL + } else { + out <- lapply( + response, + function(x) { + list( + name = x[["name"]], + path = x[["path"]] + ) + } + ) + } + return(out) +} diff --git a/R/get_repos.R b/R/get_repos.R new file mode 100644 index 0000000..797023a --- /dev/null +++ b/R/get_repos.R @@ -0,0 +1,20 @@ +get_repos <- function(org) { + repo_fetch <- gh::gh( + "GET /orgs/{org}/repos", # nolint: non_portable_path + org = org, per_page = 20L) + all_repos <- repo_fetch + while ( + grepl( + pattern = "rel=\"next\"", + x = attr(repo_fetch, "response")[["link"]], + fixed = TRUE + ) + ) { + repo_fetch <- gh::gh_next(repo_fetch) + all_repos <- c(all_repos, repo_fetch) + } + return(all_repos) +} +if (requireNamespace("memoise")) { + get_repos <- memoise::memoise(get_repos) +} diff --git a/R/prod_checks.R b/R/prod_checks.R new file mode 100644 index 0000000..b130309 --- /dev/null +++ b/R/prod_checks.R @@ -0,0 +1,53 @@ +prod_checks <- function(repo_json) { + if (is.null(repo_json[["codeowner"]])) { + codeowner <- get_codeowner(repo_json[["full_name"]]) + repo_json[["codeowner"]] <- codeowner + } + if (is.null(repo_json[["codeowner_errors"]])) { + codeowner_errors <- get_codeowner_errors(repo_json[["full_name"]]) + repo_json[["codeowner_errors"]] <- codeowner_errors + } + if (is.null(repo_json[["gh_deps"]])) { + gh_deps <- identify_gh_deps(repo_json[["full_name"]]) + repo_json[["gh_deps"]] <- gh_deps + } + if (is.null(repo_json[["dep_tree"]])) { + dep_tree <- identify_if_dep(repo_json[["full_name"]]) + repo_json[["dep_tree"]] <- dep_tree + } + if (is.null(repo_json[["lifecycle_badge"]])) { + lifecycle_badge <- format_lifecycle( + table_lifecycle(repo_json[["full_name"]]) + ) + repo_json[["lifecycle_badge"]] <- lifecycle_badge + } + if (is.null(repo_json[["has_docker"]])) { + repo_json[["has_docker"]] <- !is.null( + get_gh_text_file(repo_json[["full_name"]], "Dockerfile") + ) + } + if (is.null(repo_json[["has_R"]])) { + repo_json[["has_R"]] <- !is.null( + get_gh_dir_listing(repo_json[["full_name"]], "R") + ) + } + expected_workflows <- NULL + if (repo_json[["has_R"]]) { + expected_workflows <- c(expected_workflows, "R.yml") + } + if (repo_json[["has_docker"]]) { + expected_workflows <- c(expected_workflows, "docker.yml") + } + if (is.null(repo_json[["workflows"]])) { + workflows <- workflow_summary(repo_json[["full_name"]]) + repo_json[["workflows"]] <- workflows + } + if (is.null(repo_json[["enabled_rules"]])) { + enabled_rules <- check_actions_rulesets(repo_json[["full_name"]]) + repo_json[["enabled_rules"]] <- enabled_rules + } + return(repo_json) +} +if (requireNamespace("memoise")) { + prod_checks <- memoise::memoise(prod_checks) +} diff --git a/R/repo_lists.R b/R/repo_lists.R new file mode 100644 index 0000000..a339848 --- /dev/null +++ b/R/repo_lists.R @@ -0,0 +1,23 @@ +prod_workflows <- c( + "RMI-PACTA/pactaverse", + "RMI-PACTA/workflow.data.preparation", + "RMI-PACTA/workflow.portfolio.parsing", + "RMI-PACTA/workflow.prepare.pacta.indices", + "RMI-PACTA/workflow.scenario.preparation", + "RMI-PACTA/workflow.transition.monitor" +) + +r2dii.repos <- c( + "RMI-PACTA/r2dii.data", + "RMI-PACTA/r2dii.match", + "RMI-PACTA/r2dii.analysis", + "RMI-PACTA/r2dii.plot" +) + +experimental_workflows <- c( + "RMI-PACTA/workflow.mfm2023", + "RMI-PACTA/workflow.pacta", + "RMI-PACTA/workflow.pacta.data.qa", + "RMI-PACTA/workflow.pacta.report" +) + diff --git a/R/rulesets.R b/R/rulesets.R new file mode 100644 index 0000000..70d6a26 --- /dev/null +++ b/R/rulesets.R @@ -0,0 +1,98 @@ +get_rulesets <- function(repo_fullname) { + response <- tryCatch( + expr = { + gh::gh( + "/repos/{repo_fullname}/rulesets", # nolint: non_portable_path + repo_fullname = repo_fullname + ) + }, + error = function(cond) return(NULL) + ) + if (is.null(response)) { + out <- NULL + } else { + out <- vapply( + X = response, + FUN = function(ruleset) { + get_ruleset_contents(repo_fullname, ruleset[["id"]]) + }, + FUN.VALUE = list(1L) + ) + } + return(out) +} +if (requireNamespace("memoise")) { + get_rulesets <- memoise::memoise(get_rulesets) +} + +get_ruleset_contents <- function(repo_fullname, ruleset_id) { + response <- tryCatch( + expr = { + gh::gh( + "/repos/{repo_fullname}/rulesets/{ruleset_id}", # nolint: non_portable_path + repo_fullname = repo_fullname, + ruleset_id = ruleset_id + ) + }, + error = function(cond) return(NULL) + ) + if (is.null(response)) { + out <- NULL + } else { + out <- list( + list( + id = response[["id"]], + name = response[["name"]], + target = response[["target"]], + enforcement = response[["enforcement"]], + conditions = response[["conditions"]], + rules = response[["rules"]] + ) + ) + } + return(out) +} +if (requireNamespace("memoise")) { + get_ruleset_contents <- memoise::memoise(get_ruleset_contents) +} + +check_actions_rulesets <- function(repo_fullname) { + ruleset_repo <- "RMI-PACTA/actions" # nolint: non_portable_path + rulesets <- get_rulesets(repo_fullname) + if (is.null(rulesets)) { + return(NULL) + } + expected_rules <- get_gh_dir_listing( + repo_fullname = ruleset_repo, + "rulesets" + ) + expected <- list() + for (i in seq_along(expected_rules)) { + file_content <- get_gh_text_file( + repo_fullname = ruleset_repo, + expected_rules[[i]][["path"]] + ) + filename <- expected_rules[[i]][["name"]] + expected[filename] <- list( + jsonlite::fromJSON( + txt = file_content, + simplifyDataFrame = FALSE, + simplifyVector = FALSE + ) + ) + } + enabled_rulesets <- vapply( + X = expected, + FUN = function(expected_ruleset) { + any(vapply( + X = rulesets, + FUN = function(ruleset) { + list_is_subset(expected_ruleset, ruleset) + }, + FUN.VALUE = logical(1L) + )) + }, + FUN.VALUE = logical(1L) + ) + return(as.list(enabled_rulesets)) +} diff --git a/R/utils.R b/R/utils.R new file mode 100644 index 0000000..ef30df9 --- /dev/null +++ b/R/utils.R @@ -0,0 +1,12 @@ +# check if all elements of x are in y, recursively +list_is_subset <- function(x, y) { + if (is.list(x) && is.list(y)) { + all(vapply(x, function(x_elem) { + any(vapply(y, function(y_elem) { + list_is_subset(x_elem, y_elem) + }, logical(1L))) + }, logical(1L))) + } else { + identical(x, y) + } +} diff --git a/R/workflows.R b/R/workflows.R new file mode 100644 index 0000000..0afb512 --- /dev/null +++ b/R/workflows.R @@ -0,0 +1,112 @@ +get_workflows <- function(repo_fullname) { + listing <- get_gh_dir_listing( + repo_fullname, + dir_path = ".github/workflows" # nolint: non_portable_path + ) + + contents <- list() + for (i in seq_along(listing)) { + file_content <- get_gh_text_file(repo_fullname, listing[[i]][["path"]]) + filename <- listing[[i]][["name"]] + contents[filename] <- list(file_content) + } + return(contents) +} +if (requireNamespace("memoise")) { + get_workflows <- memoise::memoise(get_workflows) +} + +parse_workflow <- function(workflow) { + workflow_list <- yaml::yaml.load(workflow) + + # {yaml} interprets `on:` as `TRUE` + triggers <- workflow_list[[TRUE]] + jobs <- workflow_list[["jobs"]] + rmi_actions <- vapply( + X = jobs, + FUN = function(job) { + if (is.null(job[["uses"]])) { + return(NA_character_) + } else { + out <- grep( + pattern = "RMI-PACTA/actions", # nolint: non_portable_path + x = job[["uses"]], + value = TRUE + ) + if (length(out) == 0L) { + return(NA_character_) + } + return(out) + } + }, + FUN.VALUE = character(1L) + ) + uses_rmi_actions <- !all(is.null(rmi_actions)) + return( + list( + triggers = triggers, + rmi_actions = rmi_actions, + uses_rmi_actions = uses_rmi_actions + ) + ) +} + +check_workflows <- function(repo_fullname) { + workflow_definitions <- get_workflows(repo_fullname) + lapply( + workflow_definitions, + parse_workflow + ) +} + +workflow_summary <- function( + repo_fullname, + expected = c( + "R.yml", + "docker.yml" + ) +) { + workflows <- check_workflows(repo_fullname) + workflow_summary <- list() + for (wf in expected) { + if (is.null(workflows[wf])) { + is_expected <- FALSE + details <- NULL + } else { + if ( + !("push" %in% names(workflows[[wf]][["triggers"]])) || + is.null(workflows[[wf]][["triggers"]][["push"]]) + ) { + checks_main <- FALSE + } else { + checks_main <- "main" %in% workflows[[wf]][["triggers"]][["push"]] + } + checks_prs <- "pull_request" %in% names(workflows[[wf]][["triggers"]]) + standard_checks <- checks_main && checks_prs + uses_correct_rmi_actions <- grepl( + pattern = wf, + x = workflows[[wf]][["rmi_actions"]], + fixed = TRUE + ) + is_expected <- standard_checks && uses_correct_rmi_actions + details <- list( + checks_main = checks_main, + checks_prs = checks_prs, + standard_checks = standard_checks, + uses_correct_rmi_actions = uses_correct_rmi_actions + ) + } + workflow_summary[[wf]] <- list( + standard = is_expected, + details = details + ) + } + workflow_summary[["all_standard"]] <- all( + vapply( + X = workflow_summary, + FUN = function(x) x[["standard"]], + FUN.VALUE = logical(1L) + ) + ) + return(workflow_summary) +} diff --git a/inst/extdata/github-repos.Rmd b/inst/extdata/github-repos.Rmd new file mode 100644 index 0000000..3a22d28 --- /dev/null +++ b/inst/extdata/github-repos.Rmd @@ -0,0 +1,99 @@ +--- +title: "GitHub repo details" +output: + html_document: + code_folding: hide + toc: true + theme: united +--- + +```{r setup, include=FALSE} +knitr::opts_chunk[["set"]]( + echo = TRUE, + cache = TRUE +) +``` + +```{r get_repos} + +org <- "RMI-PACTA" +all_repos <- get_repos(org) + +``` + +```{r active_repos} +active_repos <- all_repos |> + lapply( + FUN = function(x) { + if (x[["archived"]] || x[["disabled"]]) { + return(NULL) + } else { + return(x) + } + } + ) +#remove null elements +active_repos <- active_repos[!vapply( + X = active_repos, + FUN = is.null, + FUN.VALUE = logical(1L) +)] +``` + + +```{r identify_prod_dependencies, include=FALSE} + +prod_checked <- lapply(active_repos, prod_checks) + +``` + +```{r identify_prod_essential} + +prod_tibble <- prod_checked |> + lapply( + FUN = function(x) { + pruned <- list( + repo = x[["full_name"]], + codeowner = x[["codeowner"]], + codeowner_errors = x[["codeowner_errors"]], + dep_tree = x[["dep_tree"]], + lifecycle = x[["lifecycle_badge"]], + standard_workflows = x[["workflows"]][["all_standard"]], + main_protected = x[["enabled_rules"]][["protect-main.json"]], + private = x[["private"]], + updated = x[["updated_at"]], + open_issues = x[["open_issues_count"]], + license = x[["license"]][["name"]], + default_branch = x[["default_branch"]] + ) + clean <- lapply(pruned, function(x) { + if (is.null(x)) { + return(NA_character_) + } else { + return(x) + } + }) + return(clean) + } + ) |> + lapply( + FUN = tibble::as_tibble + ) |> + dplyr::bind_rows() |> + dplyr::arrange(desc(dep_tree), repo) + +knitr::kable(prod_tibble) + +``` + +```{r nice_output} + +prod_tibble |> + dplyr::select(-lifecycle) |> + gt::gt() |> + gt::opt_interactive( + use_pagination = FALSE, + use_filter = TRUE, + use_search = TRUE + ) +```