diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 7fa6ef7..e4601c8 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -8,41 +8,28 @@ on: pull_request: branches: - main - # run once a week + # run once a day (00:00 UTC) schedule: - cron: "0 0 * * *" - workflow_dispatch: + workflow_dispatch: jobs: GenBank: runs-on: ubuntu-latest + container: colebrookson/virion:latest steps: - uses: actions/checkout@v3 - - uses: gautamkrishnar/keepalive-workflow@v1 - - name: Setup Julia - uses: julia-actions/setup-julia@v1 + - uses: julia-actions/setup-julia@v1 with: version: 1.7 - - name: Julia dependencies (DF) - run: julia -e 'using Pkg; Pkg.activate("."); Pkg.add("CSV"); Pkg.add("DataFrames")' - - name: NCBITaxonomy (version from MAIN branch!) - run: julia -e 'using Pkg; Pkg.activate("."); Pkg.add(PackageSpec(name="NCBITaxonomy", rev="main"))' - - name: Setup R - uses: r-lib/actions/setup-r@v2 - - name: Libraries for tidyverse - run: sudo apt-get install -y libharfbuzz-dev libfribidi-dev - - name: dependencies!!!! 
+ - name: Download GenBank run: | - sudo apt-get install libcurl4-openssl-dev libarchive-dev - sudo Rscript -e 'install.packages(c("taxize", "tidyverse", "RCurl", "readr", "vroom", "magrittr", "fs", "data.table", "zip", "rglobi", "lubridate", "R.utils"), repos = "http://cran.us.r-project.org")' - - name: Download GenBank - run: | - Rscript -e 'source("Code/02_1a_Download GenBank.R")' + Rscript -e 'source("Code/02_1a_Download GenBank.R")' - name: Digest GenBank run: | Rscript -e 'source("Code/02_1b_Digest GenBank.R")' - name: Format GenBank run: | - Rscript -e 'source("Code/02_1c_Format GenBank.R")' + Rscript -e 'source("Code/02_1c_Format GenBank.R")' - name: Save artifacts uses: actions/upload-artifact@v2 with: @@ -50,25 +37,9 @@ jobs: path: Intermediate/Formatted/GenbankFormatted.csv.gz Globi: runs-on: ubuntu-latest + container: colebrookson/virion:latest steps: - uses: actions/checkout@v3 - - name: Setup R - uses: r-lib/actions/setup-r@v2 - - name: Libraries for tidyverse - run: sudo apt-get install -y libharfbuzz-dev libfribidi-dev - - name: dependencies!!!! - run: | - sudo apt-get install libcurl4-openssl-dev libarchive-dev - sudo Rscript -e 'install.packages(c("taxize", "tidyverse", "RCurl", "readr", "vroom", "magrittr", "fs", "data.table", "zip", "devtools", "lubridate"), repos = "http://cran.us.r-project.org")' - sudo Rscript -e 'devtools::install_github("ropensci/rglobi")' - - name: Setup Julia - uses: julia-actions/setup-julia@v1 - with: - version: 1.7 - - name: Julia dependencies (DF) - run: julia -e 'using Pkg; Pkg.activate("."); Pkg.add("CSV"); Pkg.add("DataFrames")' - - name: NCBITaxonomy (version from MAIN branch!) 
- run: julia -e 'using Pkg; Pkg.activate("."); Pkg.add(PackageSpec(name="NCBITaxonomy", rev="main"))' - name: Download Globi run: | Rscript -e 'source("Code/02_3a_Download GLOBI.R")' @@ -85,15 +56,10 @@ jobs: path: Intermediate/Formatted/GLOBIFormatted.csv Finish: runs-on: ubuntu-latest + container: colebrookson/virion:latest needs: [GenBank, Globi] steps: - uses: actions/checkout@v3 - - name: Setup R - uses: r-lib/actions/setup-r@v2 - - name: dependencies!!! - run: | - sudo apt-get install libcurl4-openssl-dev libarchive-dev libharfbuzz-dev libfribidi-dev - sudo Rscript -e 'install.packages(c("taxize", "tidyverse", "RCurl", "readr", "vroom", "magrittr", "fs", "data.table", "R.utils", "zip", "rglobi", "lubridate", "tidyft"), repos = "http://cran.us.r-project.org")' - name: Get GenBank uses: actions/download-artifact@v2 with: diff --git a/Code/001_Julia functions.R b/Code/001_Julia functions.R index 5454674..b1d7cf2 100644 --- a/Code/001_Julia functions.R +++ b/Code/001_Julia functions.R @@ -7,8 +7,8 @@ install.ncbi <- function() { # install the packages if needed - JuliaCall::julia_install_package_if_needed("NCBITaxonomy") + JuliaCall::julia_install_package_if_needed("NCBITaxonomy") JuliaCall::julia_install_package_if_needed("DataFrames") JuliaCall::julia_install_package_if_needed("CSV") JuliaCall::julia_install_package_if_needed("ProgressMeter") -} \ No newline at end of file +} diff --git a/Code/02_1a_Download GenBank.R b/Code/02_1a_Download GenBank.R index 5e403e3..240412d 100644 --- a/Code/02_1a_Download GenBank.R +++ b/Code/02_1a_Download GenBank.R @@ -11,24 +11,17 @@ library(magrittr) url = paste0("https://ftp.ncbi.nlm.nih.gov/genomes/Viruses/AllNuclMetadata/", "AllNuclMetadata.csv.gz") -d = tryCatch(utils::download.file(url, destfile = here::here( - "./Source/AllNuclMetadata.csv.gz")), - error = function(e){-999}) -if(d == -999) { - while (d == -999){ - Sys.sleep(600) - d = tryCatch(utils::download.file( - url, destfile = 
here::here("./Source/AllNuclMetadata.csv.gz")), - error = function(e){-999}) - } -} +location <- here::here("./Source/") +system(paste0("wget ", url, " -P ", location)) # reading this in - use data.table seq <- data.table::fread(here::here("./Source/AllNuclMetadata.csv.gz"), select = c("#Accession", "Release_Date", "Species", "Host", "Collection_Date")) -seq %<>% dplyr::rename(Accession = "#Accession") +print("readin") +seq %<>% dplyr::rename(Accession = "#Accession") # write out ==================================================================== vroom::vroom_write(seq, here::here("./Source/sequences.csv")) +print("written") diff --git a/Code/02_1b_Digest GenBank.R b/Code/02_1b_Digest GenBank.R index e560639..f80d392 100644 --- a/Code/02_1b_Digest GenBank.R +++ b/Code/02_1b_Digest GenBank.R @@ -11,13 +11,15 @@ library(magrittr) rentrez::set_entrez_key("ec345b39079e565bdfa744c3ef0d4b03ba08") # get the functions to do all the dictionary stuff -if(!exists("vdict")) {source(here::here("./Code/001_TaxizeFunctions.R"))} -if(!exists("jvdict")) {source(here::here("./Code/001_Julia functions.R"))} +source(here::here("./Code/001_TaxizeFunctions.R")) +source(here::here("./Code/001_Julia functions.R")) if(!file.exists(here::here("./Source/sequences.csv"))){ zip::unzip(here::here("./Source/GenBank.zip"), exdir = "Source") } +install.ncbi() + gb <- data.table::fread(here::here("./Source/sequences.csv")) %>% dplyr::as_tibble() diff --git a/Code/02_2a_Digest PREDICT.R b/Code/02_2a_Digest PREDICT.R index 9414729..dddfea1 100644 --- a/Code/02_2a_Digest PREDICT.R +++ b/Code/02_2a_Digest PREDICT.R @@ -30,7 +30,7 @@ predict.raw %>% TRUE, FALSE)) %>% mutate(Host = str_replace(Host, " \\*",""), - Host = str_replace(Host, "cf. ","")) %>% + Host = str_replace(Host, "cf. 
","")) %>% # Back up the virus names before doing anything else diff --git a/Code/02_3b_Digest GLOBI.R b/Code/02_3b_Digest GLOBI.R index 4b2539d..00bf20b 100644 --- a/Code/02_3b_Digest GLOBI.R +++ b/Code/02_3b_Digest GLOBI.R @@ -1,12 +1,12 @@ - -if(!exists('vdict')) {source('Code/001_TaxizeFunctions.R')} -if(!exists('jvdict')) {source('Code/001_Julia functions.R')} +source('Code/001_TaxizeFunctions.R') +source('Code/001_Julia functions.R') rentrez::set_entrez_key("ec345b39079e565bdfa744c3ef0d4b03ba08") library(tidyverse) library(taxize) library(magrittr) library(vroom) +install.ncbi() globi <- read_csv('Source/GLOBI-raw.csv') diff --git a/Code/Code_Dev/host.jl b/Code/Code_Dev/host.jl index b74b091..45445f5 100644 --- a/Code/Code_Dev/host.jl +++ b/Code/Code_Dev/host.jl @@ -1,5 +1,5 @@ -using NCBITaxonomy using DataFrames +using NCBITaxonomy import CSV include(joinpath(pwd(), "Code/Code_Dev/taxonomizer.jl")) diff --git a/Code/Code_Dev/pathogen.jl b/Code/Code_Dev/pathogen.jl index be621fe..68c004c 100644 --- a/Code/Code_Dev/pathogen.jl +++ b/Code/Code_Dev/pathogen.jl @@ -1,5 +1,5 @@ -using NCBITaxonomy using DataFrames +using NCBITaxonomy import CSV using ProgressMeter diff --git a/Code/Code_Dev/virus.jl b/Code/Code_Dev/virus.jl index de23ca7..e6a7b7e 100644 --- a/Code/Code_Dev/virus.jl +++ b/Code/Code_Dev/virus.jl @@ -1,5 +1,5 @@ -using NCBITaxonomy using DataFrames +using NCBITaxonomy import CSV include(joinpath(pwd(), "Code/Code_Dev/taxonomizer.jl")) diff --git a/here b/here deleted file mode 100644 index e69de29..0000000