Skip to content

Commit

Permalink
Merge pull request #78 from colebrookson/main
Browse files Browse the repository at this point in the history
docker image working
  • Loading branch information
colebrookson authored Oct 1, 2023
2 parents e15b03f + 7afa539 commit 8b7fc29
Show file tree
Hide file tree
Showing 10 changed files with 27 additions and 66 deletions.
52 changes: 9 additions & 43 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,67 +8,38 @@ on:
pull_request:
branches:
- main
# run once a day (cron "0 0 * * *" = every day at 00:00)
# run once a day (cron "0 0 * * *" = every day at 00:00)
schedule:
- cron: "0 0 * * *"
workflow_dispatch:
workflow_dispatch:
jobs:
GenBank:
runs-on: ubuntu-latest
container: colebrookson/virion:latest
steps:
- uses: actions/checkout@v3
- uses: gautamkrishnar/keepalive-workflow@v1
- name: Setup Julia
uses: julia-actions/setup-julia@v1
- uses: julia-actions/setup-julia@v1
with:
version: 1.7
- name: Julia dependencies (DF)
run: julia -e 'using Pkg; Pkg.activate("."); Pkg.add("CSV"); Pkg.add("DataFrames")'
- name: NCBITaxonomy (version from MAIN branch!)
run: julia -e 'using Pkg; Pkg.activate("."); Pkg.add(PackageSpec(name="NCBITaxonomy", rev="main"))'
- name: Setup R
uses: r-lib/actions/setup-r@v2
- name: Libraries for tidyverse
run: sudo apt-get install -y libharfbuzz-dev libfribidi-dev
- name: dependencies!!!!
- name: Download GenBank
run: |
sudo apt-get install libcurl4-openssl-dev libarchive-dev
sudo Rscript -e 'install.packages(c("taxize", "tidyverse", "RCurl", "readr", "vroom", "magrittr", "fs", "data.table", "zip", "rglobi", "lubridate", "R.utils"), repos = "http://cran.us.r-project.org")'
- name: Download GenBank
run: |
Rscript -e 'source("Code/02_1a_Download GenBank.R")'
Rscript -e 'source("Code/02_1a_Download GenBank.R")'
- name: Digest GenBank
run: |
Rscript -e 'source("Code/02_1b_Digest GenBank.R")'
- name: Format GenBank
run: |
Rscript -e 'source("Code/02_1c_Format GenBank.R")'
Rscript -e 'source("Code/02_1c_Format GenBank.R")'
- name: Save artifacts
uses: actions/upload-artifact@v2
with:
name: GenBankFormatted
path: Intermediate/Formatted/GenbankFormatted.csv.gz
Globi:
runs-on: ubuntu-latest
container: colebrookson/virion:latest
steps:
- uses: actions/checkout@v3
- name: Setup R
uses: r-lib/actions/setup-r@v2
- name: Libraries for tidyverse
run: sudo apt-get install -y libharfbuzz-dev libfribidi-dev
- name: dependencies!!!!
run: |
sudo apt-get install libcurl4-openssl-dev libarchive-dev
sudo Rscript -e 'install.packages(c("taxize", "tidyverse", "RCurl", "readr", "vroom", "magrittr", "fs", "data.table", "zip", "devtools", "lubridate"), repos = "http://cran.us.r-project.org")'
sudo Rscript -e 'devtools::install_github("ropensci/rglobi")'
- name: Setup Julia
uses: julia-actions/setup-julia@v1
with:
version: 1.7
- name: Julia dependencies (DF)
run: julia -e 'using Pkg; Pkg.activate("."); Pkg.add("CSV"); Pkg.add("DataFrames")'
- name: NCBITaxonomy (version from MAIN branch!)
run: julia -e 'using Pkg; Pkg.activate("."); Pkg.add(PackageSpec(name="NCBITaxonomy", rev="main"))'
- name: Download Globi
run: |
Rscript -e 'source("Code/02_3a_Download GLOBI.R")'
Expand All @@ -85,15 +56,10 @@ jobs:
path: Intermediate/Formatted/GLOBIFormatted.csv
Finish:
runs-on: ubuntu-latest
container: colebrookson/virion:latest
needs: [GenBank, Globi]
steps:
- uses: actions/checkout@v3
- name: Setup R
uses: r-lib/actions/setup-r@v2
- name: dependencies!!!
run: |
sudo apt-get install libcurl4-openssl-dev libarchive-dev libharfbuzz-dev libfribidi-dev
sudo Rscript -e 'install.packages(c("taxize", "tidyverse", "RCurl", "readr", "vroom", "magrittr", "fs", "data.table", "R.utils", "zip", "rglobi", "lubridate", "tidyft"), repos = "http://cran.us.r-project.org")'
- name: Get GenBank
uses: actions/download-artifact@v2
with:
Expand Down
4 changes: 2 additions & 2 deletions Code/001_Julia functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
# Make sure the Julia packages used by the taxonomy helpers are available.
#
# Takes no arguments. Each package name is handed to
# JuliaCall::julia_install_package_if_needed(), once per package.
install.ncbi <- function() {
  # install the packages if needed
  JuliaCall::julia_install_package_if_needed("NCBITaxonomy")
  JuliaCall::julia_install_package_if_needed("DataFrames")
  JuliaCall::julia_install_package_if_needed("CSV")
  JuliaCall::julia_install_package_if_needed("ProgressMeter")
}
17 changes: 5 additions & 12 deletions Code/02_1a_Download GenBank.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,24 +11,17 @@ library(magrittr)

# Download the viral nucleotide metadata archive from NCBI and keep only the
# columns that are written out to Source/sequences.csv.
url <- paste0("https://ftp.ncbi.nlm.nih.gov/genomes/Viruses/AllNuclMetadata/",
              "AllNuclMetadata.csv.gz")
archive <- here::here("./Source/AllNuclMetadata.csv.gz")

# Fetch with wget exactly once. Arguments are shell-quoted because the project
# path may contain spaces, and -O pins the output file so an archive left over
# from a previous run is overwritten instead of being saved next to it under a
# different name (which would leave fread() below reading stale data).
status <- system2("wget", args = c(shQuote(url), "-O", shQuote(archive)))
if (status != 0) {
  # Fail loudly instead of looping forever or reading a partial file.
  stop("Download of ", url, " failed (wget exit status ", status, ")",
       call. = FALSE)
}

# reading this in - use data.table
seq <- data.table::fread(archive,
                         select = c("#Accession", "Release_Date", "Species",
                                    "Host", "Collection_Date"))
print("readin")

# Rename only once: after the first rename the "#Accession" column no longer
# exists, so repeating the call would error.
seq %<>% dplyr::rename(Accession = "#Accession")

# write out ====================================================================
vroom::vroom_write(seq, here::here("./Source/sequences.csv"))
print("written")
6 changes: 4 additions & 2 deletions Code/02_1b_Digest GenBank.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,15 @@ library(magrittr)
# NOTE(review): this NCBI API key is committed to the repository. Prefer
# supplying it through the ENTREZ_KEY environment variable (e.g. a CI secret);
# the literal is kept only as a fallback so existing runs keep working.
if (!nzchar(Sys.getenv("ENTREZ_KEY"))) {
  rentrez::set_entrez_key("ec345b39079e565bdfa744c3ef0d4b03ba08")
}

# get the functions to do all the dictionary stuff
# (sourced unconditionally, once each, so a stale definition left in the
# session is always refreshed)
source(here::here("./Code/001_TaxizeFunctions.R"))
source(here::here("./Code/001_Julia functions.R"))

# Fall back to the zipped copy when the downloaded table is missing. The
# extraction directory is anchored with here::here() like every other path in
# this script, so it does not depend on the current working directory.
if (!file.exists(here::here("./Source/sequences.csv"))) {
  zip::unzip(here::here("./Source/GenBank.zip"),
             exdir = here::here("./Source"))
}

# install.ncbi() comes from the Julia functions file sourced above
install.ncbi()

gb <- data.table::fread(here::here("./Source/sequences.csv")) %>%
  dplyr::as_tibble()

Expand Down
2 changes: 1 addition & 1 deletion Code/02_2a_Digest PREDICT.R
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ predict.raw %>%
TRUE,
FALSE)) %>%
mutate(Host = str_replace(Host, " \\*",""),
Host = str_replace(Host, "cf. ","")) %>%
Host = str_replace(Host, "cf. ","")) %>%

# Back up the virus names before doing anything else

Expand Down
6 changes: 3 additions & 3 deletions Code/02_3b_Digest GLOBI.R
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@

# Load the shared helper functions (sourced unconditionally, once each).
source("Code/001_TaxizeFunctions.R")
source("Code/001_Julia functions.R")

# NOTE(review): this NCBI API key is committed to the repository. Prefer
# supplying it through the ENTREZ_KEY environment variable (e.g. a CI secret);
# the literal is kept only as a fallback so existing runs keep working.
if (!nzchar(Sys.getenv("ENTREZ_KEY"))) {
  rentrez::set_entrez_key("ec345b39079e565bdfa744c3ef0d4b03ba08")
}

library(tidyverse)
library(taxize)
library(magrittr)
library(vroom)

# install.ncbi() comes from the Julia functions file sourced above
install.ncbi()

globi <- read_csv("Source/GLOBI-raw.csv")

Expand Down
2 changes: 1 addition & 1 deletion Code/Code_Dev/host.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
using NCBITaxonomy
using DataFrames
using NCBITaxonomy
import CSV

include(joinpath(pwd(), "Code/Code_Dev/taxonomizer.jl"))
Expand Down
2 changes: 1 addition & 1 deletion Code/Code_Dev/pathogen.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
using NCBITaxonomy
using DataFrames
using NCBITaxonomy
import CSV
using ProgressMeter

Expand Down
2 changes: 1 addition & 1 deletion Code/Code_Dev/virus.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
using NCBITaxonomy
using DataFrames
using NCBITaxonomy
import CSV

include(joinpath(pwd(), "Code/Code_Dev/taxonomizer.jl"))
Expand Down
Empty file removed here
Empty file.

0 comments on commit 8b7fc29

Please sign in to comment.