Skip to content

Commit

Permalink
GenBank Flu site release (#630)
Browse files Browse the repository at this point in the history
* Update flu config file for master docker deployment

* Remove example_data_folder field from config, consolidate dev and prod config files

* Separate docker deployments for gisaid/genbank flu sites

* formatting

* More serotype consolidation, add H5, H7, and H9

* switch to real data folder

* Robustness to references with no data (i.e., missing serotypes). This is really only useful for test datasets.

* Annotate more possible fields to download from NCBI virus

* Backfill missing isolate IDs (set_id) with strain and then, as a last resort, Accession ID. Recent sequences seem to use strain rather than set_id.

* Rename ingest files to conform with main workflow inputs (make it the same as other workflows)

* Ignore flu genbank data folder
  • Loading branch information
atc3 authored Feb 24, 2024
1 parent 0b487de commit 7df2e91
Show file tree
Hide file tree
Showing 22 changed files with 288 additions and 107 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,7 @@ example_data_genbank/rsv/**
example_data_genbank/flu/**
example_data_genbank/sars2/**
data_flu_small/**
data_flu_genbank

# scratch notebooks
workflow_main/notebooks/**
Expand Down
7 changes: 1 addition & 6 deletions config/config_flu_genbank.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,12 @@ virus: "flu"

# Path to folder with downloaded and processed data
# This path is relative to the project root
data_folder: "example_data_genbank/flu"
data_folder: "data_flu_genbank"

# Path to folder with genome information (reference.fasta, genes.json, proteins.json)
# This path is relative to the project root
static_data_folder: "static_data/flu"

# Path to folder with data to use in development
# This path is relative to the project root
# Only used for database seeding in development
example_data_folder: "example_data_genbank/flu"

# Database for this virus
postgres_db: "flu_genbank"

Expand Down
5 changes: 0 additions & 5 deletions config/config_flu_gisaid.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,6 @@ data_folder: "data_gisaid_flu"
# This path is relative to the project root
static_data_folder: "static_data/flu"

# Path to folder with data to use in development
# This path is relative to the project root
# Only used for database seeding in development
example_data_folder: "data_gisaid_flu"

# Database for this virus
postgres_db: "flu_gisaid"

Expand Down
4 changes: 0 additions & 4 deletions config/config_rsv_custom.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,6 @@ data_folder: "data_custom"
# This path is relative to the project root
static_data_folder: "static_data"

# Path to folder with data to use in development
# This path is relative to the project root
example_data_folder: "example_data_genbank"

# Database for this virus
postgres_db: "rsv_custom"

Expand Down
5 changes: 0 additions & 5 deletions config/config_rsv_genbank.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,6 @@ data_folder: "example_data_genbank/rsv"
# This path is relative to the project root
static_data_folder: "static_data/rsv"

# Path to folder with data to use in development
# This path is relative to the project root
# Only used for database seeding in development
example_data_folder: "example_data_genbank/rsv"

# Database for this virus
postgres_db: "rsv_genbank"

Expand Down
4 changes: 0 additions & 4 deletions config/config_sars2_custom.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,6 @@ data_folder: "data_custom"
# This path is relative to the project root
static_data_folder: "static_data/sars2"

# Path to folder with data to use in development
# This path is relative to the project root
example_data_folder: "example_data_genbank"

# Database for this virus
postgres_db: "cg_custom"

Expand Down
4 changes: 0 additions & 4 deletions config/config_sars2_genbank.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,6 @@ data_folder: "data_genbank/sars2"
# This path is relative to the project root
static_data_folder: "static_data/sars2"

# Path to folder with data to use in development
# This path is relative to the project root
example_data_folder: "data_genbank/sars2"

# Database for this virus
postgres_db: "cg_genbank"

Expand Down
4 changes: 0 additions & 4 deletions config/config_sars2_genbank_dev.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,6 @@ data_folder: "example_data_genbank/sars2"
# This path is relative to the project root
static_data_folder: "static_data/sars2"

# Path to folder with data to use in development
# This path is relative to the project root
example_data_folder: "example_data_genbank/sars2"

# Database for this virus
postgres_db: "cg_genbank_dev"

Expand Down
4 changes: 0 additions & 4 deletions config/config_sars2_gisaid.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,6 @@ data_folder: "data"
# This path is relative to the project root
static_data_folder: "static_data/sars2"

# Path to folder with data to use in development
# This path is relative to the project root
example_data_folder: "data"

# Database for this virus
postgres_db: "cg_gisaid"

Expand Down
4 changes: 0 additions & 4 deletions config/config_sars2_gisaid_private.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,6 @@ data_folder: "data_private"
# This path is relative to the project root
static_data_folder: "static_data/sars2"

# Path to folder with data to use in development
# This path is relative to the project root
example_data_folder: "data_private"

# Database for this virus
postgres_db: "cg_gisaid_private"

Expand Down
71 changes: 71 additions & 0 deletions docker-compose.flu.genbank.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
# docker compose -f docker-compose.flu.genbank.yml up -d
name: pathmut-flu-genbank
version: "3.7"

services:
server:
restart: always
build:
context: ./
dockerfile: ./services/server/dev.Dockerfile
environment:
- LOGINS=user1:pass1,user2:pass2
- FLASK_APP=cg_server/main.py
- FLASK_ENV=development
- CONFIGFILE=/opt/config/config_flu_genbank.yaml
- CONSTANTSFILE=/opt/constants/defs.json
- DATA_PATH=/data
- STATIC_DATA_PATH=/opt/static_data
- POSTGRES_USER=postgres
- POSTGRES_PASSWORD=cg
- POSTGRES_DB=flu_genbank_dev
- POSTGRES_HOST=db
- POSTGRES_PORT=5432
- POSTGRES_MAX_CONN=20
- FRONTEND_PORT=3002
# command: "gunicorn --bind :5003 --workers 1 --threads 8 --timeout 0 cg_server.main:app"
command: "flask run --host 0.0.0.0 --port=5003"
ports:
- 5003:5003
working_dir: /app
volumes:
- ./services/server:/app:cached # Mount the server python code at run-time, so that the flask development server can refresh on changes
- ./example_data_genbank/flu:/data:cached # Mount the data at run-time (for database seeding only). Should prevent sending all the data over unnecessarily at build-time
- ./src/constants:/opt/constants:cached
- ./config:/opt/config:cached
- ./static_data/flu:/opt/static_data:cached
depends_on:
- db
db:
image: postgres:12-alpine
environment:
- POSTGRES_USER=postgres
- POSTGRES_PASSWORD=cg
- POSTGRES_DB=flu_genbank_dev
- POSTGRES_HOST=db
- POSTGRES_PORT=5432
- POSTGRES_MAX_CONN=20
command: "postgres -c 'config_file=/etc/postgresql/postgresql.conf'"
volumes:
- postgres_data:/var/lib/postgresql/data/
- ./services/postgres/postgres.conf:/etc/postgresql/postgresql.conf # Custom config
ports: # Expose 5432 so we can use DB administration tools
- 5432:5432
frontend:
restart: always
build:
context: ./
dockerfile: ./services/frontend/Dockerfile
environment:
CONFIGFILE: /app/config/config_flu_genbank.yaml
working_dir: /app
volumes:
- ./src:/app/src:cached # Mount the JS code at run-time, so the babel server can recompile the app on file changes
- ./config:/app/config:cached
- ./static_data:/app/static_data:cached
command: "npm start -s"
ports:
- 3002:3000

volumes:
postgres_data:
8 changes: 4 additions & 4 deletions docker-compose.flu.yml → docker-compose.flu.gisaid.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# docker compose -f docker-compose.flu.yml up -d
name: pathmut-flu
# docker compose -f docker-compose.flu.gisaid.yml up -d
name: pathmut-flu-gisaid
version: "3.7"

services:
Expand All @@ -12,7 +12,7 @@ services:
- LOGINS=user1:pass1,user2:pass2
- FLASK_APP=cg_server/main.py
- FLASK_ENV=development
- CONFIGFILE=/opt/config/config_flu_gisaid_dev.yaml
- CONFIGFILE=/opt/config/config_flu_gisaid.yaml
- CONSTANTSFILE=/opt/constants/defs.json
- DATA_PATH=/data
- STATIC_DATA_PATH=/opt/static_data
Expand Down Expand Up @@ -57,7 +57,7 @@ services:
context: ./
dockerfile: ./services/frontend/Dockerfile
environment:
CONFIGFILE: /app/config/config_flu_gisaid_dev.yaml
CONFIGFILE: /app/config/config_flu_gisaid.yaml
working_dir: /app
volumes:
- ./src:/app/src:cached # Mount the JS code at run-time, so the babel server can recompile the app on file changes
Expand Down
2 changes: 1 addition & 1 deletion services/server/cg_server/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,6 @@
seed_database(conn)

insert_sequences(
conn, os.getenv("DATA_PATH", project_root / config["example_data_folder"]),
conn, os.getenv("DATA_PATH", project_root / config["data_folder"]),
)
conn.commit()
2 changes: 1 addition & 1 deletion services/server/cg_server/db_seed/seed.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@

# root/services/server/cg_server/db_seed/seed.py
project_root = Path(__file__).parent.parent.parent.parent.parent
data_path = Path(os.getenv("DATA_PATH", project_root / config["example_data_folder"]))
data_path = Path(os.getenv("DATA_PATH", project_root / config["data_folder"]))
static_data_path = Path(
os.getenv("STATIC_DATA_PATH", project_root / config["static_data_folder"])
)
Expand Down
4 changes: 1 addition & 3 deletions services/server/cg_server/routes/seed.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,6 @@ def force_seed(conn):

print("Seeding DB from /force_seed")
seed_database(conn)
insert_sequences(
conn, os.getenv("DATA_PATH", project_root / config["example_data_folder"])
)
insert_sequences(conn, os.getenv("DATA_PATH", project_root / config["data_folder"]))

return "Done!"
Loading

0 comments on commit 7df2e91

Please sign in to comment.