Skip to content

Commit

Permalink
Add rule to annotate GIHSN samples
Browse files Browse the repository at this point in the history
Adds a new `gihsn_sample` column to the metadata to indicate whether
"GIHSN" was found in the `strain` value, as a proxy for whether the sample
came from the Global Influenza Hospital Surveillance Network (GIHSN).
Follows the existing pattern of using `True` and `False` boolean values.

Resolves <#196>
  • Loading branch information
joverlee521 committed Nov 20, 2024
1 parent fc325b3 commit dfcd671
Showing 1 changed file with 22 additions and 1 deletion.
23 changes: 22 additions & 1 deletion workflow/snakemake_rules/select_strains.smk
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,27 @@ rule join_metadata:
--output {output.metadata:q} 2>&1 | tee {log}
"""

# Add a `gihsn_sample` column to the joined metadata. The value is "True" when
# the `strain` field matches "GIHSN" and "False" otherwise; the strain name is
# used as a proxy for whether the sample was collected as part of the Global
# Influenza Hospital Surveillance Network (GIHSN).
#
# `${{strain}}` is brace-escaped so that Snakemake passes the literal
# `${strain}` column reference through to csvtk. csvtk writes the annotated
# table to stdout, which is redirected to the output file, so only stderr is
# captured in the rule's log.
rule annotate_metadata_with_gihsn:
    input:
        metadata="data/{lineage}/metadata_joined.tsv",
    output:
        metadata="data/{lineage}/metadata_with_gihsn.tsv",
    conda: "../envs/nextstrain.yaml"
    benchmark:
        "benchmarks/annotate_metadata_with_gihsn_{lineage}.txt"
    log:
        "logs/annotate_metadata_with_gihsn_{lineage}.txt"
    shell:
        """
        csvtk --tabs mutate2 \
            --expression '${{strain}}=~"(GIHSN)" ? "True" : "False"' \
            --name gihsn_sample \
            {input.metadata:q} > {output.metadata:q} 2> {log:q}
        """

rule build_reference_strains_table:
input:
references="config/{lineage}/reference_strains.txt",
Expand All @@ -96,7 +117,7 @@ rule build_reference_strains_table:
# later.
rule annotate_metadata_with_reference_strains:
input:
metadata="data/{lineage}/metadata_joined.tsv",
metadata="data/{lineage}/metadata_with_gihsn.tsv",
references="data/{lineage}/reference_strains.tsv",
output:
metadata="data/{lineage}/metadata.tsv",
Expand Down

0 comments on commit dfcd671

Please sign in to comment.