From b09a5019316203ef2463a6da741db01cc7c2d140 Mon Sep 17 00:00:00 2001 From: Richard Neher Date: Sun, 27 Oct 2024 18:22:00 +0100 Subject: [PATCH] nextclade: enable clade proposals --- nextclade/config/auspice_config.json | 13 ++++++++- nextclade/config/config_dict.yaml | 29 ++++++++++++++----- nextclade/dataset_config/h1n1pdm/includes.txt | 7 +++++ nextclade/dataset_config/h3n2/includes.txt | 10 +++++++ nextclade/scripts/merge_jsons.py | 15 ++++++++-- 5 files changed, 64 insertions(+), 10 deletions(-) diff --git a/nextclade/config/auspice_config.json b/nextclade/config/auspice_config.json index ed218b66..8d0e215e 100644 --- a/nextclade/config/auspice_config.json +++ b/nextclade/config/auspice_config.json @@ -19,6 +19,16 @@ "title": "Country", "type": "categorical" }, + { + "key": "subclade", + "title": "Subclade", + "type": "categorical" + }, + { + "key": "proposed_clade", + "title": "Subclade proposals", + "type": "categorical" + }, { "key": "region", "title": "Region", @@ -38,7 +48,8 @@ "filters": [ "region", "country", - "clade_membership" + "clade_membership", + "subclade" ], "display_defaults": { "color_by": "clade_membership", diff --git a/nextclade/config/config_dict.yaml b/nextclade/config/config_dict.yaml index ecbc9f83..d0a8e0cd 100644 --- a/nextclade/config/config_dict.yaml +++ b/nextclade/config/config_dict.yaml @@ -14,6 +14,9 @@ builds: subclade: url: "seasonal_A-H1N1pdm_HA/main/.auto-generated/subclades.tsv" key: "subclade" + proposed_clade: + url: "seasonal_A-H1N1pdm_HA/main/.auto-generated/subclade_proposals.tsv" + key: "proposed_clade" refs: CY121680: # exclude South Korean genomes because of sequencing artifacts close to the start of HA filter: "--min-date 2009 --probabilistic-sampling --exclude-where country='south_korea' qc.overallStatus='bad' --group-by year --min-length 1500 --subsample-max-sequences 1500" @@ -24,7 +27,7 @@ builds: MW626062: filter: "--min-date 2019 --probabilistic-sampling --exclude-where country='south_korea' qc.overallStatus='bad' --group-by year --min-length 1500 --subsample-max-sequences 1500" clade_offset: 0 - hardmin_date: 2015 + hardmin_date: 2009 reference_EPI_ISL: EPI1812046 reference_strain: A/Wisconsin/588/2019 na: @@ -32,11 +35,14 @@ builds: clade_systems: clade: url: "seasonal_A-H1N1pdm_NA/main/.auto-generated/subclades.tsv" + proposed_clade: + url: "seasonal_A-H1N1pdm_NA/main/.auto-generated/subclade_proposals.tsv" + key: "proposed_clade" refs: MW626056: filter: "--min-date 2019 --probabilistic-sampling --group-by year region --min-length 1400 --subsample-max-sequences 2000" clade_offset: 0 - hardmin_date: 2015 + hardmin_date: 2009 reference_EPI_ISL: EPI1812046 reference_strain: A/Wisconsin/588/2019 h3n2: @@ -51,14 +57,14 @@ builds: short-clade: url: "seasonal_A-H3N2_HA/main/.auto-generated/clades.tsv" key: "short-clade" - emerging_subclade: - url: "seasonal_A-H3N2_HA/emerging/.auto-generated/subclades.tsv" - key: "emerging_subclade" + proposed_clade: + url: "seasonal_A-H3N2_HA/main/.auto-generated/subclade_proposals.tsv" + key: "proposed_clade" refs: EPI1857216: filter: "--min-date 2019 --probabilistic-sampling --group-by year region --min-length 1500 --subsample-max-sequences 2000" clade_offset: -17 - hardmin_date: 2015 + hardmin_date: 2004 reference_EPI_ISL: EPI1857216 reference_strain: A/Darwin/6/2021 CY163680: @@ -72,11 +78,14 @@ builds: clade_systems: clade: url: "seasonal_A-H3N2_NA/main/.auto-generated/subclades.tsv" + proposed_clade: + url: "seasonal_A-H3N2_NA/main/.auto-generated/subclade_proposals.tsv" + key: "proposed_clade" refs: EPI1857215: filter: "--min-date 2019 --probabilistic-sampling --group-by year region --min-length 1400 --subsample-max-sequences 1500" clade_offset: 4 - hardmin_date: 2015 + hardmin_date: 2004 reference_EPI_ISL: EPI1857215 reference_strain: A/Darwin/6/2021 vic: @@ -88,6 +97,9 @@ builds: subclade: url: "seasonal_B-Vic_HA/main/.auto-generated/subclades.tsv" key: "subclade" + proposed_clade: + url: "seasonal_B-Vic_HA/main/.auto-generated/subclade_proposals.tsv" + key: "proposed_clade" refs: KX058884: filter: "--min-date 2014 --probabilistic-sampling --group-by year --min-length 1500 --subsample-max-sequences 2000" @@ -100,6 +112,9 @@ builds: clade_systems: clade: url: "seasonal_B-Vic_NA/main/.auto-generated/subclades.tsv" + proposed_clade: + url: "seasonal_B-Vic_NA/main/.auto-generated/subclade_proposals.tsv" + key: "proposed_clade" refs: CY073894: filter: "--min-date 2014 --probabilistic-sampling --group-by year region --min-length 1400 --subsample-max-sequences 2000" diff --git a/nextclade/dataset_config/h1n1pdm/includes.txt b/nextclade/dataset_config/h1n1pdm/includes.txt index 73496ddf..42047c0e 100644 --- a/nextclade/dataset_config/h1n1pdm/includes.txt +++ b/nextclade/dataset_config/h1n1pdm/includes.txt @@ -1,3 +1,4 @@ +A/Bangladesh/3002/2015 A/Lao/1632/2023 A/NorthCarolina/6/2023 A/Victoria/114/2023 @@ -52,3 +53,9 @@ A/Ghana/FS-11-206/2011 A/Ghana/ARI1181/2011 A/BurkinaFaso/26/2012 A/Yopougon/GR276/2012 +A/Perth/103/2015 +A/Fiji/3/2016 +A/Helsinki/2430/2012 +A/Gansu-Ganzhou/SWL34/2012 +A/Brisbane/96/2012 +A/Minnesota/23/2014 diff --git a/nextclade/dataset_config/h3n2/includes.txt b/nextclade/dataset_config/h3n2/includes.txt index cb0e7c95..46c84604 100644 --- a/nextclade/dataset_config/h3n2/includes.txt +++ b/nextclade/dataset_config/h3n2/includes.txt @@ -15,3 +15,13 @@ A/Macau/9901287/2023 A/California/70/2023 A/Norway/9164/2023 A/Bahrain/1899/2023 + +A/Texas/50/2012 +A/Hawaii/22/2012 +A/Southauckland/7/2012 +A/Victoria/361/2011 +A/Brisbane/297/2014 +A/Austria/93/2014 +A/Southaustralia/2/2013 +A/SouthAfrica/R06488/2017 +A/Bangladesh/3039/2017 diff --git a/nextclade/scripts/merge_jsons.py b/nextclade/scripts/merge_jsons.py index 4db7dd8a..cf8da623 100644 --- a/nextclade/scripts/merge_jsons.py +++ b/nextclade/scripts/merge_jsons.py @@ -1,4 +1,5 @@ -import json, argparse +import json +import argparse def get_clade_configs(name): return { @@ -16,7 +17,14 @@ def get_clade_configs(name): "name": "subclade", "displayName": "Subclade", "description": "Experimental fine-grained subclade annotation." - }}.get(name, {'name':name, "displayName":name, "description":""}) + }, + "proposed_clade": { + "name": "Subclade proposal", + "displayName": "Subclade proposal", + "description": "Includes proposals of new subclades. These can change anytime.", + "hideInWeb": True + } + }.get(name, {'name':name, "displayName":name, "description":""}) if __name__=="__main__": @@ -56,6 +64,9 @@ def get_clade_configs(name): auspice_json['extensions']['nextclade']["clade_node_attrs"] = [ get_clade_configs(c) for c in args.clades if c!='default' ] + if 'subclade' in args.clades: + auspice_json['display_defaults']['color_by'] = 'subclade' + auspice_json['display_defaults']['branch_label'] = 'subclade' with open(args.output_pathogen, 'w') as fh: json.dump(pathogen_json, fh, indent=2)