From 9ce9be0a4099f10039d4c0c711ef2eda21b418bd Mon Sep 17 00:00:00 2001 From: "Jeffrey N. Johnson" Date: Mon, 28 Oct 2024 12:13:26 -0700 Subject: [PATCH] Added some example scripts. --- docs/examples/IMG.csv | 201 ++++++++++++++++++ docs/examples/filter_on_taxon_oid.py | 23 ++ .../match_files_to_ids_in_spreadsheet.py | 131 ++++++++++++ 3 files changed, 355 insertions(+) create mode 100755 docs/examples/IMG.csv create mode 100755 docs/examples/filter_on_taxon_oid.py create mode 100755 docs/examples/match_files_to_ids_in_spreadsheet.py diff --git a/docs/examples/IMG.csv b/docs/examples/IMG.csv new file mode 100755 index 0000000..75b8852 --- /dev/null +++ b/docs/examples/IMG.csv @@ -0,0 +1,201 @@ +taxon_oid,Domain,Sequencing Status,Study Name,Genome Name / Sample Name,Sequencing Center,IMG Genome ID,GTDB-Tk Domain,GTDB-Tk Phylum,GTDB-Tk Class,GTDB-Tk Order,GTDB-Tk Family,GTDB-Tk Genus,GTDB-Tk Species,GOLD Analysis Project ID,Award DOI,Ecosystem,Ecosystem Category,Ecosystem Subtype,Ecosystem Type,Specific Ecosystem,Geographic Location,Latitude,Longitude,Genome Size * assembled,Gene Count * assembled,CheckM2 Completeness,CheckM2 Contamination,ITS AP ID,ITS SP ID,,ITS AP ID - 90 Complete,Taxon-oid (no AP ID) - 90% +2935343106,Bacteria,Permanent Draft,Genome sequencing of novel secondary metabolite producing bacteria for plant Disease control,Pseudomonas vancouverensis DhA-51,DOE Joint Genome Institute (JGI),2935343106,Bacteria,Pseudomonadota,Gammaproteobacteria,Pseudomonadales,Pseudomonadaceae,Pseudomonas_E,Pseudomonas_E laurylsulfatiphila,Ga0496529,10.46936/10.25585/60001209,Environmental,Terrestrial,Unclassified,Soil,Unclassified,"USA: Decorah, IA",43.3158,-91.7984,6559376,6105,100,0.12,1327588,1278300,,1000002,2582580701 +2808606414,Bacteria,Permanent Draft,Plant growth-promoting bacteria from the rhizosphere of the beachgrass Ammophila breviligulata,Pantoea sp. 
SJZ147,DOE Joint Genome Institute (JGI),2808606414,Bacteria,Pseudomonadota,Gammaproteobacteria,Enterobacterales,Enterobacteriaceae,Pantoea,Pantoea allii,Ga0314263,10.46936/10.25585/60000946,Host-associated,Plants,Rhizosphere,Roots,Soil,,,,4917181,4720,100,0,1208563,1208594,,1000005,2710723751 +2919688452,Bacteria,Permanent Draft,"Sorghum-associated microbial communities from plants grown in Nebraska, USA",Pararheinheimera soli 4138,DOE Joint Genome Institute (JGI),2919688452,Bacteria,Pseudomonadota,Gammaproteobacteria,Enterobacterales_A,Alteromonadaceae,Pararheinheimera,Unclassified,Ga0454474,10.46936/10.25585/60001066,Host-associated,Plants,Unclassified,Roots,Unclassified,"USA: Grant County, Nebraska",41.929,-101.928,4595932,4205,100,0.22,1280490,1280598,,1000008,2626542327 +2510436004,Bacteria,Finished,Facultative Metal-reducing Gamma proteobacteria,Shewanella putrefaciens 200,DOE Joint Genome Institute (JGI),2510436004,Bacteria,Pseudomonadota,Gammaproteobacteria,Enterobacterales_A,Shewanellaceae,Shewanella,Shewanella putrefaciens,Ga0025096,,Engineered,Wastewater,Petrochemical,Industrial wastewater,Unclassified,"Alberta, Canada",,,4840251,4310,100,0.37,1377326,,,1000011,2721755200 +2582580701,Bacteria,Draft,"Freshwater microbial communities from Lake Mendota and Trout Bog Lake, Wisconsin, USA",Composite genome from Trout Bog Hypolimnion pan-assembly TBhypo.metabat.6070,DOE Joint Genome Institute (JGI),2582580701,Bacteria,Patescibacteria,Paceibacteria,UBA9983_A,UBA9973,UBA9973,Unclassified,Ga0048118,10.46936/10.25585/60000533,,,,,,,,,878995,971,100,0.04,,,,1000014,2585428052 +2521172639,Bacteria,Permanent Draft,Plant associated metagenomes--Microbial community diversity and host control of community assembly across model and emerging plant ecological genomics systems.,Pseudomonas sp. 
LAMO17WK12:I5,DOE Joint Genome Institute (JGI),2521172639,Bacteria,Pseudomonadota,Gammaproteobacteria,Pseudomonadales,Pseudomonadaceae,Pseudomonas_E,Pseudomonas_E putida_A,Ga0001088,10.46936/10.25585/60000944,,,,,,,,,6103916,5575,100,0.25,1007072,1007071,,1000017,2708742668 +2959993424,Bacteria,Permanent Draft,Large-scale genome sequencing for establishing a Natural Products Genomics Resource Center (NPGRC),Actinobacteria bacterium 21258,DOE Joint Genome Institute (JGI),2959993424,Bacteria,Actinomycetota,Actinomycetia,Mycobacteriales,Pseudonocardiaceae,Amycolatopsis,Amycolatopsis sp003947415,Ga0532626,10.46936/10.25585/60001355,,,,,,,,,9291592,8705,100,0.95,1357098,1357701,,1000020,2626542068 +2802428814,Bacteria,Permanent Draft,"Genomic Encyclopedia of Type Strains, Phase III (KMG-III): the genomes of soil and plant-associated and newly described type strains",Kribbella voronezhensis VKM Ac-2575,DOE Joint Genome Institute (JGI),2802428814,Bacteria,Actinomycetota,Actinomycetia,Propionibacteriales,Kribbellaceae,Kribbella,Kribbella sp004365175,Ga0310573,10.46936/10.25585/60001401,,,,,,,,,8032448,7518,100,0.86,1203304,1203373,,1000023,2708742980 +2772190796,Bacteria,Permanent Draft,"Genomic Encyclopedia of Type Strains, Phase III (KMG-III): the genomes of soil and plant-associated and newly described type strains",Algoriphagus boseongensis CECT 8446,DOE Joint Genome Institute (JGI),2772190796,Bacteria,Bacteroidota,Bacteroidia,Cytophagales,Cyclobacteriaceae,Algoriphagus,Algoriphagus boseongensis,Ga0244635,10.46936/10.25585/60001401,,,,,,,,,4176921,3692,100,0,1175205,1175266,,1000026,2711768570 +2671180174,Bacteria,Permanent Draft,Sequencing the genomes of 1000 actinobacteria strains,Streptomyces sp. 
PAN_FS17,DOE Joint Genome Institute (JGI),2671180174,Bacteria,Actinomycetota,Actinomycetia,Streptomycetales,Streptomycetaceae,Streptomyces,Streptomyces chartreusis,Ga0104729,10.46936/10.25585/60001044,,,,,,,,,10467880,9260,100,0.16,1097316,1097327,,1000029,2710724058 +2954016120,Bacteria,Permanent Draft,Comparative genomics of wheat-associated soil bacteria to identify genetic determinants of phenazine resistance,Flavobacterium sp. W4I14,DOE Joint Genome Institute (JGI),2954016120,Bacteria,Bacteroidota,Bacteroidia,Sphingobacteriales,Sphingobacteriaceae,Pedobacter,Unclassified,Ga0505390,10.46936/10.25585/60000525,Host-associated,Plants,Rhizosphere,Roots,Unclassified,"USA: Washington, Lind Dryland Research Station",,,6446024,5697,100,0.35,1337129,1337233,,1000035,2710723908 +2518645594,Bacteria,Permanent Draft,Plant associated metagenomes--Microbial community diversity and host control of community assembly across model and emerging plant ecological genomics systems.,Pseudomonas sp. URIL14HWK12:I2,DOE Joint Genome Institute (JGI),2518645594,Bacteria,Pseudomonadota,Gammaproteobacteria,Pseudomonadales,Pseudomonadaceae,Pseudomonas_E,Pseudomonas_E fulva,Ga0001049,10.46936/10.25585/60000944,,,,,,,,,4717612,4418,100,0.14,1006955,1006954,,1000038,2788500116 +2540341249,Bacteria,Permanent Draft,The Hungate 1000. A catalogue of reference genomes from the rumen microbiome.,Levilactobacillus brevis AG48,DOE Joint Genome Institute (JGI),2540341249,Bacteria,Bacillota,Bacilli,Lactobacillales,Lactobacillaceae,Levilactobacillus,Levilactobacillus brevis,Ga0004233,10.46936/10.25585/60000534,Host-associated,Mammals,Stomach,Digestive system,Rumen,,,,2595994,2716,100,0.92,1020267,1020266,,1000041,2711768342 +2667528171,Bacteria,Permanent Draft,Comparative genomics of bacterial root endophytes of switchgrass collected from native prairies over two seasons,Pseudomonas sp. 
NFPP22,DOE Joint Genome Institute (JGI),2667528171,Bacteria,Pseudomonadota,Gammaproteobacteria,Pseudomonadales,Pseudomonadaceae,Pseudomonas_E,Pseudomonas_E protegens,Ga0059014,10.46936/10.25585/60000672,Host-associated,Plants,Rhizoplane,Roots,Endophytes,,36.8108,-96.4338,6900659,6286,100,0.52,1049704,1049703,,1000044,644736333 +2863263905,Bacteria,Permanent Draft,"Genomic Encyclopedia of Type Strains, Phase III (KMG-III): the genomes of soil and plant-associated and newly described type strains",Streptomyces netropsis CECT 3265 (Annotation) (version 2),DOE Joint Genome Institute (JGI),2863263905,Bacteria,Actinomycetota,Actinomycetia,Streptomycetales,Streptomycetaceae,Streptomyces,Streptomyces netropsis,Ga0436945,10.46936/10.25585/60001401,,,,,,,,,8041454,7157,100,0.01,1276925,1205798,,1000047,2711768573 +2963062276,Bacteria,Permanent Draft,Large-scale genome sequencing for establishing a Natural Products Genomics Resource Center (NPGRC),Actinobacteria bacterium 20858,DOE Joint Genome Institute (JGI),2963062276,Bacteria,Actinomycetota,Actinomycetia,Streptomycetales,Streptomycetaceae,Streptomyces,Unclassified,Ga0535158,10.46936/10.25585/60001355,,,,,,,,,8748038,8122,100,0.15,1360415,1361115,,1000050,2519899768 +2738543002,Bacteria,Permanent Draft,"Populus root and rhizosphere microbial communities from Tennessee, USA",Paraburkholderia sp. 
GV072,DOE Joint Genome Institute (JGI),2738543002,Bacteria,Pseudomonadota,Gammaproteobacteria,Burkholderiales,Burkholderiaceae,Paraburkholderia,Paraburkholderia graminis,Ga0189731,10.46936/10.25585/60000910,Host-associated,Plants,Unclassified,Roots,Unclassified,,,,7284546,6584,100,0.05,1146906,1147035,,1000053,2710723931 +2615840624,Bacteria,Permanent Draft,"Genomic Encyclopedia of Type Strains, Phase III (KMG-III): the genomes of soil and plant-associated and newly described type strains",Rhizobium aethiopicum HBR26,DOE Joint Genome Institute (JGI),2615840624,Bacteria,Pseudomonadota,Alphaproteobacteria,Rhizobiales,Rhizobiaceae,Rhizobium,Rhizobium aethiopicum,Ga0061105,10.46936/10.25585/60001401,Host-associated,Plants,Nodule,Roots,Unclassified,,8.597167,39.380353,6557588,6307,100,0.17,1052921,1052920,,1000056,2786546253 +2930353684,Bacteria,Permanent Draft,Large-scale genome sequencing for establishing a Natural Products Genomics Resource Center (NPGRC),Actinobacteria sp. 003720,DOE Joint Genome Institute (JGI),2930353684,Bacteria,Actinomycetota,Actinomycetia,Streptomycetales,Streptomycetaceae,Streptomyces,Unclassified,Ga0498107,10.46936/10.25585/60001355,,,,,,,,,7777176,7140,100,0.31,1306559,1306898,,1000059,2626541712 +2506783046,Bacteria,Finished,Compost Feedstock-Adapted Isolates,Paenibacillus lactis 154,DOE Joint Genome Institute (JGI),2506783046,Bacteria,Bacillota,Bacilli,Paenibacillales,Paenibacillaceae,Paenibacillus,Paenibacillus lactis,Ga0024940,,Engineered,Bioreactor,Unclassified,Aerobic,Unclassified,"Joint BioEnergy Institute, Emeryville, CA 94608",37.831,-122.285,6848306,6305,100,0.03,1377598,,,1000062,2710264579 +2773857658,Bacteria,Permanent Draft,Sequencing the genomes of 1000 actinobacteria strains,Streptomyces sp. 
75,DOE Joint Genome Institute (JGI),2773857658,Bacteria,Actinomycetota,Actinomycetia,Streptomycetales,Streptomycetaceae,Streptomyces,Unclassified,Ga0181096,10.46936/10.25585/60001044,,,,,,,,,10065237,9273,100,0.11,1133714,1133759,,1000065,2634166269 +2821271981,Bacteria,Permanent Draft,Plant associated metagenomes--Microbial community diversity and host control of community assembly across model and emerging plant ecological genomics systems.,Pseudomonas umsogensis KD5_MF50,DOE Joint Genome Institute (JGI),2821271981,Bacteria,Pseudomonadota,Gammaproteobacteria,Pseudomonadales,Pseudomonadaceae,Pseudomonas_E,Pseudomonas_E sp001655615,Ga0372445,10.46936/10.25585/60000944,,,,,,,,,6501538,6065,100,1.46,1221142,1221149,,1000068,2626542097 +2693429899,Bacteria,Permanent Draft,"Genomic Encyclopedia of Archaeal and Bacterial Type Strains, Phase II (KMG-II): from individual species to whole genera",Celeribacter indicus DSM 27257,DOE Joint Genome Institute (JGI),2693429899,Bacteria,Pseudomonadota,Alphaproteobacteria,Rhodobacterales,Rhodobacteraceae,Celeribacter,Celeribacter indicus,Ga0131257,10.46936/10.25585/60001024,Environmental,Aquatic,Oceanic,Marine,Aphotic zone,,,,4816144,4757,100,0.91,1108045,1108153,,1000071,2687453699 +2901051635,Bacteria,Permanent Draft,"Genomic Encyclopedia of Type Strains, Phase IV (KMG-IV): sequencing the most valuable type-strain genomes for metagenomic binning, comparative biology and taxonomic classification",Streptococcus gallolyticus macedonicus DSM 15879,DOE Joint Genome Institute (JGI),2901051635,Bacteria,Bacillota,Bacilli,Lactobacillales,Streptococcaceae,Streptococcus,Streptococcus macedonicus,Ga0454009,10.46936/10.25585/60001087,,,,,,,,,2132281,2420,100,0.34,1269373,1269799,,1000074,2711768368 +2599185188,Bacteria,Permanent Draft,Comparative genomics of bacterial root endophytes of switchgrass collected from native prairies over two seasons,Pseudomonas sp. 
NFACC45,DOE Joint Genome Institute (JGI),2599185188,Bacteria,Pseudomonadota,Gammaproteobacteria,Pseudomonadales,Pseudomonadaceae,Pseudomonas_E,Pseudomonas_E tehranensis,Ga0058970,10.46936/10.25585/60000672,Host-associated,Plants,Rhizoplane,Roots,Endophytes,,36.7069,-98.2694,6164180,5577,100,0.29,1049572,1049571,,1000077,2711768292 +2953832134,Bacteria,Permanent Draft,Large-scale genome sequencing for establishing a Natural Products Genomics Resource Center (NPGRC),Actinobacteria bacterium 21013-2,DOE Joint Genome Institute (JGI),2953832134,Bacteria,Actinomycetota,Actinomycetia,Streptomycetales,Streptomycetaceae,Streptomyces,Streptomyces californicus,Ga0528211,10.46936/10.25585/60001355,,,,,,,,,8656521,7692,100,0.9,1351534,1352422,,1000080,2710264615 +2728369518,Bacteria,Permanent Draft,"Genomic Encyclopedia of Archaeal and Bacterial Type Strains, Phase II (KMG-II): from individual species to whole genera",Actinoplanes italicus DSM 43146,DOE Joint Genome Institute (JGI),2728369518,Bacteria,Actinomycetota,Actinomycetia,Mycobacteriales,Micromonosporaceae,Actinoplanes,Actinoplanes italicus,Ga0180974,10.46936/10.25585/60001024,Environmental,Terrestrial,Unclassified,Soil,Unclassified,,,,11152790,10250,100,1.13,1128690,1129030,,1000083,2710724090 +2731639257,Bacteria,Permanent Draft,"Genomic Encyclopedia of Archaeal and Bacterial Type Strains, Phase II (KMG-II): from individual species to whole genera",Yoonia maritima DSM 101533,DOE Joint Genome Institute (JGI),2731639257,Bacteria,Pseudomonadota,Alphaproteobacteria,Rhodobacterales,Rhodobacteraceae,Yoonia,Yoonia maritima,Ga0180985,10.46936/10.25585/60001024,Environmental,Aquatic,Unclassified,Sediment,Unclassified,,,,3677387,3698,100,0.12,1128712,1129041,,1000089,2786546265 +2919143609,Bacteria,Permanent Draft,"Sorghum-associated microbial communities from plants grown in Nebraska, USA",Bacillus sp. 
1751,DOE Joint Genome Institute (JGI),2919143609,Bacteria,Bacillota,Bacilli,Bacillales,Bacillaceae_H,Priestia,Priestia megaterium,Ga0454429,10.46936/10.25585/60001066,Host-associated,Plants,Rhizosphere,Roots,Unclassified,"USA: Memphis, Nebraska",41.146,-96.436,6219228,6777,100,2.78,1280390,1280548,,1000092,2710723786 +640753049,Bacteria,Finished,Shewanella baltica OS185,Shewanella baltica OS185,DOE Joint Genome Institute (JGI),640753049,Bacteria,Pseudomonadota,Gammaproteobacteria,Enterobacterales_A,Shewanellaceae,Shewanella,Shewanella baltica,Ga0030277,,Environmental,Aquatic,Unclassified,Marine,Unclassified,"Gotland deep, Baltic Sea",,,5312910,4618,100,0.23,1377325,,,1000095,2626541884 +2710723751,Bacteria,Permanent Draft,"Soil microbial communities from Rifle, Colorado, USA",Candidatus Liptonbacteria bacterium RIFCSPLOWO2_01_FULL_52_25,DOE Joint Genome Institute (JGI),2710723751,Bacteria,Patescibacteria,Paceibacteria,UBA6257,2-01-FULL-56-20,REEB91,Unclassified,Ga0155706,,,,,,,,,,1072345,1173,100,8.3,,,,1000101,2711768391 +2519103185,Bacteria,Permanent Draft,Multiplexed sequencing for secondary metabolite discovery,Salinispora arenicola CNS673,DOE Joint Genome Institute (JGI),2519103185,Bacteria,Actinomycetota,Actinomycetia,Mycobacteriales,Micromonosporaceae,Micromonospora,Micromonospora arenicola,Ga0026845,10.46936/10.25585/60007409,,,,,,,,,5875699,5421,100,1.35,1378515,,,1000104,2711768316 +2863404153,Bacteria,Permanent Draft,Genome sequencing of plant associated microbes to promote plant fitness in Sorghum bicolor and Oryza sativa,Streptomyces scabiei SAI-025 (Annotation) (version 2),DOE Joint Genome Institute (JGI),2863404153,Bacteria,Actinomycetota,Actinomycetia,Streptomycetales,Streptomycetaceae,Streptomyces,Streptomyces sp020982545,Ga0436964,10.46936/10.25585/60001083,Host-associated,Plants,Unclassified,Roots,Unclassified,USA: California,36.6008,-119.5108,9672205,8597,100,1.35,1276944,1240753,,1000107,2711768667 +2918755432,Bacteria,Permanent 
Draft,"Sorghum-associated microbial communities from plants grown in Nebraska, USA",Variovorax paradoxus DS996,DOE Joint Genome Institute (JGI),2918755432,Bacteria,Pseudomonadota,Gammaproteobacteria,Burkholderiales,Burkholderiaceae_B,Variovorax,Variovorax paradoxus_A,Ga0451220,10.46936/10.25585/60001066,Environmental,Terrestrial,Unclassified,Soil,Unclassified,,,,7755685,7382,100,1.61,1273033,1273170,,1000110,2588254262 +8013487634,Bacteria,Permanent Draft,Adaptive strategies in a cosmopolitan and abundant soil bacterium,Bradyrhizobium sp. AZCC 1588,DOE Joint Genome Institute (JGI),8013487634,Bacteria,Pseudomonadota,Alphaproteobacteria,Rhizobiales,Xanthobacteraceae,Bradyrhizobium,Unclassified,Ga0592902,10.46936/10.25585/60000446,Environmental,Terrestrial,Unclassified,Soil,Unclassified,,,,7414024,7178,100,0.31,1408423,1408449,,1000113,2785510843 +2940566133,Bacteria,Permanent Draft,Genome sequencing of biomass degrading isolates from Greek habitats,Microbacterium thalassium ZKA21,DOE Joint Genome Institute (JGI),2940566133,Bacteria,Actinomycetota,Actinomycetia,Actinomycetales,Microbacteriaceae,Microbacterium,Unclassified,Ga0506665,10.46936/10.25585/60001106,,,,,,,,,3817514,3680,100,0.02,1343851,1239590,,1000317,2710264513 +2889276214,Bacteria,Permanent Draft,Genomics of switchgrass bacterial isolates,Paenibacillus sp. PvR133,DOE Joint Genome Institute (JGI),2889276214,Bacteria,Bacillota,Bacilli,Paenibacillales,Paenibacillaceae,Paenibacillus,Paenibacillus polymyxa_B,Ga0438213,10.46936/10.25585/60001249,Host-associated,Plants,Rhizosphere,Roots,Unclassified,USA: Michigan,42.3956,-85.3757,5979355,5469,100,0.55,1268995,1269090,,1000320,2626542054 +2585428105,Bacteria,Permanent Draft,Sequencing of Potential Lignin Degrading Lysinibacillus isolates,Lysinibacillus sp. 
BC-14,DOE Joint Genome Institute (JGI),2585428105,Bacteria,Bacillota,Bacilli,Bacillales,Planococcaceae,Lysinibacillus,Lysinibacillus fusiformis,Ga0008626,10.46936/10.25585/60007628,Host-associated,Plants,Decayed wood,Wood,Unclassified,,,,4718637,4703,100,0.21,1039603,1039602,,1000323,2708742976 +2935353572,Bacteria,Permanent Draft,Genome sequencing of novel secondary metabolite producing bacteria for plant Disease control,Pseudomonas protegens CHA0,DOE Joint Genome Institute (JGI),2935353572,Bacteria,Pseudomonadota,Gammaproteobacteria,Pseudomonadales,Pseudomonadaceae,Pseudomonas_E,Pseudomonas_E protegens,Ga0496531,10.46936/10.25585/60001209,Host-associated,Plants,Rhizosphere,Roots,Soil,Morens,46.844313,6.906016,6955622,6353,100,0.35,1327590,1278313,,1000329,2721755247 +2619619073,Bacteria,Permanent Draft,Phytopathogenic Enterobacteria,Brenneria sp. EniD312,DOE Joint Genome Institute (JGI),2619619073,Bacteria,Pseudomonadota,Gammaproteobacteria,Enterobacterales,Enterobacteriaceae,Brenneria,Brenneria nigrifluens,Ga0036897,,,,,,,,,,4943773,4592,100,0.22,1377588,,,1000332,2785511351 +2945020387,Bacteria,Permanent Draft,Large-scale genome sequencing for establishing a Natural Products Genomics Resource Center (NPGRC),Actinobacteria sp. 
020444-1,DOE Joint Genome Institute (JGI),2945020387,Bacteria,Actinomycetota,Actinomycetia,Streptomycetales,Streptomycetaceae,Streptomyces,Streptomyces wuyuanensis,Ga0507688,10.46936/10.25585/60001355,,,,,,,,,7953702,7175,100,0.1,1339903,1340393,,1000335,2710723546 +2622736582,Bacteria,Permanent Draft,"Genomic Encyclopedia of Archaeal and Bacterial Type Strains, Phase II (KMG-II): from individual species to whole genera",Carnobacterium iners DSM 28070,DOE Joint Genome Institute (JGI),2622736582,Bacteria,Bacillota,Bacilli,Lactobacillales,Carnobacteriaceae,Carnobacterium_A,Carnobacterium_A iners,Ga0070574,10.46936/10.25585/60001024,Host-associated,Microbial,Unclassified,Bacteria,Unclassified,,,,2376326,2368,100,0.57,1058921,1059073,,1000338,2710264698 +641522643,Bacteria,Finished,"Verrucomicrobia, 5 species Proposal # 0087-060710",Opitutus terrae PB90-1,DOE Joint Genome Institute (JGI),641522643,Bacteria,Verrucomicrobiota,Verrucomicrobiae,Opitutales,Opitutaceae,Opitutus,Opitutus terrae,Ga0029689,,Environmental,Terrestrial,Unclassified,Soil,Agricultural land,,,,5957605,4701,100,0.24,1378342,,,1000341,640612222 +2626542327,Bacteria,Permanent Draft,"Soil microbial communities from Rifle, Colorado, USA",Parcubacteria (Giovannonibacteria) bacterium GW2011_GWA1_43_15,DOE Joint Genome Institute (JGI),2626542327,Bacteria,Patescibacteria,Paceibacteria,UBA9983,2-01-FULL-45-33,GWA1-44-29,GWA1-44-29 sp001778695,Ga0075874,,,,,,,,,,675871,831,100,0.42,,,,1000344,2708742682 +2880762256,Bacteria,Permanent Draft,"Genomic Encyclopedia of Type Strains, Phase V (KMG-V): Genome sequencing to study the core and pangenomes of soil and plant-associated prokaryotes",Aquimarina megaterium EL_43,DOE Joint Genome Institute (JGI),2880762256,Bacteria,Bacteroidota,Bacteroidia,Flavobacteriales,Flavobacteriaceae,Aquimarina,Aquimarina 
megaterium,Ga0438113,10.46936/10.25585/60001079,Host-associated,Cnidaria,Unclassified,Unclassified,Unclassified,,,,6046857,5321,100,0.43,1265501,1265525,,1000347,2786546252 +2829791209,Bacteria,Permanent Draft,"Genomic Encyclopedia of Type Strains, Phase IV (KMG-IV): sequencing the most valuable type-strain genomes for metagenomic binning, comparative biology and taxonomic classification",Nitrobacter vulgaris DSM 10236,DOE Joint Genome Institute (JGI),2829791209,Bacteria,Pseudomonadota,Alphaproteobacteria,Rhizobiales,Xanthobacteraceae,Nitrobacter,Nitrobacter vulgaris,Ga0373211,10.46936/10.25585/60001087,,,,,,,,,4293395,4585,100,0.81,1219992,1220158,,1000350,2630968270 +2990710928,Bacteria,Permanent Draft,Genome sequencing of plant associated microbes to promote plant fitness in Sorghum bicolor and Oryza sativa,Acidovorax delafieldii SLBN-75,DOE Joint Genome Institute (JGI),2990710928,Bacteria,Pseudomonadota,Gammaproteobacteria,Burkholderiales,Burkholderiaceae_B,Acidovorax,Acidovorax sp000302535,Ga0484268,10.46936/10.25585/60001083,Host-associated,Plants,Rhizosphere,Roots,Soil,USA: California,38.5376,-121.7644,5002431,4657,100,0.03,1290031,1290068,,1000353,2626542203 +2981240341,Bacteria,Permanent Draft,Genomics of switchgrass bacterial isolates,Methylobacterium sp. 
PvP083,DOE Joint Genome Institute (JGI),2981240341,Bacteria,Pseudomonadota,Alphaproteobacteria,Rhizobiales,Beijerinckiaceae,Methylobacterium,Methylobacterium radiotolerans,Ga0531216,10.46936/10.25585/60001249,Host-associated,Plants,Phylloplane/Leaf surface,Phyllosphere,Unclassified,USA: Michigan,42.3956,-85.3757,7431278,7438,100,1.7,1362424,1269037,,1000356,2956128196 +637000165,Bacteria,Finished,Methylobacillus flagellatus KT,Methylobacillus flagellatus KT,DOE Joint Genome Institute (JGI),637000165,Bacteria,Pseudomonadota,Gammaproteobacteria,Burkholderiales,Methylophilaceae,Methylobacillus,Methylobacillus flagellatus,Ga0029383,,Engineered,Wastewater,Unclassified,Activated Sludge,Unclassified,Activated sludge found at the wastewater treatment plant in Moscow Russia,55.755786,37.617633,2971517,2824,100,1.2,1378328,,,1000359,2791354759 +2927270008,Bacteria,Permanent Draft,"Genomic Encyclopedia of Type Strains, Phase V (KMG-V): Genome sequencing to study the core and pangenomes of soil and plant-associated prokaryotes",Bradyrhizobium japonicum USDA 35,DOE Joint Genome Institute (JGI),2927270008,Bacteria,Pseudomonadota,Alphaproteobacteria,Rhizobiales,Xanthobacteraceae,Bradyrhizobium,Bradyrhizobium japonicum,Ga0485025,10.46936/10.25585/60001079,Host-associated,Plants,Unclassified,Unclassified,Unclassified,,,,10300319,10385,100,1.39,1300725,1300935,,1000365,2716884135 +2695421013,Bacteria,Permanent Draft,"Genomic Encyclopedia of Archaeal and Bacterial Type Strains, Phase II (KMG-II): from individual species to whole genera",Cognatiyoonia sediminum DSM 28715,DOE Joint Genome Institute (JGI),2695421013,Bacteria,Pseudomonadota,Alphaproteobacteria,Rhodobacterales,Rhodobacteraceae,Cognatiyoonia,Cognatiyoonia sediminum,Ga0131264,10.46936/10.25585/60001024,Environmental,Aquatic,Unclassified,Marine,Unclassified,,,,3257490,3333,100,0,1108059,1108160,,1000368,2785511073 +2791354855,Bacteria,Permanent Draft,"Marine sediment microbial communities from Pacific Ocean, California, 
USA",Micromonospora sp. CNZ295,DOE Joint Genome Institute (JGI),2791354855,Bacteria,Actinomycetota,Actinomycetia,Mycobacteriales,Micromonosporaceae,Micromonospora,Unclassified,Ga0193643,10.46936/10.25585/60001115,Environmental,Aquatic,Oceanic,Marine,Sediment,"Pacific Ocean: Off the coast of San Diego, USA",,,6439791,6019,100,0.56,1141806,1141863,,1000371,2721755111 +2929121299,Bacteria,Permanent Draft,Genomic analyses of the natural microbiome of Caenorhabditis elegans,Rhodococcus erythropolis JUb83,DOE Joint Genome Institute (JGI),2929121299,Bacteria,Actinomycetota,Actinomycetia,Mycobacteriales,Mycobacteriaceae,Rhodococcus,Rhodococcus erythropolis,Ga0494991,10.46936/10.25585/60001123,,,,,,,,,6759720,6474,100,0.16,1305587,1189022,,1000377,2634166233 +2721755200,Bacteria,Permanent Draft,"Soil microbial communities from Rifle, Colorado, USA",Deltaproteobacteria bacterium GWC2_42_11,DOE Joint Genome Institute (JGI),2721755200,Bacteria,Desulfobacterota_F,GWC2-55-46,UBA9637,GWC2-42-11,GWC2-42-11,GWC2-42-11 sp001797495,Ga0154468,,,,,,,,,,1751043,1806,100,0,,,,1000380,2710723665 +2667527434,Bacteria,Permanent Draft,Genome sequencing of a diverse group of Pseudomonas species,Pseudomonas oryzae KCTC 32247,DOE Joint Genome Institute (JGI),2667527434,Bacteria,Pseudomonadota,Gammaproteobacteria,Pseudomonadales,Pseudomonadaceae,Pseudomonas_K,Pseudomonas_K oryzae,Ga0104702,10.46936/10.25585/60000812,,,,,,,,,4642093,4302,100,0.4,1091806,1091816,,1000383,2710723569 +2651870306,Bacteria,Permanent Draft,The Hungate 1000. 
A catalogue of reference genomes from the rumen microbiome.,Streptococcus equinus Sb04,DOE Joint Genome Institute (JGI),2651870306,Bacteria,Bacillota,Bacilli,Lactobacillales,Streptococcaceae,Streptococcus,Streptococcus equinus_D,Ga0104408,10.46936/10.25585/60000534,Host-associated,Mammals,Stomach,Digestive system,Rumen,,,,1875611,1874,100,0.43,1082093,1082198,,1000386,2713897376 +2904439833,Bacteria,Permanent Draft,"Sorghum-associated microbial communities from plants grown in Nebraska, USA",Herbaspirillum sp. 1589,DOE Joint Genome Institute (JGI),2904439833,Bacteria,Pseudomonadota,Gammaproteobacteria,Burkholderiales,Burkholderiaceae,Herbaspirillum,Herbaspirillum huttiense,Ga0454186,10.46936/10.25585/60001066,Host-associated,Plants,Rhizosphere,Roots,Unclassified,"USA: Memphis, Nebraska",41.146,-96.436,5931679,5442,100,0.23,1275663,1275738,,1000389,2711768732 +2716885052,Bacteria,Permanent Draft,"Animal gut microbial communities from fecal samples from Wisconsin, USA",Fibrobacter sp. UWB13,DOE Joint Genome Institute (JGI),2716885052,Bacteria,Fibrobacterota,Fibrobacteria,Fibrobacterales,Fibrobacteraceae,Fibrobacter,Fibrobacter sp900177805,Ga0136277,10.46936/10.25585/60000667,Host-associated,Mammals,Stomach,Digestive system,Rumen,,,,3804669,3177,100,1.57,1113400,1113448,,1000392,2711768450 +2519899641,Bacteria,Permanent Draft,"Genomic Encyclopedia of Type Strains, Phase I: the one thousand microbial genomes (KMG-I) project",Pseudoalteromonas piscicida ATCC 15057,DOE Joint Genome Institute (JGI),2519899641,Bacteria,Pseudomonadota,Gammaproteobacteria,Enterobacterales_A,Alteromonadaceae,Pseudoalteromonas,Pseudoalteromonas piscicida,Ga0001020,10.46936/10.25585/60000886,Environmental,Aquatic,Unclassified,Marine,Unclassified,,,,5279016,4776,100,0.09,1006803,1006802,,1000395,641522617 +2998344455,Bacteria,Permanent Draft,Genome sequencing of plant associated microbes to promote plant fitness in Sorghum bicolor and Oryza sativa,Vogesella perlucida SLBN-145,DOE Joint Genome 
Institute (JGI),2998344455,Bacteria,Pseudomonadota,Gammaproteobacteria,Burkholderiales,Chromobacteriaceae,Vogesella,Vogesella urethralis,Ga0484270,10.46936/10.25585/60001083,Host-associated,Plants,Rhizosphere,Roots,Soil,USA: California,38.5376,-121.7644,4222996,4083,100,0.25,1290037,1290070,,1000398,2710723898 +2931079999,Bacteria,Permanent Draft,Genome sequencing of novel secondary metabolite producing bacteria for plant Disease control,Bacillus velezensis FZB42_1,DOE Joint Genome Institute (JGI),2931079999,Bacteria,Bacillota,Bacilli,Bacillales,Bacillaceae,Bacillus,Bacillus velezensis,Ga0454245,10.46936/10.25585/60001209,Environmental,Terrestrial,Unclassified,Soil,Unclassified,"USA: River Falls, WI",44.8523,-92.6212,3959738,4044,100,0.24,1278206,1278303,,1000401,2711768305 +2857429247,Bacteria,Permanent Draft,Standard draft genome sequeces of bacteria from Yellowstone Lake,Methylobacterium sp. YL-MPn6-2016,DOE Joint Genome Institute (JGI),2857429247,Bacteria,Pseudomonadota,Alphaproteobacteria,Rhizobiales,Beijerinckiaceae,Methylobacterium,Methylobacterium fujisawaense,Ga0395893,10.46936/10.25585/60001260,Environmental,Aquatic,Lake,Freshwater,Unclassified,"USA: Yellowstone Lake, WY",44.4111,-110.3163,6858912,6636,100,0.21,1251716,1251857,,1000404,2716884137 +2926493430,Bacteria,Permanent Draft,"Genomic Encyclopedia of Type Strains, Phase IV (KMG-IV): sequencing the most valuable type-strain genomes for metagenomic binning, comparative biology and taxonomic classification",Bacillus andreesenii DSM 23948,DOE Joint Genome Institute (JGI),2926493430,Bacteria,Bacillota,Bacilli,Bacillales,DSM-18226,Robertmurraya,Robertmurraya sp002480735,Ga0480525,10.46936/10.25585/60001087,,,,,,,,,4336975,4528,100,1.36,1281977,1282193,,1000407,2608642210 +2739367903,Bacteria,Permanent Draft,"Marine sediment microbial communities from Pacific Ocean, California, USA",Micromonospora sp. 
CNZ285,DOE Joint Genome Institute (JGI),2739367903,Bacteria,Actinomycetota,Actinomycetia,Mycobacteriales,Micromonosporaceae,Micromonospora,Micromonospora aurantiaca,Ga0193637,10.46936/10.25585/60001115,Environmental,Aquatic,Oceanic,Marine,Sediment,"Pacific Ocean: Off the coast of San Diego, USA",,,7006229,6527,100,0.57,1141788,1141857,,1000410,2791354765 +2710724218,Bacteria,Permanent Draft,Phycosphere-associated microbial communities from algal ponds,Alteromonas sp. I10,DOE Joint Genome Institute (JGI),2710724218,Bacteria,Pseudomonadota,Gammaproteobacteria,Enterobacterales_A,Alteromonadaceae,Alteromonas,Alteromonas macleodii,Ga0157237,10.46936/10.25585/60001054,,,,,,,,,4673900,4104,100,0.02,1105325,1105341,,1000413,2626542354 +2734482072,Bacteria,Permanent Draft,Understanding mechanisms by which microbes promote plant health,Pseudomonas asplenii ES_PA-B6,DOE Joint Genome Institute (JGI),2734482072,Bacteria,Pseudomonadota,Gammaproteobacteria,Pseudomonadales,Pseudomonadaceae,Pseudomonas_E,Unclassified,Ga0181147,10.46936/10.25585/60000775,Environmental,Terrestrial,Unclassified,Soil,Unclassified,,,,7673865,6870,100,1.56,1138492,1138513,,1000416,2503982003 +2622736521,Bacteria,Permanent Draft,"Genomic Encyclopedia of Archaeal and Bacterial Type Strains, Phase II (KMG-II): from individual species to whole genera",Clostridium homopropionicum DSM 5847,DOE Joint Genome Institute (JGI),2622736521,Bacteria,Bacillota_A,Clostridia,Clostridiales,Clostridiaceae,Clostridium_H,Clostridium_H homopropionicum,Ga0070594,10.46936/10.25585/60001024,Engineered,Wastewater,Unclassified,Sewage,Unclassified,,,,3644832,3612,100,1.02,1058995,1059110,,1000419,2630969011 +2524614633,Bacteria,Permanent Draft,"Genomic Encyclopedia of Type Strains, Phase I: the one thousand microbial genomes (KMG-I) project",Streptococcus hyovaginalis DSM 12219,DOE Joint Genome Institute (JGI),2524614633,Bacteria,Bacillota,Bacilli,Lactobacillales,Streptococcaceae,Streptococcus,Streptococcus 
hyovaginalis,Ga0001377,10.46936/10.25585/60000886,Host-associated,Mammals,Unclassified,Unclassified,Unclassified,,,,2077809,2109,100,0.1,1008515,1008514,,1000422,2515154180 +2867883562,Bacteria,Permanent Draft,Genome sequencing of novel secondary metabolite producing bacteria for plant Disease control,Pseudomonas fluorescens KOPRI 25853,DOE Joint Genome Institute (JGI),2867883562,Bacteria,Pseudomonadota,Gammaproteobacteria,Pseudomonadales,Pseudomonadaceae,Pseudomonas_E,Pseudomonas_E salomonii,Ga0417199,10.46936/10.25585/60001209,Environmental,Terrestrial,Unclassified,Soil,Unclassified,"USA: Plymouth, MN",,,6423709,6024,100,0.31,1257434,1257762,,1000425,2708743123 +2945483466,Bacteria,Permanent Draft,Large-scale genome sequencing for establishing a Natural Products Genomics Resource Center (NPGRC),Actinobacteria bacterium 001404,DOE Joint Genome Institute (JGI),2945483466,Bacteria,Actinomycetota,Actinomycetia,Streptomycetales,Streptomycetaceae,Streptomyces,Unclassified,Ga0526912,10.46936/10.25585/60001355,,,,,,,,,8865994,8275,100,1.32,1350820,1352065,,1000428,2791354777 +2740891891,Bacteria,Permanent Draft,"Genomic Encyclopedia of Type Strains, Phase III (KMG-III): the genomes of soil and plant-associated and newly described type strains",Sphingomonas aurantiaca MA101b,DOE Joint Genome Institute (JGI),2740891891,Bacteria,Pseudomonadota,Alphaproteobacteria,Sphingomonadales,Sphingomonadaceae,Sphingomonas,Sphingomonas aurantiaca,Ga0191660,10.46936/10.25585/60001401,Environmental,Unclassified,Unclassified,Unclassified,Unclassified,Finland,,,4411230,4055,100,1.31,1145613,1145657,,1000431,2721755222 +2574179751,Bacteria,Permanent Draft,"Genomic Encyclopedia of Type Strains, Phase I: the one thousand microbial genomes (KMG-I) project",Parasphingorhabdus litoris DSM 22379,DOE Joint Genome Institute (JGI),2574179751,Bacteria,Pseudomonadota,Alphaproteobacteria,Sphingomonadales,Sphingomonadaceae,Parasphingorhabdus,Parasphingorhabdus 
litoris,Ga0006635,10.46936/10.25585/60000886,Environmental,Aquatic,Unclassified,Marine,Unclassified,"Coast of Jeju Island, Republic of Korea",33.51,126.52,3607795,3531,100,0.51,1030744,1030743,,1000434,2710723615 +643348554,Bacteria,Finished,Geotalea daltonii FRC-32,Geotalea daltonii FRC-32,DOE Joint Genome Institute (JGI),643348554,Bacteria,Desulfobacterota_F,Desulfuromonadia,Geobacterales,Geobacteraceae,Geotalea,Geotalea daltonii,Ga0028909,,Engineered,Bioremediation,Unclassified,Metal,Unclassified,Uranium-contaminated subsurface at US,,,4304501,3894,100,0.33,1378316,,,1000437,2515154116 +641228482,Bacteria,Finished,4 Burkholderia,Burkholderia multivorans ATCC 17616,DOE Joint Genome Institute (JGI),641228482,Bacteria,Pseudomonadota,Gammaproteobacteria,Burkholderiales,Burkholderiaceae,Burkholderia,Burkholderia multivorans,Ga0028073,,Environmental,Terrestrial,Unclassified,Soil,Unclassified,"USA: Berkeley, California",37.87,-122.27,7008622,6373,100,0.55,1378602,,,1000443,2710723551 +2513237145,Bacteria,Permanent Draft,GEBA - Root Nodulating Bacteria,Bradyrhizobium elkanii USDA 3254,DOE Joint Genome Institute (JGI),2513237145,Bacteria,Pseudomonadota,Alphaproteobacteria,Rhizobiales,Xanthobacteraceae,Bradyrhizobium,Bradyrhizobium pachyrhizi,Ga0025197,10.46936/10.25585/60007492,Host-associated,Plants,Nodule,Roots,Unclassified,Arizona,34.05125493,-111.089437,8979722,8563,100,0.72,1378065,,,1000446,2786546226 +2939253100,Bacteria,Permanent Draft,Large-scale genome sequencing for establishing a Natural Products Genomics Resource Center (NPGRC),Actinobacteria sp. 
050802,DOE Joint Genome Institute (JGI),2939253100,Bacteria,Actinomycetota,Actinomycetia,Mycobacteriales,Mycobacteriaceae,Nocardia,Nocardia salmonicida,Ga0498578,10.46936/10.25585/60001355,,,,,,,,,7362524,6968,100,0.17,1316765,1317368,,1000449,2791354761 +2510461076,Bacteria,Permanent Draft,"Analysis of the clover, pea/bean and lupin microsymbiont genetic pool by studying isolates from distinct Vavilov centres of diversity",Rhizobium leguminosarum bv. trifolii TA1,DOE Joint Genome Institute (JGI),2510461076,Bacteria,Pseudomonadota,Alphaproteobacteria,Rhizobiales,Rhizobiaceae,Rhizobium,Rhizobium ruizarguesonis,Ga0025129,10.46936/10.25585/60007317,Host-associated,Plants,Nodule,Roots,Unclassified,Australia: Tasmania,-41.0335,147.667,8618824,8576,100,12.24,1377865,,,1000452,2710264644 +2928496128,Bacteria,Permanent Draft,"Sorghum-associated microbial communities from plants grown in Nebraska, USA",Stenotrophomonas geniculata 1163,DOE Joint Genome Institute (JGI),2928496128,Bacteria,Pseudomonadota,Gammaproteobacteria,Xanthomonadales,Xanthomonadaceae,Stenotrophomonas,Stenotrophomonas indicatrix,Ga0438570,10.46936/10.25585/60001066,Host-associated,Plants,Unclassified,Roots,Unclassified,"USA: Wann, Nebraska",41.1591,-96.4086,4631123,4286,100,0.28,1272551,1272628,,1000455,2785511060 +2667528184,Bacteria,Permanent Draft,"Genomic Encyclopedia of Type Strains, Phase III (KMG-III): the genomes of soil and plant-associated and newly described type strains",Microbulbifer yueqingensis CGMCC 1.10658,DOE Joint Genome Institute (JGI),2667528184,Bacteria,Pseudomonadota,Gammaproteobacteria,Pseudomonadales,Cellvibrionaceae,Microbulbifer,Microbulbifer yueqingensis,Ga0079827,10.46936/10.25585/60001401,,,,,,,,,3664854,3261,100,0.21,1076137,1076319,,1000458,2630968269 +2934701332,Bacteria,Permanent Draft,Large-scale genome sequencing for establishing a Natural Products Genomics Resource Center (NPGRC),Actinobacteria sp. 
048426,DOE Joint Genome Institute (JGI),2934701332,Bacteria,Actinomycetota,Actinomycetia,Streptomycetales,Streptomycetaceae,Streptomyces,Streptomyces goshikiensis,Ga0498696,10.46936/10.25585/60001355,,,,,,,,,8880425,8158,100,0.27,1317001,1317486,,1000461,2713897397 +2556921656,Bacteria,Permanent Draft,"Genomic Encyclopedia of Type Strains, Phase I: the one thousand microbial genomes (KMG-I) project",Clostridium cellobioparum ATCC 15832,DOE Joint Genome Institute (JGI),2556921656,Bacteria,Bacillota_A,Clostridia,Acetivibrionales,DSM-27016,Ruminiclostridium,Ruminiclostridium cellobioparum,Ga0005296,10.46936/10.25585/60000886,Host-associated,Mammals,Stomach,Digestive system,Rumen,,,,6132222,5220,100,3.37,1024792,1024791,,1000464,2785511069 +650716039,Bacteria,Finished,Novel Microbial Enzymes,Parageobacillus thermoglucosidasius C56-YS93,DOE Joint Genome Institute (JGI),650716039,Bacteria,Bacillota,Bacilli,Bacillales,Anoxybacillaceae,Parageobacillus,Parageobacillus thermoglucosidasius,Ga0028905,,Environmental,Aquatic,Unclassified,Thermal springs,Unclassified,"Obsidian Hot Spring, Yellowstone National Park, Montana, USA",44.603028,-110.865194,3993793,4135,100,0.44,1377646,,,1000470,2708743024 +2558860994,Bacteria,Permanent Draft,"Genomic Encyclopedia of Type Strains, Phase I: the one thousand microbial genomes (KMG-I) project",Alkanindiges illinoisensis DSM 15370,DOE Joint Genome Institute (JGI),2558860994,Bacteria,Pseudomonadota,Gammaproteobacteria,Pseudomonadales,Moraxellaceae,Alkanindiges,Alkanindiges illinoisensis,Ga0002119,10.46936/10.25585/60000886,Environmental,Terrestrial,Unclassified,Soil,Oil-contaminated,Oilfield in southern Illinois,,,3305253,3119,100,0.02,1011014,1011013,,1000473,2893903249 +2770939463,Bacteria,Permanent Draft,"Subsurface microbial communities from deep shales in Ohio and West Virginia, USA",Garciella nitratireducens GHB3,DOE Joint Genome Institute (JGI),2770939463,Bacteria,Bacillota_A,Clostridia,Eubacteriales,Garciellaceae,Garciella,Garciella 
nitratireducens,Ga0255681,10.46936/10.25585/60001053,Environmental,Terrestrial,Unclassified,Deep subsurface,Unclassified,USA: Texas,28.885,-97.9,2263229,2259,100,0.83,1182297,1182304,,1000476,2710264608 +2524614552,Bacteria,Permanent Draft,Plant associated metagenomes--Microbial community diversity and host control of community assembly across model and emerging plant ecological genomics systems.,Streptomyces sp. 142MFCol3.1,DOE Joint Genome Institute (JGI),2524614552,Bacteria,Actinomycetota,Actinomycetia,Streptomycetales,Streptomycetaceae,Streptomyces,Streptomyces sp002300165,Ga0000387,10.46936/10.25585/60000944,,,,,,,,,9726042,8727,100,1.96,1003083,1003082,,1000479,2711768516 +2808606402,Bacteria,Permanent Draft,Genome mining of underrepresented organisms for secondary metabolites,Streptomyces sp. T12,DOE Joint Genome Institute (JGI),2808606402,Bacteria,Actinomycetota,Actinomycetia,Streptomycetales,Streptomycetaceae,Streptomyces,Streptomyces sp000527195,Ga0311319,10.46936/10.25585/60001105,,,,,,,,,7803850,7122,100,0.13,1225051,1176533,,1000485,2626541881 +2828075273,Bacteria,Permanent Draft,Studying the diversity of plant-associated saprophytic bacteria and their role in host health and plant-pathogen interactions,Xanthomonas cannabis CFBP 8593,DOE Joint Genome Institute (JGI),2828075273,Bacteria,Pseudomonadota,Gammaproteobacteria,Xanthomonadales,Xanthomonadaceae,Xanthomonas,Xanthomonas cannabis,Ga0372508,10.46936/10.25585/60001156,Host-associated,Plants,Unclassified,Unclassified,Unclassified,,,,4839134,4203,100,0.06,1235872,1236025,,1000491,2708742994 +2959030392,Bacteria,Permanent Draft,Large-scale genome sequencing for establishing a Natural Products Genomics Resource Center (NPGRC),Actinobacteria bacterium 86181,DOE Joint Genome Institute (JGI),2959030392,Bacteria,Actinomycetota,Actinomycetia,Streptomycetales,Streptomycetaceae,Streptomyces,Streptomyces 
lydicus,Ga0535015,10.46936/10.25585/60001355,,,,,,,,,8469228,7686,100,0.05,1360129,1360972,,1000494,2708742960 +2617270871,Bacteria,Permanent Draft,Sequencing the genomes of 1000 actinobacteria strains,Micromonospora auratinigra DSM 44815,DOE Joint Genome Institute (JGI),2617270871,Bacteria,Actinomycetota,Actinomycetia,Mycobacteriales,Micromonosporaceae,Micromonospora,Micromonospora auratinigra,Ga0070611,10.46936/10.25585/60001044,Environmental,Terrestrial,Unclassified,Soil,Unclassified,,,,6758500,6242,100,0.09,1059235,1059270,,1000497,2721755123 +2619618823,Bacteria,Permanent Draft,"Genomic Encyclopedia of Archaeal and Bacterial Type Strains, Phase II (KMG-II): from individual species to whole genera",Trichococcus pasteurii DSM 2381,DOE Joint Genome Institute (JGI),2619618823,Bacteria,Bacillota,Bacilli,Lactobacillales,Aerococcaceae,Trichococcus,Trichococcus pasteurii,Ga0070501,10.46936/10.25585/60001024,Engineered,Wastewater,Unclassified,Sewage,Unclassified,,,,3197326,3053,100,2.12,1055251,1055250,,1000503,2708742673 +2517287019,Bacteria,Permanent Draft,Genome sequencing of the marine actinomycete Salinispora spp.,Salinispora pacifica DSM 45544,DOE Joint Genome Institute (JGI),2517287019,Bacteria,Actinomycetota,Actinomycetia,Mycobacteriales,Micromonosporaceae,Micromonospora,Micromonospora pacifica,Ga0026583,10.46936/10.25585/60007342,Environmental,Aquatic,Unclassified,Unclassified,Unclassified,,,,5464752,5064,100,0.56,1377922,,,1000506,2630969009 +2706795032,Bacteria,Permanent Draft,"Genomic Encyclopedia of Type Strains, Phase III (KMG-III): the genomes of soil and plant-associated and newly described type strains",Novosphingobium mathurense SM117,DOE Joint Genome Institute (JGI),2706795032,Bacteria,Pseudomonadota,Alphaproteobacteria,Sphingomonadales,Sphingomonadaceae,Novosphingobium,Novosphingobium mathurense,Ga0138998,10.46936/10.25585/60001401,,,,,,,,,4843551,4572,100,0.03,1118301,1118371,,1000509,2713897422 +2521172646,Bacteria,Permanent Draft,Plant associated 
metagenomes--Microbial community diversity and host control of community assembly across model and emerging plant ecological genomics systems.,Pseudomonas sp. LAMO17WK12:I9,DOE Joint Genome Institute (JGI),2521172646,Bacteria,Pseudomonadota,Gammaproteobacteria,Pseudomonadales,Pseudomonadaceae,Pseudomonas_E,Pseudomonas_E chlororaphis,Ga0001092,10.46936/10.25585/60000944,,,,,,,,,7022592,6437,100,0.07,1007084,1007083,,1000515,2708742963 +2956013887,Bacteria,Permanent Draft,Novel secondary metabolite-producing bacteria for plant disease control,Pseudomonas frederiksbergensis 94G2 TE3679,DOE Joint Genome Institute (JGI),2956013887,Bacteria,Pseudomonadota,Gammaproteobacteria,Pseudomonadales,Pseudomonadaceae,Pseudomonas_E,Pseudomonas_E sp000282495,Ga0532257,10.46936/10.25585/60001358,Environmental,Terrestrial,Unclassified,Soil,Unclassified,,,,6405982,5976,100,0.07,1355584,1355768,,1000518,2626542211 +2947847472,Bacteria,Permanent Draft,Large-scale genome sequencing for establishing a Natural Products Genomics Resource Center (NPGRC),Actinobacteria bacterium 88544,DOE Joint Genome Institute (JGI),2947847472,Bacteria,Actinomycetota,Actinomycetia,Streptomycetales,Streptomycetaceae,Streptomyces,Streptomyces parvulus,Ga0528268,10.46936/10.25585/60001355,,,,,,,,,8014841,7377,100,0.34,1351648,1352479,,1000527,2721755103 +643348507,Bacteria,Finished,Multiple Anaeromyxobacter,Anaeromyxobacter dehalogenans 2CP-1,DOE Joint Genome Institute (JGI),643348507,Bacteria,Myxococcota,Myxococcia,Myxococcales,Anaeromyxobacteraceae,Anaeromyxobacter,Anaeromyxobacter dehalogenans,Ga0027433,,Environmental,Aquatic,Lotic,Freshwater,Unclassified,"Lansing, Michigan",42.733621,-84.546582,5029329,4540,100,0.21,1378585,,,1000530,2710723892 +2940377351,Bacteria,Permanent Draft,Genome Encyclopedia of Bacteria and Archaea VI: Functional Genomics of Type Strains,Ereboglobus sp. 
PH5-5,DOE Joint Genome Institute (JGI),2940377351,Bacteria,Verrucomicrobiota,Verrucomicrobiae,Opitutales,Opitutaceae,Ereboglobus,Unclassified,Ga0505233,10.46936/10.25585/60001193,Host-associated,Arthropoda: Insects,Gut,Digestive system,Unclassified,,,,3508102,2712,100,0.19,1335572,1335665,,1000533,2626542253 +2667527208,Bacteria,Permanent Draft,Genome sequencing of Caldicellulosiruptor bescii uracil auxotroph strains for use in genetic manipulations,Caldicellulosiruptor bescii MACB1018,DOE Joint Genome Institute (JGI),2667527208,Bacteria,Bacillota_A,Thermoanaerobacteria,Caldicellulosiruptorales,Caldicellulosiruptoraceae,Caldicellulosiruptor,Caldicellulosiruptor bescii,Ga0100408,10.46936/10.25585/60000555,,,,,,,,,2953075,2906,100,0.01,1090889,1090899,,1000536,2710264583 +2663762773,Bacteria,Permanent Draft,Genome sequencing of a diverse group of Pseudomonas species,Pseudomonas congelans LMG 21466,DOE Joint Genome Institute (JGI),2663762773,Bacteria,Pseudomonadota,Gammaproteobacteria,Pseudomonadales,Pseudomonadaceae,Pseudomonas_E,Pseudomonas_E congelans,Ga0104654,10.46936/10.25585/60000812,Host-associated,Plants,Unclassified,Phyllosphere,Unclassified,Germany,,,5737059,5099,100,0.17,1090598,1090706,,1000539,2710723603 +2825022033,Bacteria,Permanent Draft,Understanding the reciprocal impacts of modified plant cell wall and associated microbiome,Sphingomonas sp. 
BK345,DOE Joint Genome Institute (JGI),2825022033,Bacteria,Pseudomonadota,Alphaproteobacteria,Sphingomonadales,Sphingomonadaceae,Sphingomonas,Sphingomonas sp014193845,Ga0365278,10.46936/10.25585/60001140,,,,,,,,,4518915,4229,100,1.05,1225246,1225387,,1000545,2710264664 +2585428052,Bacteria,Draft,"Saline, thermophilic phototrophic and chemotrophic mat microbial communities from various locations in USA and Mexico",Porphyrobacter sp.,DOE Joint Genome Institute (JGI),2585428052,Bacteria,Pseudomonadota,Alphaproteobacteria,Sphingomonadales,Sphingomonadaceae,Erythrobacter,Erythrobacter sp900105095,Ga0048143,10.46936/10.25585/60000772,,,,,,,,,3036783,2929,100,0.2,,,,1001028,2708742957 +2540341236,Bacteria,Permanent Draft,Plant associated metagenomes--Microbial community diversity and host control of community assembly across model and emerging plant ecological genomics systems.,Pseudomonas sp. URMO17WK12:I8,DOE Joint Genome Institute (JGI),2540341236,Bacteria,Pseudomonadota,Gammaproteobacteria,Pseudomonadales,Pseudomonadaceae,Pseudomonas_E,Pseudomonas_E putida_J,Ga0001067,10.46936/10.25585/60000944,,,,,,,,,5752811,5360,100,0.16,1007009,1007008,,1001031,2708742869 +640069309,Bacteria,Finished,Clostridium thermocellum ATCC 27405,Acetivibrio thermocellus ATCC 27405,DOE Joint Genome Institute (JGI),640069309,Bacteria,Bacillota_A,Clostridia,Acetivibrionales,Acetivibrionaceae,Hungateiclostridium,Hungateiclostridium thermocellum,Ga0028394,,,,,,,,,,3843301,3335,100,1.04,1378380,,,1001034,2785511354 +2579779161,Bacteria,Permanent Draft,Whole Genome Sequencing of Novel Marine Hydrocarbon-Degrading Bacteria,Halomonas sp. 
TG39a,DOE Joint Genome Institute (JGI),2579779161,Bacteria,Pseudomonadota,Gammaproteobacteria,Pseudomonadales,Halomonadaceae,Halomonas,Halomonas ventosae_B,Ga0004744,10.46936/10.25585/60007561,Environmental,Aquatic,Coastal,Marine,Unclassified,,,,4979193,4614,100,0.01,1022325,1022324,,1001040,2710723992 +2518645611,Bacteria,Permanent Draft,"Genomic Encyclopedia of Type Strains, Phase I: the one thousand microbial genomes (KMG-I) project",Halomonas lutea DSM 23508,DOE Joint Genome Institute (JGI),2518645611,Bacteria,Pseudomonadota,Gammaproteobacteria,Pseudomonadales,Halomonadaceae,Modicisalibacter,Modicisalibacter luteus,Ga0000840,10.46936/10.25585/60000886,Environmental,Aquatic,Saline,Non-marine Saline and Alkaline,Unclassified,"Ebinur Lake, Xinjiang province, north-west China",45.05,82.977,4533090,4368,100,1,1005108,1005107,,1001043,2710723845 +2675903066,Bacteria,Permanent Draft,"Genomic Encyclopedia of Type Strains, Phase III (KMG-III): the genomes of soil and plant-associated and newly described type strains",Nonomuraea maritima CGMCC 4.5681,DOE Joint Genome Institute (JGI),2675903066,Bacteria,Actinomycetota,Actinomycetia,Streptosporangiales,Streptosporangiaceae,Nonomuraea,Nonomuraea maritima,Ga0104438,10.46936/10.25585/60001401,,,,,,,,,8267594,7999,100,0.17,1085021,1085106,,1001052,2710264687 +2599185291,Bacteria,Permanent Draft,Comparative genomics of bacterial root endophytes of switchgrass collected from native prairies over two seasons,Pseudomonas sp. 
NFACC48-1,DOE Joint Genome Institute (JGI),2599185291,Bacteria,Pseudomonadota,Gammaproteobacteria,Pseudomonadales,Pseudomonadaceae,Pseudomonas_E,Pseudomonas_E fluorescens_Q,Ga0058973,10.46936/10.25585/60000672,Host-associated,Plants,Rhizoplane,Roots,Endophytes,,36.811,-97.4538,6775623,6141,100,0.27,1049581,1049580,,1001055,639633059 +2703719191,Bacteria,Permanent Draft,Genomic insights into Acetone-Butanol-Ethanol (ABE) fermentation by sequencing solventogenic Clostridia strains,Clostridium beijerinckii DJ056,DOE Joint Genome Institute (JGI),2703719191,Bacteria,Bacillota_A,Clostridia,Clostridiales,Clostridiaceae,Clostridium,Clostridium beijerinckii,Ga0136788,10.46936/10.25585/60000855,,,,,,,,,6146129,5532,100,0.21,1116951,1116976,,1001058,2820280018 +2946882147,Bacteria,Permanent Draft,Large-scale genome sequencing for establishing a Natural Products Genomics Resource Center (NPGRC),Actinobacteria sp. 4318,DOE Joint Genome Institute (JGI),2946882147,Bacteria,Actinomycetota,Actinomycetia,Streptomycetales,Streptomycetaceae,Streptomyces,Streptomyces fimicarius,Ga0507394,10.46936/10.25585/60001355,,,,,,,,,8221072,7368,100,0.01,1339275,1340079,,1001061,2626542051 +2959087715,Bacteria,Permanent Draft,Large-scale genome sequencing for establishing a Natural Products Genomics Resource Center (NPGRC),Actinobacteria bacterium 86849,DOE Joint Genome Institute (JGI),2959087715,Bacteria,Actinomycetota,Actinomycetia,Streptomycetales,Streptomycetaceae,Streptomyces,Streptomyces griseoviridis_A,Ga0535033,10.46936/10.25585/60001355,,,,,,,,,9010196,7929,100,0.43,1360165,1360990,,1001064,2713897362 +2926612960,Bacteria,Permanent Draft,Sequencing the genomes of 1000 actinobacteria strains,Streptomyces sp. 
DSM 40167,DOE Joint Genome Institute (JGI),2926612960,Bacteria,Actinomycetota,Actinomycetia,Streptomycetales,Streptomycetaceae,Streptomyces,Streptomyces sp003947455,Ga0480153,10.46936/10.25585/60001044,,,,,,,,,8177831,7562,100,0,1281717,1282069,,1001067,2788500151 +2503538021,Bacteria,Finished,GEBA-PCC,Chroococcidiopsis thermalis PCC 7203,DOE Joint Genome Institute (JGI),2503538021,Bacteria,Cyanobacteriota,Cyanobacteriia,Cyanobacteriales,Chroococcidiopsidaceae,Chroococcidiopsis,Chroococcidiopsis thermalis,Ga0025550,10.46936/10.25585/60001427,Environmental,Terrestrial,Unclassified,Soil,Unclassified,"Greifswald, Germany",54.10114,13.3885,6689401,6033,100,1.12,1377649,,,1001070,2820027804 +2844826561,Bacteria,Permanent Draft,Sequencing the genomes of 1000 actinobacteria strains,Cryobacterium roopkundense DSM 21065,DOE Joint Genome Institute (JGI),2844826561,Bacteria,Actinomycetota,Actinomycetia,Actinomycetales,Microbacteriaceae,Cryobacterium,Cryobacterium roopkundense,Ga0104588,10.46936/10.25585/60001044,Environmental,Aquatic,Lentic,Freshwater,Unclassified,"India, Roopkund, Himalayan region",,,4531652,4334,100,0.4,1085974,1086306,,1001073,2788500155 +2547132468,Bacteria,Permanent Draft,"Bioprospecting for lignincellulytic microbes and enzymes from natural, highly-evolved plant biomass-degrading systems",Stenotrophomonas sp. 
CC120222-04,DOE Joint Genome Institute (JGI),2547132468,Bacteria,Pseudomonadota,Gammaproteobacteria,Xanthomonadales,Xanthomonadaceae,Stenotrophomonas,Stenotrophomonas maltophilia_G,Ga0004976,10.46936/10.25585/60007468,,,,,,,,,4620825,4239,100,0.17,1023624,1023623,,1001079,2710723929 +2681812861,Bacteria,Permanent Draft,"Comparative analysis of microorganisms from saline springs in Andes Mountain Range, Colombia",Stenotrophomonas rhizophila USBA GBX 843,DOE Joint Genome Institute (JGI),2681812861,Bacteria,Pseudomonadota,Gammaproteobacteria,Xanthomonadales,Xanthomonadaceae,Stenotrophomonas,Stenotrophomonas maltophilia_Z,Ga0105846,10.46936/10.25585/60000546,Environmental,Terrestrial,Unclassified,Soil,Unclassified,,,,4444921,4019,100,0.15,1094771,1094804,,1001082,2634166254 +2681812917,Bacteria,Permanent Draft,Genome sequencing of a diverse group of Pseudomonas species,Pseudomonas sp. 7SR1,DOE Joint Genome Institute (JGI),2681812917,Bacteria,Pseudomonadota,Gammaproteobacteria,Pseudomonadales,Pseudomonadaceae,Pseudomonas_E,Pseudomonas_E sp900156465,Ga0123386,10.46936/10.25585/60000812,,,,,,,,,6090201,5445,100,0.04,1106200,1106205,,1001085,2710723789 +2933142243,Bacteria,Permanent Draft,Large-scale genome sequencing for establishing a Natural Products Genomics Resource Center (NPGRC),Actinobacteria sp. 050284,DOE Joint Genome Institute (JGI),2933142243,Bacteria,Actinomycetota,Actinomycetia,Streptosporangiales,Streptosporangiaceae,Streptosporangium,Unclassified,Ga0498370,10.46936/10.25585/60001355,,,,,,,,,9090313,8126,100,3.61,1316349,1317160,,1001088,2710724084 +2738543022,Bacteria,Permanent Draft,"Populus root and rhizosphere microbial communities from Tennessee, USA",Novosphingobium sp. 
GV055,DOE Joint Genome Institute (JGI),2738543022,Bacteria,Pseudomonadota,Alphaproteobacteria,Sphingomonadales,Sphingomonadaceae,Novosphingobium,Novosphingobium capsulatum,Ga0189721,10.46936/10.25585/60000910,Host-associated,Plants,Unclassified,Roots,Unclassified,,,,4835059,4522,100,0.48,1146886,1147025,,1001091,2626542217 +2545824508,Bacteria,Permanent Draft,"Bioprospecting for lignincellulytic microbes and enzymes from natural, highly-evolved plant biomass-degrading systems",Stenotrophomonas sp. CC120223-11,DOE Joint Genome Institute (JGI),2545824508,Bacteria,Pseudomonadota,Gammaproteobacteria,Xanthomonadales,Xanthomonadaceae,Stenotrophomonas,Stenotrophomonas maltophilia_G,Ga0004977,10.46936/10.25585/60007468,,,,,,,,,4671339,4285,100,0.3,1023627,1023626,,1001094,2710264658 +2708742668,Bacteria,Permanent Draft,"Soil microbial communities from Rifle, Colorado, USA",Bacteroidetes bacterium GWF2_40_13,DOE Joint Genome Institute (JGI),2708742668,Bacteria,Bacteroidota,Bacteroidia,Bacteroidales,SZUA-534,GWE2-40-63,GWE2-40-63 sp003512955,Ga0154545,,,,,,,,,,5028073,4051,100,1.42,,,,1001097,2608642205 +2918856767,Bacteria,Permanent Draft,"Sorghum-associated microbial communities from plants grown in Nebraska, USA",Arthrobacter oryzae CC26,DOE Joint Genome Institute (JGI),2918856767,Bacteria,Actinomycetota,Actinomycetia,Actinomycetales,Micrococcaceae,Arthrobacter,Unclassified,Ga0451239,10.46936/10.25585/60001066,Environmental,Terrestrial,Unclassified,Soil,Unclassified,,,,5204753,4919,100,1.61,1273071,1273189,,1001100,2711768208 +2718217659,Bacteria,Permanent Draft,Genomic insights into Acetone-Butanol-Ethanol (ABE) fermentation by sequencing solventogenic Clostridia strains,Clostridium beijerinckii DJ052,DOE Joint Genome Institute (JGI),2718217659,Bacteria,Bacillota_A,Clostridia,Clostridiales,Clostridiaceae,Clostridium,Clostridium 
beijerinckii,Ga0154038,10.46936/10.25585/60000855,Engineered,Bioreactor,Unclassified,Unclassified,Unclassified,,,,6046223,5513,100,0.74,1123533,1123557,,1001103,2710723543 +2857677899,Bacteria,Permanent Draft,Exploring microbial biodiversity for novel pathways involved in the catabolism of aromatic compounds derived from lignin,Paraburkholderia metrosideri M3D4D,DOE Joint Genome Institute (JGI),2857677899,Bacteria,Pseudomonadota,Gammaproteobacteria,Burkholderiales,Burkholderiaceae,Paraburkholderia,Unclassified,Ga0417146,10.46936/10.25585/60001252,Environmental,Terrestrial,Unclassified,Soil,Unclassified,USA: Yellowstone National Park,,,8499658,7800,100,0.15,1256135,1256283,,1001106,2710723909 +2524023250,Bacteria,Permanent Draft,"Genomic Encyclopedia of Type Strains, Phase I: the one thousand microbial genomes (KMG-I) project",Niveispirillum irakense DSM 11586,DOE Joint Genome Institute (JGI),2524023250,Bacteria,Pseudomonadota,Alphaproteobacteria,Azospirillales,Azospirillaceae,Niveispirillum,Niveispirillum irakense,Ga0002316,10.46936/10.25585/60000886,Host-associated,Plants,Unclassified,Roots,Unclassified,Diwaniyah,31.995557,44.924412,5457705,4802,100,0.35,1012005,1012004,,1001112,2710723742 +2932562620,Bacteria,Permanent Draft,Large-scale genome sequencing for establishing a Natural Products Genomics Resource Center (NPGRC),Actinobacteria sp. 
001370,DOE Joint Genome Institute (JGI),2932562620,Bacteria,Actinomycetota,Actinomycetia,Mycobacteriales,Pseudonocardiaceae,Amycolatopsis,Unclassified,Ga0498483,10.46936/10.25585/60001355,,,,,,,,,10424835,10076,100,4.32,1316575,1317273,,1001115,2788500163 +2960194285,Bacteria,Permanent Draft,Large-scale genome sequencing for establishing a Natural Products Genomics Resource Center (NPGRC),Actinobacteria bacterium 21763,DOE Joint Genome Institute (JGI),2960194285,Bacteria,Actinomycetota,Actinomycetia,Streptomycetales,Streptomycetaceae,Streptomyces,Streptomyces griseoincarnatus,Ga0532664,10.46936/10.25585/60001355,,,,,,,,,7475607,6814,100,0.04,1357174,1357739,,1001121,2710724059 +2956396850,Bacteria,Permanent Draft,Large-scale genome sequencing for establishing a Natural Products Genomics Resource Center (NPGRC),Actinobacteria bacterium 7820,DOE Joint Genome Institute (JGI),2956396850,Bacteria,Actinomycetota,Actinomycetia,Streptomycetales,Streptomycetaceae,Streptomyces,Streptomyces mutabilis,Ga0532376,10.46936/10.25585/60001355,,,,,,,,,7704747,7000,100,0,1356598,1357451,,1001124,2710723820 +2935741537,Bacteria,Permanent Draft,Studying plant-microbial biological nitrogen fixation (BNF),Bradyrhizobium sp. LA6.7,DOE Joint Genome Institute (JGI),2935741537,Bacteria,Pseudomonadota,Alphaproteobacteria,Rhizobiales,Xanthobacteraceae,Bradyrhizobium,Bradyrhizobium sp011516635,Ga0500282,10.46936/10.25585/60001350,Host-associated,Plants,Nodule,Roots,Unclassified,,,,9707219,9379,100,0.26,1325923,1326072,,1001133,2708742669 +2582580721,Bacteria,Permanent Draft,Convergent evolution of an endohyphal lifestyle and mutualism in phylogenetically diverse bacteria,Luteibacter sp. 
9135,DOE Joint Genome Institute (JGI),2582580721,Bacteria,Pseudomonadota,Gammaproteobacteria,Xanthomonadales,Rhodanobacteraceae,Luteibacter,Luteibacter sp000745005,Ga0004728,10.46936/10.25585/60007569,,,,,,,,,4485268,3839,100,0.06,1022146,1022145,,1001136,2786546293 +2781126045,Bacteria,Permanent Draft,Sequencing the genomes of 1000 actinobacteria strains,Plantibacter flavus DSM 14012,DOE Joint Genome Institute (JGI),2781126045,Bacteria,Actinomycetota,Actinomycetia,Actinomycetales,Microbacteriaceae,Plantibacter,Plantibacter flavus,Ga0197524,10.46936/10.25585/60001044,Host-associated,Plants,Unclassified,Phyllosphere,Unclassified,Germany,,,4317755,4099,100,0.15,1147623,1147927,,1001139,2710723823 +2926174233,Bacteria,Permanent Draft,"Sorghum-associated microbial communities from plants grown in Nebraska, USA",Variovorax boronicumulans DS3748,DOE Joint Genome Institute (JGI),2926174233,Bacteria,Pseudomonadota,Gammaproteobacteria,Burkholderiales,Burkholderiaceae_B,Variovorax,Variovorax paradoxus_C,Ga0466870,10.46936/10.25585/60001066,Environmental,Terrestrial,Unclassified,Soil,Unclassified,,,,7392733,7057,100,1.09,1285496,1285646,,1001142,2608642207 +2946897372,Bacteria,Permanent Draft,Large-scale genome sequencing for establishing a Natural Products Genomics Resource Center (NPGRC),Actinobacteria sp. 5497,DOE Joint Genome Institute (JGI),2946897372,Bacteria,Actinomycetota,Actinomycetia,Streptomycetales,Streptomycetaceae,Streptomyces,Streptomyces tibetensis,Ga0507396,10.46936/10.25585/60001355,,,,,,,,,9102008,8367,100,0,1339279,1340081,,1001145,2786546290 +2932555117,Bacteria,Permanent Draft,Large-scale genome sequencing for establishing a Natural Products Genomics Resource Center (NPGRC),Actinobacteria sp. 
004281,DOE Joint Genome Institute (JGI),2932555117,Bacteria,Actinomycetota,Actinomycetia,Streptomycetales,Streptomycetaceae,Streptomyces,Streptomyces fimicarius,Ga0498481,10.46936/10.25585/60001355,,,,,,,,,8408172,7502,100,0.41,1316571,1317271,,1001310,2721755244 +2953863150,Bacteria,Permanent Draft,Large-scale genome sequencing for establishing a Natural Products Genomics Resource Center (NPGRC),Actinobacteria bacterium 21457,DOE Joint Genome Institute (JGI),2953863150,Bacteria,Actinomycetota,Actinomycetia,Mycobacteriales,Pseudonocardiaceae,Amycolatopsis,Unclassified,Ga0528226,10.46936/10.25585/60001355,,,,,,,,,10407830,9779,100,4.91,1351564,1352437,,1001331,2710723958 +2853057518,Bacteria,Permanent Draft,Genomic insights into Acetone-Butanol-Ethanol (ABE) fermentation by sequencing solventogenic Clostridia strains,Clostridium saccharobutylicum DJ071 (Re-Annotation) (version 2),DOE Joint Genome Institute (JGI),2853057518,Bacteria,Bacillota_A,Clostridia,Clostridiales,Clostridiaceae,Clostridium,Clostridium saccharobutylicum,Ga0423237,10.46936/10.25585/60000855,Engineered,Bioreactor,Unclassified,Unclassified,Unclassified,,,,5110066,4650,100,0.07,1268024,1134408,,1001334,2786546684 +2695420918,Bacteria,Permanent Draft,"Sequencing of bacterial isolates from soil warming experiment in Harvard Forest, Massachusetts, USA",Burkholderia sp. 
GAS332,DOE Joint Genome Institute (JGI),2695420918,Bacteria,Pseudomonadota,Gammaproteobacteria,Burkholderiales,Burkholderiaceae,Paraburkholderia,Paraburkholderia sp900142905,Ga0132004,10.46936/10.25585/60000854,Environmental,Terrestrial,Forest,Soil,Unclassified,"USA: Harvard Forest, Massachusetts",42.54,-72.18,10416547,9503,100,3.18,1111425,1111474,,1001340,640963031 +2562617198,Bacteria,Permanent Draft,Sequencing the genomes of two abundant thermophiles in Great Basin hot springs: Thermocrinis jamiesonii and novel Chloroflexi Thermoflexus hugenholtzii,Thermocrinis jamiesonii GBS1,DOE Joint Genome Institute (JGI),2562617198,Bacteria,Aquificota,Aquificae,Aquificales,Aquificaceae,Thermocrinis,Thermocrinis sp000702425,Ga0003358,10.46936/10.25585/60007554,,,,,,"Great Boiling Spring (GBS), Nevada",,,1315625,1463,100,0.21,1015918,1015917,,1001343,2791354764 +2502957028,Bacteria,Finished,Sulfur reducing delta proteobacteria,Desulfatibacillum aliphaticivorans AK-01,DOE Joint Genome Institute (JGI),2502957028,Bacteria,Desulfobacterota,Desulfobacteria,Desulfobacterales,Desulfatibacillaceae,Desulfatibacillum,Desulfatibacillum aliphaticivorans,Ga0025296,,Environmental,Aquatic,Lotic,Freshwater,Unclassified,"Sediment from the Arthur Kill, NJ/NY waterway",40.437746,-74.20347,6517073,5320,100,0.78,1378675,,,1001346,2513237161 +2571042916,Bacteria,Permanent Draft,"Genomic Encyclopedia of Type Strains, Phase I: the one thousand microbial genomes (KMG-I) project",Pseudodesulfovibrio halophilus DSM 5663,DOE Joint Genome Institute (JGI),2571042916,Bacteria,Desulfobacterota,Desulfovibrionia,Desulfovibrionales,Desulfovibrionaceae,Pseudodesulfovibrio,Unclassified,Ga0006629,10.46936/10.25585/60000886,Environmental,Unclassified,Unclassified,Unclassified,Unclassified,"Solar Lake, Sinai",29.422946,34.829983,3376200,3195,100,0.12,1030726,1030725,,1001349,2711768267 +2963394512,Bacteria,Permanent Draft,Large-scale genome sequencing for establishing a Natural Products Genomics Resource Center 
(NPGRC),Actinobacteria bacterium 87899,DOE Joint Genome Institute (JGI),2963394512,Bacteria,Actinomycetota,Actinomycetia,Streptomycetales,Streptomycetaceae,Streptomyces,Streptomyces albidoflavus,Ga0534931,10.46936/10.25585/60001355,,,,,,,,,7158399,6368,100,0.04,1359777,1360796,,1001352,2820053807 +2811994874,Bacteria,Permanent Draft,Genome sequencing of plant associated microbes to promote plant fitness in Sorghum bicolor and Oryza sativa,Nocardioides sp. SLBN-35,DOE Joint Genome Institute (JGI),2811994874,Bacteria,Actinomycetota,Actinomycetia,Propionibacteriales,Nocardioidaceae,Nocardioides,Nocardioides sp006715725,Ga0314629,10.46936/10.25585/60001083,Host-associated,Plants,Unclassified,Roots,Unclassified,USA: California,38.5376,-121.7644,5367947,5207,100,0.13,1217911,1218030,,1001358,2626542100 +2767802474,Bacteria,Permanent Draft,Genome sequencing of rice bacterial endophytes,Aeromonas veronii E2102,DOE Joint Genome Institute (JGI),2767802474,Bacteria,Pseudomonadota,Gammaproteobacteria,Enterobacterales_A,Aeromonadaceae,Aeromonas,Aeromonas veronii,Ga0222353,10.46936/10.25585/60001111,Host-associated,Plants,Unclassified,Unclassified,Unclassified,,,,4502955,4178,100,0.15,1162648,1162742,,1001361,2721755187 +2814123446,Bacteria,Permanent Draft,Sequencing the genomes of 1000 actinobacteria strains,Ornithinicoccus hortensis DSM 12335,DOE Joint Genome Institute (JGI),2814123446,Bacteria,Actinomycetota,Actinomycetia,Actinomycetales,Dermatophilaceae,Ornithinicoccus,Ornithinicoccus hortensis,Ga0197519,10.46936/10.25585/60001044,Environmental,Terrestrial,Unclassified,Soil,Unclassified,Germany,,,3935242,3706,100,0.1,1147608,1147922,,1001364,2708742872 +2600255393,Bacteria,Permanent Draft,Metagenomic and metatranscriptomic analysis of forest soil communities across North America,Janthinobacterium sp. 
551a,DOE Joint Genome Institute (JGI),2600255393,Bacteria,Pseudomonadota,Gammaproteobacteria,Burkholderiales,Burkholderiaceae,Janthinobacterium,Janthinobacterium sp900112025,Ga0059271,10.46936/10.25585/60007402,Environmental,Terrestrial,Loam,Soil,Unclassified,,,,6459176,5632,100,0.22,1050304,1050303,,1001367,2893884007 +2516653084,Bacteria,Permanent Draft,Sequencing of five epiphytic Sphingomonas strains,Sphingomonas melonis C3,DOE Joint Genome Institute (JGI),2516653084,Bacteria,Pseudomonadota,Alphaproteobacteria,Sphingomonadales,Sphingomonadaceae,Sphingomonas,Sphingomonas aquatilis,Ga0026615,10.46936/10.25585/60007400,Environmental,Terrestrial,Unclassified,Soil,Unclassified,,,,3929644,3774,100,1.38,1378147,,,1001370,2518645527 +2636415977,Bacteria,Permanent Draft,"Genomic Encyclopedia of Archaeal and Bacterial Type Strains, Phase II (KMG-II): from individual species to whole genera",Paracoccus alcaliphilus DSM 8512,DOE Joint Genome Institute (JGI),2636415977,Bacteria,Pseudomonadota,Alphaproteobacteria,Rhodobacterales,Rhodobacteraceae,Paracoccus,Paracoccus alcaliphilus,Ga0074829,10.46936/10.25585/60001024,Environmental,Terrestrial,Unclassified,Soil,Unclassified,,,,4610056,4641,100,0.22,1068035,1068512,,1001373,2788500162 +2831315640,Bacteria,Permanent Draft,Sequencing the genomes of 1000 actinobacteria strains,Microbacterium invictum DSM 19600,DOE Joint Genome Institute (JGI),2831315640,Bacteria,Actinomycetota,Actinomycetia,Actinomycetales,Microbacteriaceae,Microbacterium,Microbacterium invictum,Ga0104583,10.46936/10.25585/60001044,Engineered,Solid waste,Composting,Household waste,Unclassified,Portugal,,,3580260,3359,100,0.25,1085964,1086301,,1001418,2708742948 +2934784383,Bacteria,Permanent Draft,Large-scale genome sequencing for establishing a Natural Products Genomics Resource Center (NPGRC),Actinobacteria sp. 
007126,DOE Joint Genome Institute (JGI),2934784383,Bacteria,Actinomycetota,Actinomycetia,Streptomycetales,Streptomycetaceae,Streptomyces,Streptomyces sp000772045,Ga0498707,10.46936/10.25585/60001355,,,,,,,,,8065768,7336,100,0,1317023,1317497,,1001454,2786546245 +2947233263,Bacteria,Permanent Draft,Comparative genomics of wheat-associated soil bacteria to identify genetic determinants of phenazine resistance,Pseudomonas synxantha W2I4,DOE Joint Genome Institute (JGI),2947233263,Bacteria,Pseudomonadota,Gammaproteobacteria,Pseudomonadales,Pseudomonadaceae,Pseudomonas_E,Pseudomonas_E synxantha,Ga0505367,10.46936/10.25585/60000525,Host-associated,Plants,Rhizosphere,Roots,Unclassified,"USA: Washington, Lind Dryland Research Station",,,6439278,6114,100,0.19,1337060,1337210,,1001457,2786546391 +2934688754,Bacteria,Permanent Draft,Large-scale genome sequencing for establishing a Natural Products Genomics Resource Center (NPGRC),Actinobacteria sp. 049893,DOE Joint Genome Institute (JGI),2934688754,Bacteria,Actinomycetota,Actinomycetia,Mycobacteriales,Micromonosporaceae,Micromonospora,Micromonospora chalcea,Ga0498693,10.46936/10.25585/60001355,,,,,,,,,6675348,6170,100,0.68,1316995,1317483,,1002192,2821312900 +2864705386,Bacteria,Permanent Draft,Exploring microbial biodiversity for novel pathways involved in the catabolism of aromatic compounds derived from lignin,Sphingobium sp. 
B9G5A,DOE Joint Genome Institute (JGI),2864705386,Bacteria,Pseudomonadota,Alphaproteobacteria,Sphingomonadales,Sphingomonadaceae,Sphingobium,Unclassified,Ga0417181,10.46936/10.25585/60001252,Environmental,Aquatic,River,Freshwater,Unclassified,"USA: Calhoun, Tennessee",,,5142395,5002,100,3.84,1256252,1256322,,1002195,2721755250 +2828130390,Bacteria,Permanent Draft,Studying the diversity of plant-associated saprophytic bacteria and their role in host health and plant-pathogen interactions,Xanthomonas arboricola CFBP 6826,DOE Joint Genome Institute (JGI),2828130390,Bacteria,Pseudomonadota,Gammaproteobacteria,Xanthomonadales,Xanthomonadaceae,Xanthomonas,Xanthomonas arboricola,Ga0372522,10.46936/10.25585/60001156,Host-associated,Plants,Unclassified,Unclassified,Unclassified,,,,4946363,4252,100,0.04,1235900,1236039,,1002198,2710264623 +2947020993,Bacteria,Permanent Draft,"Genomic Encyclopedia of Type Strains, Phase V (KMG-V): Genome sequencing to study the core and pangenomes of soil and plant-associated prokaryotes",Bradyrhizobium japonicum USDA 140,DOE Joint Genome Institute (JGI),2947020993,Bacteria,Pseudomonadota,Alphaproteobacteria,Rhizobiales,Xanthobacteraceae,Bradyrhizobium,Bradyrhizobium diazoefficiens,Ga0530950,10.46936/10.25585/60001079,Host-associated,Plants,Unclassified,Unclassified,,,,,9021424,8820,100,0.24,1346674,1346916,,1002900,2711768783 +2626542068,Bacteria,Permanent Draft,"Soil microbial communities from Rifle, Colorado, USA",Parcubacteria (Wolfebacteria) bacterium GW2011_GWA2_47_9b,DOE Joint Genome Institute (JGI),2626542068,Bacteria,Patescibacteria,Paceibacteria,UBA6257,UBA9933,UBA9933,UBA9933 sp001029635,Ga0075986,,,,,,,,,,1015306,1149,100,13.59,,,,1002903,2711768521 +2824303488,Bacteria,Permanent Draft,"Genomic Encyclopedia of Type Strains, Phase III (KMG-III): the genomes of soil and plant-associated and newly described type strains",Pseudoduganella violacea CECT 8897,DOE Joint Genome Institute 
(JGI),2824303488,Bacteria,Pseudomonadota,Gammaproteobacteria,Burkholderiales,Burkholderiaceae,Pseudoduganella,Pseudoduganella violacea,Ga0372442,10.46936/10.25585/60001401,,,,,,,,,6247076,5634,100,0.49,1205787,1205866,,1002906,2710723902 +2852618963,Bacteria,Permanent Draft,Plant growth-promoting bacteria from the rhizosphere of the beachgrass Ammophila breviligulata,Herbaspirillum sp. SJZ102,DOE Joint Genome Institute (JGI),2852618963,Bacteria,Pseudomonadota,Gammaproteobacteria,Burkholderiales,Burkholderiaceae,Herbaspirillum,Herbaspirillum sp006715495,Ga0401611,10.46936/10.25585/60000946,Host-associated,Plants,Rhizosphere,Roots,Soil,,,,4577824,4196,100,0.22,1256547,1256587,,1002912,2785511325 +2756170205,Bacteria,Permanent Draft,Genomic insights into Acetone-Butanol-Ethanol (ABE) fermentation by sequencing solventogenic Clostridia strains,Clostridium beijerinckii DJ195,DOE Joint Genome Institute (JGI),2756170205,Bacteria,Bacillota_A,Clostridia,Clostridiales,Clostridiaceae,Clostridium,Clostridium beijerinckii,Ga0215616,10.46936/10.25585/60000855,Engineered,Bioreactor,Unclassified,Unclassified,Unclassified,,,,6154977,5559,100,0.21,1155446,1155681,,1002915,2786546730 +2939280713,Bacteria,Permanent Draft,Large-scale genome sequencing for establishing a Natural Products Genomics Resource Center (NPGRC),Streptomyces sp. 
004575,DOE Joint Genome Institute (JGI),2939280713,Bacteria,Actinomycetota,Actinomycetia,Streptomycetales,Streptomycetaceae,Streptomyces,Streptomyces vinaceus,Ga0498586,10.46936/10.25585/60001355,,,,,,,,,8527648,7717,100,1.83,1316781,1317376,,1002918,2710723721 +2828219312,Bacteria,Permanent Draft,Studying the diversity of plant-associated saprophytic bacteria and their role in host health and plant-pathogen interactions,Xanthomonas arboricola F12,DOE Joint Genome Institute (JGI),2828219312,Bacteria,Pseudomonadota,Gammaproteobacteria,Xanthomonadales,Xanthomonadaceae,Xanthomonas,Xanthomonas arboricola,Ga0372556,10.46936/10.25585/60001156,Host-associated,Plants,Unclassified,Unclassified,Unclassified,,,,4967385,4218,100,0.01,1235968,1236073,,1002921,2734482258 +2509276011,Bacteria,Permanent Draft,Genome sequencing of lignin-degrading bacterial isolates,Pseudomonas thermotolerans J53,DOE Joint Genome Institute (JGI),2509276011,Bacteria,Pseudomonadota,Gammaproteobacteria,Pseudomonadales,Pseudomonadaceae,Pseudomonas_E,Pseudomonas_E thermotolerans,Ga0025414,10.46936/10.25585/60007471,,,,,,,,,3753311,3586,100,0,1377979,,,1002927,2710723922 +2918220818,Bacteria,Permanent Draft,Supraglacial microbial communities from various locations in Antarctica and Canada,Psychrobacter sp. 
PL19,DOE Joint Genome Institute (JGI),2918220818,Bacteria,Pseudomonadota,Gammaproteobacteria,Pseudomonadales,Moraxellaceae,Psychrobacter,Unclassified,Ga0417220,10.46936/10.25585/60001242,Environmental,Aquatic,Ice,Freshwater,Glacier,Antarctica: Pony Lake,-77.55,166.15,3119997,2569,100,0.01,1259256,1257642,,1002930,2608642188 +2506783062,Bacteria,Finished,BESC Consolidated BioProcessing (CBP) candidates- Transfer of Baton,Thermoanaerobacterium thermosaccharolyticum M0795,DOE Joint Genome Institute (JGI),2506783062,Bacteria,Bacillota_A,Thermoanaerobacteria,Thermoanaerobacterales,Thermoanaerobacteraceae,Thermoanaerobacterium,Thermoanaerobacterium thermosaccharolyticum,Ga0024948,,,,,,,,,,2894276,2922,100,0.2,1378457,,,1002933,2710723576 +2857690720,Bacteria,Permanent Draft,Exploring microbial biodiversity for novel pathways involved in the catabolism of aromatic compounds derived from lignin,Pseudomonas sp. H1D6A,DOE Joint Genome Institute (JGI),2857690720,Bacteria,Pseudomonadota,Gammaproteobacteria,Pseudomonadales,Pseudomonadaceae,Pseudomonas_E,Pseudomonas_E sp003014915,Ga0417129,10.46936/10.25585/60001252,Environmental,Terrestrial,Unclassified,Soil,Unclassified,USA: Yellowstone National Park,,,6614742,6140,100,0.45,1256081,1256265,,1002936,2734482264 +2952215653,Bacteria,Permanent Draft,Large-scale genome sequencing for establishing a Natural Products Genomics Resource Center (NPGRC),Streptomyces sp. 48014,DOE Joint Genome Institute (JGI),2952215653,Bacteria,Actinomycetota,Actinomycetia,Streptomycetales,Streptomycetaceae,Streptomyces,Streptomyces californicus,Ga0526860,10.46936/10.25585/60001355,,,,,,,,,8506152,7522,100,0.11,1350716,1352013,,1002939,2786546724 +2738541340,Bacteria,Permanent Draft,Sequencing the genomes of 1000 actinobacteria strains,Streptomyces sp. 
1121.2,DOE Joint Genome Institute (JGI),2738541340,Bacteria,Actinomycetota,Actinomycetia,Streptomycetales,Streptomycetaceae,Streptomyces,Streptomyces sp900129855,Ga0151191,10.46936/10.25585/60001044,,,,,,,,,10155046,9355,100,0.19,1119086,1119125,,1002942,2791354781 +2684623027,Bacteria,Permanent Draft,Genomic insights into Acetone-Butanol-Ethanol (ABE) fermentation by sequencing solventogenic Clostridia strains,Clostridium saccharoperbutylacetonicum 049,DOE Joint Genome Institute (JGI),2684623027,Bacteria,Bacillota_A,Clostridia,Clostridiales,Clostridiaceae,Clostridium,Clostridium saccharoperbutylacetonicum,Ga0123429,10.46936/10.25585/60000855,Engineered,Bioreactor,Unclassified,Unclassified,Unclassified,,,,6385397,5860,100,0.89,1107299,1107331,,1002945,2626541718 +2744055047,Bacteria,Permanent Draft,Genomic insights into Acetone-Butanol-Ethanol (ABE) fermentation by sequencing solventogenic Clostridia strains,Clostridium beijerinckii DJ226,DOE Joint Genome Institute (JGI),2744055047,Bacteria,Bacillota_A,Clostridia,Clostridiales,Clostridiaceae,Clostridium,Clostridium beijerinckii,Ga0183058,10.46936/10.25585/60000855,Engineered,Bioreactor,Unclassified,Unclassified,Unclassified,,,,6140068,5531,100,0.21,1139505,1139658,,1002948,2721755122 +2574179779,Bacteria,Permanent Draft,Revising methanotrophy: a comprehensive genomic probing of the unexpected genetic and metabolic diversity of aerobic methane consuming bacteria.,Methylomarinum vadi IT-4,DOE Joint Genome Institute (JGI),2574179779,Bacteria,Pseudomonadota,Gammaproteobacteria,Methylococcales,Methylomonadaceae,Methylomarinum,Methylomarinum vadi,Ga0003666,10.46936/10.25585/60007396,Environmental,Aquatic,Hydrothermal vents,Marine,Unclassified,"Taketomi Island, Okinawa, Japan",,,4335162,4054,100,0.81,1017514,1017513,,1002951,2596583565 +2956289201,Bacteria,Permanent Draft,Large-scale genome sequencing for establishing a Natural Products Genomics Resource Center (NPGRC),Actinobacteria bacterium 005257,DOE Joint Genome 
Institute (JGI),2956289201,Bacteria,Actinomycetota,Actinomycetia,Mycobacteriales,Micromonosporaceae,Micromonospora,Unclassified,Ga0528048,10.46936/10.25585/60001355,,,,,,,,,6292991,5987,100,0.07,1351204,1352257,,1002957,2513237159 +2959076205,Bacteria,Permanent Draft,Large-scale genome sequencing for establishing a Natural Products Genomics Resource Center (NPGRC),Actinobacteria bacterium 86668,DOE Joint Genome Institute (JGI),2959076205,Bacteria,Actinomycetota,Actinomycetia,Streptomycetales,Streptomycetaceae,Streptomyces,Streptomyces flaveolus,Ga0535031,10.46936/10.25585/60001355,,,,,,,,,8278577,7755,100,0.01,1360161,1360988,,1002963,2608642186 +2929061982,Bacteria,Permanent Draft,Genomic analyses of the natural microbiome of Caenorhabditis elegans,Lactococcus lactis BIGb0220,DOE Joint Genome Institute (JGI),2929061982,Bacteria,Bacillota,Bacilli,Lactobacillales,Streptococcaceae,Lactococcus,Lactococcus lactis,Ga0494984,10.46936/10.25585/60001123,,,,,,,,,2711551,2832,100,0.35,1305524,1305545,,1002966,2708742624 +2728369736,Bacteria,Permanent Draft,"Genomic Encyclopedia of Archaeal and Bacterial Type Strains, Phase II (KMG-II): from individual species to whole genera",Salinibacterium amurskyense DSM 16400,DOE Joint Genome Institute (JGI),2728369736,Bacteria,Actinomycetota,Actinomycetia,Actinomycetales,Microbacteriaceae,Rhodoglobus,Rhodoglobus amurskyensis,Ga0181001,10.46936/10.25585/60001024,Environmental,Aquatic,Unclassified,Marine,Unclassified,,,,2776536,2682,100,0.02,1128744,1129057,,1002969,641522626 +2507262057,Bacteria,Permanent Draft,GLBRC,Enterobacteriaceae bacterium FGI 57,DOE Joint Genome Institute (JGI),2507262057,Bacteria,Pseudomonadota,Gammaproteobacteria,Enterobacterales,Enterobacteriaceae,Pseudocitrobacter,Pseudocitrobacter corydidari,Ga0001636,10.46936/10.25585/60007468,Host-associated,Arthropoda: Insects,Unclassified,Unclassified,Unclassified,"Pipeline Road, Panama",,,4762179,4548,100,0.06,1009417,1009416,,1002972,2634166238 
+2852840312,Bacteria,Permanent Draft,Genomic insights into Acetone-Butanol-Ethanol (ABE) fermentation by sequencing solventogenic Clostridia strains,Clostridium beijerinckii DJ051 (Re-Annotation) (version 2),DOE Joint Genome Institute (JGI),2852840312,Bacteria,Bacillota_A,Clostridia,Clostridiales,Clostridiaceae,Clostridium,Clostridium beijerinckii,Ga0423194,10.46936/10.25585/60000855,,,,,,,,,6260681,5912,100,0.69,1267981,1105613,,1002975,2785511301 +2754412423,Bacteria,Permanent Draft,"Subsurface microbial communities from deep shales in Ohio and West Virginia, USA",Escherichia coli 334,DOE Joint Genome Institute (JGI),2754412423,Bacteria,Pseudomonadota,Gammaproteobacteria,Enterobacterales,Enterobacteriaceae,Escherichia,Escherichia coli,Ga0213565,10.46936/10.25585/60001053,,,,,,,,,4538395,4444,100,0.15,1159401,1159415,,1002978,2708742962 +2962921974,Bacteria,Permanent Draft,Large-scale genome sequencing for establishing a Natural Products Genomics Resource Center (NPGRC),Actinobacteria bacterium 5955,DOE Joint Genome Institute (JGI),2962921974,Bacteria,Actinomycetota,Actinomycetia,Streptomycetales,Streptomycetaceae,Streptomyces,Unclassified,Ga0532564,10.46936/10.25585/60001355,,,,,,,,,7924310,6783,100,0.25,1356974,1357639,,1002981,2711768330 +641228480,Bacteria,Finished,WFO-LANL Proposal,Brucella canis ATCC 23365,DOE Joint Genome Institute (JGI),641228480,Bacteria,Pseudomonadota,Alphaproteobacteria,Rhizobiales,Rhizobiaceae,Brucella,Brucella melitensis,Ga0027886,,Host-associated,Mammals,Unclassified,Unclassified,Unclassified,,,,3312769,3408,100,0.06,1378596,,,1002984,2515154113 +2690315628,Bacteria,Permanent Draft,"Populus root and rhizosphere microbial communities from Tennessee, USA",Rhizobium sp. 
PDC82,DOE Joint Genome Institute (JGI),2690315628,Bacteria,Pseudomonadota,Alphaproteobacteria,Rhizobiales,Rhizobiaceae,Agrobacterium,Agrobacterium fabrum,Ga0115500,10.46936/10.25585/60000910,,,,,,,,,5582960,5320,100,0.15,1094049,1094158,,1002987,2626541795 +2928706899,Bacteria,Permanent Draft,"Genomic Encyclopedia of Type Strains, Phase V (KMG-V): Genome sequencing to study the core and pangenomes of soil and plant-associated prokaryotes",Bradyrhizobium elkanii USDA 101,DOE Joint Genome Institute (JGI),2928706899,Bacteria,Pseudomonadota,Alphaproteobacteria,Rhizobiales,Xanthobacteraceae,Bradyrhizobium,Bradyrhizobium elkanii,Ga0485075,10.46936/10.25585/60001079,Host-associated,Plants,Unclassified,Unclassified,Unclassified,,,,9857245,9745,100,5.56,1300875,1300985,,1002990,643692017 +2811994881,Bacteria,Permanent Draft,Genome sequencing of plant associated microbes to promote plant fitness in Sorghum bicolor and Oryza sativa,Pseudomonas sp. SLBN-26,DOE Joint Genome Institute (JGI),2811994881,Bacteria,Pseudomonadota,Gammaproteobacteria,Pseudomonadales,Pseudomonadaceae,Pseudomonas_F,Pseudomonas_F otitidis,Ga0314627,10.46936/10.25585/60001083,Host-associated,Plants,Unclassified,Roots,Unclassified,USA: California,38.5376,-121.7644,6298475,5851,100,0.02,1217905,1218028,,1002993,2713897363 +2767802481,Bacteria,Permanent Draft,Genome sequencing of rice bacterial endophytes,Herbaspirillum seropedicae AG215,DOE Joint Genome Institute (JGI),2767802481,Bacteria,Pseudomonadota,Gammaproteobacteria,Burkholderiales,Burkholderiaceae,Herbaspirillum,Herbaspirillum huttiense,Ga0222375,10.46936/10.25585/60001111,Host-associated,Plants,Unclassified,Unclassified,Unclassified,,,,5783094,5204,100,0.25,1162692,1162764,,1002996,2791354758 +2757320379,Bacteria,Permanent Draft,Genomic insights into Acetone-Butanol-Ethanol (ABE) fermentation by sequencing solventogenic Clostridia strains,Clostridium beijerinckii DJ331,DOE Joint Genome Institute 
(JGI),2757320379,Bacteria,Bacillota_A,Clostridia,Clostridiales,Clostridiaceae,Clostridium,Clostridium beijerinckii,Ga0206024,10.46936/10.25585/60000855,Engineered,Bioreactor,Unclassified,Unclassified,Unclassified,,,,6027870,5480,100,0.48,1155602,1155729,,1002999,2724678916 +2853467124,Bacteria,Permanent Draft,Genomic insights into Acetone-Butanol-Ethanol (ABE) fermentation by sequencing solventogenic Clostridia strains,Clostridium beijerinckii DJ273 (Re-Annotation) (version 2),DOE Joint Genome Institute (JGI),2853467124,Bacteria,Bacillota_A,Clostridia,Clostridiales,Clostridiaceae,Clostridium,Clostridium beijerinckii,Ga0423318,10.46936/10.25585/60000855,Engineered,Bioreactor,Unclassified,Unclassified,Unclassified,,,,6140016,5631,100,0.2,1268105,1139692,,1003002,2513237165 +2838054893,Bacteria,Permanent Draft,"Genomic Encyclopedia of Type Strains, Phase V (KMG-V): Genome sequencing to study the core and pangenomes of soil and plant-associated prokaryotes",Variovorax guangxiensis 34/80,DOE Joint Genome Institute (JGI),2838054893,Bacteria,Pseudomonadota,Gammaproteobacteria,Burkholderiales,Burkholderiaceae_B,Variovorax,Variovorax guangxiensis,Ga0394393,10.46936/10.25585/60001079,Host-associated,Plants,Nodule,Roots,Unclassified,,,,7451788,7016,100,0.53,1244297,1244407,,1003005,2716884129 +2528768092,Bacteria,Permanent Draft,Rhizosphere Grand Challenge Single Cell Sequencing,Ralstonia sp. JGI 0001001-B07 (unscreened),DOE Joint Genome Institute (JGI),2528768092,Bacteria,Pseudomonadota,Gammaproteobacteria,Burkholderiales,Burkholderiaceae,Ralstonia,Ralstonia pickettii_B,Ga0002247,10.46936/10.25585/60001412,,,,,,,,,5444360,5256,100,0.75,1011523,1011522,,1003008,2626541789 +2602042104,Bacteria,Permanent Draft,Comparative genomics of bacterial root endophytes of switchgrass collected from native prairies over two seasons,Klebsiella sp. 
NFIX26,DOE Joint Genome Institute (JGI),2602042104,Bacteria,Pseudomonadota,Gammaproteobacteria,Enterobacterales,Enterobacteriaceae,Klebsiella,Klebsiella quasipneumoniae,Ga0059050,10.46936/10.25585/60000672,Host-associated,Plants,Rhizoplane,Roots,Endophytes,,36.6844,-98.2026,5281639,5071,100,0.05,1049812,1049811,,1003011,2820065746 +2875892803,Bacteria,Permanent Draft,Genomic insights into Acetone-Butanol-Ethanol (ABE) fermentation by sequencing solventogenic Clostridia strains,Clostridium beijerinckii DJ321 (Re-Annotation) (version 2),DOE Joint Genome Institute (JGI),2875892803,Bacteria,Bacillota_A,Clostridia,Clostridiales,Clostridiaceae,Clostridium,Clostridium beijerinckii,Ga0423366,10.46936/10.25585/60000855,Engineered,Bioreactor,Unclassified,Unclassified,Unclassified,,,,6018262,5748,100,0.46,1268153,1155726,,1003014,2710264522 +2667527447,Bacteria,Permanent Draft,"Genomic Encyclopedia of Type Strains, Phase III (KMG-III): the genomes of soil and plant-associated and newly described type strains",Aequorivita viscosa CGMCC 1.11023,DOE Joint Genome Institute (JGI),2667527447,Bacteria,Bacteroidota,Bacteroidia,Flavobacteriales,Flavobacteriaceae,Aequorivita,Aequorivita viscosa,Ga0079842,10.46936/10.25585/60001401,,,,,,,,,3535532,3168,100,0.05,1076167,1076334,,1003017,2716884154 +2963474935,Bacteria,Permanent Draft,Large-scale genome sequencing for establishing a Natural Products Genomics Resource Center (NPGRC),Actinobacteria bacterium 87898,DOE Joint Genome Institute (JGI),2963474935,Bacteria,Actinomycetota,Actinomycetia,Streptomycetales,Streptomycetaceae,Streptomyces,Streptomyces albidoflavus,Ga0534946,10.46936/10.25585/60001355,,,,,,,,,7226925,6321,100,0,1359807,1360811,,1003020,2626541842 +2984407380,Bacteria,Permanent Draft,"Genomic Encyclopedia of Type Strains, Phase V (KMG-V): Genome sequencing to study the core and pangenomes of soil and plant-associated prokaryotes",Paraburkholderia youngii JPY403,DOE Joint Genome Institute 
(JGI),2984407380,Bacteria,Pseudomonadota,Gammaproteobacteria,Burkholderiales,Burkholderiaceae,Paraburkholderia,Paraburkholderia youngii,Ga0537080,10.46936/10.25585/60001079,,,,,,,,,9259621,8664,100,1.76,1376113,1250738,,1003023,2710264700 +2963369651,Bacteria,Permanent Draft,Large-scale genome sequencing for establishing a Natural Products Genomics Resource Center (NPGRC),Actinobacteria bacterium 87863,DOE Joint Genome Institute (JGI),2963369651,Bacteria,Actinomycetota,Actinomycetia,Streptomycetales,Streptomycetaceae,Streptomyces,Streptomyces anulatus,Ga0534923,10.46936/10.25585/60001355,,,,,,,,,8635599,7614,100,0.27,1359761,1360788,,1003026,2785511009 +2926315868,Bacteria,Permanent Draft,"Sorghum-associated microbial communities from plants grown in Nebraska, USA",Paenibacillus mucilaginosus CC578,DOE Joint Genome Institute (JGI),2926315868,Bacteria,Bacillota,Bacilli,Paenibacillales,NBRC-103111,Paenibacillus_G,Paenibacillus_G mucilaginosus,Ga0466906,10.46936/10.25585/60001066,Environmental,Terrestrial,Unclassified,Soil,Unclassified,,,,8985665,7938,100,1.02,1285568,1285682,,1003029,2582580679 +648028060,Bacteria,Finished,DOEM_782007,Thermoanaerobacterium thermosaccharolyticum DSM 571,DOE Joint Genome Institute (JGI),648028060,Bacteria,Bacillota_A,Thermoanaerobacteria,Thermoanaerobacterales,Thermoanaerobacteraceae,Thermoanaerobacterium,Thermoanaerobacterium thermosaccharolyticum,Ga0031175,,Environmental,Aquatic,Groundwater,Freshwater,Unclassified,,,,2785752,2830,100,0.33,1378396,,,1003032,2711768233 +2663762770,Bacteria,Permanent Draft,Genome sequencing of a diverse group of Pseudomonas species,Pseudomonas panipatensis LMG 24738,DOE Joint Genome Institute (JGI),2663762770,Bacteria,Pseudomonadota,Gammaproteobacteria,Pseudomonadales,Pseudomonadaceae,Pseudomonas,Pseudomonas panipatensis,Ga0104674,10.46936/10.25585/60000812,,,,,,,,,5775530,5395,100,0.05,1090640,1090727,,1003035,2626541966 +2951629530,Bacteria,Permanent Draft,Large-scale genome sequencing for 
establishing a Natural Products Genomics Resource Center (NPGRC),Actinobacteria bacterium 020597,DOE Joint Genome Institute (JGI),2951629530,Bacteria,Actinomycetota,Actinomycetia,Streptomycetales,Streptomycetaceae,Streptomyces,Unclassified,Ga0526784,10.46936/10.25585/60001355,,,,,,,,,9010950,8112,100,0.24,1350564,1351937,,1003038,2516493006 +2690315635,Bacteria,Permanent Draft,"Populus root and rhizosphere microbial communities from Tennessee, USA",Variovorax sp. OK212,DOE Joint Genome Institute (JGI),2690315635,Bacteria,Pseudomonadota,Gammaproteobacteria,Burkholderiales,Burkholderiaceae_B,Variovorax,Variovorax sp900112425,Ga0115516,10.46936/10.25585/60000910,,,,,,,,,8802773,8100,100,2.83,1094081,1094174,,1003041,2711768792 +2675903042,Bacteria,Permanent Draft,"Active sludge and wastewater microbial communities from Klosterneuburg, Austria",Nitrosomonas sp. 33 Nm33,DOE Joint Genome Institute (JGI),2675903042,Bacteria,Pseudomonadota,Gammaproteobacteria,Burkholderiales,Nitrosomonadaceae,Nitrosomonas,Nitrosomonas sp900107265,Ga0111709,10.46936/10.25585/60000917,,,,,,"Hamburg, Germany",53.5586,9.8611,3233933,3144,100,0.12,1098046,1098111,,1003044,2721755246 +2939453919,Bacteria,Permanent Draft,Large-scale genome sequencing for establishing a Natural Products Genomics Resource Center (NPGRC),Actinobacteria sp. 046797,DOE Joint Genome Institute (JGI),2939453919,Bacteria,Actinomycetota,Actinomycetia,Streptomycetales,Streptomycetaceae,Streptomyces,Unclassified,Ga0498559,10.46936/10.25585/60001355,,,,,,,,,7633325,6862,100,0,1316727,1317349,,1003047,2626542067 +2757320445,Bacteria,Permanent Draft,"Populus root and rhizosphere microbial communities from Tennessee, USA",Acidovorax sp. 
OV235,DOE Joint Genome Institute (JGI),2757320445,Bacteria,Pseudomonadota,Gammaproteobacteria,Burkholderiales,Burkholderiaceae_B,Acidovorax,Unclassified,Ga0215733,10.46936/10.25585/60000910,Host-associated,Plants,Unclassified,Unclassified,Unclassified,,,,6984668,6528,100,0.64,1163445,1163628,,1003050,2791354782 +2788499842,Bacteria,Permanent Draft,Genomic analyses of the natural microbiome of Caenorhabditis elegans,Raoultella sp. BIGb0149,DOE Joint Genome Institute (JGI),2788499842,Bacteria,Pseudomonadota,Gammaproteobacteria,Enterobacterales,Enterobacteriaceae,Klebsiella,Klebsiella terrigena,Ga0304798,10.46936/10.25585/60001123,Host-associated,Invertebrates,Unclassified,Nematoda,Unclassified,,,,5622012,5396,100,0.25,1188903,1189001,,1003053,2721755239 diff --git a/docs/examples/filter_on_taxon_oid.py b/docs/examples/filter_on_taxon_oid.py new file mode 100755 index 0000000..b8edbdd --- /dev/null +++ b/docs/examples/filter_on_taxon_oid.py @@ -0,0 +1,23 @@ +# This script searches the JGI Data Portal for files related to a given +# IMG taxon OID, printing out any related metadata. + +import dts +import os + +# connect to the DTS in NERSC Spin +token = os.getenv('DTS_KBASE_DEV_TOKEN') +dts_client = dts.Client(api_key = token, + server = "https://lb-dts.staging.kbase.us") + +# add some JDP-specific search options +jdp_params = { + 'f': 'img_taxon_oid', # filter on taxon OID + 'extra': 'project_id', # add project_id to returned metadata +} + +# do the search +results = dts_client.search(database = 'jdp', + query = '2708742931', + specific = jdp_params) +print(results) + diff --git a/docs/examples/match_files_to_ids_in_spreadsheet.py b/docs/examples/match_files_to_ids_in_spreadsheet.py new file mode 100755 index 0000000..ce8627e --- /dev/null +++ b/docs/examples/match_files_to_ids_in_spreadsheet.py @@ -0,0 +1,131 @@ +# This file extracts ITS AP IDs from a spreadsheet, finds files with those IDs, +# and writes out another spreadsheet with specific file information. 
import csv
import dts
import os

# Longest search query (in characters) sent to the DTS in a single request.
_MAX_QUERY_LENGTH = 1000

# Separator placed between IDs in a search query.
_QUERY_SEPARATOR = ' | '

# Prefix of the project_id metadata field for files tied to an IMG AP ID.
_AP_ID_PREFIX = 'IMG_AP-'


def extract_columns(csv_file: str, column_names: list = None):
    """Read the named columns from a CSV file whose first row is a header.

    Args:
        csv_file: path of the CSV file to read.
        column_names: header names of the columns to extract.

    Returns:
        A list with one list of cell values (strings) per requested column,
        in the order the names were given. The header row is not included.

    Raises:
        IndexError: if a requested column is missing from the header, or a
            data row is too short to contain a requested column.
    """
    names = list(column_names) if column_names is not None else []
    columns = [[] for _ in names]
    with open(csv_file, 'r', newline='') as csvfile:
        reader = csv.reader(csvfile)
        header = next(reader, None)
        if header is None:  # empty file: nothing to extract
            return columns
        indices = []
        for name in names:
            if name not in header:
                raise IndexError(
                    f"column '{name}' not found in header of {csv_file}")
            indices.append(header.index(name))
        for row in reader:
            if not row:  # csv.reader yields [] for blank lines
                continue
            for column, index in zip(columns, indices):
                column.append(row[index])
    return columns


def _is_wanted_file(path: str):
    """Return True if the path names one of the file kinds we keep."""
    if path.endswith(('.assembled.faa', '.assembled.fna', '.assembled.gff')):
        return True
    if not os.path.basename(path).startswith('Ga'):
        return False
    if path.endswith(('_cds.gff', '_contigs.fna')):
        return True
    # keep Ga*_proteins.faa, except the genemark/prodigal variants
    return (path.endswith('_proteins.faa') and
            not path.endswith(('genemark_proteins.faa',
                               'prodigal_proteins.faa')))


# filter out unwanted files
def filter_results(resources: list):
    """Return the resources (objects with a .path attribute) worth keeping."""
    return [r for r in resources if _is_wanted_file(r.path)]


def _strip_suffix(text: str, suffix: str):
    """Return text without its trailing suffix (text must end with it)."""
    return text[:-len(suffix)]


# returns true if the given list of files consists of
# * unpaired *contig.fna files and/or
# * *_cds.gff files paired with their *contig.fna counterparts
def files_are_paired(files: list):
    """Check that every CDS file has a matching contigs file.

    The 'Ga' prefix is tested on the file's basename (as filter_results does)
    so that directory-qualified paths are recognized. Files are matched on
    their full path with the type-specific suffix removed.
    """
    contigs, cdses = set(), set()
    for file in files:
        is_ga_file = os.path.basename(file).startswith('Ga')
        if is_ga_file and file.endswith('_contigs.fna'):
            contigs.add(_strip_suffix(file, '_contigs.fna'))
        elif file.endswith('.assembled.fna'):
            contigs.add(_strip_suffix(file, '.assembled.fna'))
        elif is_ga_file and file.endswith('_cds.gff'):
            cdses.add(_strip_suffix(file, '_cds.gff'))
        elif file.endswith('.assembled.gff'):
            cdses.add(_strip_suffix(file, '.assembled.gff'))
    return cdses <= contigs


def _build_queries(ids: list, max_length: int = _MAX_QUERY_LENGTH):
    """Join IDs with ' | ' into queries no longer than max_length.

    Blank IDs are skipped. IDs that do not fit into the current query start a
    new one instead of being dropped, so every ID ends up in some query. A
    single ID longer than max_length still gets a query of its own.
    """
    queries = []
    current = ''
    for raw_id in ids:
        its_id = raw_id.strip()
        if not its_id:
            continue
        if not current:
            current = its_id
        elif (len(current) + len(_QUERY_SEPARATOR) + len(its_id)
              <= max_length):
            current += _QUERY_SEPARATOR + its_id
        else:
            queries.append(current)
            current = its_id
    if current:
        queries.append(current)
    return queries


def map_ids(dts_client: dts.Client, csv_file: str):
    """Map the IMG AP IDs listed in a spreadsheet to their files.

    Args:
        dts_client: client used to search the 'jdp' database.
        csv_file: spreadsheet with 'ITS AP ID - 90 Complete' and 'taxon_oid'
            columns.

    Returns:
        A dict keyed by IMG AP ID (int). Each value is a dict with the keys
        'taxon_oid' (from the spreadsheet row holding that AP ID), 'dts_ids'
        and 'files' (parallel lists). Only AP IDs whose files pass
        files_are_paired are included.
    """
    # fetch ITS AP/SP IDs
    (its_ap_ids, taxon_oids) = extract_columns(
        csv_file = csv_file,
        column_names = ['ITS AP ID - 90 Complete', 'taxon_oid'])

    # Look taxon OIDs up by AP ID: search results come back in an order that
    # is unrelated to the order of the spreadsheet rows.
    taxon_oid_for = {}
    for its_id, taxon_oid in zip(its_ap_ids, taxon_oids):
        try:
            taxon_oid_for.setdefault(int(its_id), taxon_oid)
        except ValueError:
            continue  # blank or non-numeric cell

    # assemble some JDP-specific search parameters:
    jdp_params = {
        'f': 'project_id',     # search field likely containing IMG AP/SP ID
        'extra': 'project_id', # include this field in file metadata
    }

    # send the queries along and gather file info
    mapping = {}
    for query in _build_queries(its_ap_ids):
        results = dts_client.search(database = 'jdp',
                                    query = query,
                                    specific = jdp_params)
        for result in filter_results(results):
            md = result.to_dict()
            dts_id = md['id'] # DTS-sensible ID
            project_id = md['extra']['project_id']
            if project_id.startswith(_AP_ID_PREFIX):
                ap_id = int(project_id.replace(_AP_ID_PREFIX, '')) # IMG AP ID
                # ignore files whose AP ID isn't in the spreadsheet
                if ap_id in taxon_oid_for:
                    entry = mapping.setdefault(ap_id, {
                        'taxon_oid': taxon_oid_for[ap_id],
                        'dts_ids': [],
                        'files': [],
                    })
                    if dts_id not in entry['dts_ids']:
                        entry['dts_ids'].append(dts_id)
                        entry['files'].append(md['path'])
            print('.', end='', flush=True)
    print('.')

    # Build a new dict rather than deleting entries while iterating, which
    # raises RuntimeError.
    return {ap_id: entry for ap_id, entry in mapping.items()
            if files_are_paired(entry['files'])}


def write_spreadsheet(csv_file: str, mapping: dict):
    """Write one CSV row per file in the mapping produced by map_ids.

    Entries whose files are not properly paired are left out.
    """
    with open(csv_file, 'w', newline='') as csvfile:
        field_names = ['IMG AP ID', 'IMG taxon OID', 'DTS ID', 'file']
        writer = csv.DictWriter(csvfile, fieldnames=field_names)
        writer.writeheader()
        for ap_id, md in mapping.items():
            if not files_are_paired(md['files']):
                continue
            for dts_id, file in zip(md['dts_ids'], md['files']):
                writer.writerow({
                    'IMG AP ID': ap_id,
                    'IMG taxon OID': md['taxon_oid'],
                    'DTS ID': dts_id,
                    'file': file,
                })


def main():
    """Map the IDs in IMG.csv to files and write files_with_ids.csv."""
    # connect to the DTS
    token = os.getenv('DTS_KBASE_DEV_TOKEN')
    dts_client = dts.Client(api_key = token,
                            server = "https://lb-dts.staging.kbase.us")

    print("Mapping IDs from IMG.csv...")
    mapping = map_ids(dts_client, 'IMG.csv')

    print("Writing files_with_ids.csv...")
    write_spreadsheet('files_with_ids.csv', mapping)


if __name__ == '__main__':
    main()