diff --git a/q2_viromics/citations.bib b/q2_viromics/citations.bib index 00ee080..be5c46f 100644 --- a/q2_viromics/citations.bib +++ b/q2_viromics/citations.bib @@ -16,12 +16,3 @@ @article{CheckV year={2021}, publisher={Nature Publishing Group US New York} } - -@article{geNomad, - title={Identification of mobile genetic elements with geNomad}, - author={Camargo, Antonio Pedro and Roux, Simon and Schulz, Frederik and Babinski, Michal and Xu, Yan and Hu, Bin and Chain, Patrick SG and Nayfach, Stephen and Kyrpides, Nikos C}, - journal={Nature Biotechnology}, - pages={1--10}, - year={2023}, - publisher={Nature Publishing Group US New York} -} diff --git a/q2_viromics/genomad_analysis.py b/q2_viromics/genomad_analysis.py deleted file mode 100644 index 0adc015..0000000 --- a/q2_viromics/genomad_analysis.py +++ /dev/null @@ -1,132 +0,0 @@ -# ---------------------------------------------------------------------------- -# Copyright (c) 2024, Bokulich Lab. -# -# Distributed under the terms of the Modified BSD License. -# -# The full license is in the file LICENSE, distributed with this software. -# ---------------------------------------------------------------------------- -import os -import shutil -import subprocess -import tempfile - -from q2_types.per_sample_sequences import ContigSequencesDirFmt - -from q2_viromics._utils import run_command -from q2_viromics.types._format import GenomadDBDirFmt, ViromicsMetadataDirFmt - - -# Run the command for geNomad analysis -def genomad_end_to_end( - tmp, - sequences, - database, - num_threads, - splits, - min_score, - min_number_genes, - conservative_taxonomy, -): - internal_db_name = os.path.join(database.path, os.listdir(database.path)[0]) - - cmd = [ - "genomad", - "end-to-end", - "--cleanup", - "--splits", - str(splits), - "--min-score", - str(min_score), - "--min-number-genes", - str(min_number_genes), - "--threads", - str(num_threads), - "--disable-nn-classification", - ] - - if conservative_taxonomy: - cmd.append("--conservative-taxonomy") - - cmd.extend( - [ - str(sequences), - str(tmp), - str(internal_db_name), - ] - ) - - try: - run_command(cmd) - except subprocess.CalledProcessError as e: - raise Exception( - "An error was encountered while running genomad end-to-end, " - f"(return code {e.returncode}), please inspect " - "stdout and stderr to learn more." - ) - - -def genomad_analysis( - sequences: ContigSequencesDirFmt, - database: GenomadDBDirFmt, - num_threads: int = 1, - splits: int = 8, - min_score: float = 0.7, - min_number_genes: int = 1, - conservative_taxonomy: bool = False, -) -> ( - ContigSequencesDirFmt, - ContigSequencesDirFmt, - ContigSequencesDirFmt, - ViromicsMetadataDirFmt, -): - - viruses = ContigSequencesDirFmt() - proviruses = ContigSequencesDirFmt() - plasmids = ContigSequencesDirFmt() - virus_summary = ViromicsMetadataDirFmt() - - for id, contigs_fp in sequences.sample_dict().items(): - with tempfile.TemporaryDirectory() as tmp: - # Execute the "genomad end_to_end" command - genomad_end_to_end( - tmp, - contigs_fp, - database, - num_threads, - splits, - min_score, - min_number_genes, - conservative_taxonomy, - ) - # Define the filenames and destination paths in a list of tuples - files_and_destinations = [ - ( - f"{id}_contigs_summary/{id}_contigs_virus.fna", - f"{viruses}/{id}_contigs.fa", - ), - ( - f"{id}_contigs_find_proviruses/{id}_contigs_provirus.fna", - f"{proviruses}/{id}_contigs.fa", - ), - ( - f"{id}_contigs_summary/{id}_contigs_plasmid.fna", - f"{plasmids}/{id}_contigs.fa", - ), - ( - f"{id}_contigs_summary/{id}_contigs_virus_summary.tsv", - f"{virus_summary}/{id}_virus_summary.tsv", - ), - ] - - # Ensure the destination directories exist and move files - for filename, dst in files_and_destinations: - src = os.path.join(tmp, filename) - os.makedirs(os.path.dirname(dst), exist_ok=True) - shutil.move(src, dst) - - return ( - viruses, - proviruses, - plasmids, - virus_summary, - ) diff --git a/q2_viromics/genomad_fetch_db.py b/q2_viromics/genomad_fetch_db.py deleted file mode 100644 index c6942ff..0000000 --- a/q2_viromics/genomad_fetch_db.py +++ /dev/null @@ -1,46 +0,0 @@ -# ---------------------------------------------------------------------------- -# Copyright (c) 2024, Bokulich Lab. -# -# Distributed under the terms of the Modified BSD License. -# -# The full license is in the file LICENSE, distributed with this software. -# ---------------------------------------------------------------------------- -import os -import shutil -import subprocess - -from q2_viromics._utils import run_command -from q2_viromics.types._format import GenomadDBDirFmt - - -# Create the command to fetch the geNomad database -def genomad_download_database(database): - cmd = [ - "genomad", - "download-database", - str(database), - ] - - try: - # Execute the command to fetch a geNomad database - run_command(cmd) - except subprocess.CalledProcessError as e: - raise Exception( - "An error was encountered while running geNomad download-database, " - f"(return code {e.returncode}), please inspect " - "stdout and stderr to learn more." - ) - - -# Fetch the geNomad database -def genomad_fetch_db() -> GenomadDBDirFmt: - # Initialize a directory format object to store a geNomad database - database = GenomadDBDirFmt() - - # Construct the command to fetch the geNomad database - genomad_download_database(database) - - if os.path.exists(os.path.join(str(database), "genomad_db/.ipynb_checkpoints/")): - shutil.rmtree(os.path.join(str(database), "genomad_db/.ipynb_checkpoints/")) - - return database diff --git a/q2_viromics/plugin_setup.py b/q2_viromics/plugin_setup.py index 0dc7628..1ac6c70 100644 --- a/q2_viromics/plugin_setup.py +++ b/q2_viromics/plugin_setup.py @@ -15,14 +15,11 @@ from q2_viromics.checkv_analysis import checkv_analysis from q2_viromics.checkv_fetch_db import checkv_fetch_db -from q2_viromics.genomad_analysis import genomad_analysis -from q2_viromics.genomad_fetch_db import genomad_fetch_db from q2_viromics.types._format import ( CheckVDBDirFmt, - GenomadDBDirFmt, ViromicsMetadataDirFmt, ) -from q2_viromics.types._type import CheckVDB, GenomadDB, ViromicsMetadata +from q2_viromics.types._type import CheckVDB, ViromicsMetadata citations = Citations.load("citations.bib", package="q2_viromics") @@ -40,11 +37,10 @@ plugin.register_formats( CheckVDBDirFmt, - GenomadDBDirFmt, ViromicsMetadataDirFmt, ) -plugin.register_semantic_types(CheckVDB, GenomadDB, ViromicsMetadata) +plugin.register_semantic_types(CheckVDB, ViromicsMetadata) plugin.register_artifact_class( CheckVDB, @@ -52,33 +48,11 @@ description=("CheckV database."), ) -plugin.register_artifact_class( - GenomadDB, - directory_format=GenomadDBDirFmt, - description=("Genomad database."), -) - plugin.register_semantic_type_to_format( SampleData[ViromicsMetadata], directory_format=ViromicsMetadataDirFmt, ) -plugin.methods.register_function( - function=genomad_fetch_db, - inputs={}, - parameters={}, - outputs=[("database", GenomadDB)], - parameter_descriptions={}, - output_descriptions={"database": "geNomad database."}, - name="Fetch geNomad database", - description=( - "Fetch the geNomad database that contains the profiles of the markers " - "that are used to classify sequences, their taxonomic information and " - "their functional annotation." - ), - citations=[citations["geNomad"]], -) - plugin.methods.register_function( function=checkv_fetch_db, inputs={}, @@ -131,52 +105,4 @@ citations=[citations["CheckV"]], ) -plugin.methods.register_function( - function=genomad_analysis, - inputs={ - "sequences": SampleData[Contigs], - "database": GenomadDB, - }, - parameters={ - "num_threads": Int % Range(1, None), - "splits": Int % Range(0, None), - "min_score": Float % Range(0, 1), - "min_number_genes": Int % Range(0, None), - "conservative_taxonomy": Bool, - }, - input_descriptions={ - "sequences": "Input sequences.", - "database": "GeNomad database.", - }, - parameter_descriptions={ - "num_threads": "Number of threads to use for prodigal-gv and DIAMOND.", - "splits": "Split the data for the MMseqs2 search. Higher values will " - "reduce memory usage, but will make the search slower. If the " - "MMseqs2 search is failing, try to increase the number of splits.", - "min_score": "Minimum score to flag a sequence as virus or plasmid.", - "min_number_genes": "The minimum number of genes a sequence must encode to " - "be considered for classification as a plasmid or virus.", - "conservative_taxonomy": "Make the virus taxonomic assignment process more " - "conservative. This might reduce the amount of " - "genomes assigned to the family level, but will " - "decrease the rate of family misassignment.", - }, - outputs=[ - ("viruses", SampleData[Contigs]), - ("proviruses", SampleData[Contigs]), - ("plasmid", SampleData[Contigs]), - ("virus_summary", SampleData[ViromicsMetadata]), - ], - output_descriptions={ - "viruses": "Viral sequences.", - "proviruses": "Proviral sequences.", - "plasmid": "Plasmid sequences.", - "virus_summary": "Virus classification summary.", - }, - name="Identify and classify viral genomes", - description="Perform comprehensive viral genome analysis to identify and " - "classify viral, proviral, and plasmid sequences.", - citations=[citations["geNomad"]], -) - importlib.import_module("q2_viromics.types._transformer") diff --git a/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/genomad_db b/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/genomad_db deleted file mode 100644 index 966bb3e..0000000 --- a/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/genomad_db +++ /dev/null @@ -1,21 +0,0 @@ -???T?????????? - ???????????? -T?????????????? -? - ?? -T??????????????????TD?????????????? -????????????????? - T????????????? - ???TD -??????????? ?? - ????TD -? ???? - ????????????TD - ?????????????? - ???TD - ????????????? ????TD -?????????????????T??????? ??????????T????????????????? -T?????????????????? -??????? T??????? -7????????????????? ??T?????????????? - 7?????????????????7? diff --git a/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/genomad_db.dbtype b/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/genomad_db.dbtype deleted file mode 100644 index 8113585..0000000 --- a/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/genomad_db.dbtype +++ /dev/null @@ -1,5 +0,0 @@ -???T?????????? - ???????????? -T?????????????? -? - ?? diff --git a/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/genomad_db.index b/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/genomad_db.index deleted file mode 100644 index 266cb07..0000000 --- a/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/genomad_db.index +++ /dev/null @@ -1,2 +0,0 @@ -0 229170299 2951 -1 68962232 2601 diff --git a/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/genomad_db.lookup b/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/genomad_db.lookup deleted file mode 100644 index cc3633e..0000000 --- a/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/genomad_db.lookup +++ /dev/null @@ -1,2 +0,0 @@ -0 GENOMAD.070201.VV 0 -1 GENOMAD.179093.PC 0 diff --git a/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/genomad_db.source b/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/genomad_db.source deleted file mode 100644 index aebeeac..0000000 --- a/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/genomad_db.source +++ /dev/null @@ -1 +0,0 @@ -0 MARKER_MSAs.tar.gz diff --git a/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/genomad_db_h b/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/genomad_db_h deleted file mode 100644 index 8113585..0000000 --- a/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/genomad_db_h +++ /dev/null @@ -1,5 +0,0 @@ -???T?????????? - ???????????? -T?????????????? -? - ?? diff --git a/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/genomad_db_h.dbtype b/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/genomad_db_h.dbtype deleted file mode 100644 index 8113585..0000000 --- a/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/genomad_db_h.dbtype +++ /dev/null @@ -1,5 +0,0 @@ -???T?????????? - ???????????? -T?????????????? -? - ?? diff --git a/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/genomad_db_h.index b/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/genomad_db_h.index deleted file mode 100644 index 30391f0..0000000 --- a/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/genomad_db_h.index +++ /dev/null @@ -1,2 +0,0 @@ -0 712006 19 -1 213883 19 diff --git a/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/genomad_db_mapping b/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/genomad_db_mapping deleted file mode 100644 index d2e0936..0000000 --- a/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/genomad_db_mapping +++ /dev/null @@ -1,2 +0,0 @@ -0 2561 -3 2561 diff --git a/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/genomad_db_taxonomy b/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/genomad_db_taxonomy deleted file mode 100644 index 8113585..0000000 --- a/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/genomad_db_taxonomy +++ /dev/null @@ -1,5 +0,0 @@ -???T?????????? - ???????????? -T?????????????? -? - ?? diff --git a/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/genomad_integrase_db b/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/genomad_integrase_db deleted file mode 100644 index 966bb3e..0000000 --- a/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/genomad_integrase_db +++ /dev/null @@ -1,21 +0,0 @@ -???T?????????? - ???????????? -T?????????????? -? - ?? -T??????????????????TD?????????????? -????????????????? - T????????????? - ???TD -??????????? ?? - ????TD -? ???? - ????????????TD - ?????????????? - ???TD - ????????????? ????TD -?????????????????T??????? ??????????T????????????????? -T?????????????????? -??????? T??????? -7????????????????? ??T?????????????? - 7?????????????????7? diff --git a/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/genomad_integrase_db.dbtype b/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/genomad_integrase_db.dbtype deleted file mode 100644 index 966bb3e..0000000 --- a/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/genomad_integrase_db.dbtype +++ /dev/null @@ -1,21 +0,0 @@ -???T?????????? - ???????????? -T?????????????? -? - ?? -T??????????????????TD?????????????? -????????????????? - T????????????? - ???TD -??????????? ?? - ????TD -? ???? - ????????????TD - ?????????????? - ???TD - ????????????? ????TD -?????????????????T??????? ??????????T????????????????? -T?????????????????? -??????? T??????? -7????????????????? ??T?????????????? - 7?????????????????7? diff --git a/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/genomad_integrase_db.index b/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/genomad_integrase_db.index deleted file mode 100644 index dbcfdff..0000000 --- a/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/genomad_integrase_db.index +++ /dev/null @@ -1,3 +0,0 @@ -0 95910 8676 -1 127290 10326 -2 diff --git a/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/genomad_integrase_db.lookup b/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/genomad_integrase_db.lookup deleted file mode 100644 index 80ac0d7..0000000 --- a/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/genomad_integrase_db.lookup +++ /dev/null @@ -1,2 +0,0 @@ -0 PHA02601 0 -1 PRK09692 0 diff --git a/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/genomad_integrase_db.source b/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/genomad_integrase_db.source deleted file mode 100644 index 9e8b92a..0000000 --- a/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/genomad_integrase_db.source +++ /dev/null @@ -1 +0,0 @@ -0 INTEGRASE_MSAs.tar.gz diff --git a/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/genomad_integrase_db_h b/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/genomad_integrase_db_h deleted file mode 100644 index 966bb3e..0000000 --- a/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/genomad_integrase_db_h +++ /dev/null @@ -1,21 +0,0 @@ -???T?????????? - ???????????? -T?????????????? -? - ?? -T??????????????????TD?????????????? -????????????????? - T????????????? - ???TD -??????????? ?? - ????TD -? ???? - ????????????TD - ?????????????? - ???TD - ????????????? ????TD -?????????????????T??????? ??????????T????????????????? -T?????????????????? -??????? T??????? -7????????????????? ??T?????????????? - 7?????????????????7? diff --git a/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/genomad_integrase_db_h.dbtype b/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/genomad_integrase_db_h.dbtype deleted file mode 100644 index 966bb3e..0000000 --- a/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/genomad_integrase_db_h.dbtype +++ /dev/null @@ -1,21 +0,0 @@ -???T?????????? - ???????????? -T?????????????? -? - ?? -T??????????????????TD?????????????? -????????????????? - T????????????? - ???TD -??????????? ?? - ????TD -? ???? - ????????????TD - ?????????????? - ???TD - ????????????? ????TD -?????????????????T??????? ??????????T????????????????? -T?????????????????? -??????? T??????? -7????????????????? ??T?????????????? - 7?????????????????7? diff --git a/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/genomad_integrase_db_h.index b/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/genomad_integrase_db_h.index deleted file mode 100644 index e8b77ae..0000000 --- a/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/genomad_integrase_db_h.index +++ /dev/null @@ -1,2 +0,0 @@ -0 113 10 -1 174 10 diff --git a/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/genomad_marker_metadata.tsv b/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/genomad_marker_metadata.tsv deleted file mode 100644 index 3b9bcd1..0000000 --- a/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/genomad_marker_metadata.tsv +++ /dev/null @@ -1,2 +0,0 @@ -MARKER MINIMAL_SET SPECIFICITY_CLASS PIELOU_SPECIFICITY SPM_C SPM_P SPM_V GV_MARKER ABUNDANCE_C ABUNDANCE_P ABUNDANCE_V ABUNDANCE_SCALED_C ABUNDANCE_SCALED_P ABUNDANCE_SCALED_V ABUNDANCE_PERCENTILE_C ABUNDANCE_PERCENTILE_P ABUNDANCE_PERCENTILE_V TAXONOMY USCG PLASMID_HALLMARK VIRUS_HALLMARK ANNOTATION_CONJSCAN ANNOTATION_AMR ANNOTATION_ACCESSIONS ANNOTATION_DESCRIPTION SOURCE -GENOMAD.000001.CC 1 CC 1.0000 1.0000 0.0000 0.0000 0 118966.4495 0.0000 0.0000 702.7256 0.0000 0.0000 0.7923 0.0000 0.0000 NA 1398618at2 0 0 NA NA PF00347;K02933;TIGR03654;COG0097 ribosomal protein L6, bacterial type De novo protein clustering diff --git a/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/genomad_mini_db b/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/genomad_mini_db deleted file mode 100644 index 966bb3e..0000000 --- a/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/genomad_mini_db +++ /dev/null @@ -1,21 +0,0 @@ -???T?????????? - ???????????? -T?????????????? -? - ?? -T??????????????????TD?????????????? -????????????????? - T????????????? - ???TD -??????????? ?? - ????TD -? ???? - ????????????TD - ?????????????? - ???TD - ????????????? ????TD -?????????????????T??????? ??????????T????????????????? -T?????????????????? -??????? T??????? -7????????????????? ??T?????????????? - 7?????????????????7? diff --git a/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/genomad_mini_db.dbtype b/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/genomad_mini_db.dbtype deleted file mode 100644 index 966bb3e..0000000 --- a/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/genomad_mini_db.dbtype +++ /dev/null @@ -1,21 +0,0 @@ -???T?????????? - ???????????? -T?????????????? -? - ?? -T??????????????????TD?????????????? -????????????????? - T????????????? - ???TD -??????????? ?? - ????TD -? ???? - ????????????TD - ?????????????? - ???TD - ????????????? ????TD -?????????????????T??????? ??????????T????????????????? -T?????????????????? -??????? T??????? -7????????????????? ??T?????????????? - 7?????????????????7? diff --git a/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/genomad_mini_db.index b/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/genomad_mini_db.index deleted file mode 100644 index 8c2616b..0000000 --- a/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/genomad_mini_db.index +++ /dev/null @@ -1,2 +0,0 @@ -1 68962232 2601 -15 229177376 7326 diff --git a/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/genomad_mini_db.lookup b/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/genomad_mini_db.lookup deleted file mode 100644 index cc3633e..0000000 --- a/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/genomad_mini_db.lookup +++ /dev/null @@ -1,2 +0,0 @@ -0 GENOMAD.070201.VV 0 -1 GENOMAD.179093.PC 0 diff --git a/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/genomad_mini_db.source b/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/genomad_mini_db.source deleted file mode 100644 index aebeeac..0000000 --- a/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/genomad_mini_db.source +++ /dev/null @@ -1 +0,0 @@ -0 MARKER_MSAs.tar.gz diff --git a/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/genomad_mini_db_h b/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/genomad_mini_db_h deleted file mode 100644 index 966bb3e..0000000 --- a/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/genomad_mini_db_h +++ /dev/null @@ -1,21 +0,0 @@ -???T?????????? - ???????????? -T?????????????? -? - ?? -T??????????????????TD?????????????? -????????????????? - T????????????? - ???TD -??????????? ?? - ????TD -? ???? - ????????????TD - ?????????????? - ???TD - ????????????? ????TD -?????????????????T??????? ??????????T????????????????? -T?????????????????? -??????? T??????? -7????????????????? ??T?????????????? - 7?????????????????7? diff --git a/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/genomad_mini_db_h.dbtype b/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/genomad_mini_db_h.dbtype deleted file mode 100644 index 966bb3e..0000000 --- a/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/genomad_mini_db_h.dbtype +++ /dev/null @@ -1,21 +0,0 @@ -???T?????????? - ???????????? -T?????????????? -? - ?? -T??????????????????TD?????????????? -????????????????? - T????????????? - ???TD -??????????? ?? - ????TD -? ???? - ????????????TD - ?????????????? - ???TD - ????????????? ????TD -?????????????????T??????? ??????????T????????????????? -T?????????????????? -??????? T??????? -7????????????????? ??T?????????????? - 7?????????????????7? diff --git a/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/genomad_mini_db_h.index b/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/genomad_mini_db_h.index deleted file mode 100644 index 30391f0..0000000 --- a/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/genomad_mini_db_h.index +++ /dev/null @@ -1,2 +0,0 @@ -0 712006 19 -1 213883 19 diff --git a/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/genomad_mini_db_mapping b/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/genomad_mini_db_mapping deleted file mode 100644 index d2e0936..0000000 --- a/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/genomad_mini_db_mapping +++ /dev/null @@ -1,2 +0,0 @@ -0 2561 -3 2561 diff --git a/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/genomad_mini_db_taxonomy b/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/genomad_mini_db_taxonomy deleted file mode 100644 index 966bb3e..0000000 --- a/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/genomad_mini_db_taxonomy +++ /dev/null @@ -1,21 +0,0 @@ -???T?????????? - ???????????? -T?????????????? -? - ?? -T??????????????????TD?????????????? -????????????????? - T????????????? - ???TD -??????????? ?? - ????TD -? ???? - ????????????TD - ?????????????? - ???TD - ????????????? ????TD -?????????????????T??????? ??????????T????????????????? -T?????????????????? -??????? T??????? -7????????????????? ??T?????????????? - 7?????????????????7? diff --git a/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/mini_set_ids b/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/mini_set_ids deleted file mode 100644 index be7a4c5..0000000 --- a/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/mini_set_ids +++ /dev/null @@ -1,4 +0,0 @@ -1 -15 -21 -25 diff --git a/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/names.dmp b/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/names.dmp deleted file mode 100644 index 2cc3d96..0000000 --- a/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/names.dmp +++ /dev/null @@ -1,2 +0,0 @@ -1 | root | | scientific name | -2 | Hoswirudivirus MRV1 | | scientific name | diff --git a/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/nodes.dmp b/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/nodes.dmp deleted file mode 100644 index caccdf3..0000000 --- a/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/nodes.dmp +++ /dev/null @@ -1,2 +0,0 @@ -1 | 1 | no rank | | 8 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | | -2 | 9865 | species | XX | 0 | 1 | 11 | 1 | 0 | 1 | 1 | 0 | | diff --git a/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/plasmid_hallmark_annotation.txt b/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/plasmid_hallmark_annotation.txt deleted file mode 100644 index 1090994..0000000 --- a/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/plasmid_hallmark_annotation.txt +++ /dev/null @@ -1,2 +0,0 @@ -COG0206 -COG0630 diff --git a/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/version.txt b/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/version.txt deleted file mode 100644 index d3bdbdf..0000000 --- a/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/version.txt +++ /dev/null @@ -1 +0,0 @@ -1.7 diff --git a/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/virus_hallmark_annotation.txt b/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/virus_hallmark_annotation.txt deleted file mode 100644 index 51e43c1..0000000 --- a/q2_viromics/tests/data/type/genomad_database_dir/genomad_db/virus_hallmark_annotation.txt +++ /dev/null @@ -1,2 +0,0 @@ -COG1783 -COG3497 diff --git a/q2_viromics/tests/test_format.py b/q2_viromics/tests/test_format.py index 289ae29..5092b73 100644 --- a/q2_viromics/tests/test_format.py +++ b/q2_viromics/tests/test_format.py @@ -12,7 +12,6 @@ CheckVDBDirFmt, GeneralBinaryFileFormat, GeneralTSVFormat, - GenomadDBDirFmt, HMMFormat, ViromicsMetadataDirFmt, ) @@ -123,15 +122,6 @@ def test_tsv_files_hmm_db_path_maker(self): self.assertEqual(str(result_path), expected_path) -class TestGenomadDBDirFmt(TestPluginBase): - package = "q2_viromics.tests" - - def test_CheckVDB_GeneralTSVFormat(self): - filepath = self.get_data_path("type/genomad_database_dir/") - format = GenomadDBDirFmt(filepath, mode="r") - format.validate() - - class TestViromicsMetadataDirFmt(TestPluginBase): package = "q2_viromics.tests" diff --git a/q2_viromics/tests/test_genomad_analysis.py b/q2_viromics/tests/test_genomad_analysis.py deleted file mode 100644 index 4392705..0000000 --- a/q2_viromics/tests/test_genomad_analysis.py +++ /dev/null @@ -1,164 +0,0 @@ -# ---------------------------------------------------------------------------- -# Copyright (c) 2024, Bokulich Lab. -# -# Distributed under the terms of the Modified BSD License. -# -# The full license is in the file LICENSE, distributed with this software. -# ---------------------------------------------------------------------------- - -import subprocess -import unittest -from unittest.mock import MagicMock, patch - -from q2_viromics.genomad_analysis import genomad_analysis, genomad_end_to_end - - -class TestGenomadAnalysis(unittest.TestCase): - @patch("q2_viromics.genomad_analysis.run_command") - def test_genomad_end_to_end_success(self, mock_run_command): - # Mock the paths - mock_tmp = "/fake/tmp" - mock_sequences = MagicMock() - mock_sequences.__str__.return_value = "/fake/sequences" - mock_database = MagicMock() - mock_database.path = "/fake/database" - mock_database_listdir = ["internal_db"] - - with patch("os.listdir", return_value=mock_database_listdir): - # Call the function - genomad_end_to_end( - mock_tmp, - mock_sequences, - mock_database, - num_threads=1, - splits=0, - min_score=0.7, - min_number_genes=1, - conservative_taxonomy=False, - ) - - # Expected command - expected_cmd = [ - "genomad", - "end-to-end", - "--cleanup", - "--splits", - "0", - "--min-score", - "0.7", - "--min-number-genes", - "1", - "--threads", - "1", - "--disable-nn-classification", - "/fake/sequences", - mock_tmp, - "/fake/database/internal_db", - ] - - # Assert the command was called - mock_run_command.assert_called_once_with(expected_cmd) - - @patch( - "q2_viromics.genomad_analysis.run_command", - side_effect=subprocess.CalledProcessError(1, "cmd"), - ) - def test_genomad_end_to_end_failure(self, mock_run_command): - # Mock the paths - mock_tmp = "/fake/tmp" - mock_sequences = MagicMock() - mock_sequences.__str__.return_value = "/fake/sequences" - mock_database = MagicMock() - mock_database.path = "/fake/database" - mock_database_listdir = ["internal_db"] - - with patch("os.listdir", return_value=mock_database_listdir): - # Call the function and assert it raises an Exception - with self.assertRaises(Exception) as context: - genomad_end_to_end( - mock_tmp, - mock_sequences, - mock_database, - num_threads=1, - splits=0, - min_score=0.7, - min_number_genes=1, - conservative_taxonomy=False, - ) - - self.assertTrue( - "An error was encountered while running genomad end-to-end" - in str(context.exception) - ) - - @patch("q2_viromics.genomad_analysis.genomad_end_to_end") - @patch("q2_viromics.genomad_analysis.ContigSequencesDirFmt") - @patch("shutil.move") - @patch("tempfile.TemporaryDirectory") - @patch("os.path.exists", return_value=True) - @patch("os.makedirs") - @patch("builtins.open", new_callable=unittest.mock.mock_open) - def test_genomad_analysis_success( - self, - mock_open, - mock_makedirs, - mock_path_exists, - mock_tempdir, - mock_shutil_move, - mock_ContigSequencesDirFmt, - mock_genomad_end_to_end, - ): - # Mock the context managers - mock_tempdir.return_value.__enter__.return_value = "/fake/tmp" - - # Mock the ContigSequencesDirFmt instance and its methods - mock_sequences = mock_ContigSequencesDirFmt.return_value - mock_sequences.sample_dict.return_value = {"s1": "/fake/contigs.fa"} - - # Mock the database - mock_database = MagicMock() - mock_database.path = "/fake/database" - - # Call the function - result = genomad_analysis( - mock_sequences, - mock_database, - num_threads=1, - splits=0, - min_score=0.7, - min_number_genes=1, - conservative_taxonomy=False, - ) - - # Assertions - mock_genomad_end_to_end.assert_called_once_with( - "/fake/tmp", "/fake/contigs.fa", mock_database, 1, 0, 0.7, 1, False - ) - - # Expected paths - expected_files_and_destinations = [ - ( - "/fake/tmp/s1_contigs_summary/s1_contigs_virus.fna", - f"{str(result[0])}/s1_contigs.fa", - ), - ( - "/fake/tmp/s1_contigs_find_proviruses/s1_contigs_provirus.fna", - f"{str(result[1])}/s1_contigs.fa", - ), - ( - "/fake/tmp/s1_contigs_summary/s1_contigs_plasmid.fna", - f"{str(result[2])}/s1_contigs.fa", - ), - ( - "/fake/tmp/s1_contigs_summary/s1_contigs_virus_summary.tsv", - f"{str(result[3])}/s1_virus_summary.tsv", - ), - ] - - # Assert if shutil.move was called correctly - for src, dst in expected_files_and_destinations: - mock_shutil_move.assert_any_call(src, dst) - - -if __name__ == "__main__": - unittest.main() diff --git a/q2_viromics/tests/test_genomad_fetch_db.py b/q2_viromics/tests/test_genomad_fetch_db.py deleted file mode 100644 index 21aaee9..0000000 --- a/q2_viromics/tests/test_genomad_fetch_db.py +++ /dev/null @@ -1,87 +0,0 @@ -# ---------------------------------------------------------------------------- -# Copyright (c) 2024, Bokulich Lab. -# -# Distributed under the terms of the Modified BSD License. -# -# The full license is in the file LICENSE, distributed with this software. -# ---------------------------------------------------------------------------- - -import os -import subprocess -import unittest -from unittest.mock import MagicMock, patch - -from q2_viromics.genomad_fetch_db import genomad_download_database, genomad_fetch_db - - -class TestGenomadFetchDb(unittest.TestCase): - @patch("q2_viromics.genomad_fetch_db.run_command") - @patch("q2_viromics.genomad_fetch_db.GenomadDBDirFmt") - def test_genomad_fetch_db_success(self, mock_GenomadDBDirFmt, mock_run_command): - # Mock the GenomadDBDirFmt instance - mock_database = MagicMock() - mock_GenomadDBDirFmt.return_value = mock_database - - # Mock os.path.isdir to simulate presence of .ipynb_checkpoints directory - with patch("os.path.exists", return_value=True): - with patch("shutil.rmtree") as mock_rmtree: - # Call the function - result = genomad_fetch_db() - - # Check if genomad_download_database was called correctly - expected_cmd = [ - "genomad", - "download-database", - str(mock_database), - ] - mock_run_command.assert_called_once_with(expected_cmd) - - # Check if .ipynb_checkpoints directory was removed - mock_rmtree.assert_called_once_with( - os.path.join(str(mock_database), "genomad_db/.ipynb_checkpoints/") - ) - - # Check the return value - self.assertEqual(result, mock_database) - - @patch( - "q2_viromics.genomad_fetch_db.run_command", - side_effect=subprocess.CalledProcessError(1, "cmd"), - ) - @patch("q2_viromics.genomad_fetch_db.GenomadDBDirFmt") - def test_genomad_fetch_db_failure(self, mock_GenomadDBDirFmt, mock_run_command): - # Mock the GenomadDBDirFmt instance - mock_database = MagicMock() - mock_GenomadDBDirFmt.return_value = mock_database - - # Call the function and assert it raises an Exception - with self.assertRaises(Exception) as context: - genomad_fetch_db() - - self.assertTrue( - "An error was encountered while running geNomad download-database" - in str(context.exception) - ) - - @patch("q2_viromics.genomad_fetch_db.run_command") - def test_genomad_download_database_success(self, mock_run_command): - # Mock the database path - mock_database = MagicMock() - mock_database.path = "/fake/path" - - # Call the function - genomad_download_database(mock_database) - - # Expected command - expected_cmd = [ - "genomad", - "download-database", - str(mock_database), - ] - - # Assert the command was called - mock_run_command.assert_called_once_with(expected_cmd) - - -if __name__ == "__main__": - unittest.main() diff --git a/q2_viromics/tests/test_type.py b/q2_viromics/tests/test_type.py index 5e0073d..5f60555 100644 --- a/q2_viromics/tests/test_type.py +++ b/q2_viromics/tests/test_type.py @@ -7,7 +7,7 @@ # ---------------------------------------------------------------------------- from qiime2.plugin.testing import TestPluginBase -from q2_viromics.types._type import CheckVDB, GenomadDB, ViromicsMetadata +from q2_viromics.types._type import CheckVDB, ViromicsMetadata class TestCheckVDbType(TestPluginBase): @@ -17,13 +17,6 @@ def test_CheckVDb_registration(self): self.assertRegisteredSemanticType(CheckVDB) -class TestGenomadDBType(TestPluginBase): - package = "q2_viromics.tests" - - def test_GenomadDB_registration(self): - self.assertRegisteredSemanticType(GenomadDB) - - class TestViromicsMetadataType(TestPluginBase): package = "q2_viromics.tests" diff --git a/q2_viromics/types/__init__.py b/q2_viromics/types/__init__.py index 6e457e4..aaa8aff 100644 --- a/q2_viromics/types/__init__.py +++ b/q2_viromics/types/__init__.py @@ -5,14 +5,12 @@ # # The full license is in the file LICENSE, distributed with this software. # ---------------------------------------------------------------------------- -from ._format import CheckVDBDirFmt, GenomadDBDirFmt, ViromicsMetadataDirFmt -from ._type import CheckVDB, GenomadDB, ViromicsMetadata +from ._format import CheckVDBDirFmt, ViromicsMetadataDirFmt +from ._type import CheckVDB, ViromicsMetadata __all__ = [ "CheckVDB", "ViromicsMetadata", - "GenomadDB", "CheckVDBDirFmt", "ViromicsMetadataDirFmt", - "GenomadDBDirFmt", ] diff --git a/q2_viromics/types/_format.py b/q2_viromics/types/_format.py index d7feaeb..f9bfb42 100644 --- a/q2_viromics/types/_format.py +++ b/q2_viromics/types/_format.py @@ -111,109 +111,6 @@ def tsv_files_hmm_db_path_maker(self, outer_dir, name): return "%s/hmm_db/%s.tsv" % (outer_dir, name) -# Directory format for the Genomad Database -class GenomadDBDirFmt(model.DirectoryFormat): - genomad_db = model.File(r"genomad_db/genomad_db$", format=GeneralBinaryFileFormat) - genomad_db_taxonomy = model.File( - r"genomad_db/genomad_db_taxonomy$", format=GeneralBinaryFileFormat - ) - genomad_marker_metadata = model.File( - r"genomad_db/genomad_marker_metadata\.tsv$", format=GeneralTSVFormat - ) - genomad_mini_db_mapping = model.File( - r"genomad_db/genomad_mini_db_mapping$", format=GeneralTSVFormat - ) - genomad_db_dbtype = model.File( - r"genomad_db/genomad_db\.dbtype$", format=GeneralBinaryFileFormat - ) - genomad_integrase_db = model.File( - r"genomad_db/genomad_integrase_db$", format=GeneralBinaryFileFormat - ) - genomad_mini_db = model.File( - r"genomad_db/genomad_mini_db$", format=GeneralBinaryFileFormat - ) - genomad_mini_db_taxonomy = model.File( - r"genomad_db/genomad_mini_db_taxonomy$", format=GeneralBinaryFileFormat - ) - mini_set_ids = model.File( - r"genomad_db/mini_set_ids$", format=GeneralBinaryFileFormat - ) - names_dmp = model.File(r"genomad_db/names\.dmp$", format=GeneralTSVFormat) - genomad_db_index = model.File( - r"genomad_db/genomad_db\.index$", format=GeneralTSVFormat - ) - genomad_integrase_db_dbtype = model.File( - r"genomad_db/genomad_integrase_db\.dbtype$", format=GeneralBinaryFileFormat - ) - genomad_mini_db_dbtype = model.File( - r"genomad_db/genomad_mini_db\.dbtype$", format=GeneralBinaryFileFormat - ) - nodes_dmp = model.File(r"genomad_db/nodes\.dmp$", format=GeneralTSVFormat) - genomad_db_lookup = model.File( - r"genomad_db/genomad_db\.lookup$", format=GeneralTSVFormat - ) - genomad_integrase_db_index = model.File( - r"genomad_db/genomad_integrase_db\.index$", format=GeneralTSVFormat - ) - genomad_mini_db_index = model.File( - r"genomad_db/genomad_mini_db\.index$", format=GeneralTSVFormat - ) - plasmid_hallmark_annotation = model.File( - r"genomad_db/plasmid_hallmark_annotation\.txt$", format=GeneralBinaryFileFormat - ) - genomad_db_source = model.File( - r"genomad_db/genomad_db\.source$", format=GeneralBinaryFileFormat - ) - genomad_integrase_db_lookup = model.File( - r"genomad_db/genomad_integrase_db\.lookup$", format=GeneralTSVFormat - ) - genomad_mini_db_lookup = model.File( - r"genomad_db/genomad_mini_db\.lookup$", format=GeneralTSVFormat - ) - virus_hallmark_annotation = model.File( - r"genomad_db/virus_hallmark_annotation\.txt$", format=GeneralBinaryFileFormat - ) - genomad_db_h = model.File( - r"genomad_db/genomad_db_h$", format=GeneralBinaryFileFormat - ) - genomad_integrase_db_source = model.File( - r"genomad_db/genomad_integrase_db\.source$", format=GeneralBinaryFileFormat - ) - genomad_mini_db_source = model.File( - r"genomad_db/genomad_mini_db\.source$", format=GeneralBinaryFileFormat - ) - version_txt = model.File( - r"genomad_db/version\.txt$", format=GeneralBinaryFileFormat - ) - genomad_db_h_dbtype = model.File( - r"genomad_db/genomad_db_h\.dbtype$", format=GeneralBinaryFileFormat - ) - genomad_integrase_db_h = model.File( - r"genomad_db/genomad_integrase_db_h$", format=GeneralBinaryFileFormat - ) - genomad_mini_db_h = model.File( - r"genomad_db/genomad_mini_db_h$", format=GeneralBinaryFileFormat - ) - genomad_db_h_index = model.File( - r"genomad_db/genomad_db_h\.index$", format=GeneralTSVFormat - ) - genomad_integrase_db_h_dbtype = model.File( - r"genomad_db/genomad_integrase_db_h\.dbtype$", format=GeneralBinaryFileFormat - ) - genomad_mini_db_h_dbtype = model.File( - r"genomad_db/genomad_mini_db_h\.dbtype$", format=GeneralBinaryFileFormat - ) - genomad_db_mapping = model.File( - r"genomad_db/genomad_db_mapping$", format=GeneralTSVFormat - ) - genomad_integrase_db_h_index = model.File( - r"genomad_db/genomad_integrase_db_h\.index$", format=GeneralTSVFormat - ) - genomad_mini_db_h_index = model.File( - r"genomad_db/genomad_mini_db_h\.index$", format=GeneralTSVFormat - ) - - # Directory format for output tsv files class ViromicsMetadataDirFmt(model.DirectoryFormat): metadata_files = model.FileCollection(r"[^/]+\.tsv$", format=GeneralTSVFormat) diff --git a/q2_viromics/types/_type.py b/q2_viromics/types/_type.py index 490432a..ec5f93c 100644 --- a/q2_viromics/types/_type.py +++ b/q2_viromics/types/_type.py @@ -9,5 +9,4 @@ from qiime2.plugin import SemanticType CheckVDB = SemanticType("CheckVDB") -GenomadDB = SemanticType("GenomadDB") ViromicsMetadata = SemanticType("ViromicsMetadata", variant_of=SampleData.field["type"])