Skip to content

Commit

Permalink
merge main
Browse files Browse the repository at this point in the history
  • Loading branch information
VinzentRisch committed Oct 9, 2024
2 parents 4f76e2e + 8ad824c commit 2aab907
Show file tree
Hide file tree
Showing 11 changed files with 283 additions and 277 deletions.
29 changes: 16 additions & 13 deletions q2_amrfinderplus/annotate.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import os
from typing import Union

from q2_types.feature_data_mag import MAGSequencesDirFmt
Expand Down Expand Up @@ -58,10 +59,9 @@ def annotate(
)

# Set up common parameters for _run_amrfinderplus_analyse
common_params = locals().copy()
del common_params["sequences"]
del common_params["proteins"]
del common_params["loci"]
common_params = {
k: v for k, v in locals().items() if k not in ("sequences", "proteins", "loci")
}

# Initialize output formats
amr_annotations = AMRFinderPlusAnnotationsDirFmt()
Expand All @@ -86,27 +86,30 @@ def annotate(
sample_id,
)

for id, file_fp in files_dict.items():
for _id, file_fp in files_dict.items():
# Construct and validate file input paths for amrfinderplus
dna_path, protein_path, gff_path = _get_file_paths(
sequences,
proteins,
loci,
id,
_id,
file_fp,
sample_id,
)

# Define paths for output files
amr_annotations_path = (
amr_annotations.path / sample_id / f"{id}_amr_annotations.tsv"
amr_annotations_path = os.path.join(
str(amr_annotations), sample_id, f"{_id}_amr_annotations.tsv"
)
amr_genes_path = amr_genes.path / sample_id / f"{id}_amr_genes.fasta"
amr_proteins_path = (
amr_proteins.path / sample_id / f"{id}_amr_proteins.fasta"
amr_genes_path = os.path.join(
str(amr_genes), sample_id, f"{_id}_amr_genes.fasta"
)
amr_all_mutations_path = (
amr_all_mutations.path / sample_id / f"{id}_amr_all_mutations.tsv"

amr_proteins_path = os.path.join(
str(amr_proteins), sample_id, f"{_id}_amr_proteins.fasta"
)
amr_all_mutations_path = os.path.join(
str(amr_all_mutations), sample_id, f"{_id}_amr_all_mutations.tsv"
)

# Run amrfinderplus
Expand Down
135 changes: 78 additions & 57 deletions q2_amrfinderplus/plugin_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,10 @@
version=__version__,
website="https://github.com/bokulich-lab/q2-amrfinderplus",
package="q2_amrfinderplus",
description="A plugin to find acquired antimicrobial resistance genes and point "
"mutations in protein and/or assembled nucleotide sequences with "
"NCBI-AMRFinderPlus.",
description=(
"A plugin to find acquired antimicrobial resistance genes and point mutations "
"in protein and/or assembled nucleotide sequences with NCBI-AMRFinderPlus."
),
short_description="AMR annotation.",
citations=[],
)
Expand All @@ -58,7 +59,9 @@

organisms = [
"Acinetobacter_baumannii",
"Acinetobacter",
"Burkholderia_cepacia",
"Burkholderia_cepacia_complex",
"Burkholderia_pseudomallei",
"Campylobacter",
"Citrobacter_freundii",
Expand All @@ -68,12 +71,15 @@
"Enterococcus_faecalis",
"Enterococcus_faecium",
"Escherichia",
"Escherichia_coli_Shigella",
"Klebsiella",
"Klebsiella_oxytoca",
"Klebsiella_pneumoniae",
"Neisseria_gonorrhoeae",
"Neisseria_meningitidis",
"Pseudomonas_aeruginosa",
"Salmonella",
"Serratia",
"Serratia_marcescens",
"Staphylococcus_aureus",
"Staphylococcus_pseudintermedius",
Expand All @@ -83,11 +89,6 @@
"Vibrio_cholerae",
"Vibrio_parahaemolyticus",
"Vibrio_vulnificus",
"Acinetobacter",
"Burkholderia_cepacia_complex",
"Escherichia_coli_Shigella",
"Klebsiella",
"Serratia",
]


Expand Down Expand Up @@ -147,64 +148,83 @@
}

# Help text for the AMRFinderPlus parameters, keyed by parameter name.
# Each key appears exactly once: a dict literal silently keeps only the last
# value of a repeated key, so duplicated entries would hide stale wording.
amrfinderplus_parameter_descriptions = {
    "organism": (
        "Taxon used for screening known resistance causing point mutations and "
        "blacklisting of common, non-informative genes. Pathogen Detection taxgroup "
        "names can also be used."
    ),
    "plus": (
        "Provide results from 'Plus' genes such as virulence factors, stress-response "
        "genes, etc."
    ),
    "report_all_equal": (
        "Report all equally scoring BLAST and HMM matches. This will report multiple "
        "lines for a single element if there are multiple reference proteins that have "
        "the same score. On those lines the fields Accession of closest sequence and "
        "Name of closest sequence will be different showing each of the database "
        "proteins that are equally close to the query sequence."
    ),
    "ident_min": (
        "Minimum identity for a blast-based hit (Methods BLAST or PARTIAL). Setting "
        "this value to something other than -1 will override curated similarity "
        "cutoffs. We only recommend using this option if you have a specific reason."
    ),
    "curated_ident": (
        "Use the curated threshold for a blast-based hit, if it exists and 0.9 "
        "otherwise."
    ),
    "coverage_min": (
        "Minimum proportion of reference gene covered for a BLAST-based hit (Methods "
        "BLAST or PARTIAL)."
    ),
    "translation_table": "Translation table used for BLASTX.",
    # 'standard' is the AMRFinderPlus annotation format name (was misspelled).
    "annotation_format": (
        "Specify the format of the GFF file in the loci input. 'standard' refers to "
        "NCBI resources such as GenBank and RefSeq."
    ),
    "report_common": "Report proteins common to a taxonomy group.",
    "threads": (
        "The number of threads to use for processing. AMRFinderPlus defaults to 4 on "
        "hosts with >= 4 cores. Setting this number higher than the number of cores on "
        "the running host may cause blastp to fail. Using more than 4 threads may "
        "speed up searches."
    ),
}

# Help text for the AMRFinderPlus outputs, keyed by output name.
# Each key appears exactly once; a repeated key in a dict literal would be
# silently overridden by its last occurrence.
amrfinderplus_output_descriptions = {
    "amr_annotations": "Annotated AMR genes and mutations.",
    "amr_all_mutations": (
        "Report of genotypes at all locations screened for point mutations. These "
        "files allow you to distinguish between called point mutations that were the "
        "sensitive variant and the point mutations that could not be called because "
        "the sequence was not found. This file will contain all detected variants from "
        "the reference sequence, so it could be used as an initial screen for novel "
        "variants. Note 'Gene symbols' for mutations not in the database (identifiable "
        "by [UNKNOWN] in the Sequence name field) have offsets that are relative to "
        "the start of the sequence indicated in the field 'Accession of closest "
        "sequence' while 'Gene symbols' from known point-mutation sites have gene "
        "symbols that match the Pathogen Detection Reference Gene Catalog standardized "
        "nomenclature for point mutations."
    ),
    "amr_genes": (
        "Sequences that were identified by AMRFinderPlus as AMR genes. This will "
        "include the entire region that aligns to the references for point mutations."
    ),
    # Closing period restored so this entry matches the "amr_genes" wording.
    "amr_proteins": (
        "Protein Sequences that were identified by AMRFinderPlus as AMR genes. This "
        "will include the entire region that aligns to the references for point "
        "mutations."
    ),
}


# Help text for the AMRFinderPlus inputs, keyed by input name.
# The "loci" key is defined only once; a repeated key in a dict literal is
# silently overridden by its last occurrence.
amrfinderplus_input_descriptions = {
    "sequences": "MAGs or contigs to be annotated with AMRFinderPlus.",
    "proteins": "Protein sequences to be annotated with AMRFinderPlus.",
    "loci": (
        "GFF files to give sequence coordinates for proteins input. Required for "
        "combined searches of protein and DNA sequences."
    ),
    "amrfinderplus_db": "AMRFinderPlus Database.",
}

Expand All @@ -228,9 +248,10 @@
parameter_descriptions=amrfinderplus_parameter_descriptions,
output_descriptions=amrfinderplus_output_descriptions,
name="Annotate MAGs or contigs with AMRFinderPlus.",
description="Annotate sample data MAGs or contigs with antimicrobial resistance "
"genes with AMRFinderPlus. Check https://github.com/ncbi/amr/wiki for "
"documentation.",
description=(
"Annotate MAGs or contigs with antimicrobial resistance genes with "
"AMRFinderPlus. Check https://github.com/ncbi/amr/wiki for documentation."
),
citations=[citations["feldgarden2021amrfinderplus"]],
)

Expand Down
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Loading

0 comments on commit 2aab907

Please sign in to comment.