Skip to content

Commit

Permalink
added kmer_build_card with tests
Browse files Browse the repository at this point in the history
  • Loading branch information
VinzentRisch committed Mar 27, 2024
1 parent 9010c71 commit d033f0c
Show file tree
Hide file tree
Showing 3 changed files with 142 additions and 0 deletions.
57 changes: 57 additions & 0 deletions q2_amr/card/kmer.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import glob
import json
import os
import shutil
Expand Down Expand Up @@ -220,3 +221,59 @@ def _run_rgi_kmer_query(tmp, input_file, input_type, kmer_size, minimum, threads
f"(return code {e.returncode}), please inspect "
"stdout and stderr to learn more."
)


def kmer_build_card(
    card_db: CARDDatabaseDirectoryFormat,
    kmer_size: int,
    threads: int = 1,
    batch_size: int = 100000,
) -> CARDKmerDatabaseDirectoryFormat:
    """Build a CARD k-mer database with a custom k-mer size.

    Loads ``card_db`` with ``load_card_db``, invokes ``run_rgi_kmer_build``
    with a temporary directory and then moves the two result files from that
    directory into a fresh ``CARDKmerDatabaseDirectoryFormat``.

    Parameters
    ----------
    card_db : CARDDatabaseDirectoryFormat
        CARD database; must contain a ``card_database_v*.fasta`` file.
    kmer_size : int
        K-mer length, forwarded to ``rgi kmer_build`` as ``-k``.
    threads : int
        Forwarded to ``rgi kmer_build`` as ``--threads``.
    batch_size : int
        Forwarded to ``rgi kmer_build`` as ``--batch_size``.

    Returns
    -------
    CARDKmerDatabaseDirectoryFormat
        Directory holding ``{kmer_size}_kmer_db.json`` and
        ``all_amr_{kmer_size}mers.txt``.

    Raises
    ------
    FileNotFoundError
        If ``card_db`` contains no ``card_database_v*.fasta`` file.
    """
    kmer_db = CARDKmerDatabaseDirectoryFormat()

    with tempfile.TemporaryDirectory() as tmp:
        load_card_db(tmp=tmp, card_db=card_db)

        fasta_pattern = os.path.join(str(card_db), "card_database_v*.fasta")
        fasta_matches = glob.glob(fasta_pattern)
        # Fail with a descriptive error instead of an opaque IndexError when
        # the database directory does not ship the expected FASTA file.
        if not fasta_matches:
            raise FileNotFoundError(
                f"No file matching {fasta_pattern!r} was found in the CARD "
                "database."
            )
        # NOTE(review): glob returns matches in arbitrary order; if more than
        # one file can match this pattern, confirm which one rgi should get.
        card_fasta = fasta_matches[0]

        run_rgi_kmer_build(
            tmp=tmp,
            input_directory=str(card_db),
            card_fasta=card_fasta,
            kmer_size=kmer_size,
            threads=threads,
            batch_size=batch_size,
        )

        # Both output files are picked up from the temporary directory and
        # must be moved out before it is deleted on leaving the ``with``.
        for output_name in (
            f"{kmer_size}_kmer_db.json",
            f"all_amr_{kmer_size}mers.txt",
        ):
            shutil.move(os.path.join(tmp, output_name), str(kmer_db))

    return kmer_db


def run_rgi_kmer_build(
    tmp, input_directory, card_fasta, kmer_size, threads, batch_size
):
    """Run ``rgi kmer_build`` through ``run_command``.

    Parameters
    ----------
    tmp
        Passed to ``run_command`` as its second positional argument
        (presumably the working directory -- confirm against ``run_command``).
    input_directory
        Value for ``--input_directory``.
    card_fasta
        Value for ``--card``.
    kmer_size
        Value for ``-k``.
    threads
        Value for ``--threads``.
    batch_size
        Value for ``--batch_size``.

    Raises
    ------
    Exception
        If ``run_command`` raises ``subprocess.CalledProcessError``; the
        original error is attached as ``__cause__``.
    """
    # Every argv element must be a string: kmer_build_card hands over
    # kmer_size, threads and batch_size as ints, which a subprocess call
    # would reject with a TypeError. str() leaves string inputs unchanged.
    cmd = [
        "rgi",
        "kmer_build",
        "--input_directory",
        str(input_directory),
        "--card",
        str(card_fasta),
        "-k",
        str(kmer_size),
        "--threads",
        str(threads),
        "--batch_size",
        str(batch_size),
    ]

    try:
        run_command(cmd, tmp, verbose=True)
    except subprocess.CalledProcessError as e:
        # Chain the cause so the failing command stays visible in tracebacks.
        raise Exception(
            "An error was encountered while running rgi, "
            f"(return code {e.returncode}), please inspect "
            "stdout and stderr to learn more."
        ) from e
53 changes: 53 additions & 0 deletions q2_amr/card/tests/test_kmer.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,10 @@
_kmer_query_mags,
_kmer_query_reads,
_run_rgi_kmer_query,
kmer_build_card,
kmer_query_mags_card,
kmer_query_reads_card,
run_rgi_kmer_build,
)
from q2_amr.types import (
CARDAlleleAnnotationDirectoryFormat,
Expand Down Expand Up @@ -222,3 +224,54 @@ def test_exception_raised(self):
"tmp", "input_file", "input_type", "kmer_size", "minimum", "threads"
)
self.assertEqual(str(cm.exception), expected_message)

def test_kmer_build_card(self):
    """kmer_build_card returns a k-mer database holding both output files."""
    # Replace the rgi call with a stand-in that drops fixture files into the
    # temporary directory under the file names kmer_build_card collects.
    fake_build = MagicMock(side_effect=self.copy_kmer_build_files)
    build_patch = patch(
        "q2_amr.card.kmer.run_rgi_kmer_build", side_effect=fake_build
    )
    load_patch = patch("q2_amr.card.kmer.load_card_db")
    glob_patch = patch("glob.glob")

    with build_patch, load_patch, glob_patch:
        card_db = CARDDatabaseDirectoryFormat()
        result = kmer_build_card(card_db=card_db, kmer_size=32)

    self.assertIsInstance(result, CARDKmerDatabaseDirectoryFormat)
    for expected_name in ("32_kmer_db.json", "all_amr_32mers.txt"):
        expected_path = os.path.join(str(result), expected_name)
        self.assertTrue(os.path.exists(expected_path))

def copy_kmer_build_files(
    self, tmp, input_directory, card_fasta, kmer_size, threads, batch_size
):
    """Copy the two k-mer fixture files into ``tmp`` under output names.

    Takes the same arguments as ``run_rgi_kmer_build`` so it can serve as
    that function's mock side effect; only ``tmp`` and ``kmer_size`` are used.
    """
    # fixture file name -> destination file name, both built from kmer_size
    fixture_to_output = {
        "kmer_json_test.json": f"{kmer_size}_kmer_db.json",
        "kmer_txt_test.txt": f"all_amr_{kmer_size}mers.txt",
    }
    for fixture_name, output_name in fixture_to_output.items():
        source_path = self.get_data_path(fixture_name)
        shutil.copy(source_path, os.path.join(tmp, output_name))

def test_run_rgi_kmer_build(self):
    """run_rgi_kmer_build hands the expected rgi command to run_command."""
    expected_cmd = [
        "rgi",
        "kmer_build",
        "--input_directory",
        "path_directory",
        "--card",
        "path_fasta",
        "-k",
        "61",
        "--threads",
        "10",
        "--batch_size",
        "1000000",
    ]
    build_kwargs = {
        "tmp": "path_tmp",
        "input_directory": "path_directory",
        "card_fasta": "path_fasta",
        "kmer_size": "61",
        "threads": "10",
        "batch_size": "1000000",
    }

    with patch("q2_amr.card.kmer.run_command") as run_command_mock:
        run_rgi_kmer_build(**build_kwargs)
        # The command list and tmp are positional; verbose is a keyword.
        run_command_mock.assert_called_once_with(
            expected_cmd, "path_tmp", verbose=True
        )
32 changes: 32 additions & 0 deletions q2_amr/plugin_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
from q2_amr.card.kmer import (
_kmer_query_mags,
_kmer_query_reads,
kmer_build_card,
kmer_query_mags_card,
kmer_query_reads_card,
)
Expand Down Expand Up @@ -408,6 +409,37 @@
citations=[citations["alcock_card_2023"]],
)

# Register kmer_build_card as a plugin method: one CARD database input,
# three integer parameters and one k-mer database output.
plugin.methods.register_function(
    name="K-mer build",
    description=(
        "With kmer_build_card a kmer database can be built "
        "with a custom kmer size"
    ),
    function=kmer_build_card,
    inputs={"card_db": CARDDatabase},
    input_descriptions={"card_db": "CARD Database"},
    # Each Range starts at 0 with inclusive_start=False and has no end given.
    parameters={
        "kmer_size": Int % Range(0, None, inclusive_start=False),
        "threads": Int % Range(0, None, inclusive_start=False),
        "batch_size": Int % Range(0, None, inclusive_start=False),
    },
    parameter_descriptions={
        "kmer_size": "Length of k-mers in base pairs.",
        "threads": "Number of threads (CPUs) to use.",
        "batch_size": (
            "Number of k-mers to query at a time using "
            "pyahocorasick--the greater the number the more memory usage."
        ),
    },
    outputs=[("kmer_db", CARDKmerDatabase)],
    output_descriptions={"kmer_db": "K-mer database with custom k-mer size."},
    citations=[citations["alcock_card_2023"]],
)

plugin.methods.register_function(
function=partition_mags_annotations,
inputs={"annotations": SampleData[CARDAnnotation]},
Expand Down

0 comments on commit d033f0c

Please sign in to comment.