Skip to content

Commit

Permalink
Put integration test in separate test file.
Browse files Browse the repository at this point in the history
  • Loading branch information
Sann5 committed Oct 9, 2023
1 parent 529d7dd commit 548529a
Show file tree
Hide file tree
Showing 3 changed files with 332 additions and 309 deletions.
4 changes: 2 additions & 2 deletions q2_moshpit/busco/tests/data/plot_as_dict.json
Original file line number Diff line number Diff line change
Expand Up @@ -603,7 +603,7 @@
"transform": [
{
"as": "x",
"calculate": "datum[param_1]"
"calculate": "datum[param_2]"
}
],
"width": 600
Expand All @@ -622,7 +622,7 @@
"number_of_scaffolds"
]
},
"name": "param_1",
"name": "param_2",
"value": "scaffold_n50"
}
],
Expand Down
308 changes: 1 addition & 307 deletions q2_moshpit/busco/tests/test_busco.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,20 +8,9 @@

import os
import tempfile
import zipfile
import contextlib
import pandas as pd
from q2_moshpit.busco.busco import evaluate_busco
from q2_moshpit.busco.utils import (
_parse_busco_params,
_draw_busco_plots,
_zip_busco_plots,
_run_busco,
_draw_busco_plots_for_render,
_collect_summaries_and_save,
_parse_df_columns,
)
from unittest.mock import patch, call, ANY
from unittest.mock import patch, ANY
from qiime2.plugin.testing import TestPluginBase
from q2_types_genomics.per_sample_data._format import MultiMAGSequencesDirFmt

Expand All @@ -41,284 +30,6 @@ def setUpClass(self):
mode="r",
)

def setUp(self):
super().setUp()
with contextlib.ExitStack() as stack:
self._tmp = stack.enter_context(tempfile.TemporaryDirectory())
self.addCleanup(stack.pop_all().close)

# Test `_parse_busco_params`
def test_parse_busco_params_1(self):
observed = _parse_busco_params("auto_lineage", True)
expected = ["--auto-lineage"]
self.assertSetEqual(set(observed), set(expected))

def test_parse_busco_params_2(self):
observed = _parse_busco_params("evalue", 0.66)
expected = ["--evalue", str(0.66)]
self.assertSetEqual(set(observed), set(expected))

def test_parse_busco_params_3(self):
observed = _parse_busco_params("augustus", True)
expected = ["--augustus"]
self.assertSetEqual(set(observed), set(expected))

def test_parse_busco_params_4(self):
observed = _parse_busco_params("lineage_dataset", "bacteria-XYZ")
expected = ["--lineage_dataset", "bacteria-XYZ"]
self.assertSetEqual(set(observed), set(expected))

def test_collect_summaries_and_save(self):
"""
Test for `_collect_summaries_and_save` function.
Uses data stored in ./data. Checks for data frame equality.
"""
with tempfile.TemporaryDirectory() as tmp_path:
path_to_summaries = {}

for i in range(1, 4):
path_to_summaries[f"sample{i}"] = self.get_data_path(
filename=f"batch_summary_sample{i}.txt"
)

observed = _collect_summaries_and_save(
path_to_run_summaries=path_to_summaries,
all_summaries_path=os.path.join(tmp_path, "aggregated.csv"),
)

expected = pd.read_csv(
self.get_data_path(filename="all_batch_summaries.csv")
)
pd.set_option('display.max_columns', None)

try:
pd.testing.assert_frame_equal(observed, expected)
except AssertionError as e:
print(e)
self.assertTrue(False)
else:
self.assertTrue(True)

# Test `_draw_busco_plots`
def draw_n_busco_plots(self, filename, delim):
"""
Creates plot from a table containing information about
one or more samples. Checks for the existence of the output
plots.
Args:
filename (str): name of file in ./data to construct the images
delim (str): delimiter of `filename`
"""
# Create an empty dictionary to store the DataFrames
path_to_run_summaries = {}

# Group the DataFrame by the 'sample_id' column
p = self.get_data_path(f"{filename}")
df = pd.read_csv(p, delimiter=delim)
grouped = df.groupby("sample_id")

# Iterate through the groups and store each group as a
# DataFrame in the dictionary
# Creates output directory with path 'tmp'
with tempfile.TemporaryDirectory() as tmp_path:
for sample_id, group_df in grouped:
path_to_df = f"{tmp_path}/{sample_id}.csv"
group_df.to_csv(path_to_df, sep="\t", index=False)
path_to_run_summaries[sample_id] = path_to_df

# Draw plots
paths_to_plots = _draw_busco_plots(
path_to_run_summaries=path_to_run_summaries,
plots_dir=os.path.join(tmp_path, "plots"),
)

# Check if busco plots are in fact generated
for _, value in paths_to_plots.items():
self.assertTrue(os.path.exists(value))

def test_draw_busco_plots_multiple(self):
self.draw_n_busco_plots(
filename="all_batch_summaries.csv", delim=","
)

def test_draw_busco_plots_one(self):
self.draw_n_busco_plots(
filename="batch_summary_sample1.txt", delim="\t"
)

# Test `_draw_busco_plots_for_render`
def test_draw_busco_plots_for_render(self):
"""
Tests function `_draw_busco_plots_for_render`.
Checks for dictionary equality.
"""
# Load data
p = self.get_data_path("all_batch_summaries.csv")
all_summaries_df = pd.read_csv(p)

# Draw plot
observed = _draw_busco_plots_for_render(
all_summaries_df,
width=600,
height=18,
titleFontSize=20,
labelFontSize=17,
)

# Load expected data
p = self.get_data_path("plot_as_dict.json")
with open(p, "r") as json_file:
expected = json_file.read()

# self.maxDiff = None
self.assertEqual(expected, observed)

# Test `_draw_busco_plots`
def mock_draw_busco_plots(self, tmp_path: str, num_files: int) -> dict:
"""
Mocks the generation of sample wise plots by generating
empty files.
Args:
tmp_path (str): Path where to write the empty files.
num_files (int): number of empty files to create, one per sample.
Returns:
paths_to_plots (dict): dictionary with keys sample_id and value
path to empty file.
"""
# Generate random images
paths_to_plots = {}

# Path to output
out_dir = os.path.join(tmp_path, "zip_this_dir")
os.makedirs(out_dir)

# Loop to create the empty files
for i in range(num_files):
# Specify the name of each empty file
file_name = f"empty_file_{i}.svg"

# Combine the directory path and file name to create the full
# file path
file_path = os.path.join(out_dir, file_name)
paths_to_plots[f"empty_file_{i}"] = file_path

# Create an empty file
with open(file_path, 'w'):
pass

return paths_to_plots

def test_zip_busco_plots_multiple(self):
"""
Checks for existence of zip file.
"""
with tempfile.TemporaryDirectory() as tmp_path:
paths_to_plots = self.mock_draw_busco_plots(
num_files=6, tmp_path=tmp_path
)

# Zip graphs for user download
zip_path = os.path.join(tmp_path, "busco_plots.zip")
_zip_busco_plots(paths_to_plots=paths_to_plots, zip_path=zip_path)

# Check for existence of file
self.assertTrue(zipfile.is_zipfile(zip_path))

def test_zip_busco_plots_one(self):
"""
Checks for existence of zip file.
"""
with tempfile.TemporaryDirectory() as tmp_path:
paths_to_plots = self.mock_draw_busco_plots(
num_files=1, tmp_path=tmp_path
)

# Zip graphs for user download
zip_path = os.path.join(tmp_path, "busco_plots.zip")
_zip_busco_plots(paths_to_plots=paths_to_plots, zip_path=zip_path)

# Check for existence of file
self.assertTrue(zipfile.is_zipfile(zip_path))

@patch('subprocess.run')
def test_run_busco(self, subp_run):
"""
Test function `_run_busco`. Checks for dictionary equality.
"""
output_dir = self.get_data_path("busco_output")
sample_ids = os.listdir(output_dir)

# Initialize assertion objects
expected = {}
calls = []

# Define command arguments
fake_props = ["--a", "--b", "0.6"]

# Fabricate list of calls and the expected output
for sample_id in sample_ids:
# Make a dictionary to compare output
p = os.path.join(output_dir, sample_id, "batch_summary.txt")
expected[sample_id] = p

# Append call to list of calls to assert the patching
calls.append(call(
[
"busco",
"--a",
"--b", "0.6",
"--in", self.get_data_path(f"{sample_id}"),
"--out_path", output_dir,
"-o", sample_id
],
check=True
))

# Run busco and save paths to run summaries
observed = _run_busco(
output_dir=output_dir,
mags=self.mags,
params=fake_props,
)

# Assert output
self.assertDictEqual(expected, observed)

# Check for appropiate calls
subp_run.assert_has_calls(calls, any_order=True)

@patch("subprocess.run")
def test_run_busco_exception(self, subp_run):
"""
Test function `_run_busco`. Checks for a raised exception.
"""
with tempfile.TemporaryDirectory() as tmp_path:
# Define command arguments
fake_props = ["--a", "--b", "0.6"]
output_dir = os.path.join(tmp_path, "busco_output")

with self.assertRaises(FileNotFoundError):
# Run busco and save paths to run summaries
_ = _run_busco(
output_dir=output_dir,
mags=self.mags,
params=fake_props,
)

# Assert that the patch was called once.
cmd = [
"busco",
"--a",
"--b", "0.6",
"--in", self.get_data_path("sample1"),
"--out_path", output_dir,
"-o", "sample1"
]
subp_run.assert_called_once_with(cmd, check=True)

# Integration test busco.
@patch('q2_moshpit.busco.utils._run_busco')
@patch('q2_moshpit.busco.utils._zip_busco_plots')
Expand Down Expand Up @@ -391,20 +102,3 @@ def test_integration_busco(
paths_to_plots=ANY,
zip_path=os.path.join(tmp_path, "busco_plots.zip")
)

def test_parse_df_columns(self):
# This side effect will return the all_summaries_dfs
p1 = self.get_data_path("all_batch_summaries.csv")
observed = pd.read_csv(p1)
observed = _parse_df_columns(observed)

p2 = self.get_data_path("all_batch_summaries_formatted.csv")
expected = pd.read_csv(p2)

try:
pd.testing.assert_frame_equal(observed, expected)
except AssertionError as e:
print(e)
self.assertTrue(False)
else:
self.assertTrue(True)
Loading

0 comments on commit 548529a

Please sign in to comment.