Skip to content

Commit

Permalink
ENH: add action fetch-busco-db and modify evaluate-busco accordingly (#162)
Browse files Browse the repository at this point in the history

Co-authored-by: Christos Konstantinos Matzoros <[email protected]>
Co-authored-by: Michal Ziemski <[email protected]>
  • Loading branch information
3 people authored Jun 7, 2024
1 parent 4d7ccf8 commit 7ab7588
Show file tree
Hide file tree
Showing 81 changed files with 7,995 additions and 76 deletions.
19 changes: 19 additions & 0 deletions .github/workflows/upload-coverage.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,29 @@ jobs:
- run: unzip coverage.zip

- name: Find associated PR
id: pr
uses: actions/[email protected]
with:
script: |
const response = await github.rest.search.issuesAndPullRequests({
q: 'repo:${{ github.repository }} is:pr sha:${{ github.event.workflow_run.head_sha }}',
per_page: 1,
})
const items = response.data.items
if (items.length < 1) {
console.error('No PRs found')
return
}
const pullRequestNumber = items[0].number
console.info("Pull request number is", pullRequestNumber)
return pullRequestNumber
- uses: codecov/codecov-action@v4
name: Upload coverage report
with:
files: ./coverage.xml
fail_ci_if_error: true
override_pr: ${{ steps.pr.outputs.result }}
env:
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
1 change: 0 additions & 1 deletion q2_moshpit/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
#
# The full license is in the file LICENSE, distributed with this software.
# ----------------------------------------------------------------------------

from . import busco
from . import eggnog
from . import partition
Expand Down
2 changes: 0 additions & 2 deletions q2_moshpit/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,6 @@ def _process_common_input_params(processing_func, params: dict) -> List[str]:
arg_val
):
processed_args.extend(processing_func(arg_key, arg_val))
else:
continue

return processed_args

Expand Down
5 changes: 4 additions & 1 deletion q2_moshpit/busco/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,8 @@
# ----------------------------------------------------------------------------

from .busco import evaluate_busco, _evaluate_busco, _visualize_busco
from .database import fetch_busco_db

__all__ = ["evaluate_busco", "_evaluate_busco", "_visualize_busco"]
__all__ = [
"evaluate_busco", "_evaluate_busco", "_visualize_busco", "fetch_busco_db"
]
18 changes: 15 additions & 3 deletions q2_moshpit/busco/busco.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,12 @@
from q2_moshpit.busco.utils import (
_parse_busco_params, _collect_summaries, _rename_columns,
_parse_df_columns, _partition_dataframe, _calculate_summary_stats,
_get_feature_table, _cleanup_bootstrap, _get_mag_lengths
_get_feature_table, _cleanup_bootstrap, _get_mag_lengths,
_validate_lineage_dataset_input
)
from q2_moshpit._utils import _process_common_input_params, run_command
from q2_types.per_sample_sequences._format import MultiMAGSequencesDirFmt
from q2_moshpit.busco.types import BuscoDatabaseDirFmt
from q2_types.feature_data_mag._format import MAGSequencesDirFmt


Expand Down Expand Up @@ -74,7 +76,7 @@ def _run_busco(
"-o",
sample
])
run_command(cmd)
run_command(cmd, cwd=os.path.dirname(output_dir))

path_to_run_summary = os.path.join(
output_dir, sample, "batch_summary.txt"
Expand Down Expand Up @@ -110,6 +112,7 @@ def _busco_helper(bins, common_args):

def _evaluate_busco(
bins: Union[MultiMAGSequencesDirFmt, MAGSequencesDirFmt],
busco_db: BuscoDatabaseDirFmt,
mode: str = "genome",
lineage_dataset: str = None,
augustus: bool = False,
Expand All @@ -131,8 +134,16 @@ def _evaluate_busco(
scaffold_composition: bool = False,
) -> pd.DataFrame:
kwargs = {
k: v for k, v in locals().items() if k not in ["bins",]
k: v for k, v in locals().items() if k not in ["bins", "busco_db"]
}
kwargs["offline"] = True
kwargs["download_path"] = f"{str(busco_db)}/busco_downloads"

if lineage_dataset is not None:
_validate_lineage_dataset_input(
lineage_dataset, auto_lineage, auto_lineage_euk, auto_lineage_prok,
busco_db, kwargs # kwargs may be modified inside this function
)

# Filter out all kwargs that are None, False or 0.0
common_args = _process_common_input_params(
Expand Down Expand Up @@ -249,6 +260,7 @@ def _visualize_busco(output_dir: str, busco_results: pd.DataFrame) -> None:
def evaluate_busco(
ctx,
bins,
busco_db,
mode="genome",
lineage_dataset=None,
augustus=False,
Expand Down
47 changes: 47 additions & 0 deletions q2_moshpit/busco/database.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# ----------------------------------------------------------------------------
# Copyright (c) 2023, QIIME 2 development team.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file LICENSE, distributed with this software.
# ----------------------------------------------------------------------------
import subprocess
from q2_moshpit._utils import colorify, run_command
from q2_moshpit.busco.types import BuscoDatabaseDirFmt


def fetch_busco_db(
    virus: bool = False,
    prok: bool = False,
    euk: bool = False
) -> BuscoDatabaseDirFmt:
    """Download BUSCO lineage datasets into a new database directory.

    Parameters
    ----------
    virus : bool
        Request the "virus" dataset group.
    prok : bool
        Request the "prokaryota" dataset group.
    euk : bool
        Request the "eukaryota" dataset group.

    When all three flags are set, the single target "all" is requested
    instead of the three individual groups.

    Returns
    -------
    BuscoDatabaseDirFmt
        The directory format in which ``busco --download`` was executed.

    Raises
    ------
    ValueError
        If none of the flags is set, i.e. there is nothing to download.
    Exception
        If the download command raises ``subprocess.CalledProcessError``;
        the original error is attached as the cause.
    """
    selection = {"virus": virus, "prokaryota": prok, "eukaryota": euk}

    # Fail early: without any target the command line would be a bare
    # "busco --download", which cannot produce a usable database.
    if not any(selection.values()):
        raise ValueError(
            "No BUSCO datasets were selected for download. Please set at "
            "least one of: 'virus', 'prok', 'euk'."
        )

    busco_db = BuscoDatabaseDirFmt(path=None, mode='w')

    # Parse kwargs
    if all(selection.values()):
        args = ["all"]
    else:
        args = [name for name, flag in selection.items() if flag]

    # Download
    print(colorify("Downloading BUSCO database..."))
    try:
        run_command(cmd=["busco", "--download", *args], cwd=str(busco_db))
    except subprocess.CalledProcessError as e:
        # Chain the original error so the failing command stays visible
        # in the traceback.
        raise Exception(
            f"Error during BUSCO database download: {e.returncode}"
        ) from e

    # Let user know that the process is complete but it still needs
    # some time to copy files over.
    print(colorify(
        "Download completed. \n"
        "Copying files from temporary directory to final location..."
    ))

    return busco_db
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.

Large diffs are not rendered by default.

7 changes: 6 additions & 1 deletion q2_moshpit/busco/tests/test_busco_feature_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def test_run_busco(self, mock_run):
'busco', '--lineage_dataset', 'bacteria_odb10',
'--cpu', '7', '--in', self.get_data_path('mags/sample1'),
'--out_path', self.temp_dir.name, '-o', 'sample1'
])
], cwd=os.path.dirname(self.temp_dir.name))

@patch(
"q2_moshpit.busco.busco._draw_detailed_plots",
Expand Down Expand Up @@ -135,9 +135,14 @@ def test_evaluate_busco_action(self):
'FeatureData[MAG]',
self.get_data_path('mags/sample2')
)
busco_db = qiime2.Artifact.import_data(
'ReferenceDB[BuscoDB]',
self.get_data_path('busco_db')
)
obs = evaluate_busco(
ctx=mock_ctx,
bins=mags,
busco_db=busco_db,
num_partitions=2
)
exp = ("collated_result", "visualization")
Expand Down
46 changes: 34 additions & 12 deletions q2_moshpit/busco/tests/test_busco_sample_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from unittest.mock import patch, ANY, call, MagicMock
from qiime2.plugin.testing import TestPluginBase
from q2_types.per_sample_sequences._format import MultiMAGSequencesDirFmt
from q2_moshpit.busco.types import BuscoDatabaseDirFmt


class TestBUSCOSampleData(TestPluginBase):
Expand All @@ -28,6 +29,10 @@ def setUp(self):
path=self.get_data_path('mags'),
mode="r",
)
self.busco_db = BuscoDatabaseDirFmt(
path=self.get_data_path("busco_db"),
mode="r"
)

def _prepare_summaries(self):
for s in ['1', '2']:
Expand Down Expand Up @@ -56,15 +61,21 @@ def test_run_busco(self, mock_run):
self.assertDictEqual(obs, exp)
mock_run.assert_has_calls([
call(
['busco', '--lineage_dataset', 'bacteria_odb10',
'--cpu', '7', '--in', self.get_data_path('mags/sample1'),
'--out_path', self.temp_dir.name, '-o', 'sample1'],
[
'busco', '--lineage_dataset', 'bacteria_odb10',
'--cpu', '7', '--in', self.get_data_path('mags/sample1'),
'--out_path', self.temp_dir.name, '-o', 'sample1'
],
cwd=os.path.dirname(self.temp_dir.name)
),
call(
['busco', '--lineage_dataset', 'bacteria_odb10',
'--cpu', '7', '--in', self.get_data_path('mags/sample2'),
'--out_path', self.temp_dir.name, '-o', 'sample2'],
)
[
'busco', '--lineage_dataset', 'bacteria_odb10',
'--cpu', '7', '--in', self.get_data_path('mags/sample2'),
'--out_path', self.temp_dir.name, '-o', 'sample2'
],
cwd=os.path.dirname(self.temp_dir.name)
),
])

@patch('q2_moshpit.busco.busco._run_busco')
Expand Down Expand Up @@ -99,15 +110,21 @@ def test_busco_helper(self, mock_len, mock_run):
)

@patch("q2_moshpit.busco.busco._busco_helper")
def test_evaluate_busco(self, mock_helper):
def test_evaluate_busco_offline(self, mock_helper):
_evaluate_busco(
bins=self.mags, mode="some_mode", lineage_dataset="bacteria_odb10"
bins=self.mags,
busco_db=self.busco_db,
mode="some_mode",
lineage_dataset="lineage_1"
)
mock_helper.assert_called_with(
self.mags,
['--mode', 'some_mode', '--lineage_dataset', 'bacteria_odb10',
'--cpu', '1', '--contig_break', '10', '--evalue', '0.001',
'--limit', '3']
[
'--mode', 'some_mode', '--lineage_dataset', 'lineage_1',
'--cpu', '1', '--contig_break', '10', '--evalue', '0.001',
'--limit', '3', '--offline', "--download_path",
f"{str(self.busco_db)}/busco_downloads"
]
)

@patch(
Expand Down Expand Up @@ -184,9 +201,14 @@ def test_evaluate_busco_action(self):
'SampleData[MAGs]',
self.get_data_path('mags')
)
busco_db = qiime2.Artifact.import_data(
'ReferenceDB[BuscoDB]',
self.get_data_path('busco_db')
)
obs = evaluate_busco(
ctx=mock_ctx,
bins=mags,
busco_db=busco_db,
num_partitions=2
)
exp = ("collated_result", "visualization")
Expand Down
38 changes: 38 additions & 0 deletions q2_moshpit/busco/tests/test_fetch_busco.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# ----------------------------------------------------------------------------
# Copyright (c) 2022-2023, QIIME 2 development team.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file LICENSE, distributed with this software.
# ----------------------------------------------------------------------------
from q2_moshpit.busco.database import fetch_busco_db
from unittest.mock import patch
from qiime2.plugin.testing import TestPluginBase


class TestFetchBUSCO(TestPluginBase):
    """Check the BUSCO command line built by ``fetch_busco_db``."""

    package = "q2_moshpit.busco.tests"

    def _assert_download(self, mocked_run, db, targets):
        # The patched subprocess.run must have been invoked exactly once,
        # with the download targets appended and the database directory
        # as the working directory.
        expected_cmd = ["busco", "--download", *targets]
        mocked_run.assert_called_once_with(
            expected_cmd, check=True, cwd=str(db)
        )

    @patch("subprocess.run")
    def test_fetch_busco_db_virus(self, mocked_run):
        # Only the virus flag is set.
        db = fetch_busco_db(virus=True, prok=False, euk=False)
        self._assert_download(mocked_run, db, ["virus"])

    @patch("subprocess.run")
    def test_fetch_busco_db_prok_euk(self, mocked_run):
        # Two flags set: both group names are passed, in that order.
        db = fetch_busco_db(virus=False, prok=True, euk=True)
        self._assert_download(mocked_run, db, ["prokaryota", "eukaryota"])

    @patch("subprocess.run")
    def test_fetch_busco_db_all(self, mocked_run):
        # All flags set: the three groups collapse into "all".
        db = fetch_busco_db(virus=True, prok=True, euk=True)
        self._assert_download(mocked_run, db, ["all"])
Loading

0 comments on commit 7ab7588

Please sign in to comment.