BUG: make bin-contigs collect .fasta contigs too (#86)
Co-authored-by: Michal Ziemski <[email protected]>
colinvwood and misialq authored Nov 7, 2023
1 parent 56b60ba commit 01db060
Showing 2 changed files with 27 additions and 34 deletions.
44 changes: 25 additions & 19 deletions q2_moshpit/metabat2/metabat2.py
@@ -8,14 +8,15 @@
import glob
import re
from uuid import uuid4
from pathlib import Path

import os.path
import shutil
import tempfile
from copy import deepcopy

import skbio.io
from q2_types.feature_data import DNAIterator
from q2_types.feature_data import DNAIterator, DNAFASTAFormat

from q2_types_genomics.per_sample_data import ContigSequencesDirFmt, BAMDirFmt
from q2_types_genomics.per_sample_data._format import MultiFASTADirectoryFormat
@@ -24,24 +25,26 @@
from q2_moshpit.metabat2.utils import _process_metabat2_arg


def _get_sample_name_from_path(fp, suffix):
return os.path.basename(fp).rsplit(suffix, maxsplit=1)[0]


def _assert_samples(contigs_fps, maps_fps) -> dict:
contigs_fps, maps_fps = sorted(contigs_fps), sorted(maps_fps)
contig_samps = [_get_sample_name_from_path(x, '_contigs.fa')
for x in contigs_fps]
map_samps = [_get_sample_name_from_path(x, '_alignment.bam')
for x in maps_fps]
contig_samps = [
Path(fp).stem.rsplit('_contigs', 1)[0] for fp in contigs_fps
]
map_samps = [
Path(fp).stem.rsplit('_alignment', 1)[0] for fp in maps_fps
]
if set(contig_samps) != set(map_samps):
raise Exception('Contigs and alignment maps should belong to the '
'same sample set. You provided contigs for '
f'samples: {",".join(contig_samps)} but maps for '
f'samples: {",".join(map_samps)}. Please check '
'your inputs and try again.')
return {s: {'contigs': contigs_fps[i], 'map': maps_fps[i]}
for i, s in enumerate(contig_samps)}
raise Exception(
'Contigs and alignment maps should belong to the same sample set. '
f'You provided contigs for samples: {",".join(contig_samps)} but '
f'maps for samples: {",".join(map_samps)}. Please check your '
'inputs and try again.'
)

return {
s: {'contigs': contigs_fps[i], 'map': maps_fps[i]}
for i, s in enumerate(contig_samps)
}


def _sort_bams(samp_name, samp_props, loc):
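
The refactored _assert_samples above drops the removed _get_sample_name_from_path helper in favour of pathlib, so the contig file extension no longer matters when deriving sample names. A minimal sketch of the new extraction (the paths are illustrative only):

    from pathlib import Path

    # Both extensions yield the same sample name: Path.stem strips the
    # suffix first, then everything from '_contigs' onwards is cut off.
    for fp in ('/a/b/s1_contigs.fa', '/a/b/s1_contigs.fasta'):
        print(Path(fp).stem.rsplit('_contigs', 1)[0])  # -> 's1'
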
@@ -129,10 +132,13 @@ def _generate_contig_map(


def _bin_contigs_metabat(
contigs: ContigSequencesDirFmt, alignment_maps: BAMDirFmt,
common_args: list
contigs: ContigSequencesDirFmt,
alignment_maps: BAMDirFmt,
common_args: list
) -> (MultiFASTADirectoryFormat, dict, ContigSequencesDirFmt):
contigs_fps = sorted(glob.glob(os.path.join(str(contigs), '*.fa')))
contigs_fps = sorted(map(
lambda v: str(v[1].path), contigs.sequences.iter_views(DNAFASTAFormat)
))
maps_fps = sorted(glob.glob(os.path.join(str(alignment_maps), '*.bam')))
sample_set = _assert_samples(contigs_fps, maps_fps)

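
The change in _bin_contigs_metabat is the core of the fix: the old glob pattern only matched files ending in '.fa', so contigs stored as '.fasta' never reached MetaBAT 2. A rough comparison, assuming a hypothetical contigs directory holding s1_contigs.fa and s2_contigs.fasta:

    # Old collection: the '.fasta' file is silently skipped.
    glob.glob(os.path.join(str(contigs), '*.fa'))
    # -> ['.../s1_contigs.fa']

    # New collection (as in the diff above): iterating the views registered
    # on the directory format returns every contig file, whatever its suffix.
    [str(v[1].path) for v in contigs.sequences.iter_views(DNAFASTAFormat)]
    # -> ['.../s1_contigs.fa', '.../s2_contigs.fasta']
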
17 changes: 2 additions & 15 deletions q2_moshpit/metabat2/tests/test_metabat2.py
@@ -19,27 +19,14 @@
from qiime2.plugin.testing import TestPluginBase

from q2_moshpit.metabat2.metabat2 import (
_assert_samples, _get_sample_name_from_path, _sort_bams,
_estimate_depth, _run_metabat2, _process_sample, _bin_contigs_metabat,
_generate_contig_map
_assert_samples, _sort_bams, _estimate_depth, _run_metabat2,
_process_sample, _bin_contigs_metabat, _generate_contig_map
)


class TestMetabat2(TestPluginBase):
package = 'q2_moshpit.metabat2.tests'

def test_get_sample_name_from_path(self):
obs = _get_sample_name_from_path('/a/b/sampleX.fasta',
'.fasta')
exp = 'sampleX'
self.assertEqual(exp, obs)

def test_get_sample_name_from_path_underscores(self):
obs = _get_sample_name_from_path('/a/b/sampleX_something.fasta',
'_something.fasta')
exp = 'sampleX'
self.assertEqual(exp, obs)

def test_assert_samples_ok(self):
contigs = ['/a/b/s1_contigs.fa', '/a/b/s3_contigs.fa',
'/a/b/s2_contigs.fa']
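
The two unit tests for the deleted helper are removed along with it. A sketch, not part of this commit, of a test method that could be added to TestMetabat2 to cover the extension-agnostic behaviour at the _assert_samples level (all paths are hypothetical):

    def test_assert_samples_mixed_extensions(self):
        contigs = ['/a/b/s1_contigs.fasta', '/a/b/s2_contigs.fa']
        maps = ['/a/b/s1_alignment.bam', '/a/b/s2_alignment.bam']
        obs = _assert_samples(contigs, maps)
        exp = {
            's1': {'contigs': '/a/b/s1_contigs.fasta',
                   'map': '/a/b/s1_alignment.bam'},
            's2': {'contigs': '/a/b/s2_contigs.fa',
                   'map': '/a/b/s2_alignment.bam'}
        }
        self.assertDictEqual(exp, obs)
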
