Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH: add actions to filter MAGs #169

Merged
merged 5 commits into from
May 17, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion q2_moshpit/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from . import prodigal
from ._version import get_versions
from .dereplication import dereplicate_mags
from .filtering import filter_derep_mags, filter_mags
from .kaiju import classification as kaiju_class, database as kaiju_db
from .kraken2 import (
classification as kraken_class,
Expand All @@ -26,5 +27,6 @@
__all__ = [
'metabat2', 'bracken', 'kraken_class', 'kraken_db',
'kaiju_class', 'kaiju_db', 'dereplicate_mags', 'eggnog',
'busco', 'prodigal', 'kraken_helpers', 'partition'
'busco', 'prodigal', 'kraken_helpers', 'partition',
'filter_derep_mags', 'filter_mags'
]
7 changes: 7 additions & 0 deletions q2_moshpit/busco/types/_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,4 +37,11 @@ def _2(data: pd.DataFrame) -> BUSCOResultsFormat:
def _3(ff: BUSCOResultsFormat) -> Metadata:
    """
    Transform a BUSCO results file into QIIME 2 Metadata.

    The table is read with ``_read_dataframe`` and every column listed in
    ``BUSCOResultsFormat.HEADER`` at positions 4-11 and 13 onwards is
    converted with ``pd.to_numeric``. Position 12 (the percent_gaps column)
    is deliberately skipped and left as read.

    Parameters:
        ff (BUSCOResultsFormat): The BUSCO results file to transform.

    Returns:
        Metadata: The results table wrapped as Metadata.
    """
    with ff.open() as handle:
        results = _read_dataframe(handle)
        header = BUSCOResultsFormat.HEADER
        # numeric columns only: everything from index 4 on, except the
        # percent_gaps column at index 12
        for name in [*header[4:12], *header[13:]]:
            results[name] = pd.to_numeric(results[name])
        return Metadata(results)
5 changes: 5 additions & 0 deletions q2_moshpit/busco/types/tests/test_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,11 @@ def test_result_to_metadata_transformer(self):
self.fp, sep='\t', header=0, index_col=0, dtype='str'
)
df.index.name = 'id'
for col in [
'complete', 'single', 'duplicated', 'fragmented', 'missing',
'n_markers', 'scaffold_n50', 'contigs_n50', 'scaffolds', 'length'
]:
df[col] = pd.to_numeric(df[col])
exp = qiime2.Metadata(df)

self.assertEqual(obs, exp)
11 changes: 11 additions & 0 deletions q2_moshpit/filtering/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# ----------------------------------------------------------------------------
# Copyright (c) 2022-2023, QIIME 2 development team.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file LICENSE, distributed with this software.
# ----------------------------------------------------------------------------

from .filter_mags import filter_derep_mags, filter_mags

__all__ = ["filter_derep_mags", "filter_mags"]
159 changes: 159 additions & 0 deletions q2_moshpit/filtering/filter_mags.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
# ----------------------------------------------------------------------------
# Copyright (c) 2022-2023, QIIME 2 development team.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file LICENSE, distributed with this software.
# ----------------------------------------------------------------------------
import os

import pandas as pd
from qiime2 import Metadata
from qiime2.util import duplicate

from q2_types.feature_data_mag import MAGSequencesDirFmt
from q2_types.per_sample_sequences import MultiMAGSequencesDirFmt


def _filter_ids(
ids: set,
metadata: Metadata = None,
where: str = None,
exclude_ids: bool = False
) -> set:
"""
Filters IDs based on the provided metadata.

Parameters:
ids (set): The set of IDs to filter.
metadata (Metadata, optional): The metadata to use for filtering.
Defaults to None.
where (str, optional): The condition to use for filtering.
Defaults to None.
exclude_ids (bool, optional): Whether to exclude the IDs that
match the condition. Defaults to False.

Returns:
set: The filtered set of IDs.
"""
selected_ids = metadata.get_ids(where=where)
if not selected_ids:
print("The filter query returned no IDs to filter out.")
else:
if exclude_ids:
ids -= set(selected_ids)
else:
ids &= set(selected_ids)
print(f"Found {len(ids)} IDs to keep.")
return ids


def _filter_manifest(
manifest: pd.DataFrame, ids_to_keep: set, on: str = 'mag'
) -> pd.DataFrame:
"""
Filters a manifest DataFrame based on a set of IDs.

Parameters:
manifest (pd.DataFrame): The manifest DataFrame to filter.
ids_to_keep (set): The set of IDs to keep.
on (str): The level on which to filter ('mag' or 'sample').
Defaults to 'mag'.

Returns:
pd.DataFrame: The filtered manifest DataFrame.
"""
if on == 'mag':
lvl = 'mag-id'
elif on == 'sample':
lvl = 'sample-id'
else:
raise ValueError(f"Invalid value for 'on' parameter: {on}")

manifest["filename"] = \
manifest.index.get_level_values('sample-id') + "/" + \
manifest.index.get_level_values('mag-id') + ".fasta"

return manifest[manifest.index.get_level_values(lvl).isin(ids_to_keep)]


def _mags_to_df(mags: MultiMAGSequencesDirFmt, on: str):
"""
Converts a MultiMAGSequencesDirFmt object to a DataFrame.

Parameters:
mags (MultiMAGSequencesDirFmt): The MultiMAGSequencesDirFmt
object to convert.
on (str): The level on which to index the DataFrame
('sample' or 'mag').

Returns:
pd.DataFrame: The converted DataFrame.
"""
mags_df = pd.DataFrame.from_dict(mags.sample_dict(), orient="index")
mags_df = mags_df.stack().reset_index()
mags_df.columns = ["sample_id", "mag_id", "mag_fp"]
if on == 'sample':
mags_df.set_index("sample_id", inplace=True)
elif on == 'mag':
mags_df.set_index("mag_id", inplace=True)
return mags_df


def filter_derep_mags(
        mags: MAGSequencesDirFmt,
        metadata: Metadata = None,
        where: str = None,
        exclude_ids: bool = False,
) -> MAGSequencesDirFmt:
    """
    Filters dereplicated MAGs based on the provided metadata.

    The IDs from ``mags.feature_dict()`` are filtered with ``_filter_ids``
    and every retained MAG is duplicated into a new MAGSequencesDirFmt as
    ``<id>.fasta``.

    Parameters:
        mags (MAGSequencesDirFmt): The MAGs to filter.
        metadata (Metadata): The metadata to use for filtering.
        where (str, optional): The condition to use for filtering.
            Defaults to None.
        exclude_ids (bool, optional): Whether to exclude the IDs that
            match the condition. Defaults to False.

    Returns:
        MAGSequencesDirFmt: A new directory format holding the kept MAGs.

    Raises:
        ValueError: If an ID selected for keeping is not among the
            input MAGs.
    """
    results = MAGSequencesDirFmt()
    features = mags.feature_dict()
    ids_to_keep = _filter_ids(
        set(features.keys()), metadata, where, exclude_ids
    )
    for _id in ids_to_keep:
        # guard only the lookup, so a KeyError raised while copying is
        # not misreported as a missing MAG
        try:
            mag_fp = features[_id]
        except KeyError:
            raise ValueError(
                f"{_id!r} is not a MAG present in the input data."
            )
        duplicate(mag_fp, os.path.join(str(results), f"{_id}.fasta"))

    return results


def filter_mags(
        mags: MultiMAGSequencesDirFmt,
        metadata: Metadata = None,
        where: str = None,
        exclude_ids: bool = False,
        on: str = 'mag'
) -> MultiMAGSequencesDirFmt:
    """
    Filters per-sample MAGs based on the provided metadata.

    Filtering happens either on MAG IDs or on sample IDs, depending on
    ``on``. A filtered MANIFEST is written into a new
    MultiMAGSequencesDirFmt and every retained MAG is duplicated to
    ``<sample-id>/<mag-id>.fasta`` inside it.

    Parameters:
        mags (MultiMAGSequencesDirFmt): The MAGs to filter.
        metadata (Metadata): The metadata to use for filtering.
        where (str, optional): The condition to use for filtering.
            Defaults to None.
        exclude_ids (bool, optional): Whether to exclude the IDs that
            match the condition. Defaults to False.
        on (str): The level on which to filter ('mag' or 'sample').
            Defaults to 'mag'.

    Returns:
        MultiMAGSequencesDirFmt: A new directory format holding the kept
            MAGs and their manifest.

    Raises:
        ValueError: If ``on`` is neither 'mag' nor 'sample', or if an
            expected field is missing for a retained MAG.
    """
    # reject an invalid level before any work is done
    if on not in ('mag', 'sample'):
        raise ValueError(f"Invalid value for 'on' parameter: {on}")

    results = MultiMAGSequencesDirFmt()
    mags_df = _mags_to_df(mags, on)

    ids_to_keep = _filter_ids(
        set(mags_df.index), metadata, where, exclude_ids
    )

    filtered_mags = mags_df[mags_df.index.isin(ids_to_keep)]
    filtered_manifest = _filter_manifest(
        mags.manifest.view(pd.DataFrame), ids_to_keep, on=on
    )
    filtered_manifest.to_csv(
        os.path.join(str(results), "MANIFEST"), sep=","
    )

    for _id, row in filtered_mags.iterrows():
        # guard only the field lookups, so errors raised while creating
        # directories or copying files surface unchanged
        try:
            if on == 'mag':
                sample_id, mag_id = row["sample_id"], _id
            else:
                sample_id, mag_id = _id, row["mag_id"]
            mag_fp = row["mag_fp"]
        except KeyError:
            raise ValueError(
                f"{_id!r} is not a MAG present in the input data."
            )
        sample_dir = os.path.join(str(results), sample_id)
        os.makedirs(sample_dir, exist_ok=True)
        duplicate(mag_fp, os.path.join(sample_dir, f"{mag_id}.fasta"))

    return results
7 changes: 7 additions & 0 deletions q2_moshpit/filtering/tests/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# ----------------------------------------------------------------------------
# Copyright (c) 2022-2023, QIIME 2 development team.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file LICENSE, distributed with this software.
# ----------------------------------------------------------------------------
7 changes: 7 additions & 0 deletions q2_moshpit/filtering/tests/data/MANIFEST
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
sample-id,mag-id,filename
id1,mag1,id1/mag1.fasta
id1,mag2,id1/mag2.fasta
id2,mag3,id2/mag3.fasta
id3,mag4,id3/mag4.fasta
id3,mag5,id3/mag5.fasta
id3,mag6,id3/mag6.fasta
4 changes: 4 additions & 0 deletions q2_moshpit/filtering/tests/data/mags/MANIFEST
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
sample-id,mag-id,filename
sample1,24dee6fe-9b84-45bb-8145-de7b092533a1,sample1/24dee6fe-9b84-45bb-8145-de7b092533a1.fasta
sample2,db03f8b6-28e1-48c5-a47c-9c65f38f7357,sample2/db03f8b6-28e1-48c5-a47c-9c65f38f7357.fasta
sample2,d65a71fa-4279-4588-b937-0747ed5d604d,sample2/d65a71fa-4279-4588-b937-0747ed5d604d.fasta
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
>NZ_00000000.1_contig1
ATGTTTCCAGATGCAATGCGTGGGCACTGATGTTTCCAGATGCAATGCGTGGGCACTGATGTTTCCAGATGCAATGCGTGGGCACTGATGTTTCCAGATGCAATGCGTGGGCACTGATGTTTCCAGATGCAATGCGTGGGCACTGATGTTTCCAGATGCAATGCGTGGGCACTGATGTTTCCAGATGCAATGCGTGGGCACTGATGTTTCCAGATGCAATGCGTGGGCACTGATGTTTCCAGATGCAATGCGTGGGCACTGATGTTTCCAGATGCAATGCGTGGGCACTGATGTTTCCAGATGCAATGCGTGGGCACTGATGTTTCCAGATGCAATGCGTGGGCACTGATGTTTCCAGATGCAATGCGTGGGCACTGATGTTTCCAGATGCAATGCGTGGGCACTGATGTTTCCAGATGCAATGCGTGGGCACTGATGTTTCCAGATGCAATGCGTGGGCACTGATGTTTCCAGATGCAATGCGTGGGCACTGATGTTTCCAGATGCAATGCGTGGGCACTGATGTTTCCAGATGCAATGCGTGGGCACTGATGTTTCCAGATGCAATGCGTGGGCACTGATGTTTCCAGATGCAATGCGTGGGCACTGATGTTTCCAGATGCAATGCGTGGGCACTGATGTTTCCAGATGCAATGCGTGGGCACTGATGTTTCCAGATGCAATGCGTGGGCACTGATGTTTCCAGATGCAATGCGTGGGCACTGATGTTTCCAGATGCAATGCGTGGGCACTGATGTTTCCAGATGCAATGCGTGGGCACTGATGTTTCCAGATGCAATGCGTGGGCACTGATGTTTCCAGATGCAATGCGTGGGCACTGATGTTTCCAGATGCAATGCGTGGGCACTGATGTTTCCAGATGCAATGCGTGGGCACTGATGTTTCCAGATGCAATGCGTGGGCACTGATGTTTCCAGATGCAATGCGTGGGCACTGATG
>NZ_00000000.1_contig2
TTGACGTCAGTGAAAACCATGCAGTGTTGACGTCAGTGAAAACCATGCAGTGTTGACGTCAGTGAAAACCATGCAGTGTTGACGTCAGTGAAAACCATGCAGTGTTGACGTCAGTGAAAACCATGCAGTGTTGACGTCAGTGAAAACCATGCAGTGTTGACGTCAGTGAAAACCATGCAGTGTTGACGTCAGTGAAAACCATGCAGTGTTGACGTCAGTGAAAACCATGCAGTGTTGACGTCAGTGAAAACCATGCAGTGTTGACGTCAGTGAAAACCATGCAGTGTTGACGTCAGTGAAAACCATGCAGTGTTGACGTCAGTGAAAACCATGCAGTGTTGACGTCAGTGAAAACCATGCAGTGTTGACGTCAGTGAAAACCATGCAGTGTTGACGTCAGTGAAAACCATGCAGTGTTGACGTCAGTGAAAACCATGCAGTGTTGACGTCAGTGAAAACCATGCAGTGTTGACGTCAGTGAAAACCATGCAGTGTTGACGTCAGTGAAAACCATGCAGTGTTGACGTCAGTGAAAACCATGCAGTGTTGACGTCAGTGAAAACCATGCAGTGTTGACGTCAGTGAAAACCATGCAGTGTTGACGTCAGTGAAAACCATGCAGTGTTGACGTCAGTGAAAACCATGCAGTGTTGACGTCAGTGAAAACCATGCAGTGTTGACGTCAGTGAAAACCATGCAGTGTTGACGTCAGTGAAAACCATGCAGTGTTGACGTCAGTGAAAACCATGCAGTGTTGACGTCAGTGAAAACCATGCAGTGTTGACGTCAGTGAAAACCATGCAGTGTTGACGTCAGTGAAAACCATGCAGTGTTGACGTCAGTGAAAACCATGCAGTGTTGACGTCAGTGAAAACCATGCAGTGTTGACGTCAGTGAAAACCATGCAGTGTTGACGTCAGTGAAAACCATGCAGTGTTGACGTCAGTGAAAACCATGCAGTGTTGACGTCAGTGA
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
>NZ_CP018863.1_contig1
GCCTCCTCCCAGTTCGTCTCAGCGCTGCTGCTGGTCGGCGCCAAATTCCGTAACGGGCTGCACCTCGAACATTCCGGCCAGAGCGTCCCCAGCCTGGACCACGTTGCCATGACCGTGGCGGTACTGCGCAGCGTCGGGGTGGAGGTAGACGATTCCCGGCAGAACCACTGGTGGTCCGGCCCGGACCGGTCAAGGCCTTCGACGTCACCGTCGAACAGGACCTTTCCAATGCCGGCCCCTTCCTCGCGGCAGCCCTGGCCACGAAGGGAACGGTTCGGATCCCAGGCTGGCCGGAGAAAACCACGCAGGTAGGTGACAAATGGCGCAGCATCCTGGCGCAACTCGGCGCCACTGTCAGCTACGAGAACGGCACCCTCACCGTAACCGGCGGGGCAGAGATCACCGGGGCGCAGCTCGCCGACACCAGCGAACTTGCCCCCACCACGGCGGCGCTCTGTGCCCTGGCCGGCAGCGAATCCAGGCTCACCGGAATTGCCCACTTGCGGGGACACGAAACAGACCGGCTGGCGGCTCTGGTCGCGGAAATCAATGCCTTGGGTGGCGACGCCGAAGAAACCGAAGACGGGTTGATCATCCGTCCGAGGCCACTGCATGGCGGGGTCTTCCATTCATATGAGGACCACAGGATGGCCACCGCCGGAGCCATTATCGGGCTGGCAGTCGAGGGCGTGGAAGTCGAAGACATCGGCACCACGGCCAAGACCATGCCCGAGTTCCCGCGGCTATGGCAGGACCTGTTCGAGACTTCCGTCCGCCAGTCCGAGGCGGGAGCGCTCTAAGGTGGTGCGCGGCAACCGTACGTGGGACGAGTCCGATGTCCGCATCCGTCCCAACAAGCGCGGCTCGCGTCCGCGTACCAAGGAACGGCCTGCCCACGAAGACGCCGTCATCGGGCGGATCATCACCGTGGACCGCGGCCGCTACACCGCGGTCGTCGATGAAGACACTGCCCGGGAACGGGTGGTTGTCGCCGCCCGTG
>NZ_CP018863.1_contig2
CCCGGGAGCTTCGTCGCAGTCCGGTGGTGGCCGGCGACTTCGTAGCGCTCGTCGGTGACATTACCGGTGAGCCGGATACGTTGGCCCGGCTGGTCCGGATTGAGGAACGCCGGACGCTGTTGCGCCGCAGCGCCGACGATACAGATCCCGTGGAGCGGGTAGTCGTCGCCAATGCAGACCAGCTGGTCATCGTCGTGGCCGCCGCAAACCCCGAGCCGCGCACCGGTTTCATCGACCGCGCCTTGGTAGCGGCGTACGACGCCGGTATCAGCCCGCTGCTGTGCGTCACCAAAGCGGACGTCAAGGATCCCGAAGAACTGCTCTCCAACTACCGGCACCTGGACCTGCCCGTGATCGTCAGCCGGACGGCCGGCACGGAGGGCTCCGGGGTGGATGCACGGTCCGCCGACGGGCTGTCTGCCCGTCTCGACCGTGACGCCGTAGCGGCGCTCCGTGGCTATCTGGATGGGATGGTCAGCGTCATGCTCGGCCATTCGGGCGTGGGCAAGTCCACCATGGTCAATGCCCTCACGGGGGCGGAGCGCGCCACGGGGGGAGTCAACGCGGTGACCGGGCGGGGCCGGCATACCTCCTCCTCGGCGCTGGCCCTGAAGCTGGCCGACGCTCCGGCTGGCAGCTGGATCATCGACACGCCCGGCATCCGTTCTTTTGGACTGGCCCACGTGGACCCGGACCGGATCATTTCCGCTTTTCCCGATTTGGAGCCCGGGACGGCGGACTGCGAGCGGGGCTGCAAGCACGACGACCATGCCGTCAACTGCGGCGTGGACGCCTGGGTGGCCTCCGGGCAGGCCGGCGAATCCGGCCCGGCACGGCTGGCCTCGCTGCGCCGTTTGCTGGGAACGGAAGAACGCGCCCAGGCGAAGGAACTCGGGTTCCAGTAGCACCGCCGTCGTCGGTCAGGGACTTCACATCCCGCATCCGGCCGCCAAATAAGGATAAGTTGAAGCCTATGACCCGTGACGTTCAAAGCTATAAC
>NZ_CP018863.1_contig3
GACGATCTGCGCCTGGCCCATGTGATGGCCGATTCCGTGGATTCGCAGACCATGGCCCGCTTCAAGGCGCTGGACCTGAAAATCGAGACCAAGCCGGATCTCACCCCTGTCACGGATGCGGACCGCGCCGCTGAAGAGGCCATCCGCGGCCAACTCTCCCGGGCCCGGCCGCGCGACGCGGTCCTCGGCGAGGAATACGGCAGCAGCGGCCACGGCTCCCGCCGCTGGATCATCGATCCCATCGACGGCACGAAGAACTTCGTCCGCGGGGTGCCGGTCTGGGCCACCTTGATCGCGCTGGTAGACGAAGACCGTCCCGTGGTCGGCCTGGTCAGCGCGCCGGCTCTGGGCAAGCGCTGGTGGGCCGCGACCGGAACCGGTGCCTACATGGGACGTTCGCTGTCCGCGGCCACCCGGCTCCGGGTATCCGATGTCAACCGGCTCGAGGACGCGTCCCTCTCCTATTCCAGCCTCACCGGCTGGCAGGAACGCGGCAACTTCCCGGAGTTCCTCGGCCTCACCGAATCCGTCTGGCGCACCCGTGCCTACGGGGACTTCTGGTCCTACTGCATGGTGGCCGAGGGCGCCGTCGACATTGCCTGCGAACCCGAACTCAACCTCTATGACATGGCGGCCCTCGTGCCGATCGTGACCGAGGCCGGCGGACGGTTCAGTTCGCTCGAGGGCGAGGACGGACCCTTCGGCGGCAACGCGTTGGCCACGAACGGCACGCTGCACGACGAGGTCCTCTACCGGCTCAATCCGCAGTTGCGCGGCCAGCGTCCGGCCGCACACCCGGAGGACGGGTCCCTGCCGGAAACCGCTCCGGAGGCCTCCATGGAGGCGGACGGCCTGCGCTGACGCTGTCTTTTGTGACGAATTACGACGGCGGCCGTCCCCATTCCGGGGATGGCCGCCTTTTCGTTCCCGTAACAAAGATGCGGCCCCTCCGGCCGGACAATAATCTCGATGGCAGGTCACGAGTGCCAGCGCTAAACCC
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
>NZ_CP007255.1_contig1
GGCGTGCCCTCAGCGGTCTCCTGCCCATTTACCCGGTCCATCAGACCGCGGACATACTCTTCTGCAAAGGCAACCTCGTTCCAGTCGGGAAAGACAACCTCCCACACGTCGAAATGACCCGTGTCATCGCGCGACGCTTCAACGACCGCTACGGCCAGGTCTTCCCCGAACCTCAAGCGCTGCTTACCTCGAGCCCCGAAGTACCTGAACTCGACGGCAGAAAGATATCCAAGAGCTACGGAAACGCCATCACACTGTCGATGACCGCCGACGACACCGCAGCGGCAATCCGCAAGGCACCGACCGACTCACAACGCCGCATCACCTTCGACCCGATCAACCGCCCCGGGGTATCCGCACTGCTGACCACAGCAGCATTGTGCTCAGGTCGCGACGAGAATGACCTCGCCGACCAGATCGGCGACGCCGGCGCAGGCGCACTCAAAAAGCTCACCACCACGCTCGGGAACGACTTCCTCGCCCCTCACCGGCAACGCAGAAACGACCTCGCCCAAGACCCCCACCATGTCGCCGACGTCCTGCGCATCGGAAACCAGCGCGCGAACGAAATCGCCCAACAGACCCTCGAGGAAGTCCGAACCGCAATGGGCACCATCTACTGATCGACTCCTACCCATAGCGCACTCCAGTAGTTTGGACATTCACCGGGTTGCGACGATCCCTGGAATCCGCCATCGGGAGTGTCTGATCAACGGTGGATCCGACCTCGGTATCGGTTAATTACTGTTGAGGGTGATGCGTGCTTCGAAGGTGATAGCGAACGCGTTCAGCGCTGGCTTCCACCTCATCGCCCATCGCGCCTTACCTTTCCCAGTCGGGTCCAGAGCCCGAGTCGCCAGATACAAGCATTCGAGCGCCGCCTGCTCCGTCGGGAAGTGCCCACGAGCACGCACAGCACGCCGGTAACGAGCGTTGACGGACTCGATCGCATTGGTCGAGCAGATCACCCGCAGGATCTCTTTCGTCGTAATCCAAGAAC
>NZ_CP007255.1_contig3
ATGATCGATCCTTCCCACCAAGCTCAGCTCGGCGTGTCGAACCAAGACCGGATCAACCGTTTATCTGACAGTCCCCTCACTAGCGAGTAAGTCGTCAGAGCGATTCTTCGGGTTTGGGTGTGCGTTTTTGACCATCGCCATACCGGACAGCACGGAACGGGCTTCTGGGCGTGACTGACGTGGAGAAACTTAGCGAAAAGTTTCCAATTGGGTGTCCCGTCGGCGAACGTGGGTGTGAAGATCGGGTGTCGAGATGCAACACCGCCGGCAGAACACGGGATCCAGGTTGATCTTCTGGTGAAGGCGGGTGTGGATCGCGACCAGGCTTATGTCGATCATCGTGTCAGCGGCGGGCAGGCTCGTCGGCCAGGTTTGGATGATGCGATCAACGCGGCTCGGGCCGGCGACGTCCTTTGTGTGACGATGCCTCATTAACTGTCGCGCTCGGCGAAGGATCTGCATGAGCCGGTAGAACGGATCGCGGAGAAGGTGTATCGCTGTCAATTGACGGTCAGTTGTACGACCCGCGCGATCCGATGGGGAAGATGTTCATCGGCTTGCTGGGGATTGATGACGGAGTTTCGAGTCGGATCTGATTCGTGGCCGCACTCGTGACGCCCTGGCCGCCGCGACAGCTGCTGAGAAGAGAAAGGTGCGTCGGACAAGCTCACCGGCTGGAGAAAAAAGTCAGTCACGATCGGGTCTTTTCTTCCAGAGGCTCCCGGTCTGGAGCCTCACCGGTTGCCGCGGGTCGGGCTCCTTCGATGTGTGGGTCAGCTCGTTACGCCGCGTTAAGAATCTGAAAACTTGGTGAGCAAACCTTCCTTTCCCAGGTGAAGCGGCGGATTATCGAAGTGGCCGCGCATCTGGTTACCGCGCGGCATGTCGAGCGCGCCTGAAGGCACAACGGGCCGGGAGGTCACATGCTGCTACACCAATCGATAGGGTTGGAGCGCTTCAACGAACTACCGCGACAGAAGGCGGTTCACGCACTGTTCGA
4 changes: 4 additions & 0 deletions q2_moshpit/filtering/tests/data/metadata-derep.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
mag_id sample_id input_file dataset complete single duplicated fragmented missing n_markers scaffold_n50 contigs_n50 percent_gaps scaffolds length
24dee6fe-9b84-45bb-8145-de7b092533a1 sample1 24dee6fe-9b84-45bb-8145-de7b092533a1.fasta bacteria_odb10 28.2 27.4 0.8 8.9 62.9 124 4785 4785 0.000% 265 1219165
d65a71fa-4279-4588-b937-0747ed5d604d sample2 d65a71fa-4279-4588-b937-0747ed5d604d.fasta bacteria_odb10 1.6 1.6 0.0 1.6 96.8 124 3548 3548 0.000% 67 245922
db03f8b6-28e1-48c5-a47c-9c65f38f7357 sample2 db03f8b6-28e1-48c5-a47c-9c65f38f7357.fasta bacteria_odb10 26.6 26.6 0.0 3.2 70.2 124 78679 78679 0.000% 17 714893
3 changes: 3 additions & 0 deletions q2_moshpit/filtering/tests/data/metadata-sample.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
id input_file dataset metric
sample1 24dee6fe-9b84-45bb-8145-de7b092533a1.fasta bacteria_odb10 28.2
sample2 d65a71fa-4279-4588-b937-0747ed5d604d.fasta bacteria_odb10 1.6
Loading
Loading