Skip to content

Commit

Permalink
Merge branch 'main' into busco_w_feature_data_iss_111_from_main
Browse files Browse the repository at this point in the history
  • Loading branch information
Sann5 authored May 21, 2024
2 parents f3fdcd0 + 0c559e6 commit cdc9e30
Show file tree
Hide file tree
Showing 18 changed files with 448 additions and 95 deletions.
91 changes: 0 additions & 91 deletions .github/workflows/ci.yaml

This file was deleted.

3 changes: 1 addition & 2 deletions ci/recipe/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,11 @@ requirements:
- kaiju
- kraken2
- metabat2
- pandas
- qiime2 {{ qiime2_epoch }}.*
- q2-types {{ qiime2_epoch }}.*
- q2templates {{ qiime2_epoch }}.*
- q2-assembly {{ qiime2_epoch }}.*
- eggnog-mapper >=2.1.10
- diamond
- samtools
- tqdm
- xmltodict
Expand Down
3 changes: 2 additions & 1 deletion q2_moshpit/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from . import prodigal
from ._version import get_versions
from .dereplication import dereplicate_mags
from .filtering import filter_derep_mags, filter_mags
from .kaiju import classification as kaiju_class, database as kaiju_db
from .kraken2 import (
classification as kraken_class,
Expand All @@ -28,5 +29,5 @@
'metabat2', 'bracken', 'kraken_class', 'kraken_db',
'kaiju_class', 'kaiju_db', 'dereplicate_mags', 'eggnog',
'busco', 'prodigal', 'kraken_helpers', 'partition',
'get_feature_lengths'
'filter_derep_mags', 'filter_mags', 'get_feature_lengths'
]
7 changes: 7 additions & 0 deletions q2_moshpit/busco/types/_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,4 +37,11 @@ def _2(data: pd.DataFrame) -> BUSCOResultsFormat:
def _3(ff: BUSCOResultsFormat) -> Metadata:
    """Transform BUSCO results into Metadata with numeric columns parsed."""
    with ff.open() as handle:
        results = _read_dataframe(handle)

    # Header fields at positions 4-11 and 13 onwards are parsed as numbers;
    # position 12 (the percent_gaps column) is deliberately left as-is.
    numeric_columns = [
        name
        for position, name in enumerate(BUSCOResultsFormat.HEADER)
        if 4 <= position < 12 or position >= 13
    ]
    for name in numeric_columns:
        results[name] = pd.to_numeric(results[name])
    return Metadata(results)
5 changes: 5 additions & 0 deletions q2_moshpit/busco/types/tests/test_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,11 @@ def test_result_to_metadata_transformer(self):
self.fp, sep='\t', header=0, index_col=0, dtype='str'
)
df.index.name = 'id'
for col in [
'complete', 'single', 'duplicated', 'fragmented', 'missing',
'n_markers', 'scaffold_n50', 'contigs_n50', 'scaffolds', 'length'
]:
df[col] = pd.to_numeric(df[col])
exp = qiime2.Metadata(df)

self.assertEqual(obs, exp)
11 changes: 11 additions & 0 deletions q2_moshpit/filtering/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# ----------------------------------------------------------------------------
# Copyright (c) 2022-2023, QIIME 2 development team.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file LICENSE, distributed with this software.
# ----------------------------------------------------------------------------

from .filter_mags import filter_derep_mags, filter_mags

__all__ = ["filter_derep_mags", "filter_mags"]
159 changes: 159 additions & 0 deletions q2_moshpit/filtering/filter_mags.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
# ----------------------------------------------------------------------------
# Copyright (c) 2022-2023, QIIME 2 development team.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file LICENSE, distributed with this software.
# ----------------------------------------------------------------------------
import os

import pandas as pd
from qiime2 import Metadata
from qiime2.util import duplicate

from q2_types.feature_data_mag import MAGSequencesDirFmt
from q2_types.per_sample_sequences import MultiMAGSequencesDirFmt


def _filter_ids(
    ids: set,
    metadata: Metadata = None,
    where: str = None,
    exclude_ids: bool = False
) -> set:
    """
    Filters IDs based on the provided metadata.

    The input set is never modified; a new set is always returned.

    Parameters:
        ids (set): The set of IDs to filter.
        metadata (Metadata, optional): The metadata to use for filtering.
            Defaults to None in the signature, but a value is required.
        where (str, optional): The condition passed to
            ``metadata.get_ids`` to select IDs. Defaults to None.
        exclude_ids (bool, optional): Whether to exclude the IDs that
            match the condition. Defaults to False.

    Returns:
        set: The filtered set of IDs. If the query selects no IDs at all,
            every input ID is retained.

    Raises:
        ValueError: If no metadata was provided.
    """
    if metadata is None:
        raise ValueError("Metadata is required to filter IDs.")

    selected_ids = set(metadata.get_ids(where=where))
    if not selected_ids:
        # Nothing matched the query: keep the IDs as they are.
        print("The filter query returned no IDs to filter out.")
        kept_ids = set(ids)
    elif exclude_ids:
        # Build a new set rather than using "-=" so the caller's set
        # is not changed behind its back.
        kept_ids = set(ids) - selected_ids
    else:
        kept_ids = set(ids) & selected_ids
    print(f"Found {len(kept_ids)} IDs to keep.")
    return kept_ids


def _filter_manifest(
    manifest: pd.DataFrame, ids_to_keep: set, on: str = 'mag'
) -> pd.DataFrame:
    """
    Filters a manifest DataFrame based on a set of IDs.

    The "filename" column of the result is rebuilt as
    "<sample-id>/<mag-id>.fasta" from the index levels. The input
    DataFrame itself is left unmodified.

    Parameters:
        manifest (pd.DataFrame): The manifest DataFrame to filter. Its
            index must have the levels 'sample-id' and 'mag-id'.
        ids_to_keep (set): The set of IDs to keep.
        on (str): The level on which to filter ('mag' or 'sample').
            Defaults to 'mag'.

    Returns:
        pd.DataFrame: The filtered manifest DataFrame.

    Raises:
        ValueError: If 'on' is neither 'mag' nor 'sample'.
    """
    if on == 'mag':
        lvl = 'mag-id'
    elif on == 'sample':
        lvl = 'sample-id'
    else:
        raise ValueError(f"Invalid value for 'on' parameter: {on}")

    # Copy first: assigning the column directly on "manifest" would
    # overwrite the caller's "filename" values as a side effect.
    updated = manifest.copy()
    updated["filename"] = \
        updated.index.get_level_values('sample-id') + "/" + \
        updated.index.get_level_values('mag-id') + ".fasta"

    return updated[updated.index.get_level_values(lvl).isin(ids_to_keep)]


def _mags_to_df(mags: MultiMAGSequencesDirFmt, on: str):
    """
    Converts a MultiMAGSequencesDirFmt object to a DataFrame.

    One row is produced per (sample, MAG) pair found in
    ``mags.sample_dict()``, which is read as a mapping of sample ID to a
    mapping of MAG ID to file path.

    Parameters:
        mags (MultiMAGSequencesDirFmt): The MultiMAGSequencesDirFmt
            object to convert.
        on (str): The level on which to index the DataFrame
            ('sample' or 'mag').

    Returns:
        pd.DataFrame: The converted DataFrame with the columns
            "sample_id", "mag_id" and "mag_fp", one of the first two
            being used as the index depending on 'on'.

    Raises:
        ValueError: If 'on' is neither 'sample' nor 'mag'.
    """
    if on == 'sample':
        index_column = "sample_id"
    elif on == 'mag':
        index_column = "mag_id"
    else:
        # Same error as _filter_manifest instead of silently returning
        # a DataFrame without the expected index.
        raise ValueError(f"Invalid value for 'on' parameter: {on}")

    # Build the long table directly. Going through a wide sample x MAG
    # frame and stack() depends on stack() dropping the NaN cells of
    # absent pairs; the new stack implementation no longer drops them.
    # Building rows directly also yields a well-formed empty frame.
    records = [
        (sample_id, mag_id, mag_fp)
        for sample_id, sample_mags in mags.sample_dict().items()
        for mag_id, mag_fp in sample_mags.items()
    ]
    mags_df = pd.DataFrame(
        records, columns=["sample_id", "mag_id", "mag_fp"]
    )
    return mags_df.set_index(index_column)


def filter_derep_mags(
    mags: MAGSequencesDirFmt,
    metadata: Metadata,
    where: str = None,
    exclude_ids: bool = False,
) -> MAGSequencesDirFmt:
    """
    Filters dereplicated MAGs based on the provided metadata.

    Parameters:
        mags (MAGSequencesDirFmt): The MAGs to filter.
        metadata (Metadata): The metadata used to select MAG IDs.
        where (str, optional): The condition to use for filtering.
            Defaults to None.
        exclude_ids (bool, optional): Whether to exclude the MAGs that
            match the condition instead of keeping them.
            Defaults to False.

    Returns:
        MAGSequencesDirFmt: A new directory format holding one
            "<id>.fasta" file per retained MAG.

    Raises:
        ValueError: If a retained ID has no entry in the input MAGs.
    """
    results = MAGSequencesDirFmt()
    source_paths = mags.feature_dict()
    retained = _filter_ids(
        set(source_paths), metadata, where, exclude_ids
    )

    output_root = str(results)
    for _id in retained:
        try:
            destination = os.path.join(output_root, f"{_id}.fasta")
            duplicate(source_paths[_id], destination)
        except KeyError:
            raise ValueError(
                f"{_id!r} is not a MAG present in the input data."
            )

    return results


def filter_mags(
    mags: MultiMAGSequencesDirFmt,
    metadata: Metadata,
    where: str = None,
    exclude_ids: bool = False,
    on: str = 'mag'
) -> MultiMAGSequencesDirFmt:
    """
    Filters per-sample MAGs based on the provided metadata.

    Parameters:
        mags (MultiMAGSequencesDirFmt): The MAGs to filter.
        metadata (Metadata): The metadata used to select IDs.
        where (str, optional): The condition to use for filtering.
            Defaults to None.
        exclude_ids (bool, optional): Whether to exclude the entries
            that match the condition instead of keeping them.
            Defaults to False.
        on (str): Whether the metadata IDs refer to MAGs ('mag') or to
            samples ('sample'). Defaults to 'mag'.

    Returns:
        MultiMAGSequencesDirFmt: A new directory format containing a
            filtered MANIFEST and the retained MAGs, laid out as
            "<sample-id>/<mag-id>.fasta".

    Raises:
        ValueError: If a key lookup fails while copying the MAGs.
    """
    results = MultiMAGSequencesDirFmt()
    output_root = str(results)

    mags_df = _mags_to_df(mags, on)
    ids_to_keep = _filter_ids(
        set(mags_df.index), metadata, where, exclude_ids
    )
    keep_mask = mags_df.index.isin(ids_to_keep)
    selected = mags_df.loc[keep_mask]

    # Write the filtered manifest before any sequence file is copied.
    manifest_df = mags.manifest.view(pd.DataFrame)
    _filter_manifest(manifest_df, ids_to_keep, on=on).to_csv(
        os.path.join(output_root, "MANIFEST"), sep=","
    )

    try:
        for _id, row in selected.iterrows():
            # The index holds whichever ID was filtered on; the other
            # one is read from the row.
            if on == 'mag':
                sample_name, mag_name = row["sample_id"], _id
            else:
                sample_name, mag_name = _id, row['mag_id']
            sample_dir = os.path.join(output_root, sample_name)
            os.makedirs(sample_dir, exist_ok=True)
            duplicate(
                row['mag_fp'], os.path.join(sample_dir, f"{mag_name}.fasta")
            )
    except KeyError:
        raise ValueError(f"{_id!r} is not a MAG present in the input data.")

    return results
7 changes: 7 additions & 0 deletions q2_moshpit/filtering/tests/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# ----------------------------------------------------------------------------
# Copyright (c) 2022-2023, QIIME 2 development team.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file LICENSE, distributed with this software.
# ----------------------------------------------------------------------------
7 changes: 7 additions & 0 deletions q2_moshpit/filtering/tests/data/MANIFEST
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
sample-id,mag-id,filename
id1,mag1,id1/mag1.fasta
id1,mag2,id1/mag2.fasta
id2,mag3,id2/mag3.fasta
id3,mag4,id3/mag4.fasta
id3,mag5,id3/mag5.fasta
id3,mag6,id3/mag6.fasta
4 changes: 4 additions & 0 deletions q2_moshpit/filtering/tests/data/mags/MANIFEST
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
sample-id,mag-id,filename
sample1,24dee6fe-9b84-45bb-8145-de7b092533a1,sample1/24dee6fe-9b84-45bb-8145-de7b092533a1.fasta
sample2,db03f8b6-28e1-48c5-a47c-9c65f38f7357,sample2/db03f8b6-28e1-48c5-a47c-9c65f38f7357.fasta
sample2,d65a71fa-4279-4588-b937-0747ed5d604d,sample2/d65a71fa-4279-4588-b937-0747ed5d604d.fasta
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
>NZ_00000000.1_contig1
ATGTTTCCAGATGCAATGCGTGGGCACTGATGTTTCCAGATGCAATGCGTGGGCACTGATGTTTCCAGATGCAATGCGTGGGCACTGATGTTTCCAGATGCAATGCGTGGGCACTGATGTTTCCAGATGCAATGCGTGGGCACTGATGTTTCCAGATGCAATGCGTGGGCACTGATGTTTCCAGATGCAATGCGTGGGCACTGATGTTTCCAGATGCAATGCGTGGGCACTGATGTTTCCAGATGCAATGCGTGGGCACTGATGTTTCCAGATGCAATGCGTGGGCACTGATGTTTCCAGATGCAATGCGTGGGCACTGATGTTTCCAGATGCAATGCGTGGGCACTGATGTTTCCAGATGCAATGCGTGGGCACTGATGTTTCCAGATGCAATGCGTGGGCACTGATGTTTCCAGATGCAATGCGTGGGCACTGATGTTTCCAGATGCAATGCGTGGGCACTGATGTTTCCAGATGCAATGCGTGGGCACTGATGTTTCCAGATGCAATGCGTGGGCACTGATGTTTCCAGATGCAATGCGTGGGCACTGATGTTTCCAGATGCAATGCGTGGGCACTGATGTTTCCAGATGCAATGCGTGGGCACTGATGTTTCCAGATGCAATGCGTGGGCACTGATGTTTCCAGATGCAATGCGTGGGCACTGATGTTTCCAGATGCAATGCGTGGGCACTGATGTTTCCAGATGCAATGCGTGGGCACTGATGTTTCCAGATGCAATGCGTGGGCACTGATGTTTCCAGATGCAATGCGTGGGCACTGATGTTTCCAGATGCAATGCGTGGGCACTGATGTTTCCAGATGCAATGCGTGGGCACTGATGTTTCCAGATGCAATGCGTGGGCACTGATGTTTCCAGATGCAATGCGTGGGCACTGATGTTTCCAGATGCAATGCGTGGGCACTGATGTTTCCAGATGCAATGCGTGGGCACTGATG
>NZ_00000000.1_contig2
TTGACGTCAGTGAAAACCATGCAGTGTTGACGTCAGTGAAAACCATGCAGTGTTGACGTCAGTGAAAACCATGCAGTGTTGACGTCAGTGAAAACCATGCAGTGTTGACGTCAGTGAAAACCATGCAGTGTTGACGTCAGTGAAAACCATGCAGTGTTGACGTCAGTGAAAACCATGCAGTGTTGACGTCAGTGAAAACCATGCAGTGTTGACGTCAGTGAAAACCATGCAGTGTTGACGTCAGTGAAAACCATGCAGTGTTGACGTCAGTGAAAACCATGCAGTGTTGACGTCAGTGAAAACCATGCAGTGTTGACGTCAGTGAAAACCATGCAGTGTTGACGTCAGTGAAAACCATGCAGTGTTGACGTCAGTGAAAACCATGCAGTGTTGACGTCAGTGAAAACCATGCAGTGTTGACGTCAGTGAAAACCATGCAGTGTTGACGTCAGTGAAAACCATGCAGTGTTGACGTCAGTGAAAACCATGCAGTGTTGACGTCAGTGAAAACCATGCAGTGTTGACGTCAGTGAAAACCATGCAGTGTTGACGTCAGTGAAAACCATGCAGTGTTGACGTCAGTGAAAACCATGCAGTGTTGACGTCAGTGAAAACCATGCAGTGTTGACGTCAGTGAAAACCATGCAGTGTTGACGTCAGTGAAAACCATGCAGTGTTGACGTCAGTGAAAACCATGCAGTGTTGACGTCAGTGAAAACCATGCAGTGTTGACGTCAGTGAAAACCATGCAGTGTTGACGTCAGTGAAAACCATGCAGTGTTGACGTCAGTGAAAACCATGCAGTGTTGACGTCAGTGAAAACCATGCAGTGTTGACGTCAGTGAAAACCATGCAGTGTTGACGTCAGTGAAAACCATGCAGTGTTGACGTCAGTGAAAACCATGCAGTGTTGACGTCAGTGAAAACCATGCAGTGTTGACGTCAGTGAAAACCATGCAGTGTTGACGTCAGTGA
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
>NZ_CP018863.1_contig1
GCCTCCTCCCAGTTCGTCTCAGCGCTGCTGCTGGTCGGCGCCAAATTCCGTAACGGGCTGCACCTCGAACATTCCGGCCAGAGCGTCCCCAGCCTGGACCACGTTGCCATGACCGTGGCGGTACTGCGCAGCGTCGGGGTGGAGGTAGACGATTCCCGGCAGAACCACTGGTGGTCCGGCCCGGACCGGTCAAGGCCTTCGACGTCACCGTCGAACAGGACCTTTCCAATGCCGGCCCCTTCCTCGCGGCAGCCCTGGCCACGAAGGGAACGGTTCGGATCCCAGGCTGGCCGGAGAAAACCACGCAGGTAGGTGACAAATGGCGCAGCATCCTGGCGCAACTCGGCGCCACTGTCAGCTACGAGAACGGCACCCTCACCGTAACCGGCGGGGCAGAGATCACCGGGGCGCAGCTCGCCGACACCAGCGAACTTGCCCCCACCACGGCGGCGCTCTGTGCCCTGGCCGGCAGCGAATCCAGGCTCACCGGAATTGCCCACTTGCGGGGACACGAAACAGACCGGCTGGCGGCTCTGGTCGCGGAAATCAATGCCTTGGGTGGCGACGCCGAAGAAACCGAAGACGGGTTGATCATCCGTCCGAGGCCACTGCATGGCGGGGTCTTCCATTCATATGAGGACCACAGGATGGCCACCGCCGGAGCCATTATCGGGCTGGCAGTCGAGGGCGTGGAAGTCGAAGACATCGGCACCACGGCCAAGACCATGCCCGAGTTCCCGCGGCTATGGCAGGACCTGTTCGAGACTTCCGTCCGCCAGTCCGAGGCGGGAGCGCTCTAAGGTGGTGCGCGGCAACCGTACGTGGGACGAGTCCGATGTCCGCATCCGTCCCAACAAGCGCGGCTCGCGTCCGCGTACCAAGGAACGGCCTGCCCACGAAGACGCCGTCATCGGGCGGATCATCACCGTGGACCGCGGCCGCTACACCGCGGTCGTCGATGAAGACACTGCCCGGGAACGGGTGGTTGTCGCCGCCCGTG
>NZ_CP018863.1_contig2
CCCGGGAGCTTCGTCGCAGTCCGGTGGTGGCCGGCGACTTCGTAGCGCTCGTCGGTGACATTACCGGTGAGCCGGATACGTTGGCCCGGCTGGTCCGGATTGAGGAACGCCGGACGCTGTTGCGCCGCAGCGCCGACGATACAGATCCCGTGGAGCGGGTAGTCGTCGCCAATGCAGACCAGCTGGTCATCGTCGTGGCCGCCGCAAACCCCGAGCCGCGCACCGGTTTCATCGACCGCGCCTTGGTAGCGGCGTACGACGCCGGTATCAGCCCGCTGCTGTGCGTCACCAAAGCGGACGTCAAGGATCCCGAAGAACTGCTCTCCAACTACCGGCACCTGGACCTGCCCGTGATCGTCAGCCGGACGGCCGGCACGGAGGGCTCCGGGGTGGATGCACGGTCCGCCGACGGGCTGTCTGCCCGTCTCGACCGTGACGCCGTAGCGGCGCTCCGTGGCTATCTGGATGGGATGGTCAGCGTCATGCTCGGCCATTCGGGCGTGGGCAAGTCCACCATGGTCAATGCCCTCACGGGGGCGGAGCGCGCCACGGGGGGAGTCAACGCGGTGACCGGGCGGGGCCGGCATACCTCCTCCTCGGCGCTGGCCCTGAAGCTGGCCGACGCTCCGGCTGGCAGCTGGATCATCGACACGCCCGGCATCCGTTCTTTTGGACTGGCCCACGTGGACCCGGACCGGATCATTTCCGCTTTTCCCGATTTGGAGCCCGGGACGGCGGACTGCGAGCGGGGCTGCAAGCACGACGACCATGCCGTCAACTGCGGCGTGGACGCCTGGGTGGCCTCCGGGCAGGCCGGCGAATCCGGCCCGGCACGGCTGGCCTCGCTGCGCCGTTTGCTGGGAACGGAAGAACGCGCCCAGGCGAAGGAACTCGGGTTCCAGTAGCACCGCCGTCGTCGGTCAGGGACTTCACATCCCGCATCCGGCCGCCAAATAAGGATAAGTTGAAGCCTATGACCCGTGACGTTCAAAGCTATAAC
>NZ_CP018863.1_contig3
GACGATCTGCGCCTGGCCCATGTGATGGCCGATTCCGTGGATTCGCAGACCATGGCCCGCTTCAAGGCGCTGGACCTGAAAATCGAGACCAAGCCGGATCTCACCCCTGTCACGGATGCGGACCGCGCCGCTGAAGAGGCCATCCGCGGCCAACTCTCCCGGGCCCGGCCGCGCGACGCGGTCCTCGGCGAGGAATACGGCAGCAGCGGCCACGGCTCCCGCCGCTGGATCATCGATCCCATCGACGGCACGAAGAACTTCGTCCGCGGGGTGCCGGTCTGGGCCACCTTGATCGCGCTGGTAGACGAAGACCGTCCCGTGGTCGGCCTGGTCAGCGCGCCGGCTCTGGGCAAGCGCTGGTGGGCCGCGACCGGAACCGGTGCCTACATGGGACGTTCGCTGTCCGCGGCCACCCGGCTCCGGGTATCCGATGTCAACCGGCTCGAGGACGCGTCCCTCTCCTATTCCAGCCTCACCGGCTGGCAGGAACGCGGCAACTTCCCGGAGTTCCTCGGCCTCACCGAATCCGTCTGGCGCACCCGTGCCTACGGGGACTTCTGGTCCTACTGCATGGTGGCCGAGGGCGCCGTCGACATTGCCTGCGAACCCGAACTCAACCTCTATGACATGGCGGCCCTCGTGCCGATCGTGACCGAGGCCGGCGGACGGTTCAGTTCGCTCGAGGGCGAGGACGGACCCTTCGGCGGCAACGCGTTGGCCACGAACGGCACGCTGCACGACGAGGTCCTCTACCGGCTCAATCCGCAGTTGCGCGGCCAGCGTCCGGCCGCACACCCGGAGGACGGGTCCCTGCCGGAAACCGCTCCGGAGGCCTCCATGGAGGCGGACGGCCTGCGCTGACGCTGTCTTTTGTGACGAATTACGACGGCGGCCGTCCCCATTCCGGGGATGGCCGCCTTTTCGTTCCCGTAACAAAGATGCGGCCCCTCCGGCCGGACAATAATCTCGATGGCAGGTCACGAGTGCCAGCGCTAAACCC
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
>NZ_CP007255.1_contig1
GGCGTGCCCTCAGCGGTCTCCTGCCCATTTACCCGGTCCATCAGACCGCGGACATACTCTTCTGCAAAGGCAACCTCGTTCCAGTCGGGAAAGACAACCTCCCACACGTCGAAATGACCCGTGTCATCGCGCGACGCTTCAACGACCGCTACGGCCAGGTCTTCCCCGAACCTCAAGCGCTGCTTACCTCGAGCCCCGAAGTACCTGAACTCGACGGCAGAAAGATATCCAAGAGCTACGGAAACGCCATCACACTGTCGATGACCGCCGACGACACCGCAGCGGCAATCCGCAAGGCACCGACCGACTCACAACGCCGCATCACCTTCGACCCGATCAACCGCCCCGGGGTATCCGCACTGCTGACCACAGCAGCATTGTGCTCAGGTCGCGACGAGAATGACCTCGCCGACCAGATCGGCGACGCCGGCGCAGGCGCACTCAAAAAGCTCACCACCACGCTCGGGAACGACTTCCTCGCCCCTCACCGGCAACGCAGAAACGACCTCGCCCAAGACCCCCACCATGTCGCCGACGTCCTGCGCATCGGAAACCAGCGCGCGAACGAAATCGCCCAACAGACCCTCGAGGAAGTCCGAACCGCAATGGGCACCATCTACTGATCGACTCCTACCCATAGCGCACTCCAGTAGTTTGGACATTCACCGGGTTGCGACGATCCCTGGAATCCGCCATCGGGAGTGTCTGATCAACGGTGGATCCGACCTCGGTATCGGTTAATTACTGTTGAGGGTGATGCGTGCTTCGAAGGTGATAGCGAACGCGTTCAGCGCTGGCTTCCACCTCATCGCCCATCGCGCCTTACCTTTCCCAGTCGGGTCCAGAGCCCGAGTCGCCAGATACAAGCATTCGAGCGCCGCCTGCTCCGTCGGGAAGTGCCCACGAGCACGCACAGCACGCCGGTAACGAGCGTTGACGGACTCGATCGCATTGGTCGAGCAGATCACCCGCAGGATCTCTTTCGTCGTAATCCAAGAAC
>NZ_CP007255.1_contig3
ATGATCGATCCTTCCCACCAAGCTCAGCTCGGCGTGTCGAACCAAGACCGGATCAACCGTTTATCTGACAGTCCCCTCACTAGCGAGTAAGTCGTCAGAGCGATTCTTCGGGTTTGGGTGTGCGTTTTTGACCATCGCCATACCGGACAGCACGGAACGGGCTTCTGGGCGTGACTGACGTGGAGAAACTTAGCGAAAAGTTTCCAATTGGGTGTCCCGTCGGCGAACGTGGGTGTGAAGATCGGGTGTCGAGATGCAACACCGCCGGCAGAACACGGGATCCAGGTTGATCTTCTGGTGAAGGCGGGTGTGGATCGCGACCAGGCTTATGTCGATCATCGTGTCAGCGGCGGGCAGGCTCGTCGGCCAGGTTTGGATGATGCGATCAACGCGGCTCGGGCCGGCGACGTCCTTTGTGTGACGATGCCTCATTAACTGTCGCGCTCGGCGAAGGATCTGCATGAGCCGGTAGAACGGATCGCGGAGAAGGTGTATCGCTGTCAATTGACGGTCAGTTGTACGACCCGCGCGATCCGATGGGGAAGATGTTCATCGGCTTGCTGGGGATTGATGACGGAGTTTCGAGTCGGATCTGATTCGTGGCCGCACTCGTGACGCCCTGGCCGCCGCGACAGCTGCTGAGAAGAGAAAGGTGCGTCGGACAAGCTCACCGGCTGGAGAAAAAAGTCAGTCACGATCGGGTCTTTTCTTCCAGAGGCTCCCGGTCTGGAGCCTCACCGGTTGCCGCGGGTCGGGCTCCTTCGATGTGTGGGTCAGCTCGTTACGCCGCGTTAAGAATCTGAAAACTTGGTGAGCAAACCTTCCTTTCCCAGGTGAAGCGGCGGATTATCGAAGTGGCCGCGCATCTGGTTACCGCGCGGCATGTCGAGCGCGCCTGAAGGCACAACGGGCCGGGAGGTCACATGCTGCTACACCAATCGATAGGGTTGGAGCGCTTCAACGAACTACCGCGACAGAAGGCGGTTCACGCACTGTTCGA
4 changes: 4 additions & 0 deletions q2_moshpit/filtering/tests/data/metadata-derep.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
mag_id sample_id input_file dataset complete single duplicated fragmented missing n_markers scaffold_n50 contigs_n50 percent_gaps scaffolds length
24dee6fe-9b84-45bb-8145-de7b092533a1 sample1 24dee6fe-9b84-45bb-8145-de7b092533a1.fasta bacteria_odb10 28.2 27.4 0.8 8.9 62.9 124 4785 4785 0.000% 265 1219165
d65a71fa-4279-4588-b937-0747ed5d604d sample2 d65a71fa-4279-4588-b937-0747ed5d604d.fasta bacteria_odb10 1.6 1.6 0.0 1.6 96.8 124 3548 3548 0.000% 67 245922
db03f8b6-28e1-48c5-a47c-9c65f38f7357 sample2 db03f8b6-28e1-48c5-a47c-9c65f38f7357.fasta bacteria_odb10 26.6 26.6 0.0 3.2 70.2 124 78679 78679 0.000% 17 714893
3 changes: 3 additions & 0 deletions q2_moshpit/filtering/tests/data/metadata-sample.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
id input_file dataset metric
sample1 24dee6fe-9b84-45bb-8145-de7b092533a1.fasta bacteria_odb10 28.2
sample2 d65a71fa-4279-4588-b937-0747ed5d604d.fasta bacteria_odb10 1.6
Loading

0 comments on commit cdc9e30

Please sign in to comment.