diff --git a/q2_moshpit/busco/types/__init__.py b/q2_moshpit/busco/types/__init__.py new file mode 100644 index 00000000..2c8611e4 --- /dev/null +++ b/q2_moshpit/busco/types/__init__.py @@ -0,0 +1,14 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2022-2023, QIIME 2 development team. +# +# Distributed under the terms of the Modified BSD License. +# +# The full license is in the file LICENSE, distributed with this software. +# ---------------------------------------------------------------------------- +from ._format import BUSCOResultsFormat, BUSCOResultsDirectoryFormat +from ._type import BUSCOResults + + +__all__ = [ + 'BUSCOResults', 'BUSCOResultsFormat', 'BUSCOResultsDirectoryFormat' +] diff --git a/q2_moshpit/busco/types/_format.py b/q2_moshpit/busco/types/_format.py new file mode 100644 index 00000000..94edfc82 --- /dev/null +++ b/q2_moshpit/busco/types/_format.py @@ -0,0 +1,47 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2022-2023, QIIME 2 development team. +# +# Distributed under the terms of the Modified BSD License. +# +# The full license is in the file LICENSE, distributed with this software. 
class BUSCOResultsFormat(model.TextFileFormat):
    """Tab-separated BUSCO results table with a fixed set of columns.

    Validation checks that the header row holds exactly the column names
    listed in ``HEADER`` (in any order, each exactly once) and that every
    inspected data row has as many fields as ``HEADER``.
    """

    # Column names the header row must contain.
    HEADER = [
        "mag_id", "sample_id", "input_file", "dataset", "complete",
        "single", "duplicated", "fragmented", "missing", "n_markers",
        "scaffold_n50", "contigs_n50", "percent_gaps", "scaffolds",
        "length"
    ]

    def _validate(self, n_records=None):
        """Check the header row and the width of the data rows.

        Parameters
        ----------
        n_records : int, optional
            Number of data rows to inspect. ``None`` inspects every row.

        Raises
        ------
        ValidationError
            If the file is empty, if the header does not consist of exactly
            the ``HEADER`` column names, or if an inspected row has a
            different number of fields than ``HEADER``.
        """
        expected_width = len(self.HEADER)

        with self.open() as fh:
            reader = csv.reader(fh, delimiter='\t')

            # An empty file has no header row at all; report that as a
            # validation failure rather than leaking StopIteration.
            headers = next(reader, None)
            if headers is None:
                raise ValidationError(
                    f'File is empty, expected header: {self.HEADER}'
                )

            # The set comparison tolerates any column order; the length
            # check additionally rejects headers with repeated names.
            if (set(headers) != set(self.HEADER)
                    or len(headers) != expected_width):
                raise ValidationError(
                    f'Invalid header: {headers}, expected: {self.HEADER}'
                )

            # Numbering starts at 2 because line 1 is the header.
            for i, row in enumerate(reader, start=2):
                if len(row) != expected_width:
                    raise ValidationError(
                        f'Line {i} has {len(row)} columns, '
                        f'expected {expected_width}'
                    )

                # i - 1 is the number of data rows checked so far.
                if n_records is not None and i - 1 >= n_records:
                    break

    def _validate_(self, level):
        """Validate the file at the requested level.

        ``'min'`` inspects the first 100 data rows, ``'max'`` inspects all
        of them.
        """
        record_count_map = {'min': 100, 'max': None}
        self._validate(record_count_map[level])


# Directory layout holding a single results table named busco_results.tsv.
BUSCOResultsDirectoryFormat = model.SingleFileDirectoryFormat(
    'BUSCOResultsDirectoryFormat', 'busco_results.tsv',
    BUSCOResultsFormat
)
def _read_dataframe(fh: str, header=0):
    """Load a tab-separated table into an all-string DataFrame.

    The first column becomes the index, which is then named ``id``.

    NOTE(review): ``fh`` is annotated as ``str`` but the transformers in
    this module pass an open file handle; ``pd.read_csv`` accepts both.
    """
    table = pd.read_csv(
        fh,
        sep='\t',
        header=header,
        index_col=0,
        dtype='str',
    )
    table.index.name = 'id'
    return table


@plugin.register_transformer
def _1(ff: BUSCOResultsFormat) -> pd.DataFrame:
    """Read a BUSCO results file into a DataFrame."""
    with ff.open() as handle:
        return _read_dataframe(handle)


@plugin.register_transformer
def _2(data: pd.DataFrame) -> BUSCOResultsFormat:
    """Write a DataFrame out as a BUSCO results file.

    Only the columns are written; the DataFrame index is dropped.

    NOTE(review): the readers in this module turn the first column into
    the index, so a frame produced by them would lose that column here --
    confirm callers pass the identifier as a regular column.
    """
    result = BUSCOResultsFormat()
    with result.open() as handle:
        data.to_csv(handle, sep='\t', header=True, index=False)
    return result


@plugin.register_transformer
def _3(ff: BUSCOResultsFormat) -> Metadata:
    """Read a BUSCO results file and wrap it as QIIME 2 Metadata."""
    with ff.open() as handle:
        table = _read_dataframe(handle)
    return Metadata(table)