Skip to content

Commit

Permalink
Add SampleData[MAGs] to prodigal
Browse files Browse the repository at this point in the history
  • Loading branch information
Sann5 committed Apr 12, 2024
1 parent 4f27961 commit ae2c5d1
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 22 deletions.
2 changes: 1 addition & 1 deletion q2_moshpit/plugin_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -989,7 +989,7 @@
plugin.methods.register_function(
function=q2_moshpit.prodigal.predict_genes_prodigal,
inputs={
'mags': FeatureData[MAG]
'mags': FeatureData[MAG] | SampleData[MAGs]
},
input_descriptions={
'mags': 'MAGs for which one wishes to predict genes.'
Expand Down
64 changes: 43 additions & 21 deletions q2_moshpit/prodigal/prodigal.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,17 @@
# ----------------------------------------------------------------------------
import os
import copy as cp
from typing import Union
from .._utils import run_command
from q2_types.feature_data_mag import MAGSequencesDirFmt
from q2_types.per_sample_sequences import MultiMAGSequencesDirFmt
from q2_types.genome_data import (
LociDirectoryFormat, GenesDirectoryFormat, ProteinsDirectoryFormat,
)


def predict_genes_prodigal(
mags: MAGSequencesDirFmt,
mags: Union[MAGSequencesDirFmt, MultiMAGSequencesDirFmt],
translation_table_number: str = "11",
) -> (LociDirectoryFormat, GenesDirectoryFormat, ProteinsDirectoryFormat):

Expand All @@ -24,34 +26,54 @@ def predict_genes_prodigal(
genes = GenesDirectoryFormat()
proteins = ProteinsDirectoryFormat()

# Get paths to fasta files in input dir
fasta_files = [
file for file in os.listdir(mags.path)
if file.endswith(".fa") or file.endswith(".fasta")
]

# Define base command
base_cmd = [
"prodigal",
"-g", translation_table_number,
"-f", "gff"
]

# For every fasta file in mags.path call prodigal and write
# outputs to the corresponding directories.
for fasta_file in fasta_files:
# Get the filename from the file path
file_id = os.path.splitext(fasta_file)[0]

# Adjust command and run
cmd = cp.deepcopy(base_cmd)
cmd.extend([
"-i", os.path.join(mags.path, fasta_file),
"-o", os.path.join(loci.path, f"{file_id}_loci.gff"),
"-a", os.path.join(proteins.path, f"{file_id}_proteins.fasta"),
"-d", os.path.join(genes.path, f"{file_id}_genes.fasta")
])
def _process_fasta_files(fasta_files: list, prefix: str, input_path: str):
# For every fasta file call prodigal and write
# outputs to the corresponding directories.
for fasta_file in fasta_files:
# Get the filename from the file path
file_id = os.path.splitext(fasta_file)[0]

# Adjust command and run
cmd = cp.deepcopy(base_cmd)
cmd.extend([
"-i", os.path.join(input_path, fasta_file),
"-o",
os.path.join(
loci.path, f"{prefix}{file_id}_loci.gff"
),
"-a",
os.path.join(
proteins.path, f"{prefix}{file_id}_proteins.fasta"
),
"-d",
os.path.join(
genes.path, f"{prefix}{file_id}_genes.fasta"
)
])
run_command(cmd)

if isinstance(mags, MAGSequencesDirFmt):
# Get paths to fasta files in input dir
fasta_files = os.listdir(mags.path)
_process_fasta_files(fasta_files, '', mags.path)

elif isinstance(mags, MultiMAGSequencesDirFmt):
# List all directories / samples
for sample_dir in os.listdir(mags.path):
if os.path.isdir(os.path.join(mags.path, sample_dir)):
fasta_files = os.listdir(os.path.join(mags.path, sample_dir))
_process_fasta_files(
fasta_files,
f"{sample_dir}_",
os.path.join(mags.path, sample_dir)
)

# Return output directories
return loci, genes, proteins

0 comments on commit ae2c5d1

Please sign in to comment.