Skip to content

Commit

Permalink
feat: Add intermediate step integration for the workflow
Browse files Browse the repository at this point in the history
- Integrated Typer for handling CLI commands
- Implemented `climate_pipeline` and `genetic_pipeline` commands
- Set `main` function to run as default when no command is specified
- Updated `GeneticTrees` class to be iterable
- Added options for file paths in CLI commands
  • Loading branch information
sikatikenmogne committed Nov 24, 2024
1 parent ed0133a commit 1c02f98
Show file tree
Hide file tree
Showing 5 changed files with 260 additions and 12 deletions.
99 changes: 89 additions & 10 deletions aphylogeo/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
from aphylogeo.params import Params
from aphylogeo import utils
from aphylogeo.genetic_trees import GeneticTrees
import typer
import os

# from aphylogeo.utils import climaticPipeline, geneticPipeline, filterResults, loadSequenceFile

Expand All @@ -19,18 +21,42 @@
\/__/
""" # https://patorjk.com/software/taag/#p=display&f=Larry%203D&t=aphylogeo%20

if __name__ == "__main__":
print(titleCard + "\n")
app = typer.Typer(invoke_without_command=True)

# geneticTrees = GeneticTrees.load_trees_from_file("./results/geneticTreesTest.json")
# loaded_seq_alignment = Alignment.load_from_json("./results/aligned_sequences.json")
@app.command()
def climate_pipeline(
    file_name: str = typer.Option(Params.PARAMETER_KEYS["file_name"], help="The name of the file containing the climatic data."),
    output: str = typer.Option("./datasets/example/climaticTrees.nwk", help="The name of the file to save the climatic trees."),
):
    """
    Run the climatic pipeline and save the resulting climatic trees.

    Args:
        file_name (str): The name of the CSV file containing the climatic data.
        output (str): The name of the file to save the climatic trees.

    Raises:
        typer.Exit: With exit code 1 when the climatic trees cannot be saved.
    """
    Params.load_from_file()

    climatic_data = pd.read_csv(file_name)
    climaticTrees = utils.climaticPipeline(climatic_data)

    try:
        utils.save_climatic_trees(climaticTrees, output)
    except Exception as e:
        # This is the CLI boundary: report the failure on stderr and exit with
        # a non-zero status so shells and scripts can detect that no output
        # file was produced (printing and returning would exit with 0).
        typer.echo(f"Error saving the file: {e}", err=True)
        raise typer.Exit(code=1) from e

# load params
@app.command()
def genetic_pipeline(
reference_gene_filepath: str = typer.Option(os.path.join(Params.PARAMETER_KEYS["reference_gene_dir"], Params.PARAMETER_KEYS["reference_gene_file"]) , help="The path to the reference gene file."),
output: str = typer.Option("./datasets/example/geneticTrees.json", help="The name of the file to save the genetic trees."),
):
"""
This function is used to run the genetic pipeline that creates the genetic trees.
Args:
reference_gene_filepath (str): The path to the reference gene file.
"""
Params.load_from_file()
sequenceFile = utils.loadSequenceFile(Params.reference_gene_filepath)
align_sequence = AlignSequences(sequenceFile)

climatic_data = pd.read_csv(Params.file_name)
sequenceFile = utils.loadSequenceFile(reference_gene_filepath)
align_sequence = AlignSequences(sequenceFile)

print("\nStarting alignement")
start_time = time.time()
Expand All @@ -41,9 +67,62 @@
elapsed_time = round(end_time - start_time, 3)
print(f"Elapsed time: {elapsed_time} seconds")

climaticTrees = utils.climaticPipeline(climatic_data)
try:
trees.save_trees_to_json(output)
except Exception as e:
print(f"Error saving the file: {e}")

@app.callback()
def main(
    climatic_tree: str = typer.Option(None, help="The name of the file containing the climatic trees."),
    genetic_tree: str = typer.Option(None, help="The name of the file containing the genetic trees."),
    # Typer recognises this parameter by its bare ``typer.Context`` annotation
    # and injects the current context instead of exposing a CLI option. The
    # ``None`` default only exists so direct Python calls keep working.
    ctx: typer.Context = None,
):
    """
    Run the complete workflow, optionally reusing previously saved trees.

    Args:
        climatic_tree (str): File holding saved climatic trees. When it is
            missing or not given, the trees are computed from ``Params.file_name``.
        genetic_tree (str): File holding saved genetic trees. When it is
            missing or not given, the sequences are aligned and the trees computed.
        ctx (typer.Context): Context injected by Typer; ``None`` when the
            function is called directly.
    """
    # The app is created with invoke_without_command=True, so this callback
    # also fires before every sub-command. Only run the full workflow when no
    # sub-command was requested.
    if ctx is not None and ctx.invoked_subcommand is not None:
        return

    # load params
    Params.load_from_file()

    # genetic pipeline
    alignements = None

    reuse_genetic = genetic_tree is not None and os.path.exists(genetic_tree)
    if genetic_tree is not None and not reuse_genetic:
        # Do not silently ignore a path the user explicitly asked for.
        typer.echo(f"Genetic tree file not found: {genetic_tree}. Recomputing the genetic trees.", err=True)

    if reuse_genetic:
        loaded_trees = GeneticTrees.load_trees_from_file(genetic_tree)
        geneticTrees = loaded_trees.trees
        trees = GeneticTrees(trees_dict=geneticTrees, format="newick")
    else:
        typer.echo(typer.style(titleCard, fg=typer.colors.GREEN))

        sequenceFile = utils.loadSequenceFile(Params.reference_gene_filepath)
        align_sequence = AlignSequences(sequenceFile)

        print("\nStarting alignement")
        start_time = time.time()
        alignements = align_sequence.align()
        geneticTrees = utils.geneticPipeline(alignements.msa)
        trees = GeneticTrees(trees_dict=geneticTrees, format="newick")
        end_time = time.time()
        elapsed_time = round(end_time - start_time, 3)
        print(f"Elapsed time: {elapsed_time} seconds")

    # climatic sequence
    reuse_climatic = climatic_tree is not None and os.path.exists(climatic_tree)
    if climatic_tree is not None and not reuse_climatic:
        typer.echo(f"Climatic tree file not found: {climatic_tree}. Recomputing the climatic trees.", err=True)

    if reuse_climatic:
        climaticTrees = utils.load_climatic_trees(climatic_tree)
        climatic_data = utils.reverse_climatic_pipeline(climaticTrees)
    else:
        climatic_data = pd.read_csv(Params.file_name)
        climaticTrees = utils.climaticPipeline(climatic_data)

    utils.filterResults(climaticTrees, geneticTrees, climatic_data)

    # save results
    # Alignments only exist when they were computed during this run; when the
    # genetic trees were loaded from a file there is nothing to save.
    if alignements is not None:
        alignements.save_to_json(f"./results/aligned_{Params.reference_gene_file}.json")

    trees.save_trees_to_json("./results/geneticTrees.json")


if __name__ == "__main__":
app()
46 changes: 46 additions & 0 deletions aphylogeo/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,20 @@ def climaticPipeline(df):
trees[names[i]] = createTree(dm)
return trees

def reverse_climatic_pipeline(trees):
    """
    Flatten a dictionary of climatic trees into a long-format DataFrame.

    One row is produced per terminal (leaf) of every tree, holding the leaf
    name, the dictionary key of the tree and the leaf's branch length.

    :param trees: The climatic trees dictionary (key -> tree).
    :return: A DataFrame with the columns ``Params.names[0]``, ``Variable`` and ``Value``.
    :rtype: pd.DataFrame
    """
    rows = [
        [terminal.name, variable, terminal.branch_length]
        for variable, climatic_tree in trees.items()
        for terminal in climatic_tree.get_terminals()
    ]
    return pd.DataFrame(rows, columns=[Params.names[0], 'Variable', 'Value'])

def createBoostrap(msaSet: dict, bootstrapAmount):
"""
Expand Down Expand Up @@ -658,3 +672,35 @@ def loadSequenceFile(file):
for sequence in SeqIO.parse(sequencesFile, "fasta"):
sequences[sequence.id] = sequence.seq
return sequences

def save_climatic_trees(climatic_trees, file_path):
    """
    Write every climatic tree to a single text file.

    Each entry is written as a ``>key`` header line followed by the tree in
    Newick format, as emitted by ``Phylo.write``.

    :param dict climatic_trees: The climatic trees to save (key -> tree).
    :param str file_path: The path to the file where the climatic trees will be saved.
    """
    with open(file_path, 'w') as handle:
        for name in climatic_trees:
            # Header line identifying the tree that follows.
            handle.write(f">{name}\n")
            Phylo.write(climatic_trees[name], handle, "newick")


def load_climatic_trees(file_path):
    """
    Load climatic trees from a file.

    The file is expected to hold, for each tree, a ``>key`` header line
    followed by one line containing the tree in Newick format. Blank lines
    (for example a trailing newline) are ignored.

    :param str file_path: Path to the target file where the climatic trees will be loaded
    :returns: The loaded climatic trees
    :rtype: dict
    :raises ValueError: If a header has no tree line, or a tree line has no header.
    """
    climatic_trees = {}
    pending_key = None  # header read but not yet matched with its tree line

    with open(file_path, 'r') as file:
        for line_number, raw_line in enumerate(file, start=1):
            line = raw_line.strip()
            if not line:
                # Tolerate blank lines instead of failing on a fixed stride.
                continue

            if line.startswith(">"):
                if pending_key is not None:
                    raise ValueError(
                        f"{file_path}:{line_number}: no tree found for key '{pending_key}'"
                    )
                pending_key = line[1:]
                continue

            if pending_key is None:
                raise ValueError(
                    f"{file_path}:{line_number}: tree line without a preceding '>' header"
                )
            climatic_trees[pending_key] = Phylo.read(StringIO(line), "newick")
            pending_key = None

    if pending_key is not None:
        raise ValueError(f"{file_path}: no tree found for key '{pending_key}'")
    return climatic_trees
125 changes: 123 additions & 2 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ robinson-foulds = "^1.2"
ete3 = "^3.1.3"
dendropy = "^4.6.1"
textdistance = "^4.6.0"
typer = "^0.13.1"


[tool.poetry.group.dev.dependencies]
Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
"ete3==3.1.3",
"dendropy==4.6.1",
"textdistance==4.6.0",
"typer==0.13.1"
],
python_requires=">=3.9.0",
packages=find_packages(),
Expand Down

0 comments on commit 1c02f98

Please sign in to comment.