Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
geomarceau committed Nov 21, 2023
2 parents ce01c25 + 417c9c8 commit f595bca
Show file tree
Hide file tree
Showing 6 changed files with 96 additions and 110 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
[![Py version](https://img.shields.io/pypi/pyversions/pysd.svg)](https://pypi.python.org/pypi/pysd/)
[![Hits](https://hits.seeyoufarm.com/api/count/incr/badge.svg?url=https%3A%2F%2Fgithub.com%2Ftahiri-lab%2FaPhylogeo&count_bg=%2379C83D&title_bg=%23555555&icon=&icon_color=%23E7E7E7&title=hits&edge_flat=false)](https://hits.seeyoufarm.com)
[![GitHub release](https://img.shields.io/github/v/release/tahiri-lab/aPhylogeo.svg?maxAge=3600)](https://github.com/tahiri-lab/aPhylogeo/releases/)
[![build and test](https://github.com/tahiri-lab/aPhyloGeo/actions/workflows/github-actions-demo.yml/badge.svg)](https://github.com/tahiri-lab/aPhyloGeo/actions/workflows/github-actions-demo.yml)
[![PyPI version](https://badge.fury.io/py/aphylogeo.svg)](https://badge.fury.io/py/aphylogeo)
</p>


Expand Down
68 changes: 34 additions & 34 deletions aphylogeo/alignement.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,13 +172,13 @@ def align(self) -> Alignment:
Alignment: The alignment object
"""
if Params.alignment_method == "1":
self.centroidKey = self.getSequenceCentroid()[0]
self.centroidSeq = self.sequences.pop(self.centroidKey)
self.aligned = self.alignSequencesWithPairwise()
centroidKey = self.getSequenceCentroid()[0]
centroidSeq = self.sequences.pop(centroidKey)
aligned = self.alignSequencesWithPairwise(centroidKey, centroidSeq)
if Params.fit_method == "1":
heuristicMSA = self.starAlignement()
heuristicMSA = self.starAlignement(centroidKey, aligned)
elif Params.fit_method == "2":
heuristicMSA = self.narrowFitPairwise()
heuristicMSA = self.narrowFitPairwise(aligned)

elif Params.alignment_method == "2":
heuristicMSA = self.muscleAlign()
Expand Down Expand Up @@ -249,7 +249,7 @@ def ScoreSingle(self, args):
score = aligner.score(seqA, seqB)
return (seqAID, seqBID, score)

def alignSequencesWithPairwise(self):
def alignSequencesWithPairwise(self, centroidKey, centroidSeq):
"""
Method that aligns multiple DNA sequences.
The first specimen of the dataset is used as the main pivot.
Expand All @@ -269,7 +269,7 @@ def alignSequencesWithPairwise(self):

seq_pairs = []
for seqXID in seqs.keys():
seq_pairs.append([self.centroidKey, self.centroidSeq, seqXID, seqs[seqXID]])
seq_pairs.append([centroidKey, centroidSeq, seqXID, seqs[seqXID]])

align_scores = Multi(seq_pairs, self.alignSingle).processingLargeData()
aligned = {}
Expand Down Expand Up @@ -380,7 +380,7 @@ def alignSingle(self, args):
aligned = aligner.align(sc, seqB)[0]
return [seqBID, aligned, scID]

def narrowFitPairwise(self):
def narrowFitPairwise(self, aligned):
"""Fit length of a centroid sequence and its pairwise aligned sequences
The lengths of the sequences from the pairwise alignment are set equal by
Expand All @@ -397,16 +397,16 @@ def narrowFitPairwise(self):
-------
A dictionary of all accessions and their fitted aligned sequences.
"""
seqs = self.getAlignSeqs()
max_len = max(self.getAlignSeqLens())
seqs = self.getAlignSeqs(aligned)
max_len = max(self.getAlignSeqLens(aligned))
for nucleo_i in range(0, max_len):
for seq_i in range(0, len(seqs)):
if self.isCurrentCharDash(seqs, seq_i, nucleo_i):
seqs = self.insertDashToShorterSeq(seqs, nucleo_i)
seqs = self.insertDashToShorterSeq(seqs, nucleo_i, aligned)
seqs = self.appendDashToShorterSeqs(seqs, max_len)
return self.mergeFitPairwise(seqs)
return self.mergeFitPairwise(aligned, seqs)

def getAlignSeqs(self):
def getAlignSeqs(self, aligned):
"""Extract all sequences aligned using a pairwise alignment
Parameters:
Expand All @@ -418,11 +418,11 @@ def getAlignSeqs(self):
List of sequences aligned through pairwise alignment
"""
seqs = []
for alignment in self.aligned:
seqs.append([str(seq) for seq in self.aligned[alignment].values()])
for alignment in aligned:
seqs.append([str(seq) for seq in aligned[alignment].values()])
return list(sum(seqs, []))

def getAlignSeqLens(self):
def getAlignSeqLens(self, aligned):
"""Get length of all sequences aligned using a pairwise alignment
Parameters:
Expand All @@ -433,9 +433,9 @@ def getAlignSeqLens(self):
-------
List of the length of each aligned sequences
"""
return [len(seq) for seq in self.getAlignSeqs()]
return [len(seq) for seq in self.getAlignSeqs(aligned)]

def getAlignCouple(self):
def getAlignCouple(self, aligned):
"""Get nested couple accessions and their respective sequences
Parameters:
Expand All @@ -446,9 +446,9 @@ def getAlignCouple(self):
-------
List of paired accessions and their aligned sequences
"""
return [val for val in list(self.aligned.values())]
return [val for val in list(aligned.values())]

def extractOneAlignAcc(self, nest_ord=0):
def extractOneAlignAcc(self, aligned, nest_ord=0):
"""Extract the accession from a nested alignment couple
Parameters:
Expand All @@ -463,11 +463,11 @@ def extractOneAlignAcc(self, nest_ord=0):
accessions of a group of sequences aligned through pairwise alignment.
"""
try:
return [list(i)[nest_ord] for i in self.getAlignCouple()]
return [list(i)[nest_ord] for i in self.getAlignCouple(aligned)]

# Return the centroid sequence if an invalid position is queried
except IndexError:
return [list(i)[0] for i in self.getAlignCouple()]
return [list(i)[0] for i in self.getAlignCouple(aligned)]

def isCurrentCharDash(self, seqs, seq_i, ch_i):
"""Assess whether the character at current cursor position is a dash
Expand All @@ -487,7 +487,7 @@ def isCurrentCharDash(self, seqs, seq_i, ch_i):
except IndexError:
return False

def insertDashToShorterSeq(self, seqs, ch_i):
def insertDashToShorterSeq(self, seqs, ch_i, aligned):
"""Insert a dash at the current position of a sequence
Insert a dash (-) character in a sequence if its length is shorter
Expand All @@ -504,13 +504,13 @@ def insertDashToShorterSeq(self, seqs, ch_i):
"""
for seq_j in range(0, len(seqs)):
try:
if (len(seqs[seq_j]) < max(self.getAlignSeqLens())) & (seqs[seq_j][ch_i] != "-"):
if (len(seqs[seq_j]) < max(self.getAlignSeqLens(aligned))) & (seqs[seq_j][ch_i] != "-"):
seqs[seq_j] = seqs[seq_j][:ch_i] + "-" + seqs[seq_j][ch_i:]
except IndexError:
seqs[seq_j] = seqs[seq_j][:ch_i] + "-"
return seqs

def mergeFitPairwise(self, seqs):
def mergeFitPairwise(self, aligned, seqs):
"""Generate a dictionary of all accessions and their fitted sequences
Parameters:
Expand All @@ -522,8 +522,8 @@ def mergeFitPairwise(self, seqs):
-------
Dict, Group of accessions and their fitted sequences from a pairwise alignment
"""
centroid = {list(set(self.extractOneAlignAcc()))[0]: seqs[0]}
non_centroid = dict(zip(self.extractOneAlignAcc(1), seqs[1::2]))
centroid = {list(set(self.extractOneAlignAcc(aligned)))[0]: seqs[0]}
non_centroid = dict(zip(self.extractOneAlignAcc(aligned, 1), seqs[1::2]))
return centroid | non_centroid

def appendDashToShorterSeqs(self, seqs, max_len):
Expand All @@ -540,7 +540,7 @@ def appendDashToShorterSeqs(self, seqs, max_len):
"""
return [f"{str(seq):-<{max_len}}" for seq in seqs]

def starAlignement(self):
def starAlignement(self, centroidKey, aligned):
"""
Method that combs through all the pairwise alignment couples and makes
it so that every sequence is aligned with every other sequence. If a
Expand Down Expand Up @@ -584,12 +584,12 @@ def starAlignement(self):
Return:
starAlign (dict) see self.heuristicMSA
"""
scKey = self.centroidKey
scKey = centroidKey
starAlign = {}

for k in self.aligned.keys():
for k in aligned.keys():
# couple is SeqA and SeqB of a pairwise alignement
couple = self.aligned[k]
couple = aligned[k]

a = list(couple.keys())
a.remove(scKey)
Expand All @@ -599,7 +599,7 @@ def starAlignement(self):
starAlign[sNewKey] = str(couple[sNewKey]) # SeqB, *not* the reference

if len(starAlign) > 2:
starAlign = self.merge(starAlign, scKey, sNewKey)
starAlign = self.merge(starAlign, scKey, sNewKey, centroidKey)

starAlign["temp"] = starAlign[scKey] # SeqA, the *old* reference
starAlign.pop("temp")
Expand All @@ -613,7 +613,7 @@ def starAlignement(self):

return starAlign

def merge(self, result, k1, k2):
def merge(self, result, k1, k2, centroidKey):
"""
Method that loops through each position of two strings and compares the characters.
Expand Down Expand Up @@ -671,7 +671,7 @@ def merge(self, result, k1, k2):
+ "Please check the previous methods"
+ " and ensure the pairwise alignemnt is correct"
+ "\nCentroid ID: "
+ str(self.centroidKey)
+ str(centroidKey)
+ "\nPairwise seq ID"
+ " last inserted: "
+ str(k2)
Expand Down
26 changes: 26 additions & 0 deletions tests/pairwise_align.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
file_name: './datasets/example/geo.csv'
specimen: 'id' #"Please enter the name of the column containing the specimen names: "
names: ['id', 'ALLSKY_SFC_SW_DWN', 'T2M', 'PRECTOTCORR', 'QV2M', 'WS10M']
bootstrap_threshold: 0
dist_threshold: 60
window_size: 200
step_size: 100
bootstrap_amount: 100
data_names: ['ALLSKY_SFC_SW_DWN_newick', 'T2M_newick', 'QV2M_newick', 'PRECTOTCORR_newick', 'WS10M_newick']
reference_gene_dir: './datasets/example'
reference_gene_file: 'sequences.fasta'
makeDebugFiles: True
alignment_method: '2' #Please select one ~ 1:pairwiseAligner, 2:MUSCLE, 3:CLUSTALW, 4:MAFFT
distance_method: '0' #Please select one ~ 0: All distance methods, 1: Least-Square distance, 2: Robinson-Foulds distance, 3: Euclidean distance (DendroPY)
fit_method: '1' #Please select one ~ 1:Wider Fit by elongating with Gap (starAlignment), 2:Narrow-fit prevent elongation with gap when possible
tree_type: '2' #Please select one ~ 1: BioPython consensus tree, 2: FastTree application
rate_similarity: 90
method_similarity: '1' #Please select one :
# 1: Hamming distance
# 2: Levenshtein distance
# 3: Damerau-Levenshtein distance
# 4: Jaro similarity
# 5: Jaro-Winkler similarity
# 6: Smith–Waterman similarity
# 7: Jaccard similarity
# 8: Sørensen-Dice similarity
16 changes: 0 additions & 16 deletions tests/params_small.yaml

This file was deleted.

18 changes: 0 additions & 18 deletions tests/params_very_small.yaml

This file was deleted.

76 changes: 34 additions & 42 deletions tests/test_genetic.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
import ast
import os
from io import StringIO
from pathlib import Path

import pandas as pd
from Bio import AlignIO, Phylo
from Bio.Phylo.PhyloXML import Phylogeny

Expand All @@ -22,42 +20,36 @@ def setup_class(self):

print("Begin setup for test class test_genetic...")

# params_small = Params(os.path.join(os.path.dirname(__file__), "params_small.yaml"))
# sequences_small = aPhyloGeo.readFastaFile(params_small.reference_gene_file)
# small = AlignSequences(params_small.reference_gene_file, params_small.window_size, params_small.step_size,
# params_small.makeDebugFiles, params_small.bootstrapAmount)
params_very_small = Params(os.path.join(os.path.dirname(__file__), "params_very_small.yaml"))
sequences_very_small = utils.loadSequenceFile(params_very_small.reference_gene_file)
very_small = AlignSequences(
sequences_very_small,
params_very_small.window_size,
params_very_small.step_size,
params_very_small.makeDebugFiles,
params_very_small.bootstrapAmount,
params_very_small.alignment_method,
params_very_small.reference_gene_file,
params_very_small.fit_method,
params_very_small.rate_similarity,
params_very_small.method_similarity
)
very_small.align()
self.alignementSetup = [very_small] # , small]
self.paramSetup = [params_very_small] # , params_small]

Params.load_from_file(params_file = "tests/pairwise_align.yaml")

# Load parameters
ref_gene_dir = Params.reference_gene_dir
ref_gene_file = Params.reference_gene_file
sequences_very_small = utils.loadSequenceFile(os.path.join(ref_gene_dir, ref_gene_file))

# Build AlignSequence object
self.sequences = sequences_very_small.copy()
self.seq_alignment = AlignSequences(self.sequences)

# Get centroid key
self.centroid = self.seq_alignment.getSequenceCentroid()[0]
self.centroidSeqs = self.sequences.pop(self.centroid)

# Get pairwise alignment
self.aligned = self.seq_alignment.alignSequencesWithPairwise(self.centroid, self.centroidSeqs)

def test_centroidKey(self):
"""
This test is used to test the centroidKey function.
"""

print("Begin test_centroidKey...")

for alignement in self.alignementSetup:
actual_centroid = alignement.centroidKey
filename = current_file + "/testFiles/getSequenceCentroid/seq very small"

with open(filename, "r") as expected_file:
expected_centroid = expected_file.read()
assert actual_centroid == expected_centroid
# actual_centroid = self.seq_alignment.getSequenceCentroid()[0]

filename = current_file + "/testFiles/getSequenceCentroid/seq very small"
with open(filename, "r") as expected_file:
expected_centroid = expected_file.read()
assert self.centroid == expected_centroid

def test_aligned(self):
"""
Expand All @@ -66,24 +58,24 @@ def test_aligned(self):

print("Begin test_aligned...")

for alignement in self.alignementSetup:
aligned = alignement.aligned

for key in aligned.keys():
for key in self.aligned.keys():
filename = current_file + "/testFiles/alignSequence/seq very small/" + key + ".fasta"
with open(filename, "r") as expected_file:
expected_file = expected_file.read()
expected = AlignSequences.fileToDict(current_file + "/testFiles/alignSequence/seq very small/" + key, ".fasta")
assert aligned[key] == expected

assert self.aligned[key] == expected
def test_heuristicMSA(self):
"""
This test is used to test the heuristicMSA function.
"""

print("Begin test_heuristicMSA...")

for alignement in self.alignementSetup:
starAlignement = alignement.starAlignement()
expected = AlignSequences.fileToDict(current_file + "/testFiles/starAlignement/seq very small", ".fasta")
assert starAlignement == expected
# for alignement in self.alignementSetup:
starAlignement = self.aligned.starAlignement()
expected = AlignSequences.fileToDict(current_file + "/testFiles/starAlignement/seq very small", ".fasta")
assert starAlignement == expected

def test_windowed(self):
"""
Expand Down

0 comments on commit f595bca

Please sign in to comment.