Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: reconstruction of ambiguous nucs in ancestral parsimony mode #293

Merged
merged 1 commit into from
Dec 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 31 additions & 27 deletions packages/treetime/src/alphabet/alphabet.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@ pub enum AlphabetName {
}

pub type ProfileMap = IndexMap<char, Array1<f64>>;
pub type StateSetMap = IndexMap<char, StateSet>;
pub type CharToSet = IndexMap<char, StateSet>;
pub type SetToChar = IndexMap<StateSet, char>;

#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct Alphabet {
Expand All @@ -43,6 +46,11 @@ pub struct Alphabet {
treat_gap_as_unknown: bool,
profile_map: ProfileMap,

#[serde(skip)]
char_to_set: IndexMap<char, StateSet>,
#[serde(skip)]
set_to_char: IndexMap<StateSet, char>,

#[serde(skip)]
char_to_index: Vec<Option<usize>>,
#[serde(skip)]
Expand Down Expand Up @@ -124,6 +132,18 @@ impl Alphabet {
index_to_char.push(c);
}

let char_to_set = {
let mut char_to_set: CharToSet = canonical.iter().map(|c| (c, StateSet::from_char(c))).collect();
ambiguous.iter().for_each(|(key, chars)| {
char_to_set.insert(*key, StateSet::from_iter(chars));
});
char_to_set.insert(*gap, StateSet::from_char(*gap));
char_to_set.insert(*unknown, StateSet::from_char(*unknown));
char_to_set
};

let set_to_char: SetToChar = char_to_set.iter().map(|(&c, &s)| (s, c)).collect();

Ok(Self {
all,
char_to_index,
Expand All @@ -137,25 +157,11 @@ impl Alphabet {
gap: *gap,
treat_gap_as_unknown: *treat_gap_as_unknown,
profile_map,
char_to_set,
set_to_char,
})
}

/// Resolve possible ambiguity of the given character to the set of canonical chars
pub fn disambiguate(&self, c: char) -> StateSet {
// If unknown then could be any canonical (e.g. N => { A, C, G, T })
if self.is_unknown(c) {
self.canonical().collect()
}
// If ambiguous (e.g. R => { A, G })
else if let Some(resolutions) = self.ambiguous.get(&c) {
resolutions.iter().copied().collect()
}
// Otherwise it's not ambiguous and it's the char itself (incl. gap)
else {
once(c).collect()
}
}

#[inline]
pub fn get_profile(&self, c: char) -> &Array1<f64> {
self
Expand All @@ -178,7 +184,7 @@ impl Alphabet {
{
let mut profile = Array1::<f64>::zeros(self.n_canonical());
for c in chars {
let chars = self.disambiguate(*c.borrow());
let chars = self.char_to_set(*c.borrow());
for c in chars.iter() {
let index = self.index(c);
profile[index] = 1.0;
Expand Down Expand Up @@ -206,6 +212,14 @@ impl Alphabet {
Ok(prof)
}

pub fn set_to_char(&self, c: StateSet) -> char {
self.set_to_char[&c]
}

pub fn char_to_set(&self, c: char) -> StateSet {
self.char_to_set[&c]
}

/// All existing characters (including 'unknown' and 'gap')
pub fn chars(&self) -> impl Iterator<Item = char> + '_ {
self.all.iter()
Expand Down Expand Up @@ -466,16 +480,6 @@ mod tests {
use indoc::indoc;
use pretty_assertions::assert_eq;

#[test]
fn test_disambiguate() -> Result<(), Report> {
let alphabet = Alphabet::new(AlphabetName::Nuc, false)?;
assert_eq!(stateset! {'A', 'G'}, alphabet.disambiguate('R'));
assert_eq!(stateset! {'A', 'C', 'G', 'T'}, alphabet.disambiguate('N'));
assert_eq!(stateset! {'C'}, alphabet.disambiguate('C'));
assert_eq!(stateset! {alphabet.gap()}, alphabet.disambiguate(alphabet.gap()));
Ok(())
}

#[test]
fn test_alphabet_nuc() -> Result<(), Report> {
let alphabet = Alphabet::new(AlphabetName::Nuc, false)?;
Expand Down
3 changes: 1 addition & 2 deletions packages/treetime/src/commands/ancestral/fitch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -509,8 +509,7 @@ pub fn ancestral_reconstruction_fitch(
}

for (&pos, &states) in &node.fitch.variable {
seq[pos] = states.first().unwrap();
// seq[pos] = alphabet.ambiguate(states).first().unwrap();
seq[pos] = alphabet.set_to_char(states);
}

node.sequence = seq.clone();
Expand Down
2 changes: 1 addition & 1 deletion packages/treetime/src/representation/graph_sparse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ impl SparseSeqNode {
.iter()
.enumerate()
.filter(|(_, &c)| alphabet.is_ambiguous(c))
.map(|(pos, &c)| (pos, alphabet.disambiguate(c)))
.map(|(pos, &c)| (pos, alphabet.char_to_set(c)))
.collect();

let seq_dis = ParsimonySeqDis {
Expand Down