Skip to content

Commit

Permalink
better translation table traits
Browse files Browse the repository at this point in the history
  • Loading branch information
jeff-k committed Feb 19, 2024
1 parent c002a7f commit 3c49d32
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 29 deletions.
3 changes: 2 additions & 1 deletion bio-seq-derive/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -147,9 +147,10 @@ pub fn codec_derive(input: TokenStream) -> TokenStream {
type Error = #parse_error;
const WIDTH: u8 = #width;
fn unsafe_from_bits(b: u8) -> Self {
//debug_assert!(false, "Invalid encoding: {b:?}");
match b {
#(#unsafe_alts),*,
_ => panic!(),
_ => unreachable!(),
}
}

Expand Down
79 changes: 51 additions & 28 deletions bio-seq/src/translation/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
//! ## Errors
//!
use core::fmt::Display;
use core::iter::FromIterator;
use std::collections::HashMap;

use crate::codec::Codec;
Expand All @@ -17,25 +16,30 @@ use crate::prelude::{Seq, SeqSlice};
pub mod standard;

/// A codon translation table where all codons map to amino acids
///
/// `A` is the codec of the codon sequence and `B` the codec of the translated symbol.
trait TranslationTable<A: Codec, B: Codec> {
pub trait TranslationTable<A: Codec, B: Codec> {
/// Translates `codon` into a `B`. The return type is not a `Result`, so
/// implementors are expected to produce a value for every codon they accept.
fn to_amino(&self, codon: &SeqSlice<A>) -> B;
/// Reverse lookup from a translated symbol back to a codon sequence.
///
/// NOTE(review): the error conditions are defined by each implementor;
/// presumably this fails when `amino` has no single codon — confirm.
fn to_codon(&self, amino: B) -> Result<Seq<A>, TranslationError>;
}

/// A partial translation table where not all triples of characters map to amino acids
///
/// Both directions are fallible and report failure through `TranslationError`.
trait PartialTranslationTable<A: Codec, B: Codec> {
pub trait PartialTranslationTable<A: Codec, B: Codec> {
/// Attempts to translate `codon` into a `B`, returning an error when the
/// implementor has no mapping for it.
fn try_to_amino(&self, codon: &SeqSlice<A>) -> Result<B, TranslationError>;
/// Attempts the reverse lookup from a translated symbol to a codon sequence.
///
/// NOTE(review): which `TranslationError` variant is returned is up to the
/// implementor — confirm against the `CodonTable` implementation.
fn try_to_codon(&self, amino: B) -> Result<Seq<A>, TranslationError>;
}

/// A customisable translation table
///
/// Keeps a forward map (codon to translated symbol) alongside a reverse map
/// (translated symbol to codon) so lookups work in both directions.
pub struct CodonTable<A: Codec, B: Codec> {
// I'm open to using a better bidirectional mapping data structure
/// Forward lookup: codon sequence to translated symbol.
table: HashMap<Seq<A>, B>,
/// Reverse lookup: translated symbol to codon sequence.
///
/// NOTE(review): the `Option` presumably marks symbols that have no single
/// unambiguous codon — confirm against `from_map`.
inverse_table: HashMap<B, Option<Seq<A>>>,
}

impl<A: Codec, B: Codec + Display> CodonTable<A, B> {
pub fn from_map(table: HashMap<Seq<A>, B>) -> Self {
pub fn from_map<T>(table: T) -> Self
where
T: Into<HashMap<Seq<A>, B>>,
{
let table: HashMap<Seq<A>, B> = table.into();
let mut inverse_table = HashMap::new();
for (codon, amino) in &table {
if inverse_table.contains_key(amino) {
Expand All @@ -51,26 +55,6 @@ impl<A: Codec, B: Codec + Display> CodonTable<A, B> {
}
}

/// Builds a `CodonTable` from an iterator of `(codon, amino)` pairs.
///
/// Every pair is inserted into both the forward and the reverse map. Because
/// plain `HashMap::insert` is used, a later pair silently replaces an earlier
/// one that shares the same codon (forward map) or the same amino acid
/// (reverse map); no existing-entry check is performed here.
impl<A: Codec, B: Codec> FromIterator<(Seq<A>, B)> for CodonTable<A, B> {
    fn from_iter<I>(iter: I) -> Self
    where
        I: IntoIterator<Item = (Seq<A>, B)>,
    {
        let mut table: HashMap<Seq<A>, B> = HashMap::new();
        let mut inverse_table: HashMap<B, Option<Seq<A>>> = HashMap::new();

        for (codon, amino) in iter {
            // The reverse map needs its own copy of the codon; the forward map
            // can then take ownership of the original, so one clone is enough.
            inverse_table.insert(amino, Some(codon.clone()));
            table.insert(codon, amino);
        }

        CodonTable {
            table,
            inverse_table,
        }
    }
}

impl<A: Codec, B: Codec + Display> PartialTranslationTable<A, B> for CodonTable<A, B> {
fn try_to_amino(&self, codon: &SeqSlice<A>) -> Result<B, TranslationError> {
match self.table.get(&Seq::from(codon)) {
Expand All @@ -91,8 +75,9 @@ impl<A: Codec, B: Codec + Display> PartialTranslationTable<A, B> for CodonTable<
#[cfg(test)]
mod tests {
use crate::prelude::*;
use crate::translation::{CodonTable, PartialTranslationTable, TranslationError};
use std::collections::HashMap;
use crate::translation::{
CodonTable, PartialTranslationTable, TranslationError, TranslationTable,
};

#[test]
fn custom_codon_table() {
Expand All @@ -105,8 +90,7 @@ mod tests {
(dna!("TTA"), Amino::F),
];

let codons = HashMap::from(mito);
let table = CodonTable::from_map(codons);
let table = CodonTable::from_map(mito);

let seq: Seq<Dna> = dna!("AAACCCGGGTTTTTATTAATG");
let mut amino_seq: Seq<Amino> = Seq::new();
Expand All @@ -126,4 +110,43 @@ mod tests {
Err(TranslationError::InvalidCodon)
);
}

/// Exercises a hand-written `TranslationTable` that overrides four codons and
/// otherwise falls back to the bit-level conversion into `Amino`.
#[test]
fn mitochondrial_coding_table() {
    struct Mitochondria;

    impl TranslationTable<Dna, Amino> for Mitochondria {
        fn to_amino(&self, codon: &SeqSlice<Dna>) -> Amino {
            // Codons that this table maps explicitly.
            if *codon == dna!("AGA") || *codon == dna!("AGG") {
                return Amino::X;
            }
            if *codon == dna!("ATA") {
                return Amino::M;
            }
            if *codon == dna!("TGA") {
                return Amino::W;
            }

            // Everything else uses the codon's raw bits as the amino encoding.
            Amino::unsafe_from_bits(Into::<u8>::into(codon))
        }

        fn to_codon(&self, _amino: Amino) -> Result<Seq<Dna>, TranslationError> {
            unimplemented!()
        }
    }

    let seq: Seq<Dna> =
        dna!("AATTTGTGGGTTCGTCTGCGGCTCCGCCCTTAGTACTATGAGGACGATCAGCACCATAAGAACAAA");

    // Translate every window of three bases; the length check below expects
    // `seq.len() - 2` results.
    let aminos: Seq<Amino> = seq
        .windows(3)
        .map(|window| Mitochondria.to_amino(&window))
        .collect();
    assert_eq!(seq.len() - 2, aminos.len());

    let expected = amino!("NIFLCVWGGVFSRVSLCARGALSPRAPPLL*SVYTLYMWE*GDTRDISQSAHTPHM*K*ENTQK");
    for (got, want) in aminos.into_iter().zip(expected.into_iter()) {
        assert_eq!(got, want);
    }
}
}
1 change: 1 addition & 0 deletions bio-seq/src/translation/standard.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ impl TranslationTable<Dna, Amino> for Standard {
if codon.len() != 3 {
panic!("Invalid codon of length {}", codon.len());
}
// It should be possible to assert that this is safe at compile time
Amino::unsafe_from_bits(Into::<u8>::into(codon))
}

Expand Down

0 comments on commit 3c49d32

Please sign in to comment.