Skip to content

Commit

Permalink
feat: adding vertebrate mitochondrial code (#160) (#161)
Browse files Browse the repository at this point in the history
  • Loading branch information
holtgrewe authored Mar 5, 2024
1 parent 84fb34d commit eb6739b
Showing 1 changed file with 221 additions and 0 deletions.
221 changes: 221 additions & 0 deletions src/sequences.rs
Original file line number Diff line number Diff line change
Expand Up @@ -528,6 +528,191 @@ lazy_static::lazy_static! {
("YTR", "L"),
];

/// Vertebrate mitochondrial code, cf. https://en.wikipedia.org/wiki/Vertebrate_mitochondrial_code
///
/// Each entry is `(codon, one-letter amino acid)`, with `"*"` denoting a stop codon.
///
/// The first 64 entries are the non-degenerate codons, and the 2-bit lookup table for this
/// code is built from exactly those first 64 entries, so they must stay in front.  The
/// remaining entries are degenerate (IUPAC ambiguity) codons; a degenerate codon is listed
/// only when every codon it stands for translates to the same amino acid under *this* code.
pub static ref DNA_TO_AA1_CHRMT_VERTEBRATE_VEC: Vec<(&'static str, &'static str)> = vec![
    ("AAA", "K"),
    ("AAC", "N"),
    ("AAG", "K"),
    ("AAT", "N"),
    ("ACA", "T"),
    ("ACC", "T"),
    ("ACG", "T"),
    ("ACT", "T"),
    // differs from the standard code: AGA is a stop codon (standard: R)
    ("AGA", "*"),
    ("AGC", "S"),
    // differs from the standard code: AGG is a stop codon (standard: R)
    ("AGG", "*"),
    ("AGT", "S"),
    // differs from the standard code: ATA is Met (standard: I)
    ("ATA", "M"),
    ("ATC", "I"),
    ("ATG", "M"),
    ("ATT", "I"),
    ("CAA", "Q"),
    ("CAC", "H"),
    ("CAG", "Q"),
    ("CAT", "H"),
    ("CCA", "P"),
    ("CCC", "P"),
    ("CCG", "P"),
    ("CCT", "P"),
    ("CGA", "R"),
    ("CGC", "R"),
    ("CGG", "R"),
    ("CGT", "R"),
    ("CTA", "L"),
    ("CTC", "L"),
    ("CTG", "L"),
    ("CTT", "L"),
    ("GAA", "E"),
    ("GAC", "D"),
    ("GAG", "E"),
    ("GAT", "D"),
    ("GCA", "A"),
    ("GCC", "A"),
    ("GCG", "A"),
    ("GCT", "A"),
    ("GGA", "G"),
    ("GGC", "G"),
    ("GGG", "G"),
    ("GGT", "G"),
    ("GTA", "V"),
    ("GTC", "V"),
    ("GTG", "V"),
    ("GTT", "V"),
    ("TAA", "*"),
    ("TAC", "Y"),
    ("TAG", "*"),
    ("TAT", "Y"),
    ("TCA", "S"),
    ("TCC", "S"),
    ("TCG", "S"),
    ("TCT", "S"),
    // differs from the standard code: TGA is Trp (standard: stop)
    ("TGA", "W"),
    ("TGC", "C"),
    ("TGG", "W"),
    ("TGT", "C"),
    ("TTA", "L"),
    ("TTC", "F"),
    ("TTG", "L"),
    ("TTT", "F"),
    // degenerate codons
    //
    // Not listed on purpose, because they are ambiguous under this code:
    // ATH/ATM/ATW (cover ATA = M and ATC/ATT = I), MGA/MGG/MGR (cover AGA/AGG = * and
    // CGA/CGG = R), and TRA (covers TAA = * and TGA = W).
    ("AAR", "K"),
    ("AAY", "N"),
    ("ACB", "T"),
    ("ACD", "T"),
    ("ACH", "T"),
    ("ACK", "T"),
    ("ACM", "T"),
    ("ACN", "T"),
    ("ACR", "T"),
    ("ACS", "T"),
    ("ACV", "T"),
    ("ACW", "T"),
    ("ACY", "T"),
    // AGA and AGG are both stop codons here
    ("AGR", "*"),
    ("AGY", "S"),
    // ATA and ATG are both Met here
    ("ATR", "M"),
    ("ATY", "I"),
    ("CAR", "Q"),
    ("CAY", "H"),
    ("CCB", "P"),
    ("CCD", "P"),
    ("CCH", "P"),
    ("CCK", "P"),
    ("CCM", "P"),
    ("CCN", "P"),
    ("CCR", "P"),
    ("CCS", "P"),
    ("CCV", "P"),
    ("CCW", "P"),
    ("CCY", "P"),
    ("CGB", "R"),
    ("CGD", "R"),
    ("CGH", "R"),
    ("CGK", "R"),
    ("CGM", "R"),
    ("CGN", "R"),
    ("CGR", "R"),
    ("CGS", "R"),
    ("CGV", "R"),
    ("CGW", "R"),
    ("CGY", "R"),
    ("CTB", "L"),
    ("CTD", "L"),
    ("CTH", "L"),
    ("CTK", "L"),
    ("CTM", "L"),
    ("CTN", "L"),
    ("CTR", "L"),
    ("CTS", "L"),
    ("CTV", "L"),
    ("CTW", "L"),
    ("CTY", "L"),
    ("GAR", "E"),
    ("GAY", "D"),
    ("GCB", "A"),
    ("GCD", "A"),
    ("GCH", "A"),
    ("GCK", "A"),
    ("GCM", "A"),
    ("GCN", "A"),
    ("GCR", "A"),
    ("GCS", "A"),
    ("GCV", "A"),
    ("GCW", "A"),
    ("GCY", "A"),
    ("GGB", "G"),
    ("GGD", "G"),
    ("GGH", "G"),
    ("GGK", "G"),
    ("GGM", "G"),
    ("GGN", "G"),
    ("GGR", "G"),
    ("GGS", "G"),
    ("GGV", "G"),
    ("GGW", "G"),
    ("GGY", "G"),
    ("GTB", "V"),
    ("GTD", "V"),
    ("GTH", "V"),
    ("GTK", "V"),
    ("GTM", "V"),
    ("GTN", "V"),
    ("GTR", "V"),
    ("GTS", "V"),
    ("GTV", "V"),
    ("GTW", "V"),
    ("GTY", "V"),
    ("TAR", "*"),
    ("TAY", "Y"),
    ("TCB", "S"),
    ("TCD", "S"),
    ("TCH", "S"),
    ("TCK", "S"),
    ("TCM", "S"),
    ("TCN", "S"),
    ("TCR", "S"),
    ("TCS", "S"),
    ("TCV", "S"),
    ("TCW", "S"),
    ("TCY", "S"),
    // TGA and TGG are both Trp here
    ("TGR", "W"),
    ("TGY", "C"),
    ("TTR", "L"),
    ("TTY", "F"),
    ("YTA", "L"),
    ("YTG", "L"),
    ("YTR", "L"),
];

static ref AA1_TO_AA3: FxHashMap<&'static [u8], &'static str> = {
let mut m = FxHashMap::default();
for (aa3, aa1) in AA3_TO_AA1_VEC.iter() {
Expand Down Expand Up @@ -560,6 +745,14 @@ lazy_static::lazy_static! {
m
};

/// Map from codon bytes to the one-letter amino acid byte for the vertebrate
/// mitochondrial code, holding every entry of `DNA_TO_AA1_CHRMT_VERTEBRATE_VEC`.
static ref DNA_TO_AA1_CHRMT_VERTEBRATE: FxHashMap<Vec<u8>, u8> = {
    DNA_TO_AA1_CHRMT_VERTEBRATE_VEC
        .iter()
        // Key: owned codon bytes; value: first byte of the amino acid string.
        .map(|(codon, residue)| (codon.as_bytes().to_vec(), residue.as_bytes()[0]))
        .collect()
};

static ref CODON_2BIT_TO_AA1_LUT: [u8; 64] = {
let mut result = [0; 64];
for (i, (dna3, aa1)) in DNA_TO_AA1_LUT_VEC.iter().enumerate() {
Expand All @@ -583,6 +776,18 @@ lazy_static::lazy_static! {
}
result
};

/// Amino acid byte for each 2-bit encoded codon under the vertebrate mitochondrial code.
static ref CODON_2BIT_TO_AA1_CHRMT_VERTEBRATE: [u8; 64] = {
    let mut lut = [0; 64];
    // Only the first 64 entries are read; the degenerate codons that follow are skipped.
    for (codon, residue) in DNA_TO_AA1_CHRMT_VERTEBRATE_VEC.iter().take(64) {
        let slot = dna3_to_2bit(codon.as_bytes()).expect("invalid dna3");
        lut[slot as usize] = residue.as_bytes()[0];
    }
    lut
};
}

/// Upper-case letters regarded as IUPAC ambiguity codes, stored as ASCII bytes.
static IUPAC_AMBIGUITY_CODES: &[u8] = b"BDHVNUWSMKRYZ";
Expand All @@ -605,6 +810,7 @@ pub enum TranslationTable {
#[default]
Standard,
Selenocysteine,
VertebrateMitochondrial,
}

/// Coerces string of 1- or 3-letter amino acids to 1-letter representation.
Expand Down Expand Up @@ -751,10 +957,12 @@ impl CodonTranslator {
codon_2bit_to_aa1: match table {
TranslationTable::Standard => &CODON_2BIT_TO_AA1_LUT,
TranslationTable::Selenocysteine => &CODON_2BIT_TO_AA1_SEC,
TranslationTable::VertebrateMitochondrial => &CODON_2BIT_TO_AA1_CHRMT_VERTEBRATE,
},
full_dna_to_aa1: match table {
TranslationTable::Standard => &DNA_TO_AA1_LUT,
TranslationTable::Selenocysteine => &DNA_TO_AA1_SEC,
TranslationTable::VertebrateMitochondrial => &DNA_TO_AA1_CHRMT_VERTEBRATE,
},

codon: Vec::with_capacity(3),
Expand Down Expand Up @@ -1119,6 +1327,7 @@ mod test {
assert!(AA3_TO_AA1_VEC[0] == ("Ala", "A"));
assert!(DNA_TO_AA1_LUT_VEC[0] == ("AAA", "K"));
assert!(DNA_TO_AA1_SEC_VEC[0] == ("AAA", "K"));
assert!(DNA_TO_AA1_CHRMT_VERTEBRATE_VEC[0] == ("AAA", "K"));
}

#[test]
Expand All @@ -1144,6 +1353,18 @@ mod test {

Ok(())
}

#[test]
fn codon_translator_chrmt_vertebrate() -> Result<(), Error> {
    // Use the table under test; the other translator tests cover the remaining tables.
    let mut translator = CodonTranslator::new(TranslationTable::VertebrateMitochondrial);

    // Non-degenerate codon.
    assert_eq!(translator.translate(b"AAA")?, b'K');
    // Degenerate codon.
    assert_eq!(translator.translate(b"AAR")?, b'K');

    // Codons that set this code apart from the standard one, so the test fails if the
    // wrong table is wired in.
    assert_eq!(translator.translate(b"AGA")?, b'*');
    assert_eq!(translator.translate(b"ATA")?, b'M');
    assert_eq!(translator.translate(b"TGA")?, b'W');

    Ok(())
}
}

// <LICENSE>
Expand Down

0 comments on commit eb6739b

Please sign in to comment.