Skip to content

Commit

Permalink
migrate codecs to new Codec trait def
Browse files Browse the repository at this point in the history
  • Loading branch information
jeff-k committed May 31, 2024
1 parent d81e995 commit 2095362
Show file tree
Hide file tree
Showing 6 changed files with 91 additions and 34 deletions.
12 changes: 12 additions & 0 deletions bio-seq/src/codec/amino.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,18 @@ pub enum Amino {
X = 0b000011, // TAA (stop)
}

/// Converts an [`Amino`] into its underlying `u8` discriminant.
impl From<Amino> for u8 {
    fn from(amino: Amino) -> Self {
        // A plain enum-to-integer cast yields the variant's discriminant.
        amino as Self
    }
}

/// Displays an [`Amino`] as the single character returned by `to_char`.
///
/// Formatting is delegated to `char`'s own `Display` implementation so that
/// width, fill and alignment flags supplied by the caller (e.g. `{:>3}`) are
/// honoured; `write!(f, "{}", ..)` would start a fresh format spec and drop them.
impl core::fmt::Display for Amino {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        core::fmt::Display::fmt(&self.to_char(), f)
    }
}

#[macro_export]
macro_rules! amino {
($seq:expr) => {
Expand Down
2 changes: 1 addition & 1 deletion bio-seq/src/codec/dna.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ impl Codec for Dna {
impl Complement for Dna {
/// This 2-bit representation of nucleotides lends itself to a very fast
/// complement implementation with bitwise xor
fn to_comp(&self) -> Self {
fn comp(&self) -> Self {
// flip the bits
let b = *self as u8 ^ 0b11;
Dna::unsafe_from_bits(b)
Expand Down
31 changes: 31 additions & 0 deletions bio-seq/src/codec/iupac.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,37 @@ use crate::seq::{Seq, SeqSlice};

use core::ops::{BitAnd, BitOr};

/*
const LTABLE: [u8; 256] = {
let mut table = [0; 256];
table[b'A' as usize] = 0b1000;
table[b'C' as usize] = 0b0100;
table[b'G' as usize] = 0b0010;
table[b'T' as usize] = 0b0001;
table[b'R' as usize] = 0b1010;
table[b'Y' as usize] = 0b0101;
table[b'S' as usize] = 0b0110;
table[b'W' as usize] = 0b1001;
table[b'K' as usize] = 0b0011;
table[b'M' as usize] = 0b1100;
table[b'B' as usize] = 0b0111;
table[b'D' as usize] = 0b1011;
table[b'H' as usize] = 0b1101;
table[b'V' as usize] = 0b1110;
table[b'N' as usize] = 0b1111;
table[b'-' as usize] = 0b0000;
table
};
*/

/// Converts an [`Iupac`] value into its `u8` representation via a plain `as` cast.
impl From<Iupac> for u8 {
    fn from(symbol: Iupac) -> Self {
        symbol as Self
    }
}

#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Codec)]
#[bits(4)]
#[repr(u8)]
Expand Down
43 changes: 30 additions & 13 deletions bio-seq/src/codec/text.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
//! 8-bit UTF-8/ASCII representation of nucleotides
//! 8-bit ASCII representation of nucleotides
//!
//! This encoding is a literal interpretation of bytes of text as DNA
use crate::codec::{dna, Codec, Complement};
Expand All @@ -9,28 +9,35 @@ use crate::error::ParseBioError;
pub struct Dna(u8);

impl Codec for Dna {
type Error = ParseBioError;

const BITS: usize = 8;
const BITS: u8 = 8;

fn unsafe_from_bits(b: u8) -> Self {
Self(b)
}

fn try_from_bits(b: u8) -> Result<Self, Self::Error> {
Ok(Self(b))
fn try_from_bits(b: u8) -> Option<Self> {
Some(Self(b))
}

fn unsafe_from_ascii(c: u8) -> Self {
Self(c)
}

fn from_char(c: char) -> Result<Self, Self::Error> {
match u8::try_from(c) {
Ok(b) => Ok(Self(b)),
_ => Err(Self::Error {}),
fn try_from_ascii(c: u8) -> Option<Self> {
// if c.is_ascii_alphanumeric() {
match c {
b'A' | b'C' | b'G' | b'T' | b'N' => Some(Self(c)),
_ => None,
}
}

fn to_char(self) -> char {
self.0.into()
}

fn items() -> impl Iterator<Item = Self> {
vec![Dna(b'A'), Dna(b'C'), Dna(b'G'), Dna(b'T'), Dna(b'N')].into_iter()
}
}

//impl Eq for Dna {
Expand All @@ -40,8 +47,15 @@ impl Codec for Dna {
//}

impl Complement for Dna {
fn comp(self) -> Self {
unimplemented!()
fn comp(&self) -> Self {
match self {
Self(b'A') => Self(b'T'),
Self(b'C') => Self(b'G'),
Self(b'G') => Self(b'C'),
Self(b'T') => Self(b'A'),
Self(b'N') => Self(b'N'),
_ => Self(b'N'),
}
}
}

Expand Down Expand Up @@ -71,11 +85,14 @@ impl TryFrom<Dna> for dna::Dna {
b'C' => Ok(dna::Dna::C),
b'G' => Ok(dna::Dna::G),
b'T' => Ok(dna::Dna::T),
// TODO: decide whether to support lowercase bases
/*
b'a' => Ok(dna::Dna::A),
b'c' => Ok(dna::Dna::C),
b'g' => Ok(dna::Dna::G),
b't' => Ok(dna::Dna::T),
_ => Err(ParseBioError {}),
*/
_ => Err(ParseBioError::UnrecognisedBase(base.0)),
}
}
}
Expand Down
11 changes: 5 additions & 6 deletions bio-seq/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,20 +54,19 @@ pub mod codec;
pub mod error;
pub mod kmer;
pub mod seq;
//pub mod translation;
pub mod translation;

pub mod prelude {
// pub use crate::codec::amino::Amino;
pub use crate::codec::amino::Amino;
pub use crate::codec::dna::Dna;
// pub use crate::codec::iupac::Iupac;
pub use crate::codec::iupac::Iupac;
pub use crate::codec::{Codec, Complement};

pub use crate::kmer::Kmer;
pub use crate::seq::{ReverseComplement, Seq, SeqSlice};
// pub use crate::{amino, dna, iupac, kmer};
pub use crate::{dna, kmer};
pub use crate::{amino, dna, iupac, kmer};

// pub use crate::translation;
pub use crate::translation;
pub use core::str::FromStr;

pub use crate::error::ParseBioError;
Expand Down
26 changes: 12 additions & 14 deletions bio-seq/src/seq/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@
pub mod index;
pub mod iterators;

use crate::codec::{Codec, Complement};
//use crate::codec::{text, Codec, Complement};
//use crate::codec::{Codec, Complement};
use crate::codec::{text, Codec, Complement};
use crate::error::ParseBioError;

use crate::{Bs, Bv, Order};
Expand Down Expand Up @@ -62,17 +62,6 @@ pub struct SeqSlice<A: Codec> {
bs: Bs,
}

/*
impl From<Vec<u8>> for Seq<text::Dna> {
fn from(vec: Vec<u8>) -> Self {
Seq {
_p: PhantomData,
bv: Bv::from_vec(vec),
}
}
}
*/

impl<A: Codec> From<Seq<A>> for usize {
fn from(slice: Seq<A>) -> usize {
assert!(slice.bv.len() <= usize::BITS as usize);
Expand Down Expand Up @@ -113,7 +102,7 @@ impl<A: Codec + Complement> ReverseComplement for SeqSlice<A> {
/// The inefficient default implementation of reverse complement
fn revcomp(&self) -> Seq<A> {
let mut seq = Seq::<A>::with_capacity(self.len());
seq.extend(self.rev().map(|base| base.to_comp()));
seq.extend(self.rev().map(|base| base.comp()));
seq
}
}
Expand Down Expand Up @@ -516,6 +505,15 @@ impl<A: Codec> FromIterator<A> for Seq<A> {
}
}

/// Wraps a vector of bytes as a text-encoded DNA sequence.
///
/// The vector is handed unchanged to `Bv::from_vec`; this impl performs no
/// per-byte validation of its own.
impl From<Vec<u8>> for Seq<text::Dna> {
    fn from(bytes: Vec<u8>) -> Self {
        let bv = Bv::from_vec(bytes);
        Self {
            _p: PhantomData,
            bv,
        }
    }
}

#[cfg(test)]
mod tests {
use crate::prelude::*;
Expand Down

0 comments on commit 2095362

Please sign in to comment.