Skip to content

Commit

Permalink
move experimental codecs to "extra_codecs" feature gate
Browse files Browse the repository at this point in the history
  • Loading branch information
jeff-k committed Oct 15, 2024
1 parent 8cdf277 commit b6f59ac
Show file tree
Hide file tree
Showing 8 changed files with 295 additions and 5 deletions.
3 changes: 2 additions & 1 deletion bio-seq/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,11 @@ serde_derive = { version = "1", optional = true }
serde = ["dep:serde", "dep:serde_derive", "bitvec/serde"]
translation = []
simd = []
extra_codecs = []

#[[example]]
#name = "aminokmers"

[package.metadata.docs.rs]
features = ["translation", "serde"]
features = ["translation", "serde", "extra_codecs"]
# all-features = true
35 changes: 35 additions & 0 deletions bio-seq/src/codec/degenerate/dna.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
use crate::codec::Codec;
use crate::codec::Complement;
use crate::seq::{ReverseComplement, Seq};

/// 1-bit encoding for `S`trong (`G`/`C`) and `W`eak (`A`/`T`) nucleotides
///
/// Each base and its complement map to the same symbol, so a symbol
/// only records whether a position holds a strong or a weak base.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Codec)]
#[bits(1)]
#[repr(u8)]
pub enum Dna {
// Weak: `A` or `T`
W = 0b0,
// Strong: `G` or `C`
S = 0b1,
}

impl Complement for Dna {
/// This representation collapses complements; `comp(x) == x`
fn comp(&self) -> Self {
*self
}
}

impl ReverseComplement for Seq<Dna> {
    type Output = Self;

    /// Symbols are one bit wide and self-complementary, so the reverse
    /// complement is just the underlying bit sequence in reverse order
    fn revcomp(&self) -> Self {
        let mut reversed = self.bv.clone();
        reversed.reverse();
        Self::from(reversed)
    }
}

#[cfg(test)]
mod tests {
    use super::Dna;
    use crate::codec::Complement;

    /// The strong/weak alphabet collapses complements, so `comp` must
    /// return its input unchanged for both symbols.
    #[test]
    fn complement_is_identity() {
        assert_eq!(Dna::W.comp(), Dna::W);
        assert_eq!(Dna::S.comp(), Dna::S);
    }
}
110 changes: 110 additions & 0 deletions bio-seq/src/codec/degenerate/iupac.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
use crate::codec::Codec;
use crate::seq::{ReverseComplement, Seq};

/// 5-bit encoding of IUPAC nucleotide symbols (work in progress)
///
/// The four outer bits each flag one base: `A` (bit 4), `C` (bit 3),
/// `G` (bit 1) and `T` (bit 0); ambiguity codes are the bitwise OR of the
/// bases they stand for. The middle bit (bit 2) is set in every `*Masked`
/// variant and clear in every other variant.
///
/// NOTE(review): `X` and all `*Masked` variants share the display
/// character `'-'`, so they cannot be told apart once rendered as text.
/// Presumably these are placeholders -- confirm the intended characters
/// before enabling this module.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Codec)]
#[bits(5)]
#[repr(u8)]
pub enum Iupac {
// Single bases: exactly one base flag set
A = 0b10_0_00,
C = 0b01000,
G = 0b00010,
T = 0b00001,

// Two-base codes
Y = 0b01001, // C | T
R = 0b10010, // A | G
W = 0b10001, // A | T
S = 0b01010, // C | G

K = 0b00011, // G | T
M = 0b11000, // A | C
// Three-base codes
D = 0b10011, // A | G | T
V = 0b11010, // A | C | G

H = 0b11001, // A | C | T
B = 0b01011, // C | G | T

// All four base flags set / no base flag set
N = 0b11011,
#[display('-')]
X = 0b00000,

// Variants below repeat the patterns above with the middle bit set
#[display('-')]
AMasked = 0b10_1_00,
#[display('-')]
CMasked = 0b01100,
#[display('-')]
GMasked = 0b00110,
#[display('-')]
TMasked = 0b00101,

#[display('-')]
YMasked = 0b01101,
#[display('-')]
RMasked = 0b10110,
#[display('-')]
WMasked = 0b10101,
#[display('-')]
SMasked = 0b01110,

#[display('-')]
KMasked = 0b00111,
#[display('-')]
MMasked = 0b11100,
#[display('-')]
DMasked = 0b10111,
#[display('-')]
VMasked = 0b11110,

#[display('-')]
HMasked = 0b11101,
#[display('-')]
BMasked = 0b01111,

#[display('-')]
NMasked = 0b11111,
#[display('-')]
XMasked = 0b00100,

}

/*
impl Complement for Dna {
/// This representation can be complemented by reversing the bit pattern
fn comp(&self) -> Self {
// reverse the bits
Dna::unsafe_from_bits(b)
}
}
*/

/*
impl Dna {
/// Flipping the bit pattern masks/unmasks this representation
pub fn mask(&self) -> Self {
let b = *self as u8 ^ 0b1111;
Dna::unsafe_from_bits(b)
}
}
impl Seq<Dna> {
pub fn mask(&self) -> Self {
Self::from(!self.bv.clone())
}
}
impl ReverseComplement for Seq<Dna> {
type Output = Self;
fn revcomp(&self) -> Self {
let mut bv = self.bv.clone();
bv.reverse();
Self::from(bv)
}
}
*/
#[cfg(test)]
mod tests {
    // Placeholder module: no test cases have been written yet.
    use crate::{codec::masked, prelude::*};
}
16 changes: 16 additions & 0 deletions bio-seq/src/codec/degenerate/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
//! Experimental encodings with degenerate representations
//! Currently provides the 1-bit strong/weak `Dna` codec; the `iupac` codec is not yet enabled
pub mod dna;

// TODO
//pub mod iupac;

pub use dna::Dna;
//pub use iupac::Iupac;

#[cfg(test)]
mod tests {
    // Placeholder module: no test cases have been written yet.
    use crate::{codec::masked, prelude::*};
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
//! Experimental 4-bit DNA encoding with masked bases
//! Includes `N`, `n`, `.`, `-`
use crate::codec::Codec;
use crate::seq::{ReverseComplement, Seq};

Expand Down
108 changes: 108 additions & 0 deletions bio-seq/src/codec/masked/iupac.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
use crate::codec::masked::Maskable;
use crate::codec::{Codec, Complement};
use crate::seq::{ReverseComplement, Seq};

/// 5-bit encoding for maskable IUPAC symbols
/// The middle bit is the mask flag and symbols are complemented by reversing the bit pattern
///
/// The four outer bits each flag one base: `A` (bit 4), `C` (bit 3),
/// `G` (bit 1) and `T` (bit 0); ambiguity codes are the bitwise OR of the
/// bases they stand for. Because `A`/`T` and `C`/`G` sit in mirrored
/// positions, reversing the five bits swaps each base with its complement
/// and leaves the mask flag where it is.
///
/// Masked variants display as the lowercase letter of their unmasked
/// counterpart; `X` displays as `-` and `XMasked` as `.`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Codec)]
#[bits(5)]
#[repr(u8)]
pub enum Iupac {
// Single bases: exactly one base flag set
A = 0b10000,
C = 0b01000,
G = 0b00010,
T = 0b00001,

// Two-base codes
Y = 0b01001, // C | T
R = 0b10010, // A | G
W = 0b10001, // A | T
S = 0b01010, // C | G

K = 0b00011, // G | T
M = 0b11000, // A | C
// Three-base codes
D = 0b10011, // A | G | T
V = 0b11010, // A | C | G

H = 0b11001, // A | C | T
B = 0b01011, // C | G | T

// All four base flags set / no base flag set
N = 0b11011,
#[display('-')]
X = 0b00000,

// Masked variants: the patterns above with the middle bit set
#[display('a')]
AMasked = 0b10100,
#[display('c')]
CMasked = 0b01100,
#[display('g')]
GMasked = 0b00110,
#[display('t')]
TMasked = 0b00101,

#[display('y')]
YMasked = 0b01101,
#[display('r')]
RMasked = 0b10110,
#[display('w')]
WMasked = 0b10101,
#[display('s')]
SMasked = 0b01110,

#[display('k')]
KMasked = 0b00111,
#[display('m')]
MMasked = 0b11100,
#[display('d')]
DMasked = 0b10111,
#[display('v')]
VMasked = 0b11110,

#[display('h')]
HMasked = 0b11101,
#[display('b')]
BMasked = 0b01111,

#[display('n')]
NMasked = 0b11111,
#[display('.')]
XMasked = 0b00100,
}

impl Complement for Iupac {
    /// This representation can be complemented by reversing the bit pattern
    ///
    /// The base flags are laid out as `A C m G T` (with `m` the mask flag),
    /// so mirroring the five bits swaps `A` with `T` and `C` with `G` while
    /// the mask flag stays in the middle. Every 5-bit pattern is a declared
    /// variant, so the reversed pattern always names a valid symbol.
    fn comp(&self) -> Self {
        // `reverse_bits` mirrors all eight bits of the byte, which leaves the
        // five payload bits in the high end; shift them back down by 8 - 5.
        let b = (*self as u8).reverse_bits() >> 3;
        Self::unsafe_from_bits(b)
    }
}

impl Maskable for Iupac {
    /// Returns this symbol with the mask flag (the middle bit) switched on
    fn mask(&self) -> Self {
        const MASK_FLAG: u8 = 0b00100;
        Self::unsafe_from_bits(*self as u8 | MASK_FLAG)
    }

    /// Returns this symbol with the mask flag (the middle bit) switched off
    fn unmask(&self) -> Self {
        const MASK_FLAG: u8 = 0b00100;
        // Symbols occupy only the low five bits, so clearing the flag bit
        // is the same as keeping the four base bits.
        Self::unsafe_from_bits(*self as u8 & !MASK_FLAG)
    }
}

impl ReverseComplement for Seq<Iupac> {
    type Output = Self;

    /// Reversing the whole bit sequence both reverses the symbol order and
    /// mirrors the bits inside each symbol, which is how this encoding
    /// complements a symbol
    fn revcomp(&self) -> Self {
        let mut flipped = self.bv.clone();
        flipped.reverse();
        Self::from(flipped)
    }
}

#[cfg(test)]
mod tests {
    use super::Iupac;
    use crate::codec::masked::Maskable;

    /// Masking sets the middle bit and is idempotent.
    #[test]
    fn mask_sets_flag() {
        assert_eq!(Iupac::A.mask(), Iupac::AMasked);
        assert_eq!(Iupac::Y.mask(), Iupac::YMasked);
        assert_eq!(Iupac::N.mask(), Iupac::NMasked);
        assert_eq!(Iupac::X.mask(), Iupac::XMasked);
        assert_eq!(Iupac::AMasked.mask(), Iupac::AMasked);
    }

    /// Unmasking clears the middle bit and is idempotent.
    #[test]
    fn unmask_clears_flag() {
        assert_eq!(Iupac::AMasked.unmask(), Iupac::A);
        assert_eq!(Iupac::YMasked.unmask(), Iupac::Y);
        assert_eq!(Iupac::NMasked.unmask(), Iupac::N);
        assert_eq!(Iupac::XMasked.unmask(), Iupac::X);
        assert_eq!(Iupac::A.unmask(), Iupac::A);
    }

    /// Masking followed by unmasking returns the original symbol.
    #[test]
    fn mask_unmask_round_trip() {
        for sym in [Iupac::C, Iupac::G, Iupac::T, Iupac::R, Iupac::B, Iupac::X] {
            assert_eq!(sym.mask().unmask(), sym);
        }
    }
}
18 changes: 18 additions & 0 deletions bio-seq/src/codec/masked/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
//! Experimental encodings with maskable bases
pub mod dna;
pub mod iupac;

pub use dna::Dna;
pub use iupac::Iupac;

/// Symbols that have both a masked and an unmasked form
pub trait Maskable {
/// Returns the masked form of this symbol
fn mask(&self) -> Self;
/// Returns the unmasked form of this symbol
fn unmask(&self) -> Self;
}

#[cfg(test)]
mod tests {
    // Placeholder module: no test cases have been written yet.
    use crate::{codec::masked, prelude::*};
}
7 changes: 6 additions & 1 deletion bio-seq/src/codec/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -119,13 +119,18 @@ use core::hash::Hash;
pub mod amino;
pub mod dna;
pub mod iupac;

#[cfg(feature = "extra_codecs")]
pub mod masked;

#[cfg(feature = "extra_codecs")]
pub mod degenerate;

pub mod text;

pub use bio_seq_derive::Codec;

/// The bit encodings of an alphabet's symbols can be represented with any type.
/// The binary encoding of an alphabet's symbols can be represented with any type.
/// Encoding from ASCII bytes and decoding the representation is implemented through
/// the `Codec` trait.
///
Expand Down

0 comments on commit b6f59ac

Please sign in to comment.