-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
initial test of masked sequence encoding (#5)
- Loading branch information
Showing
4 changed files
with
109 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,6 @@ | ||
[package] | ||
name = "bio-seq" | ||
version = "0.13.0" | ||
version = "0.13.1" | ||
authors = ["jeff-k <[email protected]>"] | ||
edition = "2021" | ||
description = "Bit packed and well-typed biological sequences" | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,98 @@ | ||
//! Experimental 4-bit DNA encoding with masked bases | ||
//! Includes `N`, `n`, `.`, `-` | ||
use crate::codec::Codec; | ||
use crate::seq::{ReverseComplement, Seq}; | ||
|
||
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Codec)] | ||
#[bits(4)] | ||
#[repr(u8)] | ||
pub enum Dna { | ||
A = 0b1000, | ||
C = 0b0100, | ||
G = 0b0010, | ||
T = 0b0001, | ||
|
||
#[display('a')] | ||
AMasked = 0b0111, | ||
#[display('c')] | ||
CMasked = 0b1011, | ||
#[display('g')] | ||
GMasked = 0b1101, | ||
#[display('t')] | ||
TMasked = 0b1110, | ||
|
||
N = 0b0000, | ||
#[display('n')] | ||
NMasked = 0b1111, | ||
|
||
#[display('-')] | ||
#[alt(0b0011)] | ||
Gap = 0b1100, | ||
|
||
#[display('.')] | ||
#[alt(0b0101)] | ||
Pad = 0b1010, | ||
|
||
#[display('?')] | ||
Unknown1 = 0b0110, | ||
|
||
#[display('!')] | ||
Unknown2 = 0b1001, | ||
} | ||
|
||
/*
Disabled sketch of a per-base complement, kept for reference.

impl Complement for Dna {
    /// This representation can be complemented by reversing the bit pattern
    fn comp(&self) -> Self {
        // reverse the order of the four bits, e.g. A (0b1000) <-> T (0b0001)
        let b = (*self as u8).reverse_bits() >> 4;
        Dna::unsafe_from_bits(b)
    }
}
*/
|
||
impl Dna { | ||
/// Flipping the bit pattern masks/unmasks this representation | ||
pub fn mask(&self) -> Self { | ||
let b = *self as u8 ^ 0b1111; | ||
Dna::unsafe_from_bits(b) | ||
} | ||
} | ||
|
||
impl Seq<Dna> { | ||
pub fn mask(&self) -> Self { | ||
Self::from(!self.bv.clone()) | ||
} | ||
} | ||
|
||
impl ReverseComplement for Seq<Dna> { | ||
type Output = Self; | ||
|
||
fn revcomp(&self) -> Self { | ||
let mut bv = self.bv.clone(); | ||
bv.reverse(); | ||
Self::from(bv) | ||
} | ||
} | ||
|
||
#[cfg(test)]
mod tests {
    use crate::codec::masked;
    use crate::prelude::*;

    /// Input shared by both tests: a mix of unmasked and masked bases together
    /// with `N`, pad (`.`) and gap (`-`) symbols.
    const INPUT: &str = "A.TCGCgtcataN--A";

    #[test]
    fn mask_sequence() {
        let seq = Seq::<masked::Dna>::try_from(INPUT).unwrap();
        let masked = seq.mask().to_string();

        // Near miss: differs from the expected output in one base and in the
        // case of `N`.
        assert_ne!(masked, "a.tcgcGTGATAN--a");
        // Case is swapped for every base; pad and gap symbols are unchanged.
        assert_eq!(masked, "a.tcgcGTCATAn--a");
    }

    #[test]
    fn masked_revcomp() {
        let seq = Seq::<masked::Dna>::try_from(INPUT).unwrap();
        let rc = seq.revcomp().to_string();

        // Near miss: one base differs from the expected output.
        assert_ne!(rc, "T--NtaagacGCGA.T");
        // Order is reversed, bases are complemented, and masking is kept.
        assert_eq!(rc, "T--NtatgacGCGA.T");
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -119,6 +119,7 @@ use core::hash::Hash; | |
pub mod amino; | ||
pub mod dna; | ||
pub mod iupac; | ||
pub mod masked; | ||
|
||
pub mod text; | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters