diff --git a/bio-seq/src/kmer/mod.rs b/bio-seq/src/kmer/mod.rs index f6ef33e..b3c0d60 100644 --- a/bio-seq/src/kmer/mod.rs +++ b/bio-seq/src/kmer/mod.rs @@ -52,7 +52,7 @@ use serde_derive::{Deserialize, Serialize}; mod sealed { use crate::Bs; - pub trait KmerStorage: Copy + Clone { + pub trait KmerStorage: Copy + Clone + PartialEq { const BITS: usize; type BaN: AsRef + AsMut; @@ -68,6 +68,7 @@ mod sealed { pub trait KmerStorage: sealed::KmerStorage {} +/* impl sealed::KmerStorage for u32 { const BITS: usize = u32::BITS as usize; type BaN = Ba<1>; @@ -87,6 +88,7 @@ impl sealed::KmerStorage for u32 { } impl KmerStorage for u32 {} +*/ impl sealed::KmerStorage for usize { const BITS: usize = usize::BITS as usize; @@ -164,6 +166,14 @@ pub struct Kmer { pub bs: S, } +/* +impl PartialEq> for Kmer { + fn eq(&self, other: &Kmer) -> bool { + self.bs.to_bitarray() == other.bs.to_bitarray() + } +} +*/ + impl Kmer { // This error message can be formatted with constants in nightly (const_format) const _ASSERT_K: () = assert!( @@ -329,11 +339,11 @@ pub struct KmerIter<'a, A: Codec, const K: usize> { pub _p: PhantomData, } -impl Kmer { - fn unsafe_from(slice: &SeqSlice) -> Self { +impl Kmer { + fn unsafe_from(seq: &SeqSlice) -> Self { Kmer { _p: PhantomData, - bs: slice.try_into().unwrap(), + bs: S::from_bitslice(&seq.bs), } } } @@ -396,12 +406,12 @@ impl Hash for Kmer { } } -impl TryFrom<&SeqSlice> for Kmer { +impl TryFrom<&SeqSlice> for Kmer { type Error = ParseBioError; fn try_from(seq: &SeqSlice) -> Result { if seq.len() == K { - Ok(Kmer::::unsafe_from(&seq[0..K])) + Ok(Kmer::::unsafe_from(&seq[0..K])) } else { Err(ParseBioError::MismatchedLength(K, seq.len())) } @@ -424,6 +434,24 @@ impl From> for String { } */ +impl PartialEq> for Kmer { + fn eq(&self, seq: &SeqArray) -> bool { + if seq.len() != K { + return false; + } + &Kmer::::unsafe_from(seq.as_ref()) == self + } +} + +impl PartialEq<&SeqArray> for Kmer { + fn eq(&self, seq: &&SeqArray) -> bool { + if seq.len() != K { + 
return false; + } + &Kmer::::unsafe_from(seq.as_ref()) == self + } +} + impl PartialEq> for Kmer { fn eq(&self, seq: &Seq) -> bool { if seq.len() != K { @@ -433,12 +461,21 @@ impl PartialEq> for Kmer { } } -impl PartialEq> for Kmer { +impl PartialEq> for Kmer { fn eq(&self, seq: &SeqSlice) -> bool { if seq.len() != K { return false; } - &Kmer::::unsafe_from(seq) == self + &Kmer::::unsafe_from(seq) == self + } +} + +impl PartialEq<&SeqSlice> for Kmer { + fn eq(&self, seq: &&SeqSlice) -> bool { + if seq.len() != K { + return false; + } + &Kmer::::unsafe_from(seq) == self } } @@ -448,7 +485,7 @@ impl PartialEq<&str> for Kmer { } } -impl FromStr for Kmer { +impl FromStr for Kmer { type Err = ParseBioError; fn from_str(s: &str) -> Result { @@ -456,7 +493,7 @@ impl FromStr for Kmer { return Err(ParseBioError::MismatchedLength(K, s.len())); } let seq: Seq = Seq::from_str(s)?; - Kmer::::try_from(seq.as_ref()) + Kmer::::try_from(seq.as_ref()) } } @@ -653,16 +690,18 @@ mod tests { assert_ne!(kmer.to_string(), "ACTGCGATGA"); } - /* #[test] fn eq_functions() { assert_eq!(kmer!("ACGT"), dna!("ACGT")); - assert_ne!(kmer!("ACGT"), dna!("ACGTA")); + + // this should be a compiler error: + // assert_ne!(kmer!("ACGT"), dna!("ACGTA")); + let kmer: Kmer = Kmer::from_str("ACGT").unwrap(); assert_eq!(kmer, iupac!("ACGT")); assert_ne!(kmer, iupac!("NCGT")); } - */ + #[test] fn kmer_iter() { //let seq = dna!("ACTGA"); @@ -728,6 +767,62 @@ mod tests { assert_eq!(seq.to_string(), "TTTTTTTTTTTTTTTTAGCTAGCTAGCTAGCT"); } + #[test] + fn kmer_storage_types() { + let s1 = "AACGTAGCCGCGAACTTACGTAGCCGCGAAAA"; + let s2 = "AACGTAGCCGCGAACTTACGTAGCCGCGAAA"; + let s3 = "ACGTAGCCGCGAACTTACGTAGCCGCGAAAA"; + + let s4 = "AACGTAGCCGCGAACTTACGTAGCCGCGAAAAAACGTAGCCGCGAACTTACGTAGCCGCGAAAA"; + let s5 = "AACGTAGCCGCGAACTTACGTAGCCGCGAAAAAACGTAGCCGCGAACTTACGTAGCCGCGAAAAA"; + + assert_eq!(s1.len(), 32); + assert_eq!(s2.len(), 31); + assert_eq!(s3.len(), 31); + assert_eq!(s4.len(), 64); + assert_eq!(s5.len(), 65); + 
+ let kmer1_64 = Kmer::::from_str(&s1).unwrap(); + let kmer2_64 = Kmer::::from_str(&s2).unwrap(); + let kmer3_64 = Kmer::::from_str(&s3).unwrap(); + + let kmer1 = Kmer::::from_str(&s1).unwrap(); + let kmer2 = Kmer::::from_str(&s2).unwrap(); + let kmer3 = Kmer::::from_str(&s3).unwrap(); + + let kmer4_128 = Kmer::::from_str(&s4).unwrap(); + + let seq5: Seq = s5.try_into().unwrap(); + + assert_eq!(kmer4_128, &seq5[..64]); + assert_ne!(kmer4_128, &seq5[1..]); + + assert_eq!(kmer1, &seq5[..32]); + assert_eq!(kmer1, &seq5[32..64]); + + assert_eq!(kmer1_64, &seq5[..32]); + assert_eq!(kmer1_64, &seq5[32..64]); + + assert_ne!(kmer1, &seq5[..31]); + assert_ne!(kmer1, &seq5[32..]); + + assert_ne!(kmer1_64, &seq5[1..33]); + assert_ne!(kmer1_64, &seq5[33..]); + + assert_eq!(kmer4_128, kmer4_128); + assert_eq!(kmer1_64, kmer1_64); + assert_eq!(kmer2, kmer2); + + assert_ne!(kmer2, kmer3); + assert_ne!(kmer2_64, kmer3_64); + // PartialEq is not implemented for different storage types + /* + assert_ne!(kmer2, kmer3_64); + assert_eq!(kmer2, kmer2_64); + assert_eq!(kmer1, kmer1_64); + */ + } + + #[test] fn try_from_seq() { let seq: Seq = Seq::try_from("ACACACACACACGT").unwrap(); @@ -753,12 +848,4 @@ mod tests { "ACACACACACACGT" ); } - /* - #[test] - fn kmer_static() { - static STATIC_KMER: Kmer = kmer!("TTTTTTTT"); - - assert_eq!(STATIC_KMER.to_string(), "ACGT"); - } - */ } diff --git a/bio-seq/src/seq/mod.rs b/bio-seq/src/seq/mod.rs index cdba297..988d41f 100644 --- a/bio-seq/src/seq/mod.rs +++ b/bio-seq/src/seq/mod.rs @@ -1075,4 +1075,14 @@ mod tests { assert_eq!(hash1, hash2); } */ + #[test] + fn static_eq() { + static STATIC_SEQ: &SeqArray = dna!("TTTTTTTT"); + + assert_ne!(STATIC_SEQ, Seq::::try_from("ACGT").unwrap()); + assert_eq!(STATIC_SEQ, Seq::::try_from("TTTTTTTT").unwrap()); + + assert_ne!(STATIC_SEQ, Seq::::try_from("ACGT").unwrap()); + assert_eq!(STATIC_SEQ, Seq::::try_from("TTTTTTTT").unwrap()); + } }