diff --git a/bio-seq-derive/src/lib.rs b/bio-seq-derive/src/lib.rs index ebe03ca..e893484 100644 --- a/bio-seq-derive/src/lib.rs +++ b/bio-seq-derive/src/lib.rs @@ -80,7 +80,7 @@ pub fn codec_derive(input: TokenStream) -> TokenStream { } }; - alt_discriminants.push(quote! { #value => Ok(Self::#ident) }); + alt_discriminants.push(quote! { #value => Some(Self::#ident) }); unsafe_alts.push(quote! { #value => Self::#ident }); max_variant = max_variant.max(value); @@ -90,7 +90,9 @@ pub fn codec_derive(input: TokenStream) -> TokenStream { .into(); } - let mut char_repr = ident.to_string().chars().next().unwrap(); + //let mut char_repr = ident.to_string().chars().next().unwrap(); + + let mut char_repr = ident.to_string().bytes().next().unwrap(); for attr in &variant.attrs { if attr.path().is_ident("display") { @@ -98,7 +100,7 @@ pub fn codec_derive(input: TokenStream) -> TokenStream { Ok(attr) => attr, Err(err) => return err.to_compile_error().into(), }; - char_repr = alt_attr.value(); + char_repr = alt_attr.value() as u8; } else if attr.path().is_ident("alt") { let discs: Punctuated = match attr.parse_args_with(Punctuated::parse_terminated) { @@ -107,24 +109,24 @@ pub fn codec_derive(input: TokenStream) -> TokenStream { }; for d in discs.into_iter() { - alt_discriminants.push(quote! { #d => Ok(Self::#ident) }); + alt_discriminants.push(quote! { #d => Some(Self::#ident) }); unsafe_alts.push(quote! { #d => Self::#ident }); } }; } variants_to_char.push(quote! { Self::#ident => #char_repr }); - chars_to_variant.push(quote! { #char_repr => Ok(Self::#ident) }); + chars_to_variant.push(quote! { #char_repr => Some(Self::#ident) }); } // default width is the log2 of the max_variant - let mut width = f32::ceil(f32::log2(max_variant as f32)) as usize; + let mut width = f32::ceil(f32::log2(max_variant as f32)) as u8; for attr in &enum_ast.attrs { if attr.path().is_ident("bits") { width = match attr.parse_args::() { Ok(w) => { - let chosen_width = w.base10_parse::().unwrap(); + let chosen_width = w.base10_parse::().unwrap(); // test whether the specified width is too small if chosen_width < width { return syn::Error::new_spanned( @@ -143,19 +145,18 @@ pub fn codec_derive(input: TokenStream) -> TokenStream { }; } - let parse_error = quote! { crate::prelude::ParseBioError }; + //let parse_error = quote! { crate::prelude::ParseBioError }; // Generate the implementation let output = quote! { impl Codec for #enum_ident { - type Error = #parse_error; - const BITS: usize = #width; + const BITS: u8 = #width; fn unsafe_from_bits(b: u8) -> Self { //debug_assert!(false, "Invalid encoding: {b:?}"); match b { #(#unsafe_alts),*, - _ => unreachable!(), + x => panic!("Unrecognised bit pattern: {}", x), } } @@ -169,28 +170,35 @@ pub fn codec_derive(input: TokenStream) -> TokenStream { fn unsafe_from_ascii(c: u8) -> Self { match c { #(#chars_to_variant),*, - _ => panic!(), - } + x => { + if x.is_ascii_alphanumeric() { + panic!("Unrecognised character: {} ({:#04X?})", x as char, x); + } else { + panic!("Unrecognised character: {:#04X?}", x); + } + }, + }.unwrap() } - fn try_from_ascii(c: u8) -> Result { + fn try_from_ascii(c: u8) -> Option { match c { #(#chars_to_variant),*, - _ => Err(#parse_error {}), + _ => None, } } fn to_char(self) -> char { match self { #(#variants_to_char),*, - } + }.into() } - pub fn items() -> impl Iterator { + fn items() -> impl Iterator { vec![ #(Self::#variant_idents,)* ].into_iter() } } }; + //println!("{}", output); output.into() } diff --git a/bio-seq/src/codec/mod.rs b/bio-seq/src/codec/mod.rs index 4ba738f..0a7ab0d 100644 --- a/bio-seq/src/codec/mod.rs +++ b/bio-seq/src/codec/mod.rs @@ -6,9 +6,9 @@ //! The `BITS` attribute stores the number of bits used by the representation. //! ``` //! use bio_seq::prelude::{Dna, Codec}; -//! //use bio_seq::codec::text; +//! use bio_seq::codec::text; //! assert_eq!(Dna::BITS, 2); -//! //assert_eq!(text::Dna::BITS, 8); +//! assert_eq!(text::Dna::BITS, 8); //! ``` //! //! ## Deriving custom Codecs @@ -31,29 +31,95 @@ //! //! Custom encodings can be defined on enums by implementing the `Codec` trait. //! -//! ```ignore +//! ``` //! use bio_seq::prelude; //! use bio_seq::prelude::Codec; //! -//! #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Codec)] +//! #[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)] //! pub enum Dna { //! A = 0b00, //! C = 0b01, //! G = 0b10, //! T = 0b11, //! } +//! +//! impl From for u8 { +//! fn from(base: Dna) -> u8 { +//! match base { +//! Dna::A => 0b00, +//! Dna::C => 0b01, +//! Dna::G => 0b10, +//! Dna::T => 0b11, +//! } +//! } +//! } +//! +//! impl Codec for Dna { +//! const BITS: u8 = 2; +//! +//! fn unsafe_from_bits(bits: u8) -> Self { +//! if let Some(base) = Self::try_from_bits(bits) { +//! base +//! } else { +//! panic!("Unrecognised bit pattern!") +//! } +//! } +//! +//! fn try_from_bits(bits: u8) -> Option { +//! match bits { +//! 0b00 => Some(Dna::A), +//! 0b01 => Some(Dna::C), +//! 0b10 => Some(Dna::G), +//! 0b11 => Some(Dna::T), +//! _ => None, +//! } +//! } +//! +//! fn unsafe_from_ascii(chr: u8) -> Self { +//! if let Some(base) = Self::try_from_ascii(chr) { +//! base +//! } else { +//! panic!("Unrecognised bit pattern!") +//! } +//! } +//! +//! fn try_from_ascii(chr: u8) -> Option { +//! match chr { +//! b'A' => Some(Dna::A), +//! b'C' => Some(Dna::C), +//! b'G' => Some(Dna::G), +//! b'T' => Some(Dna::T), +//! _ => None, +//! } +//! } +//! +//! fn to_char(self) -> char { +//! match self { +//! Dna::A => 'A', +//! Dna::C => 'C', +//! Dna::G => 'G', +//! Dna::T => 'T', +//! } +//! } +//! +//! fn items() -> impl Iterator { +//! vec![Dna::A, Dna::C, Dna::G, Dna::T].into_iter() +//! } +//! } +//! //! ``` +use core::fmt; use core::hash::Hash; -//#[macro_use] -//pub mod amino; +#[macro_use] +pub mod amino; #[macro_use] pub mod dna; -//#[macro_use] -//pub mod iupac; +#[macro_use] +pub mod iupac; -//pub mod text; +pub mod text; pub use bio_seq_derive::Codec; @@ -62,7 +128,7 @@ pub use bio_seq_derive::Codec; /// the `Codec` trait. /// /// The intended representation is an `Enum`, transparently represented as a `u8`. -pub trait Codec: Copy + Clone + PartialEq + Hash + Eq + Into { +pub trait Codec: fmt::Debug + Copy + Clone + PartialEq + Hash + Eq + Into { /// The number of bits used to encode the characters. e.g. `Dna::BITS` = 2, `Iupac::BITS` = 4. const BITS: u8; @@ -92,20 +158,19 @@ pub trait Codec: Copy + Clone + PartialEq + Hash + Eq + Into { pub trait Complement { /// ``` /// use bio_seq::prelude::{Dna, Complement}; - /// assert_eq!(Dna::A.to_comp(), Dna::T); + /// assert_eq!(Dna::A.comp(), Dna::T); /// ```` - fn to_comp(&self) -> Self; + fn comp(&self) -> Self; /// `Complement` a value in place - fn comp(&mut self) + fn comp_assign(&mut self) where Self: Sized, { - *self = self.to_comp(); + *self = self.comp(); } } -/* #[cfg(test)] mod tests { use super::dna::Dna; @@ -124,4 +189,3 @@ mod tests { assert_ne!(Iupac::from(Dna::G), Iupac::T); } } -*/ diff --git a/bio-seq/src/error.rs b/bio-seq/src/error.rs index 8ab1cc8..b6c1c09 100644 --- a/bio-seq/src/error.rs +++ b/bio-seq/src/error.rs @@ -10,8 +10,17 @@ impl fmt::Display for ParseBioError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { ParseBioError::UnrecognisedBase(byte) => { - write!(f, "Unrecognised character: {byte}") + if byte.is_ascii_alphanumeric() { + write!( + f, + "Unrecognised character: '{}' ({:#04X?})", + *byte as char, byte + ) + } else { + write!(f, "Unrecognised character: {:#04X?}", byte) + } } + ParseBioError::MismatchedLength(got, expected) => { write!(f, "Expected length {expected}, got {got}") } @@ -21,22 +30,3 @@ impl fmt::Display for ParseBioError { // #![feature(error_in_core) impl std::error::Error for ParseBioError {} - -/// Error conditions for codon/amino acid translation -#[derive(Debug, PartialEq, Eq, Clone)] -pub enum TranslationError { - AmbiguousCodon, - InvalidCodon, -} - -impl fmt::Display for TranslationError { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match *self { - TranslationError::AmbiguousCodon => write!(f, ""), - TranslationError::InvalidCodon => write!(f, ""), - } - } -} - -// #![feature(error_in_core) -impl std::error::Error for TranslationError {}