Skip to content

Commit

Permalink
improve error messages and remove them from Codec trait
Browse files Browse the repository at this point in the history
  • Loading branch information
jeff-k committed May 31, 2024
1 parent bba3547 commit d81e995
Show file tree
Hide file tree
Showing 3 changed files with 115 additions and 53 deletions.
42 changes: 25 additions & 17 deletions bio-seq-derive/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ pub fn codec_derive(input: TokenStream) -> TokenStream {
}
};

alt_discriminants.push(quote! { #value => Ok(Self::#ident) });
alt_discriminants.push(quote! { #value => Some(Self::#ident) });
unsafe_alts.push(quote! { #value => Self::#ident });

max_variant = max_variant.max(value);
Expand All @@ -90,15 +90,17 @@ pub fn codec_derive(input: TokenStream) -> TokenStream {
.into();
}

let mut char_repr = ident.to_string().chars().next().unwrap();
//let mut char_repr = ident.to_string().chars().next().unwrap();

let mut char_repr = ident.to_string().bytes().next().unwrap();

for attr in &variant.attrs {
if attr.path().is_ident("display") {
let alt_attr: syn::LitChar = match attr.parse_args() {
Ok(attr) => attr,
Err(err) => return err.to_compile_error().into(),
};
char_repr = alt_attr.value();
char_repr = alt_attr.value() as u8;
} else if attr.path().is_ident("alt") {
let discs: Punctuated<syn::ExprLit, Token![,]> =
match attr.parse_args_with(Punctuated::parse_terminated) {
Expand All @@ -107,24 +109,24 @@ pub fn codec_derive(input: TokenStream) -> TokenStream {
};

for d in discs.into_iter() {
alt_discriminants.push(quote! { #d => Ok(Self::#ident) });
alt_discriminants.push(quote! { #d => Some(Self::#ident) });
unsafe_alts.push(quote! { #d => Self::#ident });
}
};
}

variants_to_char.push(quote! { Self::#ident => #char_repr });
chars_to_variant.push(quote! { #char_repr => Ok(Self::#ident) });
chars_to_variant.push(quote! { #char_repr => Some(Self::#ident) });
}

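// default width is ceil(log2(max_variant)) bits, where max_variant is the largest discriminant
// NOTE(review): when max_variant is an exact power of two (e.g. 4 = 0b100) this is one bit short — confirm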
let mut width = f32::ceil(f32::log2(max_variant as f32)) as usize;
let mut width = f32::ceil(f32::log2(max_variant as f32)) as u8;

for attr in &enum_ast.attrs {
if attr.path().is_ident("bits") {
width = match attr.parse_args::<syn::LitInt>() {
Ok(w) => {
let chosen_width = w.base10_parse::<usize>().unwrap();
let chosen_width = w.base10_parse::<u8>().unwrap();
// test whether the specified width is too small
if chosen_width < width {
return syn::Error::new_spanned(
Expand All @@ -143,19 +145,18 @@ pub fn codec_derive(input: TokenStream) -> TokenStream {
};
}

let parse_error = quote! { crate::prelude::ParseBioError };
//let parse_error = quote! { crate::prelude::ParseBioError };

// Generate the implementation
let output = quote! {
impl Codec for #enum_ident {
type Error = #parse_error;
const BITS: usize = #width;
const BITS: u8 = #width;

fn unsafe_from_bits(b: u8) -> Self {
//debug_assert!(false, "Invalid encoding: {b:?}");
match b {
#(#unsafe_alts),*,
_ => unreachable!(),
x => panic!("Unrecognised bit pattern: {}", x),
}
}

Expand All @@ -169,28 +170,35 @@ pub fn codec_derive(input: TokenStream) -> TokenStream {
fn unsafe_from_ascii(c: u8) -> Self {
match c {
#(#chars_to_variant),*,
_ => panic!(),
}
x => {
if x.is_ascii_alphanumeric() {
panic!("Unrecognised character: {} ({:#04X?})", x as char, x);
} else {
panic!("Unrecognised character: {:#04X?}", x);
}
},
}.unwrap()
}

fn try_from_ascii(c: u8) -> Result<Self, Self::Error> {
fn try_from_ascii(c: u8) -> Option<Self> {
match c {
#(#chars_to_variant),*,
_ => Err(#parse_error {}),
_ => None,
}
}

fn to_char(self) -> char {
match self {
#(#variants_to_char),*,
}
}.into()
}

pub fn items() -> impl Iterator<Item = Self> {
fn items() -> impl Iterator<Item = Self> {
vec![ #(Self::#variant_idents,)* ].into_iter()
}
}

};
//println!("{}", output);
output.into()
}
96 changes: 80 additions & 16 deletions bio-seq/src/codec/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@
//! The `BITS` attribute stores the number of bits used by the representation.
//! ```
//! use bio_seq::prelude::{Dna, Codec};
//! //use bio_seq::codec::text;
//! use bio_seq::codec::text;
//! assert_eq!(Dna::BITS, 2);
//! //assert_eq!(text::Dna::BITS, 8);
//! assert_eq!(text::Dna::BITS, 8);
//! ```
//!
//! ## Deriving custom Codecs
Expand All @@ -31,29 +31,95 @@
//!
//! Custom encodings can be defined on enums by implementing the `Codec` trait.
//!
//! ```ignore
//! ```
//! use bio_seq::prelude;
//! use bio_seq::prelude::Codec;
//!
//! #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Codec)]
//! #[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)]
//! pub enum Dna {
//! A = 0b00,
//! C = 0b01,
//! G = 0b10,
//! T = 0b11,
//! }
//!
//! impl From<Dna> for u8 {
//! fn from(base: Dna) -> u8 {
//! match base {
//! Dna::A => 0b00,
//! Dna::C => 0b01,
//! Dna::G => 0b10,
//! Dna::T => 0b11,
//! }
//! }
//! }
//!
//! impl Codec for Dna {
//! const BITS: u8 = 2;
//!
//! fn unsafe_from_bits(bits: u8) -> Self {
//! if let Some(base) = Self::try_from_bits(bits) {
//! base
//! } else {
//! panic!("Unrecognised bit pattern!")
//! }
//! }
//!
//! fn try_from_bits(bits: u8) -> Option<Self> {
//! match bits {
//! 0b00 => Some(Dna::A),
//! 0b01 => Some(Dna::C),
//! 0b10 => Some(Dna::G),
//! 0b11 => Some(Dna::T),
//! _ => None,
//! }
//! }
//!
//! fn unsafe_from_ascii(chr: u8) -> Self {
//! if let Some(base) = Self::try_from_ascii(chr) {
//! base
//! } else {
//!             panic!("Unrecognised character!")
//! }
//! }
//!
//! fn try_from_ascii(chr: u8) -> Option<Self> {
//! match chr {
//! b'A' => Some(Dna::A),
//! b'C' => Some(Dna::C),
//! b'G' => Some(Dna::G),
//! b'T' => Some(Dna::T),
//! _ => None,
//! }
//! }
//!
//! fn to_char(self) -> char {
//! match self {
//! Dna::A => 'A',
//! Dna::C => 'C',
//! Dna::G => 'G',
//! Dna::T => 'T',
//! }
//! }
//!
//! fn items() -> impl Iterator<Item = Self> {
//! vec![Dna::A, Dna::C, Dna::G, Dna::T].into_iter()
//! }
//! }
//!
//! ```
use core::fmt;
use core::hash::Hash;

//#[macro_use]
//pub mod amino;
#[macro_use]
pub mod amino;
#[macro_use]
pub mod dna;
//#[macro_use]
//pub mod iupac;
#[macro_use]
pub mod iupac;

//pub mod text;
pub mod text;

pub use bio_seq_derive::Codec;

Expand All @@ -62,7 +128,7 @@ pub use bio_seq_derive::Codec;
/// the `Codec` trait.
///
/// The intended representation is an `Enum`, transparently represented as a `u8`.
pub trait Codec: Copy + Clone + PartialEq + Hash + Eq + Into<u8> {
pub trait Codec: fmt::Debug + Copy + Clone + PartialEq + Hash + Eq + Into<u8> {
/// The number of bits used to encode the characters. e.g. `Dna::BITS` = 2, `Iupac::BITS` = 4.
const BITS: u8;

Expand Down Expand Up @@ -92,20 +158,19 @@ pub trait Codec: Copy + Clone + PartialEq + Hash + Eq + Into<u8> {
pub trait Complement {
/// ```
/// use bio_seq::prelude::{Dna, Complement};
/// assert_eq!(Dna::A.to_comp(), Dna::T);
/// assert_eq!(Dna::A.comp(), Dna::T);
/// ```
fn to_comp(&self) -> Self;
fn comp(&self) -> Self;

/// `Complement` a value in place
fn comp(&mut self)
fn comp_assign(&mut self)
where
Self: Sized,
{
*self = self.to_comp();
*self = self.comp();
}
}

/*
#[cfg(test)]
mod tests {
use super::dna::Dna;
Expand All @@ -124,4 +189,3 @@ mod tests {
assert_ne!(Iupac::from(Dna::G), Iupac::T);
}
}
*/
30 changes: 10 additions & 20 deletions bio-seq/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,17 @@ impl fmt::Display for ParseBioError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
ParseBioError::UnrecognisedBase(byte) => {
write!(f, "Unrecognised character: {byte}")
if byte.is_ascii_alphanumeric() {
write!(
f,
"Unrecognised character: '{}' ({:#04X?})",
*byte as char, byte
)
} else {
write!(f, "Unrecognised character: {:#04X?}", byte)
}
}

ParseBioError::MismatchedLength(got, expected) => {
write!(f, "Expected length {expected}, got {got}")
}
Expand All @@ -21,22 +30,3 @@ impl fmt::Display for ParseBioError {

// #![feature(error_in_core)]
impl std::error::Error for ParseBioError {}

/// Error conditions for codon/amino acid translation
#[derive(Debug, PartialEq, Eq, Clone)]
pub enum TranslationError {
    /// Displayed as `"Ambiguous codon"`.
    AmbiguousCodon,
    /// Displayed as `"Invalid codon"`.
    InvalidCodon,
}

impl fmt::Display for TranslationError {
    /// Writes a short, human-readable description of the error.
    ///
    /// Every variant maps to a distinct, non-empty message so that the error
    /// stays visible when it is printed, logged, or surfaced through
    /// `std::error::Error`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let message = match self {
            TranslationError::AmbiguousCodon => "Ambiguous codon",
            TranslationError::InvalidCodon => "Invalid codon",
        };
        f.write_str(message)
    }
}

// #![feature(error_in_core)]
// NOTE(review): the marker above suggests a planned move to `core::error::Error` — confirm.
impl std::error::Error for TranslationError {}

0 comments on commit d81e995

Please sign in to comment.