From c64f9d2c30df5e014a33864b7a13f53fa9e3758d Mon Sep 17 00:00:00 2001 From: Kevin Nakamura Date: Thu, 15 Feb 2024 10:57:17 +0900 Subject: [PATCH 1/8] New `parse` implementation without `clone`. `EdnError` is now non-String, with well defined data. --- Cargo.toml | 1 + src/deserialize/mod.rs | 82 ++-- src/deserialize/parse.rs | 851 ++++++++++++++++++++------------------ src/edn/error.rs | 111 +++++ src/edn/mod.rs | 62 +-- src/lib.rs | 2 +- tests/deserialize.rs | 24 +- tests/deserialize_sets.rs | 14 +- tests/emit_json.rs | 5 +- tests/error_messages.rs | 97 +++++ tests/lib.rs | 1 + 11 files changed, 704 insertions(+), 546 deletions(-) create mode 100644 src/edn/error.rs create mode 100644 tests/error_messages.rs diff --git a/Cargo.toml b/Cargo.toml index a3e3c4a..a6780de 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,6 +18,7 @@ unsafe_code = "deny" [lints.clippy] pedantic = "warn" nursery = "warn" +inline_always = "allow" [features] default = ["sets", "std"] diff --git a/src/deserialize/mod.rs b/src/deserialize/mod.rs index 10ea244..0945a3c 100644 --- a/src/deserialize/mod.rs +++ b/src/deserialize/mod.rs @@ -1,7 +1,6 @@ use alloc::collections::BTreeMap; #[cfg(feature = "sets")] use alloc::collections::BTreeSet; -use alloc::format; use alloc::string::{String, ToString}; use alloc::vec::Vec; use core::any; @@ -12,7 +11,8 @@ use std::collections::HashMap; #[cfg(all(feature = "sets", feature = "std"))] use std::collections::HashSet; -use crate::edn::{Edn, Error}; +use crate::edn::Edn; +use crate::EdnError as Error; pub mod parse; @@ -23,7 +23,7 @@ use ordered_float::OrderedFloat; /// /// # Errors /// -/// Error will be like `EdnError::Deserialize("couldn't convert into ")` +/// Error implements Debug. See docs for more information. /// /// ``` /// use crate::edn_rs::{Edn, EdnError, Deserialize}; @@ -72,15 +72,15 @@ pub trait Deserialize: Sized { fn deserialize(edn: &Edn) -> Result; } -fn build_deserialize_error(edn: &Edn, type_: &str) -> Error { - Error::Deserialize(format!("couldn't convert `{edn}` into `{type_}`")) +const fn build_deserialize_error(type_: &'static str) -> Error { + Error::deserialize(type_) } impl Deserialize for () { fn deserialize(edn: &Edn) -> Result { match edn { Edn::Nil => Ok(()), - _ => Err(build_deserialize_error(edn, "unit")), + _ => Err(build_deserialize_error("unit")), } } } @@ -89,7 +89,7 @@ impl Deserialize for () { impl Deserialize for OrderedFloat { fn deserialize(edn: &Edn) -> Result { edn.to_float() - .ok_or_else(|| build_deserialize_error(edn, "edn_rs::Double")) + .ok_or_else(|| build_deserialize_error("edn_rs::Double")) .map(Into::into) } } @@ -97,7 +97,7 @@ impl Deserialize for OrderedFloat { impl Deserialize for f64 { fn deserialize(edn: &Edn) -> Result { edn.to_float() - .ok_or_else(|| build_deserialize_error(edn, "edn_rs::Double")) + .ok_or_else(|| build_deserialize_error("edn_rs::Double")) .map(Into::into) } } @@ -109,7 +109,7 @@ macro_rules! impl_deserialize_int { fn deserialize(edn: &Edn) -> Result { let int = edn .to_int() - .ok_or_else(|| build_deserialize_error(edn, "int"))?; + .ok_or_else(|| build_deserialize_error("int"))?; Ok(Self::try_from(int)?) } } @@ -126,7 +126,7 @@ macro_rules! impl_deserialize_uint { fn deserialize(edn: &Edn) -> Result { let uint = edn .to_uint() - .ok_or_else(|| build_deserialize_error(edn, "uint"))?; + .ok_or_else(|| build_deserialize_error("uint"))?; Ok(Self::try_from(uint)?) } } @@ -138,8 +138,7 @@ impl_deserialize_uint!(u8, u16, u32, u64, usize); impl Deserialize for bool { fn deserialize(edn: &Edn) -> Result { - edn.to_bool() - .ok_or_else(|| build_deserialize_error(edn, "bool")) + edn.to_bool().ok_or_else(|| build_deserialize_error("bool")) } } @@ -160,8 +159,7 @@ impl Deserialize for String { impl Deserialize for char { fn deserialize(edn: &Edn) -> Result { - edn.to_char() - .ok_or_else(|| build_deserialize_error(edn, "char")) + edn.to_char().ok_or_else(|| build_deserialize_error("char")) } } @@ -173,21 +171,21 @@ where match edn { Edn::Vector(_) => Ok(edn .iter_some() - .ok_or_else(|| Error::Iter(format!("Could not create iter from {edn:?}")))? + .ok_or_else(Error::iter)? .map(|e| Deserialize::deserialize(e)) .collect::>()?), Edn::List(_) => Ok(edn .iter_some() - .ok_or_else(|| Error::Iter(format!("Could not create iter from {edn:?}")))? + .ok_or_else(Error::iter)? .map(|e| Deserialize::deserialize(e)) .collect::>()?), #[cfg(feature = "sets")] Edn::Set(_) => Ok(edn .iter_some() - .ok_or_else(|| Error::Iter(format!("Could not create iter from {edn:?}")))? + .ok_or_else(Error::iter)? .map(|e| Deserialize::deserialize(e)) .collect::>()?), - _ => Err(build_deserialize_error(edn, any::type_name::())), + _ => Err(build_deserialize_error(any::type_name::())), } } } @@ -202,20 +200,15 @@ where match edn { Edn::Map(_) => edn .map_iter() - .ok_or_else(|| Error::Iter(format!("Could not create iter from {edn:?}")))? + .ok_or_else(Error::iter)? .map(|(key, e)| { Ok(( key.to_string(), - Deserialize::deserialize(e).map_err(|_| { - Error::Deserialize(format!( - "Cannot safely deserialize {:?} to {}", - edn, "HashMap" - )) - })?, + Deserialize::deserialize(e).map_err(|_| Error::deserialize("HashMap"))?, )) }) .collect::>(), - _ => Err(build_deserialize_error(edn, any::type_name::())), + _ => Err(build_deserialize_error(any::type_name::())), } } } @@ -228,20 +221,15 @@ where match edn { Edn::Map(_) => edn .map_iter() - .ok_or_else(|| Error::Iter(format!("Could not create iter from {edn:?}")))? + .ok_or_else(Error::iter)? .map(|(key, e)| { Ok(( key.to_string(), - Deserialize::deserialize(e).map_err(|_| { - Error::Deserialize(format!( - "Cannot safely deserialize {:?} to {}", - edn, "BTreeMap" - )) - })?, + Deserialize::deserialize(e).map_err(|_| Error::deserialize("BTreeMap"))?, )) }) .collect::>(), - _ => Err(build_deserialize_error(edn, any::type_name::())), + _ => Err(build_deserialize_error(any::type_name::())), } } } @@ -256,17 +244,10 @@ where match edn { Edn::Set(_) => edn .set_iter() - .ok_or_else(|| Error::Iter(format!("Could not create iter from {edn:?}")))? - .map(|e| { - Deserialize::deserialize(e).map_err(|_| { - Error::Deserialize(format!( - "Cannot safely deserialize {:?} to {}", - edn, "HashSet" - )) - }) - }) + .ok_or_else(Error::iter)? + .map(|e| Deserialize::deserialize(e).map_err(|_| Error::deserialize("HashSet"))) .collect::>(), - _ => Err(build_deserialize_error(edn, any::type_name::())), + _ => Err(build_deserialize_error(any::type_name::())), } } } @@ -280,17 +261,10 @@ where match edn { Edn::Set(_) => edn .set_iter() - .ok_or_else(|| Error::Iter(format!("Could not create iter from {edn:?}")))? - .map(|e| { - Deserialize::deserialize(e).map_err(|_| { - Error::Deserialize(format!( - "Cannot safely deserialize {:?} to {}", - edn, "BTreeSet" - )) - }) - }) + .ok_or_else(Error::iter)? + .map(|e| Deserialize::deserialize(e).map_err(|_| Error::deserialize("BTreeSet"))) .collect::>(), - _ => Err(build_deserialize_error(edn, any::type_name::())), + _ => Err(build_deserialize_error(any::type_name::())), } } } diff --git a/src/deserialize/parse.rs b/src/deserialize/parse.rs index b392a25..7c5459f 100644 --- a/src/deserialize/parse.rs +++ b/src/deserialize/parse.rs @@ -1,221 +1,476 @@ +#![allow(clippy::inline_always)] + +use alloc::borrow::ToOwned; use alloc::boxed::Box; use alloc::collections::BTreeMap; #[cfg(feature = "sets")] use alloc::collections::BTreeSet; use alloc::string::{String, ToString}; use alloc::vec::Vec; -use alloc::{format, vec}; -use core::iter; use core::primitive::str; +use crate::edn::error::{Code, Error}; #[cfg(feature = "sets")] use crate::edn::Set; -use crate::edn::{Edn, Error, List, Map, Vector}; +use crate::edn::{Edn, List, Map, Vector}; const DELIMITERS: [char; 8] = [',', ']', '}', ')', ';', '(', '[', '{']; -pub fn parse(edn: &str) -> Result { - let owned = String::from(edn); - let mut tokens = owned.chars().enumerate(); - (parse_internal(tokens.next(), &mut tokens)?).map_or_else(|| Ok(Edn::Empty), Ok) +#[derive(Debug)] +struct Walker<'w> { + slice: &'w str, + ptr: usize, + column: usize, + line: usize, } -fn parse_consuming( - c: Option<(usize, char)>, - chars: &mut iter::Enumerate>, -) -> Result { - (parse_internal(c, chars)?).map_or_else(|| Ok(Edn::Empty), Ok) -} +impl Walker<'_> { + // Slurps until whitespace or delimiter, returning the slice. + #[inline(always)] + fn slurp_literal(&mut self) -> &str { + let token = self.slice[self.ptr..] + .split(|c: char| c.is_whitespace() || DELIMITERS.contains(&c)) + .next() + .unwrap(); // At least an empty slice will always be on the first split, even on an empty str + + self.ptr += token.len(); + self.column += token.len(); + token + } + + // Slurps a char. Special handling for chars that happen to be delimiters + #[inline(always)] + fn slurp_char(&mut self) -> &str { + let starting_ptr = self.ptr; + + let mut ptr = 0; + while let Some(c) = self.peek_next() { + // first is always \\, second is always a char we want. + // Handles edge cases of having a valid "\\[" but also "\\c[lolthisisvalidedn" + if ptr > 1 && (c.is_whitespace() || DELIMITERS.contains(&c)) { + break; + } -fn parse_internal( - c: Option<(usize, char)>, - chars: &mut iter::Enumerate>, -) -> Result, Error> { - Ok(match c { - Some((_, '[')) => Some(read_vec(chars)?), - Some((_, '(')) => Some(read_list(chars)?), - Some((_, '#')) => tagged_or_set_or_discard(chars)?, - Some((_, '{')) => Some(read_map(chars)?), - Some((_, ';')) => { - // Consumes the content - chars.find(|c| c.1 == '\n'); - read_if_not_container_end(chars)? + let _ = self.nibble_next(); + ptr += 1; } - Some((_, s)) if s.is_whitespace() || s == ',' => read_if_not_container_end(chars)?, - None => None, - edn => Some(edn_element(edn, chars)?), - }) -} + &self.slice[starting_ptr..starting_ptr + ptr] + } -fn edn_element( - c: Option<(usize, char)>, - chars: &mut iter::Enumerate>, -) -> Result { - match c { - Some((_, '\"')) => read_str(chars), - Some((_, ':')) => Ok(read_key(chars)), - Some((_, n)) if n.is_numeric() => Ok(read_number(n, chars)?), - Some((_, n)) - if (n == '-' || n == '+') - && chars - .clone() - .peekable() - .peek() - .is_some_and(|n| n.1.is_numeric()) => - { - Ok(read_number(n, chars)?) + // Slurps until whitespace or delimiter, returning the slice. + #[inline(always)] + fn slurp_tag(&mut self) -> &str { + let token = self.slice[self.ptr..] + .split(|c: char| c.is_whitespace() && c != ',') + .next() + .unwrap(); // At least an empty slice will always be on the first split, even on an empty str + + self.ptr += token.len(); + self.column += token.len(); + + if token.ends_with(',') { + return &token[0..token.len() - 1]; } - Some((_, '\\')) => Ok(read_char(chars)?), - Some((_, b)) if b == 't' || b == 'f' || b == 'n' => Ok(read_bool_or_nil(b, chars)?), - Some((_, a)) => Ok(read_symbol(a, chars)?), - None => Err(Error::ParseEdn("Edn could not be parsed".to_string())), + token } -} -fn tagged_or_set_or_discard( - chars: &mut iter::Enumerate>, -) -> Result, Error> { - match chars.clone().next() { - Some((_, '{')) => read_set(chars).map(Some), - Some((_, '_')) => read_discard(chars), - _ => read_tagged(chars).map(Some), + #[inline(always)] + fn slurp_str(&mut self) -> Result { + let _ = self.nibble_next(); // Consume the leading '"' char + let mut s = String::new(); + let mut escape = false; + loop { + if let Some(c) = self.nibble_next() { + if escape { + match c { + 't' => s.push('\t'), + 'r' => s.push('\r'), + 'n' => s.push('\n'), + '\\' => s.push('\\'), + '\"' => s.push('\"'), + _ => { + return Err(Error { + code: Code::InvalidEscape, + column: Some(self.column), + line: Some(self.line), + ptr: Some(self.ptr), + }) + } + } + escape = false; + } else if c == '\"' { + return Ok(Edn::Str(s)); + } else if c == '\\' { + escape = true; + } else { + escape = false; + s.push(c); + } + } else { + return Err(Error { + code: Code::UnexpectedEOF, + column: Some(self.column), + line: Some(self.line), + ptr: Some(self.ptr), + }); + } + } + } + + // Nibbles away until the next new line + #[inline(always)] + fn nibble_newline(&mut self) { + let len = self.slice[self.ptr..].split('\n').next().unwrap(); // At least an empty slice will always be on the first split, even on an empty str + self.ptr += len.len(); + self.nibble_whitespace(); + } + + // Nibbles away until the start of the next form + #[inline(always)] + fn nibble_whitespace(&mut self) { + while let Some(n) = self.peek_next() { + if n == ',' || n.is_whitespace() { + let _ = self.nibble_next(); + continue; + } + break; + } + } + + // Consumes next + #[inline(always)] + fn nibble_next(&mut self) -> Option { + let char = self.slice[self.ptr..].chars().next(); + if let Some(c) = char { + self.ptr += 1; + if c == '\n' { + self.line += 1; + self.column = 1; + } else { + self.column += 1; + } + } + char + } + + // Peek into the next char + #[inline(always)] + fn peek_next(&mut self) -> Option { + self.slice[self.ptr..].chars().next() } } -fn read_key(chars: &mut iter::Enumerate>) -> Edn { - let key_chars = chars - .clone() - .take_while(|c| !c.1.is_whitespace() && !DELIMITERS.contains(&c.1)); - let c_len = key_chars.clone().count(); +pub fn parse(edn: &str) -> Result { + let mut walker = Walker { + slice: edn, + ptr: 0, + column: 1, + line: 1, + }; - let mut key = String::from(":"); - let key_chars = chars.take(c_len).map(|c| c.1).collect::(); - key.push_str(&key_chars); - Edn::Key(key) + parse_internal(&mut walker) } -fn read_str(chars: &mut iter::Enumerate>) -> Result { - let result = chars.try_fold( - (false, String::new()), - |(last_was_escape, mut s), (_, c)| { - if last_was_escape { - // Supported escape characters, per https://github.com/edn-format/edn#strings - match c { - 't' => s.push('\t'), - 'r' => s.push('\r'), - 'n' => s.push('\n'), - '\\' => s.push('\\'), - '\"' => s.push('\"'), - _ => { - return Err(Err(Error::ParseEdn(format!( - "Invalid escape sequence \\{c}" - )))) - } - }; - - Ok((false, s)) - } else if c == '\"' { - // Unescaped quote means we're done - Err(Ok(s)) - } else if c == '\\' { - Ok((true, s)) - } else { - s.push(c); - Ok((false, s)) +#[inline] +fn parse_internal(walker: &mut Walker<'_>) -> Result { + walker.nibble_whitespace(); + while let Some(next) = walker.peek_next() { + let column_start = walker.column; + let ptr_start = walker.ptr; + let line_start = walker.line; + if let Some(ret) = match next { + '\\' => match parse_char(walker.slurp_char()) { + Ok(edn) => Some(Ok(edn)), + Err(code) => { + return Err(Error { + code, + line: Some(walker.line), + column: Some(column_start), + ptr: Some(walker.ptr), + }) + } + }, + '\"' => Some(walker.slurp_str()), + // comment. consume until a new line. + ';' => { + walker.nibble_newline(); + None } - }, - ); - - match result { - // An Ok means we actually finished parsing *without* seeing the end of the string, so that's - // an error. - Ok(_) => Err(Error::ParseEdn("Unterminated string".to_string())), - Err(Err(e)) => Err(e), - Err(Ok(string)) => Ok(Edn::Str(string)), + '[' => return parse_vector(walker), + '(' => return parse_list(walker), + '{' => return parse_map(walker), + '#' => parse_tag_set_discard(walker)?.map(Ok), + // non-string literal case + _ => match edn_literal(walker.slurp_literal()) { + Ok(edn) => Some(Ok(edn)), + Err(code) => { + return Err(Error { + code, + line: Some(line_start), + column: Some(column_start), + ptr: Some(ptr_start), + }) + } + }, + } { + return ret; + } } + Ok(Edn::Empty) } -fn read_symbol(a: char, chars: &mut iter::Enumerate>) -> Result { - let c_len = chars - .clone() - .enumerate() - .take_while(|&(_, c)| !c.1.is_whitespace() && !DELIMITERS.contains(&c.1)) - .count(); - let i = chars - .clone() - .next() - .ok_or_else(|| Error::ParseEdn("Could not identify symbol index".to_string()))? - .0; - - if a.is_whitespace() { - return Err(Error::ParseEdn(format!( - "\"{a}\" could not be parsed at char count {i}" - ))); +#[inline] +fn parse_tag_set_discard(walker: &mut Walker<'_>) -> Result, Error> { + let _ = walker.nibble_next(); // Consume the leading '#' char + + match walker.peek_next() { + Some('{') => parse_set(walker).map(Some), + Some('_') => parse_discard(walker), + _ => parse_tag(walker).map(Some), } +} + +#[inline] +fn parse_discard(walker: &mut Walker<'_>) -> Result, Error> { + let _ = walker.nibble_next(); // Consume the leading '_' char + Ok(match parse_internal(walker)? { + Edn::Empty => { + return Err(Error { + code: Code::UnexpectedEOF, + line: Some(walker.line), + column: Some(walker.column), + ptr: Some(walker.ptr), + }) + } + _ => match walker.peek_next() { + Some(_) => Some(parse_internal(walker)?), + None => None, + }, + }) +} + +#[inline] +#[cfg(feature = "sets")] +fn parse_set(walker: &mut Walker<'_>) -> Result { + let _ = walker.nibble_next(); // Consume the leading '{' char + let mut set: BTreeSet = BTreeSet::new(); - let mut symbol = String::from(a); - let symbol_chars = chars.take(c_len).map(|c| c.1).collect::(); - symbol.push_str(&symbol_chars); - Ok(Edn::Symbol(symbol)) + loop { + match walker.peek_next() { + Some('}') => { + let _ = walker.nibble_next(); + return Ok(Edn::Set(Set::new(set))); + } + Some(_) => { + let next = parse_internal(walker)?; + if next != Edn::Empty { + set.insert(next); + } + } + _ => { + return Err(Error { + code: Code::UnexpectedEOF, + line: Some(walker.line), + column: Some(walker.column), + ptr: Some(walker.ptr), + }) + } + } + } } -fn read_tagged(chars: &mut iter::Enumerate>) -> Result { - let tag = chars - .take_while(|c| !c.1.is_whitespace() && c.1 != ',') - .map(|c| c.1) - .collect::(); +#[inline] +#[cfg(not(feature = "sets"))] +const fn parse_set(walker: &Walker<'_>) -> Result { + Err(Error { + code: Code::NoFeatureSets, + line: Some(walker.line), + column: Some(walker.column), + ptr: Some(walker.ptr), + }) +} +#[inline] +fn parse_tag(walker: &mut Walker<'_>) -> Result { + let tag = walker.slurp_tag(); Ok(Edn::Tagged( - tag, - Box::new(parse_consuming(chars.next(), chars)?), + tag.to_string(), + Box::new(parse_internal(walker)?), )) } -fn read_discard(chars: &mut iter::Enumerate>) -> Result, Error> { - let _discard_underscore = chars.next(); - let i = chars - .clone() - .next() - .ok_or_else(|| Error::ParseEdn("Could not identify symbol index".to_string()))? - .0; - match parse_consuming(chars.next(), chars) { - Err(e) => Err(e), - Ok(Edn::Empty) => Err(Error::ParseEdn(format!( - "Discard sequence must have a following element at char count {i}" - ))), - _ => read_if_not_container_end(chars), +#[inline] +fn parse_map(walker: &mut Walker<'_>) -> Result { + let _ = walker.nibble_next(); // Consume the leading '{' char + let mut map: BTreeMap = BTreeMap::new(); + loop { + match walker.peek_next() { + Some('}') => { + let _ = walker.nibble_next(); + return Ok(Edn::Map(Map::new(map))); + } + Some(n) => { + if n == ']' || n == ')' { + return Err(Error { + code: Code::UnmatchedDelimiter(n), + line: Some(walker.line), + column: Some(walker.column), + ptr: Some(walker.ptr), + }); + } + + let key = parse_internal(walker)?; + let val = parse_internal(walker)?; + + if key != Edn::Empty && val != Edn::Empty { + // Existing keys are considered an error + if map.insert(key.to_string(), val).is_some() { + return Err(Error { + code: Code::HashMapDuplicateKey, + line: Some(walker.line), + column: Some(walker.column), + ptr: Some(walker.ptr), + }); + } + } + } + _ => { + return Err(Error { + code: Code::UnexpectedEOF, + line: Some(walker.line), + column: Some(walker.column), + ptr: Some(walker.ptr), + }) + } + } } } -fn num_den_from_slice(slice: impl AsRef) -> Option<(i64, u64)> { - let slice = slice.as_ref(); - let index = slice.find('/'); +#[inline] +fn parse_vector(walker: &mut Walker<'_>) -> Result { + let _ = walker.nibble_next(); // Consume the leading '[' char + let mut vec = Vec::new(); - if let Some(i) = index { - let (num, den) = slice.split_at(i); // This can't panic because the index is valid - let num = num.parse::(); - let den = den[1..].parse::(); + loop { + match walker.peek_next() { + Some(']') => { + let _ = walker.nibble_next(); + return Ok(Edn::Vector(Vector::new(vec))); + } + Some(_) => { + let next = parse_internal(walker)?; + if next != Edn::Empty { + vec.push(next); + } + } + _ => { + return Err(Error { + code: Code::UnexpectedEOF, + line: Some(walker.line), + column: Some(walker.column), + ptr: Some(walker.ptr), + }) + } + } + } +} - if let (Ok(n), Ok(d)) = (num, den) { - return Some((n, d)); +#[inline] +fn parse_list(walker: &mut Walker<'_>) -> Result { + let _ = walker.nibble_next(); // Consume the leading '[' char + let mut vec = Vec::new(); + + loop { + match walker.peek_next() { + Some(')') => { + let _ = walker.nibble_next(); + return Ok(Edn::List(List::new(vec))); + } + Some(_) => { + let next = parse_internal(walker)?; + if next != Edn::Empty { + vec.push(next); + } + } + _ => { + return Err(Error { + code: Code::UnexpectedEOF, + line: Some(walker.line), + column: Some(walker.column), + ptr: Some(walker.ptr), + }) + } } - return None; } - None } -fn read_number(n: char, chars: &mut iter::Enumerate>) -> Result { - let c_len = chars - .clone() - .take_while(|(_, c)| !c.is_whitespace() && !DELIMITERS.contains(c)) - .count(); +#[inline] +fn edn_literal(literal: &str) -> Result { + fn numeric(s: &str) -> bool { + let (first, second) = { + let mut s = s.chars(); + (s.next(), s.next()) + }; + + if let Some(f) = first { + if f.is_numeric() { + return true; + } + + if f == '-' || f == '+' { + if let Some(s) = second { + if s.is_numeric() { + return true; + } + } + } + } + false + } + + Ok(match literal { + "nil" => Edn::Nil, + "true" => Edn::Bool(true), + "false" => Edn::Bool(false), + "" => Edn::Empty, + k if k.starts_with(':') => { + if k.len() <= 1 { + return Err(Code::InvalidKeyword); + } + Edn::Key(k.to_owned()) + } + n if numeric(n) => return parse_number(n), + _ => Edn::Symbol(literal.to_owned()), + }) +} + +#[inline] +fn parse_char(lit: &str) -> Result { + let lit = &lit[1..]; // ignore the leading '\\' + match lit { + "newline" => Ok(Edn::Char('\n')), + "return" => Ok(Edn::Char('\r')), + "tab" => Ok(Edn::Char('\t')), + "space" => Ok(Edn::Char(' ')), + c if c.len() == 1 => Ok(Edn::Char(c.chars().next().unwrap())), + _ => Err(Code::InvalidChar), + } +} + +#[inline] +fn parse_number(lit: &str) -> Result { + let mut chars = lit.chars(); let (number, radix) = { let mut number = String::new(); + // The EDN spec allows for a redundant '+' symbol, we just ignore it. - if n != '+' { - number.push(n); + if let Some(n) = chars.next() { + if n != '+' { + number.push(n); + } } - for (_, c) in chars.take(c_len) { + + for c in chars { number.push(c); } if number.to_lowercase().starts_with("0x") { @@ -230,16 +485,16 @@ fn read_number(n: char, chars: &mut iter::Enumerate>) -> Re let negative = number.starts_with('-'); let radix = { if negative { - (number[1..index]).parse::() + (number[1..index]).parse::() } else { - (number[0..index]).parse::() + (number[0..index]).parse::() } }; match radix { Ok(r) => { // from_str_radix panics if radix is not in the range from 2 to 36 if !(2..=36).contains(&r) { - return Err(Error::ParseEdn(format!("Radix of {r} is out of bounds"))); + return Err(Code::InvalidRadix(Some(r))); } if negative { @@ -253,10 +508,8 @@ fn read_number(n: char, chars: &mut iter::Enumerate>) -> Re } (number, r) } - Err(e) => { - return Err(Error::ParseEdn(format!( - "{e} while trying to parse radix from {number}" - ))); + Err(_) => { + return Err(Code::InvalidRadix(None)); } } } else { @@ -268,252 +521,32 @@ fn read_number(n: char, chars: &mut iter::Enumerate>) -> Re n if (n.contains('E') || n.contains('e')) && n.parse::().is_ok() => { Ok(Edn::Double(n.parse::()?.into())) } - n if u64::from_str_radix(&n, radix).is_ok() => { - Ok(Edn::UInt(u64::from_str_radix(&n, radix)?)) + n if u64::from_str_radix(&n, radix.into()).is_ok() => { + Ok(Edn::UInt(u64::from_str_radix(&n, radix.into())?)) } - n if i64::from_str_radix(&n, radix).is_ok() => { - Ok(Edn::Int(i64::from_str_radix(&n, radix)?)) + n if i64::from_str_radix(&n, radix.into()).is_ok() => { + Ok(Edn::Int(i64::from_str_radix(&n, radix.into())?)) } n if n.parse::().is_ok() => Ok(Edn::Double(n.parse::()?.into())), n if num_den_from_slice(&n).is_some() => Ok(Edn::Rational(num_den_from_slice(n).unwrap())), - n if n.to_uppercase().chars().filter(|c| c == &'E').count() > 1 => { - let mut n = n.chars(); - read_symbol(n.next().unwrap_or(' '), &mut n.enumerate()) - } - _ => Err(Error::ParseEdn(format!( - "{number} could not be parsed with radix {radix}" - ))), + _ => Err(Code::InvalidNumber), } } -fn read_char(chars: &mut iter::Enumerate>) -> Result { - let element = chars - .clone() - .enumerate() - .take_while(|&(_, c)| !c.1.is_whitespace()) - .map(|(_, c)| c.1) - .collect::(); - - let mut consume_chars = |n| { - // We need to map/collect to consume out of the Enumerate - let _ = chars.take(n).map(|c| c.1).collect::(); - }; - - match element { - _ if element.starts_with("newline") => { - consume_chars(7); - Ok(Edn::Char('\n')) - } - _ if element.starts_with("return") => { - consume_chars(6); - Ok(Edn::Char('\r')) - } - _ if element.starts_with("tab") => { - consume_chars(3); - Ok(Edn::Char('\t')) - } - _ if element.starts_with("space") => { - consume_chars(5); - Ok(Edn::Char(' ')) - } - c if !c.is_empty() => { - consume_chars(1); - Ok(Edn::Char(c.chars().next().unwrap())) - } - _ => Err(Error::ParseEdn(format!( - "{element:?} could not be parsed as a symbol" - ))), - } -} - -fn read_bool_or_nil( - c: char, - chars: &mut iter::Enumerate>, -) -> Result { - let i = chars - .clone() - .next() - .ok_or_else(|| Error::ParseEdn("Could not identify symbol index".to_string()))? - .0; - match c { - 't' if { - let val = chars - .clone() - .take_while(|(_, c)| !c.is_whitespace() && !DELIMITERS.contains(c)) - .map(|c| c.1) - .collect::(); - val.eq("rue") - } => - { - let mut string = String::new(); - let t = chars.take(3).map(|c| c.1).collect::(); - string.push(c); - string.push_str(&t); - Ok(Edn::Bool(string.parse::()?)) - } - 'f' if { - let val = chars - .clone() - .take_while(|(_, c)| !c.is_whitespace() && !DELIMITERS.contains(c)) - .map(|c| c.1) - .collect::(); - val.eq("alse") - } => - { - let mut string = String::new(); - let f = chars.take(4).map(|c| c.1).collect::(); - string.push(c); - string.push_str(&f); - Ok(Edn::Bool(string.parse::()?)) - } - 'n' if { - let val = chars - .clone() - .take_while(|(_, c)| !c.is_whitespace() && !DELIMITERS.contains(c)) - .map(|c| c.1) - .collect::(); - val.eq("il") - } => - { - let mut string = String::new(); - let n = chars.take(2).map(|c| c.1).collect::(); - string.push(c); - string.push_str(&n); - match &string[..] { - "nil" => Ok(Edn::Nil), - _ => Err(Error::ParseEdn(format!( - "{string} could not be parsed at char count {i}" - ))), - } - } - _ => read_symbol(c, chars), - } -} - -fn read_vec(chars: &mut iter::Enumerate>) -> Result { - let i = chars - .clone() - .next() - .ok_or_else(|| Error::ParseEdn("Could not identify symbol index".to_string()))? - .0; - let mut res: Vec = vec![]; - loop { - match chars.next() { - Some((_, ']')) => return Ok(Edn::Vector(Vector::new(res))), - Some(c) => { - if let Some(e) = parse_internal(Some(c), chars)? { - res.push(e); - } - } - err => { - return Err(Error::ParseEdn(format!( - "{err:?} could not be parsed at char count {i}" - ))) - } - } - } -} - -fn read_list(chars: &mut iter::Enumerate>) -> Result { - let i = chars - .clone() - .next() - .ok_or_else(|| Error::ParseEdn("Could not identify symbol index".to_string()))? - .0; - let mut res: Vec = vec![]; - loop { - match chars.next() { - Some((_, ')')) => return Ok(Edn::List(List::new(res))), - Some(c) => { - if let Some(e) = parse_internal(Some(c), chars)? { - res.push(e); - } - } - err => { - return Err(Error::ParseEdn(format!( - "{err:?} could not be parsed at char count {i}" - ))) - } - } - } -} - -#[cfg(feature = "sets")] -fn read_set(chars: &mut iter::Enumerate>) -> Result { - let _discard_brackets = chars.next(); - let i = chars - .clone() - .next() - .ok_or_else(|| Error::ParseEdn("Could not identify symbol index".to_string()))? - .0; - let mut res: BTreeSet = BTreeSet::new(); - loop { - match chars.next() { - Some((_, '}')) => return Ok(Edn::Set(Set::new(res))), - Some(c) => { - if let Some(e) = parse_internal(Some(c), chars)? { - res.insert(e); - } - } - err => { - return Err(Error::ParseEdn(format!( - "{err:?} could not be parsed at char count {i}" - ))) - } - } - } -} - -#[cfg(not(feature = "sets"))] -fn read_set(_chars: &mut iter::Enumerate>) -> Result { - Err(Error::ParseEdn( - "Could not parse set due to feature not being enabled".to_string(), - )) -} +#[inline] +fn num_den_from_slice(slice: impl AsRef) -> Option<(i64, u64)> { + let slice = slice.as_ref(); + let index = slice.find('/'); -fn read_map(chars: &mut iter::Enumerate>) -> Result { - let i = chars - .clone() - .next() - .ok_or_else(|| Error::ParseEdn("Could not identify symbol index".to_string()))? - .0; - let mut res: BTreeMap = BTreeMap::new(); - let mut key: Option = None; - let mut val: Option = None; - loop { - match chars.next() { - Some((_, '}')) => return Ok(Edn::Map(Map::new(res))), - Some(c) => { - if key.is_some() { - val = Some(parse_consuming(Some(c), chars)?); - } else { - key = parse_internal(Some(c), chars)?; - } - } - err => { - return Err(Error::ParseEdn(format!( - "{err:?} could not be parsed at char count {i}" - ))) - } - } + if let Some(i) = index { + let (num, den) = slice.split_at(i); // This can't panic because the index is valid + let num = num.parse::(); + let den = den[1..].parse::(); - if key.is_some() && val.is_some() { - res.insert(key.unwrap().to_string(), val.unwrap()); - key = None; - val = None; + if let (Ok(n), Ok(d)) = (num, den) { + return Some((n, d)); } + return None; } + None } - -fn read_if_not_container_end( - chars: &mut iter::Enumerate>, -) -> Result, Error> { - Ok(match chars.clone().next() { - Some(c) if c.1 == ']' || c.1 == ')' || c.1 == '}' => None, - Some(_) => parse_internal(chars.next(), chars)?, - None => None, - }) -} - -#[cfg(test)] -mod test {} diff --git a/src/edn/error.rs b/src/edn/error.rs new file mode 100644 index 0000000..6edd4f5 --- /dev/null +++ b/src/edn/error.rs @@ -0,0 +1,111 @@ +use core::fmt::{self, Debug}; +use core::{convert, num, str}; + +pub struct Error { + pub code: Code, + /// Counting from 1. + pub line: Option, + /// This is a utf-8 char count. Counting from 1. + pub column: Option, + /// This is a pointer into the str trying to be parsed, not a utf-8 char offset + pub ptr: Option, +} + +#[derive(Debug, Eq, PartialEq)] +#[non_exhaustive] +pub enum Code { + /// Parse errors + HashMapDuplicateKey, + InvalidChar, + InvalidEscape, + InvalidKeyword, + InvalidNumber, + InvalidRadix(Option), + ParseNumber(ParseNumber), + UnexpectedEOF, + UnmatchedDelimiter(char), + + /// Feature errors + NoFeatureSets, + + /// Deserialize errors + Convert(&'static str), + + /// Navigation errors + Iter, + + /// Type conversion errors + TryFromInt(num::TryFromIntError), + #[doc(hidden)] + Infallable(), // Makes the compiler happy for converting u64 to u64 and i64 to i64 +} + +#[derive(Debug, Eq, PartialEq)] +#[non_exhaustive] +pub enum ParseNumber { + ParseIntError(num::ParseIntError), + ParseFloatError(num::ParseFloatError), +} + +impl Error { + pub(crate) const fn deserialize(conv_type: &'static str) -> Self { + Self { + code: Code::Convert(conv_type), + line: None, + column: None, + ptr: None, + } + } + pub(crate) const fn iter() -> Self { + Self { + code: Code::Iter, + line: None, + column: None, + ptr: None, + } + } +} + +impl Debug for Error { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "EdnError {{ code: {:?}, line: {:?}, column: {:?}, ptr: {:?} }}", + self.code, self.line, self.column, self.ptr + ) + } +} + +impl From for Code { + fn from(e: num::ParseIntError) -> Self { + Self::ParseNumber(ParseNumber::ParseIntError(e)) + } +} + +impl From for Code { + fn from(e: num::ParseFloatError) -> Self { + Self::ParseNumber(ParseNumber::ParseFloatError(e)) + } +} + +impl From for Error { + fn from(_: convert::Infallible) -> Self { + Self { + code: Code::Infallable(), + line: None, + column: None, + ptr: None, + } + } +} + +impl From for Error { + fn from(e: num::TryFromIntError) -> Self { + Self { + code: Code::TryFromInt(e), + line: None, + column: None, + ptr: None, + } + } +} diff --git a/src/edn/mod.rs b/src/edn/mod.rs index 62cd6de..a585f0b 100644 --- a/src/edn/mod.rs +++ b/src/edn/mod.rs @@ -7,7 +7,7 @@ use alloc::vec::Vec; use alloc::{fmt, format}; #[cfg(feature = "sets")] use core::cmp::{Ord, PartialOrd}; -use core::convert::{Infallible, TryFrom}; +use core::convert::TryFrom; use core::num; use crate::deserialize::parse::{self}; @@ -16,6 +16,7 @@ use utils::index::Index; #[cfg(feature = "sets")] use ordered_float::OrderedFloat; +pub mod error; #[doc(hidden)] pub mod utils; @@ -692,7 +693,7 @@ impl Edn { } impl core::str::FromStr for Edn { - type Err = Error; + type Err = error::Error; /// Parses a `&str` that contains an Edn into `Result` fn from_str(s: &str) -> Result { @@ -712,63 +713,6 @@ pub(crate) fn rational_to_double((n, d): (i64, u64)) -> f64 { (n as f64) / (d as f64) } -#[derive(Debug, PartialEq, Eq)] -#[non_exhaustive] -pub enum Error { - ParseEdn(String), - Deserialize(String), - Iter(String), - TryFromInt(num::TryFromIntError), - #[doc(hidden)] - Infallable(), // Makes the compiler happy for converting u64 to u64 and i64 to i64 -} - -impl From for Error { - fn from(s: String) -> Self { - Self::ParseEdn(s) - } -} - -impl From for Error { - fn from(s: num::ParseIntError) -> Self { - Self::ParseEdn(s.to_string()) - } -} - -impl From for Error { - fn from(s: num::ParseFloatError) -> Self { - Self::ParseEdn(s.to_string()) - } -} - -impl From for Error { - fn from(s: core::str::ParseBoolError) -> Self { - Self::ParseEdn(s.to_string()) - } -} - -impl From for Error { - fn from(e: num::TryFromIntError) -> Self { - Self::TryFromInt(e) - } -} - -impl From for Error { - fn from(_: Infallible) -> Self { - Self::Infallable() - } -} - -impl fmt::Display for Error { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Self::ParseEdn(s) | Self::Deserialize(s) | Self::Iter(s) => write!(f, "{}", &s), - Self::TryFromInt(e) => write!(f, "{e}"), - Self::Infallable() => panic!("Infallable conversion"), - } - } -} - #[cfg(test)] mod test { use alloc::borrow::ToOwned; diff --git a/src/lib.rs b/src/lib.rs index 3c11bc4..473a115 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -100,7 +100,7 @@ pub fn json_to_edn<'a>(json: impl AsRef) -> Cow<'a, str> { } pub use deserialize::{from_edn, from_str, Deserialize}; -pub use edn::Error as EdnError; +pub use edn::error::Error as EdnError; #[cfg(feature = "sets")] pub use edn::Set; pub use edn::{Edn, List, Map, Vector}; diff --git a/tests/deserialize.rs b/tests/deserialize.rs index ce206e2..0f379a8 100644 --- a/tests/deserialize.rs +++ b/tests/deserialize.rs @@ -19,6 +19,11 @@ mod test { #[test] fn parse_empty() { assert_eq!(Edn::from_str("").unwrap(), Edn::Empty); + assert_eq!( + Edn::from_str("[]").unwrap(), + Edn::Vector(Vector::new(vec![])) + ); + assert_eq!(Edn::from_str("()").unwrap(), Edn::List(List::new(vec![]))); } #[test] @@ -33,12 +38,7 @@ mod test { #[cfg(not(feature = "sets"))] // Special case of running into a set without the feature enabled fn parse_set_without_set_feature() { - assert_eq!( - Edn::from_str("#{true, \\c, 3,four, }"), - Err(Error::ParseEdn( - "Could not parse set due to feature not being enabled".to_string() - )) - ) + assert!(Edn::from_str("#{true, \\c, 3,four, }").is_err()) } #[test] @@ -804,10 +804,7 @@ mod test { #[test] fn parse_numberic_symbol_with_doube_e() { - assert_eq!( - Edn::from_str("5011227E71367421E12").unwrap(), - Edn::Symbol("5011227E71367421E12".to_string()) - ); + assert!(Edn::from_str("5011227E71367421E12").is_err()); } #[test] @@ -991,4 +988,11 @@ mod test { ])) ); } + + #[test] + fn invalid_edn() { + assert!(Edn::from_str("{ :foo 42 :foo 43 }").is_err()); + assert!(Edn::from_str("{ :[0x42] 42 }").is_err()); + assert!(Edn::from_str("\\cats").is_err()); + } } diff --git a/tests/deserialize_sets.rs b/tests/deserialize_sets.rs index 6e09b95..42e5f2c 100644 --- a/tests/deserialize_sets.rs +++ b/tests/deserialize_sets.rs @@ -6,8 +6,8 @@ mod test { use alloc::collections::BTreeSet; use core::str::FromStr; - use edn::{Error, List, Vector}; - use edn_rs::{edn, from_edn, from_str, hset, map, set, Edn, Map, Set}; + use edn::{List, Vector}; + use edn_rs::{edn, from_edn, from_str, hset, map, set, Edn, EdnError, Map, Set}; #[test] fn parse_set_with_commas() { @@ -176,14 +176,8 @@ mod test { #[test] fn deser_btreeset_with_error() { let edn = "#{\"a\", 5, \"b\"}"; - let err: Result, Error> = from_str(edn); - assert_eq!( - err, - Err(Error::Deserialize( - "Cannot safely deserialize Set(Set({Str(\"a\"), Str(\"b\"), UInt(5)})) to BTreeSet" - .to_string() - )) - ); + let err: Result, EdnError> = from_str(edn); + assert!(err.is_err()); } #[test] diff --git a/tests/emit_json.rs b/tests/emit_json.rs index 2e91733..29836e8 100644 --- a/tests/emit_json.rs +++ b/tests/emit_json.rs @@ -87,14 +87,13 @@ mod tests { #[test] fn regression_str_to_uint_test() { use edn_derive::Deserialize; - use edn_rs::EdnError; #[derive(Deserialize, Debug, PartialEq)] struct A { amount: u64, } - let a: Result = edn_rs::from_str("{ :amount \"123\" }"); - assert_eq!(a, Ok(A { amount: 123 })); + let a: A = edn_rs::from_str("{ :amount \"123\" }").unwrap(); + assert_eq!(a, A { amount: 123 }); } #[test] diff --git a/tests/error_messages.rs b/tests/error_messages.rs new file mode 100644 index 0000000..8e30916 --- /dev/null +++ b/tests/error_messages.rs @@ -0,0 +1,97 @@ +#[cfg(test)] +mod test { + use core::str::FromStr; + + use edn_rs::Edn; + + fn err_as_string(s: &str) -> String { + let err = Edn::from_str(s).err().unwrap(); + format!("{err:?}") + } + + #[test] + fn invalid_keyword() { + assert_eq!( + err_as_string(":"), + "EdnError { code: InvalidKeyword, line: Some(1), column: Some(1), ptr: Some(0) }" + ); + assert_eq!( + err_as_string(" :"), + "EdnError { code: InvalidKeyword, line: Some(1), column: Some(3), ptr: Some(2) }" + ); + assert_eq!( + err_as_string("\n\n :"), + "EdnError { code: InvalidKeyword, line: Some(3), column: Some(4), ptr: Some(5) }" + ); + } + + #[test] + fn unexpected_eof() { + assert_eq!( + err_as_string(r#""hello, world!"#), + "EdnError { code: UnexpectedEOF, line: Some(1), column: Some(15), ptr: Some(14) }" + ); + assert_eq!( + err_as_string( + r#""hello, +multiple +lines +world!"# + ), + "EdnError { code: UnexpectedEOF, line: Some(4), column: Some(7), ptr: Some(29) }" + ); + } + + #[test] + fn invalid_num() { + assert_eq!( + err_as_string(" ,,,, , , ,,,, ,\n ,,,, 0xfoobarlol"), + "EdnError { code: InvalidNumber, line: Some(2), column: Some(13), ptr: Some(29) }" + ); + assert_eq!( + err_as_string("[ ; comment \n-0xfoobarlol 0xsilycat]"), + "EdnError { code: InvalidNumber, line: Some(2), column: Some(1), ptr: Some(13) }" + ); + assert_eq!( + err_as_string("[ ;;;,,,,\n , , ,,,, ,\n ,,,, 16 -0xfoobarlol 0xsilycat]"), + "EdnError { code: InvalidNumber, line: Some(3), column: Some(13), ptr: Some(34) }" + ); + } + + #[test] + fn utf8() { + let err = Edn::from_str("(猫 ; cat\nおやつ;treats\n ") + .err() + .unwrap(); + + assert_eq!( + format!("{err:?}"), + "EdnError { code: UnexpectedEOF, line: Some(3), column: Some(7), ptr: Some(34) }" + ); + + assert_eq!(err.line, Some(3)); + assert_eq!(err.column, Some(7)); + assert_eq!(err.ptr, Some(34)); + } + + #[test] + #[cfg(not(feature = "sets"))] + fn disabled_features() { + // Special case of running into a set without the feature enabled + assert_eq!( + err_as_string("#{true, \\c, 3,four, }",), + "EdnError { code: NoFeatureSets, line: Some(1), column: Some(2), ptr: Some(1) }" + ); + + assert_eq!( + err_as_string("[1 \n2 ;3 \n4 #{true, \\c, 3,four, }]",), + "EdnError { code: NoFeatureSets, line: Some(3), column: Some(4), ptr: Some(13) }" + ); + } + + #[test] + fn invalid_conversions() { + let small = edn_rs::from_edn::(&Edn::UInt(9876123)); + assert_eq!(format!("{small:?}"), "Err(EdnError { code: TryFromInt(TryFromIntError(())), line: None, column: None, ptr: None })"); + } +} diff --git a/tests/lib.rs b/tests/lib.rs index 4380296..2e6f166 100644 --- a/tests/lib.rs +++ b/tests/lib.rs @@ -8,6 +8,7 @@ pub mod deserialize; pub mod deserialize_sets; pub mod emit; pub mod emit_json; +pub mod error_messages; pub mod parse; pub mod parse_sets; pub mod ser; From 5ad0427304dc9a05276ef2de36013c82e5ab22e8 Mon Sep 17 00:00:00 2001 From: Kevin Nakamura Date: Thu, 15 Feb 2024 10:32:01 +0900 Subject: [PATCH 2/8] bb task for instrument based coverage --- bb.edn | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/bb.edn b/bb.edn index f70adc6..81365cd 100644 --- a/bb.edn +++ b/bb.edn @@ -1,7 +1,11 @@ {:tasks - {clean {:doc "Removes target folder" - :requires ([babashka.fs :as fs]) - :task (fs/delete-tree "target")} + {:init (do (def code-cov-env + {"CARGO_INCREMENTAL" "0" + "RUSTFLAGS" "-Cinstrument-coverage -Copt-level=0 -Ccodegen-units=1 -Cinline-threshold=0 -Clink-dead-code -Coverflow-checks=off" + "LLVM_PROFILE_FILE" "target/coverage/cargo-test-%p-%m.profraw"})) + :requires ([babashka.fs :as fs]) + clean {:doc "Removes target folder" + :task (fs/delete-tree "target")} test_lib_features (shell "cargo test --all-features --no-fail-fast") test_lib_no_default_features (shell "cargo test --features std --no-default-features --no-fail-fast") example_fast (shell "cargo test --examples --no-fail-fast") @@ -23,5 +27,15 @@ :task (shell "cargo clippy --features json --no-default-features -- -W future-incompatible -W rust_2018_idioms -W clippy::all -W clippy::pedantic -W clippy::nursery --deny warnings")} clippy {:doc "Runs all variations of cargo clippy" :depends [cargo-clippy-all-features cargo-clippy-no-sets-json cargo-clippy-no-defaults]} + cov-all-features {:doc "Coverage, all features" + :task (shell {:extra-env code-cov-env} "cargo test --all-features")} + cov-std-only {:doc "Coverage, std only" + :task (shell {:extra-env code-cov-env} "cargo test --no-default-features --features std")} + clean-cov {:doc "Cleans all .profraw files and generated html" + :task (fs/delete-tree "target/coverage")} + grcov {:doc "Runs grcov to generate human readable html" + :task (shell "grcov target/coverage --binary-path ./target/debug/deps/ -s . -t html --branch --ignore-not-existing -o target/coverage/html")} + coverage {:doc "Generates coverage in human friendly html in target/coverage/" + :depends [clean-cov cov-all-features cov-std-only grcov]} test {:doc "Runs all tests and checks" :depends [cargo-test cargo-fmt clippy]}}} From 5e39f4a29c535c8f78e0344a5d43dedc31b52d46 Mon Sep 17 00:00:00 2001 From: Kevin Nakamura Date: Thu, 15 Feb 2024 10:57:54 +0900 Subject: [PATCH 3/8] Examples simplified with `is_err()` and docs with `Debug` impl. --- examples/complex_struct_deserialization.rs | 7 +------ examples/from_edn.rs | 7 +------ examples/struct_from_str.rs | 14 ++------------ src/deserialize/mod.rs | 21 +++------------------ 4 files changed, 7 insertions(+), 42 deletions(-) diff --git a/examples/complex_struct_deserialization.rs b/examples/complex_struct_deserialization.rs index e3836e3..49143f3 100644 --- a/examples/complex_struct_deserialization.rs +++ b/examples/complex_struct_deserialization.rs @@ -70,12 +70,7 @@ fn complex_wrong() -> Result<(), EdnError> { let bad_edn_str = "{:list [{:name \"rose\" :age \"some text\" :cool true}, {:name \"josh\" :age 33 :cool false}, {:name \"eva\" :age 296 :cool true}]}"; let complex: Result = edn_rs::from_str(bad_edn_str); - assert_eq!( - complex, - Err(EdnError::Deserialize( - "couldn't convert `\"some text\"` into `uint`".to_string() - )) - ); + assert!(complex.is_err()); Ok(()) } diff --git a/examples/from_edn.rs b/examples/from_edn.rs index fa319cc..20f149b 100644 --- a/examples/from_edn.rs +++ b/examples/from_edn.rs @@ -42,12 +42,7 @@ fn person_mistyped() -> Result<(), EdnError> { })); let person: Result = edn_rs::from_edn(&bad_edn); - assert_eq!( - person, - Err(EdnError::Deserialize( - "couldn't convert `\"some text\"` into `uint`".to_string() - )) - ); + assert!(person.is_err()); Ok(()) } diff --git a/examples/struct_from_str.rs b/examples/struct_from_str.rs index 2939f46..45941ad 100644 --- a/examples/struct_from_str.rs +++ b/examples/struct_from_str.rs @@ -36,13 +36,7 @@ fn person_mistyped() -> Result<(), EdnError> { let bad_edn_str = "{:name \"rose\" :age \"some text\" }"; let person: Result = edn_rs::from_str(bad_edn_str); - assert_eq!( - person, - Err(EdnError::Deserialize( - "couldn't convert `\"some text\"` into `uint`".to_string() - )) - ); - + assert!(person.is_err()); Ok(()) } @@ -50,11 +44,7 @@ fn person_overflow() -> Result<(), EdnError> { let overflow_edn_str = " {:name \"rose\" :age 9002 } "; let person: Result = edn_rs::from_str(overflow_edn_str); - assert_eq!( - format!("{person:?}"), - "Err(TryFromInt(TryFromIntError(())))" - ); - + assert!(person.is_err()); Ok(()) } diff --git a/src/deserialize/mod.rs b/src/deserialize/mod.rs index 0945a3c..4124752 100644 --- a/src/deserialize/mod.rs +++ b/src/deserialize/mod.rs @@ -60,12 +60,7 @@ use ordered_float::OrderedFloat; /// let bad_edn_str = "{:name \"rose\" :age \"some text\" }"; /// let person: Result = edn_rs::from_str(bad_edn_str); /// -/// assert_eq!( -/// person, -/// Err(EdnError::Deserialize( -/// "couldn't convert `\"some text\"` into `uint`".to_string() -/// )) -/// ); +/// println!("{:?}", person); /// ``` #[allow(clippy::missing_errors_doc)] pub trait Deserialize: Sized { @@ -322,12 +317,7 @@ where /// let bad_edn_str = "{:name \"rose\" :age \"some text\" }"; /// let person: Result = edn_rs::from_str(bad_edn_str); /// -/// assert_eq!( -/// person, -/// Err(EdnError::Deserialize( -/// "couldn't convert `\"some text\"` into `uint`".to_string() -/// )) -/// ); +/// println!("{:?}", person); /// ``` pub fn from_str(s: &str) -> Result { let edn = Edn::from_str(s)?; @@ -381,12 +371,7 @@ pub fn from_str(s: &str) -> Result { /// })); /// let person: Result = edn_rs::from_edn(&bad_edn); /// -/// assert_eq!( -/// person, -/// Err(EdnError::Deserialize( -/// "couldn't convert `\"some text\"` into `uint`".to_string() -/// )) -/// ); +/// println!("{:?}", person); /// ``` pub fn from_edn(edn: &Edn) -> Result { T::deserialize(edn) From b5b403562f65498a8011db89c851d27472a61a81 Mon Sep 17 00:00:00 2001 From: Kevin Nakamura Date: Thu, 15 Feb 2024 11:28:33 +0900 Subject: [PATCH 4/8] Convert EdnError tests to compare `Debug` impl instead of relying on `Eq`. --- tests/deserialize.rs | 86 +++++++++++++++------------------------ tests/deserialize_sets.rs | 23 ++++++----- 2 files changed, 45 insertions(+), 64 deletions(-) diff --git a/tests/deserialize.rs b/tests/deserialize.rs index 0f379a8..3a09173 100644 --- a/tests/deserialize.rs +++ b/tests/deserialize.rs @@ -5,9 +5,13 @@ mod test { use alloc::collections::BTreeMap; use core::str::FromStr; - use edn::Error; use edn_rs::{edn, from_edn, from_str, hmap, map, Edn, List, Map, Vector}; + fn err_as_string(s: &str) -> String { + let err = Edn::from_str(s).err().unwrap(); + format!("{err:?}") + } + #[test] fn unit() { let nil = "nil"; @@ -109,16 +113,16 @@ mod test { #[test] fn parse_str_with_invalid_escape() { assert_eq!( - Edn::from_str(r#""hello\n \r \t \"world\" with escaped \\ \g characters""#), - Err(Error::ParseEdn("Invalid escape sequence \\g".to_string())) + err_as_string(r#""hello\n \r \t \"world\" with escaped \\ \g characters""#), + "EdnError { code: InvalidEscape, line: Some(1), column: Some(44), ptr: Some(43) }" ); } #[test] fn parse_unterminated_string() { assert_eq!( - Edn::from_str(r#""hello\n \r \t \"world\" with escaped \\ characters"#), - Err(Error::ParseEdn("Unterminated string".to_string())) + err_as_string(r#""hello\n \r \t \"world\" with escaped \\ characters"#), + "EdnError { code: UnexpectedEOF, line: Some(1), column: Some(52), ptr: Some(51) }" ); } @@ -159,15 +163,15 @@ mod test { let edn = "[1 \"2\" 3.3 :b true \\c]"; assert_eq!( - Edn::from_str(edn), - Ok(Edn::Vector(Vector::new(vec![ + Edn::from_str(edn).unwrap(), + Edn::Vector(Vector::new(vec![ Edn::UInt(1), Edn::Str("2".to_string()), Edn::Double(3.3.into()), Edn::Key(":b".to_string()), Edn::Bool(true), Edn::Char('c') - ]))) + ])) ); } @@ -294,14 +298,14 @@ mod test { let edn = "(1 \"2\" 3.3 :b [true \\c])"; assert_eq!( - Edn::from_str(edn), - Ok(Edn::List(List::new(vec![ + Edn::from_str(edn).unwrap(), + Edn::List(List::new(vec![ Edn::UInt(1), Edn::Str("2".to_string()), Edn::Double(3.3.into()), Edn::Key(":b".to_string()), Edn::Vector(Vector::new(vec![Edn::Bool(true), Edn::Char('c')])) - ]))) + ])) ); } @@ -347,11 +351,11 @@ mod test { let edn = "{:a \"2\" :b true :c nil}"; assert_eq!( - Edn::from_str(edn), - Ok(Edn::Map(Map::new( + Edn::from_str(edn).unwrap(), + Edn::Map(Map::new( map! {":a".to_string() => Edn::Str("2".to_string()), ":b".to_string() => Edn::Bool(true), ":c".to_string() => Edn::Nil} - ))) + )) ); } @@ -426,10 +430,8 @@ mod test { #[test] fn parse_discard_invalid() { assert_eq!( - Edn::from_str("#_{ 234"), - Err(Error::ParseEdn( - "None could not be parsed at char count 3".to_string() - )) + err_as_string("#_{ 234"), + "EdnError { code: UnexpectedEOF, line: Some(1), column: Some(8), ptr: Some(7) }" ); } @@ -464,10 +466,8 @@ mod test { #[test] fn parse_discard_no_follow_element() { assert_eq!( - Edn::from_str("#_ ,, "), - Err(Error::ParseEdn( - "Discard sequence must have a following element at char count 2".to_string() - )) + err_as_string("#_ ,, "), + "EdnError { code: UnexpectedEOF, line: Some(1), column: Some(7), ptr: Some(6) }" ); } @@ -482,10 +482,8 @@ mod test { #[test] fn parse_discard_end_of_seq_no_follow() { assert_eq!( - Edn::from_str("[:foo #_ ]"), - Err(Error::ParseEdn( - "Discard sequence must have a following element at char count 8".to_string() - )) + err_as_string("[:foo #_ ]"), + "EdnError { code: UnexpectedEOF, line: Some(1), column: Some(10), ptr: Some(9) }" ); } @@ -851,30 +849,17 @@ mod test { #[test] fn parse_invalid_ints() { assert_eq!( - Edn::from_str("42invalid123"), - Err(Error::ParseEdn( - "42invalid123 could not be parsed with radix 10".to_string() - )) + err_as_string("42invalid123"), + "EdnError { code: InvalidNumber, line: Some(1), column: Some(1), ptr: Some(0) }" ); - assert_eq!( - Edn::from_str("0xxyz123"), - Err(Error::ParseEdn( - "xyz123 could not be parsed with radix 16".to_string() - )) + err_as_string("0xxyz123"), + "EdnError { code: InvalidNumber, line: Some(1), column: Some(1), ptr: Some(0) }" ); - + assert_eq!(err_as_string("42rabcxzy"), "EdnError { code: InvalidRadix(Some(42)), line: Some(1), column: Some(1), ptr: Some(0) }"); assert_eq!( - Edn::from_str("42rabcxzy"), - Err(Error::ParseEdn("Radix of 42 is out of bounds".to_string())) - ); - - assert_eq!( - Edn::from_str("42crazyrabcxzy"), - Err(Error::ParseEdn( - "invalid digit found in string while trying to parse radix from 42crazyrabcxzy" - .to_string() - )) + err_as_string("42crazyrabcxzy"), + "EdnError { code: InvalidRadix(None), line: Some(1), column: Some(1), ptr: Some(0) }" ); } @@ -937,14 +922,7 @@ mod test { #[test] fn weird_input() { - let edn = "{:a]"; - - assert_eq!( - Edn::from_str(edn), - Err(Error::ParseEdn( - "Could not identify symbol index".to_string() - )) - ); + assert_eq!(err_as_string("{:a]"), "EdnError { code: UnmatchedDelimiter(']'), line: Some(1), column: Some(4), ptr: Some(3) }"); } #[test] diff --git a/tests/deserialize_sets.rs b/tests/deserialize_sets.rs index 42e5f2c..aac780b 100644 --- a/tests/deserialize_sets.rs +++ b/tests/deserialize_sets.rs @@ -9,6 +9,11 @@ mod test { use edn::{List, Vector}; use edn_rs::{edn, from_edn, from_str, hset, map, set, Edn, EdnError, Map, Set}; + fn err_as_string(s: &str) -> String { + let err = Edn::from_str(s).err().unwrap(); + format!("{err:?}") + } + #[test] fn parse_set_with_commas() { assert_eq!( @@ -59,15 +64,15 @@ mod test { let edn = "(1 -10 \"2\" 3.3 :b #{true \\c})"; assert_eq!( - Edn::from_str(edn), - Ok(Edn::List(List::new(vec![ + Edn::from_str(edn).unwrap(), + Edn::List(List::new(vec![ Edn::UInt(1), Edn::Int(-10), Edn::Str("2".to_string()), Edn::Double(3.3.into()), Edn::Key(":b".to_string()), Edn::Set(Set::new(set![Edn::Bool(true), Edn::Char('c')])) - ]))) + ])) ); } @@ -114,15 +119,15 @@ mod test { let edn = "{:a \"2\" :b [true false] :c #{:A {:a :b} nil}}"; assert_eq!( - Edn::from_str(edn), - Ok(Edn::Map(Map::new(map! { + Edn::from_str(edn).unwrap(), + Edn::Map(Map::new(map! { ":a".to_string() =>Edn::Str("2".to_string()), ":b".to_string() => Edn::Vector(Vector::new(vec![Edn::Bool(true), Edn::Bool(false)])), ":c".to_string() => Edn::Set(Set::new( set!{ Edn::Map(Map::new(map!{":a".to_string() => Edn::Key(":b".to_string())})), Edn::Key(":A".to_string()), - Edn::Nil}))}))) + Edn::Nil}))})) ); } @@ -149,12 +154,10 @@ mod test { #[test] fn parse_discard_space_invalid() { assert_eq!( - Edn::from_str( + err_as_string( "#_ ,, #{hello, this will be discarded} #_{so will this} #{this is invalid" ), - Err(Error::ParseEdn( - "None could not be parsed at char count 58".to_string() - )) + "EdnError { code: UnexpectedEOF, line: Some(1), column: Some(74), ptr: Some(73) }" ); } From 8d3b76fcb67fbd34436c37c96cc116f5e97701ce Mon Sep 17 00:00:00 2001 From: Kevin Nakamura Date: Thu, 15 Feb 2024 11:38:13 +0900 Subject: [PATCH 5/8] 100%, minus Infallable, instrument coverage of new error handling. --- src/deserialize/mod.rs | 46 ++++++++++++++++++++-------------------- src/deserialize/parse.rs | 27 ++++++++++++----------- src/edn/error.rs | 31 --------------------------- src/edn/mod.rs | 8 +++---- 4 files changed, 40 insertions(+), 72 deletions(-) diff --git a/src/deserialize/mod.rs b/src/deserialize/mod.rs index 4124752..a51b399 100644 --- a/src/deserialize/mod.rs +++ b/src/deserialize/mod.rs @@ -164,20 +164,20 @@ where { fn deserialize(edn: &Edn) -> Result { match edn { - Edn::Vector(_) => Ok(edn - .iter_some() - .ok_or_else(Error::iter)? + Edn::Vector(v) => Ok(v + .0 + .iter() .map(|e| Deserialize::deserialize(e)) .collect::>()?), - Edn::List(_) => Ok(edn - .iter_some() - .ok_or_else(Error::iter)? + Edn::List(l) => Ok(l + .0 + .iter() .map(|e| Deserialize::deserialize(e)) .collect::>()?), #[cfg(feature = "sets")] - Edn::Set(_) => Ok(edn - .iter_some() - .ok_or_else(Error::iter)? + Edn::Set(s) => Ok(s + .0 + .iter() .map(|e| Deserialize::deserialize(e)) .collect::>()?), _ => Err(build_deserialize_error(any::type_name::())), @@ -193,9 +193,9 @@ where { fn deserialize(edn: &Edn) -> Result { match edn { - Edn::Map(_) => edn - .map_iter() - .ok_or_else(Error::iter)? + Edn::Map(m) => m + .0 + .iter() .map(|(key, e)| { Ok(( key.to_string(), @@ -214,9 +214,9 @@ where { fn deserialize(edn: &Edn) -> Result { match edn { - Edn::Map(_) => edn - .map_iter() - .ok_or_else(Error::iter)? + Edn::Map(m) => m + .0 + .iter() .map(|(key, e)| { Ok(( key.to_string(), @@ -237,11 +237,11 @@ where { fn deserialize(edn: &Edn) -> Result { match edn { - Edn::Set(_) => edn - .set_iter() - .ok_or_else(Error::iter)? - .map(|e| Deserialize::deserialize(e).map_err(|_| Error::deserialize("HashSet"))) - .collect::>(), + Edn::Set(s) => { + s.0.iter() + .map(|e| Deserialize::deserialize(e).map_err(|_| Error::deserialize("HashSet"))) + .collect::>() + } _ => Err(build_deserialize_error(any::type_name::())), } } @@ -254,9 +254,9 @@ where { fn deserialize(edn: &Edn) -> Result { match edn { - Edn::Set(_) => edn - .set_iter() - .ok_or_else(Error::iter)? + Edn::Set(s) => s + .0 + .iter() .map(|e| Deserialize::deserialize(e).map_err(|_| Error::deserialize("BTreeSet"))) .collect::>(), _ => Err(build_deserialize_error(any::type_name::())), diff --git a/src/deserialize/parse.rs b/src/deserialize/parse.rs index 7c5459f..bc36479 100644 --- a/src/deserialize/parse.rs +++ b/src/deserialize/parse.rs @@ -16,7 +16,6 @@ use crate::edn::{Edn, List, Map, Vector}; const DELIMITERS: [char; 8] = [',', ']', '}', ')', ';', '(', '[', '{']; -#[derive(Debug)] struct Walker<'w> { slice: &'w str, ptr: usize, @@ -517,20 +516,20 @@ fn parse_number(lit: &str) -> Result { } }; - match number { - n if (n.contains('E') || n.contains('e')) && n.parse::().is_ok() => { - Ok(Edn::Double(n.parse::()?.into())) - } - n if u64::from_str_radix(&n, radix.into()).is_ok() => { - Ok(Edn::UInt(u64::from_str_radix(&n, radix.into())?)) - } - n if i64::from_str_radix(&n, radix.into()).is_ok() => { - Ok(Edn::Int(i64::from_str_radix(&n, radix.into())?)) - } - n if n.parse::().is_ok() => Ok(Edn::Double(n.parse::()?.into())), - n if num_den_from_slice(&n).is_some() => Ok(Edn::Rational(num_den_from_slice(n).unwrap())), - _ => Err(Code::InvalidNumber), + if let Ok(n) = u64::from_str_radix(&number, radix.into()) { + return Ok(Edn::UInt(n)); + } + if let Ok(n) = i64::from_str_radix(&number, radix.into()) { + return Ok(Edn::Int(n)); } + if let Ok(n) = number.parse::() { + return Ok(Edn::Double(n.into())); + } + if let Some((n, d)) = num_den_from_slice(&number) { + return Ok(Edn::Rational((n, d))); + } + + Err(Code::InvalidNumber) } #[inline] diff --git a/src/edn/error.rs b/src/edn/error.rs index 6edd4f5..2ca6028 100644 --- a/src/edn/error.rs +++ b/src/edn/error.rs @@ -21,7 +21,6 @@ pub enum Code { InvalidKeyword, InvalidNumber, InvalidRadix(Option), - ParseNumber(ParseNumber), UnexpectedEOF, UnmatchedDelimiter(char), @@ -31,22 +30,12 @@ pub enum Code { /// Deserialize errors Convert(&'static str), - /// Navigation errors - Iter, - /// Type conversion errors TryFromInt(num::TryFromIntError), #[doc(hidden)] Infallable(), // Makes the compiler happy for converting u64 to u64 and i64 to i64 } -#[derive(Debug, Eq, PartialEq)] -#[non_exhaustive] -pub enum ParseNumber { - ParseIntError(num::ParseIntError), - ParseFloatError(num::ParseFloatError), -} - impl Error { pub(crate) const fn deserialize(conv_type: &'static str) -> Self { Self { @@ -56,14 +45,6 @@ impl Error { ptr: None, } } - pub(crate) const fn iter() -> Self { - Self { - code: Code::Iter, - line: None, - column: None, - ptr: None, - } - } } impl Debug for Error { @@ -76,18 +57,6 @@ impl Debug for Error { } } -impl From for Code { - fn from(e: num::ParseIntError) -> Self { - Self::ParseNumber(ParseNumber::ParseIntError(e)) - } -} - -impl From for Code { - fn from(e: num::ParseFloatError) -> Self { - Self::ParseNumber(ParseNumber::ParseFloatError(e)) - } -} - impl From for Error { fn from(_: convert::Infallible) -> Self { Self { diff --git a/src/edn/mod.rs b/src/edn/mod.rs index a585f0b..92e60d5 100644 --- a/src/edn/mod.rs +++ b/src/edn/mod.rs @@ -90,7 +90,7 @@ impl From for Double { #[derive(Debug, Clone, PartialEq)] #[cfg_attr(feature = "sets", derive(Eq, PartialOrd, Ord))] -pub struct Vector(Vec); +pub struct Vector(pub(crate) Vec); impl Vector { #[must_use] pub const fn new(v: Vec) -> Self { @@ -110,7 +110,7 @@ impl Vector { #[derive(Debug, Clone, PartialEq)] #[cfg_attr(feature = "sets", derive(Eq, PartialOrd, Ord))] -pub struct List(Vec); +pub struct List(pub(crate) Vec); impl List { #[must_use] pub fn new(v: Vec) -> Self { @@ -130,7 +130,7 @@ impl List { #[cfg(feature = "sets")] #[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)] -pub struct Set(BTreeSet); +pub struct Set(pub(crate) BTreeSet); #[cfg(feature = "sets")] impl Set { @@ -152,7 +152,7 @@ impl Set { #[derive(Debug, Clone, PartialEq)] #[cfg_attr(feature = "sets", derive(Eq, PartialOrd, Ord))] -pub struct Map(BTreeMap); +pub struct Map(pub(crate) BTreeMap); impl Map { #[must_use] pub fn new(m: BTreeMap) -> Self { From cfeedbfaedff45bb8dec580d82965b56822b0a83 Mon Sep 17 00:00:00 2001 From: Kevin Nakamura Date: Sat, 24 Feb 2024 04:07:43 +0900 Subject: [PATCH 6/8] EdnError::CustomError for convenience crates built on top of `edn-rs`. --- src/edn/error.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/edn/error.rs b/src/edn/error.rs index 2ca6028..a8e9d15 100644 --- a/src/edn/error.rs +++ b/src/edn/error.rs @@ -14,6 +14,10 @@ pub struct Error { #[derive(Debug, Eq, PartialEq)] #[non_exhaustive] pub enum Code { + /// Custom errors for external crates (eg. `edn-derive`). std only. + #[cfg(feature = "std")] + CustomError(std::string::String), + /// Parse errors HashMapDuplicateKey, InvalidChar, From a1cf368be59bbc8067f279bb42a20e22b6f47eb1 Mon Sep 17 00:00:00 2001 From: Kevin Nakamura Date: Sat, 24 Feb 2024 05:35:30 +0900 Subject: [PATCH 7/8] Refactor `tests/` with a common module. --- Cargo.toml | 1 + tests/deserialize.rs | 6 ++---- tests/deserialize_sets.rs | 6 ++---- tests/lib.rs | 14 -------------- tests/ser.rs | 2 ++ tests/test/common.rs | 7 +++++++ 6 files changed, 14 insertions(+), 22 deletions(-) delete mode 100644 tests/lib.rs create mode 100644 tests/test/common.rs diff --git a/Cargo.toml b/Cargo.toml index a6780de..9afbcf4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,6 +9,7 @@ repository = "https://github.com/edn-rs/edn-rs" keywords = ["EDN", "no_std"] license = "MIT" edition = "2021" +resolver = "2" [lints.rust] rust_2018_idioms = "warn" diff --git a/tests/deserialize.rs b/tests/deserialize.rs index 3a09173..74b3b89 100644 --- a/tests/deserialize.rs +++ b/tests/deserialize.rs @@ -7,10 +7,8 @@ mod test { use edn_rs::{edn, from_edn, from_str, hmap, map, Edn, List, Map, Vector}; - fn err_as_string(s: &str) -> String { - let err = Edn::from_str(s).err().unwrap(); - format!("{err:?}") - } + mod common; + use crate::test::common::err_as_string; #[test] fn unit() { diff --git a/tests/deserialize_sets.rs b/tests/deserialize_sets.rs index aac780b..43e534d 100644 --- a/tests/deserialize_sets.rs +++ b/tests/deserialize_sets.rs @@ -9,10 +9,8 @@ mod test { use edn::{List, Vector}; use edn_rs::{edn, from_edn, from_str, hset, map, set, Edn, EdnError, Map, Set}; - fn err_as_string(s: &str) -> String { - let err = Edn::from_str(s).err().unwrap(); - format!("{err:?}") - } + mod common; + use crate::test::common::err_as_string; #[test] fn parse_set_with_commas() { diff --git a/tests/lib.rs b/tests/lib.rs deleted file mode 100644 index 2e6f166..0000000 --- a/tests/lib.rs +++ /dev/null @@ -1,14 +0,0 @@ -#![recursion_limit = "512"] - -extern crate alloc; -#[cfg(feature = "std")] -extern crate std; - -pub mod deserialize; -pub mod deserialize_sets; -pub mod emit; -pub mod emit_json; -pub mod error_messages; -pub mod parse; -pub mod parse_sets; -pub mod ser; diff --git a/tests/ser.rs b/tests/ser.rs index 0e27876..6444627 100644 --- a/tests/ser.rs +++ b/tests/ser.rs @@ -1,3 +1,5 @@ +#![recursion_limit = "512"] + #[cfg(test)] mod tests { use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet}; diff --git a/tests/test/common.rs b/tests/test/common.rs new file mode 100644 index 0000000..d34f8ef --- /dev/null +++ b/tests/test/common.rs @@ -0,0 +1,7 @@ +use core::str::FromStr; +use edn_rs::Edn; + +pub fn err_as_string(s: &str) -> String { + let err = Edn::from_str(s).err().unwrap(); + format!("{err:?}") +} From f658a218156d589c39e21c3cd5dec0bcad34c889 Mon Sep 17 00:00:00 2001 From: Kevin Nakamura Date: Sun, 25 Feb 2024 15:05:43 +0900 Subject: [PATCH 8/8] Duplicates in sets is now an Error. Clojure's behavior: (clojure.edn/read-string "#{1 2 2 3}") java.lang.IllegalArgumentException: Duplicate key: 2 [at :1:1] --- src/deserialize/parse.rs | 11 ++++++++--- src/edn/error.rs | 1 + tests/error_messages.rs | 17 +++++++++++++++++ 3 files changed, 26 insertions(+), 3 deletions(-) diff --git a/src/deserialize/parse.rs b/src/deserialize/parse.rs index bc36479..79ed7d2 100644 --- a/src/deserialize/parse.rs +++ b/src/deserialize/parse.rs @@ -262,9 +262,14 @@ fn parse_set(walker: &mut Walker<'_>) -> Result { } Some(_) => { let next = parse_internal(walker)?; - if next != Edn::Empty { - set.insert(next); - } + if next != Edn::Empty && !set.insert(next) { + return Err(Error { + code: Code::SetDuplicateKey, + line: Some(walker.line), + column: Some(walker.column), + ptr: Some(walker.ptr), + }); + }; } _ => { return Err(Error { diff --git a/src/edn/error.rs b/src/edn/error.rs index a8e9d15..a186bf5 100644 --- a/src/edn/error.rs +++ b/src/edn/error.rs @@ -20,6 +20,7 @@ pub enum Code { /// Parse errors HashMapDuplicateKey, + SetDuplicateKey, InvalidChar, InvalidEscape, InvalidKeyword, diff --git a/tests/error_messages.rs b/tests/error_messages.rs index 8e30916..13ac985 100644 --- a/tests/error_messages.rs +++ b/tests/error_messages.rs @@ -94,4 +94,21 @@ world!"# let small = edn_rs::from_edn::(&Edn::UInt(9876123)); assert_eq!(format!("{small:?}"), "Err(EdnError { code: TryFromInt(TryFromIntError(())), line: None, column: None, ptr: None })"); } + + #[test] + #[cfg(feature = "sets")] + fn duplicate_in_set() { + assert_eq!( + err_as_string("#{1 42 42 3}"), + "EdnError { code: SetDuplicateKey, line: Some(1), column: Some(10), ptr: Some(9) }" + ); + } + + #[test] + fn duplicate_in_map() { + assert_eq!( + err_as_string("{:foo 42 :bar 43 :foo \"cat\"}"), + "EdnError { code: HashMapDuplicateKey, line: Some(1), column: Some(28), ptr: Some(27) }" + ); + } }