diff --git a/Cargo.toml b/Cargo.toml index a3e3c4a..a6780de 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,6 +18,7 @@ unsafe_code = "deny" [lints.clippy] pedantic = "warn" nursery = "warn" +inline_always = "allow" [features] default = ["sets", "std"] diff --git a/README.md b/README.md index 31499a6..b30f630 100644 --- a/README.md +++ b/README.md @@ -385,7 +385,7 @@ fn complex_ok() -> Result<(), EdnError> { - [x] String `"\"string\""` - [x] Numbers `"324352"`, `"3442.234"`, `"3/4"` - [x] Keywords `:a` - - [x] Symbol `sym-bol-s` with a maximum of 200 chars + - [x] Symbol `sym-bol-s` - [x] Vector `"[1 :2 \"d\"]"` - [x] List `"(1 :2 \"d\")"` - [x] Set `"#{1 2 3}"` diff --git a/bb.edn b/bb.edn index f70adc6..81365cd 100644 --- a/bb.edn +++ b/bb.edn @@ -1,7 +1,11 @@ {:tasks - {clean {:doc "Removes target folder" - :requires ([babashka.fs :as fs]) - :task (fs/delete-tree "target")} + {:init (do (def code-cov-env + {"CARGO_INCREMENTAL" "0" + "RUSTFLAGS" "-Cinstrument-coverage -Copt-level=0 -Ccodegen-units=1 -Cinline-threshold=0 -Clink-dead-code -Coverflow-checks=off" + "LLVM_PROFILE_FILE" "target/coverage/cargo-test-%p-%m.profraw"})) + :requires ([babashka.fs :as fs]) + clean {:doc "Removes target folder" + :task (fs/delete-tree "target")} test_lib_features (shell "cargo test --all-features --no-fail-fast") test_lib_no_default_features (shell "cargo test --features std --no-default-features --no-fail-fast") example_fast (shell "cargo test --examples --no-fail-fast") @@ -23,5 +27,15 @@ :task (shell "cargo clippy --features json --no-default-features -- -W future-incompatible -W rust_2018_idioms -W clippy::all -W clippy::pedantic -W clippy::nursery --deny warnings")} clippy {:doc "Runs all variations of cargo clippy" :depends [cargo-clippy-all-features cargo-clippy-no-sets-json cargo-clippy-no-defaults]} + cov-all-features {:doc "Coverage, all features" + :task (shell {:extra-env code-cov-env} "cargo test --all-features")} + cov-std-only {:doc "Coverage, std only" + :task (shell {:extra-env code-cov-env} "cargo test --no-default-features --features std")} + clean-cov {:doc "Cleans all .profraw files and generated html" + :task (fs/delete-tree "target/coverage")} + grcov {:doc "Runs grcov to generate human readable html" + :task (shell "grcov target/coverage --binary-path ./target/debug/deps/ -s . -t html --branch --ignore-not-existing -o target/coverage/html")} + coverage {:doc "Generates coverage in human friendly html in target/coverage/" + :depends [clean-cov cov-all-features cov-std-only grcov]} test {:doc "Runs all tests and checks" :depends [cargo-test cargo-fmt clippy]}}} diff --git a/examples/complex_struct_deserialization.rs b/examples/complex_struct_deserialization.rs index e3836e3..49143f3 100644 --- a/examples/complex_struct_deserialization.rs +++ b/examples/complex_struct_deserialization.rs @@ -70,12 +70,7 @@ fn complex_wrong() -> Result<(), EdnError> { let bad_edn_str = "{:list [{:name \"rose\" :age \"some text\" :cool true}, {:name \"josh\" :age 33 :cool false}, {:name \"eva\" :age 296 :cool true}]}"; let complex: Result = edn_rs::from_str(bad_edn_str); - assert_eq!( - complex, - Err(EdnError::Deserialize( - "couldn't convert `\"some text\"` into `uint`".to_string() - )) - ); + assert!(complex.is_err()); Ok(()) } diff --git a/examples/from_edn.rs b/examples/from_edn.rs index fa319cc..20f149b 100644 --- a/examples/from_edn.rs +++ b/examples/from_edn.rs @@ -42,12 +42,7 @@ fn person_mistyped() -> Result<(), EdnError> { })); let person: Result = edn_rs::from_edn(&bad_edn); - assert_eq!( - person, - Err(EdnError::Deserialize( - "couldn't convert `\"some text\"` into `uint`".to_string() - )) - ); + assert!(person.is_err()); Ok(()) } diff --git a/examples/struct_from_str.rs b/examples/struct_from_str.rs index 2939f46..45941ad 100644 --- a/examples/struct_from_str.rs +++ b/examples/struct_from_str.rs @@ -36,13 +36,7 @@ fn person_mistyped() -> Result<(), EdnError> { let bad_edn_str = "{:name \"rose\" :age \"some text\" }"; let person: Result = edn_rs::from_str(bad_edn_str); - assert_eq!( - person, - Err(EdnError::Deserialize( - "couldn't convert `\"some text\"` into `uint`".to_string() - )) - ); - + assert!(person.is_err()); Ok(()) } @@ -50,11 +44,7 @@ fn person_overflow() -> Result<(), EdnError> { let overflow_edn_str = " {:name \"rose\" :age 9002 } "; let person: Result = edn_rs::from_str(overflow_edn_str); - assert_eq!( - format!("{person:?}"), - "Err(TryFromInt(TryFromIntError(())))" - ); - + assert!(person.is_err()); Ok(()) } diff --git a/src/deserialize/mod.rs b/src/deserialize/mod.rs index 10ea244..a51b399 100644 --- a/src/deserialize/mod.rs +++ b/src/deserialize/mod.rs @@ -1,7 +1,6 @@ use alloc::collections::BTreeMap; #[cfg(feature = "sets")] use alloc::collections::BTreeSet; -use alloc::format; use alloc::string::{String, ToString}; use alloc::vec::Vec; use core::any; @@ -12,7 +11,8 @@ use std::collections::HashMap; #[cfg(all(feature = "sets", feature = "std"))] use std::collections::HashSet; -use crate::edn::{Edn, Error}; +use crate::edn::Edn; +use crate::EdnError as Error; pub mod parse; @@ -23,7 +23,7 @@ use ordered_float::OrderedFloat; /// /// # Errors /// -/// Error will be like `EdnError::Deserialize("couldn't convert into ")` +/// Error implements Debug. See docs for more information. /// /// ``` /// use crate::edn_rs::{Edn, EdnError, Deserialize}; @@ -60,27 +60,22 @@ use ordered_float::OrderedFloat; /// let bad_edn_str = "{:name \"rose\" :age \"some text\" }"; /// let person: Result = edn_rs::from_str(bad_edn_str); /// -/// assert_eq!( -/// person, -/// Err(EdnError::Deserialize( -/// "couldn't convert `\"some text\"` into `uint`".to_string() -/// )) -/// ); +/// println!("{:?}", person); /// ``` #[allow(clippy::missing_errors_doc)] pub trait Deserialize: Sized { fn deserialize(edn: &Edn) -> Result; } -fn build_deserialize_error(edn: &Edn, type_: &str) -> Error { - Error::Deserialize(format!("couldn't convert `{edn}` into `{type_}`")) +const fn build_deserialize_error(type_: &'static str) -> Error { + Error::deserialize(type_) } impl Deserialize for () { fn deserialize(edn: &Edn) -> Result { match edn { Edn::Nil => Ok(()), - _ => Err(build_deserialize_error(edn, "unit")), + _ => Err(build_deserialize_error("unit")), } } } @@ -89,7 +84,7 @@ impl Deserialize for () { impl Deserialize for OrderedFloat { fn deserialize(edn: &Edn) -> Result { edn.to_float() - .ok_or_else(|| build_deserialize_error(edn, "edn_rs::Double")) + .ok_or_else(|| build_deserialize_error("edn_rs::Double")) .map(Into::into) } } @@ -97,7 +92,7 @@ impl Deserialize for OrderedFloat { impl Deserialize for f64 { fn deserialize(edn: &Edn) -> Result { edn.to_float() - .ok_or_else(|| build_deserialize_error(edn, "edn_rs::Double")) + .ok_or_else(|| build_deserialize_error("edn_rs::Double")) .map(Into::into) } } @@ -109,7 +104,7 @@ macro_rules! impl_deserialize_int { fn deserialize(edn: &Edn) -> Result { let int = edn .to_int() - .ok_or_else(|| build_deserialize_error(edn, "int"))?; + .ok_or_else(|| build_deserialize_error("int"))?; Ok(Self::try_from(int)?) } } @@ -126,7 +121,7 @@ macro_rules! impl_deserialize_uint { fn deserialize(edn: &Edn) -> Result { let uint = edn .to_uint() - .ok_or_else(|| build_deserialize_error(edn, "uint"))?; + .ok_or_else(|| build_deserialize_error("uint"))?; Ok(Self::try_from(uint)?) } } @@ -138,8 +133,7 @@ impl_deserialize_uint!(u8, u16, u32, u64, usize); impl Deserialize for bool { fn deserialize(edn: &Edn) -> Result { - edn.to_bool() - .ok_or_else(|| build_deserialize_error(edn, "bool")) + edn.to_bool().ok_or_else(|| build_deserialize_error("bool")) } } @@ -160,8 +154,7 @@ impl Deserialize for String { impl Deserialize for char { fn deserialize(edn: &Edn) -> Result { - edn.to_char() - .ok_or_else(|| build_deserialize_error(edn, "char")) + edn.to_char().ok_or_else(|| build_deserialize_error("char")) } } @@ -171,23 +164,23 @@ where { fn deserialize(edn: &Edn) -> Result { match edn { - Edn::Vector(_) => Ok(edn - .iter_some() - .ok_or_else(|| Error::Iter(format!("Could not create iter from {edn:?}")))? + Edn::Vector(v) => Ok(v + .0 + .iter() .map(|e| Deserialize::deserialize(e)) .collect::>()?), - Edn::List(_) => Ok(edn - .iter_some() - .ok_or_else(|| Error::Iter(format!("Could not create iter from {edn:?}")))? + Edn::List(l) => Ok(l + .0 + .iter() .map(|e| Deserialize::deserialize(e)) .collect::>()?), #[cfg(feature = "sets")] - Edn::Set(_) => Ok(edn - .iter_some() - .ok_or_else(|| Error::Iter(format!("Could not create iter from {edn:?}")))? + Edn::Set(s) => Ok(s + .0 + .iter() .map(|e| Deserialize::deserialize(e)) .collect::>()?), - _ => Err(build_deserialize_error(edn, any::type_name::())), + _ => Err(build_deserialize_error(any::type_name::())), } } } @@ -200,22 +193,17 @@ where { fn deserialize(edn: &Edn) -> Result { match edn { - Edn::Map(_) => edn - .map_iter() - .ok_or_else(|| Error::Iter(format!("Could not create iter from {edn:?}")))? + Edn::Map(m) => m + .0 + .iter() .map(|(key, e)| { Ok(( key.to_string(), - Deserialize::deserialize(e).map_err(|_| { - Error::Deserialize(format!( - "Cannot safely deserialize {:?} to {}", - edn, "HashMap" - )) - })?, + Deserialize::deserialize(e).map_err(|_| Error::deserialize("HashMap"))?, )) }) .collect::>(), - _ => Err(build_deserialize_error(edn, any::type_name::())), + _ => Err(build_deserialize_error(any::type_name::())), } } } @@ -226,22 +214,17 @@ where { fn deserialize(edn: &Edn) -> Result { match edn { - Edn::Map(_) => edn - .map_iter() - .ok_or_else(|| Error::Iter(format!("Could not create iter from {edn:?}")))? + Edn::Map(m) => m + .0 + .iter() .map(|(key, e)| { Ok(( key.to_string(), - Deserialize::deserialize(e).map_err(|_| { - Error::Deserialize(format!( - "Cannot safely deserialize {:?} to {}", - edn, "BTreeMap" - )) - })?, + Deserialize::deserialize(e).map_err(|_| Error::deserialize("BTreeMap"))?, )) }) .collect::>(), - _ => Err(build_deserialize_error(edn, any::type_name::())), + _ => Err(build_deserialize_error(any::type_name::())), } } } @@ -254,19 +237,12 @@ where { fn deserialize(edn: &Edn) -> Result { match edn { - Edn::Set(_) => edn - .set_iter() - .ok_or_else(|| Error::Iter(format!("Could not create iter from {edn:?}")))? - .map(|e| { - Deserialize::deserialize(e).map_err(|_| { - Error::Deserialize(format!( - "Cannot safely deserialize {:?} to {}", - edn, "HashSet" - )) - }) - }) - .collect::>(), - _ => Err(build_deserialize_error(edn, any::type_name::())), + Edn::Set(s) => { + s.0.iter() + .map(|e| Deserialize::deserialize(e).map_err(|_| Error::deserialize("HashSet"))) + .collect::>() + } + _ => Err(build_deserialize_error(any::type_name::())), } } } @@ -278,19 +254,12 @@ where { fn deserialize(edn: &Edn) -> Result { match edn { - Edn::Set(_) => edn - .set_iter() - .ok_or_else(|| Error::Iter(format!("Could not create iter from {edn:?}")))? - .map(|e| { - Deserialize::deserialize(e).map_err(|_| { - Error::Deserialize(format!( - "Cannot safely deserialize {:?} to {}", - edn, "BTreeSet" - )) - }) - }) + Edn::Set(s) => s + .0 + .iter() + .map(|e| Deserialize::deserialize(e).map_err(|_| Error::deserialize("BTreeSet"))) .collect::>(), - _ => Err(build_deserialize_error(edn, any::type_name::())), + _ => Err(build_deserialize_error(any::type_name::())), } } } @@ -348,12 +317,7 @@ where /// let bad_edn_str = "{:name \"rose\" :age \"some text\" }"; /// let person: Result = edn_rs::from_str(bad_edn_str); /// -/// assert_eq!( -/// person, -/// Err(EdnError::Deserialize( -/// "couldn't convert `\"some text\"` into `uint`".to_string() -/// )) -/// ); +/// println!("{:?}", person); /// ``` pub fn from_str(s: &str) -> Result { let edn = Edn::from_str(s)?; @@ -407,12 +371,7 @@ pub fn from_str(s: &str) -> Result { /// })); /// let person: Result = edn_rs::from_edn(&bad_edn); /// -/// assert_eq!( -/// person, -/// Err(EdnError::Deserialize( -/// "couldn't convert `\"some text\"` into `uint`".to_string() -/// )) -/// ); +/// println!("{:?}", person); /// ``` pub fn from_edn(edn: &Edn) -> Result { T::deserialize(edn) diff --git a/src/deserialize/parse.rs b/src/deserialize/parse.rs index a48154d..bc36479 100644 --- a/src/deserialize/parse.rs +++ b/src/deserialize/parse.rs @@ -1,226 +1,475 @@ +#![allow(clippy::inline_always)] + +use alloc::borrow::ToOwned; use alloc::boxed::Box; use alloc::collections::BTreeMap; #[cfg(feature = "sets")] use alloc::collections::BTreeSet; use alloc::string::{String, ToString}; use alloc::vec::Vec; -use alloc::{format, vec}; -use core::iter; use core::primitive::str; +use crate::edn::error::{Code, Error}; #[cfg(feature = "sets")] use crate::edn::Set; -use crate::edn::{Edn, Error, List, Map, Vector}; +use crate::edn::{Edn, List, Map, Vector}; const DELIMITERS: [char; 8] = [',', ']', '}', ')', ';', '(', '[', '{']; -pub fn parse(edn: &str) -> Result { - let owned = String::from(edn); - let mut tokens = owned.chars().enumerate(); - (parse_internal(tokens.next(), &mut tokens)?).map_or_else(|| Ok(Edn::Empty), Ok) +struct Walker<'w> { + slice: &'w str, + ptr: usize, + column: usize, + line: usize, } -fn parse_consuming( - c: Option<(usize, char)>, - chars: &mut iter::Enumerate>, -) -> Result { - (parse_internal(c, chars)?).map_or_else(|| Ok(Edn::Empty), Ok) -} +impl Walker<'_> { + // Slurps until whitespace or delimiter, returning the slice. + #[inline(always)] + fn slurp_literal(&mut self) -> &str { + let token = self.slice[self.ptr..] + .split(|c: char| c.is_whitespace() || DELIMITERS.contains(&c)) + .next() + .unwrap(); // At least an empty slice will always be on the first split, even on an empty str + + self.ptr += token.len(); + self.column += token.len(); + token + } + + // Slurps a char. Special handling for chars that happen to be delimiters + #[inline(always)] + fn slurp_char(&mut self) -> &str { + let starting_ptr = self.ptr; + + let mut ptr = 0; + while let Some(c) = self.peek_next() { + // first is always \\, second is always a char we want. + // Handles edge cases of having a valid "\\[" but also "\\c[lolthisisvalidedn" + if ptr > 1 && (c.is_whitespace() || DELIMITERS.contains(&c)) { + break; + } -fn parse_internal( - c: Option<(usize, char)>, - chars: &mut iter::Enumerate>, -) -> Result, Error> { - Ok(match c { - Some((_, '[')) => Some(read_vec(chars)?), - Some((_, '(')) => Some(read_list(chars)?), - Some((_, '#')) => tagged_or_set_or_discard(chars)?, - Some((_, '{')) => Some(read_map(chars)?), - Some((_, ';')) => { - // Consumes the content - chars.find(|c| c.1 == '\n'); - read_if_not_container_end(chars)? + let _ = self.nibble_next(); + ptr += 1; } - Some((_, s)) if s.is_whitespace() || s == ',' => read_if_not_container_end(chars)?, - None => None, - edn => Some(edn_element(edn, chars)?), - }) -} + &self.slice[starting_ptr..starting_ptr + ptr] + } + + // Slurps until whitespace or delimiter, returning the slice. + #[inline(always)] + fn slurp_tag(&mut self) -> &str { + let token = self.slice[self.ptr..] + .split(|c: char| c.is_whitespace() && c != ',') + .next() + .unwrap(); // At least an empty slice will always be on the first split, even on an empty str + + self.ptr += token.len(); + self.column += token.len(); -fn edn_element( - c: Option<(usize, char)>, - chars: &mut iter::Enumerate>, -) -> Result { - match c { - Some((_, '\"')) => read_str(chars), - Some((_, ':')) => Ok(read_key(chars)), - Some((_, n)) if n.is_numeric() => Ok(read_number(n, chars)?), - Some((_, n)) - if (n == '-' || n == '+') - && chars - .clone() - .peekable() - .peek() - .is_some_and(|n| n.1.is_numeric()) => - { - Ok(read_number(n, chars)?) + if token.ends_with(',') { + return &token[0..token.len() - 1]; } - Some((_, '\\')) => Ok(read_char(chars)?), - Some((_, b)) if b == 't' || b == 'f' || b == 'n' => Ok(read_bool_or_nil(b, chars)?), - Some((_, a)) => Ok(read_symbol(a, chars)?), - None => Err(Error::ParseEdn("Edn could not be parsed".to_string())), + token } -} -fn tagged_or_set_or_discard( - chars: &mut iter::Enumerate>, -) -> Result, Error> { - match chars.clone().next() { - Some((_, '{')) => read_set(chars).map(Some), - Some((_, '_')) => read_discard(chars), - _ => read_tagged(chars).map(Some), + #[inline(always)] + fn slurp_str(&mut self) -> Result { + let _ = self.nibble_next(); // Consume the leading '"' char + let mut s = String::new(); + let mut escape = false; + loop { + if let Some(c) = self.nibble_next() { + if escape { + match c { + 't' => s.push('\t'), + 'r' => s.push('\r'), + 'n' => s.push('\n'), + '\\' => s.push('\\'), + '\"' => s.push('\"'), + _ => { + return Err(Error { + code: Code::InvalidEscape, + column: Some(self.column), + line: Some(self.line), + ptr: Some(self.ptr), + }) + } + } + escape = false; + } else if c == '\"' { + return Ok(Edn::Str(s)); + } else if c == '\\' { + escape = true; + } else { + escape = false; + s.push(c); + } + } else { + return Err(Error { + code: Code::UnexpectedEOF, + column: Some(self.column), + line: Some(self.line), + ptr: Some(self.ptr), + }); + } + } + } + + // Nibbles away until the next new line + #[inline(always)] + fn nibble_newline(&mut self) { + let len = self.slice[self.ptr..].split('\n').next().unwrap(); // At least an empty slice will always be on the first split, even on an empty str + self.ptr += len.len(); + self.nibble_whitespace(); + } + + // Nibbles away until the start of the next form + #[inline(always)] + fn nibble_whitespace(&mut self) { + while let Some(n) = self.peek_next() { + if n == ',' || n.is_whitespace() { + let _ = self.nibble_next(); + continue; + } + break; + } + } + + // Consumes next + #[inline(always)] + fn nibble_next(&mut self) -> Option { + let char = self.slice[self.ptr..].chars().next(); + if let Some(c) = char { + self.ptr += 1; + if c == '\n' { + self.line += 1; + self.column = 1; + } else { + self.column += 1; + } + } + char + } + + // Peek into the next char + #[inline(always)] + fn peek_next(&mut self) -> Option { + self.slice[self.ptr..].chars().next() } } -fn read_key(chars: &mut iter::Enumerate>) -> Edn { - let key_chars = chars - .clone() - .take_while(|c| !c.1.is_whitespace() && !DELIMITERS.contains(&c.1)); - let c_len = key_chars.clone().count(); +pub fn parse(edn: &str) -> Result { + let mut walker = Walker { + slice: edn, + ptr: 0, + column: 1, + line: 1, + }; - let mut key = String::from(":"); - let key_chars = chars.take(c_len).map(|c| c.1).collect::(); - key.push_str(&key_chars); - Edn::Key(key) + parse_internal(&mut walker) } -fn read_str(chars: &mut iter::Enumerate>) -> Result { - let result = chars.try_fold( - (false, String::new()), - |(last_was_escape, mut s), (_, c)| { - if last_was_escape { - // Supported escape characters, per https://github.com/edn-format/edn#strings - match c { - 't' => s.push('\t'), - 'r' => s.push('\r'), - 'n' => s.push('\n'), - '\\' => s.push('\\'), - '\"' => s.push('\"'), - _ => { - return Err(Err(Error::ParseEdn(format!( - "Invalid escape sequence \\{c}" - )))) - } - }; - - Ok((false, s)) - } else if c == '\"' { - // Unescaped quote means we're done - Err(Ok(s)) - } else if c == '\\' { - Ok((true, s)) - } else { - s.push(c); - Ok((false, s)) +#[inline] +fn parse_internal(walker: &mut Walker<'_>) -> Result { + walker.nibble_whitespace(); + while let Some(next) = walker.peek_next() { + let column_start = walker.column; + let ptr_start = walker.ptr; + let line_start = walker.line; + if let Some(ret) = match next { + '\\' => match parse_char(walker.slurp_char()) { + Ok(edn) => Some(Ok(edn)), + Err(code) => { + return Err(Error { + code, + line: Some(walker.line), + column: Some(column_start), + ptr: Some(walker.ptr), + }) + } + }, + '\"' => Some(walker.slurp_str()), + // comment. consume until a new line. + ';' => { + walker.nibble_newline(); + None } - }, - ); - - match result { - // An Ok means we actually finished parsing *without* seeing the end of the string, so that's - // an error. - Ok(_) => Err(Error::ParseEdn("Unterminated string".to_string())), - Err(Err(e)) => Err(e), - Err(Ok(string)) => Ok(Edn::Str(string)), + '[' => return parse_vector(walker), + '(' => return parse_list(walker), + '{' => return parse_map(walker), + '#' => parse_tag_set_discard(walker)?.map(Ok), + // non-string literal case + _ => match edn_literal(walker.slurp_literal()) { + Ok(edn) => Some(Ok(edn)), + Err(code) => { + return Err(Error { + code, + line: Some(line_start), + column: Some(column_start), + ptr: Some(ptr_start), + }) + } + }, + } { + return ret; + } } + Ok(Edn::Empty) } -fn read_symbol(a: char, chars: &mut iter::Enumerate>) -> Result { - let c_len = chars - .clone() - .enumerate() - .take_while(|&(i, c)| i <= 200 && !c.1.is_whitespace() && !DELIMITERS.contains(&c.1)) - .count(); - let i = chars - .clone() - .next() - .ok_or_else(|| Error::ParseEdn("Could not identify symbol index".to_string()))? - .0; - - if a.is_whitespace() { - return Err(Error::ParseEdn(format!( - "\"{a}\" could not be parsed at char count {i}" - ))); +#[inline] +fn parse_tag_set_discard(walker: &mut Walker<'_>) -> Result, Error> { + let _ = walker.nibble_next(); // Consume the leading '#' char + + match walker.peek_next() { + Some('{') => parse_set(walker).map(Some), + Some('_') => parse_discard(walker), + _ => parse_tag(walker).map(Some), } +} - let mut symbol = String::from(a); - let symbol_chars = chars.take(c_len).map(|c| c.1).collect::(); - symbol.push_str(&symbol_chars); - Ok(Edn::Symbol(symbol)) +#[inline] +fn parse_discard(walker: &mut Walker<'_>) -> Result, Error> { + let _ = walker.nibble_next(); // Consume the leading '_' char + Ok(match parse_internal(walker)? { + Edn::Empty => { + return Err(Error { + code: Code::UnexpectedEOF, + line: Some(walker.line), + column: Some(walker.column), + ptr: Some(walker.ptr), + }) + } + _ => match walker.peek_next() { + Some(_) => Some(parse_internal(walker)?), + None => None, + }, + }) } -fn read_tagged(chars: &mut iter::Enumerate>) -> Result { - let tag = chars - .take_while(|c| !c.1.is_whitespace() && c.1 != ',') - .map(|c| c.1) - .collect::(); +#[inline] +#[cfg(feature = "sets")] +fn parse_set(walker: &mut Walker<'_>) -> Result { + let _ = walker.nibble_next(); // Consume the leading '{' char + let mut set: BTreeSet = BTreeSet::new(); + loop { + match walker.peek_next() { + Some('}') => { + let _ = walker.nibble_next(); + return Ok(Edn::Set(Set::new(set))); + } + Some(_) => { + let next = parse_internal(walker)?; + if next != Edn::Empty { + set.insert(next); + } + } + _ => { + return Err(Error { + code: Code::UnexpectedEOF, + line: Some(walker.line), + column: Some(walker.column), + ptr: Some(walker.ptr), + }) + } + } + } +} + +#[inline] +#[cfg(not(feature = "sets"))] +const fn parse_set(walker: &Walker<'_>) -> Result { + Err(Error { + code: Code::NoFeatureSets, + line: Some(walker.line), + column: Some(walker.column), + ptr: Some(walker.ptr), + }) +} + +#[inline] +fn parse_tag(walker: &mut Walker<'_>) -> Result { + let tag = walker.slurp_tag(); Ok(Edn::Tagged( - tag, - Box::new(parse_consuming(chars.next(), chars)?), + tag.to_string(), + Box::new(parse_internal(walker)?), )) } -fn read_discard(chars: &mut iter::Enumerate>) -> Result, Error> { - let _discard_underscore = chars.next(); - let i = chars - .clone() - .next() - .ok_or_else(|| Error::ParseEdn("Could not identify symbol index".to_string()))? - .0; - match parse_consuming(chars.next(), chars) { - Err(e) => Err(e), - Ok(Edn::Empty) => Err(Error::ParseEdn(format!( - "Discard sequence must have a following element at char count {i}" - ))), - _ => read_if_not_container_end(chars), +#[inline] +fn parse_map(walker: &mut Walker<'_>) -> Result { + let _ = walker.nibble_next(); // Consume the leading '{' char + let mut map: BTreeMap = BTreeMap::new(); + loop { + match walker.peek_next() { + Some('}') => { + let _ = walker.nibble_next(); + return Ok(Edn::Map(Map::new(map))); + } + Some(n) => { + if n == ']' || n == ')' { + return Err(Error { + code: Code::UnmatchedDelimiter(n), + line: Some(walker.line), + column: Some(walker.column), + ptr: Some(walker.ptr), + }); + } + + let key = parse_internal(walker)?; + let val = parse_internal(walker)?; + + if key != Edn::Empty && val != Edn::Empty { + // Existing keys are considered an error + if map.insert(key.to_string(), val).is_some() { + return Err(Error { + code: Code::HashMapDuplicateKey, + line: Some(walker.line), + column: Some(walker.column), + ptr: Some(walker.ptr), + }); + } + } + } + _ => { + return Err(Error { + code: Code::UnexpectedEOF, + line: Some(walker.line), + column: Some(walker.column), + ptr: Some(walker.ptr), + }) + } + } } } -fn num_den_from_slice(slice: impl AsRef) -> Option<(i64, u64)> { - let slice = slice.as_ref(); - let index = slice.find('/'); +#[inline] +fn parse_vector(walker: &mut Walker<'_>) -> Result { + let _ = walker.nibble_next(); // Consume the leading '[' char + let mut vec = Vec::new(); - if let Some(i) = index { - let (num, den) = slice.split_at(i); // This can't panic because the index is valid - let num = num.parse::(); - let den = den[1..].parse::(); + loop { + match walker.peek_next() { + Some(']') => { + let _ = walker.nibble_next(); + return Ok(Edn::Vector(Vector::new(vec))); + } + Some(_) => { + let next = parse_internal(walker)?; + if next != Edn::Empty { + vec.push(next); + } + } + _ => { + return Err(Error { + code: Code::UnexpectedEOF, + line: Some(walker.line), + column: Some(walker.column), + ptr: Some(walker.ptr), + }) + } + } + } +} - if let (Ok(n), Ok(d)) = (num, den) { - return Some((n, d)); +#[inline] +fn parse_list(walker: &mut Walker<'_>) -> Result { + let _ = walker.nibble_next(); // Consume the leading '[' char + let mut vec = Vec::new(); + + loop { + match walker.peek_next() { + Some(')') => { + let _ = walker.nibble_next(); + return Ok(Edn::List(List::new(vec))); + } + Some(_) => { + let next = parse_internal(walker)?; + if next != Edn::Empty { + vec.push(next); + } + } + _ => { + return Err(Error { + code: Code::UnexpectedEOF, + line: Some(walker.line), + column: Some(walker.column), + ptr: Some(walker.ptr), + }) + } } - return None; } - None } -fn read_number(n: char, chars: &mut iter::Enumerate>) -> Result { - let i = chars - .clone() - .next() - .ok_or_else(|| Error::ParseEdn("Could not identify symbol index".to_string()))? - .0; - let c_len = chars - .clone() - .take_while(|(_, c)| !c.is_whitespace() && !DELIMITERS.contains(c)) - .count(); +#[inline] +fn edn_literal(literal: &str) -> Result { + fn numeric(s: &str) -> bool { + let (first, second) = { + let mut s = s.chars(); + (s.next(), s.next()) + }; + + if let Some(f) = first { + if f.is_numeric() { + return true; + } + + if f == '-' || f == '+' { + if let Some(s) = second { + if s.is_numeric() { + return true; + } + } + } + } + false + } + + Ok(match literal { + "nil" => Edn::Nil, + "true" => Edn::Bool(true), + "false" => Edn::Bool(false), + "" => Edn::Empty, + k if k.starts_with(':') => { + if k.len() <= 1 { + return Err(Code::InvalidKeyword); + } + Edn::Key(k.to_owned()) + } + n if numeric(n) => return parse_number(n), + _ => Edn::Symbol(literal.to_owned()), + }) +} + +#[inline] +fn parse_char(lit: &str) -> Result { + let lit = &lit[1..]; // ignore the leading '\\' + match lit { + "newline" => Ok(Edn::Char('\n')), + "return" => Ok(Edn::Char('\r')), + "tab" => Ok(Edn::Char('\t')), + "space" => Ok(Edn::Char(' ')), + c if c.len() == 1 => Ok(Edn::Char(c.chars().next().unwrap())), + _ => Err(Code::InvalidChar), + } +} + +#[inline] +fn parse_number(lit: &str) -> Result { + let mut chars = lit.chars(); let (number, radix) = { let mut number = String::new(); + // The EDN spec allows for a redundant '+' symbol, we just ignore it. - if n != '+' { - number.push(n); + if let Some(n) = chars.next() { + if n != '+' { + number.push(n); + } } - for (_, c) in chars.take(c_len) { + + for c in chars { number.push(c); } if number.to_lowercase().starts_with("0x") { @@ -235,16 +484,16 @@ fn read_number(n: char, chars: &mut iter::Enumerate>) -> Re let negative = number.starts_with('-'); let radix = { if negative { - (number[1..index]).parse::() + (number[1..index]).parse::() } else { - (number[0..index]).parse::() + (number[0..index]).parse::() } }; match radix { Ok(r) => { // from_str_radix panics if radix is not in the range from 2 to 36 if !(2..=36).contains(&r) { - return Err(Error::ParseEdn(format!("Radix of {r} is out of bounds"))); + return Err(Code::InvalidRadix(Some(r))); } if negative { @@ -258,10 +507,8 @@ fn read_number(n: char, chars: &mut iter::Enumerate>) -> Re } (number, r) } - Err(e) => { - return Err(Error::ParseEdn(format!( - "{e} while trying to parse radix from {number}" - ))); + Err(_) => { + return Err(Code::InvalidRadix(None)); } } } else { @@ -269,229 +516,36 @@ fn read_number(n: char, chars: &mut iter::Enumerate>) -> Re } }; - match number { - n if (n.contains('E') || n.contains('e')) && n.parse::().is_ok() => { - Ok(Edn::Double(n.parse::()?.into())) - } - n if u64::from_str_radix(&n, radix).is_ok() => { - Ok(Edn::UInt(u64::from_str_radix(&n, radix)?)) - } - n if i64::from_str_radix(&n, radix).is_ok() => { - Ok(Edn::Int(i64::from_str_radix(&n, radix)?)) - } - n if n.parse::().is_ok() => Ok(Edn::Double(n.parse::()?.into())), - n if num_den_from_slice(&n).is_some() => Ok(Edn::Rational(num_den_from_slice(n).unwrap())), - n if n.to_uppercase().chars().filter(|c| c == &'E').count() > 1 => { - let mut n = n.chars(); - read_symbol(n.next().unwrap_or(' '), &mut n.enumerate()) - } - _ => Err(Error::ParseEdn(format!( - "{number} could not be parsed at char count {i} with radix {radix}" - ))), + if let Ok(n) = u64::from_str_radix(&number, radix.into()) { + return Ok(Edn::UInt(n)); } -} - -fn read_char(chars: &mut iter::Enumerate>) -> Result { - let i = chars - .clone() - .next() - .ok_or_else(|| Error::ParseEdn("Could not identify symbol index".to_string()))? - .0; - let c = chars.next(); - c.ok_or(format!("{c:?} could not be parsed at char count {i}")) - .map(|c| c.1) - .map(Edn::Char) - .map_err(Error::ParseEdn) -} - -fn read_bool_or_nil( - c: char, - chars: &mut iter::Enumerate>, -) -> Result { - let i = chars - .clone() - .next() - .ok_or_else(|| Error::ParseEdn("Could not identify symbol index".to_string()))? - .0; - match c { - 't' if { - let val = chars - .clone() - .take_while(|(_, c)| !c.is_whitespace() && !DELIMITERS.contains(c)) - .map(|c| c.1) - .collect::(); - val.eq("rue") - } => - { - let mut string = String::new(); - let t = chars.take(3).map(|c| c.1).collect::(); - string.push(c); - string.push_str(&t); - Ok(Edn::Bool(string.parse::()?)) - } - 'f' if { - let val = chars - .clone() - .take_while(|(_, c)| !c.is_whitespace() && !DELIMITERS.contains(c)) - .map(|c| c.1) - .collect::(); - val.eq("alse") - } => - { - let mut string = String::new(); - let f = chars.take(4).map(|c| c.1).collect::(); - string.push(c); - string.push_str(&f); - Ok(Edn::Bool(string.parse::()?)) - } - 'n' if { - let val = chars - .clone() - .take_while(|(_, c)| !c.is_whitespace() && !DELIMITERS.contains(c)) - .map(|c| c.1) - .collect::(); - val.eq("il") - } => - { - let mut string = String::new(); - let n = chars.take(2).map(|c| c.1).collect::(); - string.push(c); - string.push_str(&n); - match &string[..] { - "nil" => Ok(Edn::Nil), - _ => Err(Error::ParseEdn(format!( - "{string} could not be parsed at char count {i}" - ))), - } - } - _ => read_symbol(c, chars), + if let Ok(n) = i64::from_str_radix(&number, radix.into()) { + return Ok(Edn::Int(n)); } -} - -fn read_vec(chars: &mut iter::Enumerate>) -> Result { - let i = chars - .clone() - .next() - .ok_or_else(|| Error::ParseEdn("Could not identify symbol index".to_string()))? - .0; - let mut res: Vec = vec![]; - loop { - match chars.next() { - Some((_, ']')) => return Ok(Edn::Vector(Vector::new(res))), - Some(c) => { - if let Some(e) = parse_internal(Some(c), chars)? { - res.push(e); - } - } - err => { - return Err(Error::ParseEdn(format!( - "{err:?} could not be parsed at char count {i}" - ))) - } - } + if let Ok(n) = number.parse::() { + return Ok(Edn::Double(n.into())); } -} - -fn read_list(chars: &mut iter::Enumerate>) -> Result { - let i = chars - .clone() - .next() - .ok_or_else(|| Error::ParseEdn("Could not identify symbol index".to_string()))? - .0; - let mut res: Vec = vec![]; - loop { - match chars.next() { - Some((_, ')')) => return Ok(Edn::List(List::new(res))), - Some(c) => { - if let Some(e) = parse_internal(Some(c), chars)? { - res.push(e); - } - } - err => { - return Err(Error::ParseEdn(format!( - "{err:?} could not be parsed at char count {i}" - ))) - } - } + if let Some((n, d)) = num_den_from_slice(&number) { + return Ok(Edn::Rational((n, d))); } -} -#[cfg(feature = "sets")] -fn read_set(chars: &mut iter::Enumerate>) -> Result { - let _discard_brackets = chars.next(); - let i = chars - .clone() - .next() - .ok_or_else(|| Error::ParseEdn("Could not identify symbol index".to_string()))? - .0; - let mut res: BTreeSet = BTreeSet::new(); - loop { - match chars.next() { - Some((_, '}')) => return Ok(Edn::Set(Set::new(res))), - Some(c) => { - if let Some(e) = parse_internal(Some(c), chars)? { - res.insert(e); - } - } - err => { - return Err(Error::ParseEdn(format!( - "{err:?} could not be parsed at char count {i}" - ))) - } - } - } + Err(Code::InvalidNumber) } -#[cfg(not(feature = "sets"))] -fn read_set(_chars: &mut iter::Enumerate>) -> Result { - Err(Error::ParseEdn( - "Could not parse set due to feature not being enabled".to_string(), - )) -} +#[inline] +fn num_den_from_slice(slice: impl AsRef) -> Option<(i64, u64)> { + let slice = slice.as_ref(); + let index = slice.find('/'); -fn read_map(chars: &mut iter::Enumerate>) -> Result { - let i = chars - .clone() - .next() - .ok_or_else(|| Error::ParseEdn("Could not identify symbol index".to_string()))? - .0; - let mut res: BTreeMap = BTreeMap::new(); - let mut key: Option = None; - let mut val: Option = None; - loop { - match chars.next() { - Some((_, '}')) => return Ok(Edn::Map(Map::new(res))), - Some(c) => { - if key.is_some() { - val = Some(parse_consuming(Some(c), chars)?); - } else { - key = parse_internal(Some(c), chars)?; - } - } - err => { - return Err(Error::ParseEdn(format!( - "{err:?} could not be parsed at char count {i}" - ))) - } - } + if let Some(i) = index { + let (num, den) = slice.split_at(i); // This can't panic because the index is valid + let num = num.parse::(); + let den = den[1..].parse::(); - if key.is_some() && val.is_some() { - res.insert(key.unwrap().to_string(), val.unwrap()); - key = None; - val = None; + if let (Ok(n), Ok(d)) = (num, den) { + return Some((n, d)); } + return None; } + None } - -fn read_if_not_container_end( - chars: &mut iter::Enumerate>, -) -> Result, Error> { - Ok(match chars.clone().next() { - Some(c) if c.1 == ']' || c.1 == ')' || c.1 == '}' => None, - Some(_) => parse_internal(chars.next(), chars)?, - None => None, - }) -} - -#[cfg(test)] -mod test {} diff --git a/src/edn/error.rs b/src/edn/error.rs new file mode 100644 index 0000000..5aef72f --- /dev/null +++ b/src/edn/error.rs @@ -0,0 +1,80 @@ +use core::fmt::{self, Debug}; +use core::{convert, num, str}; + +pub struct Error { + pub code: Code, + /// Counting from 1. + pub line: Option, + /// This is a utf-8 char count. Counting from 1. + pub column: Option, + /// This is a pointer into the str trying to be parsed, not a utf-8 char offset + pub ptr: Option, +} + +#[derive(Debug, Eq, PartialEq)] +#[non_exhaustive] +pub enum Code { + /// Parse errors + HashMapDuplicateKey, + InvalidChar, + InvalidEscape, + InvalidKeyword, + InvalidNumber, + InvalidRadix(Option), + UnexpectedEOF, + UnmatchedDelimiter(char), + + /// Feature errors + NoFeatureSets, + + /// Deserialize errors + Convert(&'static str), + + /// Type conversion errors + TryFromInt(num::TryFromIntError), + #[doc(hidden)] + Infallable(), // Makes the compiler happy for converting u64 to u64 and i64 to i64 +} + +impl Error { + pub(crate) const fn deserialize(conv_type: &'static str) -> Self { + Self { + code: Code::Convert(conv_type), + line: None, + column: None, + ptr: None, + } + } +} + +impl Debug for Error { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "EdnError {{ code: {:?}, line: {:?}, column: {:?}, index: {:?} }}", + self.code, self.line, self.column, self.ptr + ) + } +} + +impl From for Error { + fn from(_: convert::Infallible) -> Self { + Self { + code: Code::Infallable(), + line: None, + column: None, + ptr: None, + } + } +} + +impl From for Error { + fn from(e: num::TryFromIntError) -> Self { + Self { + code: Code::TryFromInt(e), + line: None, + column: None, + ptr: None, + } + } +} diff --git a/src/edn/mod.rs b/src/edn/mod.rs index 63c4977..c524129 100644 --- a/src/edn/mod.rs +++ b/src/edn/mod.rs @@ -7,7 +7,7 @@ use alloc::vec::Vec; use alloc::{fmt, format}; #[cfg(feature = "sets")] use core::cmp::{Ord, PartialOrd}; -use core::convert::{Infallible, TryFrom}; +use core::convert::TryFrom; use core::num; use crate::deserialize::parse::{self}; @@ -16,6 +16,7 @@ use utils::index::Index; #[cfg(feature = "sets")] use ordered_float::OrderedFloat; +pub mod error; #[doc(hidden)] pub mod utils; @@ -89,7 +90,7 @@ impl From for Double { #[derive(Debug, Clone, PartialEq)] #[cfg_attr(feature = "sets", derive(Eq, PartialOrd, Ord))] -pub struct Vector(Vec); +pub struct Vector(pub(crate) Vec); impl Vector { #[must_use] pub const fn new(v: Vec) -> Self { @@ -109,7 +110,7 @@ impl Vector { #[derive(Debug, Clone, PartialEq)] #[cfg_attr(feature = "sets", derive(Eq, PartialOrd, Ord))] -pub struct List(Vec); +pub struct List(pub(crate) Vec); impl List { #[must_use] pub fn new(v: Vec) -> Self { @@ -129,7 +130,7 @@ impl List { #[cfg(feature = "sets")] #[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)] -pub struct Set(BTreeSet); +pub struct Set(pub(crate) BTreeSet); #[cfg(feature = "sets")] impl Set { @@ -151,7 +152,7 @@ impl Set { #[derive(Debug, Clone, PartialEq)] #[cfg_attr(feature = "sets", derive(Eq, PartialOrd, Ord))] -pub struct Map(BTreeMap); +pub struct Map(pub(crate) BTreeMap); impl Map { #[must_use] pub fn new(m: BTreeMap) -> Self { @@ -230,6 +231,16 @@ impl core::fmt::Display for Map { } } +fn char_to_edn(c: char) -> String { + match c { + '\n' => "\\newline".to_string(), + '\r' => "\\return".to_string(), + ' ' => "\\space".to_string(), + '\t' => "\\tab".to_string(), + _ => format!("\\{c}"), + } +} + impl core::fmt::Display for Edn { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { let text = match self { @@ -246,7 +257,7 @@ impl core::fmt::Display for Edn { Self::Double(d) => format!("{d}"), Self::Rational((n, d)) => format!("{n}/{d}"), Self::Bool(b) => format!("{b}"), - Self::Char(c) => format!("\\{c}"), + Self::Char(c) => char_to_edn(*c), Self::Nil => String::from("nil"), Self::Empty => String::new(), Self::Tagged(tag, edn) => format!("#{tag} {edn}"), @@ -682,7 +693,7 @@ impl Edn { } impl core::str::FromStr for Edn { - type Err = Error; + type Err = error::Error; /// Parses a `&str` that contains an Edn into `Result` fn from_str(s: &str) -> Result { @@ -702,63 +713,6 @@ pub(crate) fn rational_to_double((n, d): (i64, u64)) -> f64 { (n as f64) / (d as f64) } -#[derive(Debug, PartialEq, Eq)] -#[non_exhaustive] -pub enum Error { - ParseEdn(String), - Deserialize(String), - Iter(String), - TryFromInt(num::TryFromIntError), - #[doc(hidden)] - Infallable(), // Makes the compiler happy for converting u64 to u64 and i64 to i64 -} - -impl From for Error { - fn from(s: String) -> Self { - Self::ParseEdn(s) - } -} - -impl From for Error { - fn from(s: num::ParseIntError) -> Self { - Self::ParseEdn(s.to_string()) - } -} - -impl From for Error { - fn from(s: num::ParseFloatError) -> Self { - Self::ParseEdn(s.to_string()) - } -} - -impl From for Error { - fn from(s: core::str::ParseBoolError) -> Self { - Self::ParseEdn(s.to_string()) - } -} - -impl From for Error { - fn from(e: num::TryFromIntError) -> Self { - Self::TryFromInt(e) - } -} - -impl From for Error { - fn from(_: Infallible) -> Self { - Self::Infallable() - } -} - -impl fmt::Display for Error { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Self::ParseEdn(s) | Self::Deserialize(s) | Self::Iter(s) => write!(f, "{}", &s), - Self::TryFromInt(e) => write!(f, "{e}"), - Self::Infallable() => panic!("Infallable conversion"), - } - } -} - #[cfg(test)] mod test { use alloc::borrow::ToOwned; diff --git a/src/lib.rs b/src/lib.rs index 3c11bc4..473a115 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -100,7 +100,7 @@ pub fn json_to_edn<'a>(json: impl AsRef) -> Cow<'a, str> { } pub use deserialize::{from_edn, from_str, Deserialize}; -pub use edn::Error as EdnError; +pub use edn::error::Error as EdnError; #[cfg(feature = "sets")] pub use edn::Set; pub use edn::{Edn, List, Map, Vector}; diff --git a/tests/deserialize.rs b/tests/deserialize.rs index 6cc6ab1..524cdb5 100644 --- a/tests/deserialize.rs +++ b/tests/deserialize.rs @@ -5,7 +5,6 @@ mod test { use alloc::collections::BTreeMap; use core::str::FromStr; - use edn::Error; use edn_rs::{edn, from_edn, from_str, hmap, map, Edn, List, Map, Vector}; #[test] @@ -19,6 +18,16 @@ mod test { #[test] fn parse_empty() { assert_eq!(Edn::from_str("").unwrap(), Edn::Empty); + assert_eq!( + Edn::from_str("[]").unwrap(), + Edn::Vector(Vector::new(vec![])) + ); + assert_eq!(Edn::from_str("()").unwrap(), Edn::List(List::new(vec![]))); + } + + #[test] + fn parse_invalid_vec() { + assert!(Edn::from_str("[42").is_err()); } #[test] @@ -33,12 +42,7 @@ mod test { #[cfg(not(feature = "sets"))] // Special case of running into a set without the feature enabled fn parse_set_without_set_feature() { - assert_eq!( - Edn::from_str("#{true, \\c, 3,four, }"), - Err(Error::ParseEdn( - "Could not parse set due to feature not being enabled".to_string() - )) - ) + assert!(Edn::from_str("#{true, \\c, 3,four, }").is_err()) } #[test] @@ -108,22 +112,19 @@ mod test { #[test] fn parse_str_with_invalid_escape() { - assert_eq!( - Edn::from_str(r#""hello\n \r \t \"world\" with escaped \\ \g characters""#), - Err(Error::ParseEdn("Invalid escape sequence \\g".to_string())) + assert!( + Edn::from_str(r#""hello\n \r \t \"world\" with escaped \\ \g characters""#).is_err() ); } #[test] fn parse_unterminated_string() { - assert_eq!( - Edn::from_str(r#""hello\n \r \t \"world\" with escaped \\ characters"#), - Err(Error::ParseEdn("Unterminated string".to_string())) - ); + assert!(Edn::from_str(r#""hello\n \r \t \"world\" with escaped \\ characters"#).is_err()); } #[test] fn parse_number() { + assert_eq!(Edn::from_str("7").unwrap(), Edn::UInt(7)); assert_eq!(Edn::from_str("143").unwrap(), Edn::UInt(143)); assert_eq!(Edn::from_str("-435143").unwrap(), Edn::Int(-435_143)); assert_eq!( @@ -158,15 +159,15 @@ mod test { let edn = "[1 \"2\" 3.3 :b true \\c]"; assert_eq!( - Edn::from_str(edn), - Ok(Edn::Vector(Vector::new(vec![ + Edn::from_str(edn).unwrap(), + Edn::Vector(Vector::new(vec![ Edn::UInt(1), Edn::Str("2".to_string()), Edn::Double(3.3.into()), Edn::Key(":b".to_string()), Edn::Bool(true), Edn::Char('c') - ]))) + ])) ); } @@ -293,14 +294,14 @@ mod test { let edn = "(1 \"2\" 3.3 :b [true \\c])"; assert_eq!( - Edn::from_str(edn), - Ok(Edn::List(List::new(vec![ + Edn::from_str(edn).unwrap(), + Edn::List(List::new(vec![ Edn::UInt(1), Edn::Str("2".to_string()), Edn::Double(3.3.into()), Edn::Key(":b".to_string()), Edn::Vector(Vector::new(vec![Edn::Bool(true), Edn::Char('c')])) - ]))) + ])) ); } @@ -346,11 +347,11 @@ mod test { let edn = "{:a \"2\" :b true :c nil}"; assert_eq!( - Edn::from_str(edn), - Ok(Edn::Map(Map::new( + Edn::from_str(edn).unwrap(), + Edn::Map(Map::new( map! {":a".to_string() => Edn::Str("2".to_string()), ":b".to_string() => Edn::Bool(true), ":c".to_string() => Edn::Nil} - ))) + )) ); } @@ -424,12 +425,7 @@ mod test { #[test] fn parse_discard_invalid() { - assert_eq!( - Edn::from_str("#_{ 234"), - Err(Error::ParseEdn( - "None could not be parsed at char count 3".to_string() - )) - ); + assert!(Edn::from_str("#_{ 234").is_err()); } #[test] @@ -462,12 +458,7 @@ mod test { #[test] fn parse_discard_no_follow_element() { - assert_eq!( - Edn::from_str("#_ ,, "), - Err(Error::ParseEdn( - "Discard sequence must have a following element at char count 2".to_string() - )) - ); + assert!(Edn::from_str("#_ ,, ").is_err()); } #[test] @@ -480,12 +471,7 @@ mod test { #[test] fn parse_discard_end_of_seq_no_follow() { - assert_eq!( - Edn::from_str("[:foo #_ ]"), - Err(Error::ParseEdn( - "Discard sequence must have a following element at char count 8".to_string() - )) - ); + assert!(Edn::from_str("[:foo #_ ]").is_err()); } #[test] @@ -683,6 +669,9 @@ mod test { Edn::Symbol("your-hair!-is+_parsed?".to_string()), ])); assert_eq!(edn, expected); + + let lorem = "Lorem-ipsum-dolor-sit-amet-consectetur-adipiscing-elit-sed-do-eiusmod-tempor-incididunt-ut-labore-et-dolore-magna-aliqua.-Ut-enim-ad-minim-veniam-quis-nostrud-exercitation-ullamco-laboris-nisi-ut-aliquip-ex-ea-commodo-consequat.-Duis-aute-irure-dolor-in-reprehenderit-in-voluptate-velit-esse-cillum-dolore-eu-fugiat-nulla-pariatur.-Excepteur-sint-occaecat-cupidatat-non-proident-sunt-in-culpa-qui-officia-deserunt-mollit-anim-id-est-laborum".to_string(); + assert_eq!(Edn::from_str(lorem.as_str()).unwrap(), Edn::Symbol(lorem)); } #[test] @@ -800,10 +789,7 @@ mod test { #[test] fn parse_numberic_symbol_with_doube_e() { - assert_eq!( - Edn::from_str("5011227E71367421E12").unwrap(), - Edn::Symbol("5011227E71367421E12".to_string()) - ); + assert!(Edn::from_str("5011227E71367421E12").is_err()); } #[test] @@ -849,32 +835,10 @@ mod test { #[test] fn parse_invalid_ints() { - assert_eq!( - Edn::from_str("42invalid123"), - Err(Error::ParseEdn( - "42invalid123 could not be parsed at char count 1 with radix 10".to_string() - )) - ); - - assert_eq!( - Edn::from_str("0xxyz123"), - Err(Error::ParseEdn( - "xyz123 could not be parsed at char count 1 with radix 16".to_string() - )) - ); - - assert_eq!( - Edn::from_str("42rabcxzy"), - Err(Error::ParseEdn("Radix of 42 is out of bounds".to_string())) - ); - - assert_eq!( - Edn::from_str("42crazyrabcxzy"), - Err(Error::ParseEdn( - "invalid digit found in string while trying to parse radix from 42crazyrabcxzy" - .to_string() - )) - ); + assert!(Edn::from_str("42invalid123").is_err()); + assert!(Edn::from_str("0xxyz123").is_err()); + assert!(Edn::from_str("42rabcxzy").is_err()); + assert!(Edn::from_str("42crazyrabcxzy").is_err()); } #[test] @@ -938,11 +902,56 @@ mod test { fn weird_input() { let edn = "{:a]"; + assert!(Edn::from_str(edn).is_err()); + } + + #[test] + fn special_chars() { + let edn = "[\\space \\@ \\` \\tab \\return \\newline \\# \\% \\' \\g \\( \\* \\j \\+ \\, \\l \\- \\. \\/ \\0 \\2 \\r \\: \\; \\< \\\\ \\] \\} \\~ \\? \\_]"; + assert_eq!( - Edn::from_str(edn), - Err(Error::ParseEdn( - "Could not identify symbol index".to_string() - )) + Edn::from_str(edn).unwrap(), + Edn::Vector(Vector::new(vec![ + Edn::Char(' '), + Edn::Char('@'), + Edn::Char('`'), + Edn::Char('\t'), + Edn::Char('\r'), + Edn::Char('\n'), + Edn::Char('#'), + Edn::Char('%'), + Edn::Char('\''), + Edn::Char('g'), + Edn::Char('('), + Edn::Char('*'), + Edn::Char('j'), + Edn::Char('+'), + Edn::Char(','), + Edn::Char('l'), + Edn::Char('-'), + Edn::Char('.'), + Edn::Char('/'), + Edn::Char('0'), + Edn::Char('2'), + Edn::Char('r'), + Edn::Char(':'), + Edn::Char(';'), + Edn::Char('<'), + Edn::Char('\\'), + Edn::Char(']'), + Edn::Char('}'), + Edn::Char('~'), + Edn::Char('?'), + Edn::Char('_'), + ])) ); } + + #[test] + fn invalid_edn() { + assert!(Edn::from_str("{ :foo 42 :foo 43 }").is_err()); + assert!(Edn::from_str("{ :[0x42] 42 }").is_err()); + assert!(Edn::from_str("\\cats").is_err()); + assert!(Edn::from_str("42/").is_err()); + } } diff --git a/tests/deserialize_sets.rs b/tests/deserialize_sets.rs index 6e09b95..4af38ef 100644 --- a/tests/deserialize_sets.rs +++ b/tests/deserialize_sets.rs @@ -6,8 +6,8 @@ mod test { use alloc::collections::BTreeSet; use core::str::FromStr; - use edn::{Error, List, Vector}; - use edn_rs::{edn, from_edn, from_str, hset, map, set, Edn, Map, Set}; + use edn::{List, Vector}; + use edn_rs::{edn, from_edn, from_str, hset, map, set, Edn, EdnError, Map, Set}; #[test] fn parse_set_with_commas() { @@ -59,15 +59,15 @@ mod test { let edn = "(1 -10 \"2\" 3.3 :b #{true \\c})"; assert_eq!( - Edn::from_str(edn), - Ok(Edn::List(List::new(vec![ + Edn::from_str(edn).unwrap(), + Edn::List(List::new(vec![ Edn::UInt(1), Edn::Int(-10), Edn::Str("2".to_string()), Edn::Double(3.3.into()), Edn::Key(":b".to_string()), Edn::Set(Set::new(set![Edn::Bool(true), Edn::Char('c')])) - ]))) + ])) ); } @@ -114,15 +114,15 @@ mod test { let edn = "{:a \"2\" :b [true false] :c #{:A {:a :b} nil}}"; assert_eq!( - Edn::from_str(edn), - Ok(Edn::Map(Map::new(map! { + Edn::from_str(edn).unwrap(), + Edn::Map(Map::new(map! { ":a".to_string() =>Edn::Str("2".to_string()), ":b".to_string() => Edn::Vector(Vector::new(vec![Edn::Bool(true), Edn::Bool(false)])), ":c".to_string() => Edn::Set(Set::new( set!{ Edn::Map(Map::new(map!{":a".to_string() => Edn::Key(":b".to_string())})), Edn::Key(":A".to_string()), - Edn::Nil}))}))) + Edn::Nil}))})) ); } @@ -148,14 +148,10 @@ mod test { #[test] fn parse_discard_space_invalid() { - assert_eq!( - Edn::from_str( - "#_ ,, #{hello, this will be discarded} #_{so will this} #{this is invalid" - ), - Err(Error::ParseEdn( - "None could not be parsed at char count 58".to_string() - )) - ); + assert!(Edn::from_str( + "#_ ,, #{hello, this will be discarded} #_{so will this} #{this is invalid" + ) + .is_err()); } #[test] @@ -176,14 +172,8 @@ mod test { #[test] fn deser_btreeset_with_error() { let edn = "#{\"a\", 5, \"b\"}"; - let err: Result, Error> = from_str(edn); - assert_eq!( - err, - Err(Error::Deserialize( - "Cannot safely deserialize Set(Set({Str(\"a\"), Str(\"b\"), UInt(5)})) to BTreeSet" - .to_string() - )) - ); + let err: Result, EdnError> = from_str(edn); + assert!(err.is_err()); } #[test] diff --git a/tests/emit.rs b/tests/emit.rs index 7966528..2b09470 100644 --- a/tests/emit.rs +++ b/tests/emit.rs @@ -25,5 +25,8 @@ mod tests { fn char_formatting() { let edn = edn_to_string_unwrap("(\\b \\g \\m)"); assert_eq!(edn, "(\\b \\g \\m)"); + + assert_eq!(edn_to_string_unwrap("[\\space \\@ \\` \\tab \\return \\newline \\# \\% \\' \\g \\( \\* \\j \\+ \\, \\l \\- \\. \\/ \\0 \\2 \\r \\: \\; \\< \\\\ \\] \\} \\~ \\? \\_]"), + "[\\space \\@ \\` \\tab \\return \\newline \\# \\% \\' \\g \\( \\* \\j \\+ \\, \\l \\- \\. \\/ \\0 \\2 \\r \\: \\; \\< \\\\ \\] \\} \\~ \\? \\_]") } } diff --git a/tests/emit_json.rs b/tests/emit_json.rs index 2e91733..29836e8 100644 --- a/tests/emit_json.rs +++ b/tests/emit_json.rs @@ -87,14 +87,13 @@ mod tests { #[test] fn regression_str_to_uint_test() { use edn_derive::Deserialize; - use edn_rs::EdnError; #[derive(Deserialize, Debug, PartialEq)] struct A { amount: u64, } - let a: Result = edn_rs::from_str("{ :amount \"123\" }"); - assert_eq!(a, Ok(A { amount: 123 })); + let a: A = edn_rs::from_str("{ :amount \"123\" }").unwrap(); + assert_eq!(a, A { amount: 123 }); } #[test] diff --git a/tests/error_messages.rs b/tests/error_messages.rs new file mode 100644 index 0000000..3a2f45c --- /dev/null +++ b/tests/error_messages.rs @@ -0,0 +1,89 @@ +#[cfg(test)] +mod test { + use core::str::FromStr; + + use edn_rs::Edn; + + fn err_as_string(s: &str) -> String { + let err = Edn::from_str(s).err().unwrap(); + format!("{err:?}") + } + + #[test] + fn invalid_keyword() { + assert_eq!( + err_as_string(":"), + "EdnError { code: InvalidKeyword, line: Some(1), column: Some(1), index: Some(0) }" + ); + assert_eq!( + err_as_string(" :"), + "EdnError { code: InvalidKeyword, line: Some(1), column: Some(3), index: Some(2) }" + ); + assert_eq!( + err_as_string("\n\n :"), + "EdnError { code: InvalidKeyword, line: Some(3), column: Some(4), index: Some(5) }" + ); + } + + #[test] + fn unexpected_eof() { + assert_eq!( + err_as_string(r#""hello, world!"#), + "EdnError { code: UnexpectedEOF, line: Some(1), column: Some(15), index: Some(14) }" + ); + assert_eq!( + err_as_string( + r#""hello, +multiple +lines +world!"# + ), + "EdnError { code: UnexpectedEOF, line: Some(4), column: Some(7), index: Some(29) }" + ); + } + + #[test] + fn invalid_num() { + assert_eq!( + err_as_string(" ,,,, , , ,,,, ,\n ,,,, 0xfoobarlol"), + "EdnError { code: InvalidNumber, line: Some(2), column: Some(13), index: Some(29) }" + ); + assert_eq!( + err_as_string("[ ; comment \n-0xfoobarlol 0xsilycat]"), + "EdnError { code: InvalidNumber, line: Some(2), column: Some(1), index: Some(13) }" + ); + assert_eq!( + err_as_string("[ ;;;,,,,\n , , ,,,, ,\n ,,,, 16 -0xfoobarlol 0xsilycat]"), + "EdnError { code: InvalidNumber, line: Some(3), column: Some(13), index: Some(34) }" + ); + } + + #[test] + fn utf8() { + assert_eq!( + err_as_string("(猫 ; cat\nおやつ;treats\n "), + "EdnError { code: UnexpectedEOF, line: Some(3), column: Some(7), index: Some(34) }" + ); + } + + #[test] + #[cfg(not(feature = "sets"))] + fn disabled_features() { + // Special case of running into a set without the feature enabled + assert_eq!( + err_as_string("#{true, \\c, 3,four, }",), + "EdnError { code: NoFeatureSets, line: Some(1), column: Some(2), index: Some(1) }" + ); + + assert_eq!( + err_as_string("[1 \n2 ;3 \n4 #{true, \\c, 3,four, }]",), + "EdnError { code: NoFeatureSets, line: Some(3), column: Some(4), index: Some(13) }" + ); + } + + #[test] + fn invalid_conversions() { + let small = edn_rs::from_edn::(&Edn::UInt(9876123)); + assert_eq!(format!("{small:?}"), "Err(EdnError { code: TryFromInt(TryFromIntError(())), line: None, column: None, index: None })"); + } +} diff --git a/tests/lib.rs b/tests/lib.rs index 4380296..2e6f166 100644 --- a/tests/lib.rs +++ b/tests/lib.rs @@ -8,6 +8,7 @@ pub mod deserialize; pub mod deserialize_sets; pub mod emit; pub mod emit_json; +pub mod error_messages; pub mod parse; pub mod parse_sets; pub mod ser;