diff --git a/Cargo.toml b/Cargo.toml index 1234339..4905497 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,6 +18,7 @@ unsafe_code = "deny" [lints.clippy] pedantic = "warn" nursery = "warn" +inline_always = "allow" [features] default = ["sets", "std"] diff --git a/src/deserialize/mod.rs b/src/deserialize/mod.rs index 533e71e..4124752 100644 --- a/src/deserialize/mod.rs +++ b/src/deserialize/mod.rs @@ -23,7 +23,7 @@ use ordered_float::OrderedFloat; /// /// # Errors /// -/// Error implements Display and Debug. See docs for more implementations. +/// Error implements Debug. See docs for more information. /// /// ``` /// use crate::edn_rs::{Edn, EdnError, Deserialize}; diff --git a/src/deserialize/parse.rs b/src/deserialize/parse.rs index 184117e..7c5459f 100644 --- a/src/deserialize/parse.rs +++ b/src/deserialize/parse.rs @@ -1,9 +1,10 @@ +#![allow(clippy::inline_always)] + use alloc::borrow::ToOwned; use alloc::boxed::Box; use alloc::collections::BTreeMap; #[cfg(feature = "sets")] use alloc::collections::BTreeSet; -use alloc::format; use alloc::string::{String, ToString}; use alloc::vec::Vec; use core::primitive::str; @@ -25,7 +26,7 @@ struct Walker<'w> { impl Walker<'_> { // Slurps until whitespace or delimiter, returning the slice. - #[inline] + #[inline(always)] fn slurp_literal(&mut self) -> &str { let token = self.slice[self.ptr..] .split(|c: char| c.is_whitespace() || DELIMITERS.contains(&c)) @@ -38,7 +39,7 @@ impl Walker<'_> { } // Slurps a char. Special handling for chars that happen to be delimiters - #[inline] + #[inline(always)] fn slurp_char(&mut self) -> &str { let starting_ptr = self.ptr; @@ -57,7 +58,7 @@ impl Walker<'_> { } // Slurps until whitespace or delimiter, returning the slice. - #[inline] + #[inline(always)] fn slurp_tag(&mut self) -> &str { let token = self.slice[self.ptr..] .split(|c: char| c.is_whitespace() && c != ',') @@ -73,7 +74,7 @@ impl Walker<'_> { token } - #[inline] + #[inline(always)] fn slurp_str(&mut self) -> Result { let _ = self.nibble_next(); // Consume the leading '"' char let mut s = String::new(); @@ -92,6 +93,7 @@ impl Walker<'_> { code: Code::InvalidEscape, column: Some(self.column), line: Some(self.line), + ptr: Some(self.ptr), }) } } @@ -109,23 +111,22 @@ impl Walker<'_> { code: Code::UnexpectedEOF, column: Some(self.column), line: Some(self.line), + ptr: Some(self.ptr), }); } } } // Nibbles away until the next new line - #[inline] + #[inline(always)] fn nibble_newline(&mut self) { let len = self.slice[self.ptr..].split('\n').next().unwrap(); // At least an empty slice will always be on the first split, even on an empty str - self.line += 1; - self.column = 1; self.ptr += len.len(); self.nibble_whitespace(); } // Nibbles away until the start of the next form - #[inline] + #[inline(always)] fn nibble_whitespace(&mut self) { while let Some(n) = self.peek_next() { if n == ',' || n.is_whitespace() { @@ -137,7 +138,7 @@ impl Walker<'_> { } // Consumes next - #[inline] + #[inline(always)] fn nibble_next(&mut self) -> Option { let char = self.slice[self.ptr..].chars().next(); if let Some(c) = char { @@ -153,7 +154,7 @@ impl Walker<'_> { } // Peek into the next char - #[inline] + #[inline(always)] fn peek_next(&mut self) -> Option { self.slice[self.ptr..].chars().next() } @@ -167,13 +168,16 @@ pub fn parse(edn: &str) -> Result { line: 1, }; - parse_foobar(&mut walker) + parse_internal(&mut walker) } -fn parse_foobar(walker: &mut Walker<'_>) -> Result { +#[inline] +fn parse_internal(walker: &mut Walker<'_>) -> Result { walker.nibble_whitespace(); while let Some(next) = walker.peek_next() { let column_start = walker.column; + let ptr_start = walker.ptr; + let line_start = walker.line; if let Some(ret) = match next { '\\' => match parse_char(walker.slurp_char()) { Ok(edn) => Some(Ok(edn)), @@ -182,6 +186,7 @@ fn parse_foobar(walker: &mut Walker<'_>) -> Result { code, line: Some(walker.line), column: Some(column_start), + ptr: Some(walker.ptr), }) } }, @@ -201,8 +206,9 @@ fn parse_foobar(walker: &mut Walker<'_>) -> Result { Err(code) => { return Err(Error { code, - line: Some(walker.line), + line: Some(line_start), column: Some(column_start), + ptr: Some(ptr_start), }) } }, @@ -213,6 +219,7 @@ fn parse_foobar(walker: &mut Walker<'_>) -> Result { Ok(Edn::Empty) } +#[inline] fn parse_tag_set_discard(walker: &mut Walker<'_>) -> Result, Error> { let _ = walker.nibble_next(); // Consume the leading '#' char @@ -223,23 +230,26 @@ fn parse_tag_set_discard(walker: &mut Walker<'_>) -> Result, Error> } } +#[inline] fn parse_discard(walker: &mut Walker<'_>) -> Result, Error> { let _ = walker.nibble_next(); // Consume the leading '_' char - Ok(match parse_foobar(walker)? { + Ok(match parse_internal(walker)? { Edn::Empty => { return Err(Error { code: Code::UnexpectedEOF, line: Some(walker.line), column: Some(walker.column), + ptr: Some(walker.ptr), }) } _ => match walker.peek_next() { - Some(_) => Some(parse_foobar(walker)?), + Some(_) => Some(parse_internal(walker)?), None => None, }, }) } +#[inline] #[cfg(feature = "sets")] fn parse_set(walker: &mut Walker<'_>) -> Result { let _ = walker.nibble_next(); // Consume the leading '{' char @@ -252,7 +262,7 @@ fn parse_set(walker: &mut Walker<'_>) -> Result { return Ok(Edn::Set(Set::new(set))); } Some(_) => { - let next = parse_foobar(walker)?; + let next = parse_internal(walker)?; if next != Edn::Empty { set.insert(next); } @@ -262,29 +272,34 @@ fn parse_set(walker: &mut Walker<'_>) -> Result { code: Code::UnexpectedEOF, line: Some(walker.line), column: Some(walker.column), + ptr: Some(walker.ptr), }) } } } } +#[inline] #[cfg(not(feature = "sets"))] const fn parse_set(walker: &Walker<'_>) -> Result { Err(Error { code: Code::NoFeatureSets, line: Some(walker.line), column: Some(walker.column), + ptr: Some(walker.ptr), }) } +#[inline] fn parse_tag(walker: &mut Walker<'_>) -> Result { let tag = walker.slurp_tag(); Ok(Edn::Tagged( tag.to_string(), - Box::new(parse_foobar(walker)?), + Box::new(parse_internal(walker)?), )) } +#[inline] fn parse_map(walker: &mut Walker<'_>) -> Result { let _ = walker.nibble_next(); // Consume the leading '{' char let mut map: BTreeMap = BTreeMap::new(); @@ -300,14 +315,23 @@ fn parse_map(walker: &mut Walker<'_>) -> Result { code: Code::UnmatchedDelimiter(n), line: Some(walker.line), column: Some(walker.column), + ptr: Some(walker.ptr), }); } - let key = parse_foobar(walker)?; - let val = parse_foobar(walker)?; + let key = parse_internal(walker)?; + let val = parse_internal(walker)?; if key != Edn::Empty && val != Edn::Empty { - map.insert(key.to_string(), val); + // Existing keys are considered an error + if map.insert(key.to_string(), val).is_some() { + return Err(Error { + code: Code::HashMapDuplicateKey, + line: Some(walker.line), + column: Some(walker.column), + ptr: Some(walker.ptr), + }); + } } } _ => { @@ -315,12 +339,14 @@ fn parse_map(walker: &mut Walker<'_>) -> Result { code: Code::UnexpectedEOF, line: Some(walker.line), column: Some(walker.column), + ptr: Some(walker.ptr), }) } } } } +#[inline] fn parse_vector(walker: &mut Walker<'_>) -> Result { let _ = walker.nibble_next(); // Consume the leading '[' char let mut vec = Vec::new(); @@ -332,7 +358,7 @@ fn parse_vector(walker: &mut Walker<'_>) -> Result { return Ok(Edn::Vector(Vector::new(vec))); } Some(_) => { - let next = parse_foobar(walker)?; + let next = parse_internal(walker)?; if next != Edn::Empty { vec.push(next); } @@ -342,12 +368,14 @@ fn parse_vector(walker: &mut Walker<'_>) -> Result { code: Code::UnexpectedEOF, line: Some(walker.line), column: Some(walker.column), + ptr: Some(walker.ptr), }) } } } } +#[inline] fn parse_list(walker: &mut Walker<'_>) -> Result { let _ = walker.nibble_next(); // Consume the leading '[' char let mut vec = Vec::new(); @@ -359,7 +387,7 @@ fn parse_list(walker: &mut Walker<'_>) -> Result { return Ok(Edn::List(List::new(vec))); } Some(_) => { - let next = parse_foobar(walker)?; + let next = parse_internal(walker)?; if next != Edn::Empty { vec.push(next); } @@ -369,12 +397,14 @@ fn parse_list(walker: &mut Walker<'_>) -> Result { code: Code::UnexpectedEOF, line: Some(walker.line), column: Some(walker.column), + ptr: Some(walker.ptr), }) } } } } +#[inline] fn edn_literal(literal: &str) -> Result { fn numeric(s: &str) -> bool { let (first, second) = { @@ -414,6 +444,7 @@ fn edn_literal(literal: &str) -> Result { }) } +#[inline] fn parse_char(lit: &str) -> Result { let lit = &lit[1..]; // ignore the leading '\\' match lit { @@ -426,6 +457,7 @@ fn parse_char(lit: &str) -> Result { } } +#[inline] fn parse_number(lit: &str) -> Result { let mut chars = lit.chars(); let (number, radix) = { @@ -497,12 +529,11 @@ fn parse_number(lit: &str) -> Result { } n if n.parse::().is_ok() => Ok(Edn::Double(n.parse::()?.into())), n if num_den_from_slice(&n).is_some() => Ok(Edn::Rational(num_den_from_slice(n).unwrap())), - _ => Err(Code::Message( - format!("{number} could not be parsed with radix {radix}").into_boxed_str(), - )), + _ => Err(Code::InvalidNumber), } } +#[inline] fn num_den_from_slice(slice: impl AsRef) -> Option<(i64, u64)> { let slice = slice.as_ref(); let index = slice.find('/'); diff --git a/src/edn/error.rs b/src/edn/error.rs index 3849690..73716e1 100644 --- a/src/edn/error.rs +++ b/src/edn/error.rs @@ -1,38 +1,40 @@ -use alloc::boxed::Box; -use core::fmt::{self, Debug, Display}; +use core::fmt::{self, Debug}; use core::{convert, num, str}; pub struct Error { - pub(crate) code: Code, - pub(crate) line: Option, - pub(crate) column: Option, + pub code: Code, + /// Counting from 1. + pub line: Option, + /// This is a utf-8 char count. Counting from 1. + pub column: Option, + /// This is a pointer into the str trying to be parsed, not a utf-8 char offset + pub ptr: Option, } #[derive(Debug, Eq, PartialEq)] #[non_exhaustive] pub enum Code { - /// Catchall/placeholder error messages - Message(Box), - /// Parse errors + HashMapDuplicateKey, InvalidChar, InvalidEscape, InvalidKeyword, + InvalidNumber, InvalidRadix(Option), ParseNumber(ParseNumber), UnexpectedEOF, UnmatchedDelimiter(char), - // Feature errors + /// Feature errors NoFeatureSets, - // Deserialize errors + /// Deserialize errors Convert(&'static str), - // Navigation errors + /// Navigation errors Iter, - /// For type conversions + /// Type conversion errors TryFromInt(num::TryFromIntError), #[doc(hidden)] Infallable(), // Makes the compiler happy for converting u64 to u64 and i64 to i64 @@ -51,6 +53,7 @@ impl Error { code: Code::Convert(conv_type), line: None, column: None, + ptr: None, } } pub(crate) const fn iter() -> Self { @@ -58,6 +61,7 @@ impl Error { code: Code::Iter, line: None, column: None, + ptr: None, } } } @@ -66,22 +70,12 @@ impl Debug for Error { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!( f, - "EdnError {{ code: {:?}, line: {:?}, column: {:?} }}", - self.code, self.line, self.column + "EdnError {{ code: {:?}, line: {:?}, column: {:?}, index: {:?} }}", + self.code, self.line, self.column, self.ptr ) } } -impl Display for Error { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match &self.code { - Code::Message(m) => write!(f, "{}", m.as_ref()), - Code::TryFromInt(e) => write!(f, "{e}"), - _ => todo!(), - } - } -} - impl From for Code { fn from(e: num::ParseIntError) -> Self { Self::ParseNumber(ParseNumber::ParseIntError(e)) @@ -100,6 +94,7 @@ impl From for Error { code: Code::Infallable(), line: None, column: None, + ptr: None, } } } @@ -110,6 +105,7 @@ impl From for Error { code: Code::TryFromInt(e), line: None, column: None, + ptr: None, } } } diff --git a/tests/deserialize.rs b/tests/deserialize.rs index 3cf47db..2e070ae 100644 --- a/tests/deserialize.rs +++ b/tests/deserialize.rs @@ -18,6 +18,11 @@ mod test { #[test] fn parse_empty() { assert_eq!(Edn::from_str("").unwrap(), Edn::Empty); + assert_eq!( + Edn::from_str("[]").unwrap(), + Edn::Vector(Vector::new(vec![])) + ); + assert_eq!(Edn::from_str("()").unwrap(), Edn::List(List::new(vec![]))); } #[test] @@ -929,4 +934,11 @@ mod test { ])) ); } + + #[test] + fn invalid_edn() { + assert!(Edn::from_str("{ :foo 42 :foo 43 }").is_err()); + assert!(Edn::from_str("{ :[0x42] 42 }").is_err()); + assert!(Edn::from_str("\\cats").is_err()); + } } diff --git a/tests/error_messages.rs b/tests/error_messages.rs index e1738a5..5a4baac 100644 --- a/tests/error_messages.rs +++ b/tests/error_messages.rs @@ -13,15 +13,15 @@ mod test { fn invalid_keyword() { assert_eq!( debug_msg(":"), - "EdnError { code: InvalidKeyword, line: Some(1), column: Some(1) }" + "EdnError { code: InvalidKeyword, line: Some(1), column: Some(1), index: Some(0) }" ); assert_eq!( debug_msg(" :"), - "EdnError { code: InvalidKeyword, line: Some(1), column: Some(3) }" + "EdnError { code: InvalidKeyword, line: Some(1), column: Some(3), index: Some(2) }" ); assert_eq!( debug_msg("\n\n :"), - "EdnError { code: InvalidKeyword, line: Some(3), column: Some(4) }" + "EdnError { code: InvalidKeyword, line: Some(3), column: Some(4), index: Some(5) }" ); } @@ -29,7 +29,7 @@ mod test { fn unexpected_eof() { assert_eq!( debug_msg(r#""hello, world!"#), - "EdnError { code: UnexpectedEOF, line: Some(1), column: Some(15) }" + "EdnError { code: UnexpectedEOF, line: Some(1), column: Some(15), index: Some(14) }" ); assert_eq!( debug_msg( @@ -38,7 +38,31 @@ multiple lines world!"# ), - "EdnError { code: UnexpectedEOF, line: Some(4), column: Some(7) }" + "EdnError { code: UnexpectedEOF, line: Some(4), column: Some(7), index: Some(29) }" + ); + } + + #[test] + fn invalid_num() { + assert_eq!( + debug_msg(" ,,,, , , ,,,, ,\n ,,,, 0xfoobarlol"), + "EdnError { code: InvalidNumber, line: Some(2), column: Some(13), index: Some(29) }" + ); + assert_eq!( + debug_msg("[ ; comment \n-0xfoobarlol 0xsilycat]"), + "EdnError { code: InvalidNumber, line: Some(2), column: Some(1), index: Some(13) }" + ); + assert_eq!( + debug_msg("[ ;;;,,,,\n , , ,,,, ,\n ,,,, 16 -0xfoobarlol 0xsilycat]"), + "EdnError { code: InvalidNumber, line: Some(3), column: Some(13), index: Some(34) }" + ); + } + + #[test] + fn utf8() { + assert_eq!( + debug_msg("(猫 ; cat\nおやつ;treats\n "), + "EdnError { code: UnexpectedEOF, line: Some(3), column: Some(7), index: Some(34) }" ); } @@ -48,7 +72,12 @@ world!"# // Special case of running into a set without the feature enabled assert_eq!( debug_msg("#{true, \\c, 3,four, }",), - "EdnError { code: NoFeatureSets, line: Some(1), column: Some(2) }" + "EdnError { code: NoFeatureSets, line: Some(1), column: Some(2), index: Some(1) }" + ); + + assert_eq!( + debug_msg("[1 \n2 ;3 \n4 #{true, \\c, 3,four, }]",), + "EdnError { code: NoFeatureSets, line: Some(3), column: Some(4), index: Some(13) }" ); } }