From 9f379d271313225fe3605729940f1172dcde0393 Mon Sep 17 00:00:00 2001
From: cdecompilador
Date: Thu, 25 Apr 2024 12:18:10 +0200
Subject: [PATCH] made errors generic + file structure + johanverse tests

---
 src/parser.rs             |  72 ++++++++++++++++
 src/parser/ast.rs         |   1 +
 src/parser/combinators.rs | 175 ++++++++++++++++++++++++++++++++++++++
 src/parser/cst.rs         |   1 +
 src/parser/error.rs       |  16 ++++
 src/parser/primitive.rs   |  15 ++++
 6 files changed, 280 insertions(+)
 create mode 100644 src/parser.rs
 create mode 100644 src/parser/ast.rs
 create mode 100644 src/parser/combinators.rs
 create mode 100644 src/parser/cst.rs
 create mode 100644 src/parser/error.rs
 create mode 100644 src/parser/primitive.rs

diff --git a/src/parser.rs b/src/parser.rs
new file mode 100644
index 0000000..f849ab5
--- /dev/null
+++ b/src/parser.rs
@@ -0,0 +1,72 @@
+mod ast;
+mod combinators;
+mod cst;
+mod error;
+mod primitive;
+
+use combinators::*;
+use error::*;
+
+pub type ParseResult<'a, Out, Err = Vec<ParseError>> = Result<(Out, &'a str), Err>;
+
+pub trait Parser<'a, Out, Err = Vec<ParseError>> {
+    fn parse(&self, input: &'a str) -> ParseResult<'a, Out, Err>;
+
+    // NOTE(cdecompilador): Impossible with static dispatch, you can't reference yourself
+    // inside of a struct, so you need dynamic dispatch. The parser is stored in an `Rc`, since
+    // the only important data it contains is the vtable to the parse method (static)
+    fn map<F, NewOut>(self, f: F) -> SharedParser<'a, NewOut, Err>
+    where
+        Self: Sized + 'a,
+        Out: 'a,
+        NewOut: 'a,
+        Err: 'a,
+        F: Fn(Out) -> NewOut + 'a,
+    {
+        SharedParser {
+            parser: Rc::new(map(self, f)),
+        }
+    }
+
+    fn map_err<F, NewErr>(self, f: F) -> SharedParser<'a, Out, NewErr>
+    where
+        Self: Sized + 'a,
+        Out: 'a,
+        Err: 'a,
+        NewErr: 'a,
+        F: Fn(Err) -> NewErr + 'a,
+    {
+        SharedParser {
+            parser: Rc::new(map_err(self, f)),
+        }
+    }
+}
+
+use std::rc::Rc;
+
+#[derive(Clone)]
+pub struct SharedParser<'a, Out, Err> {
+    parser: Rc<dyn Parser<'a, Out, Err> + 'a>,
+}
+
+impl<'a, Out, Err> Parser<'a, Out, Err> for SharedParser<'a, Out, Err> {
+    fn parse(&self, input: &'a str) -> ParseResult<'a, Out, Err> {
+        self.parser.parse(input)
+    }
+}
+
+impl<'a, F, Out, Err> Parser<'a, Out, Err> for F
+where
+    F: Fn(&'a str) -> ParseResult<'a, Out, Err>,
+{
+    fn parse(&self, input: &'a str) -> ParseResult<'a, Out, Err> {
+        self(input)
+    }
+}
+
+// NOTE(cdecompilador): This function can't fail since it will have to handle the printing
+// of all warnings or errors (with crash); the caller of this will have the responsibility
+// of passing the &str from a full in-memory file read or a mmap
+pub fn parse(_: &str) -> ast::Ast {
+    todo!()
+}
diff --git a/src/parser/ast.rs b/src/parser/ast.rs
new file mode 100644
index 0000000..beed323
--- /dev/null
+++ b/src/parser/ast.rs
@@ -0,0 +1 @@
+pub enum Ast {}
diff --git a/src/parser/combinators.rs b/src/parser/combinators.rs
new file mode 100644
index 0000000..fd94194
--- /dev/null
+++ b/src/parser/combinators.rs
@@ -0,0 +1,175 @@
+//! This module contains the fundamental combinators used throughout the parsing pipeline
+
+use super::*;
+
+pub fn pair<'a, Out1, Out2, Err>(
+    first: impl Parser<'a, Out1, Err>,
+    second: impl Parser<'a, Out2, Err>,
+) -> impl Parser<'a, (Out1, Out2), Err> {
+    move |input| match first.parse(input) {
+        Ok((first_result, rest)) => match second.parse(rest) {
+            Ok((second_result, rest)) => Ok(((first_result, second_result), rest)),
+            Err(err) => Err(err),
+        },
+        Err(err) => Err(err),
+    }
+}
+
+pub fn left<'a, Out1, Out2, Err>(
+    first: impl Parser<'a, Out1, Err>,
+    second: impl Parser<'a, Out2, Err>,
+) -> impl Parser<'a, Out1, Err> {
+    map(pair(first, second), |(left, _)| left)
+}
+
+pub fn right<'a, Out1, Out2, Err>(
+    first: impl Parser<'a, Out1, Err>,
+    second: impl Parser<'a, Out2, Err>,
+) -> impl Parser<'a, Out2, Err> {
+    map(pair(first, second), |(_, right)| right)
+}
+
+pub fn map<'a, In, Out, Err>(
+    parser: impl Parser<'a, In, Err>,
+    f: impl Fn(In) -> Out,
+) -> impl Parser<'a, Out, Err> {
+    move |input| match parser.parse(input) {
+        Ok((result, rest)) => Ok((f(result), rest)),
+        Err(errors) => Err(errors),
+    }
+}
+
+pub fn map_err<'a, Out, InErr, OutErr>(
+    parser: impl Parser<'a, Out, InErr>,
+    f: impl Fn(InErr) -> OutErr,
+) -> impl Parser<'a, Out, OutErr> {
+    move |input| match parser.parse(input) {
+        Ok((result, rest)) => Ok((result, rest)),
+        Err(error) => Err(f(error)),
+    }
+}
+
+pub fn either<'a, Out, Err1, Err2>(
+    first: impl Parser<'a, Out, Err1>,
+    second: impl Parser<'a, Out, Err2>,
+) -> impl Parser<'a, Out, (Err1, Err2)> {
+    move |input| match first.parse(input) {
+        Ok(result1) => Ok(result1),
+        Err(fst_err) => match second.parse(input) {
+            Ok(result2) => Ok(result2),
+            Err(snd_err) => Err((fst_err, snd_err)),
+        },
+    }
+}
+
+pub fn zero_or_more<'a, Out, Err>(
+    parser: impl Parser<'a, Out, Err>,
+) -> impl Parser<'a, Vec<Out>, Err> {
+    move |mut input| {
+        let mut result = Vec::new();
+        // NOTE(review): never terminates if `parser` keeps succeeding without consuming input
+        while let Ok((out, next_input)) = parser.parse(input) {
+            input = next_input;
+            result.push(out);
+        }
+
+        Ok((result, input))
+    }
+}
+
+pub fn one_or_more<'a, Out, Err>(
+    parser: impl Parser<'a, Out, Err> + Clone,
+) -> impl Parser<'a, Vec<Out>, Err> {
+    map(
+        pair(parser.clone(), zero_or_more(parser)),
+        |(head, mut tail)| {
+            tail.insert(0, head);
+            tail
+        },
+    )
+}
+
+pub fn surrounded_by<'a, OutS, Out, ErrS, Err>(
+    parser: impl Parser<'a, Out, Err>,
+    surr: impl Parser<'a, OutS, ErrS> + Clone,
+) -> impl Parser<'a, Out, Err> {
+    move |input| todo!()
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub struct LiteralError<'a> {
+    pub expected: &'static str,
+    pub found: &'a str,
+}
+
+pub fn literal<'a>(literal: &'static str) -> impl Parser<'a, (), LiteralError<'a>> {
+    move |input: &'a str| {
+        if input.starts_with(literal) {
+            Ok(((), &input[literal.len()..]))
+        } else {
+            Err(LiteralError {
+                expected: literal,
+                found: input,
+            })
+        }
+    }
+}
+
+/*
+fn regex<'a>(re_str: &'static str) -> impl Parser<'a, Captures<'a>> {
+    let re = Regex::new(re_str).unwrap();
+    move |input: &'a str| match re.captures(input) {
+        Some(captures) => {
+            let match_length = captures.get(0).unwrap().end();
+            Ok((captures, &input[match_length..]))
+        }
+        None => {
+            let next_line = input.find(&['\n', '\r']).unwrap_or(min(10, input.len()));
+            Err(vec![ParserError::RegexError(re_str, &input[0..next_line])])
+        }
+    }
+}
+*/
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn it_works() {
+        let johan = literal("johan");
+
+        assert_eq!(johan.parse("johan"), Ok(((), "")));
+        assert_eq!(johan.parse("johan jr"), Ok(((), " jr")));
+        assert_eq!(
+            Err(LiteralError {
+                expected: "johan",
+                found: "pepe",
+            }),
+            johan.parse("pepe"),
+        );
+
+        #[derive(Debug, Clone, PartialEq, Eq)]
+        struct Johan;
+        #[derive(Debug, Clone, PartialEq, Eq)]
+        struct JohanErr;
+
+        let johan = johan.map(|_| Johan).map_err(|_| JohanErr);
+
+        assert_eq!(johan.parse("johan"), Ok((Johan, "")));
+        assert_eq!(johan.parse("pepe"), Err(JohanErr));
+
+        let johan_verse_dyn = left(johan.clone(), johan.clone());
+        assert_eq!(johan_verse_dyn.parse("johanjohan"), Ok((Johan, "")));
+
+        assert_eq!(
+            zero_or_more(johan.clone()).parse("johanjohanjohan"),
+            Ok((vec![Johan, Johan, Johan], ""))
+        );
+        assert_eq!(
+            zero_or_more(johan.clone()).parse("uwu"),
+            Ok((vec![], "uwu"))
+        );
+        assert_eq!(one_or_more(johan.clone()).parse("uwu"), Err(JohanErr))
+    }
+}
diff --git a/src/parser/cst.rs b/src/parser/cst.rs
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/src/parser/cst.rs
@@ -0,0 +1 @@
+
diff --git a/src/parser/error.rs b/src/parser/error.rs
new file mode 100644
index 0000000..d5c29ff
--- /dev/null
+++ b/src/parser/error.rs
@@ -0,0 +1,16 @@
+pub enum ParseError {
+    LexError(LexError),
+    SintacticError(SintacticError),
+}
+
+pub enum LexError {
+    ExpectedCharacter(char),
+    ExpectedPattern(String),
+    ExpectedOneOf(Vec<char>), // NOTE(review): element type lost in extraction; `char` assumed
+    InvalidIntLiteral,
+    UnclosedStringLiteral,
+}
+
+pub enum SintacticError {}
+
+// TODO: Semantic error
diff --git a/src/parser/primitive.rs b/src/parser/primitive.rs
new file mode 100644
index 0000000..34cc1aa
--- /dev/null
+++ b/src/parser/primitive.rs
@@ -0,0 +1,15 @@
+use super::*;
+
+pub fn integer_literal(input: &str) -> ParseResult<'_, i64, ParseError> {
+    todo!()
+}
+
+// DESIGN(cdecompilador): Should this really be a &str?? or a Span?? or an allocated String??
+pub fn identifier(input: &str) -> ParseResult<'_, String, ParseError> {
+    todo!()
+}
+
+// DESIGN(cdecompilador): Should this really be a &str?? or a Span?? or an allocated String??
+pub fn string_literal(input: &str) -> ParseResult<'_, String, ParseError> {
+    todo!()
+}