Skip to content

Commit

Permalink
Parser refactoring (#56)
Browse files Browse the repository at this point in the history
  • Loading branch information
Lurk authored Apr 8, 2024
1 parent 95f3c79 commit 5c2a9fc
Show file tree
Hide file tree
Showing 29 changed files with 1,094 additions and 2,167 deletions.
8 changes: 4 additions & 4 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "yamd"
version = "0.12.1"
version = "0.13.1"
edition = "2021"
license = "MIT OR Apache-2.0"
description = "Yet Another Markdown Document (flavor)"
Expand All @@ -12,6 +12,6 @@ keywords = ["markdown", "parser"]
pretty_assertions = "1.4.0"

[dependencies]
serde = { version = "1.0.193", features = ["derive"] }
chrono = { version = "0.4.31", features = ["serde"] }
serde_yaml = "0.9.27"
serde = { version = "1.0.197", features = ["derive"] }
chrono = { version = "0.4.37", features = ["serde"] }
serde_yaml = "0.9.34"
4 changes: 2 additions & 2 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,7 @@
//!
use nodes::yamd::Yamd;
use toolkit::deserializer::Deserializer;
use toolkit::parser::Parse;

pub mod nodes;
mod toolkit;
Expand All @@ -209,7 +209,7 @@ mod toolkit;
/// let yamd = deserialize(input).unwrap();
/// ```
pub fn deserialize(input: &str) -> Option<Yamd> {
Yamd::deserialize(input)
Yamd::parse(input, 0, None).map(|(yamd, _)| yamd)
}

/// Serialize a Yamd struct into a string
Expand Down
74 changes: 42 additions & 32 deletions src/nodes/anchor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,7 @@ use std::fmt::{Display, Formatter};

use serde::Serialize;

use crate::{
toolkit::context::Context,
toolkit::deserializer::Deserializer,
toolkit::{matcher::Matcher, node::Node},
};
use crate::toolkit::{context::Context, parser::Parse};

/// Representation of an anchor
#[derive(Debug, PartialEq, Serialize, Clone)]
Expand All @@ -30,18 +26,28 @@ impl Display for Anchor {
}
}

impl Node for Anchor {
fn len(&self) -> usize {
self.text.len() + self.url.len() + 4
}
}

impl Deserializer for Anchor {
fn deserialize_with_context(input: &str, _: Option<Context>) -> Option<Self> {
let mut matcher = Matcher::new(input);
if let Some(text) = matcher.get_match("[", "]", false) {
if let Some(url) = matcher.get_match("(", ")", false) {
return Some(Anchor::new(text.body, url.body));
impl Parse for Anchor {
fn parse(input: &str, current_position: usize, _: Option<&Context>) -> Option<(Self, usize)> {
if input[current_position..].starts_with('[') {
if let Some(middle) = input[current_position + 1..].find("](") {
let mut level = 1;
for (i, c) in input[current_position + middle + 3..].char_indices() {
if c == '(' {
level += 1;
} else if c == ')' {
level -= 1;
}
if level == 0 {
return Some((
Anchor::new(
&input[current_position + 1..current_position + middle + 1],
&input[current_position + middle + 3
..current_position + middle + 3 + i],
),
middle + 3 + i + 1,
));
}
}
}
}
None
Expand All @@ -50,8 +56,9 @@ impl Deserializer for Anchor {

#[cfg(test)]
mod tests {
use crate::toolkit::parser::Parse;

use super::Anchor;
use crate::toolkit::{deserializer::Deserializer, node::Node};
use pretty_assertions::assert_eq;

#[test]
Expand All @@ -68,27 +75,30 @@ mod tests {
}

#[test]
fn deserialize() {
assert_eq!(Anchor::deserialize("[1](2)"), Some(Anchor::new("1", "2")));
assert_eq!(Anchor::deserialize("[1"), None);
assert_eq!(Anchor::deserialize("[1](2"), None);
fn parse() {
assert_eq!(
Anchor::parse("[1](2)", 0, None),
Some((Anchor::new("1", "2"), 6))
);
assert_eq!(Anchor::parse("[1", 0, None), None);
assert_eq!(Anchor::parse("[1](2", 0, None), None);
}

#[test]
fn deserilalze_with_parentesis_in_url() {
assert_eq!(
Anchor::deserialize(
"[the Rope data structure](https://en.wikipedia.org/wiki/Rope_(data_structure))"
Anchor::parse(
"[the Rope data structure](https://en.wikipedia.org/wiki/Rope_(data_structure))",
0,
None
),
Some(Anchor::new(
"the Rope data structure",
"https://en.wikipedia.org/wiki/Rope_(data_structure)"
Some((
Anchor::new(
"the Rope data structure",
"https://en.wikipedia.org/wiki/Rope_(data_structure)"
),
78
))
);
}

#[test]
fn len() {
assert_eq!(Anchor::new("a", "b").len(), 6);
}
}
173 changes: 67 additions & 106 deletions src/nodes/bold.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,93 +3,94 @@ use std::fmt::Display;
use serde::Serialize;

use crate::{
nodes::italic::Italic,
nodes::strikethrough::Strikethrough,
nodes::text::Text,
nodes::{italic::Italic, strikethrough::Strikethrough, text::Text},
toolkit::{
context::Context,
deserializer::{Branch, DefinitelyNode, Deserializer, MaybeNode},
matcher::Matcher,
node::Node,
parser::{parse_to_consumer, parse_to_parser, Branch, Consumer, Parse, Parser},
},
};

#[derive(Debug, PartialEq, Serialize, Clone)]
#[serde(tag = "type")]
pub enum BoldNodes {
Italic(Italic),
Strikethrough(Strikethrough),
Text(Text),
I(Italic),
S(Strikethrough),
}

impl From<Text> for BoldNodes {
fn from(value: Text) -> Self {
BoldNodes::Text(value)
}
}

impl From<Italic> for BoldNodes {
fn from(value: Italic) -> Self {
BoldNodes::I(value)
fn from(i: Italic) -> Self {
BoldNodes::Italic(i)
}
}

impl From<Strikethrough> for BoldNodes {
fn from(value: Strikethrough) -> Self {
BoldNodes::S(value)
}
}

impl Display for BoldNodes {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
BoldNodes::Text(node) => write!(f, "{}", node),
BoldNodes::I(node) => write!(f, "{}", node),
BoldNodes::S(node) => write!(f, "{}", node),
}
fn from(s: Strikethrough) -> Self {
BoldNodes::Strikethrough(s)
}
}

impl Node for BoldNodes {
fn len(&self) -> usize {
match self {
BoldNodes::Text(node) => node.len(),
BoldNodes::I(node) => node.len(),
BoldNodes::S(node) => node.len(),
}
impl From<Text> for BoldNodes {
fn from(t: Text) -> Self {
BoldNodes::Text(t)
}
}

#[derive(Debug, PartialEq, Serialize, Clone, Default)]
pub struct Bold {
pub nodes: Vec<BoldNodes>,
nodes: Vec<BoldNodes>,
}

impl Bold {
pub fn new(nodes: Vec<BoldNodes>) -> Self {
Self { nodes }
impl Branch<BoldNodes> for Bold {
fn get_parsers(&self) -> Vec<Parser<BoldNodes>> {
vec![
parse_to_parser::<BoldNodes, Italic>(),
parse_to_parser::<BoldNodes, Strikethrough>(),
]
}
}

impl Branch<BoldNodes> for Bold {
fn push<BC: Into<BoldNodes>>(&mut self, element: BC) {
self.nodes.push(element.into());
fn push_node(&mut self, node: BoldNodes) {
self.nodes.push(node);
}

fn get_maybe_nodes() -> Vec<MaybeNode<BoldNodes>> {
vec![Italic::maybe_node(), Strikethrough::maybe_node()]
fn get_consumer(&self) -> Option<Consumer<BoldNodes>> {
Some(parse_to_consumer::<BoldNodes, Text>())
}
}

fn get_fallback_node() -> Option<DefinitelyNode<BoldNodes>> {
Some(Box::new(|str| Text::new(str).into()))
impl Parse for Bold {
    /// Parses a `**`-delimited bold span that begins exactly at `current_position`.
    ///
    /// The text between the opening `**` and the first `**` that follows it is
    /// handed to `parse_branch` on a fresh, empty `Bold`. On success the result is
    /// the parsed node paired with the byte length of the matched span (body
    /// length plus the two 2-byte delimiters).
    ///
    /// Returns `None` when the input does not start with `**` at
    /// `current_position`, or when no closing `**` is found.
    ///
    /// # Panics
    ///
    /// Panics if `current_position` is not a valid slice start for `input`, or if
    /// `parse_branch` yields `None`.
    fn parse(input: &str, current_position: usize, _: Option<&Context>) -> Option<(Self, usize)> {
        // Everything after the opening delimiter; bail out if there is none.
        let after_opening = input[current_position..].strip_prefix("**")?;
        // Offset of the closing delimiter, relative to the start of the body.
        let body_len = after_opening.find("**")?;

        let bold = Bold::new(vec![])
            .parse_branch(&after_opening[..body_len], "", None)
            .expect("bold should always succed");

        // Opening and closing `**` account for the extra four bytes.
        Some((bold, body_len + 4))
    }
}

fn get_outer_token_length(&self) -> usize {
4
impl Bold {
pub fn new(nodes: Vec<BoldNodes>) -> Self {
Self { nodes }
}
}

fn is_empty(&self) -> bool {
self.nodes.is_empty()
impl Display for BoldNodes {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
BoldNodes::Text(node) => write!(f, "{}", node),
BoldNodes::Italic(node) => write!(f, "{}", node),
BoldNodes::Strikethrough(node) => write!(f, "{}", node),
}
}
}

Expand All @@ -107,40 +108,18 @@ impl Display for Bold {
}
}

impl Node for Bold {
fn len(&self) -> usize {
self.nodes.iter().map(|node| node.len()).sum::<usize>() + 4
}
}

impl Deserializer for Bold {
fn deserialize_with_context(input: &str, _: Option<Context>) -> Option<Self> {
let mut matcher = Matcher::new(input);
if let Some(bold) = matcher.get_match("**", "**", false) {
return Self::parse_branch(bold.body, "", Self::default());
}
None
}
}

#[cfg(test)]
mod tests {
use crate::{
nodes::bold::Bold,
nodes::italic::Italic,
nodes::strikethrough::Strikethrough,
nodes::text::Text,
toolkit::{
deserializer::{Branch, Deserializer},
node::Node,
},
nodes::{bold::Bold, italic::Italic, strikethrough::Strikethrough, text::Text},
toolkit::parser::{Branch, Parse},
};
use pretty_assertions::assert_eq;

#[test]
fn only_text() {
let mut b = Bold::default();
b.push(Text::new("B as bold"));
b.push_node(Text::new("B as bold").into());
let str = b.to_string();
assert_eq!(str, "**B as bold**".to_string());
}
Expand All @@ -159,39 +138,21 @@ mod tests {
#[test]
fn from_string() {
assert_eq!(
Bold::deserialize("**b**"),
Some(Bold::new(vec![Text::new("b").into()]))
Bold::parse("**b**", 0, None),
Some((Bold::new(vec![Text::new("b").into()]), 5))
);

assert_eq!(
Bold::deserialize("**b ~~st~~ _i t_**"),
Some(Bold::new(vec![
Text::new("b ").into(),
Strikethrough::new("st").into(),
Text::new(" ").into(),
Italic::new("i t").into()
]))
);
}

#[test]
fn len() {
assert_eq!(Bold::new(vec![Text::new("T").into()]).len(), 5);
assert_eq!(
Bold::new(vec![Text::new("T").into(), Strikethrough::new("S").into()]).len(),
10
Bold::parse("**b ~~st~~ _i t_**", 0, None),
Some((
Bold::new(vec![
Text::new("b ").into(),
Strikethrough::new("st").into(),
Text::new(" ").into(),
Italic::new("i t").into()
]),
18
))
);
}

#[test]
fn default() {
assert_eq!(Bold::default(), Bold::default());
}

#[test]
fn empty_bold() {
let b = Bold::new(vec![]);
assert_eq!(b.len(), 4);
assert_eq!(b.is_empty(), true);
}
}
Loading

0 comments on commit 5c2a9fc

Please sign in to comment.