diff --git a/Markdown/Markdown.PerformanceTests/Markdown.PerformanceTests.csproj b/Markdown/Markdown.PerformanceTests/Markdown.PerformanceTests.csproj new file mode 100644 index 000000000..f3221b756 --- /dev/null +++ b/Markdown/Markdown.PerformanceTests/Markdown.PerformanceTests.csproj @@ -0,0 +1,21 @@ + + + + Exe + net8.0 + enable + enable + + + + + + + + + + + + + + diff --git a/Markdown/Markdown.PerformanceTests/MdPerformanceTests.cs b/Markdown/Markdown.PerformanceTests/MdPerformanceTests.cs new file mode 100644 index 000000000..afb11db86 --- /dev/null +++ b/Markdown/Markdown.PerformanceTests/MdPerformanceTests.cs @@ -0,0 +1,55 @@ +using System.Text; +using FluentAssertions; +using NUnit.Framework; + +namespace Markdown.PerformanceTests; + +[TestFixture] +public class MdPerformanceTests +{ + private IRenderer _mdRenderer; + private PerformanceMeasurer _measurer; + + [SetUp] + public void SetUp() + { + _mdRenderer = DefaultMdFactory.CreateMd(); + _measurer = new PerformanceMeasurer(Console.WriteLine); + } + + [Test] + [TestCase("Hello _world_!\n", 18000)] + [TestCase("# _Hello_\n __world__!\n", 9000)] + [TestCase("This __text _contains_ nested__ markdown\n", 10000)] + [TestCase("This is _an example __of inversed__ nested_ markdown\n", 8000)] + [TestCase("Text_12_3\n", 15000)] + [TestCase("Text __that_12_3__ is in bold\n", 10000)] + [TestCase("_begin_ning\n", 20000)] + [TestCase("end_ing_\n", 20000)] + [TestCase("mi__ddl__e\n", 15000)] + [TestCase("This sh_ould not cha_nge\n", 10000)] + [TestCase("This sh__o_uld_ wo__rk like this\n", 9000)] + [TestCase("__Unpaired_ markdown\n", 20000)] + [TestCase("Another _unpaired markdown__\n", 18000)] + [TestCase("Intersecting _markdown __should_ work__ like this\n", 10000)] + [TestCase("This should ____ remain the same\n", 15000)] + [TestCase(@"This should \_not turn\_ into tags", 20000)] + [TestCase(@"This should \remain the\ same", 20000)] + public void PerformanceTest(string testInput, int stringRepetitions) + { + var str 
= ArrangePerformanceTest(testInput, stringRepetitions); + Console.WriteLine($"Total length: {str.Length}"); + + _measurer.MeasureAverageTime(() => _mdRenderer.Render(str), 10) + .Should() + .BeLessOrEqualTo(1000); + } + + private string ArrangePerformanceTest(string input, int copyCount) + { + var sb = new StringBuilder(); + for (var i = 0; i < copyCount; i++) + sb.Append(input); + return sb.ToString(); + } +} \ No newline at end of file diff --git a/Markdown/Markdown.Tests/Markdown.Tests.csproj b/Markdown/Markdown.Tests/Markdown.Tests.csproj new file mode 100644 index 000000000..94db3bb71 --- /dev/null +++ b/Markdown/Markdown.Tests/Markdown.Tests.csproj @@ -0,0 +1,20 @@ + + + + net8.0 + + false + + + + + + + + + + + + + + \ No newline at end of file diff --git a/Markdown/Markdown.Tests/MdTests.cs b/Markdown/Markdown.Tests/MdTests.cs new file mode 100644 index 000000000..9d9a4a8cd --- /dev/null +++ b/Markdown/Markdown.Tests/MdTests.cs @@ -0,0 +1,155 @@ +using FluentAssertions; +using NUnit.Framework; + +namespace Markdown.Tests; + +[TestFixture] +[TestOf(typeof(Md))] +public class MdTests +{ + private IRenderer _md; + + [SetUp] + public void SetUp() + { + _md = DefaultMdFactory.CreateMd(); + } + + [Test] + [Description("Базовые тесты")] + [TestCase("", "")] + [TestCase("Hello world", "Hello world")] + [TestCase("Hello _world_!", "Hello world!")] + [TestCase("# _Hello_ __world__!", "

Hello world!

")] + public void Render_ReturnsCorrectMarkdown_ForSimpleCases( + string input, + string expectedOutput) + { + _md.Render(input) + .Should() + .Be(expectedOutput); + } + + [Test] + [Description("Тесты на вложенность двойного и одинарного выделения")] + [TestCase("This __text _contains_ nested__ markdown", "This text contains nested markdown")] + [TestCase("This is _an example __of inversed__ nested_ markdown", "This is an example __of inversed__ nested markdown")] + public void Render_ReturnsCorrectMarkdown_ForCasesWithNesting( + string input, + string expectedOutput) + { + _md.Render(input) + .Should() + .Be(expectedOutput); + } + + [Test] + [Description("Тесты для разметки внутри текста с цифрами")] + [TestCase("Text_12_3", "Text_12_3")] + [TestCase("5__12_3__4", "5__12_3__4")] + [TestCase("Text __that_12_3__ is in bold", "Text that_12_3 is in bold")] + public void Render_ReturnsCorrectMarkdown_ForTextWithNumbers( + string input, + string expectedOutput) + { + _md.Render(input) + .Should() + .Be(expectedOutput); + } + + [Test] + [Description("Тесты для разметки внутри слов")] + [TestCase("_begin_ning", "beginning")] + [TestCase("mi_ddl_e", "middle")] + [TestCase("end_ing_", "ending")] + [TestCase("__begin__ning", "beginning")] + [TestCase("mi__ddl__e", "middle")] + [TestCase("end__ing__", "ending")] + public void Render_ReturnsCorrectMarkdown_ForPartsOfWords( + string input, + string expectedOutput) + { + _md.Render(input) + .Should() + .Be(expectedOutput); + } + + [Test] + [Description("Тесты для подчерков, находящихся внутри разных слов")] + [TestCase("This sh_ould not cha_nge", "This sh_ould not cha_nge")] + [TestCase("As w__ell a__s this", "As w__ell a__s this")] + [TestCase("This sh__o_uld_ wo__rk like this", "This sh__ould wo__rk like this")] + public void Render_ReturnsCorrectMarkdown_ForMarkdownInDifferentWords( + string input, + string expectedOutput) + { + _md.Render(input) + .Should() + .Be(expectedOutput); + } + + [Test] + [Description("Тесты для 
непарных символов разметки")] + [TestCase("__Unpaired_ markdown", "__Unpaired_ markdown")] + [TestCase("Another _unpaired markdown__", "Another _unpaired markdown__")] + public void Render_ReturnsCorrectMarkdown_ForUnpairedMarkdownSymbols( + string input, + string expectedOutput) + { + _md.Render(input) + .Should() + .Be(expectedOutput); + } + + [Test] + [Description("Проверяем, что подчерки должны следовать за (стоять перед) непробельным символом")] + [TestCase("This_ should not_ change", "This_ should not_ change")] + [TestCase("This _should _be in_ italics", "This should _be in italics")] + public void Render_ReturnsCorrectMarkdown_ForIncorrectlyPlacedUnderscores( + string input, + string expectedOutput) + { + _md.Render(input) + .Should() + .Be(expectedOutput); + } + + [Test] + [Description("Тесты на пересечение двойных и одинарных подчерков")] + [TestCase("Intersecting _markdown __should_ work__ like this", "Intersecting _markdown __should_ work__ like this")] + [TestCase("Another __example of _intersecting__ markdown_", "Another __example of _intersecting__ markdown_")] + public void Render_ReturnsCorrectMarkdown_ForIntersectingMarkdown( + string input, + string expectedOutput) + { + _md.Render(input) + .Should() + .Be(expectedOutput); + } + + [Test] + [Description("Тесты на пустую разметку")] + [TestCase("This should ____ remain the same", "This should ____ remain the same")] + [TestCase("This also should __ not change", "This also should __ not change")] + public void Render_ReturnsCorrectMarkdown_ForEmptyMarkdown( + string input, + string expectedOutput) + { + _md.Render(input) + .Should() + .Be(expectedOutput); + } + + [Test] + [Description("Тесты на экранирование")] + [TestCase(@"This should \_not turn\_ into tags", "This should _not turn_ into tags")] + [TestCase(@"This should \remain the\ same", @"This should \remain the\ same")] + public void Render_ReturnsCorrectMarkdown_ForEscapeCharacters( + string input, + string expectedOutput) + { + 
_md.Render(input) + .Should() + .Be(expectedOutput); + } +} \ No newline at end of file diff --git a/Markdown/Markdown.sln b/Markdown/Markdown.sln new file mode 100644 index 000000000..e1ba4dda3 --- /dev/null +++ b/Markdown/Markdown.sln @@ -0,0 +1,28 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Markdown", "Markdown\Markdown.csproj", "{B8FD8A48-C2C3-434B-953F-B9AF324E3E95}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Markdown.Tests", "Markdown.Tests\Markdown.Tests.csproj", "{0B1D2315-E457-4F38-92C9-5BC11A8752B6}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Markdown.PerformanceTests", "Markdown.PerformanceTests\Markdown.PerformanceTests.csproj", "{C030F3F2-BED4-42E7-830A-63B3A7541B4C}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Any CPU = Debug|Any CPU + Release|Any CPU = Release|Any CPU + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {B8FD8A48-C2C3-434B-953F-B9AF324E3E95}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {B8FD8A48-C2C3-434B-953F-B9AF324E3E95}.Debug|Any CPU.Build.0 = Debug|Any CPU + {B8FD8A48-C2C3-434B-953F-B9AF324E3E95}.Release|Any CPU.ActiveCfg = Release|Any CPU + {B8FD8A48-C2C3-434B-953F-B9AF324E3E95}.Release|Any CPU.Build.0 = Release|Any CPU + {0B1D2315-E457-4F38-92C9-5BC11A8752B6}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {0B1D2315-E457-4F38-92C9-5BC11A8752B6}.Debug|Any CPU.Build.0 = Debug|Any CPU + {0B1D2315-E457-4F38-92C9-5BC11A8752B6}.Release|Any CPU.ActiveCfg = Release|Any CPU + {0B1D2315-E457-4F38-92C9-5BC11A8752B6}.Release|Any CPU.Build.0 = Release|Any CPU + {C030F3F2-BED4-42E7-830A-63B3A7541B4C}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {C030F3F2-BED4-42E7-830A-63B3A7541B4C}.Debug|Any CPU.Build.0 = Debug|Any CPU + {C030F3F2-BED4-42E7-830A-63B3A7541B4C}.Release|Any CPU.ActiveCfg = Release|Any CPU + {C030F3F2-BED4-42E7-830A-63B3A7541B4C}.Release|Any 
CPU.Build.0 = Release|Any CPU + EndGlobalSection +EndGlobal diff --git a/Markdown/Markdown/AbstractSyntaxTree/AbstractSyntaxTreeNodeView.cs b/Markdown/Markdown/AbstractSyntaxTree/AbstractSyntaxTreeNodeView.cs new file mode 100644 index 000000000..d39c4a2ad --- /dev/null +++ b/Markdown/Markdown/AbstractSyntaxTree/AbstractSyntaxTreeNodeView.cs @@ -0,0 +1,8 @@ +using Markdown.NodeView; + +namespace Markdown.AbstractSyntaxTree; + +public record AbstractSyntaxTreeNodeView( + ReadOnlyMemory Text, + TTokenType TokenType) + : BaseNodeView; \ No newline at end of file diff --git a/Markdown/Markdown/AbstractSyntaxTree/IAbstractSyntaxTree.cs b/Markdown/Markdown/AbstractSyntaxTree/IAbstractSyntaxTree.cs new file mode 100644 index 000000000..3482e1b4f --- /dev/null +++ b/Markdown/Markdown/AbstractSyntaxTree/IAbstractSyntaxTree.cs @@ -0,0 +1,11 @@ +using Markdown.NodeView; +using Markdown.SyntaxRules; +using Markdown.Traversable; + +namespace Markdown.AbstractSyntaxTree; + +public interface IAbstractSyntaxTree : ITraversable> +{ + public IAbstractSyntaxTree AddRule(ISyntaxRule rule); + public IAbstractSyntaxTree ApplyRules(); +} \ No newline at end of file diff --git a/Markdown/Markdown/AbstractSyntaxTree/MdAbstractSyntaxTree.cs b/Markdown/Markdown/AbstractSyntaxTree/MdAbstractSyntaxTree.cs new file mode 100644 index 000000000..74de42377 --- /dev/null +++ b/Markdown/Markdown/AbstractSyntaxTree/MdAbstractSyntaxTree.cs @@ -0,0 +1,122 @@ +using System.Collections.Immutable; +using Markdown.NodeView; +using Markdown.ParseTree; +using Markdown.SyntaxRules; +using Markdown.Token; + +namespace Markdown.AbstractSyntaxTree; + +public class MdAbstractSyntaxTree : IAbstractSyntaxTree +{ + private class Node : INodeView + { + public Node( + MdTokenType type, + ReadOnlyMemory? text = null, + Node? parent = null, + bool insideWord = false) + { + Type = type; + Text = text ?? 
ReadOnlyMemory.Empty; + Parent = parent; + Children = new List>(); + InsideWord = insideWord; + } + + public ReadOnlyMemory Text { get; set; } + public MdTokenType Type { get; set; } + public bool InsideWord { get; set; } + public List> Children { get; set; } + public INodeView? Parent { get; set; } + } + + private Node _root; + private Node _current; + + private ImmutableList> _rules; + + private MdAbstractSyntaxTree() + { + _root = new Node(MdTokenType.Document); + _current = _root; + _rules = ImmutableList>.Empty; + } + + public static MdAbstractSyntaxTree FromParseTree(IParseTree parseTree) + { + var syntaxTree = new MdAbstractSyntaxTree(); + foreach (var baseView in parseTree.Traverse()) + { + if (baseView is ParseTreeNodeView nodeView) + { + if (nodeView.TokenType != MdTokenType.Document) + { + if (nodeView.Complete) + { + var newNode = new Node( + nodeView.TokenType, nodeView.Text, syntaxTree._current, nodeView.insideWord); + if (nodeView.TokenType != MdTokenType.PlainText) + syntaxTree.AddNode(newNode); + else + syntaxTree._current.Children.Add(newNode); + } + else + { + var newNode = new Node(MdTokenType.PlainText, nodeView.Text, syntaxTree._current); + syntaxTree._current.Children.Add(newNode); + } + } + } + else if (baseView is ViewEnd) + syntaxTree.EndCurrentNode(); + else + throw new InvalidOperationException("Unexpected node type"); + } + return syntaxTree; + } + + private void AddNode(Node node) + { + _current.Children.Add(node); + _current = node; + } + + private void EndCurrentNode() + { + if (_current != _root) + _current = (Node) _current.Parent!; + } + + public IEnumerable> Traverse() + { + return Traverse(_root); + } + + private static IEnumerable> Traverse(INodeView node) + { + yield return new AbstractSyntaxTreeNodeView(node.Text, node.Type); + var childNodes = node.Children.SelectMany(Traverse).ToList(); + foreach (var childNode in childNodes) + yield return childNode; + if (childNodes.Count > 0) + yield return new ViewEnd(node.Type); + } + 
+    public IAbstractSyntaxTree AddRule(ISyntaxRule rule)
+    {
+        _rules = _rules.Add(rule);
+        return this;
+    }
+
+    public IAbstractSyntaxTree ApplyRules()
+    {
+        INodeView syntaxTree = _root;
+        syntaxTree = _rules
+            .Aggregate(syntaxTree,
+                (current, rule) => rule.Apply(current));
+        _root = (Node) syntaxTree;
+        _current = _root;
+        _rules = ImmutableList>.Empty;
+        return this;
+    }
+}
\ No newline at end of file
diff --git a/Markdown/Markdown/ArgumentExceptionHelpers.cs b/Markdown/Markdown/ArgumentExceptionHelpers.cs
new file mode 100644
index 000000000..2fb150dc7
--- /dev/null
+++ b/Markdown/Markdown/ArgumentExceptionHelpers.cs
@@ -0,0 +1,16 @@
+namespace Markdown;
+
+/// <summary>
+/// Guard helpers that turn a failed precondition into an <see cref="ArgumentException"/>
+/// carrying a caller-supplied message.
+/// </summary>
+public static class ArgumentExceptionHelpers
+{
+    /// <summary>Throws when <paramref name="flag"/> is <c>false</c>.</summary>
+    /// <param name="flag">Result of the precondition check.</param>
+    /// <param name="message">Message of the thrown exception.</param>
+    /// <exception cref="ArgumentException"><paramref name="flag"/> is <c>false</c>.</exception>
+    public static void ThrowIfFalse(bool flag, string message)
+    {
+        if (!flag)
+            throw new ArgumentException(message);
+    }
+
+    /// <summary>Throws when <paramref name="obj"/> is <c>null</c>.</summary>
+    /// <param name="obj">Value to check.</param>
+    /// <param name="message">Message of the thrown exception.</param>
+    /// <exception cref="ArgumentNullException"><paramref name="obj"/> is <c>null</c>.</exception>
+    public static void ThrowIfNull(object? obj, string message)
+    {
+        // ArgumentNullException derives from ArgumentException, so callers that catch the
+        // base type are unaffected. With a null paramName, Message is exactly `message`.
+        if (obj is null)
+            throw new ArgumentNullException(paramName: null, message);
+    }
+}
\ No newline at end of file
diff --git a/Markdown/Markdown/DefaultMdFactory.cs b/Markdown/Markdown/DefaultMdFactory.cs
new file mode 100644
index 000000000..be5495e05
--- /dev/null
+++ b/Markdown/Markdown/DefaultMdFactory.cs
@@ -0,0 +1,44 @@
+using Markdown.Parser;
+using Markdown.ParseTree;
+using Markdown.SyntaxRules;
+using Markdown.Token;
+using Markdown.Tokenizer;
+
+namespace Markdown;
+
+public static class DefaultMdFactory
+{
+    private static readonly char[] _delimiters = { ' ', '\t', '\n', '\r', ',', '.', '!', '?' 
};
+
+    private static readonly Dictionary _tokenAliases = new()
+    {
+        { "_", MdTokenType.Italic },
+        { "__", MdTokenType.Bold },
+        { "# ", MdTokenType.Heading },
+        { "\n", MdTokenType.Line }
+    };
+
+    private static readonly Dictionary _tokenTags = new()
+    {
+        { MdTokenType.Italic, "em" },
+        { MdTokenType.Bold, "strong" },
+        { MdTokenType.Heading, "h1" }
+    };
+
+    private static readonly List> _syntaxRules = new()
+    {
+        new NestingRule(),
+        new NumberRule(),
+        new TokensInDifferentWordsRule(_delimiters)
+    };
+
+    public static Md CreateMd()
+    {
+        return new Md(
+            _tokenTags,
+            new MdTokenizer(
+                _tokenAliases, '\\', _delimiters),
+            new MdParser(new MdParseTree()),
+            _syntaxRules.ToArray());
+    }
+}
\ No newline at end of file
diff --git a/Markdown/Markdown/Extensions/ReadOnlyMemoryExtensions.cs b/Markdown/Markdown/Extensions/ReadOnlyMemoryExtensions.cs
new file mode 100644
index 000000000..3d71a4769
--- /dev/null
+++ b/Markdown/Markdown/Extensions/ReadOnlyMemoryExtensions.cs
@@ -0,0 +1,34 @@
+using System.Runtime.InteropServices;
+
+namespace Markdown.Extensions;
+
+/// <summary>
+/// Character searches over string-backed memory that scan the backing string directly.
+/// </summary>
+public static class ReadOnlyMemoryExtensions
+{
+    /// <summary>Checks whether <paramref name="value"/> occurs in <paramref name="memory"/>.</summary>
+    /// <param name="memory">Memory to scan; must be backed by a <see cref="string"/>.</param>
+    /// <param name="value">Character to look for.</param>
+    /// <returns><c>true</c> when the character is found inside the memory window.</returns>
+    /// <exception cref="ArgumentException">The memory is not backed by a string.</exception>
+    public static bool Contains(this ReadOnlyMemory memory, char value)
+    {
+        ArgumentExceptionHelpers.ThrowIfFalse(
+            MemoryMarshal.TryGetString(memory, out var str, out var start, out var length),
+            "Underlying object in the input argument is not a string");
+        // start/length describe the window of `memory` inside the backing string.
+        for (var i = start; i < start + length; i++)
+        {
+            if (str![i] == value)
+                return true;
+        }
+
+        return false;
+    }
+
+    /// <summary>Checks whether <paramref name="memory"/> contains an ASCII digit ('0'-'9').</summary>
+    /// <param name="memory">Memory to scan; must be backed by a <see cref="string"/>.</param>
+    /// <returns><c>true</c> when at least one digit is found inside the memory window.</returns>
+    /// <exception cref="ArgumentException">The memory is not backed by a string.</exception>
+    public static bool ContainsNumber(this ReadOnlyMemory memory)
+    {
+        ArgumentExceptionHelpers.ThrowIfFalse(
+            MemoryMarshal.TryGetString(memory, out var str, out var start, out var length),
+            "Underlying object in the input argument is not a string");
+        for (var i = start; i < start + length; i++)
+        {
+            // Same accepted set as int.TryParse on a one-character string ('0'-'9' only),
+            // without allocating a string per character.
+            if (char.IsAsciiDigit(str![i]))
+                return true;
+        }
+
+        return false;
+    }
+}
\ No newline at end of file
diff --git a/Markdown/Markdown/IRenderer.cs 
b/Markdown/Markdown/IRenderer.cs new file mode 100644 index 000000000..0314b9797 --- /dev/null +++ b/Markdown/Markdown/IRenderer.cs @@ -0,0 +1,6 @@ +namespace Markdown; + +public interface IRenderer +{ + public string Render(string input); +} \ No newline at end of file diff --git a/Markdown/Markdown/Markdown.csproj b/Markdown/Markdown/Markdown.csproj new file mode 100644 index 000000000..2f4fc7765 --- /dev/null +++ b/Markdown/Markdown/Markdown.csproj @@ -0,0 +1,10 @@ + + + + Exe + net8.0 + enable + enable + + + diff --git a/Markdown/Markdown/Md.cs b/Markdown/Markdown/Md.cs new file mode 100644 index 000000000..c4ad3f84b --- /dev/null +++ b/Markdown/Markdown/Md.cs @@ -0,0 +1,53 @@ +using System.Text; +using Markdown.AbstractSyntaxTree; +using Markdown.NodeView; +using Markdown.Parser; +using Markdown.SyntaxRules; +using Markdown.Token; +using Markdown.Tokenizer; + +namespace Markdown; + +public class Md( + Dictionary tokenTags, + ITokenizer tokenizer, + IParser parser, + ISyntaxRule[] syntaxRules) : IRenderer +{ + public string Render(string input) + { + var tokens = tokenizer.Tokenize(input.AsMemory()); + var parseTree = parser.Parse(tokens); + var syntaxTree = MdAbstractSyntaxTree.FromParseTree(parseTree); + + foreach (var syntaxRule in syntaxRules) + syntaxTree.AddRule(syntaxRule); + + return syntaxTree + .ApplyRules() + .Traverse() + .Aggregate(new StringBuilder(), + (sb, node) => ProcessNode(node, sb)) + .ToString(); + } + + private StringBuilder ProcessNode(BaseNodeView? 
node, StringBuilder sb) + { + if (node is AbstractSyntaxTreeNodeView nodeView) + { + if (nodeView.TokenType is MdTokenType.PlainText or MdTokenType.Document or MdTokenType.Line) + sb.Append(nodeView.Text); + else + sb.Append($"<{tokenTags[nodeView.TokenType]}>"); + } + else if (node is ViewEnd viewEnd) + { + if (viewEnd.TokenType is not (MdTokenType.PlainText or MdTokenType.Document or MdTokenType.Line)) + { + sb.Append($""); + } + } + + return sb; + } +} \ No newline at end of file diff --git a/Markdown/Markdown/NodeView/BaseNodeView.cs b/Markdown/Markdown/NodeView/BaseNodeView.cs new file mode 100644 index 000000000..b8464af85 --- /dev/null +++ b/Markdown/Markdown/NodeView/BaseNodeView.cs @@ -0,0 +1,3 @@ +namespace Markdown.NodeView; + +public record BaseNodeView(); \ No newline at end of file diff --git a/Markdown/Markdown/NodeView/INodeView.cs b/Markdown/Markdown/NodeView/INodeView.cs new file mode 100644 index 000000000..3840158ee --- /dev/null +++ b/Markdown/Markdown/NodeView/INodeView.cs @@ -0,0 +1,10 @@ +namespace Markdown.NodeView; + +public interface INodeView +{ + public ReadOnlyMemory Text { get; set; } + public TTokenType Type { get; set; } + public bool InsideWord { get; set; } + public List> Children { get; set; } + public INodeView? 
Parent { get; set; } +} \ No newline at end of file diff --git a/Markdown/Markdown/NodeView/ViewEnd.cs b/Markdown/Markdown/NodeView/ViewEnd.cs new file mode 100644 index 000000000..154680f22 --- /dev/null +++ b/Markdown/Markdown/NodeView/ViewEnd.cs @@ -0,0 +1,3 @@ +namespace Markdown.NodeView; + +public record ViewEnd(TTokenType TokenType) : BaseNodeView; \ No newline at end of file diff --git a/Markdown/Markdown/ParseTree/IParseTree.cs b/Markdown/Markdown/ParseTree/IParseTree.cs new file mode 100644 index 000000000..a34685935 --- /dev/null +++ b/Markdown/Markdown/ParseTree/IParseTree.cs @@ -0,0 +1,11 @@ +using Markdown.NodeView; +using Markdown.Traversable; + +namespace Markdown.ParseTree; + +public interface IParseTree : ITraversable> +{ + public ParseTreeNodeView CurrentToken { get; } + public void OpenToken(TTokenType tokenType, ReadOnlyMemory text, bool insideWord = false); + public void CloseCurrentToken(bool complete); +} \ No newline at end of file diff --git a/Markdown/Markdown/ParseTree/MdParseTree.cs b/Markdown/Markdown/ParseTree/MdParseTree.cs new file mode 100644 index 000000000..7a54402c4 --- /dev/null +++ b/Markdown/Markdown/ParseTree/MdParseTree.cs @@ -0,0 +1,75 @@ +using Markdown.NodeView; +using Markdown.Token; + +namespace Markdown.ParseTree; + +public class MdParseTree : IParseTree +{ + private class Node + { + public Node(MdTokenType type, + bool complete = false, + bool insideWord = false, + ReadOnlyMemory? text = null, + Node? parent = null + ) + { + Type = type; + Children = new List(); + Complete = complete; + InsideWord = insideWord; + Text = text ?? ReadOnlyMemory.Empty; + Parent = parent; + } + + public ReadOnlyMemory Text { get; set; } + public MdTokenType Type { get; set; } + public bool Complete { get; set; } + public bool InsideWord { get; set; } + public List Children { get; set; } + public Node? 
Parent { get; set; } + } + + private readonly Node _root; + private Node _current; + + public MdParseTree() + { + _root = new Node(MdTokenType.Document, true); + _current = _root; + } + + public ParseTreeNodeView CurrentToken => + new(_current.Text, _current.Type, _current.Children.Count == 0, _current.Complete, _current.InsideWord); + + public void OpenToken(MdTokenType tokenType, ReadOnlyMemory text, bool insideWord = false) + { + var newNode = new Node(tokenType, false, insideWord, text, _current); + _current.Children.Add(newNode); + _current = newNode; + } + + public void CloseCurrentToken(bool complete) + { + if (_current == _root) + throw new InvalidOperationException("Cannot call CloseCurrentToken when on root node"); + _current.Complete = complete; + _current = _current.Parent!; + } + + public IEnumerable> Traverse() + { + return Traverse(_root); + } + + private static IEnumerable> Traverse(Node node) + { + yield return new ParseTreeNodeView( + node.Text, node.Type, node.Children.Count == 0, node.Complete, node.InsideWord); + var childNodes = node.Children.SelectMany(Traverse).ToList(); + foreach (var childNode in childNodes) + yield return childNode; + if (childNodes.Count > 0) + yield return new ViewEnd(node.Type); + } +} \ No newline at end of file diff --git a/Markdown/Markdown/ParseTree/ParseTreeNodeView.cs b/Markdown/Markdown/ParseTree/ParseTreeNodeView.cs new file mode 100644 index 000000000..01fd3d6fd --- /dev/null +++ b/Markdown/Markdown/ParseTree/ParseTreeNodeView.cs @@ -0,0 +1,10 @@ +using Markdown.NodeView; + +namespace Markdown.ParseTree; + +public record ParseTreeNodeView( + ReadOnlyMemory Text, + TTokenType TokenType, + bool Empty, + bool Complete, + bool insideWord) : BaseNodeView; \ No newline at end of file diff --git a/Markdown/Markdown/Parser/IParser.cs b/Markdown/Markdown/Parser/IParser.cs new file mode 100644 index 000000000..fba975904 --- /dev/null +++ b/Markdown/Markdown/Parser/IParser.cs @@ -0,0 +1,8 @@ +using Markdown.ParseTree; + 
+namespace Markdown.Parser; + +public interface IParser +{ + public IParseTree Parse(IEnumerable tokens); +} \ No newline at end of file diff --git a/Markdown/Markdown/Parser/MdParser.cs b/Markdown/Markdown/Parser/MdParser.cs new file mode 100644 index 000000000..d9ee024fb --- /dev/null +++ b/Markdown/Markdown/Parser/MdParser.cs @@ -0,0 +1,78 @@ +using Markdown.ParseTree; +using Markdown.Token; + +namespace Markdown.Parser; + +public class MdParser(IParseTree parseTree) : IParser +{ + public IParseTree Parse(IEnumerable tokens) + { + parseTree.OpenToken(MdTokenType.Line, ReadOnlyMemory.Empty); + foreach (var token in tokens) + { + if (token.Type == MdTokenType.Heading) + { + if (parseTree.CurrentToken is { TokenType: MdTokenType.Line, Empty: true }) + { + parseTree.OpenToken(token.Type, token.Text); + } + else + { + parseTree.OpenToken(token.Type, token.Text); + parseTree.CloseCurrentToken(false); + } + } + else if (token.Type == MdTokenType.Line) + { + while (parseTree.CurrentToken.TokenType != MdTokenType.Document) + parseTree.CloseCurrentToken( + parseTree.CurrentToken.TokenType is MdTokenType.Heading or MdTokenType.Line); + parseTree.OpenToken(token.Type, token.Text); + } + else if (token.Behaviour == MdTokenBehaviour.Opening + && parseTree.CurrentToken.TokenType != token.Type) + { + parseTree.OpenToken(token.Type, token.Text); + } + else if (token.Behaviour == MdTokenBehaviour.Closing + && parseTree.CurrentToken.TokenType == token.Type) + { + if (parseTree.CurrentToken.Empty) + { + parseTree.CloseCurrentToken(false); + parseTree.OpenToken(token.Type, token.Text); + parseTree.CloseCurrentToken(false); + } + else + parseTree.CloseCurrentToken(true); + } + else if (token.Type == MdTokenType.PlainText) + { + parseTree.OpenToken(token.Type, token.Text); + parseTree.CloseCurrentToken(true); + } + else if (token.Behaviour == MdTokenBehaviour.InsideAWord) + { + if (parseTree.CurrentToken.TokenType == token.Type) + parseTree.CloseCurrentToken(true); + else + 
parseTree.OpenToken(token.Type, token.Text, true);
+            }
+            else
+            {
+                if (token.Behaviour == MdTokenBehaviour.Closing
+                    && parseTree.CurrentToken.TokenType != MdTokenType.Document
+                    && parseTree.CurrentToken.TokenType != MdTokenType.Line)
+                    parseTree.CloseCurrentToken(false);
+                parseTree.OpenToken(token.Type, token.Text);
+                parseTree.CloseCurrentToken(false);
+            }
+        }
+
+        while (parseTree.CurrentToken.TokenType != MdTokenType.Document)
+            parseTree.CloseCurrentToken(
+                parseTree.CurrentToken.TokenType is MdTokenType.Heading or MdTokenType.Line);
+
+        return parseTree;
+    }
+}
\ No newline at end of file
diff --git a/Markdown/Markdown/PerformanceMeasurer.cs b/Markdown/Markdown/PerformanceMeasurer.cs
new file mode 100644
index 000000000..a609a52d4
--- /dev/null
+++ b/Markdown/Markdown/PerformanceMeasurer.cs
@@ -0,0 +1,23 @@
+using System.Diagnostics;
+
+namespace Markdown;
+
+/// <summary>
+/// Times repeated runs of an action and reports the average through the supplied log callback.
+/// </summary>
+public class PerformanceMeasurer(Action logAction)
+{
+    /// <summary>
+    /// Runs <paramref name="action"/> <paramref name="times"/> times and returns the average
+    /// wall-clock duration of one run, rounded to whole milliseconds.
+    /// </summary>
+    /// <param name="action">Code to measure.</param>
+    /// <param name="times">Number of runs; must be positive.</param>
+    /// <returns>Average duration in milliseconds.</returns>
+    /// <exception cref="ArgumentNullException"><paramref name="action"/> is <c>null</c>.</exception>
+    /// <exception cref="ArgumentOutOfRangeException"><paramref name="times"/> is zero or negative.</exception>
+    public long MeasureAverageTime(Action action, int times)
+    {
+        ArgumentNullException.ThrowIfNull(action);
+        if (times <= 0)
+            throw new ArgumentOutOfRangeException(nameof(times), times, "At least one run is required");
+
+        // Accumulate fractional milliseconds: truncating every sample to a whole millisecond
+        // first would report 0 for any action that finishes in under 1 ms.
+        var totalMilliseconds = 0.0;
+        var stopwatch = new Stopwatch();
+        for (var i = 0; i < times; i++)
+        {
+            stopwatch.Restart();
+            action();
+            stopwatch.Stop();
+            totalMilliseconds += stopwatch.Elapsed.TotalMilliseconds;
+        }
+
+        var time = (long)Math.Round(totalMilliseconds / times);
+        logAction($"Average time in ms: {time}");
+        return time;
+    }
+}
\ No newline at end of file
diff --git a/Markdown/Markdown/Program.cs b/Markdown/Markdown/Program.cs
new file mode 100644
index 000000000..aae97738e
--- /dev/null
+++ b/Markdown/Markdown/Program.cs
@@ -0,0 +1,4 @@
+using Markdown;
+
+var md = DefaultMdFactory.CreateMd();
+Console.WriteLine(md.Render("# Hello World! 
_some words_ in italics\n__some other text__")); diff --git a/Markdown/Markdown/SyntaxRules/ISyntaxRule.cs b/Markdown/Markdown/SyntaxRules/ISyntaxRule.cs new file mode 100644 index 000000000..7b6f7c47d --- /dev/null +++ b/Markdown/Markdown/SyntaxRules/ISyntaxRule.cs @@ -0,0 +1,9 @@ +using Markdown.AbstractSyntaxTree; +using Markdown.NodeView; + +namespace Markdown.SyntaxRules; + +public interface ISyntaxRule +{ + public INodeView Apply(INodeView nodeView); +} \ No newline at end of file diff --git a/Markdown/Markdown/SyntaxRules/MdValidationRule.cs b/Markdown/Markdown/SyntaxRules/MdValidationRule.cs new file mode 100644 index 000000000..71f34bb4a --- /dev/null +++ b/Markdown/Markdown/SyntaxRules/MdValidationRule.cs @@ -0,0 +1,33 @@ +using Markdown.NodeView; +using Markdown.Token; + +namespace Markdown.SyntaxRules; + +public abstract class MdValidationRule : ISyntaxRule +{ + protected abstract bool CheckNode(INodeView currentNode, INodeView parentNode); + + public INodeView Apply(INodeView nodeView) + { + for (var i = 0; i < nodeView.Children.Count; i++) + { + var childNode = nodeView.Children[i]; + if (CheckNode(childNode, nodeView)) + { + childNode.Type = MdTokenType.PlainText; + foreach (var toMove in childNode.Children.AsEnumerable().Reverse()) + { + nodeView.Children.Insert(i + 1, toMove); + toMove.Parent = nodeView; + } + + nodeView.Children.Insert(i + 1 + childNode.Children.Count, childNode); + childNode.Children.Clear(); + } + + Apply(childNode); + } + + return nodeView; + } +} \ No newline at end of file diff --git a/Markdown/Markdown/SyntaxRules/NestingRule.cs b/Markdown/Markdown/SyntaxRules/NestingRule.cs new file mode 100644 index 000000000..a7199d28d --- /dev/null +++ b/Markdown/Markdown/SyntaxRules/NestingRule.cs @@ -0,0 +1,12 @@ +using Markdown.NodeView; +using Markdown.Token; + +namespace Markdown.SyntaxRules; + +public class NestingRule : MdValidationRule +{ + protected override bool CheckNode(INodeView currentNode, INodeView parentNode) + { + 
return currentNode.Type == MdTokenType.Bold && parentNode.Type == MdTokenType.Italic; + } +} \ No newline at end of file diff --git a/Markdown/Markdown/SyntaxRules/NumberRule.cs b/Markdown/Markdown/SyntaxRules/NumberRule.cs new file mode 100644 index 000000000..bdb89e726 --- /dev/null +++ b/Markdown/Markdown/SyntaxRules/NumberRule.cs @@ -0,0 +1,15 @@ +using Markdown.Extensions; +using Markdown.NodeView; +using Markdown.Token; + +namespace Markdown.SyntaxRules; + +public class NumberRule : MdValidationRule +{ + protected override bool CheckNode(INodeView currentNode, INodeView parentNode) + { + return currentNode is { InsideWord: true, Type: MdTokenType.Bold or MdTokenType.Italic } + && currentNode.Children.Any( + n => n.Text.ContainsNumber()); + } +} \ No newline at end of file diff --git a/Markdown/Markdown/SyntaxRules/TokensInDifferentWordsRule.cs b/Markdown/Markdown/SyntaxRules/TokensInDifferentWordsRule.cs new file mode 100644 index 000000000..d6a7c94b2 --- /dev/null +++ b/Markdown/Markdown/SyntaxRules/TokensInDifferentWordsRule.cs @@ -0,0 +1,15 @@ +using Markdown.Extensions; +using Markdown.NodeView; +using Markdown.Token; + +namespace Markdown.SyntaxRules; + +public class TokensInDifferentWordsRule(char[] delimiters) : MdValidationRule +{ + protected override bool CheckNode(INodeView currentNode, INodeView parentNode) + { + return currentNode is { InsideWord: true, Type: MdTokenType.Bold or MdTokenType.Italic } + && currentNode.Children.Any( + n => delimiters.Any(x => n.Text.Contains(x))); + } +} \ No newline at end of file diff --git a/Markdown/Markdown/Token/MdToken.cs b/Markdown/Markdown/Token/MdToken.cs new file mode 100644 index 000000000..2c557cd27 --- /dev/null +++ b/Markdown/Markdown/Token/MdToken.cs @@ -0,0 +1,3 @@ +namespace Markdown.Token; + +public record MdToken(MdTokenType Type, MdTokenBehaviour Behaviour, ReadOnlyMemory Text); \ No newline at end of file diff --git a/Markdown/Markdown/Token/MdTokenBehaviour.cs 
b/Markdown/Markdown/Token/MdTokenBehaviour.cs
new file mode 100644
index 000000000..77c85783b
--- /dev/null
+++ b/Markdown/Markdown/Token/MdTokenBehaviour.cs
@@ -0,0 +1,14 @@
+namespace Markdown.Token;
+
+/// <summary>Role a token plays relative to the text around it, as assigned by the tokenizer.</summary>
+public enum MdTokenBehaviour
+{
+    /// <summary>The token is directly followed by a non-delimiter character.</summary>
+    Opening,
+    /// <summary>The token is directly preceded by a non-delimiter character.</summary>
+    Closing,
+    /// <summary>The token has non-delimiter characters on both sides.</summary>
+    InsideAWord,
+    /// <summary>No role; the tokenizer uses it for plain-text tokens.</summary>
+    Undefined,
+}
\ No newline at end of file
diff --git a/Markdown/Markdown/Token/MdTokenType.cs b/Markdown/Markdown/Token/MdTokenType.cs
new file mode 100644
index 000000000..2a1af971b
--- /dev/null
+++ b/Markdown/Markdown/Token/MdTokenType.cs
@@ -0,0 +1,14 @@
+namespace Markdown.Token;
+
+/// <summary>Kinds of tokens and nodes known to the markdown processor.</summary>
+public enum MdTokenType
+{
+    PlainText,
+    Document,
+    Line,
+    Italic,
+    Bold,
+    Heading,
+    UnorderedList,
+    UnorderedListItem,
+}
\ No newline at end of file
diff --git a/Markdown/Markdown/TokenType.cs b/Markdown/Markdown/TokenType.cs
new file mode 100644
index 000000000..5816793d9
--- /dev/null
+++ b/Markdown/Markdown/TokenType.cs
@@ -0,0 +1,12 @@
+namespace Markdown;
+
+/// <summary>Token kinds: plain text, italic, bold and heading.</summary>
+// NOTE(review): nothing in this part of the patch references this enum (the tokenizer and the rules
+// use Markdown.Token.MdTokenType) - confirm that it is still needed.
+public enum TokenType
+{
+    PlainText,
+    Italic,
+    Bold,
+    Heading
+}
\ No newline at end of file
diff --git a/Markdown/Markdown/Tokenizer/ITokenizer.cs b/Markdown/Markdown/Tokenizer/ITokenizer.cs
new file mode 100644
index 000000000..4f72a6ea0
--- /dev/null
+++ b/Markdown/Markdown/Tokenizer/ITokenizer.cs
@@ -0,0 +1,10 @@
+using Markdown.Token;
+
+namespace Markdown.Tokenizer;
+
+/// <summary>Turns text into a sequence of tokens.</summary>
+public interface ITokenizer
+{
+    /// <summary>Splits <paramref name="input"/> into tokens.</summary>
+    public IEnumerable<MdToken> Tokenize(ReadOnlyMemory<char> input);
+}
\ No newline at end of file
diff --git a/Markdown/Markdown/Tokenizer/MdTokenizer.cs b/Markdown/Markdown/Tokenizer/MdTokenizer.cs
new file mode 100644
index 000000000..a6f795b8c
--- /dev/null
+++ b/Markdown/Markdown/Tokenizer/MdTokenizer.cs
@@ -0,0 +1,194 @@
+using System.Diagnostics.CodeAnalysis;
+using System.Runtime.InteropServices;
+using Markdown.Token;
+
+namespace Markdown.Tokenizer;
+
+/// <summary>
+/// Splits text into a flat sequence of <see cref="MdToken"/>s: one token per recognised markup
+/// alias and plain-text tokens for everything in between.
+/// </summary>
+/// <param name="tokenAliases">Maps each markup alias (the literal characters in the text) to its token type.</param>
+/// <param name="escapeCharacter">Character that turns the markup character right after it into plain text.</param>
+/// <param name="wordDelimiters">Characters treated as word boundaries when classifying a token.</param>
+public class MdTokenizer(
+    Dictionary<string, MdTokenType> tokenAliases,
+    char escapeCharacter,
+    char[] wordDelimiters) : ITokenizer
+{
+    /// <summary>An alias recognised at some position, with its token type and behaviour.</summary>
+    private sealed record TokenInfo(MdTokenType TokenType, string TokenAlias, MdTokenBehaviour TokenBehaviour);
+
+    /// <summary>Tokenizes <paramref name="input"/>; the text of every token is a slice of it.</summary>
+    /// <param name="input">Text to tokenize. It must be backed by a string and may be a slice of one.</param>
+    /// <returns>The tokens in the order in which they occur in the text.</returns>
+    public IEnumerable<MdToken> Tokenize(ReadOnlyMemory<char> input)
+    {
+        // Checked here instead of inside the iterator so that a bad argument is reported by this
+        // call and not by the first MoveNext of the lazily evaluated result.
+        ArgumentExceptionHelpers.ThrowIfFalse(
+            MemoryMarshal.TryGetString(input, out var str, out var start, out var length),
+            "Underlying object in the input argument is not a string");
+
+        // TryGetString returns the WHOLE backing string plus the window that input covers. Cut out
+        // exactly that window so that every index below is relative to input (which is what
+        // input.Slice expects) and neighbour look-ups cannot read characters outside of it.
+        // Substring returns the same instance when the window is the whole string.
+        var text = str!.Substring(start, length);
+        return TokenizeCore(input, text);
+    }
+
+    /// <summary>
+    /// Iterator behind <see cref="Tokenize"/>. <paramref name="text"/> holds the same characters
+    /// as <paramref name="input"/>, so one index addresses both.
+    /// </summary>
+    private IEnumerable<MdToken> TokenizeCore(ReadOnlyMemory<char> input, string text)
+    {
+        const int NoRun = -1;
+        var plainTextStart = NoRun; // where the pending plain-text run begins, or NoRun
+
+        var i = 0;
+        while (i < text.Length)
+        {
+            var isEscape = text[i] == escapeCharacter && i + 1 < text.Length;
+
+            if (isEscape && TryMatchTokenAliases(text, i + 1, out _))
+            {
+                // Escaped markup: flush the pending run, if there is one (the escape character
+                // is left out of it), then emit the escaped character as plain text.
+                if (plainTextStart != NoRun)
+                    yield return CreatePlainText(input, plainTextStart, i - plainTextStart);
+                yield return CreatePlainText(input, i + 1, 1);
+
+                plainTextStart = NoRun;
+                i += 2;
+            }
+            else if (!isEscape && TryMatchTokenAliases(text, i, out var tokenInfo))
+            {
+                if (plainTextStart != NoRun)
+                    yield return CreatePlainText(input, plainTextStart, i - plainTextStart);
+                yield return new MdToken(
+                    tokenInfo.TokenType, tokenInfo.TokenBehaviour, input.Slice(i, tokenInfo.TokenAlias.Length));
+
+                plainTextStart = NoRun;
+                i += tokenInfo.TokenAlias.Length;
+            }
+            else
+            {
+                // An ordinary character, or an escape character that escapes nothing: it belongs
+                // to the plain-text run, which starts here unless one is already open.
+                if (plainTextStart == NoRun)
+                    plainTextStart = i;
+                i++;
+            }
+        }
+
+        if (plainTextStart != NoRun)
+            yield return CreatePlainText(input, plainTextStart, text.Length - plainTextStart);
+    }
+
+    /// <summary>Creates a plain-text token over <paramref name="length"/> characters of <paramref name="input"/>.</summary>
+    private static MdToken CreatePlainText(ReadOnlyMemory<char> input, int start, int length)
+    {
+        return new MdToken(MdTokenType.PlainText, MdTokenBehaviour.Undefined, input.Slice(start, length));
+    }
+
+    /// <summary>
+    /// Tries to recognise a markup token that starts at <paramref name="index"/>. An alias can match
+    /// as a closing token (a non-delimiter character precedes it) and as an opening token (a
+    /// non-delimiter character follows it); the longest alias is looked up for each reading.
+    /// </summary>
+    /// <returns>true, with <paramref name="tokenInfo"/> set, when some alias matches; otherwise false.</returns>
+    private bool TryMatchTokenAliases(string input, int index, [NotNullWhen(true)] out TokenInfo? tokenInfo)
+    {
+        var matchedClosing = TryFindLongestAlias(input, index, true, out var closingAlias, out var closingType);
+        var matchedOpening = TryFindLongestAlias(input, index, false, out var openingAlias, out var openingType);
+
+        if (matchedOpening && matchedClosing)
+        {
+            // Both readings fit: keep the longer alias (closing wins a tie) and call the token
+            // "inside a word" when that alias has non-delimiter characters on both sides.
+            var (alias, type, behaviour) = openingAlias.Length > closingAlias.Length
+                ? (openingAlias, openingType, MdTokenBehaviour.Opening)
+                : (closingAlias, closingType, MdTokenBehaviour.Closing);
+
+            if (IsInsideAWord(input, index, alias))
+                behaviour = MdTokenBehaviour.InsideAWord;
+
+            tokenInfo = new TokenInfo(type, alias, behaviour);
+            return true;
+        }
+
+        if (matchedOpening)
+        {
+            tokenInfo = new TokenInfo(openingType, openingAlias, MdTokenBehaviour.Opening);
+            return true;
+        }
+
+        if (matchedClosing)
+        {
+            tokenInfo = new TokenInfo(closingType, closingAlias, MdTokenBehaviour.Closing);
+            return true;
+        }
+
+        tokenInfo = null;
+        return false;
+    }
+
+    /// <summary>
+    /// Finds the longest alias that matches at <paramref name="index"/> as a closing token
+    /// (<paramref name="closingToken"/> is true) or as an opening token (false).
+    /// </summary>
+    private bool TryFindLongestAlias(
+        string input, int index, bool closingToken, out string tokenAlias, out MdTokenType tokenType)
+    {
+        tokenAlias = string.Empty;
+        tokenType = default;
+        foreach (var (alias, type) in tokenAliases)
+        {
+            if (alias.Length <= tokenAlias.Length || !TryMatchAlias(input, index, alias, closingToken))
+                continue;
+
+            tokenAlias = alias;
+            tokenType = type;
+        }
+
+        return tokenAlias.Length > 0;
+    }
+
+    /// <summary>True when <paramref name="alias"/> occurs at <paramref name="index"/> with the neighbour its role requires.</summary>
+    private bool TryMatchAlias(string input, int index, string alias, bool isEndToken)
+    {
+        return TryMatchPattern(input, index, alias)
+               && HasANonDelimiterCharacterNearIt(input, index, alias, isEndToken);
+    }
+
+    private bool IsWordDelimiter(char c)
+    {
+        return Array.IndexOf(wordDelimiters, c) >= 0;
+    }
+
+    /// <summary>True when the alias at <paramref name="index"/> has non-delimiter characters on both sides.</summary>
+    private bool IsInsideAWord(string input, int index, string alias)
+    {
+        return HasANonDelimiterCharacterNearIt(input, index, alias, closingToken: true)
+               && HasANonDelimiterCharacterNearIt(input, index, alias, closingToken: false);
+    }
+
+    /// <summary>
+    /// Checks the character next to the alias: the one before it for a closing token, the one after
+    /// it for an opening token. The edge of the text counts as a delimiter.
+    /// </summary>
+    private bool HasANonDelimiterCharacterNearIt(string input, int index, string alias, bool closingToken)
+    {
+        var neighbour = closingToken ? index - 1 : index + alias.Length;
+        return neighbour >= 0 && neighbour < input.Length && !IsWordDelimiter(input[neighbour]);
+    }
+
+    /// <summary>Ordinal check that <paramref name="pattern"/> occurs in <paramref name="input"/> at <paramref name="index"/>.</summary>
+    private static bool TryMatchPattern(string input, int index, string pattern)
+    {
+        // CompareOrdinal yields a non-zero result when fewer than pattern.Length characters remain.
+        return string.CompareOrdinal(input, index, pattern, 0, pattern.Length) == 0;
+    }
+}
\ No newline at end of file
diff --git a/Markdown/Markdown/Traversable/ITraversable.cs b/Markdown/Markdown/Traversable/ITraversable.cs
new file mode 100644
index 000000000..f7730f071
--- /dev/null
+++ b/Markdown/Markdown/Traversable/ITraversable.cs
@@ -0,0 +1,9 @@
+namespace Markdown.Traversable;
+
+/// <summary>Declares a <c>Traverse</c> method that returns a sequence.</summary>
+// NOTE(review): the generic type arguments (of IEnumerable and possibly of this interface) seem to
+// have been lost from this listing; restore them from the repository before applying the patch.
+public interface ITraversable
+{
+    public IEnumerable Traverse();
+}
\ No newline at end of file