From 9f1863929b368ec291fb8c0683a10dd38dbccf79 Mon Sep 17 00:00:00 2001 From: Geratoptus Date: Wed, 11 Dec 2024 00:56:07 +0500 Subject: [PATCH 01/27] =?UTF-8?q?=D0=91=D0=B5=D0=B7=20=D0=BB=D0=B8=D1=88?= =?UTF-8?q?=D0=BD=D0=B8=D1=85=20=D1=81=D0=BB=D0=BE=D0=B2=20-=20=D0=BC?= =?UTF-8?q?=D1=83=D0=B6=D1=87=D0=B8=D0=BD=D0=B0=20=D1=83=D0=BC=D0=B5=D1=80?= =?UTF-8?q?...?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- MarkdownSpec.md | 20 ++- cs/Markdown/Generator/HtmlGenerator.cs | 43 ++++++ cs/Markdown/Generator/IGenerator.cs | 9 ++ cs/Markdown/Md.cs | 24 +++ cs/Markdown/Parser/Nodes/Node.cs | 6 + cs/Markdown/Parser/Nodes/NodeType.cs | 15 ++ cs/Markdown/Parser/Nodes/SpecNode.cs | 3 + cs/Markdown/Parser/Nodes/TagNode.cs | 8 + cs/Markdown/Parser/Nodes/TextNode.cs | 9 ++ cs/Markdown/Parser/Rules/BodyRule.cs | 24 +++ cs/Markdown/Parser/Rules/BoldRule.cs | 39 +++++ cs/Markdown/Parser/Rules/BoolRules/AndRule.cs | 27 ++++ .../Parser/Rules/BoolRules/ConditionalRule.cs | 13 ++ .../Parser/Rules/BoolRules/ContinuesRule.cs | 16 ++ .../Parser/Rules/BoolRules/KleeneStarRule.cs | 15 ++ cs/Markdown/Parser/Rules/BoolRules/OrRule.cs | 30 ++++ cs/Markdown/Parser/Rules/EscapeRule.cs | 25 +++ cs/Markdown/Parser/Rules/HeaderRule.cs | 26 ++++ cs/Markdown/Parser/Rules/IParsingRule.cs | 9 ++ cs/Markdown/Parser/Rules/InWordBoldRule.cs | 48 ++++++ cs/Markdown/Parser/Rules/InWordItalicRule.cs | 42 ++++++ cs/Markdown/Parser/Rules/ItalicRule.cs | 48 ++++++ cs/Markdown/Parser/Rules/ListItemRule.cs | 26 ++++ cs/Markdown/Parser/Rules/ParagraphRule.cs | 40 +++++ cs/Markdown/Parser/Rules/PatternRule.cs | 37 +++++ cs/Markdown/Parser/Rules/TextRule.cs | 19 +++ .../Parser/Rules/Tools/ListMatchExtensions.cs | 50 ++++++ .../Parser/Rules/Tools/ListOrderExtensions.cs | 7 + .../Rules/Tools/NodeConvertingExtensions.cs | 20 +++ cs/Markdown/Parser/Rules/UnorderedListRule.cs | 17 +++ cs/Markdown/Parser/TokenParser.cs | 13 ++ cs/Markdown/Tokenizer/MdTokenizer.cs | 30 ++++ .../Tokenizer/Scanners/ITokenScanner.cs | 8 + .../Tokenizer/Scanners/NumberScanner.cs | 17 +++ cs/Markdown/Tokenizer/Scanners/SpecScanner.cs | 32 ++++ cs/Markdown/Tokenizer/Scanners/TextScanner.cs | 19 +++ cs/Markdown/Tokens/ListExtensions.cs | 9 ++ cs/Markdown/Tokens/Token.cs | 10 ++ cs/Markdown/Tokens/TokenType.cs | 13 ++ .../Generator/HtmlGeneratorTest.cs | 67 +++++++++ cs/MarkdownTests/MdAcceptanceTest.cs | 37 +++++ cs/MarkdownTests/MdAcceptanceTest.txt | 33 ++++ cs/MarkdownTests/MdTest.cs | 49 ++++++ cs/MarkdownTests/Parser/Rules/BodyRuleTest.cs | 102 +++++++++++++ cs/MarkdownTests/Parser/Rules/BoldRuleTest.cs | 142 ++++++++++++++++++ .../Parser/Rules/BoolRules/AndRuleTest.cs | 41 +++++ .../Rules/BoolRules/ConditionalRuleTest.cs | 46 ++++++ .../Rules/BoolRules/ContinuesRuleTest.cs | 34 +++++ .../Rules/BoolRules/KleeneStarRuleTest.cs | 41 +++++ .../Parser/Rules/BoolRules/OrRuleTest.cs | 47 ++++++ .../Parser/Rules/EscapeRuleTest.cs | 31 ++++ .../Parser/Rules/HeaderRuleTest.cs | 66 ++++++++ .../Parser/Rules/ItalicRuleTest.cs | 114 ++++++++++++++ .../Parser/Rules/ListItemRuleTest.cs | 83 ++++++++++ .../Parser/Rules/ParagraphRuleTest.cs | 75 +++++++++ .../Parser/Rules/PatternRuleTest.cs | 39 +++++ cs/MarkdownTests/Parser/Rules/TextRuleTest.cs | 61 ++++++++ .../Parser/Rules/UnorderedListRuleTest.cs | 65 ++++++++ cs/MarkdownTests/Tokenizer/MdTokenizerTest.cs | 35 +++++ .../Tokenizer/Scanners/NumberScannerTest.cs | 52 +++++++ .../Tokenizer/Scanners/SpecScannerTest.cs | 43 ++++++ .../Tokenizer/Scanners/TextScannerTest.cs | 53 +++++++ cs/clean-code.sln | 12 ++ cs/clean-code.sln.DotSettings | 3 + 64 files changed, 2236 insertions(+), 1 deletion(-) create mode 100644 cs/Markdown/Generator/HtmlGenerator.cs create mode 100644 cs/Markdown/Generator/IGenerator.cs create mode 100644 cs/Markdown/Md.cs create mode 100644 cs/Markdown/Parser/Nodes/Node.cs create mode 100644 cs/Markdown/Parser/Nodes/NodeType.cs create mode 100644 cs/Markdown/Parser/Nodes/SpecNode.cs create mode 100644 cs/Markdown/Parser/Nodes/TagNode.cs create mode 100644 cs/Markdown/Parser/Nodes/TextNode.cs create mode 100644 cs/Markdown/Parser/Rules/BodyRule.cs create mode 100644 cs/Markdown/Parser/Rules/BoldRule.cs create mode 100644 cs/Markdown/Parser/Rules/BoolRules/AndRule.cs create mode 100644 cs/Markdown/Parser/Rules/BoolRules/ConditionalRule.cs create mode 100644 cs/Markdown/Parser/Rules/BoolRules/ContinuesRule.cs create mode 100644 cs/Markdown/Parser/Rules/BoolRules/KleeneStarRule.cs create mode 100644 cs/Markdown/Parser/Rules/BoolRules/OrRule.cs create mode 100644 cs/Markdown/Parser/Rules/EscapeRule.cs create mode 100644 cs/Markdown/Parser/Rules/HeaderRule.cs create mode 100644 cs/Markdown/Parser/Rules/IParsingRule.cs create mode 100644 cs/Markdown/Parser/Rules/InWordBoldRule.cs create mode 100644 cs/Markdown/Parser/Rules/InWordItalicRule.cs create mode 100644 cs/Markdown/Parser/Rules/ItalicRule.cs create mode 100644 cs/Markdown/Parser/Rules/ListItemRule.cs create mode 100644 cs/Markdown/Parser/Rules/ParagraphRule.cs create mode 100644 cs/Markdown/Parser/Rules/PatternRule.cs create mode 100644 cs/Markdown/Parser/Rules/TextRule.cs create mode 100644 cs/Markdown/Parser/Rules/Tools/ListMatchExtensions.cs create mode 100644 cs/Markdown/Parser/Rules/Tools/ListOrderExtensions.cs create mode 100644 cs/Markdown/Parser/Rules/Tools/NodeConvertingExtensions.cs create mode 100644 cs/Markdown/Parser/Rules/UnorderedListRule.cs create mode 100644 cs/Markdown/Parser/TokenParser.cs create mode 100644 cs/Markdown/Tokenizer/MdTokenizer.cs create mode 100644 cs/Markdown/Tokenizer/Scanners/ITokenScanner.cs create mode 100644 cs/Markdown/Tokenizer/Scanners/NumberScanner.cs create mode 100644 cs/Markdown/Tokenizer/Scanners/SpecScanner.cs create mode 100644 cs/Markdown/Tokenizer/Scanners/TextScanner.cs create mode 100644 cs/Markdown/Tokens/ListExtensions.cs create mode 100644 cs/Markdown/Tokens/Token.cs create mode 100644 cs/Markdown/Tokens/TokenType.cs create mode 100644 cs/MarkdownTests/Generator/HtmlGeneratorTest.cs create mode 100644 cs/MarkdownTests/MdAcceptanceTest.cs create mode 100644 cs/MarkdownTests/MdAcceptanceTest.txt create mode 100644 cs/MarkdownTests/MdTest.cs create mode 100644 cs/MarkdownTests/Parser/Rules/BodyRuleTest.cs create mode 100644 cs/MarkdownTests/Parser/Rules/BoldRuleTest.cs create mode 100644 cs/MarkdownTests/Parser/Rules/BoolRules/AndRuleTest.cs create mode 100644 cs/MarkdownTests/Parser/Rules/BoolRules/ConditionalRuleTest.cs create mode 100644 cs/MarkdownTests/Parser/Rules/BoolRules/ContinuesRuleTest.cs create mode 100644 cs/MarkdownTests/Parser/Rules/BoolRules/KleeneStarRuleTest.cs create mode 100644 cs/MarkdownTests/Parser/Rules/BoolRules/OrRuleTest.cs create mode 100644 cs/MarkdownTests/Parser/Rules/EscapeRuleTest.cs create mode 100644 cs/MarkdownTests/Parser/Rules/HeaderRuleTest.cs create mode 100644 cs/MarkdownTests/Parser/Rules/ItalicRuleTest.cs create mode 100644 cs/MarkdownTests/Parser/Rules/ListItemRuleTest.cs create mode 100644 cs/MarkdownTests/Parser/Rules/ParagraphRuleTest.cs create mode 100644 cs/MarkdownTests/Parser/Rules/PatternRuleTest.cs create mode 100644 cs/MarkdownTests/Parser/Rules/TextRuleTest.cs create mode 100644 cs/MarkdownTests/Parser/Rules/UnorderedListRuleTest.cs create mode 100644 cs/MarkdownTests/Tokenizer/MdTokenizerTest.cs create mode 100644 cs/MarkdownTests/Tokenizer/Scanners/NumberScannerTest.cs create mode 100644 cs/MarkdownTests/Tokenizer/Scanners/SpecScannerTest.cs create mode 100644 cs/MarkdownTests/Tokenizer/Scanners/TextScannerTest.cs diff --git a/MarkdownSpec.md b/MarkdownSpec.md index 886e99c95..9f8821b93 100644 --- a/MarkdownSpec.md +++ b/MarkdownSpec.md @@ -70,4 +70,22 @@ __Непарные_ символы в рамках одного абзаца н превратится в: -\

Заголовок \с \разными\ символами\\

\ No newline at end of file +\

Заголовок \с \разными\ символами\\

+ +# Маркированный список +Списком считается непрерывная последовательность ListItem, где ListItem = "* текст\n". +Если после * нет пробела, то это не ListItem => список не начинается, либо прерывается +Если строка состоит только из * , то это не ListItem => список не начинается, либо прерывается +Список прерывается абзацами, то есть: +* Первый +* Список + + +* Второй +* Список + +И +* Третий +* Список + +В элементах списка поддерживаются _текст_ и __текст__. \ No newline at end of file diff --git a/cs/Markdown/Generator/HtmlGenerator.cs b/cs/Markdown/Generator/HtmlGenerator.cs new file mode 100644 index 000000000..9d66c7615 --- /dev/null +++ b/cs/Markdown/Generator/HtmlGenerator.cs @@ -0,0 +1,43 @@ +using System.Text; +using Markdown.Parser.Nodes; +using Markdown.Parser.Rules.Tools; +using Markdown.Tokens; + +namespace Markdown.Generator; + +public class HtmlGenerator : IGenerator +{ + public string Render(Node? root, List tokens) + => RenderSpecificNode(root, tokens); + private string RenderSpecificNode(Node? node, List tokens) => node switch + { + { NodeType: NodeType.Text or NodeType.Escape } => node.ToText(tokens), + + SpecNode { Nodes: var nodes } => RenderChildren(nodes, tokens), + + TagNode { NodeType: NodeType.Body, Children: var children } + => $"
{RenderChildren(children, tokens)}
", + + TagNode { NodeType: NodeType.Italic, Children: var children } + => $"{RenderChildren(children, tokens)}", + + TagNode { NodeType: NodeType.Paragraph, Children: var children } + => $"

{RenderChildren(children, tokens)}

", + + TagNode { NodeType: NodeType.UnorderedList, Children: var children } + => $"
    {RenderChildren(children, tokens)}
", + + TagNode { NodeType: NodeType.ListItem, Children: var children } + => $"
  • {RenderChildren(children, tokens)}
  • ", + + TagNode { NodeType: NodeType.Header, Children: var children } + => $"

    {RenderChildren(children, tokens)}

    ", + + TagNode { NodeType: NodeType.Bold, Children: var children } + => $"{RenderChildren(children, tokens)}", + + _ => throw new ArgumentOutOfRangeException(nameof(node)) + }; + private string RenderChildren(List children, List tokens) + => children.Aggregate(new StringBuilder(), (sb, n) => sb.Append(RenderSpecificNode(n, tokens))).ToString(); +} \ No newline at end of file diff --git a/cs/Markdown/Generator/IGenerator.cs b/cs/Markdown/Generator/IGenerator.cs new file mode 100644 index 000000000..799ec7952 --- /dev/null +++ b/cs/Markdown/Generator/IGenerator.cs @@ -0,0 +1,9 @@ +using Markdown.Parser.Nodes; +using Markdown.Tokens; + +namespace Markdown.Generator; + +public interface IGenerator +{ + public string Render(Node root, List tokens); +} \ No newline at end of file diff --git a/cs/Markdown/Md.cs b/cs/Markdown/Md.cs new file mode 100644 index 000000000..258868bf6 --- /dev/null +++ b/cs/Markdown/Md.cs @@ -0,0 +1,24 @@ +using Markdown.Generator; +using Markdown.Parser; +using Markdown.Tokenizer; + +namespace Markdown; + +public static class Md +{ + public static string Render(string text) + { + var tokenizer = new MdTokenizer(); + var htmlGenerator = new HtmlGenerator(); + + var tokens = tokenizer.Tokenize($"{RemoveCarriageTransfer(text)}\n"); + var root = TokenParser.Parse(tokens); + + return htmlGenerator.Render(root, tokens); + } + + private static string RemoveCarriageTransfer(string text) + { + return text.Replace("\r\n", "\n").Replace("\n\r", "\n"); + } +} \ No newline at end of file diff --git a/cs/Markdown/Parser/Nodes/Node.cs b/cs/Markdown/Parser/Nodes/Node.cs new file mode 100644 index 000000000..6b224ad0b --- /dev/null +++ b/cs/Markdown/Parser/Nodes/Node.cs @@ -0,0 +1,6 @@ +namespace Markdown.Parser.Nodes; + +public record Node(NodeType NodeType, int Start, int Consumed) +{ + public int End { get; } = Start + Consumed - 1; +} \ No newline at end of file diff --git a/cs/Markdown/Parser/Nodes/NodeType.cs b/cs/Markdown/Parser/Nodes/NodeType.cs new file mode 100644 index 000000000..891f34d65 --- /dev/null +++ b/cs/Markdown/Parser/Nodes/NodeType.cs @@ -0,0 +1,15 @@ +namespace Markdown.Parser.Nodes; + +public enum NodeType +{ + Text, + Bold, + Body, + Italic, + Escape, + Header, + Special, + ListItem, + Paragraph, + UnorderedList +} \ No newline at end of file diff --git a/cs/Markdown/Parser/Nodes/SpecNode.cs b/cs/Markdown/Parser/Nodes/SpecNode.cs new file mode 100644 index 000000000..c29270555 --- /dev/null +++ b/cs/Markdown/Parser/Nodes/SpecNode.cs @@ -0,0 +1,3 @@ +namespace Markdown.Parser.Nodes; + +public record SpecNode(List Nodes, int Start, int Consumed) : Node(NodeType.Special, Start, Consumed); \ No newline at end of file diff --git a/cs/Markdown/Parser/Nodes/TagNode.cs b/cs/Markdown/Parser/Nodes/TagNode.cs new file mode 100644 index 000000000..7c1175946 --- /dev/null +++ b/cs/Markdown/Parser/Nodes/TagNode.cs @@ -0,0 +1,8 @@ +namespace Markdown.Parser.Nodes; + +public record TagNode(NodeType NodeType, List Children, int Start, int Consumed) : Node(NodeType, Start, Consumed) +{ + public TagNode(NodeType nodeType, Node child, int start, int consumed) + : this(nodeType, [child], start, consumed) + { } +} \ No newline at end of file diff --git a/cs/Markdown/Parser/Nodes/TextNode.cs b/cs/Markdown/Parser/Nodes/TextNode.cs new file mode 100644 index 000000000..abbc50210 --- /dev/null +++ b/cs/Markdown/Parser/Nodes/TextNode.cs @@ -0,0 +1,9 @@ +using Markdown.Tokens; + +namespace Markdown.Parser.Nodes; + +public record TextNode(int Start, int Consumed) : Node(NodeType.Text, Start, Consumed) +{ + public string ToText(List tokens) + => tokens.Skip(Start).Take(Consumed).ToList().ToText(); +} \ No newline at end of file diff --git a/cs/Markdown/Parser/Rules/BodyRule.cs b/cs/Markdown/Parser/Rules/BodyRule.cs new file mode 100644 index 000000000..be03e6068 --- /dev/null +++ b/cs/Markdown/Parser/Rules/BodyRule.cs @@ -0,0 +1,24 @@ +using Markdown.Parser.Nodes; +using Markdown.Parser.Rules.BoolRules; +using Markdown.Tokens; + +namespace Markdown.Parser.Rules; + +public class BodyRule : IParsingRule +{ + public Node? Match(List tokens, int begin = 0) + { + var tagRules = new OrRule([ + new EscapeRule([TokenType.Octothorpe, TokenType.Asterisk]), + new HeaderRule(), + new UnorderedListRule(), + new ParagraphRule() + ]); + var tokenRules = new PatternRule(TokenType.Newline); + + var resultRule = new KleeneStarRule(new OrRule(tagRules, tokenRules)); + return resultRule.Match(tokens, begin) is SpecNode node ? BuildNode(node) : null; + } + private static TagNode BuildNode(SpecNode node) + => new(NodeType.Body, node.Nodes, node.Start, node.Consumed); +} \ No newline at end of file diff --git a/cs/Markdown/Parser/Rules/BoldRule.cs b/cs/Markdown/Parser/Rules/BoldRule.cs new file mode 100644 index 000000000..14b80f419 --- /dev/null +++ b/cs/Markdown/Parser/Rules/BoldRule.cs @@ -0,0 +1,39 @@ +using System.Diagnostics; +using Markdown.Parser.Nodes; +using Markdown.Parser.Rules.BoolRules; +using Markdown.Parser.Rules.Tools; +using Markdown.Tokens; + +namespace Markdown.Parser.Rules; + +public class BoldRule : IParsingRule +{ + public Node? Match(List tokens, int begin = 0) + { + return !InWordBoldRule.IsTagInWord(tokens, begin) + ? MatchBold(tokens, begin) + : new InWordBoldRule().Match(tokens, begin); + } + + private static TagNode? MatchBold(List tokens, int begin = 0) + { + var valueRule = new OrRule(new ItalicRule(), new TextRule()); + var pattern = new AndRule([ + PatternRuleFactory.DoubleUnderscore(), + new ConditionalRule(new KleeneStarRule(valueRule), HasRightBorders), + PatternRuleFactory.DoubleUnderscore() + ]); + var continuesRule = new OrRule(TokenType.Newline, TokenType.Space); + + var resultRule = new ContinuesRule(pattern, continuesRule); + return resultRule.Match(tokens, begin) is SpecNode specNode ? BuildNode(specNode) : null; + } + private static TagNode BuildNode(SpecNode node) + { + var valueNode = (node.Nodes.Second() as SpecNode); + Debug.Assert(valueNode != null, nameof(valueNode) + " != null"); + return new TagNode(NodeType.Bold, valueNode.Nodes, node.Start, node.Consumed); + } + private static bool HasRightBorders(Node node, List tokens) + => tokens[node.End].TokenType != TokenType.Space && tokens[node.Start].TokenType != TokenType.Space; +} \ No newline at end of file diff --git a/cs/Markdown/Parser/Rules/BoolRules/AndRule.cs b/cs/Markdown/Parser/Rules/BoolRules/AndRule.cs new file mode 100644 index 000000000..fabc1edc0 --- /dev/null +++ b/cs/Markdown/Parser/Rules/BoolRules/AndRule.cs @@ -0,0 +1,27 @@ +using Markdown.Parser.Nodes; +using Markdown.Parser.Rules.Tools; +using Markdown.Tokens; + +namespace Markdown.Parser.Rules.BoolRules; + +public class AndRule(List pattern) : IParsingRule +{ + public AndRule(IParsingRule firstRule, IParsingRule secondRule) : + this([firstRule, secondRule]) + { + + } + + public AndRule(TokenType firstType, TokenType secondType) : + this([new PatternRule(firstType), new PatternRule(secondType)]) + { + + } + + public Node? Match(List tokens, int begin = 0) + { + var nodes = tokens.MatchPattern(pattern, begin); + var consumed = nodes.Aggregate(0, (acc, node) => acc + node.Consumed); + return consumed == 0 ? null : new SpecNode(nodes, begin, consumed); + } +} \ No newline at end of file diff --git a/cs/Markdown/Parser/Rules/BoolRules/ConditionalRule.cs b/cs/Markdown/Parser/Rules/BoolRules/ConditionalRule.cs new file mode 100644 index 000000000..6f5d09a71 --- /dev/null +++ b/cs/Markdown/Parser/Rules/BoolRules/ConditionalRule.cs @@ -0,0 +1,13 @@ +using Markdown.Parser.Nodes; +using Markdown.Tokens; + +namespace Markdown.Parser.Rules.BoolRules; + +public class ConditionalRule(IParsingRule rule, Func, bool> condition) : IParsingRule +{ + public Node? Match(List tokens, int begin = 0) + { + var node = rule.Match(tokens, begin); + return node is not null && condition(node, tokens) ? node : null; + } +} \ No newline at end of file diff --git a/cs/Markdown/Parser/Rules/BoolRules/ContinuesRule.cs b/cs/Markdown/Parser/Rules/BoolRules/ContinuesRule.cs new file mode 100644 index 000000000..6a624c0ff --- /dev/null +++ b/cs/Markdown/Parser/Rules/BoolRules/ContinuesRule.cs @@ -0,0 +1,16 @@ +using Markdown.Parser.Nodes; +using Markdown.Tokens; + +namespace Markdown.Parser.Rules.BoolRules; + +public class ContinuesRule(IParsingRule rule, IParsingRule continuesRule) : IParsingRule +{ + public Node? Match(List tokens, int begin = 0) + => new ConditionalRule(rule, HasRightContinues).Match(tokens, begin); + + private bool HasRightContinues(Node node, List tokens) + { + if (tokens.Count == node.End + 1) return true; + return continuesRule.Match(tokens, node.End + 1) is not null; + } +} \ No newline at end of file diff --git a/cs/Markdown/Parser/Rules/BoolRules/KleeneStarRule.cs b/cs/Markdown/Parser/Rules/BoolRules/KleeneStarRule.cs new file mode 100644 index 000000000..af8bc1689 --- /dev/null +++ b/cs/Markdown/Parser/Rules/BoolRules/KleeneStarRule.cs @@ -0,0 +1,15 @@ +using Markdown.Parser.Nodes; +using Markdown.Parser.Rules.Tools; +using Markdown.Tokens; + +namespace Markdown.Parser.Rules.BoolRules; + +public class KleeneStarRule(IParsingRule pattern) : IParsingRule +{ + public Node? Match(List tokens, int begin = 0) + { + var nodes = tokens.KleeneStarMatch(pattern, begin); + var consumed = nodes.Aggregate(0, (acc, node) => acc + node.Consumed); + return consumed == 0 ? null : new SpecNode(nodes, begin, consumed); + } +} \ No newline at end of file diff --git a/cs/Markdown/Parser/Rules/BoolRules/OrRule.cs b/cs/Markdown/Parser/Rules/BoolRules/OrRule.cs new file mode 100644 index 000000000..c4e1e73ac --- /dev/null +++ b/cs/Markdown/Parser/Rules/BoolRules/OrRule.cs @@ -0,0 +1,30 @@ +using Markdown.Parser.Nodes; +using Markdown.Tokens; + +namespace Markdown.Parser.Rules.BoolRules; + +public class OrRule(List rules) : IParsingRule +{ + public OrRule(IParsingRule firstRule, IParsingRule secondRule) + : this([firstRule, secondRule]) + { } + + public OrRule(TokenType firstToken, TokenType secondToken) + : this(new PatternRule(firstToken), new PatternRule(secondToken)) + { } + + public OrRule(List tokenTypes) + : this(tokenTypes + .Select(tt => new PatternRule(tt)) + .ToList() + ) + { } + + public Node? Match(List tokens, int begin = 0) + { + var match = rules + .Select(rule => rule.Match(tokens, begin)) + .FirstOrDefault(node => node is not null, null); + return match; + } +} \ No newline at end of file diff --git a/cs/Markdown/Parser/Rules/EscapeRule.cs b/cs/Markdown/Parser/Rules/EscapeRule.cs new file mode 100644 index 000000000..05522f2fa --- /dev/null +++ b/cs/Markdown/Parser/Rules/EscapeRule.cs @@ -0,0 +1,25 @@ +using Markdown.Parser.Nodes; +using Markdown.Parser.Rules.BoolRules; +using Markdown.Parser.Rules.Tools; +using Markdown.Tokens; + +namespace Markdown.Parser.Rules; + +public class EscapeRule(List escapedTokens) : IParsingRule +{ + public EscapeRule(TokenType escapedTokenType) + : this([escapedTokenType]) + { } + + + public Node? Match(List tokens, int begin = 0) + { + var resultRule = new AndRule([ + new PatternRule(TokenType.Backslash), + new OrRule(escapedTokens) + ]); + return resultRule.Match(tokens, begin) is SpecNode node ? BuildNode(node) : null; + } + private static TagNode BuildNode(SpecNode node) + => new(NodeType.Escape, node.Nodes.Second() ?? throw new InvalidOperationException(), node.Start, node.Consumed); +} \ No newline at end of file diff --git a/cs/Markdown/Parser/Rules/HeaderRule.cs b/cs/Markdown/Parser/Rules/HeaderRule.cs new file mode 100644 index 000000000..ded758780 --- /dev/null +++ b/cs/Markdown/Parser/Rules/HeaderRule.cs @@ -0,0 +1,26 @@ +using System.Diagnostics; +using Markdown.Parser.Nodes; +using Markdown.Parser.Rules.BoolRules; +using Markdown.Parser.Rules.Tools; +using Markdown.Tokens; + +namespace Markdown.Parser.Rules; + +public class HeaderRule : IParsingRule +{ + public Node? Match(List tokens, int begin = 0) + { + var resultRule = new AndRule([ + new PatternRule([TokenType.Octothorpe, TokenType.Space]), + new ParagraphRule(), + ]); + return resultRule.Match(tokens, begin) is SpecNode node ? BuildNode(node) : null; + } + + private static TagNode BuildNode(SpecNode specNode) + { + var valueNode = (specNode.Nodes.Second() as TagNode); + Debug.Assert(valueNode != null, nameof(valueNode) + " != null"); + return new TagNode(NodeType.Header, valueNode.Children, specNode.Start,specNode.Consumed); + } +} \ No newline at end of file diff --git a/cs/Markdown/Parser/Rules/IParsingRule.cs b/cs/Markdown/Parser/Rules/IParsingRule.cs new file mode 100644 index 000000000..22a2d0cd1 --- /dev/null +++ b/cs/Markdown/Parser/Rules/IParsingRule.cs @@ -0,0 +1,9 @@ +using Markdown.Parser.Nodes; +using Markdown.Tokens; + +namespace Markdown.Parser.Rules; + +public interface IParsingRule +{ + public Node? Match(List tokens, int begin); +} \ No newline at end of file diff --git a/cs/Markdown/Parser/Rules/InWordBoldRule.cs b/cs/Markdown/Parser/Rules/InWordBoldRule.cs new file mode 100644 index 000000000..a158d4f3c --- /dev/null +++ b/cs/Markdown/Parser/Rules/InWordBoldRule.cs @@ -0,0 +1,48 @@ +using System.Diagnostics; +using Markdown.Parser.Nodes; +using Markdown.Parser.Rules.BoolRules; +using Markdown.Parser.Rules.Tools; +using Markdown.Tokens; + +namespace Markdown.Parser.Rules; + +public class InWordBoldRule : IParsingRule +{ + private readonly List possibleContinues = + [ + TokenType.Newline, TokenType.Space, TokenType.Word + ]; + + public Node? Match(List tokens, int begin = 0) + { + var valueRule = new OrRule(new InWordItalicRule(), new PatternRule(TokenType.Word)); + var pattern = new AndRule([ + PatternRuleFactory.DoubleUnderscore(), + new KleeneStarRule(valueRule), + PatternRuleFactory.DoubleUnderscore(), + ]); + var continuesRule = new OrRule(possibleContinues); + + var resultRule = new ContinuesRule(pattern, continuesRule); + return resultRule.Match(tokens, begin) is SpecNode node ? BuildNode(node) : null; + } + + private static TagNode BuildNode(SpecNode node) + { + var valueNode = (node.Nodes.Second() as SpecNode); + Debug.Assert(valueNode != null, nameof(valueNode) + " != null"); + return new TagNode(NodeType.Bold, valueNode.Nodes, node.Start, node.Consumed); + } + + public static bool IsTagInWord(List tokens, int begin = 0) + { + if (begin != 0 && tokens[begin - 1].TokenType == TokenType.Word) + return true; + + var inStartRule = new PatternRule([ + TokenType.Underscore, TokenType.Underscore, TokenType.Word, + TokenType.Underscore, TokenType.Underscore, TokenType.Word, + ]); + return inStartRule.Match(tokens, begin) is not null; + } +} \ No newline at end of file diff --git a/cs/Markdown/Parser/Rules/InWordItalicRule.cs b/cs/Markdown/Parser/Rules/InWordItalicRule.cs new file mode 100644 index 000000000..aa74e98b2 --- /dev/null +++ b/cs/Markdown/Parser/Rules/InWordItalicRule.cs @@ -0,0 +1,42 @@ +using Markdown.Parser.Nodes; +using Markdown.Parser.Rules.BoolRules; +using Markdown.Parser.Rules.Tools; +using Markdown.Tokens; + +namespace Markdown.Parser.Rules; + +public class InWordItalicRule : IParsingRule +{ + private readonly List possibleContinues = + [ + TokenType.Newline, TokenType.Space, TokenType.Word + ]; + + public Node? Match(List tokens, int begin = 0) + { + var pattern = new AndRule([ + new PatternRule(TokenType.Underscore), + new PatternRule(TokenType.Word), + new PatternRule(TokenType.Underscore), + ]); + var continuesRule = new OrRule(possibleContinues); + + var resultRule = new ContinuesRule(pattern, continuesRule); + return resultRule.Match(tokens, begin) is SpecNode node ? BuildNode(node) : null; + } + + private static TagNode BuildNode(SpecNode node) + => new(NodeType.Italic, node.Nodes.Second() ?? throw new InvalidOperationException(), node.Start, node.Consumed); + + public static bool IsTagInWord(List tokens, int begin = 0) + { + if (begin != 0 && tokens[begin - 1].TokenType == TokenType.Word) + return true; + + var inStartRule = new PatternRule([ + TokenType.Underscore, TokenType.Word, + TokenType.Underscore, TokenType.Word, + ]); + return inStartRule.Match(tokens, begin) is not null; + } +} \ No newline at end of file diff --git a/cs/Markdown/Parser/Rules/ItalicRule.cs b/cs/Markdown/Parser/Rules/ItalicRule.cs new file mode 100644 index 000000000..2558b08fb --- /dev/null +++ b/cs/Markdown/Parser/Rules/ItalicRule.cs @@ -0,0 +1,48 @@ +using Markdown.Parser.Nodes; +using Markdown.Parser.Rules.BoolRules; +using Markdown.Parser.Rules.Tools; +using Markdown.Tokens; + +namespace Markdown.Parser.Rules; + +public class ItalicRule : IParsingRule +{ + private readonly AndRule innerBoldRule = new([ + PatternRuleFactory.DoubleUnderscore(), + new TextRule(), + PatternRuleFactory.DoubleUnderscore() + ]); + private readonly List possibleContinues = + [ + PatternRuleFactory.DoubleUnderscore(), + new PatternRule(TokenType.Newline), + new PatternRule(TokenType.Space), + ]; + + public Node? Match(List tokens, int begin = 0) + { + return !InWordItalicRule.IsTagInWord(tokens, begin) + ? MatchItalic(tokens, begin) + : new InWordItalicRule().Match(tokens, begin); + } + private TagNode? MatchItalic(List tokens, int begin) + { + var valueRule = new OrRule(new TextRule(), innerBoldRule); + var pattern = new AndRule([ + new PatternRule(TokenType.Underscore), + new ConditionalRule(new KleeneStarRule(valueRule), HasRightBorders), + new PatternRule(TokenType.Underscore), + ]); + var continuesRule = new OrRule(possibleContinues); + + var resultRule = new ContinuesRule(pattern, continuesRule); + return resultRule.Match(tokens, begin) is SpecNode specNode ? BuildNode(specNode) : null; + } + private static TagNode BuildNode(SpecNode node) + { + var valueNode = (node.Nodes.Second() as SpecNode)!; + return new TagNode(NodeType.Italic, valueNode.Nodes, node.Start, node.Consumed); + } + private static bool HasRightBorders(Node node, List tokens) + => tokens[node.End].TokenType != TokenType.Space && tokens[node.Start].TokenType != TokenType.Space; +} \ No newline at end of file diff --git a/cs/Markdown/Parser/Rules/ListItemRule.cs b/cs/Markdown/Parser/Rules/ListItemRule.cs new file mode 100644 index 000000000..d27dea842 --- /dev/null +++ b/cs/Markdown/Parser/Rules/ListItemRule.cs @@ -0,0 +1,26 @@ +using System.Diagnostics; +using Markdown.Parser.Nodes; +using Markdown.Parser.Rules.BoolRules; +using Markdown.Parser.Rules.Tools; +using Markdown.Tokens; + +namespace Markdown.Parser.Rules; + +public class ListItemRule : IParsingRule +{ + public Node? Match(List tokens, int begin = 0) + { + var resultRule = new AndRule([ + new PatternRule([TokenType.Asterisk, TokenType.Space]), + new ParagraphRule(), + ]); + return resultRule.Match(tokens, begin) is SpecNode node ? BuildNode(node) : null; + } + + private static TagNode BuildNode(SpecNode specNode) + { + var valueNode = (specNode.Nodes.Second() as TagNode); + Debug.Assert(valueNode != null, nameof(valueNode) + " != null"); + return new TagNode(NodeType.ListItem, valueNode.Children, specNode.Start, specNode.Consumed); + } +} \ No newline at end of file diff --git a/cs/Markdown/Parser/Rules/ParagraphRule.cs b/cs/Markdown/Parser/Rules/ParagraphRule.cs new file mode 100644 index 000000000..add21a5f7 --- /dev/null +++ b/cs/Markdown/Parser/Rules/ParagraphRule.cs @@ -0,0 +1,40 @@ +using System.Diagnostics; +using Markdown.Parser.Nodes; +using Markdown.Parser.Rules.BoolRules; +using Markdown.Parser.Rules.Tools; +using Markdown.Tokens; + +namespace Markdown.Parser.Rules; + +public class ParagraphRule: IParsingRule +{ + public Node? Match(List tokens, int begin = 0) + { + var tagRules = new OrRule([ + new EscapeRule(TokenType.Underscore), + new ItalicRule(), new BoldRule(), new TextRule(), + ]); + var tokenRules = new OrRule([ + PatternRuleFactory.DoubleUnderscore(), + new PatternRule(TokenType.Number), + new PatternRule(TokenType.Octothorpe), + new PatternRule(TokenType.Underscore), + new PatternRule(TokenType.Asterisk), + new PatternRule(TokenType.Backslash), + ]); + + var resultRule = new AndRule([ + new KleeneStarRule(new OrRule(tagRules, tokenRules)), + new PatternRule(TokenType.Newline) + ]); + + return resultRule.Match(tokens, begin) is SpecNode node ? BuildNode(node) : null; + } + + private static TagNode BuildNode(SpecNode node) + { + var valueNode = (node.Nodes.First() as SpecNode); + Debug.Assert(valueNode != null, nameof(valueNode) + " != null"); + return new TagNode(NodeType.Paragraph, valueNode.Nodes, node.Start, node.Consumed); + } +} \ No newline at end of file diff --git a/cs/Markdown/Parser/Rules/PatternRule.cs b/cs/Markdown/Parser/Rules/PatternRule.cs new file mode 100644 index 000000000..7649050b4 --- /dev/null +++ b/cs/Markdown/Parser/Rules/PatternRule.cs @@ -0,0 +1,37 @@ +using Markdown.Parser.Nodes; +using Markdown.Tokens; + +namespace Markdown.Parser.Rules; + +public class PatternRule(List pattern) : IParsingRule +{ + public PatternRule(TokenType tokenType) + : this([tokenType]) + { + } + + public Node? Match(List tokens, int begin = 0) + { + if (pattern.Count == 0) + { + return null; + } + + if (tokens.Count - begin < pattern.Count) + { + return null; + } + + var isMatched = tokens + .Skip(begin) + .Take(pattern.Count) + .Zip(pattern) + .All(pair => pair.First.TokenType == pair.Second); + return !isMatched ? null : new TextNode(begin, pattern.Count); + } +} +public static class PatternRuleFactory +{ + public static PatternRule DoubleUnderscore() + => new([TokenType.Underscore, TokenType.Underscore]); +} \ No newline at end of file diff --git a/cs/Markdown/Parser/Rules/TextRule.cs b/cs/Markdown/Parser/Rules/TextRule.cs new file mode 100644 index 000000000..9a5da369f --- /dev/null +++ b/cs/Markdown/Parser/Rules/TextRule.cs @@ -0,0 +1,19 @@ +using Markdown.Parser.Nodes; +using Markdown.Tokens; + +namespace Markdown.Parser.Rules; + +public class TextRule : IParsingRule +{ + public Node? Match(List tokens, int begin = 0) + { + var textLength = tokens + .Skip(begin) + .TakeWhile(IsText) + .Count(); + return textLength == 0 ? null : new TextNode(begin, textLength); + } + + private static bool IsText(Token token) + => token.TokenType is TokenType.Word or TokenType.Space; +} \ No newline at end of file diff --git a/cs/Markdown/Parser/Rules/Tools/ListMatchExtensions.cs b/cs/Markdown/Parser/Rules/Tools/ListMatchExtensions.cs new file mode 100644 index 000000000..c91cecf59 --- /dev/null +++ b/cs/Markdown/Parser/Rules/Tools/ListMatchExtensions.cs @@ -0,0 +1,50 @@ +using Markdown.Parser.Nodes; +using Markdown.Tokens; + +namespace Markdown.Parser.Rules.Tools; + +public static class ListMatchExtensions +{ + public static List MatchPattern(this List tokens, + List pattern, int begin = 0) + { + List nodes = []; + + foreach (var node in pattern.Select(patternRule => patternRule.Match(tokens, begin))) + { + if (node is null) + { + return []; + } + nodes.Add(node); + begin += node.Consumed; + } + + return nodes; + } + + public static List KleeneStarMatch(this List tokens, + IParsingRule pattern, int begin = 0) + { + List nodes = []; + while (true) + { + var node = pattern.Match(tokens, begin); + if (node is null) + { + return nodes; + } + begin += node.Consumed; + nodes.Add(node); + } + } + + public static Node? FirstMatch(this List tokens, + List patterns, int begin = 0) + { + var match = patterns + .Select(patternRule => patternRule.Match(tokens, begin)) + .FirstOrDefault(match => match is not null, null); + return match; + } +} \ No newline at end of file diff --git a/cs/Markdown/Parser/Rules/Tools/ListOrderExtensions.cs b/cs/Markdown/Parser/Rules/Tools/ListOrderExtensions.cs new file mode 100644 index 000000000..08c7985bc --- /dev/null +++ b/cs/Markdown/Parser/Rules/Tools/ListOrderExtensions.cs @@ -0,0 +1,7 @@ +namespace Markdown.Parser.Rules.Tools; + +public static class ListOrderExtensions +{ + public static T? Second(this List list) => list.Count < 2 ? default : list[1]; + public static T? Third(this List list) => list.Count < 3 ? default : list[2]; +} \ No newline at end of file diff --git a/cs/Markdown/Parser/Rules/Tools/NodeConvertingExtensions.cs b/cs/Markdown/Parser/Rules/Tools/NodeConvertingExtensions.cs new file mode 100644 index 000000000..bae7a3325 --- /dev/null +++ b/cs/Markdown/Parser/Rules/Tools/NodeConvertingExtensions.cs @@ -0,0 +1,20 @@ +using System.Text; +using Markdown.Parser.Nodes; +using Markdown.Tokens; + +namespace Markdown.Parser.Rules.Tools; + +public static class NodeConvertingExtensions +{ + public static string ToText(this List nodes, List tokens) + => nodes.Aggregate(new StringBuilder(), (sb, n) => sb.Append(n.ToText(tokens))).ToString(); + + public static string ToText(this Node node, List tokens) => node switch + { + TextNode textNode => textNode.ToText(tokens), + TagNode tagNode => tagNode.Children.ToText(tokens), + SpecNode specNode => specNode.Nodes.ToText(tokens), + + _ => throw new ArgumentException("Unknown node type") + }; +} \ No newline at end of file diff --git a/cs/Markdown/Parser/Rules/UnorderedListRule.cs b/cs/Markdown/Parser/Rules/UnorderedListRule.cs new file mode 100644 index 000000000..956ac681f --- /dev/null +++ b/cs/Markdown/Parser/Rules/UnorderedListRule.cs @@ -0,0 +1,17 @@ +using Markdown.Parser.Nodes; +using Markdown.Parser.Rules.BoolRules; +using Markdown.Tokens; + +namespace Markdown.Parser.Rules; + +public class UnorderedListRule : IParsingRule +{ + public Node? Match(List tokens, int begin = 0) + { + var resultRule = new KleeneStarRule(new ListItemRule()); + + return resultRule.Match(tokens, begin) is SpecNode node ? BuildNode(node) : null; + } + private static TagNode BuildNode(SpecNode node) + => new(NodeType.UnorderedList, node.Nodes, node.Start, node.Consumed); +} \ No newline at end of file diff --git a/cs/Markdown/Parser/TokenParser.cs b/cs/Markdown/Parser/TokenParser.cs new file mode 100644 index 000000000..0abb545ed --- /dev/null +++ b/cs/Markdown/Parser/TokenParser.cs @@ -0,0 +1,13 @@ +using Markdown.Parser.Nodes; +using Markdown.Parser.Rules; +using Markdown.Tokens; + +namespace Markdown.Parser; + +public static class TokenParser +{ + public static Node? Parse(List tokens) + { + return new BodyRule().Match(tokens); + } +} \ No newline at end of file diff --git a/cs/Markdown/Tokenizer/MdTokenizer.cs b/cs/Markdown/Tokenizer/MdTokenizer.cs new file mode 100644 index 000000000..3bed65a98 --- /dev/null +++ b/cs/Markdown/Tokenizer/MdTokenizer.cs @@ -0,0 +1,30 @@ +using System.Diagnostics; +using Markdown.Tokenizer.Scanners; +using Markdown.Tokens; + +namespace Markdown.Tokenizer; + +public class MdTokenizer +{ + private readonly ITokenScanner[] scanners = [ + new SpecScanner(), new TextScanner(), new NumberScanner() + ]; + + public List Tokenize(string text) + { + var begin = 0; + var tokenList = new List(); + + while (begin < text.Length) + { + var token = scanners + .Select(scanner => scanner.Scan(text, begin)) + .First(token => token != null); + + Debug.Assert(token != null, nameof(token) + " != null"); + begin += token.Length; + tokenList.Add(token); + } + return tokenList; + } +} \ No newline at end of file diff --git a/cs/Markdown/Tokenizer/Scanners/ITokenScanner.cs b/cs/Markdown/Tokenizer/Scanners/ITokenScanner.cs new file mode 100644 index 000000000..9986613f5 --- /dev/null +++ b/cs/Markdown/Tokenizer/Scanners/ITokenScanner.cs @@ -0,0 +1,8 @@ +using Markdown.Tokens; + +namespace Markdown.Tokenizer.Scanners; + +public interface ITokenScanner +{ + public Token? Scan(string text, int begin); +} \ No newline at end of file diff --git a/cs/Markdown/Tokenizer/Scanners/NumberScanner.cs b/cs/Markdown/Tokenizer/Scanners/NumberScanner.cs new file mode 100644 index 000000000..4ae84a9b2 --- /dev/null +++ b/cs/Markdown/Tokenizer/Scanners/NumberScanner.cs @@ -0,0 +1,17 @@ +using Markdown.Tokens; + +namespace Markdown.Tokenizer.Scanners; + +public class NumberScanner : ITokenScanner +{ + public Token? Scan(string text, int begin = 0) + { + var numberEnumerable = text + .Skip(begin) + .TakeWhile(CanScan); + var numberLength = numberEnumerable.Count(); + return numberLength == 0 ? null : new Token(TokenType.Number, begin, numberLength, text); + } + + public static bool CanScan(char symbol) => char.IsDigit(symbol); +} \ No newline at end of file diff --git a/cs/Markdown/Tokenizer/Scanners/SpecScanner.cs b/cs/Markdown/Tokenizer/Scanners/SpecScanner.cs new file mode 100644 index 000000000..9b2d4445d --- /dev/null +++ b/cs/Markdown/Tokenizer/Scanners/SpecScanner.cs @@ -0,0 +1,32 @@ +using Markdown.Tokens; + +namespace Markdown.Tokenizer.Scanners; + +public class SpecScanner : ITokenScanner +{ + public Token? Scan(string text, int begin = 0) + { + var tokenType = GetTokenType(text[begin]); + + if (tokenType is null) + { + return null; + } + var notNullType = (TokenType)tokenType; + + return new Token(notNullType, begin, 1, text); + } + + public static bool CanScan(char symbol) => GetTokenType(symbol) != null; + + private static TokenType? GetTokenType(char symbol) => symbol switch + { + ' ' => TokenType.Space, + '*' => TokenType.Asterisk, + '\n' or '\r' => TokenType.Newline, + '\\' => TokenType.Backslash, + '_' => TokenType.Underscore, + '#' => TokenType.Octothorpe, + _ => null + }; +} \ No newline at end of file diff --git a/cs/Markdown/Tokenizer/Scanners/TextScanner.cs b/cs/Markdown/Tokenizer/Scanners/TextScanner.cs new file mode 100644 index 000000000..6293c9689 --- /dev/null +++ b/cs/Markdown/Tokenizer/Scanners/TextScanner.cs @@ -0,0 +1,19 @@ +using Markdown.Tokens; + +namespace Markdown.Tokenizer.Scanners; + +public class TextScanner : ITokenScanner +{ + public Token? Scan(string text, int begin = 0) + { + var valueEnumerable = text + .Skip(begin) + .TakeWhile(CanScan); + var valueLen = valueEnumerable.Count(); + return valueLen == 0 ? null : new Token(TokenType.Word, begin, valueLen, text); + } + + private static bool CanScan(char symbol) + => !SpecScanner.CanScan(symbol) && !NumberScanner.CanScan(symbol); + +} \ No newline at end of file diff --git a/cs/Markdown/Tokens/ListExtensions.cs b/cs/Markdown/Tokens/ListExtensions.cs new file mode 100644 index 000000000..4775210f6 --- /dev/null +++ b/cs/Markdown/Tokens/ListExtensions.cs @@ -0,0 +1,9 @@ +using System.Text; + +namespace Markdown.Tokens; + +public static class ListExtensions +{ + public static string ToText(this List tokens) => tokens + .Aggregate(new StringBuilder(), (sb, t) => sb.Append(t.Value)).ToString(); +} \ No newline at end of file diff --git a/cs/Markdown/Tokens/Token.cs b/cs/Markdown/Tokens/Token.cs new file mode 100644 index 000000000..90eb57e96 --- /dev/null +++ b/cs/Markdown/Tokens/Token.cs @@ -0,0 +1,10 @@ +namespace Markdown.Tokens; + +public record Token(TokenType TokenType, string Value) +{ + public Token(TokenType tokenType, int start, int length, string sourceText) + : this(tokenType, sourceText.Substring(start, length)) + { } + + public int Length { get; } = Value.Length; +} \ No newline at end of file diff --git a/cs/Markdown/Tokens/TokenType.cs b/cs/Markdown/Tokens/TokenType.cs new file mode 100644 index 000000000..ba0d0b637 --- /dev/null +++ b/cs/Markdown/Tokens/TokenType.cs @@ -0,0 +1,13 @@ +namespace Markdown.Tokens; + +public enum TokenType +{ + Word, + Space, + Number, + Newline, + Asterisk, + Backslash, + Underscore, + Octothorpe, +} \ No newline at end of file diff --git a/cs/MarkdownTests/Generator/HtmlGeneratorTest.cs b/cs/MarkdownTests/Generator/HtmlGeneratorTest.cs new file mode 100644 index 000000000..a1c621a01 --- /dev/null +++ b/cs/MarkdownTests/Generator/HtmlGeneratorTest.cs @@ -0,0 +1,67 @@ +using FluentAssertions; +using Markdown.Generator; +using Markdown.Parser.Nodes; +using Markdown.Parser.Rules; +using Markdown.Tokenizer; +using Markdown.Tokens; + +namespace MarkdownTests.Generator; + +[TestFixture] +[TestOf(typeof(HtmlGenerator))] +public class HtmlGeneratorTest +{ + private readonly BodyRule rule = new(); + private readonly HtmlGenerator generator = new(); + private readonly MdTokenizer tokenizer = new(); + + [Test] + public void Render_ShouldCorrectlyRenderHeader() + { + var root = GenerateNode("# abc def", out var tokens); + var html = generator.Render(root, tokens); + html.Should().Be("

    abc def

    "); + } + [Test] + public void Render_ShouldCorrectlyRenderBold() + { + var root = GenerateNode("abc __def__ ghi", out var tokens); + var html = generator.Render(root, tokens); + html.Should().Be("

    abc def ghi

    "); + } + [Test] + public void Render_ShouldCorrectlyRenderItalic() + { + var root = GenerateNode("abc _def_ ghi", out var tokens); + var html = generator.Render(root, tokens); + html.Should().Be("

    abc def ghi

    "); + } + [Test] + public void Render_ShouldCorrectlyRenderPlainText() + { + var root = GenerateNode("abc def ghi jkl", out var tokens); + var html = generator.Render(root, tokens); + html.Should().Be("

    abc def ghi jkl

    "); + } + [Test] + public void Render_ShouldCorrectlyRenderNestedTags() + { + var root = GenerateNode("# abc _def_ __ghi__", out var tokens); + var html = generator.Render(root, tokens); + html.Should().Be("

    abc def ghi

    "); + } + + [Test] + public void Render_ShouldCorrectlyRenderUnorderedList() + { + var root = GenerateNode("* abc def\n* ghi jkl", out var tokens); + var html = generator.Render(root, tokens); + html.Should().Be("
    • abc def
    • ghi jkl
    "); + } + + private Node? GenerateNode(string text, out List tokens) + { + tokens = tokenizer.Tokenize($"{text}\n"); + return rule.Match(tokens); + } +} \ No newline at end of file diff --git a/cs/MarkdownTests/MdAcceptanceTest.cs b/cs/MarkdownTests/MdAcceptanceTest.cs new file mode 100644 index 000000000..acd521869 --- /dev/null +++ b/cs/MarkdownTests/MdAcceptanceTest.cs @@ -0,0 +1,37 @@ +using System.Text; +using Markdown; + +namespace MarkdownTests; + +public class MdAcceptanceTest +{ + private const string TestInputFilename = "MdAcceptanceTest.txt"; + private const string TestOutputFilename = "MdAcceptanceResult.html"; + private static readonly string? TestDirectory = Directory.GetParent(".")?.Parent?.Parent?.FullName; + private readonly string testInputPath = Path.Combine( + TestDirectory ?? throw new InvalidOperationException(), TestInputFilename); + private readonly string testOutputPath = Path.Combine( + TestDirectory ?? throw new InvalidOperationException(), TestOutputFilename); + + [Test] + public async Task Render_ShouldReturnCorrectHtml() + { + string html; + await using (var fstream = new FileStream(testInputPath, FileMode.Open)) + { + var buffer = new byte[fstream.Length]; + _ = await fstream.ReadAsync(buffer); + var markdown = Encoding.UTF8.GetString(buffer); + Console.WriteLine(markdown); + + html = Md.Render(markdown); + } + Console.WriteLine(html); + await using (var fstream = new FileStream(testOutputPath, FileMode.Create)) + { + Console.WriteLine(fstream.Name); + var buffer = Encoding.UTF8.GetBytes(html); + await fstream.WriteAsync(buffer); + } + } +} \ No newline at end of file diff --git a/cs/MarkdownTests/MdAcceptanceTest.txt b/cs/MarkdownTests/MdAcceptanceTest.txt new file mode 100644 index 000000000..efe9fdb4d --- /dev/null +++ b/cs/MarkdownTests/MdAcceptanceTest.txt @@ -0,0 +1,33 @@ +# Это заголовок +#Это не заголовок +И даже # это не заголовок +* Это список +*Это не список +И это тоже * не список +_Это выделится_ +__Это выделится__ +_Это не выделится__ +__И это не выделится_ +Внутри сл_оооооо_ва +Тоже раб__оооооо__тает +Но если неправильн_оооооо__ +То не раб__оооооо_тает +В _нача_ле и в кооо_оонце_ __тож__е рабо__тает__ +Так \_не получится_ +И _так\_ +Вот _ так_ и _так _не получится +И _если __пересечь_, то не получится__ +__Вот так все _получится___ +_А вот так уже __не все___ +# Все _здесь_ __тоже__ будет _раб_оооо__тать__ +* И даже _здесь_ +* Будет работать __обалдеть__ + +* А это уже другой список +Здесь не список +* Здесь опять другой список +* И вот * такая штуко*вина это всего одна запись в списке +* # И заголовка здесь нет +* В\роде все +Следующая строка не список +* \ No newline at end of file diff --git a/cs/MarkdownTests/MdTest.cs b/cs/MarkdownTests/MdTest.cs new file mode 100644 index 000000000..8a2235470 --- /dev/null +++ b/cs/MarkdownTests/MdTest.cs @@ -0,0 +1,49 @@ +using System.Diagnostics; +using System.Text; +using FluentAssertions; +using Markdown.Generator; +using Markdown.Parser.Rules; +using Markdown.Tokenizer; + +namespace MarkdownTests; + +public class Tests +{ + private readonly BodyRule rule = new(); + private readonly HtmlGenerator generator = new(); + private readonly MdTokenizer tokenizer = new(); + + [Test] + public void Markdown_Render_ShouldWorkFast() + { + const int scale = 2; + var sw = new Stopwatch(); + var results = new List(); + for (var len = 640; len <= 5120; len *= scale) + { + var markdown = GenerateMarkdown(len); + sw.Start(); RenderMarkdown(markdown); sw.Stop(); + results.Add(sw.Elapsed); sw.Reset(); + } + + Enumerable.Range(1, results.Count - 1) + .Select(i => results[i].Ticks / results[i - 1].Ticks) + .Should().OnlyContain(timeRatio => timeRatio < scale * scale); + } + private static string GenerateMarkdown(int len) + { + var rand = new Random(); + List specElements = [" ", "_", "__", "#", "\\", Environment.NewLine]; + var alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXY".Select(char.ToString).ToList(); + + var allElements = specElements.Concat(alphabet).ToList(); + return Enumerable.Range(0, len).Aggregate(new StringBuilder(), + (sb, _) => sb.Append(allElements[rand.Next(allElements.Count)])).ToString(); + } + private void RenderMarkdown(string markdown) + { + var tokens = tokenizer.Tokenize($"{markdown}\n"); + var root = rule.Match(tokens); + generator.Render(root, tokens); + } +} \ No newline at end of file diff --git a/cs/MarkdownTests/Parser/Rules/BodyRuleTest.cs b/cs/MarkdownTests/Parser/Rules/BodyRuleTest.cs new file mode 100644 index 000000000..de48705a7 --- /dev/null +++ b/cs/MarkdownTests/Parser/Rules/BodyRuleTest.cs @@ -0,0 +1,102 @@ +using FluentAssertions; +using Markdown.Parser.Nodes; +using Markdown.Parser.Rules; +using Markdown.Parser.Rules.Tools; +using Markdown.Tokenizer; + +namespace MarkdownTests.Parser.Rules; + +[TestFixture] +[TestOf(typeof(BodyRule))] +public class BodyRuleTest +{ + private readonly BodyRule rule = new(); + private readonly MdTokenizer tokenizer = new(); + + [Test] + public void Match_ShouldMatchCorrectly_WhenSimpleText() + { + const string text = + """ + abc def ghi + jkl mno pqrs + ter uvw xyz + """; + var tokens = tokenizer.Tokenize($"{text.Replace("\r", "")}\n"); + + var node = rule.Match(tokens) as TagNode; + node.Should().NotBeNull(); + node.Children.Should().OnlyContain(n => n.NodeType == NodeType.Paragraph); + } + + [Test] + public void Match_ShouldMatchCorrectly_WhenTextWithHeader() + { + const string text = + """ + # abc def ghi + jkl mno pqr + """; + var tokens = tokenizer.Tokenize($"{text.Replace("\r", "")}\n"); + + var node = rule.Match(tokens) as TagNode; + + node.Should().NotBeNull(); + node.Children.Select(n => n.NodeType).Should().BeEquivalentTo( + [NodeType.Header, NodeType.Paragraph], options => options.WithStrictOrdering()); + node.ToText(tokens).Should().Be("abc def ghijkl mno pqr"); + } + + [Test] + public void Match_ShouldMatchCorrectly_WhenTextWithEscapedHeader() + { + const string text = + """ + \# abc def ghi + jkl mno pqr + """; + var tokens = tokenizer.Tokenize($"{text.Replace("\r", "")}\n"); + + var node = rule.Match(tokens) as TagNode; + + node.Should().NotBeNull(); + node.Children.Select(n => n.NodeType).Should().BeEquivalentTo( + [NodeType.Escape, NodeType.Paragraph, NodeType.Paragraph], options => options.WithStrictOrdering()); + node.ToText(tokens).Should().Be("# abc def ghijkl mno pqr"); + } + + [Test] + public void Match_ShouldMatchCorrectly_WhenTextWithUnorderedList() + { + const string text = + """ + * abc def ghi + * jkl mno pqr + """; + var tokens = tokenizer.Tokenize($"{text.Replace("\r", "")}\n"); + + var node = rule.Match(tokens) as TagNode; + + node.Should().NotBeNull(); + node.Children.Count.Should().Be(1); + node.Children.First().NodeType.Should().Be(NodeType.UnorderedList); + } + + [Test] + public void Match_ShouldMatchCorrectly_WhenTextWithEscapedList() + { + const string text = + """ + \* abc def ghi + jkl mno pqr + """; + var tokens = tokenizer.Tokenize($"{text.Replace("\r", "")}\n"); + + var node = rule.Match(tokens) as TagNode; + + node.Should().NotBeNull(); + node.Children.Select(n => n.NodeType).Should().BeEquivalentTo( + [NodeType.Escape, NodeType.Paragraph, NodeType.Paragraph], options => options.WithStrictOrdering()); + node.ToText(tokens).Should().Be("* abc def ghijkl mno pqr"); + } +} \ No newline at end of file diff --git a/cs/MarkdownTests/Parser/Rules/BoldRuleTest.cs b/cs/MarkdownTests/Parser/Rules/BoldRuleTest.cs new file mode 100644 index 000000000..a92d045ec --- /dev/null +++ b/cs/MarkdownTests/Parser/Rules/BoldRuleTest.cs @@ -0,0 +1,142 @@ +using FluentAssertions; +using Markdown.Parser.Nodes; +using Markdown.Parser.Rules; +using Markdown.Parser.Rules.Tools; +using Markdown.Tokenizer; + +namespace MarkdownTests.Parser.Rules; + +[TestFixture] +[TestOf(typeof(BoldRule))] +public class BoldRuleTest +{ + private readonly BoldRule rule = new(); + private readonly MdTokenizer tokenizer = new(); + + [TestCase("abc")] + [TestCase("abc def ghi")] + public void Match_ShouldMatch_SimpleText(string text) + { + var tokens = tokenizer.Tokenize($"__{text}__"); + + var node = rule.Match(tokens) as TagNode; + + node.Should().NotBeNull(); + node.ToText(tokens).Should().Be(text); + node.NodeType.Should().Be(NodeType.Bold); + node.Children.Should().ContainSingle(n => n.NodeType == NodeType.Text); + } + + [TestCase("abc")] + [TestCase("abc def ghi")] + public void Match_ShouldMatch_InnerItalic(string text) + { + var tokens = tokenizer.Tokenize($"___{text}___"); + + var node = rule.Match(tokens) as TagNode; + + node.Should().NotBeNull(); + node.NodeType.Should().Be(NodeType.Bold); + node.ToText(tokens).Should().Be($"{text}"); + node.Children.Should().ContainSingle(n => n.NodeType == NodeType.Italic); + } + + [TestCase("abc def _ghi_")] + [TestCase("_abc_ def ghi")] + public void Match_ShouldMatch_TextWithItalicTagAfterOpenedBoldOrBeforeClosedBold(string text) + { + var tokens = tokenizer.Tokenize($"__{text}__"); + + var node = rule.Match(tokens) as TagNode; + + node.Should().NotBeNull(); + node.Children + .Select(n => n.NodeType).Should().HaveCount(2) + .And.BeEquivalentTo([NodeType.Text, NodeType.Italic]); + node.NodeType.Should().Be(NodeType.Bold); + } + + [Test] + public void Match_ShouldMatch_TextWithInnerItalicTag() + { + const string text = "abc _def_ ghi"; + var tokens = tokenizer.Tokenize($"__{text}__"); + + var node = rule.Match(tokens) as TagNode; + + node.Should().NotBeNull(); + node.Children + .Select(n => n.NodeType) + .Should().HaveCount(3) + .And.BeEquivalentTo([NodeType.Text, NodeType.Italic, NodeType.Text]); + node.NodeType.Should().Be(NodeType.Bold); + } + + [TestCase("a__bc__", 1, ExpectedResult = "bc")] + [TestCase("a__b__c", 1, ExpectedResult = "b")] + [TestCase("__a__bc", 0, ExpectedResult = "a")] + public string Match_ShouldMatch_WhenTagInsideWord(string text, int begin) + { + var tokens = tokenizer.Tokenize(text); + + var node = rule.Match(tokens, begin) as TagNode; + + node.Should().NotBeNull(); + node.Children.Should().ContainSingle(n => n.NodeType == NodeType.Text); + return node.Children.ToText(tokens); + } + + [TestCase("a__bc_def_gh__i", 1)] + public void Match_ShouldMatch_WhenTagInsideWordAndItalicTagInsideTag(string text, int begin) + { + var tokens = tokenizer.Tokenize(text); + + var node = rule.Match(tokens, begin) as TagNode; + + node.Should().NotBeNull(); + node.Children + .Select(n => n.NodeType) + .Should().HaveCount(3) + .And.BeEquivalentTo([NodeType.Text, NodeType.Italic, NodeType.Text]); + node.NodeType.Should().Be(NodeType.Bold); + } + + [TestCase("abc__123__def", 1)] + [TestCase("abc__123__", 1)] + [TestCase("__123__abc", 0)] + [TestCase("abc de__123__f", 3)] + public void Match_ShouldNotMatch_Numbers(string text, int begin) + { + var tokens = tokenizer.Tokenize(text); + + var node = rule.Match(tokens, begin) as TagNode; + + node.Should().BeNull(); + } + + [TestCase("ab__c def__", 1)] + [TestCase("__abc d__ef", 0)] + [TestCase("a__bc d__ef", 1)] + public void Match_ShouldNotMatch_WhenTagInDifferentWords(string text, int begin) + { + var tokens = tokenizer.Tokenize(text); + + var node = rule.Match(tokens, begin) as TagNode; + + node.Should().BeNull(); + } + + [TestCase("__abc __", 0)] + [TestCase("__ abc__", 0)] + [TestCase("__abc def __", 0)] + [TestCase("__ abc def__", 0)] + public void Match_ShouldNotMatch_WhenSpaceIsAfterOpeningTagOrBeforeClosing + (string text, int begin) + { + var tokens = tokenizer.Tokenize(text); + + var node = rule.Match(tokens, begin) as TagNode; + + node.Should().BeNull(); + } +} \ No newline at end of file diff --git a/cs/MarkdownTests/Parser/Rules/BoolRules/AndRuleTest.cs b/cs/MarkdownTests/Parser/Rules/BoolRules/AndRuleTest.cs new file mode 100644 index 000000000..1aeb0948b --- /dev/null +++ b/cs/MarkdownTests/Parser/Rules/BoolRules/AndRuleTest.cs @@ -0,0 +1,41 @@ +using FluentAssertions; +using Markdown.Parser.Nodes; +using Markdown.Parser.Rules.BoolRules; +using Markdown.Parser.Rules.Tools; +using Markdown.Tokenizer; +using Markdown.Tokens; + +namespace MarkdownTests.Parser.Rules.BoolRules; + +[TestFixture] +[TestOf(typeof(AndRule))] +public class AndRuleTest +{ + private readonly MdTokenizer tokenizer = new(); + private readonly AndRule rule = new(TokenType.Word, TokenType.Number); + + [TestCase("abc123")] + public void Match_ShouldMatch_WhenRightPattern(string text) + { + var tokens = tokenizer.Tokenize(text); + + var node = rule.Match(tokens) as SpecNode; + + node.Should().NotBeNull(); + node.Nodes.Select(n => n.NodeType).Should().BeEquivalentTo( + [NodeType.Text, NodeType.Text], options => options.WithStrictOrdering()); + node.ToText(tokens).Should().Be(text); + } + + [TestCase("123abc")] + [TestCase("abc")] + [TestCase("123")] + public void Match_ShouldNotMatch_WhenWrongPattern(string text) + { + var tokens = tokenizer.Tokenize(text); + + var node = rule.Match(tokens) as SpecNode; + + node.Should().BeNull(); + } +} \ No newline at end of file diff --git a/cs/MarkdownTests/Parser/Rules/BoolRules/ConditionalRuleTest.cs b/cs/MarkdownTests/Parser/Rules/BoolRules/ConditionalRuleTest.cs new file mode 100644 index 000000000..d7b742ec6 --- /dev/null +++ b/cs/MarkdownTests/Parser/Rules/BoolRules/ConditionalRuleTest.cs @@ -0,0 +1,46 @@ +using System.Collections; +using FluentAssertions; +using Markdown.Parser.Nodes; +using Markdown.Parser.Rules; +using Markdown.Parser.Rules.BoolRules; +using Markdown.Parser.Rules.Tools; +using Markdown.Tokenizer; + +namespace MarkdownTests.Parser.Rules.BoolRules; + +[TestFixture] +[TestOf(typeof(ConditionalRule))] +public class ConditionalRuleTest +{ + private readonly TextRule primaryRule = new(); + private readonly MdTokenizer tokenizer = new(); + private static IEnumerable CasesThatMatchesPrimaryRule + { + get + { + yield return new TestCaseData("abc def"); + yield return new TestCaseData(" abc def ghi "); + } + } + [Test, TestCaseSource(nameof(CasesThatMatchesPrimaryRule))] + public void ConditionalRule_Match_ShouldMatchNodeWithRightCondition(string text) + { + var tokens = tokenizer.Tokenize(text); + var rule = new ConditionalRule(primaryRule, (node, _) => node.NodeType == NodeType.Text); + + var match = rule.Match(tokens); + match.Should().NotBeNull(); + match.ToText(tokens).Should().Be(text); + match.NodeType.Should().Be(NodeType.Text); + } + + [Test, TestCaseSource(nameof(CasesThatMatchesPrimaryRule))] + public void ConditionalRule_Match_ShouldNotMatchWithWrongCondition(string text) + { + var tokens = tokenizer.Tokenize(text); + var rule = new ConditionalRule(primaryRule, (_,_) => false); + + var match = rule.Match(tokens); + match.Should().BeNull(); + } +} \ No newline at end of file diff --git a/cs/MarkdownTests/Parser/Rules/BoolRules/ContinuesRuleTest.cs b/cs/MarkdownTests/Parser/Rules/BoolRules/ContinuesRuleTest.cs new file mode 100644 index 000000000..20e8ec6db --- /dev/null +++ b/cs/MarkdownTests/Parser/Rules/BoolRules/ContinuesRuleTest.cs @@ -0,0 +1,34 @@ +using FluentAssertions; +using Markdown.Parser.Rules; +using Markdown.Parser.Rules.BoolRules; +using Markdown.Parser.Rules.Tools; +using Markdown.Tokenizer; +using Markdown.Tokens; + +namespace MarkdownTests.Parser.Rules.BoolRules; + +[TestFixture] +[TestOf(typeof(ContinuesRule))] +public class ContinuesRuleTest +{ + private readonly MdTokenizer tokenizer = new(); + [Test] + public void Match_ShouldMatchWhenRightContinues() + { + var tokens = tokenizer.Tokenize("abc def ghi_"); + var rule = new ContinuesRule(new TextRule(), new PatternRule(TokenType.Underscore)); + + var match = rule.Match(tokens); + match.Should().NotBeNull(); + match.ToText(tokens).Should().Be("abc def ghi"); + } + [Test] + public void Match_ShouldNotMatchWhenWrongContinues() + { + var tokens = tokenizer.Tokenize("abc def ghi_"); + var rule = new ContinuesRule(new TextRule(), new PatternRule(TokenType.Octothorpe)); + + var match = rule.Match(tokens); + match.Should().BeNull(); + } +} \ No newline at end of file diff --git a/cs/MarkdownTests/Parser/Rules/BoolRules/KleeneStarRuleTest.cs b/cs/MarkdownTests/Parser/Rules/BoolRules/KleeneStarRuleTest.cs new file mode 100644 index 000000000..a6b9b0b48 --- /dev/null +++ b/cs/MarkdownTests/Parser/Rules/BoolRules/KleeneStarRuleTest.cs @@ -0,0 +1,41 @@ +using FluentAssertions; +using Markdown.Parser.Rules.BoolRules; +using Markdown.Parser.Rules.Tools; +using Markdown.Tokenizer; +using Markdown.Tokens; + +namespace MarkdownTests.Parser.Rules.BoolRules; + +[TestFixture] +[TestOf(typeof(KleeneStarRule))] +public class KleeneStarRuleTest +{ + private readonly MdTokenizer tokenizer = new(); + private readonly OrRule primaryRule = new(TokenType.Word, TokenType.Space); + + + [TestCase("abc")] + [TestCase("abc def ghi jkl")] + public void Match_ShouldReturnManyMatches(string text) + { + var rule = new KleeneStarRule(primaryRule); + var tokens = tokenizer.Tokenize(text); + + var match = rule.Match(tokens); + + match.Should().NotBeNull(); + match.ToText(tokens).Should().Be(text); + } + [TestCase("abc def ghi _ jkl", ExpectedResult = "abc def ghi ")] + [TestCase("abc def ghi _jkl_ mno", ExpectedResult = "abc def ghi ")] + public string Match_ShouldStopWhenNotMatched(string text) + { + var rule = new KleeneStarRule(primaryRule); + var tokens = tokenizer.Tokenize(text); + + var match = rule.Match(tokens); + + match.Should().NotBeNull(); + return match.ToText(tokens); + } +} \ No newline at end of file diff --git a/cs/MarkdownTests/Parser/Rules/BoolRules/OrRuleTest.cs b/cs/MarkdownTests/Parser/Rules/BoolRules/OrRuleTest.cs new file mode 100644 index 000000000..25f428521 --- /dev/null +++ b/cs/MarkdownTests/Parser/Rules/BoolRules/OrRuleTest.cs @@ -0,0 +1,47 @@ +using FluentAssertions; +using Markdown.Parser.Nodes; +using Markdown.Parser.Rules.BoolRules; +using Markdown.Tokenizer; +using Markdown.Tokens; + +namespace MarkdownTests.Parser.Rules.BoolRules; + +[TestFixture] +[TestOf(typeof(OrRule))] +public class OrRuleTest +{ + private readonly MdTokenizer tokenizer = new(); + private readonly OrRule rule = new(TokenType.Word, TokenType.Number); + + [TestCase("abc def")] + [TestCase("123 abc")] + public void Match_ShouldMatchOneOfRule(string text) + { + var tokens = tokenizer.Tokenize(text); + + var node = rule.Match(tokens) as TextNode; + + node.Should().NotBeNull(); + } + [Test] + public void Match_ShouldMatchFirstAppearance() + { + const string text = "abc123"; + var tokens = tokenizer.Tokenize(text); + + var node = rule.Match(tokens) as TextNode; + + node.Should().NotBeNull(); + node.ToText(tokens).Should().Be("abc"); + } + [TestCase("_abc def ghi_")] + [TestCase(" 123")] + public void Match_ShouldNotMatchWrongPattern(string text) + { + var tokens = tokenizer.Tokenize(text); + + var node = rule.Match(tokens) as TextNode; + + node.Should().BeNull(); + } +} \ No newline at end of file diff --git a/cs/MarkdownTests/Parser/Rules/EscapeRuleTest.cs b/cs/MarkdownTests/Parser/Rules/EscapeRuleTest.cs new file mode 100644 index 000000000..60295ff01 --- /dev/null +++ b/cs/MarkdownTests/Parser/Rules/EscapeRuleTest.cs @@ -0,0 +1,31 @@ +using FluentAssertions; +using Markdown.Parser.Rules; +using Markdown.Parser.Rules.Tools; +using Markdown.Tokenizer; +using Markdown.Tokens; + +namespace MarkdownTests.Parser.Rules; + +[TestFixture] +[TestOf(typeof(EscapeRule))] +public class EscapeRuleTest +{ + private readonly EscapeRule rule = new([TokenType.Underscore, TokenType.Octothorpe]); + private readonly MdTokenizer tokenizer = new(); + [TestCase(@"\_", ExpectedResult = "_")] + [TestCase(@"\#", ExpectedResult = "#")] + public string? EscapeRule_Match_ShouldEscapeTagsSymbols(string text) + { + var tokens = tokenizer.Tokenize(text); + var match = rule.Match(tokens); + return match?.ToText(tokens); + } + [TestCase(@"\abc def")] + [TestCase(@"\ abc def")] + public void EscapeRule_Match_ShouldNotEscapeNonTagSymbols(string text) + { + var tokens = tokenizer.Tokenize(text); + var match = rule.Match(tokens); + match.Should().BeNull(); + } +} \ No newline at end of file diff --git a/cs/MarkdownTests/Parser/Rules/HeaderRuleTest.cs b/cs/MarkdownTests/Parser/Rules/HeaderRuleTest.cs new file mode 100644 index 000000000..574d3232a --- /dev/null +++ b/cs/MarkdownTests/Parser/Rules/HeaderRuleTest.cs @@ -0,0 +1,66 @@ +using FluentAssertions; +using Markdown.Parser.Nodes; +using Markdown.Parser.Rules; +using Markdown.Tokenizer; + +namespace MarkdownTests.Parser.Rules; + +[TestFixture] +[TestOf(typeof(HeaderRule))] +public class HeaderRuleTest +{ + private readonly MdTokenizer tokenizer = new(); + private readonly HeaderRule rule = new(); + + [TestCase("abc")] + [TestCase("abc def ghi")] + public void Match_ShouldMatch_SimpleHeader(string text) + { + var tokens = tokenizer.Tokenize($"# {text}\n"); + + var node = rule.Match(tokens) as TagNode; + + node.Should().NotBeNull(); + } + + [TestCase("_abc_")] + [TestCase("__abc__")] + [TestCase("abc _def_")] + [TestCase("abc __def__")] + [TestCase("abc _def_ ghi")] + [TestCase("abc __def__ ghi")] + [TestCase("abc __d_e_f__ ghi")] + public void Match_ShouldMatch_HeaderWithInnerTags(string text) + { + var tokens = tokenizer.Tokenize($"# {text}\n"); + + var node = rule.Match(tokens) as TagNode; + + node.Should().NotBeNull(); + } + + [Test] + public void Match_ShouldNotMatch_IfNoSpaceAfterOctothorpe() + { + const string text = "#abc\n"; + + var tokens = tokenizer.Tokenize(text); + + var node = rule.Match(tokens) as TagNode; + + node.Should().BeNull(); + } + + [Test] + public void Match_ShouldNotMatch_IfNoEndOfLine() + { + const string text = "# abc"; + + var tokens = tokenizer.Tokenize(text); + + var node = rule.Match(tokens) as TagNode; + + node.Should().BeNull(); + } + +} \ No newline at end of file diff --git a/cs/MarkdownTests/Parser/Rules/ItalicRuleTest.cs b/cs/MarkdownTests/Parser/Rules/ItalicRuleTest.cs new file mode 100644 index 000000000..86c9c38a7 --- /dev/null +++ b/cs/MarkdownTests/Parser/Rules/ItalicRuleTest.cs @@ -0,0 +1,114 @@ +using FluentAssertions; +using Markdown.Parser.Nodes; +using Markdown.Parser.Rules; +using Markdown.Parser.Rules.Tools; +using Markdown.Tokenizer; + +namespace MarkdownTests.Parser.Rules; + +[TestFixture] +[TestOf(typeof(ItalicRule))] +public class ItalicRuleTest +{ + private readonly ItalicRule rule = new(); + private readonly MdTokenizer tokenizer = new(); + + [TestCase("abc")] + [TestCase("abc def ghi jkl")] + public void Match_ShouldMatch_SimpleText(string text) + { + var tokens = tokenizer.Tokenize($"_{text}_"); + + var node = rule.Match(tokens) as TagNode; + + node.Should().NotBeNull(); + node.NodeType.Should().Be(NodeType.Italic); + node.Children.Should().ContainSingle(n => n.NodeType == NodeType.Text); + node.ToText(tokens).Should().Be(text); + } + + [TestCase("abc def ghi_123_jkl", 5)] + [TestCase("def 12_34_56 ghi jkl", 3)] + public void Match_ShouldNotMatch_TextWithNumbers(string text, int begin) + { + var tokens = tokenizer.Tokenize(text); + + var node = rule.Match(tokens, begin) as TagNode; + + node.Should().BeNull(); + } + + [TestCase("ab_cde_f", 1, ExpectedResult = "cde")] + [TestCase("abcd_ef_", 1, ExpectedResult = "ef")] + [TestCase("abc _de_fghi", 2, ExpectedResult = "de")] + [TestCase("_ab_c", 0, ExpectedResult = "ab")] + [TestCase("ab_c_", 1, ExpectedResult = "c")] + public string Match_ShouldMatch_TagInWord(string text, int begin) + { + var tokens = tokenizer.Tokenize(text); + + var node = rule.Match(tokens, begin) as TagNode; + + node.Should().NotBeNull(); + node.Children.Should().ContainSingle(n => n.NodeType == NodeType.Text); + return node.ToText(tokens); + } + + [TestCase("ab_c def gh_i", 1)] + [TestCase("ab_c def ghi_", 1)] + [TestCase("_abc def g_hi", 0)] + public void Match_ShouldNotMatch_TagInDifferentWords(string text, int begin) + { + var tokens = tokenizer.Tokenize(text); + + var node = rule.Match(tokens, begin) as TagNode; + + node.Should().BeNull(); + } + + [TestCase("abc_ def_", 1)] + [TestCase("abc _def _ghi", 2)] + public void Match_ShouldNotMatch_WhenSpaceAfterOpenTagOrBeforeClosingTag + (string text, int begin) + { + var tokens = tokenizer.Tokenize(text); + + var node = rule.Match(tokens, begin) as TagNode; + + node.Should().BeNull(); + } + + [TestCase("abc __def_", 2)] + [TestCase("abc _def__", 2)] + public void Match_ShouldNotMatch_DifferentUnderscores(string text, int begin) + { + var tokens = tokenizer.Tokenize(text); + + var node = rule.Match(tokens, begin) as TagNode; + + node.Should().BeNull(); + } + + [TestCase("abc _def __ghi jkl_ mno__", 2)] + public void Match_ShouldNotMatch_IntersectedUnderscores(string text, int begin) + { + var tokens = tokenizer.Tokenize(text); + + var node = rule.Match(tokens, begin) as TagNode; + + node.Should().BeNull(); + } + + [TestCase("__abc def__ ghi jkl")] + [TestCase("abc __def ghi__ jkl")] + [TestCase("abc def __ghi jkl__")] + public void ItalicRule_Match_BoldTagInItalicShouldNotBeMatched(string text) + { + var tokens = tokenizer.Tokenize($"_{text}_"); + + var node = rule.Match(tokens) as TagNode; + + node.Should().NotBeNull(); + node.ToText(tokens).Should().Be(text); + } +} \ No newline at end of file diff --git a/cs/MarkdownTests/Parser/Rules/ListItemRuleTest.cs b/cs/MarkdownTests/Parser/Rules/ListItemRuleTest.cs new file mode 100644 index 000000000..692caa6e1 --- /dev/null +++ b/cs/MarkdownTests/Parser/Rules/ListItemRuleTest.cs @@ -0,0 +1,83 @@ +using FluentAssertions; +using Markdown.Parser.Nodes; +using Markdown.Parser.Rules; +using Markdown.Parser.Rules.Tools; +using Markdown.Tokenizer; + +namespace MarkdownTests.Parser.Rules; + +[TestFixture] +[TestOf(typeof(ListItemRule))] +public class ListItemRuleTest +{ + private readonly ListItemRule rule = new(); + private readonly MdTokenizer tokenizer = new(); + + [TestCase("abc")] + [TestCase("abc def ghi")] + public void Match_ShouldMatch_SimpleListItem(string text) + { + var tokens = tokenizer.Tokenize($"* {text}\n"); + + var node = rule.Match(tokens) as TagNode; + + node.Should().NotBeNull(); + node.ToText(tokens).Should().Be(text); + } + + [TestCase("_abc_")] + [TestCase("__abc__")] + [TestCase("abc _def_")] + [TestCase("abc __def__")] + [TestCase("abc _def_ ghi")] + [TestCase("abc __def__ ghi")] + [TestCase("abc __d_e_f__ ghi")] + public void Match_ShouldMatch_ListItemWithInnerTags(string text) + { + var tokens = tokenizer.Tokenize($"* {text}\n"); + + var node = rule.Match(tokens) as TagNode; + + node.Should().NotBeNull(); + } + + [Test] + public void Match_ShouldMatchCorrectly_ComplexListItem() + { + const string text = "abc __def__ _ghi_"; + var tokens = tokenizer.Tokenize($"* {text}\n"); + + var node = rule.Match(tokens) as TagNode; + + node.Should().NotBeNull(); + node.Children.Count.Should().Be(4); + node.Children + .Select(n => n.NodeType).Should().BeEquivalentTo + ([NodeType.Text, NodeType.Bold, NodeType.Text, NodeType.Italic], + o => o.WithStrictOrdering()); + } + + [Test] + public void Match_ShouldNotMatch_IfNoSpaceAfterAsteriks() + { + const string text = "*abc\n"; + + var tokens = tokenizer.Tokenize(text); + + var node = rule.Match(tokens) as TagNode; + + node.Should().BeNull(); + } + + [Test] + public void Match_ShouldNotMatch_IfNoEndOfLine() + { + const string text = "* abc"; + + var tokens = tokenizer.Tokenize(text); + + var node = rule.Match(tokens) as TagNode; + + node.Should().BeNull(); + } +} \ No newline at end of file diff --git a/cs/MarkdownTests/Parser/Rules/ParagraphRuleTest.cs b/cs/MarkdownTests/Parser/Rules/ParagraphRuleTest.cs new file mode 100644 index 000000000..91093f56e --- /dev/null +++ b/cs/MarkdownTests/Parser/Rules/ParagraphRuleTest.cs @@ -0,0 +1,75 @@ +using FluentAssertions; +using Markdown.Parser.Nodes; +using Markdown.Parser.Rules; +using Markdown.Parser.Rules.Tools; +using Markdown.Tokenizer; + +namespace MarkdownTests.Parser.Rules; + +[TestFixture] +[TestOf(typeof(ParagraphRule))] +public class ParagraphRuleTest +{ + private readonly ParagraphRule rule = new(); + private readonly MdTokenizer tokenizer = new(); + + [TestCase("abcdefghi")] + [TestCase("abc def ghi ")] + public void Match_ShouldMatch_SimpleCase(string text) + { + var tokens = tokenizer.Tokenize($"{text}\n"); + + var node = rule.Match(tokens) as TagNode; + + node.Should().NotBeNull(); + node.NodeType.Should().Be(NodeType.Paragraph); + node.Consumed.Should().Be(tokens.Count); + node.ToText(tokens).Should().Be(text); + } + + [Test] + public void Match_ShouldMatch_WhenParagraphWithInnerTags() + { + const string text = "abc _def_ __ghi jkl__"; + var tokens = tokenizer.Tokenize($"{text}\n"); + + var node = rule.Match(tokens) as TagNode; + + node.Should().NotBeNull(); + node.NodeType.Should().Be(NodeType.Paragraph); + + node.Consumed.Should().Be(tokens.Count); + node.Children.Select(n => n.NodeType) + .Should().HaveCount(4) + .And.BeEquivalentTo([NodeType.Text, NodeType.Italic, NodeType.Bold, NodeType.Text]); + + node.Children + .First(n => n.NodeType == NodeType.Bold) + .ToText(tokens).Should().Be("ghi jkl"); + node.Children + .First(n => n.NodeType == NodeType.Italic) + .ToText(tokens).Should().Be("def"); + } + + [TestCase("_abc __def ghi_ jkl__")] + [TestCase("_abc __def ghi jkl")] + public void Match_ShouldMatchAsText_WhenInnerTagsIntersect(string text) + { + var tokens = tokenizer.Tokenize($"{text}\n"); + + var node = rule.Match(tokens) as TagNode; + + node.Should().NotBeNull(); + node.ToText(tokens).Should().Be(text); + node.NodeType.Should().Be(NodeType.Paragraph); + node.Children.Should().OnlyContain(n => n.NodeType == NodeType.Text); + } + + [TestCase(@"abc \_def\_ ghi", ExpectedResult = "abc _def_ ghi")] + public string? Match_ShouldMatchCorrectly_WhenTagsInParagraphAreEscaped(string text) + { + var tokens = tokenizer.Tokenize($"{text}\n"); + var node = rule.Match(tokens) as TagNode; + return node?.ToText(tokens); + } +} \ No newline at end of file diff --git a/cs/MarkdownTests/Parser/Rules/PatternRuleTest.cs b/cs/MarkdownTests/Parser/Rules/PatternRuleTest.cs new file mode 100644 index 000000000..3cc1bce28 --- /dev/null +++ b/cs/MarkdownTests/Parser/Rules/PatternRuleTest.cs @@ -0,0 +1,39 @@ +using FluentAssertions; +using Markdown.Parser.Nodes; +using Markdown.Parser.Rules; +using Markdown.Tokenizer; +using Markdown.Tokens; + +namespace MarkdownTests.Parser.Rules; + +[TestFixture] +[TestOf(typeof(PatternRule))] +public class PatternRuleTest +{ + private readonly MdTokenizer tokenizer = new(); + + [Test] + public void Match_ShouldMatchSinglePattern() + { + var tokens = tokenizer.Tokenize("_"); + var rule = new PatternRule([TokenType.Underscore]); + + var node = rule.Match(tokens) as TextNode; + + node.Should().NotBeNull(); + node.ToText(tokens).Should().BeEquivalentTo("_"); + } + + [Test] + public void Match_ContinuesPattern() + { + var tokens = tokenizer.Tokenize("_\n "); + var rule = new PatternRule([ + TokenType.Underscore, TokenType.Newline, TokenType.Space]); + + var node = rule.Match(tokens) as TextNode; + + node.Should().NotBeNull(); + node.ToText(tokens).Should().BeEquivalentTo("_\n "); + } +} \ No newline at end of file diff --git a/cs/MarkdownTests/Parser/Rules/TextRuleTest.cs b/cs/MarkdownTests/Parser/Rules/TextRuleTest.cs new file mode 100644 index 000000000..eab6eaa89 --- /dev/null +++ b/cs/MarkdownTests/Parser/Rules/TextRuleTest.cs @@ -0,0 +1,61 @@ +using FluentAssertions; +using Markdown.Parser.Nodes; +using Markdown.Parser.Rules; +using Markdown.Tokenizer; + +namespace MarkdownTests.Parser.Rules; + +[TestFixture] +[TestOf(typeof(TextRule))] +public class TextRuleTest +{ + private readonly TextRule rule = new(); + private readonly MdTokenizer tokenizer = new(); + + [Test] + public void Match_ShouldMatch_SimpleText() + { + const string text = "abc"; + var tokens = tokenizer.Tokenize(text); + + var node = rule.Match(tokens) as TextNode; + + node.Should().NotBeNull(); + node.Consumed.Should().Be(1); + node.ToText(tokens).Should().Be(text); + } + + [TestCase("_")] + [TestCase("\n")] + public void Match_ShouldReturnNull_WhenNotText(string text) + { + var tokens = tokenizer.Tokenize(text); + + var node = rule.Match(tokens) as TextNode; + + node.Should().BeNull(); + } + + [Test] + public void Match_ShouldMatchSequenceOfWordsAndSpaces() + { + const string text = "abc def ghi"; + var tokens = tokenizer.Tokenize(text); + + var node = rule.Match(tokens) as TextNode; + + node.Should().NotBeNull(); + node.ToText(tokens).Should().BeEquivalentTo(text); + } + + [TestCase("abc _def_", ExpectedResult = "abc ")] + [TestCase("abc \ndef", ExpectedResult = "abc ")] + public string? Match_ShouldBeInterrupted_ByNonSpaceOrWordType(string text, int begin = 0) + { + var tokens = tokenizer.Tokenize(text); + + var node = rule.Match(tokens, begin) as TextNode; + + return node?.ToText(tokens); + } +} \ No newline at end of file diff --git a/cs/MarkdownTests/Parser/Rules/UnorderedListRuleTest.cs b/cs/MarkdownTests/Parser/Rules/UnorderedListRuleTest.cs new file mode 100644 index 000000000..16c4714df --- /dev/null +++ b/cs/MarkdownTests/Parser/Rules/UnorderedListRuleTest.cs @@ -0,0 +1,65 @@ +using FluentAssertions; +using Markdown.Parser.Nodes; +using Markdown.Parser.Rules; +using Markdown.Parser.Rules.Tools; +using Markdown.Tokenizer; + +namespace MarkdownTests.Parser.Rules; + +[TestFixture] +[TestOf(typeof(UnorderedListRule))] +public class UnorderedListRuleTest +{ + + private readonly MdTokenizer tokenizer = new(); + private readonly UnorderedListRule rule = new(); + + [Test] + public void Match_ShouldMatchCorrectly_WhenSimpleList() + { + var text = + """ + * abc def ghi + * jkl mno pqrs + * ter uvw xyz + """; + var tokens = tokenizer.Tokenize($"{text.Replace("\r", "")}\n"); + + var node = rule.Match(tokens) as TagNode; + node.Should().NotBeNull(); + node.ToText(tokens).Should().Be(text + .Replace("\r\n", "") + .Replace("* ", "")); + node.Children.Should().OnlyContain(n => n.NodeType == NodeType.ListItem); + } + + [Test] + public void Match_ShouldMatchCorrectly_WhenTextInItemsIsWithTags() + { + var text = + """ + * abc _def_ ghi + * jkl mno __pqrs__ + * __ter__ uvw xyz + """; + var tokens = tokenizer.Tokenize($"{text.Replace("\r", "")}\n"); + + var node = rule.Match(tokens) as TagNode; + + node.Should().NotBeNull(); + node.Children.Count.Should().Be(3); + node.Children.Should().OnlyContain(n => n.NodeType == NodeType.ListItem); + } + + [TestCase("* abc\n\n* def")] + [TestCase("* abc\ndef\n* ghi")] + public void Match_ShouldBeInterruptedByEmptyLineOrParagraph(string text) + { + var tokens = tokenizer.Tokenize(text); + + var node = rule.Match(tokens) as TagNode; + + node.Should().NotBeNull(); + node.Children.Count.Should().Be(1); + } +} \ No newline at end of file diff --git a/cs/MarkdownTests/Tokenizer/MdTokenizerTest.cs b/cs/MarkdownTests/Tokenizer/MdTokenizerTest.cs new file mode 100644 index 000000000..ea4720247 --- /dev/null +++ b/cs/MarkdownTests/Tokenizer/MdTokenizerTest.cs @@ -0,0 +1,35 @@ +using System.Text; +using FluentAssertions; +using Markdown.Tokenizer; + +namespace MarkdownTests; + +[TestFixture] +[TestOf(typeof(MdTokenizer))] +public class MdTokenizerTest +{ + private const string TextWithAllTokens = "#Text with all _possible_ __tokens__ 100% types\n"; + + [TestCase(TextWithAllTokens)] + public void Tokenize_ShouldTransformAllTextToTokens(string text) + { + var tokenizer = new MdTokenizer(); + + var tokens = tokenizer.Tokenize(text); + + var totalLength = tokens.Sum(t => t.Length); + totalLength.Should().Be(text.Length); + } + + [TestCase(TextWithAllTokens)] + public void Tokenize_ShouldReturnTokensInExpectedOrder(string text) + { + var tokenizer = new MdTokenizer(); + + var tokens = tokenizer.Tokenize(text); + var resultStringBuilder = tokens + .Aggregate(new StringBuilder(), (sb, token) => sb.Append(token.Value)); + + resultStringBuilder.ToString().Should().Be(text); + } +} \ No newline at end of file diff --git a/cs/MarkdownTests/Tokenizer/Scanners/NumberScannerTest.cs b/cs/MarkdownTests/Tokenizer/Scanners/NumberScannerTest.cs new file mode 100644 index 000000000..8c4b259e3 --- /dev/null +++ b/cs/MarkdownTests/Tokenizer/Scanners/NumberScannerTest.cs @@ -0,0 +1,52 @@ +using FluentAssertions; +using Markdown.Tokenizer.Scanners; +using Markdown.Tokens; + +namespace MarkdownTests.Scanners; + +[TestFixture] +[TestOf(typeof(NumberScanner))] +public class NumberScannerTest +{ + + [TestCase("1", 0)] + [TestCase("42", 0)] + [TestCase("12345", 2)] + [TestCase("ab1234", 2)] + [TestCase("123ab", 0)] + public void Scan_ShouldReturnNumberToken_WhenBeginPointsOnNumber(string text, int begin) + { + var scanner = new NumberScanner(); + + var token = scanner.Scan(text, begin); + + token.Should().NotBeNull(); + token.TokenType.Should().Be(TokenType.Number); + } + + [TestCase(" 123", 0)] + [TestCase("_\n", 0)] + [TestCase("abc", 0)] + [TestCase("ab1234", 0)] + [TestCase("123ab", 3)] + public void Scan_ShouldScanNull_WhenBeginPointsNotOnNumber(string text, int begin) + { + var scanner = new NumberScanner(); + + var token = scanner.Scan(text, begin); + + token.Should().BeNull(); + } + + [TestCase("123", 0, 3)] + [TestCase("a123", 1, 3)] + [TestCase("123a", 0, 3)] + public void Scan_ShouldReturnNumberWithRightLength(string text, int begin, int expectedLength) + { + var scanner = new NumberScanner(); + + var token = scanner.Scan(text, begin); + + token?.Value.Length.Should().Be(expectedLength); + } +} \ No newline at end of file diff --git a/cs/MarkdownTests/Tokenizer/Scanners/SpecScannerTest.cs b/cs/MarkdownTests/Tokenizer/Scanners/SpecScannerTest.cs new file mode 100644 index 000000000..587d83b65 --- /dev/null +++ b/cs/MarkdownTests/Tokenizer/Scanners/SpecScannerTest.cs @@ -0,0 +1,43 @@ +using FluentAssertions; +using Markdown.Tokenizer.Scanners; +using Markdown.Tokens; + +namespace MarkdownTests.Scanners; + +[TestFixture] +[TestOf(typeof(SpecScanner))] +public class SpecScannerTest +{ + + [TestCase(" ", 0, TokenType.Space)] + [TestCase("*", 0, TokenType.Asterisk)] + [TestCase("\n", 0, TokenType.Newline)] + [TestCase("#", 0, TokenType.Octothorpe)] + [TestCase(@"\", 0, TokenType.Backslash)] + [TestCase("_", 0, TokenType.Underscore)] + [TestCase("_abc_", 0, TokenType.Underscore)] + [TestCase("1234ab_", 6, TokenType.Underscore)] + public void Scan_ShouldScanValidTokenType_WhenBeginPointsOnSpec + (string text, int begin, TokenType expectedType) + { + var scanner = new SpecScanner(); + + var token = scanner.Scan(text, begin); + + token?.TokenType.Should().Be(expectedType); + } + + [TestCase("abc", 0)] + [TestCase(" abc", 1)] + [TestCase("abc\n", 0)] + [TestCase("_abc_", 1)] + public void Scan_ShouldScanNull_WhenBeginPointsNotOnSpec + (string text, int begin) + { + var scanner = new SpecScanner(); + + var token = scanner.Scan(text, begin); + + token.Should().BeNull(); + } +} \ No newline at end of file diff --git a/cs/MarkdownTests/Tokenizer/Scanners/TextScannerTest.cs b/cs/MarkdownTests/Tokenizer/Scanners/TextScannerTest.cs new file mode 100644 index 000000000..ed1ddda29 --- /dev/null +++ b/cs/MarkdownTests/Tokenizer/Scanners/TextScannerTest.cs @@ -0,0 +1,53 @@ +using Markdown.Tokens; +using FluentAssertions; +using Markdown.Tokenizer.Scanners; + +namespace MarkdownTests.Scanners; + +[TestFixture] +[TestOf(typeof(TextScanner))] +public class TextScannerTest +{ + + [TestCase("a", 0)] + [TestCase("_a_", 1)] + [TestCase("a\n", 0)] + [TestCase(" a ", 1)] + public void Scan_ShouldReturnTextToken_WhenBeginPointsOnText(string text, int begin) + { + var scanner = new TextScanner(); + + var token = scanner.Scan(text, begin); + + token.Should().NotBeNull(); + token.TokenType.Should().Be(TokenType.Word); + } + + [TestCase("_a", 0)] + [TestCase("\na", 0)] + [TestCase("#a", 0)] + [TestCase(" a", 0)] + [TestCase("a ", 1)] + public void Scan_ShouldScanNull_WhenBeginPointsNotOnText(string text, int begin) + { + var scanner = new TextScanner(); + + var token = scanner.Scan(text, begin); + + token.Should().BeNull(); + } + + [TestCase("abc", 0, 3)] + [TestCase("abc ", 0, 3)] + [TestCase(" abc", 1, 3)] + [TestCase("_abc_", 1, 3)] + public void Scan_ShouldReturnTextWithRightLength(string text, int begin, int expectedLength) + { + var scanner = new TextScanner(); + + var token = scanner.Scan(text, begin); + + token?.Value.Length.Should().Be(expectedLength); + } + +} \ No newline at end of file diff --git a/cs/clean-code.sln b/cs/clean-code.sln index 2206d54db..9c6339592 100644 --- a/cs/clean-code.sln +++ b/cs/clean-code.sln @@ -9,6 +9,10 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ControlDigit", "ControlDigi EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Samples", "Samples\Samples.csproj", "{C3EF41D7-50EF-4CE1-B30A-D1D81C93D7FA}" EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Markdown", "Markdown\Markdown.csproj", "{7B0EC48B-577E-4C90-8D88-A6004FE382E7}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "MarkdownTests", "MarkdownTests\MarkdownTests.csproj", "{1604C203-FBCA-4B63-8F51-D018E99392BC}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -27,5 +31,13 @@ Global {C3EF41D7-50EF-4CE1-B30A-D1D81C93D7FA}.Debug|Any CPU.Build.0 = Debug|Any CPU {C3EF41D7-50EF-4CE1-B30A-D1D81C93D7FA}.Release|Any CPU.ActiveCfg = Release|Any CPU {C3EF41D7-50EF-4CE1-B30A-D1D81C93D7FA}.Release|Any CPU.Build.0 = Release|Any CPU + {7B0EC48B-577E-4C90-8D88-A6004FE382E7}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {7B0EC48B-577E-4C90-8D88-A6004FE382E7}.Debug|Any CPU.Build.0 = Debug|Any CPU + {7B0EC48B-577E-4C90-8D88-A6004FE382E7}.Release|Any CPU.ActiveCfg = Release|Any CPU + {7B0EC48B-577E-4C90-8D88-A6004FE382E7}.Release|Any CPU.Build.0 = Release|Any CPU + {1604C203-FBCA-4B63-8F51-D018E99392BC}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {1604C203-FBCA-4B63-8F51-D018E99392BC}.Debug|Any CPU.Build.0 = Debug|Any CPU + {1604C203-FBCA-4B63-8F51-D018E99392BC}.Release|Any CPU.ActiveCfg = Release|Any CPU + {1604C203-FBCA-4B63-8F51-D018E99392BC}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection EndGlobal diff --git a/cs/clean-code.sln.DotSettings b/cs/clean-code.sln.DotSettings index 135b83ecb..229f449d2 100644 --- a/cs/clean-code.sln.DotSettings +++ b/cs/clean-code.sln.DotSettings @@ -1,6 +1,9 @@  <Policy Inspect="True" Prefix="" Suffix="" Style="aaBb" /> <Policy Inspect="True" Prefix="" Suffix="" Style="AaBb_AaBb" /> + <Policy><Descriptor Staticness="Instance" AccessRightKinds="Private" Description="Instance fields (private)"><ElementKinds><Kind Name="FIELD" /><Kind Name="READONLY_FIELD" /></ElementKinds></Descriptor><Policy Inspect="True" Prefix="" Suffix="" Style="aaBb" /></Policy> + <Policy><Descriptor Staticness="Any" AccessRightKinds="Any" Description="Types and namespaces"><ElementKinds><Kind Name="NAMESPACE" /><Kind Name="CLASS" /><Kind Name="STRUCT" /><Kind Name="ENUM" /><Kind Name="DELEGATE" /></ElementKinds></Descriptor><Policy Inspect="True" Prefix="" Suffix="" Style="AaBb_AaBb" /></Policy> + True True True Imported 10.10.2016 From cc34849c95d4b3f38ffb7575e9af764a96acf6c1 Mon Sep 17 00:00:00 2001 From: Geratoptus Date: Wed, 11 Dec 2024 10:56:19 +0500 Subject: [PATCH 02/27] =?UTF-8?q?=D0=94=D0=BE=D0=BB=D0=B8=D0=B2=D0=BA?= =?UTF-8?q?=D0=B0=20unversioned=20files?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cs/Markdown/Markdown.csproj | 9 ++++++++ cs/MarkdownTests/MarkdownTests.csproj | 29 ++++++++++++++++++++++++ cs/MarkdownTests/MdAcceptanceResult.html | 2 ++ 3 files changed, 40 insertions(+) create mode 100644 cs/Markdown/Markdown.csproj create mode 100644 cs/MarkdownTests/MarkdownTests.csproj create mode 100644 cs/MarkdownTests/MdAcceptanceResult.html diff --git a/cs/Markdown/Markdown.csproj b/cs/Markdown/Markdown.csproj new file mode 100644 index 000000000..3a6353295 --- /dev/null +++ b/cs/Markdown/Markdown.csproj @@ -0,0 +1,9 @@ + + + + net8.0 + enable + enable + + + diff --git a/cs/MarkdownTests/MarkdownTests.csproj b/cs/MarkdownTests/MarkdownTests.csproj new file mode 100644 index 000000000..ba0b0f1f9 --- /dev/null +++ b/cs/MarkdownTests/MarkdownTests.csproj @@ -0,0 +1,29 @@ + + + + net8.0 + enable + enable + + false + true + + + + + + + + + + + + + + + + + + + + diff --git a/cs/MarkdownTests/MdAcceptanceResult.html b/cs/MarkdownTests/MdAcceptanceResult.html new file mode 100644 index 000000000..eaf7f9b63 --- /dev/null +++ b/cs/MarkdownTests/MdAcceptanceResult.html @@ -0,0 +1,2 @@ +

    Это заголовок

    #Это не заголовок

    И даже # это не заголовок

    • Это список

    *Это не список

    И это тоже * не список

    Это выделится

    Это выделится

    _Это не выделится__

    __И это не выделится_

    Внутри слоооооова

    Тоже рабоооооотает

    Но если неправильн_оооооо__

    То не раб__оооооо_тает

    В начале и в кооооонце тоже работает

    Так _не получится_

    И _так_

    Вот _ так_ и _так _не получится

    И _если __пересечь_, то не получится__

    Вот так все получится

    А вот так уже __не все__

    Все здесь тоже будет рабоооотать

    • И даже здесь
    • Будет работать обалдеть
    +
    • А это уже другой список

    Здесь не список

    • Здесь опять другой список
    • И вот * такая штуко*вина это всего одна запись в списке
    • # И заголовка здесь нет
    • В\роде все

    Следующая строка не список

    *

    \ No newline at end of file From f0db85dc9c75477ea66f97f1279d67c96e7a33fd Mon Sep 17 00:00:00 2001 From: Geratoptus Date: Sat, 14 Dec 2024 18:48:43 +0500 Subject: [PATCH 03/27] =?UTF-8?q?fix(MdTests.cs):=20=D0=BF=D0=BE=D1=84?= =?UTF-8?q?=D0=B8=D0=BA=D1=81=D0=B8=D0=BB=20=D1=82=D0=B5=D1=81=D1=82=20?= =?UTF-8?q?=D0=BD=D0=B0=20=D1=81=D0=BA=D0=BE=D1=80=D0=BE=D1=81=D1=82=D1=8C?= =?UTF-8?q?=20=D1=80=D0=B0=D0=B1=D0=BE=D1=82=D1=8B:=20-=20=D0=A3=D0=B1?= =?UTF-8?q?=D1=80=D0=B0=D0=BB=20=D0=BD=D0=B5=D0=BD=D1=83=D0=B6=D0=BD=D1=8B?= =?UTF-8?q?=D0=B5=20=D0=BA=D0=BB=D0=B0=D1=81=D1=81=D1=8B=20-=20=D0=92=20?= =?UTF-8?q?=D1=81=D1=80=D0=B0=D0=B2=D0=BD=D0=B5=D0=BD=D0=B8=D0=B8=20=D0=B4?= =?UTF-8?q?=D0=BE=D0=B1=D0=B0=D0=B2=D0=B8=D0=BB=20=D0=BA=D0=B0=D1=81=D1=82?= =?UTF-8?q?=20=D0=BA=20(double),=20=D1=82=D0=B5=D0=BF=D0=B5=D1=80=D1=8C=20?= =?UTF-8?q?=D0=BF=D1=80=D0=BE=D0=B2=D0=B5=D1=80=D0=BA=D0=B0=20=D0=BA=D0=BE?= =?UTF-8?q?=D1=80=D1=80=D0=B5=D0=BA=D1=82=D0=BD=D0=B0=D1=8F=20-=20=D0=94?= =?UTF-8?q?=D0=BE=D0=B1=D0=B0=D0=B2=D0=B8=D0=BB=20GC.Collect()=20=D0=BF?= =?UTF-8?q?=D0=B5=D1=80=D0=B5=D0=B4=20=D0=B7=D0=B0=D0=BC=D0=B5=D1=80=D0=BE?= =?UTF-8?q?=D0=BC=20=D0=B2=D1=80=D0=B5=D0=BC=D0=B5=D0=BD=D0=B8=20=D0=B4?= =?UTF-8?q?=D0=BB=D1=8F=20=D0=B1=D0=BE=D0=BB=D0=B5=D0=B5=20=D0=BA=D0=BE?= =?UTF-8?q?=D1=80=D1=80=D0=B5=D0=BA=D1=82=D0=BD=D0=BE=D0=B9=20=D0=BF=D1=80?= =?UTF-8?q?=D0=BE=D0=B2=D0=B5=D1=80=D0=BA=D0=B8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cs/MarkdownTests/MdTest.cs | 27 ++++++++++----------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/cs/MarkdownTests/MdTest.cs b/cs/MarkdownTests/MdTest.cs index 8a2235470..f57d2c1d8 100644 --- a/cs/MarkdownTests/MdTest.cs +++ b/cs/MarkdownTests/MdTest.cs @@ -1,33 +1,32 @@ using System.Diagnostics; using System.Text; using FluentAssertions; -using Markdown.Generator; -using Markdown.Parser.Rules; -using Markdown.Tokenizer; +using Markdown; namespace MarkdownTests; public class Tests { - private readonly BodyRule rule = new(); - private readonly HtmlGenerator generator = new(); - private readonly MdTokenizer tokenizer = new(); - [Test] public void Markdown_Render_ShouldWorkFast() { const int scale = 2; var sw = new Stopwatch(); var results = new List(); - for (var len = 640; len <= 5120; len *= scale) + for (var len = 640; len <= 655360; len *= scale) { var markdown = GenerateMarkdown(len); - sw.Start(); RenderMarkdown(markdown); sw.Stop(); - results.Add(sw.Elapsed); sw.Reset(); + GC.Collect(); + sw.Start(); + Md.Render(markdown); + sw.Stop(); + + results.Add(sw.Elapsed); + sw.Reset(); } Enumerable.Range(1, results.Count - 1) - .Select(i => results[i].Ticks / results[i - 1].Ticks) + .Select(i => (double)results[i].Ticks / results[i - 1].Ticks) .Should().OnlyContain(timeRatio => timeRatio < scale * scale); } private static string GenerateMarkdown(int len) @@ -40,10 +39,4 @@ private static string GenerateMarkdown(int len) return Enumerable.Range(0, len).Aggregate(new StringBuilder(), (sb, _) => sb.Append(allElements[rand.Next(allElements.Count)])).ToString(); } - private void RenderMarkdown(string markdown) - { - var tokens = tokenizer.Tokenize($"{markdown}\n"); - var root = rule.Match(tokens); - generator.Render(root, tokens); - } } \ No newline at end of file From 79cbb5e5abe0791358d3f187dc6c5be30aa497ce Mon Sep 17 00:00:00 2001 From: Geratoptus Date: Sat, 14 Dec 2024 21:14:57 +0500 Subject: [PATCH 04/27] =?UTF-8?q?feat(Scanners):=20=D0=BF=D0=B5=D1=80?= =?UTF-8?q?=D0=B5=D0=BF=D0=B8=D1=81=D0=B0=D0=BB=20=D1=81=D0=BA=D0=B0=D0=BD?= =?UTF-8?q?=D0=B5=D1=80=D1=8B=20=D0=BD=D0=B0=20Memory,=20=D1=82?= =?UTF-8?q?=D0=B5=D0=BF=D0=B5=D1=80=D1=8C=20=D1=80=D0=B0=D0=B1=D0=BE=D1=82?= =?UTF-8?q?=D0=B0=D1=8E=D1=82=20=D0=B1=D1=8B=D1=81=D1=82=D1=80=D0=B5=D0=B5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cs/Markdown/Tokenizer/Scanners/ITokenScanner.cs | 2 +- cs/Markdown/Tokenizer/Scanners/NumberScanner.cs | 17 +++++++++++------ cs/Markdown/Tokenizer/Scanners/SpecScanner.cs | 7 ++++--- cs/Markdown/Tokenizer/Scanners/TextScanner.cs | 15 +++++++++------ 4 files changed, 25 insertions(+), 16 deletions(-) diff --git a/cs/Markdown/Tokenizer/Scanners/ITokenScanner.cs b/cs/Markdown/Tokenizer/Scanners/ITokenScanner.cs index 9986613f5..23b8cabdf 100644 --- a/cs/Markdown/Tokenizer/Scanners/ITokenScanner.cs +++ b/cs/Markdown/Tokenizer/Scanners/ITokenScanner.cs @@ -4,5 +4,5 @@ namespace Markdown.Tokenizer.Scanners; public interface ITokenScanner { - public Token? Scan(string text, int begin); + public Token? Scan(Memory textSlice); } \ No newline at end of file diff --git a/cs/Markdown/Tokenizer/Scanners/NumberScanner.cs b/cs/Markdown/Tokenizer/Scanners/NumberScanner.cs index 4ae84a9b2..8c5f724c0 100644 --- a/cs/Markdown/Tokenizer/Scanners/NumberScanner.cs +++ b/cs/Markdown/Tokenizer/Scanners/NumberScanner.cs @@ -1,16 +1,21 @@ +using System.Collections; using Markdown.Tokens; namespace Markdown.Tokenizer.Scanners; public class NumberScanner : ITokenScanner { - public Token? Scan(string text, int begin = 0) + public Token? Scan(Memory textSlice) { - var numberEnumerable = text - .Skip(begin) - .TakeWhile(CanScan); - var numberLength = numberEnumerable.Count(); - return numberLength == 0 ? null : new Token(TokenType.Number, begin, numberLength, text); + var numberLength = 0; + var textSpan = textSlice.Span; + + while (numberLength < textSpan.Length && CanScan(textSpan[numberLength])) + { + numberLength++; + } + + return numberLength == 0 ? null : new Token(TokenType.Number, textSlice[..numberLength].ToString()); } public static bool CanScan(char symbol) => char.IsDigit(symbol); diff --git a/cs/Markdown/Tokenizer/Scanners/SpecScanner.cs b/cs/Markdown/Tokenizer/Scanners/SpecScanner.cs index 9b2d4445d..73d9791b8 100644 --- a/cs/Markdown/Tokenizer/Scanners/SpecScanner.cs +++ b/cs/Markdown/Tokenizer/Scanners/SpecScanner.cs @@ -4,9 +4,10 @@ namespace Markdown.Tokenizer.Scanners; public class SpecScanner : ITokenScanner { - public Token? Scan(string text, int begin = 0) + public Token? Scan(Memory textSlice) { - var tokenType = GetTokenType(text[begin]); + var tokenValue = textSlice.Span[0]; + var tokenType = GetTokenType(tokenValue); if (tokenType is null) { @@ -14,7 +15,7 @@ public class SpecScanner : ITokenScanner } var notNullType = (TokenType)tokenType; - return new Token(notNullType, begin, 1, text); + return new Token(notNullType, tokenValue.ToString()); } public static bool CanScan(char symbol) => GetTokenType(symbol) != null; diff --git a/cs/Markdown/Tokenizer/Scanners/TextScanner.cs b/cs/Markdown/Tokenizer/Scanners/TextScanner.cs index 6293c9689..5cafca9bb 100644 --- a/cs/Markdown/Tokenizer/Scanners/TextScanner.cs +++ b/cs/Markdown/Tokenizer/Scanners/TextScanner.cs @@ -4,13 +4,16 @@ namespace Markdown.Tokenizer.Scanners; public class TextScanner : ITokenScanner { - public Token? Scan(string text, int begin = 0) + public Token? Scan(Memory textSlice) { - var valueEnumerable = text - .Skip(begin) - .TakeWhile(CanScan); - var valueLen = valueEnumerable.Count(); - return valueLen == 0 ? null : new Token(TokenType.Word, begin, valueLen, text); + var valueLength = 0; + var textSpan = textSlice.Span; + + while (valueLength < textSpan.Length && CanScan(textSpan[valueLength])) + { + valueLength++; + } + return valueLength == 0 ? null : new Token(TokenType.Word, textSlice[..valueLength].ToString()); } private static bool CanScan(char symbol) From 9bbaac308d2b6e7a42fd5216ff8e6473c430f55f Mon Sep 17 00:00:00 2001 From: Geratoptus Date: Sat, 14 Dec 2024 21:15:23 +0500 Subject: [PATCH 05/27] =?UTF-8?q?fix(ScannerTests):=20=D0=BF=D0=BE=D0=BF?= =?UTF-8?q?=D1=80=D0=B0=D0=B2=D0=B8=D0=BB=20=D1=82=D0=B5=D1=81=D1=82=D1=8B?= =?UTF-8?q?=20=D0=B4=D0=BB=D1=8F=20=D0=BD=D0=BE=D0=B2=D0=BE=D0=B9=20=D1=80?= =?UTF-8?q?=D0=B5=D0=B0=D0=BB=D0=B8=D0=B7=D0=B0=D1=86=D0=B8=D0=B8=20=D1=81?= =?UTF-8?q?=D0=BA=D0=B0=D0=BD=D0=B5=D1=80=D0=BE=D0=B2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cs/MarkdownTests/Tokenizer/Scanners/NumberScannerTest.cs | 8 +++++--- cs/MarkdownTests/Tokenizer/Scanners/SpecScannerTest.cs | 6 ++++-- cs/MarkdownTests/Tokenizer/Scanners/TextScannerTest.cs | 8 +++++--- 3 files changed, 14 insertions(+), 8 deletions(-) diff --git a/cs/MarkdownTests/Tokenizer/Scanners/NumberScannerTest.cs b/cs/MarkdownTests/Tokenizer/Scanners/NumberScannerTest.cs index 8c4b259e3..74f047b04 100644 --- a/cs/MarkdownTests/Tokenizer/Scanners/NumberScannerTest.cs +++ b/cs/MarkdownTests/Tokenizer/Scanners/NumberScannerTest.cs @@ -18,7 +18,7 @@ public void Scan_ShouldReturnNumberToken_WhenBeginPointsOnNumber(string text, in { var scanner = new NumberScanner(); - var token = scanner.Scan(text, begin); + var token = scanner.Scan(GetMemorySlice(text, begin)); token.Should().NotBeNull(); token.TokenType.Should().Be(TokenType.Number); @@ -33,7 +33,7 @@ public void Scan_ShouldScanNull_WhenBeginPointsNotOnNumber(string text, int begi { var scanner = new NumberScanner(); - var token = scanner.Scan(text, begin); + var token = scanner.Scan(GetMemorySlice(text, begin)); token.Should().BeNull(); } @@ -45,8 +45,10 @@ public void Scan_ShouldReturnNumberWithRightLength(string text, int begin, int e { var scanner = new NumberScanner(); - var token = scanner.Scan(text, begin); + var token = scanner.Scan(GetMemorySlice(text, begin)); token?.Value.Length.Should().Be(expectedLength); } + + private static Memory GetMemorySlice(string text, int begin) => new(text.ToCharArray()[begin..]); } \ No newline at end of file diff --git a/cs/MarkdownTests/Tokenizer/Scanners/SpecScannerTest.cs b/cs/MarkdownTests/Tokenizer/Scanners/SpecScannerTest.cs index 587d83b65..310bb25aa 100644 --- a/cs/MarkdownTests/Tokenizer/Scanners/SpecScannerTest.cs +++ b/cs/MarkdownTests/Tokenizer/Scanners/SpecScannerTest.cs @@ -22,7 +22,7 @@ public void Scan_ShouldScanValidTokenType_WhenBeginPointsOnSpec { var scanner = new SpecScanner(); - var token = scanner.Scan(text, begin); + var token = scanner.Scan(GetMemorySlice(text, begin)); token?.TokenType.Should().Be(expectedType); } @@ -36,8 +36,10 @@ public void Scan_ShouldScanNull_WhenBeginPointsNotOnSpec { var scanner = new SpecScanner(); - var token = scanner.Scan(text, begin); + var token = scanner.Scan(GetMemorySlice(text, begin)); token.Should().BeNull(); } + + private static Memory GetMemorySlice(string text, int begin) => new(text.ToCharArray()[begin..]); } \ No newline at end of file diff --git a/cs/MarkdownTests/Tokenizer/Scanners/TextScannerTest.cs b/cs/MarkdownTests/Tokenizer/Scanners/TextScannerTest.cs index ed1ddda29..a88b605c6 100644 --- a/cs/MarkdownTests/Tokenizer/Scanners/TextScannerTest.cs +++ b/cs/MarkdownTests/Tokenizer/Scanners/TextScannerTest.cs @@ -17,7 +17,7 @@ public void Scan_ShouldReturnTextToken_WhenBeginPointsOnText(string text, int be { var scanner = new TextScanner(); - var token = scanner.Scan(text, begin); + var token = scanner.Scan(GetMemorySlice(text, begin)); token.Should().NotBeNull(); token.TokenType.Should().Be(TokenType.Word); @@ -32,7 +32,7 @@ public void Scan_ShouldScanNull_WhenBeginPointsNotOnText(string text, int begin) { var scanner = new TextScanner(); - var token = scanner.Scan(text, begin); + var token = scanner.Scan(GetMemorySlice(text, begin)); token.Should().BeNull(); } @@ -45,9 +45,11 @@ public void Scan_ShouldReturnTextWithRightLength(string text, int begin, int exp { var scanner = new TextScanner(); - var token = scanner.Scan(text, begin); + var token = scanner.Scan(GetMemorySlice(text, begin)); token?.Value.Length.Should().Be(expectedLength); } + private static Memory GetMemorySlice(string text, int begin) => new(text.ToCharArray()[begin..]); + } \ No newline at end of file From d82120d1b3766478bde612fc114423e5a60dfe30 Mon Sep 17 00:00:00 2001 From: Geratoptus Date: Sat, 14 Dec 2024 21:16:03 +0500 Subject: [PATCH 06/27] =?UTF-8?q?feat(ListOrderExtensions):=20=D0=BD=D0=B0?= =?UTF-8?q?=D0=BF=D0=B8=D1=81=D0=B0=D0=BB=20=D1=81=D0=B2=D0=BE=D0=B9=20?= =?UTF-8?q?=D1=81=D0=BA=D0=B8=D0=BF,=20=D0=BA=D0=BE=D1=82=D0=BE=D1=80?= =?UTF-8?q?=D1=8B=D0=B9=20=D0=BF=D1=80=D1=8B=D0=B3=D0=B0=D0=B5=D1=82=20?= =?UTF-8?q?=D1=81=D1=80=D0=B0=D0=B7=D1=83=20=D0=BA=D1=83=D0=B4=D0=B0=20?= =?UTF-8?q?=D0=BD=D0=B0=D0=B4=D0=BE,=20=D0=B0=20=D0=BD=D0=B5=20=D0=BA?= =?UTF-8?q?=D0=B0=D0=BA=20=D0=B2=20LINQ?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cs/Markdown/Parser/Rules/Tools/ListOrderExtensions.cs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/cs/Markdown/Parser/Rules/Tools/ListOrderExtensions.cs b/cs/Markdown/Parser/Rules/Tools/ListOrderExtensions.cs index 08c7985bc..90c8fe44c 100644 --- a/cs/Markdown/Parser/Rules/Tools/ListOrderExtensions.cs +++ b/cs/Markdown/Parser/Rules/Tools/ListOrderExtensions.cs @@ -4,4 +4,13 @@ public static class ListOrderExtensions { public static T? Second(this List list) => list.Count < 2 ? default : list[1]; public static T? Third(this List list) => list.Count < 3 ? default : list[2]; + + public static IEnumerable Skip(this List list, int begin) + { + while (begin < list.Count) + { + yield return list[begin]; + begin++; + } + } } \ No newline at end of file From 6a0bb5637e7bd4deebabb516966a509e953ab012 Mon Sep 17 00:00:00 2001 From: Geratoptus Date: Sat, 14 Dec 2024 21:16:46 +0500 Subject: [PATCH 07/27] =?UTF-8?q?feat(PatternRule|TextRule):=20=D1=82?= =?UTF-8?q?=D0=B5=D0=BF=D0=B5=D1=80=D1=8C=20=D0=B8=D1=81=D0=BF=D0=BE=D0=BB?= =?UTF-8?q?=D1=8C=D0=B7=D1=83=D0=B5=D1=82=D1=81=D1=8F=20=D0=B1=D1=8B=D1=81?= =?UTF-8?q?=D1=82=D1=80=D1=8B=D0=B9=20Skip,=20=D0=BA=D0=B2=D0=B0=D0=B4?= =?UTF-8?q?=D1=80=D0=B0=D1=82=D0=B0=20=D0=B1=D0=BE=D0=BB=D1=8C=D1=88=D0=B5?= =?UTF-8?q?=20=D0=BD=D0=B5=D1=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cs/Markdown/Parser/Rules/PatternRule.cs | 3 ++- cs/Markdown/Parser/Rules/TextRule.cs | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/cs/Markdown/Parser/Rules/PatternRule.cs b/cs/Markdown/Parser/Rules/PatternRule.cs index 7649050b4..f22f24dcc 100644 --- a/cs/Markdown/Parser/Rules/PatternRule.cs +++ b/cs/Markdown/Parser/Rules/PatternRule.cs @@ -1,6 +1,7 @@ using Markdown.Parser.Nodes; using Markdown.Tokens; - +using Markdown.Parser.Rules.Tools; + namespace Markdown.Parser.Rules; public class PatternRule(List pattern) : IParsingRule diff --git a/cs/Markdown/Parser/Rules/TextRule.cs b/cs/Markdown/Parser/Rules/TextRule.cs index 9a5da369f..85c4fbb45 100644 --- a/cs/Markdown/Parser/Rules/TextRule.cs +++ b/cs/Markdown/Parser/Rules/TextRule.cs @@ -1,5 +1,6 @@ using Markdown.Parser.Nodes; using Markdown.Tokens; +using Markdown.Parser.Rules.Tools; namespace Markdown.Parser.Rules; From 5061b702917f6e272968d9255fd62044d4c8f035 Mon Sep 17 00:00:00 2001 From: Geratoptus Date: Sat, 14 Dec 2024 21:17:45 +0500 Subject: [PATCH 08/27] =?UTF-8?q?feat(MdTokenizer):=20=D0=B2=D0=BD=D0=B5?= =?UTF-8?q?=D0=B4=D1=80=D0=B8=D0=BB=20=D0=BD=D0=BE=D0=B2=D1=8B=D0=B5=20?= =?UTF-8?q?=D1=81=D0=BA=D0=B0=D0=BD=D0=B5=D1=80=D1=8B=20=D0=B2=20=D1=82?= =?UTF-8?q?=D0=BE=D0=BA=D0=B5=D0=BD=D0=B8=D0=B7=D0=B0=D1=82=D0=BE=D1=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cs/Markdown/Tokenizer/MdTokenizer.cs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/cs/Markdown/Tokenizer/MdTokenizer.cs b/cs/Markdown/Tokenizer/MdTokenizer.cs index 3bed65a98..6c785e46d 100644 --- a/cs/Markdown/Tokenizer/MdTokenizer.cs +++ b/cs/Markdown/Tokenizer/MdTokenizer.cs @@ -14,11 +14,14 @@ public List Tokenize(string text) { var begin = 0; var tokenList = new List(); - + var memoryText = new Memory(text.ToCharArray()); + while (begin < text.Length) { + var textSlice = memoryText[begin..]; + var token = scanners - .Select(scanner => scanner.Scan(text, begin)) + .Select(scanner => scanner.Scan(textSlice)) .First(token => token != null); Debug.Assert(token != null, nameof(token) + " != null"); From 40c9ab18920b89fa40a5fcd5f029023ceba2e025 Mon Sep 17 00:00:00 2001 From: Geratoptus Date: Sat, 14 Dec 2024 21:18:27 +0500 Subject: [PATCH 09/27] =?UTF-8?q?feat(MdTests.cs):=20=D1=83=D0=B2=D0=B5?= =?UTF-8?q?=D0=BB=D0=B8=D1=87=D0=B8=D0=BB=20=D1=80=D0=B0=D0=B7=D0=BC=D0=B5?= =?UTF-8?q?=D1=80=D1=8B=20=D0=B3=D0=B5=D0=BD=D0=B5=D1=80=D0=B8=D1=80=D1=83?= =?UTF-8?q?=D0=B5=D0=BC=D1=8B=D1=85=20Markdown-=D1=80=D0=B0=D0=B7=D0=BC?= =?UTF-8?q?=D0=B5=D1=82=D0=BE=D0=BA=20=D0=B2=20=D1=82=D0=B5=D1=81=D1=82?= =?UTF-8?q?=D0=B5=20=D0=BD=D0=B0=20=D1=81=D0=BA=D0=BE=D1=80=D0=BE=D1=81?= =?UTF-8?q?=D1=82=D1=8C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cs/MarkdownTests/MdTest.cs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/cs/MarkdownTests/MdTest.cs b/cs/MarkdownTests/MdTest.cs index f57d2c1d8..71c713e93 100644 --- a/cs/MarkdownTests/MdTest.cs +++ b/cs/MarkdownTests/MdTest.cs @@ -13,22 +13,23 @@ public void Markdown_Render_ShouldWorkFast() const int scale = 2; var sw = new Stopwatch(); var results = new List(); - for (var len = 640; len <= 655360; len *= scale) + for (var len = 640; len <= 1310720; len *= scale) { var markdown = GenerateMarkdown(len); GC.Collect(); sw.Start(); Md.Render(markdown); sw.Stop(); - + results.Add(sw.Elapsed); sw.Reset(); } - + Enumerable.Range(1, results.Count - 1) .Select(i => (double)results[i].Ticks / results[i - 1].Ticks) .Should().OnlyContain(timeRatio => timeRatio < scale * scale); } + private static string GenerateMarkdown(int len) { var rand = new Random(); From 887e3328d401b40d230863b382acb9c346939ace Mon Sep 17 00:00:00 2001 From: Geratoptus Date: Sat, 14 Dec 2024 21:19:23 +0500 Subject: [PATCH 10/27] =?UTF-8?q?feat(ItalicRuleTest/ShouldMatch=5FTagInWo?= =?UTF-8?q?rd):=20=D0=B4=D0=BE=D0=B1=D0=B0=D0=B2=D0=B8=D0=BB=20=D0=BF?= =?UTF-8?q?=D1=80=D0=BE=D0=B2=D0=B5=D1=80=D0=BA=D1=83=20=D0=BD=D0=B0=20?= =?UTF-8?q?=D1=82=D0=B8=D0=BF=20=D0=BD=D0=BE=D0=B4=D1=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cs/MarkdownTests/Parser/Rules/ItalicRuleTest.cs | 1 + 1 file changed, 1 insertion(+) diff --git a/cs/MarkdownTests/Parser/Rules/ItalicRuleTest.cs b/cs/MarkdownTests/Parser/Rules/ItalicRuleTest.cs index 86c9c38a7..be3daf598 100644 --- a/cs/MarkdownTests/Parser/Rules/ItalicRuleTest.cs +++ b/cs/MarkdownTests/Parser/Rules/ItalicRuleTest.cs @@ -50,6 +50,7 @@ public string Match_ShouldMatch_TagInWord(string text, int begin) var node = rule.Match(tokens, begin) as TagNode; node.Should().NotBeNull(); + node.NodeType.Should().Be(NodeType.Italic); node.Children.Should().ContainSingle(n => n.NodeType == NodeType.Text); return node.ToText(tokens); } From 7d63b4b7de514ccbb0809fb6a0fba81d4c053a11 Mon Sep 17 00:00:00 2001 From: Geratoptus Date: Sat, 14 Dec 2024 21:55:11 +0500 Subject: [PATCH 11/27] =?UTF-8?q?fix(SpecScanner):=20=D1=82=D0=B5=D0=BF?= =?UTF-8?q?=D0=B5=D1=80=D1=8C=20=D0=BF=D1=80=D0=B0=D0=B2=D0=B8=D0=BB=D1=8C?= =?UTF-8?q?=D0=BD=D0=BE=20=D1=81=D0=BA=D0=B0=D0=BD=D0=B8=D1=82=20=D0=BD?= =?UTF-8?q?=D0=B5=D1=80=D0=B0=D0=B7=D1=80=D1=8B=D0=B2=D0=BD=D1=8B=D0=B9=20?= =?UTF-8?q?=D0=BF=D1=80=D0=BE=D0=B1=D0=B5=D0=BB=20=D0=B8=20=D0=BF=D1=80?= =?UTF-8?q?=D0=BE=D0=B1=D0=B5=D0=BB=20=D0=BD=D1=83=D0=BB=D0=B5=D0=B2=D0=BE?= =?UTF-8?q?=D0=B9=20=D1=88=D0=B8=D1=80=D0=B8=D0=BD=D1=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cs/Markdown/Tokenizer/Scanners/SpecScanner.cs | 2 +- cs/MarkdownTests/Tokenizer/Scanners/SpecScannerTest.cs | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/cs/Markdown/Tokenizer/Scanners/SpecScanner.cs b/cs/Markdown/Tokenizer/Scanners/SpecScanner.cs index 73d9791b8..be426d3ae 100644 --- a/cs/Markdown/Tokenizer/Scanners/SpecScanner.cs +++ b/cs/Markdown/Tokenizer/Scanners/SpecScanner.cs @@ -22,7 +22,7 @@ public class SpecScanner : ITokenScanner private static TokenType? GetTokenType(char symbol) => symbol switch { - ' ' => TokenType.Space, + ' ' or '\u00a0' or '\u200b' => TokenType.Space, '*' => TokenType.Asterisk, '\n' or '\r' => TokenType.Newline, '\\' => TokenType.Backslash, diff --git a/cs/MarkdownTests/Tokenizer/Scanners/SpecScannerTest.cs b/cs/MarkdownTests/Tokenizer/Scanners/SpecScannerTest.cs index 310bb25aa..c2eb54898 100644 --- a/cs/MarkdownTests/Tokenizer/Scanners/SpecScannerTest.cs +++ b/cs/MarkdownTests/Tokenizer/Scanners/SpecScannerTest.cs @@ -10,6 +10,8 @@ public class SpecScannerTest { [TestCase(" ", 0, TokenType.Space)] + [TestCase("\u00a0", 0, TokenType.Space)] + [TestCase("\u200b", 0, TokenType.Space)] [TestCase("*", 0, TokenType.Asterisk)] [TestCase("\n", 0, TokenType.Newline)] [TestCase("#", 0, TokenType.Octothorpe)] From a247e12d8dc5fa0a52b78141accbf773f2990d67 Mon Sep 17 00:00:00 2001 From: Geratoptus Date: Sat, 14 Dec 2024 21:56:08 +0500 Subject: [PATCH 12/27] =?UTF-8?q?fix(ParagraphRule):=20=D1=82=D0=B5=D0=BF?= =?UTF-8?q?=D0=B5=D1=80=D1=8C=20=D0=BA=D0=BE=D1=80=D1=80=D0=B5=D0=BA=D1=82?= =?UTF-8?q?=D0=BD=D0=BE=20=D0=BC=D0=B0=D1=82=D1=87=D0=B8=D1=82=20=D0=BA?= =?UTF-8?q?=D0=B5=D0=B9=D1=81=D1=8B=20=D1=81=20=D1=8D=D1=81=D0=BA=D0=B5?= =?UTF-8?q?=D0=B9=D0=BF=D0=BD=D1=83=D1=82=D1=8B=D0=BC=20=D0=B1=D1=8D=D0=BA?= =?UTF-8?q?=D1=81=D0=BB=D1=8D=D1=88=D0=B5=D0=BC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cs/Markdown/Parser/Rules/ParagraphRule.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cs/Markdown/Parser/Rules/ParagraphRule.cs b/cs/Markdown/Parser/Rules/ParagraphRule.cs index add21a5f7..733694f07 100644 --- a/cs/Markdown/Parser/Rules/ParagraphRule.cs +++ b/cs/Markdown/Parser/Rules/ParagraphRule.cs @@ -11,7 +11,7 @@ public class ParagraphRule: IParsingRule public Node? Match(List tokens, int begin = 0) { var tagRules = new OrRule([ - new EscapeRule(TokenType.Underscore), + new EscapeRule([TokenType.Underscore, TokenType.Backslash]), new ItalicRule(), new BoldRule(), new TextRule(), ]); var tokenRules = new OrRule([ From 6562be2bd3fa0bf2eb40d82ef60fad622103f0f6 Mon Sep 17 00:00:00 2001 From: Geratoptus Date: Sat, 14 Dec 2024 22:28:19 +0500 Subject: [PATCH 13/27] =?UTF-8?q?fix(BoldRule.cs):=20=D1=82=D0=B5=D0=BF?= =?UTF-8?q?=D0=B5=D1=80=D1=8C=20=D0=BF=D1=80=D0=B0=D0=B2=D0=B8=D0=BB=D0=BE?= =?UTF-8?q?=20=D0=BA=D0=BE=D1=80=D1=80=D0=B5=D0=BA=D1=82=D0=BD=D0=BE=20?= =?UTF-8?q?=D0=BC=D1=8D=D1=82=D1=87=D0=B8=D1=82=20=D1=82=D1=8D=D0=B3=20?= =?UTF-8?q?=D1=81=20=D0=B2=D0=BD=D1=83=D1=82=D1=80=D0=B5=D0=BD=D0=BD=D0=B8?= =?UTF-8?q?=D0=BC=20=D1=81=D0=BB=D1=8D=D1=88=D0=B5=D0=BC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cs/Markdown/Parser/Rules/BoldRule.cs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/cs/Markdown/Parser/Rules/BoldRule.cs b/cs/Markdown/Parser/Rules/BoldRule.cs index 14b80f419..ccd957f02 100644 --- a/cs/Markdown/Parser/Rules/BoldRule.cs +++ b/cs/Markdown/Parser/Rules/BoldRule.cs @@ -17,7 +17,11 @@ public class BoldRule : IParsingRule private static TagNode? MatchBold(List tokens, int begin = 0) { - var valueRule = new OrRule(new ItalicRule(), new TextRule()); + var valueRule = new OrRule([ + new ItalicRule(), + new TextRule(), + new PatternRule(TokenType.Backslash) + ]); var pattern = new AndRule([ PatternRuleFactory.DoubleUnderscore(), new ConditionalRule(new KleeneStarRule(valueRule), HasRightBorders), From b0d70c6ac73bcbec66118017ca253d86bcaf7af8 Mon Sep 17 00:00:00 2001 From: Geratoptus Date: Sat, 14 Dec 2024 22:28:45 +0500 Subject: [PATCH 14/27] =?UTF-8?q?fix(ItalicRule|InWordItalicRule):=20?= =?UTF-8?q?=D1=82=D0=B5=D0=BF=D0=B5=D1=80=D1=8C=20=D0=BF=D1=80=D0=B0=D0=B2?= =?UTF-8?q?=D0=B8=D0=BB=D0=BE=20=D0=BA=D0=BE=D1=80=D1=80=D0=B5=D0=BA=D1=82?= =?UTF-8?q?=D0=BD=D0=BE=20=D0=BC=D1=8D=D1=82=D1=87=D0=B8=D1=82=20=D1=82?= =?UTF-8?q?=D1=8D=D0=B3=20=D1=81=20=D0=B2=D0=BD=D1=83=D1=82=D1=80=D0=B5?= =?UTF-8?q?=D0=BD=D0=BD=D0=B8=D0=BC=20=D1=81=D0=BB=D1=8D=D1=88=D0=B5=D0=BC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cs/Markdown/Parser/Rules/InWordItalicRule.cs | 2 +- cs/Markdown/Parser/Rules/ItalicRule.cs | 8 ++++++-- cs/MarkdownTests/Parser/Rules/ItalicRuleTest.cs | 2 +- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/cs/Markdown/Parser/Rules/InWordItalicRule.cs b/cs/Markdown/Parser/Rules/InWordItalicRule.cs index aa74e98b2..3e9058f41 100644 --- a/cs/Markdown/Parser/Rules/InWordItalicRule.cs +++ b/cs/Markdown/Parser/Rules/InWordItalicRule.cs @@ -16,7 +16,7 @@ public class InWordItalicRule : IParsingRule { var pattern = new AndRule([ new PatternRule(TokenType.Underscore), - new PatternRule(TokenType.Word), + new KleeneStarRule(new OrRule(new PatternRule(TokenType.Word), new PatternRule(TokenType.Backslash))), new PatternRule(TokenType.Underscore), ]); var continuesRule = new OrRule(possibleContinues); diff --git a/cs/Markdown/Parser/Rules/ItalicRule.cs b/cs/Markdown/Parser/Rules/ItalicRule.cs index 2558b08fb..c226bedac 100644 --- a/cs/Markdown/Parser/Rules/ItalicRule.cs +++ b/cs/Markdown/Parser/Rules/ItalicRule.cs @@ -9,7 +9,7 @@ public class ItalicRule : IParsingRule { private readonly AndRule innerBoldRule = new([ PatternRuleFactory.DoubleUnderscore(), - new TextRule(), + new KleeneStarRule(new OrRule([new PatternRule(TokenType.Backslash), new TextRule()])), PatternRuleFactory.DoubleUnderscore() ]); private readonly List possibleContinues = @@ -27,7 +27,11 @@ public class ItalicRule : IParsingRule } private TagNode? MatchItalic(List tokens, int begin) { - var valueRule = new OrRule(new TextRule(), innerBoldRule); + var valueRule = new OrRule([ + new TextRule(), + new PatternRule(TokenType.Backslash), + innerBoldRule]); + var pattern = new AndRule([ new PatternRule(TokenType.Underscore), new ConditionalRule(new KleeneStarRule(valueRule), HasRightBorders), diff --git a/cs/MarkdownTests/Parser/Rules/ItalicRuleTest.cs b/cs/MarkdownTests/Parser/Rules/ItalicRuleTest.cs index be3daf598..2f5e4df1c 100644 --- a/cs/MarkdownTests/Parser/Rules/ItalicRuleTest.cs +++ b/cs/MarkdownTests/Parser/Rules/ItalicRuleTest.cs @@ -51,7 +51,7 @@ public string Match_ShouldMatch_TagInWord(string text, int begin) node.Should().NotBeNull(); node.NodeType.Should().Be(NodeType.Italic); - node.Children.Should().ContainSingle(n => n.NodeType == NodeType.Text); + node.Children.Should().ContainSingle(n => n.NodeType == NodeType.Special); return node.ToText(tokens); } From 512ee58ff1d14aa3b6e56188d83b817e92e7be48 Mon Sep 17 00:00:00 2001 From: Geratoptus Date: Sat, 14 Dec 2024 22:55:38 +0500 Subject: [PATCH 15/27] =?UTF-8?q?refactor(Rules):=20=D0=B2=D1=81=D0=B5=20?= =?UTF-8?q?=D1=87=D1=82=D0=BE=20=D0=BC=D0=BE=D0=B6=D0=BD=D0=BE=20=D0=BF?= =?UTF-8?q?=D1=80=D0=BE=D0=B8=D0=BD=D0=B8=D1=86=D0=B8=D0=B0=D0=BB=D0=B8?= =?UTF-8?q?=D0=B7=D0=B8=D1=80=D0=BE=D0=B2=D0=B0=D1=82=D1=8C=20=D0=B1=D0=B5?= =?UTF-8?q?=D0=B7=20=D0=B4=D0=BE=D0=BF=D0=BE=D0=BB=D0=BD=D0=B8=D1=82=D0=B5?= =?UTF-8?q?=D0=BB=D1=8C=D0=BD=D1=8B=D1=85=20=D0=BF=D0=B5=D1=80=D0=B5=D0=B4?= =?UTF-8?q?=D0=B0=D0=BD=D0=BD=D1=8B=D1=85=20=D0=BF=D0=B0=D1=80=D0=B0=D0=BC?= =?UTF-8?q?=D0=B5=D1=82=D1=80=D0=BE=D0=B2=20=D0=B2=D1=8B=D0=BD=D0=B5=D1=81?= =?UTF-8?q?=20=D0=B2=20=D1=81=D1=82=D0=B0=D1=82=D0=B8=D1=87=D0=B5=D1=81?= =?UTF-8?q?=D0=BA=D0=B8=D0=B5=20=D0=BF=D0=BE=D0=BB=D1=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cs/Markdown/Parser/Rules/BodyRule.cs | 27 +++++----- cs/Markdown/Parser/Rules/BoldRule.cs | 34 +++++++------ cs/Markdown/Parser/Rules/EscapeRule.cs | 15 +++--- cs/Markdown/Parser/Rules/HeaderRule.cs | 14 +++--- cs/Markdown/Parser/Rules/InWordBoldRule.cs | 41 ++++++++-------- cs/Markdown/Parser/Rules/InWordItalicRule.cs | 37 +++++++------- cs/Markdown/Parser/Rules/ItalicRule.cs | 49 ++++++++++--------- cs/Markdown/Parser/Rules/ListItemRule.cs | 14 +++--- cs/Markdown/Parser/Rules/ParagraphRule.cs | 41 ++++++++-------- cs/Markdown/Parser/Rules/PatternRule.cs | 1 + cs/Markdown/Parser/Rules/TextRule.cs | 1 + cs/Markdown/Parser/Rules/UnorderedListRule.cs | 11 ++--- cs/Markdown/Parser/TokenParser.cs | 5 +- 13 files changed, 145 insertions(+), 145 deletions(-) diff --git a/cs/Markdown/Parser/Rules/BodyRule.cs b/cs/Markdown/Parser/Rules/BodyRule.cs index be03e6068..0110b2bce 100644 --- a/cs/Markdown/Parser/Rules/BodyRule.cs +++ b/cs/Markdown/Parser/Rules/BodyRule.cs @@ -6,19 +6,20 @@ namespace Markdown.Parser.Rules; public class BodyRule : IParsingRule { - public Node? Match(List tokens, int begin = 0) - { - var tagRules = new OrRule([ - new EscapeRule([TokenType.Octothorpe, TokenType.Asterisk]), - new HeaderRule(), - new UnorderedListRule(), - new ParagraphRule() - ]); - var tokenRules = new PatternRule(TokenType.Newline); - - var resultRule = new KleeneStarRule(new OrRule(tagRules, tokenRules)); - return resultRule.Match(tokens, begin) is SpecNode node ? BuildNode(node) : null; - } + private static readonly IParsingRule TagRules = new OrRule([ + new EscapeRule([TokenType.Octothorpe, TokenType.Asterisk]), + new HeaderRule(), + new UnorderedListRule(), + new ParagraphRule() + ]); + + private static readonly IParsingRule TokenRules = new PatternRule(TokenType.Newline); + + private static readonly KleeneStarRule ResultRule = new(new OrRule(TagRules, TokenRules)); + + public Node? Match(List tokens, int begin = 0) + => ResultRule.Match(tokens, begin) is SpecNode node ? BuildNode(node) : null; + private static TagNode BuildNode(SpecNode node) => new(NodeType.Body, node.Nodes, node.Start, node.Consumed); } \ No newline at end of file diff --git a/cs/Markdown/Parser/Rules/BoldRule.cs b/cs/Markdown/Parser/Rules/BoldRule.cs index ccd957f02..5ba022b93 100644 --- a/cs/Markdown/Parser/Rules/BoldRule.cs +++ b/cs/Markdown/Parser/Rules/BoldRule.cs @@ -15,23 +15,25 @@ public class BoldRule : IParsingRule : new InWordBoldRule().Match(tokens, begin); } - private static TagNode? MatchBold(List tokens, int begin = 0) - { - var valueRule = new OrRule([ - new ItalicRule(), - new TextRule(), - new PatternRule(TokenType.Backslash) - ]); - var pattern = new AndRule([ - PatternRuleFactory.DoubleUnderscore(), - new ConditionalRule(new KleeneStarRule(valueRule), HasRightBorders), - PatternRuleFactory.DoubleUnderscore() - ]); - var continuesRule = new OrRule(TokenType.Newline, TokenType.Space); + private static readonly IParsingRule ValueRule = new OrRule([ + new ItalicRule(), + new TextRule(), + new PatternRule(TokenType.Backslash) + ]); + + private static readonly IParsingRule Pattern = new AndRule([ + PatternRuleFactory.DoubleUnderscore(), + new ConditionalRule(new KleeneStarRule(ValueRule), HasRightBorders), + PatternRuleFactory.DoubleUnderscore() + ]); + + private static readonly IParsingRule ContinuesRule = new OrRule(TokenType.Newline, TokenType.Space); - var resultRule = new ContinuesRule(pattern, continuesRule); - return resultRule.Match(tokens, begin) is SpecNode specNode ? BuildNode(specNode) : null; - } + private static readonly ContinuesRule ResultRule = new(Pattern, ContinuesRule); + + private static TagNode? MatchBold(List tokens, int begin = 0) + => ResultRule.Match(tokens, begin) is SpecNode specNode ? BuildNode(specNode) : null; + private static TagNode BuildNode(SpecNode node) { var valueNode = (node.Nodes.Second() as SpecNode); diff --git a/cs/Markdown/Parser/Rules/EscapeRule.cs b/cs/Markdown/Parser/Rules/EscapeRule.cs index 05522f2fa..ffd6fbe83 100644 --- a/cs/Markdown/Parser/Rules/EscapeRule.cs +++ b/cs/Markdown/Parser/Rules/EscapeRule.cs @@ -11,15 +11,14 @@ public EscapeRule(TokenType escapedTokenType) : this([escapedTokenType]) { } + private readonly AndRule resultRule = new([ + new PatternRule(TokenType.Backslash), + new OrRule(escapedTokens) + ]); - public Node? Match(List tokens, int begin = 0) - { - var resultRule = new AndRule([ - new PatternRule(TokenType.Backslash), - new OrRule(escapedTokens) - ]); - return resultRule.Match(tokens, begin) is SpecNode node ? BuildNode(node) : null; - } + public Node? Match(List tokens, int begin = 0) + => resultRule.Match(tokens, begin) is SpecNode node ? BuildNode(node) : null; + private static TagNode BuildNode(SpecNode node) => new(NodeType.Escape, node.Nodes.Second() ?? throw new InvalidOperationException(), node.Start, node.Consumed); } \ No newline at end of file diff --git a/cs/Markdown/Parser/Rules/HeaderRule.cs b/cs/Markdown/Parser/Rules/HeaderRule.cs index ded758780..3d2daefca 100644 --- a/cs/Markdown/Parser/Rules/HeaderRule.cs +++ b/cs/Markdown/Parser/Rules/HeaderRule.cs @@ -8,14 +8,12 @@ namespace Markdown.Parser.Rules; public class HeaderRule : IParsingRule { - public Node? Match(List tokens, int begin = 0) - { - var resultRule = new AndRule([ - new PatternRule([TokenType.Octothorpe, TokenType.Space]), - new ParagraphRule(), - ]); - return resultRule.Match(tokens, begin) is SpecNode node ? BuildNode(node) : null; - } + private readonly AndRule resultRule = new([ + new PatternRule([TokenType.Octothorpe, TokenType.Space]), + new ParagraphRule(), + ]); + public Node? Match(List tokens, int begin = 0) + => resultRule.Match(tokens, begin) is SpecNode node ? BuildNode(node) : null; private static TagNode BuildNode(SpecNode specNode) { diff --git a/cs/Markdown/Parser/Rules/InWordBoldRule.cs b/cs/Markdown/Parser/Rules/InWordBoldRule.cs index a158d4f3c..bb2e6085b 100644 --- a/cs/Markdown/Parser/Rules/InWordBoldRule.cs +++ b/cs/Markdown/Parser/Rules/InWordBoldRule.cs @@ -8,28 +8,33 @@ namespace Markdown.Parser.Rules; public class InWordBoldRule : IParsingRule { - private readonly List possibleContinues = + private static readonly List PossibleContinues = [ TokenType.Newline, TokenType.Space, TokenType.Word ]; - public Node? Match(List tokens, int begin = 0) - { - var valueRule = new OrRule(new InWordItalicRule(), new PatternRule(TokenType.Word)); - var pattern = new AndRule([ - PatternRuleFactory.DoubleUnderscore(), - new KleeneStarRule(valueRule), - PatternRuleFactory.DoubleUnderscore(), - ]); - var continuesRule = new OrRule(possibleContinues); - var resultRule = new ContinuesRule(pattern, continuesRule); - return resultRule.Match(tokens, begin) is SpecNode node ? BuildNode(node) : null; - } + private static readonly OrRule ValueRule = new(new InWordItalicRule(), new PatternRule(TokenType.Word)); + private static readonly AndRule Pattern = new([ + PatternRuleFactory.DoubleUnderscore(), + new KleeneStarRule(ValueRule), + PatternRuleFactory.DoubleUnderscore(), + ]); + private static readonly OrRule ContinuesRule = new(PossibleContinues); + + private static readonly ContinuesRule ResultRule = new(Pattern, ContinuesRule); + + private static readonly PatternRule InStartRule = new([ + TokenType.Underscore, TokenType.Underscore, TokenType.Word, + TokenType.Underscore, TokenType.Underscore, TokenType.Word, + ]); + + public Node? Match(List tokens, int begin = 0) + => ResultRule.Match(tokens, begin) is SpecNode node ? BuildNode(node) : null; private static TagNode BuildNode(SpecNode node) { - var valueNode = (node.Nodes.Second() as SpecNode); + var valueNode = node.Nodes.Second() as SpecNode; Debug.Assert(valueNode != null, nameof(valueNode) + " != null"); return new TagNode(NodeType.Bold, valueNode.Nodes, node.Start, node.Consumed); } @@ -38,11 +43,7 @@ public static bool IsTagInWord(List tokens, int begin = 0) { if (begin != 0 && tokens[begin - 1].TokenType == TokenType.Word) return true; - - var inStartRule = new PatternRule([ - TokenType.Underscore, TokenType.Underscore, TokenType.Word, - TokenType.Underscore, TokenType.Underscore, TokenType.Word, - ]); - return inStartRule.Match(tokens, begin) is not null; + + return InStartRule.Match(tokens, begin) is not null; } } \ No newline at end of file diff --git a/cs/Markdown/Parser/Rules/InWordItalicRule.cs b/cs/Markdown/Parser/Rules/InWordItalicRule.cs index 3e9058f41..f12941219 100644 --- a/cs/Markdown/Parser/Rules/InWordItalicRule.cs +++ b/cs/Markdown/Parser/Rules/InWordItalicRule.cs @@ -7,23 +7,28 @@ namespace Markdown.Parser.Rules; public class InWordItalicRule : IParsingRule { - private readonly List possibleContinues = + private static readonly List PossibleContinues = [ TokenType.Newline, TokenType.Space, TokenType.Word ]; + + private static readonly AndRule Pattern = new([ + new PatternRule(TokenType.Underscore), + new KleeneStarRule(new OrRule(new PatternRule(TokenType.Word), new PatternRule(TokenType.Backslash))), + new PatternRule(TokenType.Underscore), + ]); + + private static readonly OrRule ContinuesRule = new(PossibleContinues); - public Node? Match(List tokens, int begin = 0) - { - var pattern = new AndRule([ - new PatternRule(TokenType.Underscore), - new KleeneStarRule(new OrRule(new PatternRule(TokenType.Word), new PatternRule(TokenType.Backslash))), - new PatternRule(TokenType.Underscore), - ]); - var continuesRule = new OrRule(possibleContinues); + private static readonly ContinuesRule ResultRule = new(Pattern, ContinuesRule); - var resultRule = new ContinuesRule(pattern, continuesRule); - return resultRule.Match(tokens, begin) is SpecNode node ? BuildNode(node) : null; - } + private static readonly PatternRule InStartRule = new([ + TokenType.Underscore, TokenType.Word, + TokenType.Underscore, TokenType.Word, + ]); + + public Node? Match(List tokens, int begin = 0) + => ResultRule.Match(tokens, begin) is SpecNode node ? BuildNode(node) : null; private static TagNode BuildNode(SpecNode node) => new(NodeType.Italic, node.Nodes.Second() ?? throw new InvalidOperationException(), node.Start, node.Consumed); @@ -32,11 +37,7 @@ public static bool IsTagInWord(List tokens, int begin = 0) { if (begin != 0 && tokens[begin - 1].TokenType == TokenType.Word) return true; - - var inStartRule = new PatternRule([ - TokenType.Underscore, TokenType.Word, - TokenType.Underscore, TokenType.Word, - ]); - return inStartRule.Match(tokens, begin) is not null; + + return InStartRule.Match(tokens, begin) is not null; } } \ No newline at end of file diff --git a/cs/Markdown/Parser/Rules/ItalicRule.cs b/cs/Markdown/Parser/Rules/ItalicRule.cs index c226bedac..52afb7da0 100644 --- a/cs/Markdown/Parser/Rules/ItalicRule.cs +++ b/cs/Markdown/Parser/Rules/ItalicRule.cs @@ -7,46 +7,49 @@ namespace Markdown.Parser.Rules; public class ItalicRule : IParsingRule { - private readonly AndRule innerBoldRule = new([ + private static readonly AndRule InnerBoldRule = new([ PatternRuleFactory.DoubleUnderscore(), new KleeneStarRule(new OrRule([new PatternRule(TokenType.Backslash), new TextRule()])), PatternRuleFactory.DoubleUnderscore() ]); - private readonly List possibleContinues = + + private static readonly List PossibleContinues = [ PatternRuleFactory.DoubleUnderscore(), new PatternRule(TokenType.Newline), new PatternRule(TokenType.Space), ]; - public Node? Match(List tokens, int begin = 0) - { - return !InWordItalicRule.IsTagInWord(tokens, begin) - ? MatchItalic(tokens, begin) - : new InWordItalicRule().Match(tokens, begin); - } - private TagNode? MatchItalic(List tokens, int begin) - { - var valueRule = new OrRule([ - new TextRule(), - new PatternRule(TokenType.Backslash), - innerBoldRule]); + private static readonly OrRule ValueRule = new([ + new TextRule(), + new PatternRule(TokenType.Backslash), + InnerBoldRule]); - var pattern = new AndRule([ - new PatternRule(TokenType.Underscore), - new ConditionalRule(new KleeneStarRule(valueRule), HasRightBorders), - new PatternRule(TokenType.Underscore), - ]); - var continuesRule = new OrRule(possibleContinues); + private static readonly AndRule Pattern = new([ + new PatternRule(TokenType.Underscore), + new ConditionalRule(new KleeneStarRule(ValueRule), HasRightBorders), + new PatternRule(TokenType.Underscore), + ]); + + private static readonly OrRule ContinuesRule = new(PossibleContinues); - var resultRule = new ContinuesRule(pattern, continuesRule); - return resultRule.Match(tokens, begin) is SpecNode specNode ? BuildNode(specNode) : null; - } + private static readonly ContinuesRule ResultRule = new(Pattern, ContinuesRule); + + + public Node? Match(List tokens, int begin = 0) + => !InWordItalicRule.IsTagInWord(tokens, begin) + ? MatchItalic(tokens, begin) + : new InWordItalicRule().Match(tokens, begin); + + private static TagNode? MatchItalic(List tokens, int begin) + => ResultRule.Match(tokens, begin) is SpecNode specNode ? BuildNode(specNode) : null; + private static TagNode BuildNode(SpecNode node) { var valueNode = (node.Nodes.Second() as SpecNode)!; return new TagNode(NodeType.Italic, valueNode.Nodes, node.Start, node.Consumed); } + private static bool HasRightBorders(Node node, List tokens) => tokens[node.End].TokenType != TokenType.Space && tokens[node.Start].TokenType != TokenType.Space; } \ No newline at end of file diff --git a/cs/Markdown/Parser/Rules/ListItemRule.cs b/cs/Markdown/Parser/Rules/ListItemRule.cs index d27dea842..6dc3e1862 100644 --- a/cs/Markdown/Parser/Rules/ListItemRule.cs +++ b/cs/Markdown/Parser/Rules/ListItemRule.cs @@ -8,14 +8,12 @@ namespace Markdown.Parser.Rules; public class ListItemRule : IParsingRule { - public Node? Match(List tokens, int begin = 0) - { - var resultRule = new AndRule([ - new PatternRule([TokenType.Asterisk, TokenType.Space]), - new ParagraphRule(), - ]); - return resultRule.Match(tokens, begin) is SpecNode node ? BuildNode(node) : null; - } + private static readonly AndRule ResultRule = new([ + new PatternRule([TokenType.Asterisk, TokenType.Space]), + new ParagraphRule(), + ]); + public Node? Match(List tokens, int begin = 0) + => ResultRule.Match(tokens, begin) is SpecNode node ? BuildNode(node) : null; private static TagNode BuildNode(SpecNode specNode) { diff --git a/cs/Markdown/Parser/Rules/ParagraphRule.cs b/cs/Markdown/Parser/Rules/ParagraphRule.cs index 733694f07..3383a8df8 100644 --- a/cs/Markdown/Parser/Rules/ParagraphRule.cs +++ b/cs/Markdown/Parser/Rules/ParagraphRule.cs @@ -8,28 +8,27 @@ namespace Markdown.Parser.Rules; public class ParagraphRule: IParsingRule { - public Node? Match(List tokens, int begin = 0) - { - var tagRules = new OrRule([ - new EscapeRule([TokenType.Underscore, TokenType.Backslash]), - new ItalicRule(), new BoldRule(), new TextRule(), - ]); - var tokenRules = new OrRule([ - PatternRuleFactory.DoubleUnderscore(), - new PatternRule(TokenType.Number), - new PatternRule(TokenType.Octothorpe), - new PatternRule(TokenType.Underscore), - new PatternRule(TokenType.Asterisk), - new PatternRule(TokenType.Backslash), - ]); + private static readonly OrRule TagRules = new([ + new EscapeRule([TokenType.Underscore, TokenType.Backslash]), + new ItalicRule(), new BoldRule(), new TextRule(), + ]); + + private static readonly OrRule TokenRules = new([ + PatternRuleFactory.DoubleUnderscore(), + new PatternRule(TokenType.Number), + new PatternRule(TokenType.Octothorpe), + new PatternRule(TokenType.Underscore), + new PatternRule(TokenType.Asterisk), + new PatternRule(TokenType.Backslash), + ]); - var resultRule = new AndRule([ - new KleeneStarRule(new OrRule(tagRules, tokenRules)), - new PatternRule(TokenType.Newline) - ]); - - return resultRule.Match(tokens, begin) is SpecNode node ? BuildNode(node) : null; - } + private static readonly AndRule ResultRule = new([ + new KleeneStarRule(new OrRule(TagRules, TokenRules)), + new PatternRule(TokenType.Newline) + ]); + + public Node? Match(List tokens, int begin = 0) + => ResultRule.Match(tokens, begin) is SpecNode node ? BuildNode(node) : null; private static TagNode BuildNode(SpecNode node) { diff --git a/cs/Markdown/Parser/Rules/PatternRule.cs b/cs/Markdown/Parser/Rules/PatternRule.cs index f22f24dcc..e54ffcd92 100644 --- a/cs/Markdown/Parser/Rules/PatternRule.cs +++ b/cs/Markdown/Parser/Rules/PatternRule.cs @@ -28,6 +28,7 @@ public PatternRule(TokenType tokenType) .Take(pattern.Count) .Zip(pattern) .All(pair => pair.First.TokenType == pair.Second); + return !isMatched ? null : new TextNode(begin, pattern.Count); } } diff --git a/cs/Markdown/Parser/Rules/TextRule.cs b/cs/Markdown/Parser/Rules/TextRule.cs index 85c4fbb45..914d7eba8 100644 --- a/cs/Markdown/Parser/Rules/TextRule.cs +++ b/cs/Markdown/Parser/Rules/TextRule.cs @@ -12,6 +12,7 @@ public class TextRule : IParsingRule .Skip(begin) .TakeWhile(IsText) .Count(); + return textLength == 0 ? null : new TextNode(begin, textLength); } diff --git a/cs/Markdown/Parser/Rules/UnorderedListRule.cs b/cs/Markdown/Parser/Rules/UnorderedListRule.cs index 956ac681f..08035ce60 100644 --- a/cs/Markdown/Parser/Rules/UnorderedListRule.cs +++ b/cs/Markdown/Parser/Rules/UnorderedListRule.cs @@ -6,12 +6,11 @@ namespace Markdown.Parser.Rules; public class UnorderedListRule : IParsingRule { - public Node? Match(List tokens, int begin = 0) - { - var resultRule = new KleeneStarRule(new ListItemRule()); - - return resultRule.Match(tokens, begin) is SpecNode node ? BuildNode(node) : null; - } + private static readonly KleeneStarRule ResultRule = new(new ListItemRule()); + + public Node? Match(List tokens, int begin = 0) + => ResultRule.Match(tokens, begin) is SpecNode node ? BuildNode(node) : null; + private static TagNode BuildNode(SpecNode node) => new(NodeType.UnorderedList, node.Nodes, node.Start, node.Consumed); } \ No newline at end of file diff --git a/cs/Markdown/Parser/TokenParser.cs b/cs/Markdown/Parser/TokenParser.cs index 0abb545ed..bdd7d03d0 100644 --- a/cs/Markdown/Parser/TokenParser.cs +++ b/cs/Markdown/Parser/TokenParser.cs @@ -6,8 +6,5 @@ namespace Markdown.Parser; public static class TokenParser { - public static Node? Parse(List tokens) - { - return new BodyRule().Match(tokens); - } + public static Node? Parse(List tokens) => new BodyRule().Match(tokens); } \ No newline at end of file From 3f07148e405d32636cbd2da11599527c4a07d25e Mon Sep 17 00:00:00 2001 From: Geratoptus Date: Sun, 15 Dec 2024 00:15:50 +0500 Subject: [PATCH 16/27] =?UTF-8?q?feat(HeaderRule.cs):=20=D1=82=D0=B5=D0=BF?= =?UTF-8?q?=D0=B5=D1=80=D1=8C=20=D0=BF=D1=80=D0=B0=D0=B2=D0=B8=D0=BB=D0=BE?= =?UTF-8?q?=20=D0=BC=D1=8D=D1=82=D1=87=D0=B8=D1=82=D1=8C=20=D0=B2=D1=81?= =?UTF-8?q?=D0=B5=206=20=D1=85=D0=B5=D0=B4=D0=B5=D1=80=D0=BE=D0=B2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cs/Markdown/Parser/Rules/HeaderRule.cs | 27 +++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/cs/Markdown/Parser/Rules/HeaderRule.cs b/cs/Markdown/Parser/Rules/HeaderRule.cs index 3d2daefca..8e526acdd 100644 --- a/cs/Markdown/Parser/Rules/HeaderRule.cs +++ b/cs/Markdown/Parser/Rules/HeaderRule.cs @@ -8,17 +8,34 @@ namespace Markdown.Parser.Rules; public class HeaderRule : IParsingRule { + private const uint MaxHeaderSize = 6; + + private static readonly KleeneStarRule OctothorpeRule = new(new PatternRule(TokenType.Octothorpe)); + private readonly AndRule resultRule = new([ - new PatternRule([TokenType.Octothorpe, TokenType.Space]), + OctothorpeRule, + new PatternRule([TokenType.Space]), new ParagraphRule(), ]); - public Node? Match(List tokens, int begin = 0) - => resultRule.Match(tokens, begin) is SpecNode node ? BuildNode(node) : null; + + public Node? Match(List tokens, int begin = 0) + { + if (OctothorpeRule.Match(tokens, begin)?.Consumed > MaxHeaderSize) + { + return null; + } + + return resultRule.Match(tokens, begin) is SpecNode node ? BuildNode(node) : null; + } private static TagNode BuildNode(SpecNode specNode) { - var valueNode = (specNode.Nodes.Second() as TagNode); + var headerSize = specNode.Nodes.First() as SpecNode; + var valueNode = specNode.Nodes.Third() as TagNode; + Debug.Assert(valueNode != null, nameof(valueNode) + " != null"); - return new TagNode(NodeType.Header, valueNode.Children, specNode.Start,specNode.Consumed); + Debug.Assert(headerSize != null, nameof(headerSize) + " != null"); + + return new TagNode(NodeType.Header, valueNode.Children.Prepend(headerSize).ToList(), specNode.Start, specNode.Consumed); } } \ No newline at end of file From 7aa5af73d1b49f7c771c49f730ebfb04810285dc Mon Sep 17 00:00:00 2001 From: Geratoptus Date: Sun, 15 Dec 2024 00:16:53 +0500 Subject: [PATCH 17/27] =?UTF-8?q?feat(HtmlGenerator.cs):=20=D1=82=D0=B5?= =?UTF-8?q?=D0=BF=D0=B5=D1=80=D1=8C=20HtmlGenerator=20=D0=BC=D0=BE=D0=B6?= =?UTF-8?q?=D0=B5=D1=82=20=D0=B3=D0=B5=D0=BD=D0=B5=D1=80=D0=B8=D1=80=D0=BE?= =?UTF-8?q?=D0=B2=D0=B0=D1=82=D1=8C=20=D0=B2=D1=81=D0=B5=206=20html-=D0=B7?= =?UTF-8?q?=D0=B0=D0=B3=D0=BE=D0=BB=D0=BE=D0=B2=D0=BA=D0=BE=D0=B2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cs/Markdown/Generator/HtmlGenerator.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cs/Markdown/Generator/HtmlGenerator.cs b/cs/Markdown/Generator/HtmlGenerator.cs index 9d66c7615..80b14d95d 100644 --- a/cs/Markdown/Generator/HtmlGenerator.cs +++ b/cs/Markdown/Generator/HtmlGenerator.cs @@ -31,7 +31,7 @@ public string Render(Node? root, List tokens) => $"
  • {RenderChildren(children, tokens)}
  • ", TagNode { NodeType: NodeType.Header, Children: var children } - => $"

    {RenderChildren(children, tokens)}

    ", + => $"{RenderChildren(children[1..], tokens)}", TagNode { NodeType: NodeType.Bold, Children: var children } => $"{RenderChildren(children, tokens)}", From 60d09c5a57f3f32b2c0f86d97a63de23e0641802 Mon Sep 17 00:00:00 2001 From: Geratoptus Date: Sun, 15 Dec 2024 00:18:00 +0500 Subject: [PATCH 18/27] =?UTF-8?q?fix(ItalicRule|BoldRule):=20=D1=82=D0=B5?= =?UTF-8?q?=D0=BF=D0=B5=D1=80=D1=8C=20=D0=BF=D1=80=D0=B0=D0=B2=D0=B8=D0=BB?= =?UTF-8?q?=D0=B0=20=D0=BA=D0=BE=D1=80=D1=80=D0=B5=D0=BA=D1=82=D0=BD=D0=BE?= =?UTF-8?q?=20=D0=BC=D1=8D=D1=82=D1=87=D0=B0=D1=82,=20=D0=BA=D0=BE=D0=B3?= =?UTF-8?q?=D0=B4=D0=B0=20=D0=B2=D0=BD=D1=83=D1=82=D1=80=D0=B8=20=D1=82?= =?UTF-8?q?=D1=8D=D0=B3=D0=B0=20=D0=B5=D1=81=D1=82=D1=8C=20*=20=D0=B8=20#?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cs/Markdown/Parser/Rules/BoldRule.cs | 8 +++++++- cs/Markdown/Parser/Rules/InWordItalicRule.cs | 7 ++++++- cs/Markdown/Parser/Rules/ItalicRule.cs | 12 ++++++++++-- 3 files changed, 23 insertions(+), 4 deletions(-) diff --git a/cs/Markdown/Parser/Rules/BoldRule.cs b/cs/Markdown/Parser/Rules/BoldRule.cs index 5ba022b93..82f351722 100644 --- a/cs/Markdown/Parser/Rules/BoldRule.cs +++ b/cs/Markdown/Parser/Rules/BoldRule.cs @@ -15,12 +15,18 @@ public class BoldRule : IParsingRule : new InWordBoldRule().Match(tokens, begin); } + private static readonly List AdditionalTextSymbols = + [ + new PatternRule(TokenType.Asterisk), new PatternRule(TokenType.Backslash), new PatternRule(TokenType.Octothorpe) + ]; + private static readonly IParsingRule ValueRule = new OrRule([ new ItalicRule(), new TextRule(), - new PatternRule(TokenType.Backslash) + new OrRule(AdditionalTextSymbols) ]); + private static readonly IParsingRule Pattern = new AndRule([ PatternRuleFactory.DoubleUnderscore(), new ConditionalRule(new KleeneStarRule(ValueRule), HasRightBorders), diff --git a/cs/Markdown/Parser/Rules/InWordItalicRule.cs b/cs/Markdown/Parser/Rules/InWordItalicRule.cs index f12941219..71ee947c1 100644 --- a/cs/Markdown/Parser/Rules/InWordItalicRule.cs +++ b/cs/Markdown/Parser/Rules/InWordItalicRule.cs @@ -11,10 +11,15 @@ public class InWordItalicRule : IParsingRule [ TokenType.Newline, TokenType.Space, TokenType.Word ]; + + private static readonly List AdditionalTextSymbols = + [ + new PatternRule(TokenType.Asterisk), new PatternRule(TokenType.Backslash), new PatternRule(TokenType.Octothorpe) + ]; private static readonly AndRule Pattern = new([ new PatternRule(TokenType.Underscore), - new KleeneStarRule(new OrRule(new PatternRule(TokenType.Word), new PatternRule(TokenType.Backslash))), + new KleeneStarRule(new OrRule(new PatternRule(TokenType.Word), new OrRule(AdditionalTextSymbols))), new PatternRule(TokenType.Underscore), ]); diff --git a/cs/Markdown/Parser/Rules/ItalicRule.cs b/cs/Markdown/Parser/Rules/ItalicRule.cs index 52afb7da0..bdacfd49b 100644 --- a/cs/Markdown/Parser/Rules/ItalicRule.cs +++ b/cs/Markdown/Parser/Rules/ItalicRule.cs @@ -1,3 +1,4 @@ +using System.Diagnostics; using Markdown.Parser.Nodes; using Markdown.Parser.Rules.BoolRules; using Markdown.Parser.Rules.Tools; @@ -20,9 +21,14 @@ public class ItalicRule : IParsingRule new PatternRule(TokenType.Space), ]; + private static readonly List AdditionalTextSymbols = + [ + new PatternRule(TokenType.Asterisk), new PatternRule(TokenType.Backslash), new PatternRule(TokenType.Octothorpe) + ]; + private static readonly OrRule ValueRule = new([ new TextRule(), - new PatternRule(TokenType.Backslash), + new OrRule(AdditionalTextSymbols), InnerBoldRule]); private static readonly AndRule Pattern = new([ @@ -46,7 +52,9 @@ public class ItalicRule : IParsingRule private static TagNode BuildNode(SpecNode node) { - var valueNode = (node.Nodes.Second() as SpecNode)!; + var valueNode = (node.Nodes.Second() as SpecNode); + + Debug.Assert(valueNode != null, nameof(valueNode) + " != null"); return new TagNode(NodeType.Italic, valueNode.Nodes, node.Start, node.Consumed); } From 5c1703817d3f2af602070471324b9b8393afec20 Mon Sep 17 00:00:00 2001 From: Geratoptus Date: Sun, 15 Dec 2024 00:25:36 +0500 Subject: [PATCH 19/27] =?UTF-8?q?fix(BodyRuleTest.cs):=20=D0=BF=D0=BE?= =?UTF-8?q?=D1=84=D0=B8=D0=BA=D1=81=D0=B8=D0=BB=20=D1=82=D0=B5=D1=81=D1=82?= =?UTF-8?q?=20=D1=81=20=D0=BD=D0=BE=D0=B2=D1=8B=D0=BC=20=D1=85=D0=B5=D0=B4?= =?UTF-8?q?=D0=B5=D1=80=D0=BE=D0=BC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cs/MarkdownTests/Parser/Rules/BodyRuleTest.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cs/MarkdownTests/Parser/Rules/BodyRuleTest.cs b/cs/MarkdownTests/Parser/Rules/BodyRuleTest.cs index de48705a7..8efde703d 100644 --- a/cs/MarkdownTests/Parser/Rules/BodyRuleTest.cs +++ b/cs/MarkdownTests/Parser/Rules/BodyRuleTest.cs @@ -44,7 +44,7 @@ jkl mno pqr node.Should().NotBeNull(); node.Children.Select(n => n.NodeType).Should().BeEquivalentTo( [NodeType.Header, NodeType.Paragraph], options => options.WithStrictOrdering()); - node.ToText(tokens).Should().Be("abc def ghijkl mno pqr"); + node.ToText(tokens).Should().Be("#abc def ghijkl mno pqr"); } [Test] From 81c14e9c4c450b6ea1a11531c50ef29f919d4f83 Mon Sep 17 00:00:00 2001 From: Geratoptus Date: Sun, 15 Dec 2024 00:26:00 +0500 Subject: [PATCH 20/27] =?UTF-8?q?feat(HeaderRuleTest.cs):=20=D0=B4=D0=BE?= =?UTF-8?q?=D0=B1=D0=B0=D0=B2=D0=B8=D0=BB=20=D1=82=D0=B5=D1=81=D1=82=D0=BE?= =?UTF-8?q?=D0=B2=20=D0=BD=D0=B0=20=D0=BC=D1=8D=D1=82=D1=87=D0=B8=D0=BD?= =?UTF-8?q?=D0=B3=20=D0=BD=D0=BE=D0=B2=D1=8B=D1=85=20=D0=B7=D0=B0=D0=B3?= =?UTF-8?q?=D0=BE=D0=BB=D0=BE=D0=B2=D0=BA=D0=BE=D0=B2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../Parser/Rules/HeaderRuleTest.cs | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/cs/MarkdownTests/Parser/Rules/HeaderRuleTest.cs b/cs/MarkdownTests/Parser/Rules/HeaderRuleTest.cs index 574d3232a..1404a915e 100644 --- a/cs/MarkdownTests/Parser/Rules/HeaderRuleTest.cs +++ b/cs/MarkdownTests/Parser/Rules/HeaderRuleTest.cs @@ -21,6 +21,36 @@ public void Match_ShouldMatch_SimpleHeader(string text) var node = rule.Match(tokens) as TagNode; node.Should().NotBeNull(); + node.NodeType.Should().Be(NodeType.Header); + + } + + [TestCase("# abc def ghi")] + [TestCase("## abc def ghi")] + [TestCase("### abc def ghi")] + [TestCase("#### abc def ghi")] + [TestCase("##### abc def ghi")] + [TestCase("###### abc def ghi")] + public void Match_ShouldMatch_AllTypeOfExistingHeaders(string text) + { + var tokens = tokenizer.Tokenize($"{text}\n"); + + var node = rule.Match(tokens) as TagNode; + + node.Should().NotBeNull(); + node.NodeType.Should().Be(NodeType.Header); + } + + [TestCase("####### abc def ghi")] + [TestCase("######## abc def ghi")] + public void Match_ShouldNotMatch_NotExistingHeaders(string text) + { + + var tokens = tokenizer.Tokenize($"{text}\n"); + + var node = rule.Match(tokens) as TagNode; + + node.Should().BeNull(); } [TestCase("_abc_")] @@ -37,6 +67,7 @@ public void Match_ShouldMatch_HeaderWithInnerTags(string text) var node = rule.Match(tokens) as TagNode; node.Should().NotBeNull(); + node.NodeType.Should().Be(NodeType.Header); } [Test] From 4462f2403ce3520d309e51cef8e92c7e06faf7c4 Mon Sep 17 00:00:00 2001 From: Geratoptus Date: Sun, 15 Dec 2024 00:54:19 +0500 Subject: [PATCH 21/27] =?UTF-8?q?feat(ItalicRuleTest.cs):=20=D0=B4=D0=BE?= =?UTF-8?q?=D0=B1=D0=B0=D0=B2=D0=B8=D0=BB=20=D1=82=D0=B5=D1=81=D1=82=D0=BE?= =?UTF-8?q?=D0=B2=20=D0=B4=D0=BB=D1=8F=20=D0=B8=D1=82=D0=B0=D0=BB=D0=B8?= =?UTF-8?q?=D0=BA=D0=B0=20=D0=BD=D0=B0=20=D1=81=D0=BF=D0=B5=D1=86=20=D1=81?= =?UTF-8?q?=D0=B8=D0=BC=D0=B2=D0=BE=D0=BB=D1=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cs/MarkdownTests/Parser/Rules/ItalicRuleTest.cs | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/cs/MarkdownTests/Parser/Rules/ItalicRuleTest.cs b/cs/MarkdownTests/Parser/Rules/ItalicRuleTest.cs index 2f5e4df1c..81fa550ee 100644 --- a/cs/MarkdownTests/Parser/Rules/ItalicRuleTest.cs +++ b/cs/MarkdownTests/Parser/Rules/ItalicRuleTest.cs @@ -27,6 +27,19 @@ public void Match_ShouldMatch_SimpleText(string text) node.ToText(tokens).Should().Be(text); } + + [TestCase("ab#c d/ef g*hi jkl")] + [TestCase("#/*")] + public void Match_ShouldMatch_TextWithSpecialCharacters(string text) + { + var tokens = tokenizer.Tokenize($"_{text}_"); + + var node = rule.Match(tokens) as TagNode; + + node.Should().NotBeNull(); + node.NodeType.Should().Be(NodeType.Italic); + } + [TestCase("abc def ghi_123_jkl", 5)] [TestCase("def 12_34_56 ghi jkl", 3)] public void Match_ShouldNotMatch_TextWithNumbers(string text, int begin) @@ -43,6 +56,9 @@ public void Match_ShouldNotMatch_TextWithNumbers(string text, int begin) [TestCase("abc _de_fghi", 2, ExpectedResult = "de")] [TestCase("_ab_c", 0, ExpectedResult = "ab")] [TestCase("ab_c_", 1, ExpectedResult = "c")] + [TestCase("a_*b_", 1, ExpectedResult = "*b")] + [TestCase("a_/b_", 1, ExpectedResult = "/b")] + [TestCase("a_#b_", 1, ExpectedResult = "#b")] public string Match_ShouldMatch_TagInWord(string text, int begin) { var tokens = tokenizer.Tokenize(text); From 118fc3236d81eac8a9b7e1df5cba6234cb997207 Mon Sep 17 00:00:00 2001 From: Geratoptus Date: Sun, 15 Dec 2024 00:54:38 +0500 Subject: [PATCH 22/27] =?UTF-8?q?feat(BoldRuleTest.cs):=20=D0=BD=D0=B0?= =?UTF-8?q?=D1=88=D0=B5=D0=BB=20=D1=82=D0=B5=D1=81=D1=82=D1=8B,=20=D0=BD?= =?UTF-8?q?=D0=B0=20=D0=BA=D0=BE=D1=82=D0=BE=D1=80=D1=8B=D1=85=20=D0=BD?= =?UTF-8?q?=D0=B5=20=D1=80=D0=B0=D0=B1=D0=BE=D1=82=D0=B0=D0=B5=D1=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cs/MarkdownTests/Parser/Rules/BoldRuleTest.cs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/cs/MarkdownTests/Parser/Rules/BoldRuleTest.cs b/cs/MarkdownTests/Parser/Rules/BoldRuleTest.cs index a92d045ec..751b3cffb 100644 --- a/cs/MarkdownTests/Parser/Rules/BoldRuleTest.cs +++ b/cs/MarkdownTests/Parser/Rules/BoldRuleTest.cs @@ -75,6 +75,9 @@ public void Match_ShouldMatch_TextWithInnerItalicTag() [TestCase("a__bc__", 1, ExpectedResult = "bc")] [TestCase("a__b__c", 1, ExpectedResult = "b")] [TestCase("__a__bc", 0, ExpectedResult = "a")] + [TestCase("__a*__bc", 0, ExpectedResult = "a*")] + [TestCase("__a/__bc", 0, ExpectedResult = "a/")] + [TestCase("f__a#__bc", 1, ExpectedResult = "a#")] public string Match_ShouldMatch_WhenTagInsideWord(string text, int begin) { var tokens = tokenizer.Tokenize(text); @@ -82,7 +85,7 @@ public string Match_ShouldMatch_WhenTagInsideWord(string text, int begin) var node = rule.Match(tokens, begin) as TagNode; node.Should().NotBeNull(); - node.Children.Should().ContainSingle(n => n.NodeType == NodeType.Text); + node.Children.Should().Contain(n => n.NodeType == NodeType.Text); return node.Children.ToText(tokens); } From 9b1bd39f4031d7e1994a4d32abf669868f6ab489 Mon Sep 17 00:00:00 2001 From: Geratoptus Date: Sun, 15 Dec 2024 00:55:13 +0500 Subject: [PATCH 23/27] =?UTF-8?q?fix(InWorldBoldRule):=20=D1=82=D0=B5?= =?UTF-8?q?=D0=BF=D0=B5=D1=80=D1=8C=20=D0=BF=D1=80=D0=B0=D0=B2=D0=B8=D0=BB?= =?UTF-8?q?=D0=BE=20=D0=BA=D0=BE=D1=80=D1=80=D0=B5=D0=BA=D1=82=D0=BD=D0=BE?= =?UTF-8?q?=20=D0=BC=D1=8D=D1=82=D1=87=D0=B8=D1=82=20=D1=81=D0=BF=D0=B5?= =?UTF-8?q?=D1=86=20=D1=81=D0=B8=D0=BC=D0=B2=D0=BE=D0=BB=D1=8B,=20=D0=BA?= =?UTF-8?q?=D0=BE=D0=B3=D0=B4=D0=B0=20=D1=82=D1=8D=D0=B3=20=D0=B2=D0=BD?= =?UTF-8?q?=D1=83=D1=82=D1=80=D0=B8=20=D1=81=D0=BB=D0=BE=D0=B2=D0=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cs/Markdown/Parser/Rules/InWordBoldRule.cs | 27 +++++++++++++++++----- 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/cs/Markdown/Parser/Rules/InWordBoldRule.cs b/cs/Markdown/Parser/Rules/InWordBoldRule.cs index bb2e6085b..867895951 100644 --- a/cs/Markdown/Parser/Rules/InWordBoldRule.cs +++ b/cs/Markdown/Parser/Rules/InWordBoldRule.cs @@ -12,9 +12,18 @@ public class InWordBoldRule : IParsingRule [ TokenType.Newline, TokenType.Space, TokenType.Word ]; + + private static readonly List AdditionalTextSymbols = + [ + new PatternRule(TokenType.Asterisk), new PatternRule(TokenType.Backslash), new PatternRule(TokenType.Octothorpe) + ]; - - private static readonly OrRule ValueRule = new(new InWordItalicRule(), new PatternRule(TokenType.Word)); + private static readonly OrRule ValueRule = new([ + new InWordItalicRule(), + new PatternRule(TokenType.Word), + new OrRule(AdditionalTextSymbols) + ]); + private static readonly AndRule Pattern = new([ PatternRuleFactory.DoubleUnderscore(), new KleeneStarRule(ValueRule), @@ -23,11 +32,17 @@ public class InWordBoldRule : IParsingRule private static readonly OrRule ContinuesRule = new(PossibleContinues); private static readonly ContinuesRule ResultRule = new(Pattern, ContinuesRule); - - private static readonly PatternRule InStartRule = new([ - TokenType.Underscore, TokenType.Underscore, TokenType.Word, - TokenType.Underscore, TokenType.Underscore, TokenType.Word, + + private static readonly AndRule InStartRule = new([ + PatternRuleFactory.DoubleUnderscore(), + new KleeneStarRule(ValueRule), + PatternRuleFactory.DoubleUnderscore(), + new KleeneStarRule(ValueRule) ]); + // private static readonly PatternRule InStartRule = new([ + // TokenType.Underscore, TokenType.Underscore, TokenType.Word, + // TokenType.Underscore, TokenType.Underscore, TokenType.Word, + // ]); public Node? Match(List tokens, int begin = 0) => ResultRule.Match(tokens, begin) is SpecNode node ? BuildNode(node) : null; From 397c65f2ffad29ba70d6e4081e4fdbf059d6c601 Mon Sep 17 00:00:00 2001 From: Geratoptus Date: Sun, 15 Dec 2024 01:07:11 +0500 Subject: [PATCH 24/27] =?UTF-8?q?fix(MdTest.cs):=20=D1=82=D0=B5=D0=BF?= =?UTF-8?q?=D0=B5=D1=80=D1=8C=20=D0=B0=D1=81=D0=B8=D0=BC=D0=BF=D1=82=D0=BE?= =?UTF-8?q?=D1=82=D0=B8=D1=87=D0=B5=D1=81=D0=BA=D0=B0=D1=8F=20=D1=81=D0=BB?= =?UTF-8?q?=D0=BE=D0=B6=D0=BD=D0=BE=D1=81=D1=82=D1=8C=20=D1=81=D1=80=D0=B0?= =?UTF-8?q?=D0=B2=D0=BD=D0=B8=D0=B2=D0=B0=D0=B5=D1=82=D1=81=D1=8F=20=D1=81?= =?UTF-8?q?=20N*Log2(N),=20=D0=B0=20=D0=BD=D0=B5=20=D1=81=20N*N?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cs/MarkdownTests/MdTest.cs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cs/MarkdownTests/MdTest.cs b/cs/MarkdownTests/MdTest.cs index 71c713e93..a75bd8e3c 100644 --- a/cs/MarkdownTests/MdTest.cs +++ b/cs/MarkdownTests/MdTest.cs @@ -10,10 +10,10 @@ public class Tests [Test] public void Markdown_Render_ShouldWorkFast() { - const int scale = 2; + const int scale = 10; var sw = new Stopwatch(); var results = new List(); - for (var len = 640; len <= 1310720; len *= scale) + for (var len = 10; len <= 1000000; len *= scale) { var markdown = GenerateMarkdown(len); GC.Collect(); @@ -27,7 +27,7 @@ public void Markdown_Render_ShouldWorkFast() Enumerable.Range(1, results.Count - 1) .Select(i => (double)results[i].Ticks / results[i - 1].Ticks) - .Should().OnlyContain(timeRatio => timeRatio < scale * scale); + .Should().OnlyContain(timeRatio => timeRatio < Math.Log2(scale) * scale); } private static string GenerateMarkdown(int len) From 7227fa0db1e2a11c01ba9745b2d0a9b77c8e6bc6 Mon Sep 17 00:00:00 2001 From: Geratoptus Date: Sun, 15 Dec 2024 01:07:51 +0500 Subject: [PATCH 25/27] =?UTF-8?q?feat(MdAcceptanceTest):=20=D0=BD=D0=B0?= =?UTF-8?q?=D0=BA=D0=B8=D0=B4=D0=B0=D0=BB=20=D1=81=D0=BB=D1=83=D1=87=D0=B0?= =?UTF-8?q?=D0=B5=D0=B2=20=D0=BF=D0=BE=D0=B4=20=D0=BD=D0=BE=D0=B2=D1=8B?= =?UTF-8?q?=D0=B9=20=D1=84=D1=83=D0=BD=D0=BA=D1=86=D0=B8=D0=BE=D0=BD=D0=B0?= =?UTF-8?q?=D0=BB=20=D0=B8=20=D0=BF=D0=BE=D1=84=D0=B8=D0=BA=D1=88=D0=B5?= =?UTF-8?q?=D0=BD=D0=BD=D1=8B=D0=B5=20=D0=BF=D1=80=D0=BE=D0=B1=D0=BB=D0=B5?= =?UTF-8?q?=D0=BC=D1=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cs/MarkdownTests/MdAcceptanceResult.html | 4 ++-- cs/MarkdownTests/MdAcceptanceTest.txt | 24 +++++++++++++++++++++++- 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/cs/MarkdownTests/MdAcceptanceResult.html b/cs/MarkdownTests/MdAcceptanceResult.html index eaf7f9b63..c427e22ed 100644 --- a/cs/MarkdownTests/MdAcceptanceResult.html +++ b/cs/MarkdownTests/MdAcceptanceResult.html @@ -1,2 +1,2 @@ -

    Это заголовок

    #Это не заголовок

    И даже # это не заголовок

    • Это список

    *Это не список

    И это тоже * не список

    Это выделится

    Это выделится

    _Это не выделится__

    __И это не выделится_

    Внутри слоооооова

    Тоже рабоооооотает

    Но если неправильн_оооооо__

    То не раб__оооооо_тает

    В начале и в кооооонце тоже работает

    Так _не получится_

    И _так_

    Вот _ так_ и _так _не получится

    И _если __пересечь_, то не получится__

    Вот так все получится

    А вот так уже __не все__

    Все здесь тоже будет рабоооотать

    • И даже здесь
    • Будет работать обалдеть
    -
    • А это уже другой список

    Здесь не список

    • Здесь опять другой список
    • И вот * такая штуко*вина это всего одна запись в списке
    • # И заголовка здесь нет
    • В\роде все

    Следующая строка не список

    *

    \ No newline at end of file +

    Это заголовок

    Поменьше

    Еще поменьше

    И еще

    И еще
    И еще

    ####### Такого в html вроде не придумали

    #Это не заголовок

    И даже # это не заголовок

    • Это список

    *Это не список

    И это тоже * не список

    Это выделится

    Это выделится

    _Это не выделится__

    __И это не выделится_

    Внутри слоооооова

    Тоже рабоооооотает

    Но если неправильн_оооооо__

    То не раб__оооооо_тает

    В начале и в кооооонце тоже работает

    Так _не получится_

    И так\

    Вот _ так_ и _так _не получится

    И _если __пересечь_, то не получится__

    Вот так все получится

    А вот так уже __не все__

    Все здесь тоже будет рабоооотать

    • И даже здесь
    • Будет работать обалдеть
    +
    • А это уже другой список

    Здесь не список

    • Здесь опять другой список
    • И вот * такая штуко*вина это всего одна запись в списке
    • # И заголовка здесь нет
    • В\роде все

    Следующая строка не список

    *

    а это _​точно не работает?_

    а где?

    \emmm

    a\a\\aa\a

    aa\a\aa\a

    a\a__aaa\a__a\a

    aaaaa\aaa

    a\aaaaaa\a

    aaaa\aaaa

    aaa*aa

    a*

    a/

    a#

    a*

    a/

    a*

    \ No newline at end of file diff --git a/cs/MarkdownTests/MdAcceptanceTest.txt b/cs/MarkdownTests/MdAcceptanceTest.txt index efe9fdb4d..a5903742c 100644 --- a/cs/MarkdownTests/MdAcceptanceTest.txt +++ b/cs/MarkdownTests/MdAcceptanceTest.txt @@ -1,4 +1,10 @@ # Это заголовок +## Поменьше +### Еще поменьше +#### И еще +##### И еще +###### И еще +####### Такого в html вроде не придумали #Это не заголовок И даже # это не заголовок * Это список @@ -30,4 +36,20 @@ _А вот так уже __не все___ * # И заголовка здесь нет * В\роде все Следующая строка не список -* \ No newline at end of file +* +а это _​точно не работает?_ +__а где?__ +\\_emmm_ +_a\a\\aa\a_ +__aa\a\aa\a__ +_a\a__aaa\a__a\a_ +__aa_aaa\a_aa__ +__a\a_aaaa_a\a__ +aaa_a\aa_aa +_aaa*aa_ +__a*__ +__a/__ +__a#__ +_a*_ +_a/_ +_a*_ \ No newline at end of file From 28c279b228f9461947fcdbecd4614d9d522df01c Mon Sep 17 00:00:00 2001 From: Geratoptus Date: Sun, 15 Dec 2024 01:16:13 +0500 Subject: [PATCH 26/27] =?UTF-8?q?refactor(InWordBoldRule):=20=D1=83=D0=B4?= =?UTF-8?q?=D0=B0=D0=BB=D0=B8=D0=BB=20=D0=B7=D0=B0=D0=B1=D1=8B=D1=82=D1=8B?= =?UTF-8?q?=D0=B9=20=D0=BA=D0=BE=D0=BC=D0=BC=D0=B5=D0=BD=D1=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cs/Markdown/Parser/Rules/InWordBoldRule.cs | 4 ---- 1 file changed, 4 deletions(-) diff --git a/cs/Markdown/Parser/Rules/InWordBoldRule.cs b/cs/Markdown/Parser/Rules/InWordBoldRule.cs index 867895951..760b7bc3b 100644 --- a/cs/Markdown/Parser/Rules/InWordBoldRule.cs +++ b/cs/Markdown/Parser/Rules/InWordBoldRule.cs @@ -39,10 +39,6 @@ public class InWordBoldRule : IParsingRule PatternRuleFactory.DoubleUnderscore(), new KleeneStarRule(ValueRule) ]); - // private static readonly PatternRule InStartRule = new([ - // TokenType.Underscore, TokenType.Underscore, TokenType.Word, - // TokenType.Underscore, TokenType.Underscore, TokenType.Word, - // ]); public Node? Match(List tokens, int begin = 0) => ResultRule.Match(tokens, begin) is SpecNode node ? BuildNode(node) : null; From e1ed0a39e85dbec359b39073bee93c0ef5cd1054 Mon Sep 17 00:00:00 2001 From: Geratoptus Date: Sun, 15 Dec 2024 01:17:08 +0500 Subject: [PATCH 27/27] =?UTF-8?q?refactor(MdTokenizerTests):=20=D0=BF?= =?UTF-8?q?=D0=BE=D0=BF=D1=80=D0=B0=D0=B2=D0=B8=D0=BB=20=D0=BD=D0=B5=D0=B9?= =?UTF-8?q?=D0=BC=D1=81=D0=BF=D0=B5=D0=B9=D1=81=D1=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cs/MarkdownTests/Tokenizer/MdTokenizerTest.cs | 2 +- cs/MarkdownTests/Tokenizer/Scanners/NumberScannerTest.cs | 2 +- cs/MarkdownTests/Tokenizer/Scanners/SpecScannerTest.cs | 2 +- cs/MarkdownTests/Tokenizer/Scanners/TextScannerTest.cs | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/cs/MarkdownTests/Tokenizer/MdTokenizerTest.cs b/cs/MarkdownTests/Tokenizer/MdTokenizerTest.cs index ea4720247..7002ae82a 100644 --- a/cs/MarkdownTests/Tokenizer/MdTokenizerTest.cs +++ b/cs/MarkdownTests/Tokenizer/MdTokenizerTest.cs @@ -2,7 +2,7 @@ using FluentAssertions; using Markdown.Tokenizer; -namespace MarkdownTests; +namespace MarkdownTests.Tokenizer; [TestFixture] [TestOf(typeof(MdTokenizer))] diff --git a/cs/MarkdownTests/Tokenizer/Scanners/NumberScannerTest.cs b/cs/MarkdownTests/Tokenizer/Scanners/NumberScannerTest.cs index 74f047b04..73cf47e76 100644 --- a/cs/MarkdownTests/Tokenizer/Scanners/NumberScannerTest.cs +++ b/cs/MarkdownTests/Tokenizer/Scanners/NumberScannerTest.cs @@ -2,7 +2,7 @@ using Markdown.Tokenizer.Scanners; using Markdown.Tokens; -namespace MarkdownTests.Scanners; +namespace MarkdownTests.Tokenizer.Scanners; [TestFixture] [TestOf(typeof(NumberScanner))] diff --git a/cs/MarkdownTests/Tokenizer/Scanners/SpecScannerTest.cs b/cs/MarkdownTests/Tokenizer/Scanners/SpecScannerTest.cs index c2eb54898..709820473 100644 --- a/cs/MarkdownTests/Tokenizer/Scanners/SpecScannerTest.cs +++ b/cs/MarkdownTests/Tokenizer/Scanners/SpecScannerTest.cs @@ -2,7 +2,7 @@ using Markdown.Tokenizer.Scanners; using Markdown.Tokens; -namespace MarkdownTests.Scanners; +namespace MarkdownTests.Tokenizer.Scanners; [TestFixture] [TestOf(typeof(SpecScanner))] diff --git a/cs/MarkdownTests/Tokenizer/Scanners/TextScannerTest.cs b/cs/MarkdownTests/Tokenizer/Scanners/TextScannerTest.cs index a88b605c6..542e2dc62 100644 --- a/cs/MarkdownTests/Tokenizer/Scanners/TextScannerTest.cs +++ b/cs/MarkdownTests/Tokenizer/Scanners/TextScannerTest.cs @@ -1,8 +1,8 @@ -using Markdown.Tokens; using FluentAssertions; using Markdown.Tokenizer.Scanners; +using Markdown.Tokens; -namespace MarkdownTests.Scanners; +namespace MarkdownTests.Tokenizer.Scanners; [TestFixture] [TestOf(typeof(TextScanner))]