From 03b17535763506c363c1f8359b4b9eea6383c431 Mon Sep 17 00:00:00 2001 From: "a.oslina" Date: Fri, 22 Nov 2024 16:20:47 +0500 Subject: [PATCH 1/2] Init project design --- cs/Markdown/Enums/TagWrapper.cs | 9 +++++++ cs/Markdown/Markdown.csproj | 10 ++++++++ cs/Markdown/Md.cs | 28 +++++++++++++++++++++ cs/Markdown/Program.cs | 3 +++ cs/Markdown/Renderers/HtmlTokenConverter.cs | 11 ++++++++ cs/Markdown/Renderers/ITokenConverter.cs | 8 ++++++ cs/Markdown/Tokenizers/ITokenizer.cs | 8 ++++++ cs/Markdown/Tokenizers/Tokenizer.cs | 16 ++++++++++++ cs/Markdown/Tokens/BoldToken.cs | 12 +++++++++ cs/Markdown/Tokens/ItalicsToken.cs | 12 +++++++++ cs/Markdown/Tokens/LiteralToken.cs | 12 +++++++++ cs/Markdown/Tokens/ParagraphToken.cs | 12 +++++++++ cs/Markdown/Tokens/Token.cs | 14 +++++++++++ cs/clean-code.sln | 6 +++++ cs/clean-code.sln.DotSettings | 3 +++ 15 files changed, 164 insertions(+) create mode 100644 cs/Markdown/Enums/TagWrapper.cs create mode 100644 cs/Markdown/Markdown.csproj create mode 100644 cs/Markdown/Md.cs create mode 100644 cs/Markdown/Program.cs create mode 100644 cs/Markdown/Renderers/HtmlTokenConverter.cs create mode 100644 cs/Markdown/Renderers/ITokenConverter.cs create mode 100644 cs/Markdown/Tokenizers/ITokenizer.cs create mode 100644 cs/Markdown/Tokenizers/Tokenizer.cs create mode 100644 cs/Markdown/Tokens/BoldToken.cs create mode 100644 cs/Markdown/Tokens/ItalicsToken.cs create mode 100644 cs/Markdown/Tokens/LiteralToken.cs create mode 100644 cs/Markdown/Tokens/ParagraphToken.cs create mode 100644 cs/Markdown/Tokens/Token.cs diff --git a/cs/Markdown/Enums/TagWrapper.cs b/cs/Markdown/Enums/TagWrapper.cs new file mode 100644 index 000000000..618b8e3e7 --- /dev/null +++ b/cs/Markdown/Enums/TagWrapper.cs @@ -0,0 +1,9 @@ +namespace Markdown.Enums; + +public enum TagWrapper +{ + Em, + Strong, + H1, + None +} \ No newline at end of file diff --git a/cs/Markdown/Markdown.csproj b/cs/Markdown/Markdown.csproj new file mode 100644 index 000000000..2f4fc7765 --- /dev/null +++ b/cs/Markdown/Markdown.csproj @@ -0,0 +1,10 @@ + + + + Exe + net8.0 + enable + enable + + + diff --git a/cs/Markdown/Md.cs b/cs/Markdown/Md.cs new file mode 100644 index 000000000..6544f39ce --- /dev/null +++ b/cs/Markdown/Md.cs @@ -0,0 +1,28 @@ +using Markdown.Renderers; +using Markdown.Tokenizers; + +namespace Markdown; + +public class Md +{ + private readonly ITokenizer tokenizer; + private readonly ITokenConverter tokenConverter; + + private Md(ITokenizer tokenizer, ITokenConverter tokenConverter) + { + this.tokenizer = tokenizer; + this.tokenConverter = tokenConverter; + } + + public Md Create(ITokenizer tokenizer, ITokenConverter tokenConverter) + { + return new Md(tokenizer, tokenConverter); + } + + public string Render(string markdownStr) + { + var tokens = tokenizer.SplitToTokens(markdownStr); + var convertedStr = tokenConverter.ConvertTokens(tokens); + return convertedStr; + } +} \ No newline at end of file diff --git a/cs/Markdown/Program.cs b/cs/Markdown/Program.cs new file mode 100644 index 000000000..e5dff12bc --- /dev/null +++ b/cs/Markdown/Program.cs @@ -0,0 +1,3 @@ +// See https://aka.ms/new-console-template for more information + +Console.WriteLine("Hello, World!"); \ No newline at end of file diff --git a/cs/Markdown/Renderers/HtmlTokenConverter.cs b/cs/Markdown/Renderers/HtmlTokenConverter.cs new file mode 100644 index 000000000..90b97c8bc --- /dev/null +++ b/cs/Markdown/Renderers/HtmlTokenConverter.cs @@ -0,0 +1,11 @@ +using Markdown.Tokens; + +namespace Markdown.Renderers; + +public class HtmlTokenConverter : ITokenConverter +{ + public string ConvertTokens(List tokens) + { + throw new NotImplementedException(); + } +} \ No newline at end of file diff --git a/cs/Markdown/Renderers/ITokenConverter.cs b/cs/Markdown/Renderers/ITokenConverter.cs new file mode 100644 index 000000000..88fb9357e --- /dev/null +++ b/cs/Markdown/Renderers/ITokenConverter.cs @@ -0,0 +1,8 @@ +using Markdown.Tokens; + +namespace Markdown.Renderers; + +public interface ITokenConverter +{ + public string ConvertTokens(List tokens); +} \ No newline at end of file diff --git a/cs/Markdown/Tokenizers/ITokenizer.cs b/cs/Markdown/Tokenizers/ITokenizer.cs new file mode 100644 index 000000000..f6edb5d58 --- /dev/null +++ b/cs/Markdown/Tokenizers/ITokenizer.cs @@ -0,0 +1,8 @@ +using Markdown.Tokens; + +namespace Markdown.Tokenizers; + +public interface ITokenizer +{ + public List SplitToTokens(string text); +} \ No newline at end of file diff --git a/cs/Markdown/Tokenizers/Tokenizer.cs b/cs/Markdown/Tokenizers/Tokenizer.cs new file mode 100644 index 000000000..03bd8ad28 --- /dev/null +++ b/cs/Markdown/Tokenizers/Tokenizer.cs @@ -0,0 +1,16 @@ +using Markdown.Tokens; + +namespace Markdown.Tokenizers; + +public class Tokenizer : ITokenizer +{ + public List SplitToTokens(string text) + { + throw new NotImplementedException(); + } + + private Token CreateToken(string separator) + { + throw new NotImplementedException(); + } +} \ No newline at end of file diff --git a/cs/Markdown/Tokens/BoldToken.cs b/cs/Markdown/Tokens/BoldToken.cs new file mode 100644 index 000000000..744188b8a --- /dev/null +++ b/cs/Markdown/Tokens/BoldToken.cs @@ -0,0 +1,12 @@ +using Markdown.Enums; + +namespace Markdown.Tokens; + +public class BoldToken : Token +{ + public override TagWrapper TagWrapper => TagWrapper.Strong; + public override string StartSeparator => "__"; + public override string EndSeparator => "__"; + public override bool HasSingleSeparator => false; + public override bool MayContainOtherTokens => true; +} \ No newline at end of file diff --git a/cs/Markdown/Tokens/ItalicsToken.cs b/cs/Markdown/Tokens/ItalicsToken.cs new file mode 100644 index 000000000..fa6d2b039 --- /dev/null +++ b/cs/Markdown/Tokens/ItalicsToken.cs @@ -0,0 +1,12 @@ +using Markdown.Enums; + +namespace Markdown.Tokens; + +public class ItalicsToken : Token +{ + public override TagWrapper TagWrapper => TagWrapper.Em; + public override string StartSeparator => "_"; + public override string EndSeparator => "_"; + public override bool HasSingleSeparator => false; + public override bool MayContainOtherTokens => false; +} \ No newline at end of file diff --git a/cs/Markdown/Tokens/LiteralToken.cs b/cs/Markdown/Tokens/LiteralToken.cs new file mode 100644 index 000000000..3845388a4 --- /dev/null +++ b/cs/Markdown/Tokens/LiteralToken.cs @@ -0,0 +1,12 @@ +using Markdown.Enums; + +namespace Markdown.Tokens; + +public class LiteralToken : Token +{ + public override TagWrapper TagWrapper => TagWrapper.None; + public override string StartSeparator => ""; + public override string EndSeparator => ""; + public override bool HasSingleSeparator => false; + public override bool MayContainOtherTokens => true; +} \ No newline at end of file diff --git a/cs/Markdown/Tokens/ParagraphToken.cs b/cs/Markdown/Tokens/ParagraphToken.cs new file mode 100644 index 000000000..340d5205a --- /dev/null +++ b/cs/Markdown/Tokens/ParagraphToken.cs @@ -0,0 +1,12 @@ +using Markdown.Enums; + +namespace Markdown.Tokens; + +public class ParagraphToken : Token +{ + public override TagWrapper TagWrapper => TagWrapper.H1; + public override string StartSeparator => "# "; + public override string EndSeparator => "\\n"; + public override bool HasSingleSeparator => true; + public override bool MayContainOtherTokens => true; +} \ No newline at end of file diff --git a/cs/Markdown/Tokens/Token.cs b/cs/Markdown/Tokens/Token.cs new file mode 100644 index 000000000..dac5557ca --- /dev/null +++ b/cs/Markdown/Tokens/Token.cs @@ -0,0 +1,14 @@ +using Markdown.Enums; + +namespace Markdown.Tokens; + +public abstract class Token +{ + public abstract TagWrapper TagWrapper { get; } + public abstract string StartSeparator { get; } + public abstract string EndSeparator { get; } + public abstract bool HasSingleSeparator { get; } + public abstract bool MayContainOtherTokens { get; } + public string Content { get; } + public List ChildrenTokens { get; } = []; +} \ No newline at end of file diff --git a/cs/clean-code.sln b/cs/clean-code.sln index 2206d54db..a9f58d3ce 100644 --- a/cs/clean-code.sln +++ b/cs/clean-code.sln @@ -9,6 +9,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ControlDigit", "ControlDigi EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Samples", "Samples\Samples.csproj", "{C3EF41D7-50EF-4CE1-B30A-D1D81C93D7FA}" EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Markdown", "Markdown\Markdown.csproj", "{6A39364C-A9DE-4518-BFB3-835EFB41F813}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -27,5 +29,9 @@ Global {C3EF41D7-50EF-4CE1-B30A-D1D81C93D7FA}.Debug|Any CPU.Build.0 = Debug|Any CPU {C3EF41D7-50EF-4CE1-B30A-D1D81C93D7FA}.Release|Any CPU.ActiveCfg = Release|Any CPU {C3EF41D7-50EF-4CE1-B30A-D1D81C93D7FA}.Release|Any CPU.Build.0 = Release|Any CPU + {6A39364C-A9DE-4518-BFB3-835EFB41F813}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {6A39364C-A9DE-4518-BFB3-835EFB41F813}.Debug|Any CPU.Build.0 = Debug|Any CPU + {6A39364C-A9DE-4518-BFB3-835EFB41F813}.Release|Any CPU.ActiveCfg = Release|Any CPU + {6A39364C-A9DE-4518-BFB3-835EFB41F813}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection EndGlobal diff --git a/cs/clean-code.sln.DotSettings b/cs/clean-code.sln.DotSettings index 135b83ecb..229f449d2 100644 --- a/cs/clean-code.sln.DotSettings +++ b/cs/clean-code.sln.DotSettings @@ -1,6 +1,9 @@  <Policy Inspect="True" Prefix="" Suffix="" Style="aaBb" /> <Policy Inspect="True" Prefix="" Suffix="" Style="AaBb_AaBb" /> + <Policy><Descriptor Staticness="Instance" AccessRightKinds="Private" Description="Instance fields (private)"><ElementKinds><Kind Name="FIELD" /><Kind Name="READONLY_FIELD" /></ElementKinds></Descriptor><Policy Inspect="True" Prefix="" Suffix="" Style="aaBb" /></Policy> + <Policy><Descriptor Staticness="Any" AccessRightKinds="Any" Description="Types and namespaces"><ElementKinds><Kind Name="NAMESPACE" /><Kind Name="CLASS" /><Kind Name="STRUCT" /><Kind Name="ENUM" /><Kind Name="DELEGATE" /></ElementKinds></Descriptor><Policy Inspect="True" Prefix="" Suffix="" Style="AaBb_AaBb" /></Policy> + True True True Imported 10.10.2016 From 374a68dfc1d188935b0c769a23c38f959d0e4df1 Mon Sep 17 00:00:00 2001 From: "a.oslina" Date: Mon, 2 Dec 2024 18:08:37 +0500 Subject: [PATCH 2/2] Complete homework --- cs/Markdown/Enums/LiteralType.cs | 8 + cs/Markdown/Enums/TagWrapper.cs | 9 - cs/Markdown/Md.cs | 5 +- cs/Markdown/Program.cs | 15 +- cs/Markdown/Renderers/HtmlTokenConverter.cs | 15 +- cs/Markdown/Tokenizers/Tokenizer.cs | 181 +++++++++++++++++++- cs/Markdown/Tokens/BoldToken.cs | 80 ++++++++- cs/Markdown/Tokens/ItalicsToken.cs | 77 ++++++++- cs/Markdown/Tokens/LiteralToken.cs | 25 ++- cs/Markdown/Tokens/ParagraphToken.cs | 16 +- cs/Markdown/Tokens/ScreeningToken.cs | 13 ++ cs/Markdown/Tokens/SeparatorType.cs | 10 ++ cs/Markdown/Tokens/SpaceToken.cs | 13 ++ cs/Markdown/Tokens/Token.cs | 29 +++- cs/Markdown/Tokens/TokenFactory.cs | 67 ++++++++ cs/MarkdownTests/MarkdownTests.csproj | 29 ++++ cs/MarkdownTests/MdTests.cs | 17 ++ cs/MarkdownTests/MdTestsData.cs | 93 ++++++++++ cs/clean-code.sln | 6 + 19 files changed, 650 insertions(+), 58 deletions(-) create mode 100644 cs/Markdown/Enums/LiteralType.cs delete mode 100644 cs/Markdown/Enums/TagWrapper.cs create mode 100644 cs/Markdown/Tokens/ScreeningToken.cs create mode 100644 cs/Markdown/Tokens/SeparatorType.cs create mode 100644 cs/Markdown/Tokens/SpaceToken.cs create mode 100644 cs/Markdown/Tokens/TokenFactory.cs create mode 100644 cs/MarkdownTests/MarkdownTests.csproj create mode 100644 cs/MarkdownTests/MdTests.cs create mode 100644 cs/MarkdownTests/MdTestsData.cs diff --git a/cs/Markdown/Enums/LiteralType.cs b/cs/Markdown/Enums/LiteralType.cs new file mode 100644 index 000000000..8cb6c4457 --- /dev/null +++ b/cs/Markdown/Enums/LiteralType.cs @@ -0,0 +1,8 @@ +namespace Markdown.Enums; + +public enum LiteralType +{ + Number, + Text, + None +} \ No newline at end of file diff --git a/cs/Markdown/Enums/TagWrapper.cs b/cs/Markdown/Enums/TagWrapper.cs deleted file mode 100644 index 618b8e3e7..000000000 --- a/cs/Markdown/Enums/TagWrapper.cs +++ /dev/null @@ -1,9 +0,0 @@ -namespace Markdown.Enums; - -public enum TagWrapper -{ - Em, - Strong, - H1, - None -} \ No newline at end of file diff --git a/cs/Markdown/Md.cs b/cs/Markdown/Md.cs index 6544f39ce..8929b0b93 100644 --- a/cs/Markdown/Md.cs +++ b/cs/Markdown/Md.cs @@ -14,7 +14,7 @@ private Md(ITokenizer tokenizer, ITokenConverter tokenConverter) this.tokenConverter = tokenConverter; } - public Md Create(ITokenizer tokenizer, ITokenConverter tokenConverter) + public static Md Create(ITokenizer tokenizer, ITokenConverter tokenConverter) { return new Md(tokenizer, tokenConverter); } @@ -22,7 +22,6 @@ public Md Create(ITokenizer tokenizer, ITokenConverter tokenConverter) public string Render(string markdownStr) { var tokens = tokenizer.SplitToTokens(markdownStr); - var convertedStr = tokenConverter.ConvertTokens(tokens); - return convertedStr; + return tokenConverter.ConvertTokens(tokens); } } \ No newline at end of file diff --git a/cs/Markdown/Program.cs b/cs/Markdown/Program.cs index e5dff12bc..0a2abc0e5 100644 --- a/cs/Markdown/Program.cs +++ b/cs/Markdown/Program.cs @@ -1,3 +1,14 @@ -// See https://aka.ms/new-console-template for more information +using Markdown.Renderers; +using Markdown.Tokenizers; -Console.WriteLine("Hello, World!"); \ No newline at end of file +namespace Markdown; + +public static class Program +{ + public static void Main() + { + var input = "Подчерки внутри текста c цифрами_12_3 "; + var md = Md.Create(new Tokenizer(), new HtmlTokenConverter()).Render(input); + Console.WriteLine(md); + } +} \ No newline at end of file diff --git a/cs/Markdown/Renderers/HtmlTokenConverter.cs b/cs/Markdown/Renderers/HtmlTokenConverter.cs index 90b97c8bc..5cd1eda6a 100644 --- a/cs/Markdown/Renderers/HtmlTokenConverter.cs +++ b/cs/Markdown/Renderers/HtmlTokenConverter.cs @@ -1,4 +1,5 @@ -using Markdown.Tokens; +using System.Text; +using Markdown.Tokens; namespace Markdown.Renderers; @@ -6,6 +7,16 @@ public class HtmlTokenConverter : ITokenConverter { public string ConvertTokens(List tokens) { - throw new NotImplementedException(); + var result = new StringBuilder(); + foreach (var token in tokens) + result.Append(ConvertToTag(token)); + return result.ToString(); + } + + private static string ConvertToTag(Token token) + { + if (token.TagWrapper == null) + return token.Content; + return token.IsClosing ? $"" : $"<{token.TagWrapper}>"; } } \ No newline at end of file diff --git a/cs/Markdown/Tokenizers/Tokenizer.cs b/cs/Markdown/Tokenizers/Tokenizer.cs index 03bd8ad28..09bd9ddc4 100644 --- a/cs/Markdown/Tokenizers/Tokenizer.cs +++ b/cs/Markdown/Tokenizers/Tokenizer.cs @@ -1,16 +1,189 @@ -using Markdown.Tokens; +using Markdown.Enums; +using Markdown.Tokens; namespace Markdown.Tokenizers; public class Tokenizer : ITokenizer { + private List tokens = []; public List SplitToTokens(string text) { - throw new NotImplementedException(); + var result = new List(); + var paragraphs = text.Split('\n'); + for (var i = 0; i < paragraphs.Length; i++) + { + tokens = GetTokens(paragraphs[i]); + tokens = ProcessScreeningTokens(tokens); + ProcessInvalidTokens(); + ProcessNonPairTokens(); + ProcessWrongOrder(); + result.AddRange(tokens); + if (paragraphs.Length > 1 && i < paragraphs.Length - 1) + result.Add(new LiteralToken("\n", LiteralType.Text)); + tokens.Clear(); + } + return result; } - private Token CreateToken(string separator) + private static List GetTokens(string text) { - throw new NotImplementedException(); + var tokens = new List(); + var index = 0; + while (index < text.Length) + { + tokens.Add(TokenFactory.GenerateToken(text, index)); + index += tokens.Last().Content.Length; + } + if (tokens.FirstOrDefault() is ParagraphToken) + tokens.Add(new ParagraphToken(tokens.First().Content)); + return tokens; + } + + private static List ProcessScreeningTokens(List tokens) + { + Token? previousToken = null; + var result = new List(); + foreach (var token in tokens) + { + if (previousToken is ScreeningToken) + { + if (!token.IsTag && token is not ScreeningToken) + { + var newToken = new LiteralToken(previousToken); + result.Add(newToken); + result.Add(token); + previousToken = token; + + } + else + { + var newToken = new LiteralToken(token); + previousToken = newToken; + result.Add(newToken); + } + } + else + { + if (token is not ScreeningToken) + result.Add(token); + previousToken = token; + } + } + + if (previousToken is not ScreeningToken) + return result; + result.Add(new LiteralToken(previousToken)); + return result; + } + + private void ProcessInvalidTokens() + { + for (var i = 0; i < tokens.Count; i++) + tokens[i] = tokens[i].Validate(tokens, i) ? tokens[i] : new LiteralToken(tokens[i]); + } + + private void ProcessNonPairTokens() + { + var openTokensIndexes = new Stack(); + var incorrectTokensIndexes = new List(); + for (var i = 0; i < tokens.Count; i++) + { + var token = tokens[i]; + if (!token.IsTag) + continue; + if (token.IsOpen(tokens, i)) + openTokensIndexes.Push(i); + else + { + if (openTokensIndexes.Count == 0) + incorrectTokensIndexes.Add(i); + else + CheckOpenAndCloseTokens(openTokensIndexes, openTokensIndexes.Pop(), i, incorrectTokensIndexes); + } + } + incorrectTokensIndexes.AddRange(openTokensIndexes); + + foreach (var index in incorrectTokensIndexes) + tokens[index] = new LiteralToken(tokens[index]); + } + + private void ProcessWrongOrder() + { + var openedTokens = new Stack(); + for (var i = 0; i < tokens.Count; i++) + { + var token = tokens[i]; + if (!token.IsTag) continue; + if (token.IsClosing) + openedTokens.Pop(); + else + openedTokens.Push(token); + if (IsCorrectOrder(openedTokens, token)) + continue; + tokens[i] = new LiteralToken(token); + tokens[tokens[i].PairedTokenIndex] = new LiteralToken(tokens[tokens[i].PairedTokenIndex]); + } + } + + private static bool IsCorrectOrder(Stack openedTokens, Token token) + => token is not BoldToken || openedTokens.All(x => x is not ItalicsToken); + + private void CheckOpenAndCloseTokens(Stack openTokens, int openIndex, int closeIndex, List incorrectTokens) + { + var openToken = tokens[openIndex]; + var closeToken = tokens[closeIndex]; + closeToken.IsClosing = true; + if (openToken.GetType() == closeToken.GetType()) + { + SetPairedTokens(openIndex, closeIndex); + return; + } + var nextIndex = GetNextTokenIndex(closeIndex); + if (openTokens.Count == 0) + { + if (nextIndex > -1) + { + if (tokens[nextIndex].IsOpen(tokens, nextIndex) || + tokens[nextIndex].GetType() == tokens[openIndex].GetType()) + { + openTokens.Push(openIndex); + incorrectTokens.Add(closeIndex); + } + if (tokens[nextIndex].GetType() == tokens[openIndex].GetType()) + return; + } + incorrectTokens.Add(openIndex); + incorrectTokens.Add(closeIndex); + return; + } + var preOpenIndex = openTokens.Peek(); + if (tokens[preOpenIndex].GetType() != closeToken.GetType()) + return; + if (nextIndex > -1 && !tokens[nextIndex].IsOpen(tokens, nextIndex) + && tokens[nextIndex].GetType() == openToken.GetType()) + { + openTokens.Pop(); + incorrectTokens.Add(preOpenIndex); + incorrectTokens.Add(openIndex); + incorrectTokens.Add(closeIndex); + return; + } + + openTokens.Pop(); + SetPairedTokens(preOpenIndex, closeIndex); + incorrectTokens.Add(openIndex); + } + + private void SetPairedTokens(int openIndex, int closeIndex) + { + tokens[openIndex].PairedTokenIndex = closeIndex; + tokens[closeIndex].PairedTokenIndex = openIndex; + } + + private int GetNextTokenIndex(int index) + { + for (var i = index + 1; i < tokens.Count; i++) + if (tokens[i].IsTag) return i; + return -1; } } \ No newline at end of file diff --git a/cs/Markdown/Tokens/BoldToken.cs b/cs/Markdown/Tokens/BoldToken.cs index 744188b8a..8d3444fb4 100644 --- a/cs/Markdown/Tokens/BoldToken.cs +++ b/cs/Markdown/Tokens/BoldToken.cs @@ -1,12 +1,76 @@ -using Markdown.Enums; - -namespace Markdown.Tokens; +namespace Markdown.Tokens; public class BoldToken : Token { - public override TagWrapper TagWrapper => TagWrapper.Strong; - public override string StartSeparator => "__"; - public override string EndSeparator => "__"; - public override bool HasSingleSeparator => false; - public override bool MayContainOtherTokens => true; + public override string TagWrapper => "strong"; + public override bool IsTag => true; + + public BoldToken(string content) : base(content) {} + + public override bool Validate(List tokens, int index) + { + return !IsNearNumber(tokens, index) && (IsValid(tokens, index) || CanBeInWord(tokens, index)); + } + + public override bool IsOpen(List tokens, int index) + { + return CanBeOpen(tokens, index) || (!IsInClosingPosition(tokens, index) && CanBeInWord(tokens, index)); + } + + private static bool CanBeClose(List tokens, int index) + { + return index - 1 > 0 && tokens[index - 1] is not SpaceToken && + (index + 1 >= tokens.Count || tokens[index + 1].IsTag || tokens[index + 1] is SpaceToken); + } + + private static bool CanBeOpen(List tokens, int index) + { + return index + 1 < tokens.Count && tokens[index + 1] is not SpaceToken && + (index - 1 < 0 || tokens[index - 1].IsTag || tokens[index - 1] is SpaceToken); + } + + private static bool IsValid(List tokens, int index) + { + return CanBeClose(tokens, index) != CanBeOpen(tokens, index); + } + + private static bool IsNearNumber(List tokens, int index) + { + return index - 1 >= 0 && index + 1 < tokens.Count && + ((tokens[index - 1] is LiteralToken && ((LiteralToken)tokens[index - 1]).IsNumber() && + tokens[index + 1] is not SpaceToken) || + (tokens[index + 1] is LiteralToken && ((LiteralToken)tokens[index + 1]).IsNumber() && + tokens[index - 1] is not SpaceToken)); + } + + private static bool CanBeInWord(List tokens, int index) + { + var neededToken = tokens[index].GetType(); + var wordCount = 0; + var amountInWord = 0; + while (index - 1 >= 0 && tokens[index - 1] is not SpaceToken) + index--; + while (index < tokens.Count && tokens[index] is not SpaceToken) + { + if (tokens[index].GetType() == neededToken) + amountInWord++; + else + wordCount++; + index++; + } + return wordCount > 0 && amountInWord % 2 == 0; + } + + private static bool IsInClosingPosition(List tokens, int index) + { + var currentIndex = index - 1; + var countBefore = 1; + while (currentIndex >= 0 && tokens[currentIndex] is not SpaceToken) + { + if (tokens[currentIndex].GetType() == tokens[index].GetType()) + countBefore++; + currentIndex--; + } + return countBefore % 2 == 0; + } } \ No newline at end of file diff --git a/cs/Markdown/Tokens/ItalicsToken.cs b/cs/Markdown/Tokens/ItalicsToken.cs index fa6d2b039..ecc085bb4 100644 --- a/cs/Markdown/Tokens/ItalicsToken.cs +++ b/cs/Markdown/Tokens/ItalicsToken.cs @@ -1,12 +1,73 @@ -using Markdown.Enums; - -namespace Markdown.Tokens; +namespace Markdown.Tokens; public class ItalicsToken : Token { - public override TagWrapper TagWrapper => TagWrapper.Em; - public override string StartSeparator => "_"; - public override string EndSeparator => "_"; - public override bool HasSingleSeparator => false; - public override bool MayContainOtherTokens => false; + public override string TagWrapper => "em"; + public override bool IsTag => true; + + public ItalicsToken(string content) : base(content) {} + + public override bool Validate(List tokens, int index) + { + return !IsNearNumber(tokens, index) && (IsValid(tokens, index) || CanBeInWord(tokens, index)); + } + + public override bool IsOpen(List tokens, int index) + { + return CanBeOpen(tokens, index) || (!IsInClosingPosition(tokens, index) && CanBeInWord(tokens, index)); + } + + private static bool IsValid(List tokens, int index) + { + return CanBeClose(tokens, index) != CanBeOpen(tokens, index); + } + + private static bool CanBeClose(List tokens, int index) + { + return index - 1 > 0 && tokens[index - 1] is not SpaceToken && + (index + 1 >= tokens.Count || tokens[index + 1].IsTag || tokens[index + 1] is SpaceToken); + } + + private static bool CanBeOpen(List tokens, int index) + { + return index + 1 < tokens.Count && tokens[index + 1] is not SpaceToken && + (index - 1 < 0 || tokens[index - 1].IsTag || tokens[index - 1] is SpaceToken); + } + + private static bool IsNearNumber(List tokens, int index) + { + return index - 1 >= 0 && index + 1 < tokens.Count && + ((tokens[index - 1] is LiteralToken && ((LiteralToken)tokens[index - 1]).IsNumber() && + tokens[index + 1] is not SpaceToken) || + (tokens[index + 1] is LiteralToken && ((LiteralToken)tokens[index + 1]).IsNumber() && + tokens[index - 1] is not SpaceToken)); + } + + private static bool CanBeInWord(List tokens, int index) + { + var neededToken = tokens[index].GetType(); + var amountInWord = 0; + while (index - 1 >= 0 && tokens[index - 1] is not SpaceToken) + index--; + while (index < tokens.Count && tokens[index] is not SpaceToken) + { + if (tokens[index].GetType() == neededToken) + amountInWord++; + index++; + } + return amountInWord % 2 == 0; + } + + private static bool IsInClosingPosition(List tokens, int index) + { + var currentIndex = index - 1; + var countBefore = 1; + while (currentIndex >= 0 && tokens[currentIndex] is not SpaceToken) + { + if (tokens[currentIndex].GetType() == tokens[index].GetType()) + countBefore++; + currentIndex--; + } + return countBefore % 2 == 0; + } } \ No newline at end of file diff --git a/cs/Markdown/Tokens/LiteralToken.cs b/cs/Markdown/Tokens/LiteralToken.cs index 3845388a4..964fb6588 100644 --- a/cs/Markdown/Tokens/LiteralToken.cs +++ b/cs/Markdown/Tokens/LiteralToken.cs @@ -4,9 +4,24 @@ namespace Markdown.Tokens; public class LiteralToken : Token { - public override TagWrapper TagWrapper => TagWrapper.None; - public override string StartSeparator => ""; - public override string EndSeparator => ""; - public override bool HasSingleSeparator => false; - public override bool MayContainOtherTokens => true; + public override string TagWrapper => null; + public override bool IsTag => false; + + public LiteralType ContentType { get; } + + public LiteralToken(string content, LiteralType contentType) : base(content) + { + ContentType = contentType; + } + + public LiteralToken(Token token) : base(token) + { + ContentType = LiteralType.Text; + } + + public override bool Validate(List tokens, int index) => true; + + public override bool IsOpen(List tokens, int index) => true; + + public bool IsNumber() => ContentType == LiteralType.Number; } \ No newline at end of file diff --git a/cs/Markdown/Tokens/ParagraphToken.cs b/cs/Markdown/Tokens/ParagraphToken.cs index 340d5205a..902a8e6d8 100644 --- a/cs/Markdown/Tokens/ParagraphToken.cs +++ b/cs/Markdown/Tokens/ParagraphToken.cs @@ -1,12 +1,12 @@ -using Markdown.Enums; - -namespace Markdown.Tokens; +namespace Markdown.Tokens; public class ParagraphToken : Token { - public override TagWrapper TagWrapper => TagWrapper.H1; - public override string StartSeparator => "# "; - public override string EndSeparator => "\\n"; - public override bool HasSingleSeparator => true; - public override bool MayContainOtherTokens => true; + public override string TagWrapper => "h1"; + public override bool IsTag => true; + + public ParagraphToken(string content) : base(content) {} + + public override bool Validate(List tokens, int index) => true; + public override bool IsOpen(List tokens, int index) => index == 0; } \ No newline at end of file diff --git a/cs/Markdown/Tokens/ScreeningToken.cs b/cs/Markdown/Tokens/ScreeningToken.cs new file mode 100644 index 000000000..358410c64 --- /dev/null +++ b/cs/Markdown/Tokens/ScreeningToken.cs @@ -0,0 +1,13 @@ +namespace Markdown.Tokens; + +public class ScreeningToken : Token +{ + public override string TagWrapper => null; + public override bool IsTag => false; + + public ScreeningToken(string content) : base(content) {} + + public override bool Validate(List tokens, int index) => true; + + public override bool IsOpen(List tokens, int index) => true; +} \ No newline at end of file diff --git a/cs/Markdown/Tokens/SeparatorType.cs b/cs/Markdown/Tokens/SeparatorType.cs new file mode 100644 index 000000000..94deac552 --- /dev/null +++ b/cs/Markdown/Tokens/SeparatorType.cs @@ -0,0 +1,10 @@ +namespace Markdown.Tokens; + +public static class SeparatorType +{ + public const string Underscore = "_"; + public const string DoubleUnderscore = "__"; + public const string Hash = "# "; + public const string Space = " "; + public const string Screening = "\\"; +} \ No newline at end of file diff --git a/cs/Markdown/Tokens/SpaceToken.cs b/cs/Markdown/Tokens/SpaceToken.cs new file mode 100644 index 000000000..6a5235738 --- /dev/null +++ b/cs/Markdown/Tokens/SpaceToken.cs @@ -0,0 +1,13 @@ +namespace Markdown.Tokens; + +public class SpaceToken : Token +{ + public override string TagWrapper => null; + public override bool IsTag => false; + + public SpaceToken(string content) : base(content) {} + + public override bool Validate(List tokens, int index) => true; + + public override bool IsOpen(List tokens, int index) => true; +} \ No newline at end of file diff --git a/cs/Markdown/Tokens/Token.cs b/cs/Markdown/Tokens/Token.cs index dac5557ca..d5075d261 100644 --- a/cs/Markdown/Tokens/Token.cs +++ b/cs/Markdown/Tokens/Token.cs @@ -1,14 +1,25 @@ -using Markdown.Enums; - -namespace Markdown.Tokens; +namespace Markdown.Tokens; public abstract class Token { - public abstract TagWrapper TagWrapper { get; } - public abstract string StartSeparator { get; } - public abstract string EndSeparator { get; } - public abstract bool HasSingleSeparator { get; } - public abstract bool MayContainOtherTokens { get; } + public abstract string? TagWrapper { get; } + public abstract bool IsTag { get; } + public bool IsClosing { get; set; } + public int PairedTokenIndex { get; set; } public string Content { get; } - public List ChildrenTokens { get; } = []; + + protected Token(string content) + { + Content = content; + } + + protected Token(Token token) + { + Content = token.Content; + IsClosing = token.IsClosing; + PairedTokenIndex = token.PairedTokenIndex; + } + + public abstract bool Validate(List tokens, int index); + public abstract bool IsOpen(List tokens, int index); } \ No newline at end of file diff --git a/cs/Markdown/Tokens/TokenFactory.cs b/cs/Markdown/Tokens/TokenFactory.cs new file mode 100644 index 000000000..1944c7c83 --- /dev/null +++ b/cs/Markdown/Tokens/TokenFactory.cs @@ -0,0 +1,67 @@ +using System.Text; +using Markdown.Enums; + +namespace Markdown.Tokens; + +public static class TokenFactory +{ + private static readonly string[] TagSymbols = + [SeparatorType.Screening, SeparatorType.Underscore, SeparatorType.Space]; + + public static Token GenerateToken(string text, int index) + { + string separator; + if (index + 1 < text.Length) + { + separator = text.Substring(index, 2); + switch (separator) + { + case SeparatorType.DoubleUnderscore: + return CreateBoldToken(separator); + case SeparatorType.Hash: + if (index > 0) break; + return CreateParagraphToken(separator); + } + } + separator = text[index].ToString(); + return separator switch + { + SeparatorType.Underscore => CreateItalicsToken(separator), + SeparatorType.Screening => CreateScreeningToken(separator), + SeparatorType.Space => CreateSpaceToken(separator), + _ => CreateLiteralToken(text, index) + }; + } + + private static BoldToken CreateBoldToken(string text) => new(text); + + private static ItalicsToken CreateItalicsToken(string text) => new(text); + + private static ScreeningToken CreateScreeningToken(string text) => new(text); + + private static SpaceToken CreateSpaceToken(string text) => new(text); + + private static ParagraphToken CreateParagraphToken(string text) => new(text); + + private static LiteralToken CreateLiteralToken(string text, int index) + { + var literalType = LiteralType.None; + var content = new StringBuilder(); + for (var i = index; i < text.Length; i++) + { + if (i == index) + literalType = char.IsNumber(text[i]) ? LiteralType.Number : LiteralType.Text; + if (IsTextEnd(literalType, text[i]) || IsNumberEnd(literalType, text[i])) + break; + content.Append(text[i]); + } + + return new LiteralToken(content.ToString(), literalType); + } + + private static bool IsTextEnd(LiteralType type, char symbol) + => type == LiteralType.Text && (char.IsNumber(symbol) || TagSymbols.Any(s => s.StartsWith(symbol))); + + private static bool IsNumberEnd(LiteralType type, char symbol) + => type == LiteralType.Number && !char.IsNumber(symbol); +} \ No newline at end of file diff --git a/cs/MarkdownTests/MarkdownTests.csproj b/cs/MarkdownTests/MarkdownTests.csproj new file mode 100644 index 000000000..eeb9843a4 --- /dev/null +++ b/cs/MarkdownTests/MarkdownTests.csproj @@ -0,0 +1,29 @@ + + + + net8.0 + enable + enable + + false + true + + + + + + + + + + + + + + + + + + + + diff --git a/cs/MarkdownTests/MdTests.cs b/cs/MarkdownTests/MdTests.cs new file mode 100644 index 000000000..b49fe56bb --- /dev/null +++ b/cs/MarkdownTests/MdTests.cs @@ -0,0 +1,17 @@ +using FluentAssertions; +using Markdown; +using Markdown.Renderers; +using Markdown.Tokenizers; + +namespace MarkdownTests; + +public class MdTests +{ + [TestCaseSource(typeof(MdTestsData), nameof(MdTestsData.TestData))] + public void ShouldConvertMdToHtmlCorrectly(string input, string expected) + { + var mdRenderer = Md.Create(new Tokenizer(), new HtmlTokenConverter()); + var result = mdRenderer.Render(input); + result.Should().BeEquivalentTo(expected); + } +} \ No newline at end of file diff --git a/cs/MarkdownTests/MdTestsData.cs b/cs/MarkdownTests/MdTestsData.cs new file mode 100644 index 000000000..5c95627c7 --- /dev/null +++ b/cs/MarkdownTests/MdTestsData.cs @@ -0,0 +1,93 @@ +namespace MarkdownTests; + +public class MdTestsData +{ + public static IEnumerable TestData + { + get + { + yield return new TestCaseData("a b c", "a b c").SetName("ShouldReturnSameString_WhenThereAreNoTokens"); + + yield return new TestCaseData("b _окруженный с двух сторон_", "b окруженный с двух сторон").SetName( + "ShouldReturnWithItalicsToken_WhenItalicsInInput"); + + yield return new TestCaseData("b __окруженный с двух сторон__", "b окруженный с двух сторон").SetName( + "ShouldReturnWithBoldToken_WhenBoldInInput"); + + yield return new TestCaseData("Здесь сим\\волы экранирования\\ \\должны остаться.\\", + "Здесь сим\\волы экранирования\\ \\должны остаться.\\").SetName( + "ShouldReturnWithScreeningSeparator_WhenItDoesNotScreen"); + + yield return new TestCaseData("a \\\\ b c", "a \\ b c").SetName( + "ShouldReturnWithScreeningToken_WhenItScreen"); + + yield return new TestCaseData("a \\_b_ c", "a _b_ c").SetName( + "ShouldNotReturnItalicsToken_WhenItsScreened"); + + yield return new TestCaseData("a \\__b__ c", "a __b__ c").SetName( + "ShouldNotReturnBoldToken_WhenItScreened"); + + yield return new TestCaseData("_ a b_", "_ a b_").SetName( + "ShouldIgnoreInvalidOpeningItalicsSeparator"); + + yield return new TestCaseData("__ a b__", "__ a b__").SetName( + "ShouldIgnoreInvalidOpeningBoldSeparator"); + + yield return new TestCaseData("a _a b _c", "a _a b _c").SetName( + "ShouldIgnoreInvalidClosingItalicsSeparator"); + + yield return new TestCaseData("a __a b __c", "a __a b __c").SetName( + "ShouldIgnoreInvalidClosingBoldsSeparator"); + + yield return new TestCaseData("цифрами_12_3", "цифрами_12_3").SetName( + "ShouldNotReturnTokens_WhenSeparatorsIsInsideWordWithDigit"); + + yield return new TestCaseData("_нач_але", "начале").SetName( + "ShouldReturnItalicsToken_WhenItHighlightStartOfWord"); + + yield return new TestCaseData("сер_еди_не", "середине").SetName( + "ShouldReturnItalicsToken_WhenItHighlightCenterOfWord"); + + yield return new TestCaseData("кон_це._", "конце.").SetName( + "ShouldReturnItalicsToken_WhenItHighlightEndOfWord"); + + yield return new TestCaseData("__нач__але", "начале").SetName( + "ShouldReturnBoldToken_WhenItHighlightStartOfWord"); + + yield return new TestCaseData("сер__еди__не", "середине").SetName( + "ShouldReturnBoldToken_WhenItHighlightCenterOfWord"); + + yield return new TestCaseData("кон__це.__", "конце.").SetName( + "ShouldReturnBoldToken_WhenItHighlightEndOfWord"); + + yield return new TestCaseData("ра_зных сл_овах", "ра_зных сл_овах").SetName( + "ShouldNotReturnToken_WhenSeparatorsIsInsideDifferentWords"); + + yield return new TestCaseData("____", "____").SetName( + "ShouldNotReturnToken_WhenTokenHasNoContent"); + + yield return new TestCaseData("__a _b_ c__", "a b c").SetName( + "ShouldReturnItalicsToken_WhenItIsInsideBoldToken"); + + yield return new TestCaseData("_a __b__ c_", "a __b__ c").SetName( + "ShouldNotReturnBoldToken_WhenItIsInsideItalicsToken"); + + yield return new TestCaseData("__пересечения _двойных__ и одинарных_", + "__пересечения _двойных__ и одинарных_").SetName( + "ShouldNotReturnTokens_WhenTokensIsIntersecting"); + + yield return new TestCaseData("# a ", "

a

").SetName( + "ShouldReturnParagraphToken_WhenParagraphTokenIsInInput"); + + yield return new TestCaseData("# a # b c", "

a # b c

").SetName( + "ShouldIgnoreParagraphSeparator_WhenItIsNotInStartOfParagraph"); + + yield return new TestCaseData("# a b c \n d e", "

a b c

\n d e").SetName( + "ShouldCloseParagraphToken_WhenParagraphIsEnding"); + + yield return new TestCaseData("# Заголовок __с _разными_ символами__", + "

Заголовок с разными символами

") + .SetName("ShouldReturnParagraphWithDifferentTokens"); + } + } +} \ No newline at end of file diff --git a/cs/clean-code.sln b/cs/clean-code.sln index a9f58d3ce..4d0a92752 100644 --- a/cs/clean-code.sln +++ b/cs/clean-code.sln @@ -11,6 +11,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Samples", "Samples\Samples. EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Markdown", "Markdown\Markdown.csproj", "{6A39364C-A9DE-4518-BFB3-835EFB41F813}" EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "MarkdownTests", "MarkdownTests\MarkdownTests.csproj", "{305D2F71-6EE2-4479-B118-AF6BBBFCFD4D}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -33,5 +35,9 @@ Global {6A39364C-A9DE-4518-BFB3-835EFB41F813}.Debug|Any CPU.Build.0 = Debug|Any CPU {6A39364C-A9DE-4518-BFB3-835EFB41F813}.Release|Any CPU.ActiveCfg = Release|Any CPU {6A39364C-A9DE-4518-BFB3-835EFB41F813}.Release|Any CPU.Build.0 = Release|Any CPU + {305D2F71-6EE2-4479-B118-AF6BBBFCFD4D}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {305D2F71-6EE2-4479-B118-AF6BBBFCFD4D}.Debug|Any CPU.Build.0 = Debug|Any CPU + {305D2F71-6EE2-4479-B118-AF6BBBFCFD4D}.Release|Any CPU.ActiveCfg = Release|Any CPU + {305D2F71-6EE2-4479-B118-AF6BBBFCFD4D}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection EndGlobal