kontur-courses · kokodio · Nov 24, 2024 · Dec 1, 2024 · Dec 1, 2024 · Dec 1, 2024
diff --git a/cs/Markdown/Converters/HtmlConverter.cs b/cs/Markdown/Converters/HtmlConverter.cs
@@ -0,0 +1,41 @@
+using System.Text;
+
+namespace Markdown.Converters;
+
+/// <summary>
+/// Converts a token sequence to HTML: paired italic/strong delimiters become
+/// <c>em</c>/<c>strong</c> tags, every other token is written out as its content.
+/// </summary>
+public class HtmlConverter : IConverter
+{
+    private static readonly Dictionary<TokenType, string> HtmlTag = new()
+    {
+        { TokenType.Italic, "em" },
+        { TokenType.Strong, "strong" },
+    };
+
+    /// <summary>
+    /// Renders <paramref name="tokens"/> in list order into one HTML string.
+    /// </summary>
+    /// <param name="tokens">Tokens to render.</param>
+    /// <returns>The concatenated HTML fragments of all tokens.</returns>
+    public string Convert(List<Token> tokens)
+    {
+        var html = new StringBuilder();
+
+        // Delimiters whose opening tag is already written and whose partner has not been reached.
+        // Tracking the actual partner (by reference) instead of a per-type toggle keeps nested
+        // pairs of the same type correctly nested in the output.
+        var awaitingClose = new HashSet<Token>(ReferenceEqualityComparer.Instance);
+
+        foreach (var token in tokens)
+        {
+            html.Append(Render(token, awaitingClose));
+        }
+
+        return html.ToString();
+    }
+
+    /// <summary>
+    /// Produces the HTML fragment for a single token and updates the set of open delimiters.
+    /// </summary>
+    private static string Render(Token token, HashSet<Token> awaitingClose)
+    {
+        // Unpaired delimiters and token types without a tag mapping are emitted verbatim.
+        if (token.Pair is not { } partner || !HtmlTag.TryGetValue(token.Type, out var tagName))
+        {
+            return token.Content;
+        }
+
+        // The partner has already been written as an opening tag, so this token closes it.
+        if (awaitingClose.Remove(partner))
+        {
+            return Tag.Close(tagName);
+        }
+
+        awaitingClose.Add(token);
+        return Tag.Open(tagName);
+    }
+}
diff --git a/cs/Markdown/Converters/IConverter.cs b/cs/Markdown/Converters/IConverter.cs
@@ -0,0 +1,6 @@
+namespace Markdown.Converters;
+
+/// <summary>
+/// Turns a list of tokens into output markup.
+/// </summary>
+public interface IConverter
+{
+    /// <summary>
+    /// Converts the given tokens into a single string.
+    /// </summary>
+    /// <param name="tokens">Tokens to convert.</param>
+    /// <returns>The converted text.</returns>
+    string Convert(List<Token> tokens);
+}
diff --git a/cs/Markdown/Markdown.csproj b/cs/Markdown/Markdown.csproj
@@ -0,0 +1,10 @@
+<Project Sdk="Microsoft.NET.Sdk">
+
+    <PropertyGroup>
+        <OutputType>Exe</OutputType> <!-- console executable; the top-level statements in Program.cs are the entry point -->
+        <TargetFramework>net9.0</TargetFramework>
+        <ImplicitUsings>enable</ImplicitUsings> <!-- sources use List/Dictionary/Console without explicit using directives -->
+        <Nullable>enable</Nullable> <!-- nullable reference types are on; e.g. Token.Pair is declared as Token? -->
+    </PropertyGroup>
+
+</Project>
diff --git a/cs/Markdown/Md.cs b/cs/Markdown/Md.cs
@@ -0,0 +1,51 @@
+using System.Text;
+using Markdown.Converters;
+using Markdown.Tokenizers;
+
+namespace Markdown;
+
+/// <summary>
+/// Renders markdown text to HTML line by line: a line starting with <c># </c> becomes an
+/// <c>h1</c> element, and the inline markup of every line is handled by the supplied
+/// tokenizer and converter.
+/// </summary>
+/// <param name="tokenizer">Splits the inline markup of one line into tokens.</param>
+/// <param name="converter">Turns the tokens of one line into output markup.</param>
+public class Md(ITokenizer tokenizer, IConverter converter)
+{
+    private const string HeaderPrefix = "# ";
+
+    // Both line-ending styles are recognised regardless of the platform the code runs on;
+    // "\r\n" is listed first so a CRLF pair is consumed as a single separator.
+    private static readonly string[] LineSeparators = ["\r\n", "\n"];
+
+    /// <summary>
+    /// Converts <paramref name="markdown"/> to HTML.
+    /// </summary>
+    /// <param name="markdown">Source text; <c>null</c> or empty yields an empty string.</param>
+    /// <returns>
+    /// The rendered lines joined with <see cref="Environment.NewLine"/>. Trailing line-break
+    /// characters (and therefore trailing empty lines) are removed from the result.
+    /// </returns>
+    public string Render(string markdown)
+    {
+        if (string.IsNullOrEmpty(markdown))
+            return string.Empty;
+
+        var paragraphs = markdown.Split(LineSeparators, StringSplitOptions.None);
+        var htmlLines = Array.ConvertAll(paragraphs, ProcessParagraph);
+
+        return string
+            .Join(Environment.NewLine, htmlLines)
+            .TrimEnd(Environment.NewLine.ToCharArray());
+    }
+
+    /// <summary>
+    /// Renders one line, wrapping it in <c>h1</c> when it starts with the header prefix.
+    /// </summary>
+    private string ProcessParagraph(string line)
+    {
+        var trimmedLine = line.TrimEnd(Environment.NewLine.ToCharArray());
+
+        // Ordinal comparison: the prefix is markup, not linguistic text, and the slice below
+        // relies on exactly HeaderPrefix.Length characters having matched.
+        if (!trimmedLine.StartsWith(HeaderPrefix, StringComparison.Ordinal))
+            return ParseMarkdown(trimmedLine);
+
+        var headerContent = trimmedLine[HeaderPrefix.Length..];
+        return Tag.Wrap("h1", ParseMarkdown(headerContent));
+    }
+
+    /// <summary>
+    /// Runs the tokenizer and then the converter over a piece of inline markdown.
+    /// </summary>
+    private string ParseMarkdown(string markdown)
+    {
+        var tokens = tokenizer.Tokenize(markdown);
+        return converter.Convert(tokens);
+    }
+}
diff --git a/cs/Markdown/Program.cs b/cs/Markdown/Program.cs
@@ -0,0 +1,15 @@
+using Markdown;
+using Markdown.Converters;
+using Markdown.Tokenizers;
+
+// Demo entry point: renders one sample line and prints the resulting HTML to the console.
+const string sample = """
+                     Подчерки внутри текста c цифрами_12_3 не считаются выделением и должны оставаться символами подчерка.
+                     """;
+
+var renderer = new Md(new KonturMdTokenizer(), new HtmlConverter());
+
+Console.WriteLine(renderer.Render(sample));
diff --git a/cs/Markdown/Tag.cs b/cs/Markdown/Tag.cs
@@ -0,0 +1,10 @@
+namespace Markdown;
+
+/// <summary>
+/// Helpers that build HTML tag markup from a tag name.
+/// </summary>
+public static class Tag
+{
+    /// <summary>Returns the opening tag, e.g. <c>&lt;em&gt;</c> for <c>em</c>.</summary>
+    public static string Open(string tagName)
+    {
+        return "<" + tagName + ">";
+    }
+
+    /// <summary>Returns the closing tag, e.g. <c>&lt;/em&gt;</c> for <c>em</c>.</summary>
+    public static string Close(string tagName)
+    {
+        return "</" + tagName + ">";
+    }
+
+    /// <summary>Returns <paramref name="content"/> surrounded by the opening and closing tag.</summary>
+    public static string Wrap(string tagName, string content)
+    {
+        return string.Concat(Open(tagName), content, Close(tagName));
+    }
+}
diff --git a/cs/Markdown/Token.cs b/cs/Markdown/Token.cs
@@ -0,0 +1,11 @@
+namespace Markdown;
+
+/// <summary>
+/// One unit produced by a tokenizer: a piece of text or a markup delimiter.
+/// </summary>
+public class Token
+{
+    /// <summary>Text written to the output when the token is not rendered as a tag.</summary>
+    public required string Content { get; init; }
+
+    /// <summary>Kind of the token.</summary>
+    public TokenType Type { get; init; }
+
+    /// <summary>
+    /// Index of the delimiter in the tokenized string. KonturMdTokenizer sets it for delimiter
+    /// tokens only; text tokens keep the default value.
+    /// </summary>
+    public int Position { get; init; }
+
+    /// <summary>True when the delimiter may start a formatted span.</summary>
+    public bool IsOpening { get; init; }
+
+    /// <summary>True when the delimiter may end a formatted span.</summary>
+    public bool IsClosing { get; init; }
+
+    /// <summary>The matching delimiter, or <c>null</c> when the token is unpaired.</summary>
+    public Token? Pair { get; set; }
+}
diff --git a/cs/Markdown/TokenType.cs b/cs/Markdown/TokenType.cs
@@ -0,0 +1,8 @@
+namespace Markdown;
+
+/// <summary>
+/// Kinds of tokens a tokenizer can produce.
+/// </summary>
+public enum TokenType
+{
+    /// <summary>Plain text, written to the output unchanged.</summary>
+    Text = 0,
+
+    /// <summary>Italic delimiter (single underscore in KonturMdTokenizer).</summary>
+    Italic = 1,
+
+    /// <summary>Strong delimiter (double underscore in KonturMdTokenizer).</summary>
+    Strong = 2,
+}
diff --git a/cs/Markdown/Tokenizers/ITokenizer.cs b/cs/Markdown/Tokenizers/ITokenizer.cs
@@ -0,0 +1,6 @@
+namespace Markdown.Tokenizers;
+
+/// <summary>
+/// Splits markdown text into tokens.
+/// </summary>
+public interface ITokenizer
+{
+    /// <summary>
+    /// Tokenizes the given text.
+    /// </summary>
+    /// <param name="markdown">Text to split into tokens.</param>
+    /// <returns>The tokens found in <paramref name="markdown"/>.</returns>
+    List<Token> Tokenize(string markdown);
+}
diff --git a/cs/Markdown/Tokenizers/KonturMdTokenizer.cs b/cs/Markdown/Tokenizers/KonturMdTokenizer.cs
@@ -0,0 +1,201 @@
+using System.Text;
+
+namespace Markdown.Tokenizers;
+
+/// <summary>
+/// Tokenizer for <c>_italic_</c> / <c>__strong__</c> markup: splits a string into text and
+/// delimiter tokens and links matching delimiters through <see cref="Token.Pair"/>.
+/// </summary>
+public class KonturMdTokenizer : ITokenizer
+{
+    /// <summary>
+    /// Tokenizes <paramref name="markdown"/>.
+    /// </summary>
+    /// <param name="markdown">Text to tokenize.</param>
+    /// <returns>
+    /// Tokens in source order. Delimiters that end up without a partner keep a <c>null</c>
+    /// <see cref="Token.Pair"/>.
+    /// </returns>
+    public List<Token> Tokenize(string markdown)
+    {
+        var pendingText = new StringBuilder();
+        var tokens = GetTokens(markdown, pendingText);
+
+        CreateTokenPairs(tokens, TokenType.Italic, markdown);
+        CreateTokenPairs(tokens, TokenType.Strong, markdown);
+        FilterStrongInsideItalic(tokens);
+
+        // Text after the last delimiter is still buffered. Appending it last is safe because
+        // the pairing passes above only look at italic/strong tokens.
+        FlushText(tokens, pendingText);
+
+        return tokens;
+    }
+
+    /// <summary>Adds the buffered text as a text token when the buffer is not empty.</summary>
+    private static void FlushText(List<Token> tokens, StringBuilder text)
+    {
+        if (text.Length > 0)
+            tokens.Add(CreateTextToken(text.ToString()));
+    }
+
+    /// <summary>
+    /// Resolves conflicts between paired strong and italic delimiters:
+    /// a strong pair lying completely inside an italic pair is unpaired (the italic stays),
+    /// while a strong pair that only partially overlaps an italic pair unpairs both.
+    /// </summary>
+    private static void FilterStrongInsideItalic(List<Token> tokens)
+    {
+        // Opening delimiter of the italic pair currently being traversed, if any.
+        Token? italicOpening = null;
+
+        foreach (var token in tokens)
+        {
+            if (token.Pair is not { } partner)
+                continue;
+
+            switch (token.Type)
+            {
+                case TokenType.Italic:
+                    // Track the real partner instead of toggling a flag so that italic pairs
+                    // nested inside the tracked one do not flip the state.
+                    if (italicOpening is null)
+                        italicOpening = token;
+                    else if (ReferenceEquals(partner, italicOpening))
+                        italicOpening = null;
+                    break;
+
+                case TokenType.Strong when italicOpening is { Pair: { } italicClosing }:
+                    var strongStart = Math.Min(token.Position, partner.Position);
+                    var strongEnd = Math.Max(token.Position, partner.Position);
+                    var isNested = strongStart > italicOpening.Position
+                                   && strongEnd < italicClosing.Position;
+
+                    RemoveTokenPair(token);
+
+                    if (!isNested)
+                    {
+                        // The pairs intersect, so neither of them is valid markup.
+                        RemoveTokenPair(italicOpening);
+                        italicOpening = null;
+                    }
+                    break;
+            }
+        }
+    }
+
+    /// <summary>Breaks the link between a delimiter and its partner, in both directions.</summary>
+    private static void RemoveTokenPair(Token? token)
+    {
+        if (token?.Pair is null)
+            return;
+
+        token.Pair.Pair = null;
+        token.Pair = null;
+    }
+
+    /// <summary>
+    /// Scans <paramref name="text"/> once and returns the tokens found so far. Plain characters
+    /// are accumulated in <paramref name="sb"/> and turned into a text token whenever a
+    /// delimiter is emitted; whatever remains in <paramref name="sb"/> afterwards is the
+    /// caller's to flush.
+    /// </summary>
+    private static List<Token> GetTokens(string text, StringBuilder sb)
+    {
+        var tokens = new List<Token>();
+        var index = 0;
+
+        while (index < text.Length)
+        {
+            switch (text[index])
+            {
+                case '\\':
+                    index = HandleEscape(text, index, sb);
+                    break;
+
+                case '_':
+                    index = HandleUnderscore(text, index, sb, tokens);
+                    break;
+
+                default:
+                    sb.Append(text[index]);
+                    index++;
+                    break;
+            }
+        }
+
+        return tokens;
+    }
+
+    /// <summary>
+    /// Handles a backslash at <paramref name="index"/>: the following character is appended as
+    /// plain text and the backslash is dropped; a backslash at the very end is kept.
+    /// </summary>
+    /// <returns>Index of the next character to scan.</returns>
+    private static int HandleEscape(string markdown, int index, StringBuilder text)
+    {
+        // NOTE(review): every escaped character loses its backslash, not only '_' or '\\' —
+        // confirm this matches the intended escaping rules.
+        if (index + 1 < markdown.Length)
+        {
+            text.Append(markdown[index + 1]);
+            return index + 2;
+        }
+
+        text.Append(markdown[index]);
+        return index + 1;
+    }
+
+    /// <summary>
+    /// Handles a run of one or two underscores at <paramref name="index"/>: emits an italic or
+    /// strong delimiter token when the run qualifies as a delimiter, otherwise appends the
+    /// underscores to the pending text.
+    /// </summary>
+    /// <returns>Index of the first character after the run.</returns>
+    private static int HandleUnderscore(string markdown, int index, StringBuilder text, List<Token> tokens)
+    {
+        var count = CountUnderscores(markdown, index);
+        var next = index + count;
+
+        if (!IsValidDelimiter(markdown, index, count, out var isOpening, out var isClosing))
+        {
+            text.Append('_', count);
+            return next;
+        }
+
+        // The text collected so far precedes the delimiter, so it is emitted first.
+        if (text.Length > 0)
+        {
+            tokens.Add(CreateTextToken(text.ToString()));
+            text.Clear();
+        }
+
+        tokens.Add(new Token
+        {
+            Type = count == 2 ? TokenType.Strong : TokenType.Italic,
+            Content = new string('_', count),
+            IsOpening = isOpening,
+            IsClosing = isClosing,
+            Position = index
+        });
+
+        return next;
+    }
+
+    /// <summary>
+    /// Length of the underscore run starting at <paramref name="index"/>, capped at two.
+    /// The caller guarantees that the character at <paramref name="index"/> is an underscore.
+    /// </summary>
+    private static int CountUnderscores(string text, int index)
+    {
+        var hasSecond = index + 1 < text.Length && text[index + 1] == '_';
+        return hasSecond ? 2 : 1;
+    }
+
+    /// <summary>
+    /// Classifies an underscore run by its neighbours: it can open a span when the character
+    /// after it is a letter or punctuation (other than '_'), and close one when the character
+    /// before it is. String boundaries count as neither.
+    /// </summary>
+    /// <returns><c>true</c> when the run can open or close a span.</returns>
+    private static bool IsValidDelimiter(string markdown, int index, int length, out bool isOpening, out bool isClosing)
+    {
+        var afterIndex = index + length;
+        var before = index > 0 ? markdown[index - 1] : '\0';
+        var after = afterIndex < markdown.Length ? markdown[afterIndex] : '\0';
+
+        isClosing = IsMarkupNeighbor(before);
+        isOpening = IsMarkupNeighbor(after);
+
+        return isOpening || isClosing;
+    }
+
+    // '_' is excluded explicitly because char.IsPunctuation treats it as punctuation.
+    private static bool IsMarkupNeighbor(char value) =>
+        value != '_' && (char.IsLetter(value) || char.IsPunctuation(value));
+
+    private static Token CreateTextToken(string content) =>
+        new() { Type = TokenType.Text, Content = content };
+
+    /// <summary>
+    /// Links delimiters of the given <paramref name="type"/> into pairs using a stack of
+    /// not-yet-closed openers. A delimiter that can both open and close closes the most recent
+    /// opener only when no space lies between them in <paramref name="markdown"/>; with no
+    /// opener pending it becomes an opener itself.
+    /// </summary>
+    private static void CreateTokenPairs(List<Token> tokens, TokenType type, string markdown)
+    {
+        var openers = new Stack<Token>();
+
+        foreach (var token in tokens)
+        {
+            if (token.Type != type)
+                continue;
+
+            switch (token.IsOpening, token.IsClosing)
+            {
+                case (true, false):
+                    openers.Push(token);
+                    break;
+
+                case (false, true):
+                    if (openers.TryPop(out var opener))
+                        Link(opener, token);
+                    break;
+
+                case (true, true):
+                    if (openers.Count == 0)
+                    {
+                        openers.Push(token);
+                        break;
+                    }
+
+                    var candidate = openers.Peek();
+                    var length = token.Position - candidate.Position;
+
+                    if (!markdown.AsSpan(candidate.Position, length).Contains(' '))
+                    {
+                        openers.Pop();
+                        Link(candidate, token);
+                    }
+                    break;
+            }
+        }
+    }
+
+    /// <summary>Makes two delimiters each other's partner.</summary>
+    private static void Link(Token opening, Token closing)
+    {
+        opening.Pair = closing;
+        closing.Pair = opening;
+    }
+}