-
Notifications
You must be signed in to change notification settings - Fork 300
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Кирилл Зарипов #232
base: master
Are you sure you want to change the base?
Кирилл Зарипов #232
Changes from 9 commits
13afde6
e7278b3
2b891cd
d13b9f2
da0c7b9
0079e72
28b8ee3
2704b59
d176445
a5a1c74
5023431
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
using System.Text; | ||
|
||
namespace Markdown.Converters; | ||
|
||
public class HtmlConverter : IConverter | ||
{ | ||
private static readonly Dictionary<TokenType, string> HtmlTag = new() | ||
{ | ||
{ TokenType.Italic, "em" }, | ||
{ TokenType.Strong, "strong" }, | ||
}; | ||
|
||
public string Convert(List<Token> tokens) | ||
{ | ||
var html = new StringBuilder(); | ||
var isClosed = new Dictionary<TokenType, bool> | ||
{ | ||
{TokenType.Italic, true}, | ||
{TokenType.Text, true}, | ||
{TokenType.Strong, true}, | ||
}; | ||
|
||
foreach (var token in tokens) | ||
{ | ||
html.Append(token.Type switch | ||
{ | ||
TokenType.Text => token.Content, | ||
TokenType.Italic or TokenType.Strong when token.Pair != null => | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Нарушение SRP. Класс должен нагенерить html по входным данным, а он еще что-то проверяет There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. убрал проверку |
||
isClosed[token.Type] ? Tag.Open(HtmlTag[token.Type]) : Tag.Close(HtmlTag[token.Type]), | ||
_ => token.Content | ||
}); | ||
|
||
if (token.Pair != null) | ||
{ | ||
isClosed[token.Type] = !isClosed[token.Type]; | ||
} | ||
} | ||
|
||
return html.ToString(); | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
namespace Markdown.Converters; | ||
|
||
/// <summary>
/// Turns a list of tokens into a single output string
/// (for example, <c>HtmlConverter</c> produces HTML).
/// </summary>
public interface IConverter
{
    /// <summary>Builds the output text for the given tokens.</summary>
    /// <param name="tokens">Tokens to convert, in document order.</param>
    /// <returns>The converted text.</returns>
    string Convert(List<Token> tokens);
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
<Project Sdk="Microsoft.NET.Sdk"> | ||
|
||
<PropertyGroup> | ||
<OutputType>Exe</OutputType> | ||
<TargetFramework>net9.0</TargetFramework> | ||
<ImplicitUsings>enable</ImplicitUsings> | ||
<Nullable>enable</Nullable> | ||
</PropertyGroup> | ||
|
||
</Project> |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
using System.Text; | ||
using Markdown.Converters; | ||
using Markdown.Tokenizers; | ||
|
||
namespace Markdown; | ||
|
||
/// <summary>
/// Renders markdown text to HTML one line at a time, delegating tokenizing and
/// HTML generation to the supplied collaborators.
/// </summary>
/// <param name="tokenizer">Splits a single line of markdown into tokens.</param>
/// <param name="converter">Converts the tokens of a single line into HTML.</param>
public class Md(ITokenizer tokenizer, IConverter converter)
{
    private const string HeaderPrefix = "# ";
    private const string HeaderTag = "h1";

    // Both line-break conventions are recognised on every platform, so the result
    // does not depend on where the input text was produced.
    private static readonly string[] LineSeparators = ["\r\n", "\n"];
    private static readonly char[] LineBreakChars = ['\r', '\n'];

    /// <summary>
    /// Converts markdown to HTML, processing every line independently.
    /// </summary>
    /// <param name="markdown">Markdown source; may be null or empty.</param>
    /// <returns>
    /// The rendered HTML with lines joined by <see cref="Environment.NewLine"/> and
    /// trailing line breaks removed; an empty string for null or empty input.
    /// </returns>
    public string Render(string markdown)
    {
        if (string.IsNullOrEmpty(markdown))
        {
            return string.Empty;
        }

        var result = new StringBuilder();

        foreach (var paragraph in markdown.Split(LineSeparators, StringSplitOptions.None))
        {
            result.AppendLine(ProcessParagraph(paragraph));
        }

        // AppendLine always leaves a line break after the last paragraph; drop it
        // (together with any other trailing line breaks, as before).
        return result.ToString().TrimEnd(LineBreakChars);
    }

    /// <summary>
    /// Renders one line: a line starting with "# " becomes an h1 header,
    /// any other line is rendered as inline markdown.
    /// </summary>
    private string ProcessParagraph(string line)
    {
        // A lone '\r' is not a separator, so it can still trail the line here.
        var trimmedLine = line.TrimEnd(LineBreakChars);

        // Ordinal comparison: the prefix is markup, not natural-language text.
        if (trimmedLine.StartsWith(HeaderPrefix, StringComparison.Ordinal))
        {
            var headerContent = trimmedLine[HeaderPrefix.Length..];
            return Tag.Wrap(HeaderTag, ParseMarkdown(headerContent));
        }

        return ParseMarkdown(trimmedLine);
    }

    /// <summary>Tokenizes the text and converts the tokens to HTML.</summary>
    private string ParseMarkdown(string markdown)
    {
        var tokens = tokenizer.Tokenize(markdown);
        return converter.Convert(tokens);
    }
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
using Markdown; | ||
using Markdown.Converters; | ||
using Markdown.Tokenizers; | ||
|
||
var tokenizer = new KonturMdTokenizer(); | ||
var converter = new HtmlConverter(); | ||
var markdown = new Md(tokenizer, converter); | ||
|
||
const string input = """ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Советы категории Б: можешь вынести эту строчку в константу, чтобы потом, при добавлении новых примеров, можно было легко между ними переключаться :)
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. А еще давай сюда буду накидывать, в каких случаях все ломается:
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. исправил эти случаи |
||
Подчерки внутри текста c цифрами_12_3 не считаются выделением и должны оставаться символами подчерка. | ||
"""; | ||
|
||
var result = markdown.Render(input); | ||
|
||
Console.WriteLine(result); |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
namespace Markdown; | ||
|
||
/// <summary>
/// Helpers that build textual HTML tags from a tag name.
/// </summary>
public static class Tag
{
    /// <summary>Returns the opening tag, e.g. "em" gives "&lt;em&gt;".</summary>
    public static string Open(string tagName)
    {
        return "<" + tagName + ">";
    }

    /// <summary>Returns the closing tag, e.g. "em" gives "&lt;/em&gt;".</summary>
    public static string Close(string tagName)
    {
        return "</" + tagName + ">";
    }

    /// <summary>Surrounds <paramref name="content"/> with the opening and closing tag.</summary>
    public static string Wrap(string tagName, string content)
    {
        var opening = Open(tagName);
        var closing = Close(tagName);
        return string.Concat(opening, content, closing);
    }
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
namespace Markdown; | ||
|
||
public class Token | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Условно, мне все еще с трудом понятно зачем иметь Непонятно, как работать с этим классом, без полного прочтения кода. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. переделал логику с IsClosing & IsOpening |
||
{ | ||
public TokenType Type { get; init; } | ||
public required string Content { get; init; } | ||
public bool IsClosing { get; init; } | ||
public bool IsOpening { get; init; } | ||
public int Position { get; init; } | ||
public Token? Pair { get; set; } | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
namespace Markdown; | ||
|
||
/// <summary>
/// Kinds of tokens a tokenizer can produce.
/// </summary>
public enum TokenType
{
    /// <summary>Plain text.</summary>
    Text = 0,

    /// <summary>Italic delimiter; HtmlConverter maps it to the "em" tag.</summary>
    Italic = 1,

    /// <summary>Strong delimiter; HtmlConverter maps it to the "strong" tag.</summary>
    Strong = 2,
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
namespace Markdown.Tokenizers; | ||
|
||
/// <summary>
/// Splits markdown text into a list of tokens.
/// </summary>
public interface ITokenizer
{
    /// <summary>Tokenizes the given markdown text.</summary>
    /// <param name="markdown">Markdown source to split.</param>
    /// <returns>The tokens found in the text.</returns>
    List<Token> Tokenize(string markdown);
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,201 @@ | ||
using System.Text; | ||
|
||
namespace Markdown.Tokenizers; | ||
|
||
public class KonturMdTokenizer : ITokenizer | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. В этом классе намешано все и сразу. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Давай начнем отсюда: почему There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Я часть спецификации не так понял и реализовал ее не так как она работает в гитхабе, поэтому подумал это контуровский Md) |
||
{ | ||
/// <summary>
/// Splits markdown into tokens, pairs italic and strong delimiters,
/// unpairs strong delimiters found inside italic ones, and finally appends
/// any text still buffered after the scan as a trailing text token.
/// </summary>
/// <param name="markdown">Markdown text to tokenize.</param>
/// <returns>The resulting tokens.</returns>
public List<Token> Tokenize(string markdown)
{
    var pendingText = new StringBuilder();
    var result = GetTokens(markdown, pendingText);

    // Italic delimiters are paired before strong ones.
    foreach (var delimiterType in new[] { TokenType.Italic, TokenType.Strong })
    {
        CreateTokenPairs(result, delimiterType, markdown);
    }

    FilterStrongInsideItalic(result);
    FlushText(result, pendingText);

    return result;
}
|
||
private void FlushText(List<Token> tokens, StringBuilder text) | ||
{ | ||
if (text.Length > 0) | ||
tokens.Add(CreateTextToken(text.ToString())); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. метода больше нет |
||
} | ||
|
||
private void FilterStrongInsideItalic(List<Token> tokens) | ||
{ | ||
var isItalicOpen = false; | ||
Token? currentItalicOpening = null; | ||
|
||
foreach (var token in tokens) | ||
{ | ||
if (token.Pair == null) | ||
continue; | ||
|
||
switch (token.Type) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Виднеется закономерность: как только в коде появляется |
||
{ | ||
case TokenType.Italic: | ||
isItalicOpen = !isItalicOpen; | ||
currentItalicOpening = isItalicOpen ? token : null; | ||
break; | ||
case TokenType.Strong when isItalicOpen: | ||
RemoveTokenPair(currentItalicOpening); | ||
RemoveTokenPair(token); | ||
isItalicOpen = false; | ||
break; | ||
} | ||
} | ||
} | ||
|
||
private void RemoveTokenPair(Token? token) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Метод можно унести в сам Token There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
{ | ||
if (token?.Pair == null) | ||
return; | ||
|
||
token.Pair.Pair = null; | ||
token.Pair = null; | ||
} | ||
|
||
private List<Token> GetTokens(string text, StringBuilder sb) | ||
{ | ||
var tokens = new List<Token>(); | ||
|
||
for (var index = 0; index < text.Length;) | ||
{ | ||
switch (text[index]) | ||
{ | ||
case '\\': | ||
index = HandleEscape(text, index, sb); | ||
break; | ||
|
||
case '_': | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. При добавлении новых тегов нужно будет расширять этот Switch до гигантских масштабов. В общем, главная проблема - что оно не очень масштабируемое. Да и понять, где нужно масштабировать, довольно тяжело (нужно прочитать весь код) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. И вынести теги в константы, они сейчас по всему коду разбросаны There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
index = HandleUnderscore(text, index, sb, tokens); | ||
break; | ||
|
||
default: | ||
sb.Append(text[index++]); | ||
break; | ||
} | ||
} | ||
|
||
return tokens; | ||
} | ||
|
||
private int HandleEscape(string markdown, int index, StringBuilder text) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Он удаляет одиночные There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
{ | ||
if (index + 1 < markdown.Length) | ||
{ | ||
text.Append(markdown[index + 1]); | ||
return index + 2; | ||
} | ||
|
||
text.Append(markdown[index]); | ||
return index + 1; | ||
} | ||
|
||
private int HandleUnderscore(string markdown, int index, StringBuilder text, List<Token> tokens) | ||
{ | ||
var count = CountUnderscores(markdown, index); | ||
var tokenType = count == 2 ? TokenType.Strong : TokenType.Italic; | ||
|
||
if (IsValidDelimiter(markdown, index, count, out var isOpening, out var isClosing)) | ||
{ | ||
if (text.Length > 0) | ||
{ | ||
tokens.Add(CreateTextToken(text.ToString())); | ||
text.Clear(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Три метода вызываются в одной строчке. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
} | ||
|
||
tokens.Add(new Token | ||
{ | ||
Type = tokenType, | ||
Content = new string('_', count), | ||
IsOpening = isOpening, | ||
IsClosing = isClosing, | ||
Position = index | ||
}); | ||
|
||
return index + count; | ||
} | ||
|
||
text.Append(new string('_', count)); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. переделал в dictionary |
||
return index + count; | ||
} | ||
|
||
private int CountUnderscores(string text, int index) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Попробую описать, что делает метод: Считает идет 1 или 2 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
{ | ||
var result = 0; | ||
|
||
if (text[index] == '_') result++; | ||
if (index + 1 < text.Length && text[index + 1] == '_') result++; | ||
|
||
return result; | ||
} | ||
|
||
private bool IsValidDelimiter(string markdown, int index, int length, out bool isOpening, out bool isClosing) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Столько входящих параметров тяжело переварить человеческому мозгу... Точнее, ответить на вопрос: "Зачем для проверки на валидность нужно 5 аргументов" There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. тоже удалил |
||
{ | ||
var before = index > 0 | ||
? markdown[index - 1] | ||
: '\0'; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Почему именно There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Это Null символ, в C используется как конец строки |
||
var after = index + length < markdown.Length | ||
? markdown[index + length] | ||
: '\0'; | ||
|
||
isClosing = IsDelimiter(before); | ||
isOpening = IsDelimiter(after); | ||
|
||
return isOpening || isClosing; | ||
} | ||
|
||
private bool IsDelimiter(char value) => value != '_' && (char.IsLetter(value) || char.IsPunctuation(value)); | ||
|
||
private Token CreateTextToken(string content) => new() { Type = TokenType.Text, Content = content }; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Такое можно вынести в сам Token. Или даже лучше в его Extension методы |
||
|
||
private void CreateTokenPairs(List<Token> tokens, TokenType type, string markdown) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Еще перед прочтением кода - метод капец какой длинный, его тяжело читать. Нужно держать слишком много логики в голове There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. После прочтения - да, слишком много логики. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ДКА интересная штука, но сейчас уже что есть, то есть |
||
{ | ||
var pairableTokens = new Stack<int>(); | ||
|
||
for (var i = 0; i < tokens.Count; i++) | ||
{ | ||
var token = tokens[i]; | ||
if (type != token.Type) continue; | ||
|
||
switch (token.IsOpening, token.IsClosing) | ||
{ | ||
case (true, false): | ||
pairableTokens.Push(i); | ||
break; | ||
|
||
case (false, true): | ||
if (pairableTokens.Count > 0) | ||
{ | ||
var openingIndex = pairableTokens.Pop(); | ||
tokens[openingIndex].Pair = tokens[i]; | ||
tokens[i].Pair = tokens[openingIndex]; | ||
} | ||
break; | ||
|
||
case (true, true): | ||
if (pairableTokens.Count > 0) | ||
{ | ||
var openingIndex = pairableTokens.Peek(); | ||
var lenght = token.Position - tokens[openingIndex].Position; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
var slice = markdown.AsSpan().Slice(tokens[openingIndex].Position, lenght); | ||
|
||
if (!slice.Contains(' ')) | ||
{ | ||
pairableTokens.Pop(); | ||
tokens[openingIndex].Pair = tokens[i]; | ||
tokens[i].Pair = tokens[openingIndex]; | ||
} | ||
} | ||
else | ||
{ | ||
pairableTokens.Push(i); | ||
} | ||
break; | ||
} | ||
} | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Давай принимать обобщенные типы)
В этом случае вообще IEnumerable достаточно
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.