Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Брозовский Максим #234

Open
wants to merge 13 commits into
base: master
Choose a base branch
from
10 changes: 10 additions & 0 deletions cs/Markdown/Markdown.csproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
<!-- Build definition of the Markdown project: a .NET 8 console executable. -->
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
<!-- Build an executable (entry point: Program.Main) rather than a class library. -->
<OutputType>Exe</OutputType>
<TargetFramework>net8.0</TargetFramework>
<!-- Let the SDK supply its default set of global using directives. -->
<ImplicitUsings>enable</ImplicitUsings>
<!-- Enable nullable reference type analysis for the whole project. -->
<Nullable>enable</Nullable>
</PropertyGroup>

</Project>
53 changes: 53 additions & 0 deletions cs/Markdown/Md.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
using Markdown.Tags;

namespace Markdown;

/// <summary>
/// Facade of the converter: wires the supported tag kinds and the token filtering rules
/// into an <see cref="MdTokenizer"/> and renders the resulting token tree to HTML.
/// </summary>
public static class Md
{
    /// <summary>
    /// Tag kinds handed to the tokenizer. A fresh set of instances is created on every access.
    /// </summary>
    private static IEnumerable<IMdTagKind> Tags =>
        new IMdTagKind[]
        {
            new EscapeMdTagKind(),
            new SingleMdTagKind("#", "<h1>", "</h1>"),
            new PairMdTagKind("_", "<em>", "</em>"),
            new PairMdTagKind("__", "<strong>", "</strong>"),
        };

    /// <summary>
    /// Filtering rules handed to the tokenizer. Each rule receives a token and all tokens of the
    /// same line; a rule returning <c>true</c> means the token must be dropped.
    /// </summary>
    private static IEnumerable<Func<Token, IEnumerable<Token>, bool>> TagRules =>
        new Func<Token, IEnumerable<Token>, bool>[]
        {
            IgnoreIntersectionBetweenPairTagsRule,
            IgnorePairTagWhenParentPairTagHasGreaterLengthRule,
        };

    /// <summary>Converts markdown text to HTML.</summary>
    /// <param name="markdownText">Markdown source to convert.</param>
    /// <returns>The HTML produced from the token tree built for <paramref name="markdownText"/>.</returns>
    public static string Render(string markdownText)
    {
        var tokenizer = new MdTokenizer(Tags.ToList(), TagRules);
        return tokenizer.Tokenize(markdownText).ConvertToHtml();
    }

    /// <summary>
    /// Rule: drop a pair-tag token when some other pair-tag token reports it as a child
    /// (<c>IsChild</c>) and that parent's markdown marker is NOT longer than the token's own marker.
    /// </summary>
    /// <remarks>
    /// Despite the method name, the token is dropped when the parent marker is shorter than or
    /// equal to the child marker (for example "__" nested inside "_").
    /// </remarks>
    private static bool IgnorePairTagWhenParentPairTagHasGreaterLengthRule(Token tokenToCheck,
        IEnumerable<Token> tokens)
    {
        if (tokenToCheck.Tag is not PairMdTagKind) return false;

        foreach (var parent in tokens)
        {
            if (parent != tokenToCheck
                && parent.Tag is PairMdTagKind
                && parent.IsChild(tokenToCheck)
                && parent.Tag.MdTag.Length <= tokenToCheck.Tag.MdTag.Length)
            {
                return true;
            }
        }

        return false;
    }

    /// <summary>
    /// Rule: drop a pair-tag token when it partially overlaps another pair-tag token,
    /// in either direction.
    /// </summary>
    private static bool IgnoreIntersectionBetweenPairTagsRule(Token tokenToCheck, IEnumerable<Token> tokens)
    {
        if (tokenToCheck.Tag is not PairMdTagKind) return false;

        foreach (var other in tokens)
        {
            if (other != tokenToCheck
                && other.Tag is PairMdTagKind
                && (IsIntersectionBetween(tokenToCheck, other) || IsIntersectionBetween(other, tokenToCheck)))
            {
                return true;
            }
        }

        return false;
    }

    /// <summary>
    /// Returns <c>true</c> when <paramref name="token"/> starts strictly inside
    /// <paramref name="otherToken"/> and ends strictly after it.
    /// </summary>
    private static bool IsIntersectionBetween(Token token, Token otherToken)
    {
        if (token.Position <= otherToken.Position) return false;

        var otherEnd = otherToken.Position + otherToken.Value.Length;
        if (token.Position >= otherEnd) return false;

        return token.Position + token.Value.Length > otherEnd;
    }
}
111 changes: 111 additions & 0 deletions cs/Markdown/MdTokenizer.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
using Markdown.Models;
using Markdown.Tags;

namespace Markdown;

public class MdTokenizer(List<IMdTagKind> tags, IEnumerable<Func<Token, IEnumerable<Token>, bool>> tagRules)
{
// Lookup from a tag's markdown marker text (IMdTagKind.MdTag) to the tag kind that owns it.
// ToDictionary requires every marker in `tags` to be unique.
private readonly Dictionary<string, IMdTagKind> availableTags = tags.ToDictionary(tag => tag.MdTag, tag => tag);
// Rules applied in Tokenize: a token is attached to its line only if every rule returns false for it.
private readonly List<Func<Token, IEnumerable<Token>, bool>> tagRules = tagRules.ToList();
// Distinct marker lengths, longest first, so TryGetTag probes longer markers
// before shorter ones at the same position (e.g. "__" before "_").
private readonly List<int> mdLenOfTagSignatures = tags
.Select(tag => tag.MdTag.Length)
.Distinct()
.OrderDescending()
.ToList();

Comment on lines +10 to +15

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Какое-то неявное соответствие тега и его длины получается

/// <summary>
/// Builds the token tree for <paramref name="text"/>: a root token holding one token per line,
/// each line holding the tokens found in it that pass every configured rule.
/// </summary>
/// <param name="text">Markdown text to tokenize.</param>
/// <returns>The root token of the tree.</returns>
public Token Tokenize(string text)
{
    var root = new Token(text);

    foreach (var line in GetLines(text))
    {
        var lineTokens = GetTokens(line.Value)
            .OrderBy(token => token.Position)
            .ToList();

        foreach (var candidate in lineTokens)
        {
            // A rule returning true vetoes the token; rules are evaluated right before
            // the token would be added, one token at a time.
            if (tagRules.Any(rule => rule(candidate, lineTokens))) continue;

            line.AddToken(candidate);
        }

        root.AddToken(line);
    }

    return root;
}

/// <summary>
/// Splits <paramref name="text"/> on <see cref="Environment.NewLine"/> and yields one token per
/// line, positioned at the line's start offset within <paramref name="text"/>.
/// </summary>
private static IEnumerable<Token> GetLines(string text)
{
    var lines = text.Split(Environment.NewLine);
    var lineStart = 0;

    for (var i = 0; i < lines.Length; i += 1)
    {
        var content = lines[i];
        yield return new Token(content, lineStart, new SingleMdTagKind());

        // Skip the line itself plus the separator that Split removed.
        lineStart += content.Length + Environment.NewLine.Length;
    }
}

/// <summary>
/// Produces all tokens of a single line: the regular tag tokens followed by the escape tokens.
/// </summary>
/// <param name="text">Text of one line.</param>
private IEnumerable<Token> GetTokens(string text)
{
    List<Tag> foundTags = GetTags(text).ToList();

    // Materialized eagerly on purpose: ParseEscapedTokens removes the tags it consumes
    // from foundTags, and that must be finished before ParseTokens starts reading the list.
    List<Token> escaped = ParseEscapedTokens(text, foundTags).ToList();

    IEnumerable<Token> regular = ParseTokens(text, foundTags);
    return regular.Concat(escaped);
}

/// <summary>
/// Scans <paramref name="text"/> left to right and yields every recognized tag marker together
/// with the index at which it starts. Recognized markers never overlap.
/// </summary>
private IEnumerable<Tag> GetTags(string text)
{
    var position = 0;

    while (position < text.Length)
    {
        if (TryGetTag(text, position, out var kind))
        {
            yield return new Tag(position, kind);

            // Continue right after the marker that was just recognized.
            position += kind.Length;
        }
        else
        {
            position += 1;
        }
    }
}

/// <summary>
/// Tries to recognize a tag marker starting at <paramref name="position"/>, probing the known
/// marker lengths in the order stored in <c>mdLenOfTagSignatures</c>.
/// </summary>
/// <param name="text">Text being scanned.</param>
/// <param name="position">Index at which a marker is expected to start.</param>
/// <param name="mdTag">The matching tag kind on success; <c>null</c> otherwise.</param>
/// <returns><c>true</c> when a marker was recognized.</returns>
private bool TryGetTag(string text, int position, out IMdTagKind mdTag)
{
    foreach (var length in mdLenOfTagSignatures)
    {
        // A marker of this length would run past the end of the text.
        var fitsInText = position + length <= text.Length;

        if (fitsInText && availableTags.TryGetValue(text.Substring(position, length), out var found))
        {
            mdTag = found;
            return true;
        }
    }

    mdTag = null!;
    return false;
}

/// <summary>
/// Consumes escape tags from <paramref name="tags"/> and yields an escape token for every escape
/// tag that is immediately followed (distance of exactly 1) by another tag.
/// </summary>
/// <remarks>
/// Mutates <paramref name="tags"/>: every visited escape tag is removed, and so is the tag it
/// escapes. The last element of the list is never visited, so a trailing escape tag stays in it.
/// </remarks>
private static IEnumerable<Token> ParseEscapedTokens(string text, List<Tag> tags)
{
    var idx = 0;

    while (idx < tags.Count - 1)
    {
        if (tags[idx].TagKind is not EscapeMdTagKind)
        {
            idx += 1;
            continue;
        }

        var escapePosition = tags[idx].Position;
        tags.Remove(tags[idx]);

        // After the removal tags[idx] is the tag that followed the escape character.
        if (tags[idx].Position - escapePosition == 1)
        {
            yield return text.CreateEscapeToken(tags[idx]);
            tags.Remove(tags[idx]);
        }

        // idx is deliberately left unchanged: a new tag has slid into this slot.
    }
}

/// <summary>
/// Walks <paramref name="tags"/> and yields a token for every tag whose kind manages to build
/// one via <c>TryGetToken</c>.
/// </summary>
/// <remarks>
/// Mutates <paramref name="tags"/>: the opening tag of each produced token is removed, and so is
/// the closing tag when the tag kind reports one.
/// </remarks>
private static IEnumerable<Token> ParseTokens(string text, List<Tag> tags)
{
    var idx = 0;

    while (idx < tags.Count)
    {
        var built = tags[idx].TagKind.TryGetToken(text, tags[idx], tags, out var token, out var closeToken);
        if (!built)
        {
            idx += 1;
            continue;
        }

        if (closeToken != null) tags.Remove(closeToken);

        yield return token;
        tags.RemoveAt(idx);

        // idx stays put: the next unprocessed tag now occupies this slot.
    }
}
}
5 changes: 5 additions & 0 deletions cs/Markdown/Models/Tag.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
using Markdown.Tags;

namespace Markdown.Models;

/// <summary>
/// A markdown tag marker found in a text: the kind of the tag and where it was found.
/// </summary>
/// <param name="Position">Index in the scanned text at which the marker starts.</param>
/// <param name="TagKind">Tag kind the marker belongs to.</param>
public record Tag(int Position, IMdTagKind TagKind);
9 changes: 9 additions & 0 deletions cs/Markdown/Program.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
namespace Markdown;

/// <summary>
/// Console entry point of the project; currently only prints a greeting.
/// </summary>
class Program
{
    /// <summary>Writes "Hello, World!" to standard output.</summary>
    /// <param name="args">Command-line arguments (not used).</param>
    public static void Main(string[] args) => Console.WriteLine("Hello, World!");
}
44 changes: 44 additions & 0 deletions cs/Markdown/StringExtension.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
using Markdown.Models;
using Markdown.Tags;

namespace Markdown;

public static class StringExtension
{
/// <summary>
/// Checks whether <paramref name="value"/> occurs in <paramref name="text"/> directly at
/// <paramref name="position"/>.
/// </summary>
/// <param name="text">Text to look in.</param>
/// <param name="position">
/// Index where <paramref name="value"/> must start (forward) or must end just before (backward).
/// </param>
/// <param name="value">Expected substring.</param>
/// <param name="isForward">
/// <c>true</c> to compare the characters starting at <paramref name="position"/>;
/// <c>false</c> to compare the characters that end right before <paramref name="position"/>.
/// </param>
/// <returns>
/// <c>true</c> on an exact match; <c>false</c> on a mismatch or when the compared range would
/// run past the end (forward) or before the start (backward) of <paramref name="text"/>.
/// </returns>
public static bool IsSubstring(this string text, int position, string value, bool isForward = true)
{
    var start = isForward ? position : position - value.Length;

    var fitsInText = isForward
        ? position + value.Length <= text.Length
        : start >= 0;
    if (!fitsInText) return false;

    return text.Substring(start, value.Length) == value;
}

/// <summary>
/// Applies <paramref name="predicate"/> to the single character adjacent to
/// <paramref name="position"/>.
/// </summary>
/// <param name="text">Text to look in.</param>
/// <param name="position">Reference index in <paramref name="text"/>.</param>
/// <param name="predicate">Test applied to the inspected character.</param>
/// <param name="isForward">
/// <c>true</c> to inspect the character at <paramref name="position"/>;
/// <c>false</c> to inspect the character right before it.
/// </param>
/// <returns>
/// The predicate's result, or <c>null</c> when the inspected character would lie past the end
/// (forward) or before the start (backward) of <paramref name="text"/>.
/// </returns>
// NOTE(review): the reviewer flagged the nullable-bool return type as suspicious. It is kept
// because callers compare the result against both true and false to tell "no such character"
// apart from "predicate failed".
public static bool? IsSubstring(this string text, int position, Predicate<char> predicate, bool isForward = true)
{
    if (isForward)
    {
        if (position + 1 > text.Length) return null;
        return predicate(text[position]);
    }

    if (position - 1 < 0) return null;
    return predicate(text[position - 1]);
}

/// <summary>
/// Creates a token whose value is the part of <paramref name="text"/> from
/// <paramref name="startIndex"/> (inclusive) to <paramref name="stopIndex"/> (exclusive).
/// </summary>
/// <param name="text">Text the token is cut from.</param>
/// <param name="startIndex">Index of the first character of the token; also stored as its position.</param>
/// <param name="stopIndex">Index right after the last character of the token.</param>
/// <param name="tag">Tag kind assigned to the token.</param>
public static Token CreateToken(this string text, int startIndex, int stopIndex, IMdTagKind tag)
{
    var length = stopIndex - startIndex;
    var tokenValue = text.Substring(startIndex, length);

    return new Token(tokenValue, startIndex, tag);
}
Comment on lines +27 to +31

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Такое лучше вынести статикой в класс Token или в его конструктор.
У Тебя string - базовый тип. Любой, кто подключит твою библиотеку, бонусом получит мусорный(для него) метод CreateToken.

Либо же его делать internal, но тогда у нас логика по тому как из текста создать токен выносится во вне класса.


/// <summary>
/// Finds the index right after the first <see cref="Environment.NewLine"/> that occurs at or
/// after <paramref name="startIndex"/>.
/// </summary>
/// <param name="text">Text to search.</param>
/// <param name="startIndex">Index at which the search starts.</param>
/// <returns>
/// The index of the first character following the line break, or the length of
/// <paramref name="text"/> when no line break is found.
/// </returns>
// NOTE(review): the reviewer pointed out that strings have indices, not "positions".
// The method name is kept for compatibility with existing callers.
public static int GetEndOfLinePosition(this string text, int startIndex = 0)
{
    var newLineIndex = text.IndexOf(Environment.NewLine, startIndex, StringComparison.Ordinal);
    if (newLineIndex == -1) return text.Length;

    return newLineIndex + Environment.NewLine.Length;
}

/// <summary>
/// Creates an escape token for a tag that is preceded by an escape character.
/// </summary>
/// <param name="text">Text the token is cut from.</param>
/// <param name="escapeTag">
/// The escaped tag, i.e. the tag located right after the escape character.
/// </param>
/// <returns>
/// A token of kind <see cref="EscapeMdTagKind"/> that starts one character before
/// <paramref name="escapeTag"/> and is as long as the escaped tag's marker.
/// </returns>
// NOTE(review): the reviewer suggested moving token creation into the Token class (static
// factory or constructor) instead of exposing it as a public extension on string.
public static Token CreateEscapeToken(this string text, Tag escapeTag)
{
    // The token begins at the escape character itself, one index before the escaped tag.
    var escapeIndex = escapeTag.Position - 1;
    var value = text.Substring(escapeIndex, escapeTag.TagKind.Length);

    return new Token(value, escapeIndex, new EscapeMdTagKind());
}
}
38 changes: 38 additions & 0 deletions cs/Markdown/Tags/EscapeMdTagKind.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
using Markdown.Models;

namespace Markdown.Tags;

/// <summary>
/// Tag kind of the escape character (a single backslash). It produces no HTML of its own.
/// </summary>
public class EscapeMdTagKind : IMdTagKind
{
    /// <summary>The markdown marker of this kind: one backslash.</summary>
    public string MdTag => "\\";

    /// <summary>Always empty: escaping adds no opening HTML tag.</summary>
    public string HtmlOpenTag => string.Empty;

    /// <summary>Always empty: escaping adds no closing HTML tag.</summary>
    public string HtmlCloseTag => string.Empty;

    /// <summary>
    /// Returns <c>true</c> when the escape marker occurs in <paramref name="text"/> at
    /// <paramref name="startIndex"/>. <paramref name="stopIndex"/> is not used.
    /// </summary>
    public bool TokenCanBeCreated(string text, int startIndex, int stopIndex) =>
        text.IsSubstring(startIndex, MdTag);

    /// <summary>
    /// Tries to build a token that spans from the escape tag to the end of the tag that follows
    /// it in <paramref name="closeTags"/>.
    /// </summary>
    /// <param name="text">Text the tags were found in.</param>
    /// <param name="openTag">The escape tag.</param>
    /// <param name="closeTags">Tag list in which the tag following <paramref name="openTag"/> is looked up.</param>
    /// <param name="token">The created token on success; <c>null</c> otherwise.</param>
    /// <param name="closeTag">The tag following <paramref name="openTag"/> on success; <c>null</c> otherwise.</param>
    /// <returns>
    /// <c>true</c> when a following tag exists; <c>false</c> when <paramref name="openTag"/> is
    /// the last tag of the list, in which case there is nothing to escape.
    /// </returns>
    public bool TryGetToken(string text, Tag openTag, List<Tag> closeTags, out Token token,
        out Tag? closeTag)
    {
        var escapedTagIndex = closeTags.IndexOf(openTag) + 1;

        // ">=" (not ">"): an index equal to Count is already out of range. The previous check
        // let that case through and the indexer threw when the escape tag was the last one.
        var escapedTag = escapedTagIndex >= closeTags.Count
            ? null
            : closeTags[escapedTagIndex];

        if (escapedTag is null)
        {
            closeTag = null;
            token = null!;
            return false;
        }

        closeTag = escapedTag;
        token = text.CreateToken(openTag.Position, escapedTag.Position + escapedTag.TagKind.Length, this);
        return true;
    }

    /// <summary>Removes the leading escape marker from <paramref name="text"/>.</summary>
    public string RemoveMdTags(string text) => text.Remove(0, MdTag.Length);

    /// <summary>Returns <paramref name="text"/> unchanged: no HTML tags are inserted.</summary>
    public string InsertHtmlTags(string text) => text;
}
16 changes: 16 additions & 0 deletions cs/Markdown/Tags/IMdTagKind.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
using Markdown.Models;

namespace Markdown.Tags;

public interface IMdTagKind
{
/// <summary>Marker text that denotes this tag in markdown (for example "_" or "\").</summary>
public string MdTag { get; }
/// <summary>HTML text emitted in place of the opening marker.</summary>
public string HtmlOpenTag { get; }
/// <summary>HTML text emitted in place of the closing marker.</summary>
public string HtmlCloseTag { get; }
/// <summary>Length of <see cref="MdTag"/> in characters.</summary>
public int Length => MdTag.Length;

/// <summary>
/// Reports whether a token of this kind may cover the part of <paramref name="text"/> between
/// <paramref name="startIndex"/> and <paramref name="stopIndex"/>. How the two indices are
/// interpreted is up to the implementation.
/// </summary>
public bool TokenCanBeCreated(string text, int startIndex, int stopIndex);
/// <summary>
/// Tries to build a token that starts at <paramref name="openTag"/>, using
/// <paramref name="closeTags"/> as the list of tags available for closing it.
/// </summary>
/// <param name="text">Text the tags were found in.</param>
/// <param name="openTag">Tag the token starts with.</param>
/// <param name="closeTags">Tags among which a closing tag is searched.</param>
/// <param name="token">The created token when the method returns <c>true</c>.</param>
/// <param name="closeTag">The tag consumed as the closing tag, or <c>null</c> when there is none.</param>
/// <returns><c>true</c> when a token was created.</returns>
public bool TryGetToken(string text, Tag openTag, List<Tag> closeTags, out Token token, out Tag? closeTag);
/// <summary>Returns <paramref name="text"/> with this kind's markdown markers stripped.</summary>
public string RemoveMdTags(string text);
/// <summary>Returns <paramref name="text"/> with this kind's HTML tags added.</summary>
public string InsertHtmlTags(string text);
Comment on lines +14 to +15

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Как будто это должна быть атомарная операция, но по архитектуре вижу, что просто поменять не получится

}
56 changes: 56 additions & 0 deletions cs/Markdown/Tags/PairMdTagKind.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
using Markdown.Models;

namespace Markdown.Tags;

/// <summary>
/// Tag kind written as the same marker on both sides of the content (for example "_text_"),
/// rendered as a pair of opening and closing HTML tags.
/// </summary>
public class PairMdTagKind(string mdTag, string htmlOpenTag, string htmlCloseTag) : IMdTagKind
{
    /// <summary>Markdown marker used on both sides of the content.</summary>
    public string MdTag => mdTag;

    /// <summary>HTML tag that replaces the opening marker.</summary>
    public string HtmlOpenTag => htmlOpenTag;

    /// <summary>HTML tag that replaces the closing marker.</summary>
    public string HtmlCloseTag => htmlCloseTag;

    /// <summary>
    /// A marker at <paramref name="position"/> is valid when the marker text is really there and
    /// neither the character right before it nor the character right after it is a digit.
    /// </summary>
    private bool IsValidTag(string text, int position)
    {
        if (!text.IsSubstring(position, MdTag)) return false;
        if (text.IsSubstring(position, char.IsDigit, false) == true) return false;

        return text.IsSubstring(position + MdTag.Length, char.IsDigit) != true;
    }

    /// <summary>
    /// Decides whether the range from <paramref name="startIndex"/> (inclusive) to
    /// <paramref name="stopIndex"/> (exclusive), markers included, forms a valid paired token.
    /// </summary>
    /// <remarks>
    /// Both markers must be valid. A range without spaces only needs non-empty content between
    /// the markers. A range with spaces must not contain a line break and must be bounded by
    /// whitespace or by the edges of <paramref name="text"/> on both sides.
    /// </remarks>
    public bool TokenCanBeCreated(string text, int startIndex, int stopIndex)
    {
        var closingMarkerIndex = stopIndex - MdTag.Length;
        if (!IsValidTag(text, startIndex) || !IsValidTag(text, closingMarkerIndex)) return false;

        var value = text.Substring(startIndex, stopIndex - startIndex);

        if (!value.Contains(' ')) return value.Length > MdTag.Length * 2;
        if (value.Contains(Environment.NewLine)) return false;

        // null means "no neighbouring character" (edge of the text) and is accepted as well.
        return text.IsSubstring(startIndex, char.IsWhiteSpace, false) != false
               && text.IsSubstring(stopIndex, char.IsWhiteSpace) != false;
    }

    /// <summary>
    /// Looks for the first tag in <paramref name="closeTags"/> that can close
    /// <paramref name="openTag"/> and builds the token spanning both markers.
    /// </summary>
    /// <param name="text">Text the tags were found in.</param>
    /// <param name="openTag">Opening tag of the pair.</param>
    /// <param name="closeTags">Candidate closing tags.</param>
    /// <param name="token">The created token on success; <c>null</c> otherwise.</param>
    /// <param name="closeTag">The chosen closing tag on success; <c>null</c> otherwise.</param>
    /// <returns><c>true</c> when a closing tag was found.</returns>
    public bool TryGetToken(string text, Tag openTag, List<Tag> closeTags, out Token token, out Tag closeTag)
    {
        foreach (var candidate in closeTags)
        {
            // Must be a different tag of the very same kind located at or after the opening tag.
            if (openTag == candidate
                || openTag.TagKind != candidate.TagKind
                || openTag.Position > candidate.Position)
            {
                continue;
            }

            var stopIndex = candidate.Position + candidate.TagKind.Length;
            if (!openTag.TagKind.TokenCanBeCreated(text, openTag.Position, stopIndex)) continue;

            closeTag = candidate;
            token = text.CreateToken(openTag.Position, stopIndex, openTag.TagKind);
            return true;
        }

        closeTag = null!;
        token = null!;
        return false;
    }

    /// <summary>Strips the closing and then the opening marker from <paramref name="text"/>.</summary>
    public string RemoveMdTags(string text)
    {
        var withoutClosingMarker = text.Remove(text.Length - MdTag.Length);
        return withoutClosingMarker.Remove(0, MdTag.Length);
    }

    /// <summary>Wraps <paramref name="text"/> into the opening and closing HTML tags.</summary>
    public string InsertHtmlTags(string text)
    {
        var withClosingTag = text.Insert(text.Length, HtmlCloseTag);
        return withClosingTag.Insert(0, HtmlOpenTag);
    }
}
Loading