-
Notifications
You must be signed in to change notification settings - Fork 300
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Смышляев Дмитрий #231
base: master
Are you sure you want to change the base?
Смышляев Дмитрий #231
Changes from 16 commits
e26d15e
459f8f4
350c559
4a2abdb
7889590
8a31839
9900175
65fc929
9b750df
7f664cd
c8fcea8
302aa2f
620fb70
cbb6061
110c4d2
5408bec
2727bca
7fc6935
8446c91
d22da48
d1d8413
5d586ba
fec4199
fdd4a28
676fd39
680e394
25288d6
df27307
43b7116
3771ebd
647e1ff
fac2a80
816efd4
fec2be8
7664345
d0ff733
89f2a8f
5ad9ed2
945a4a9
c33f29b
fdd61ec
9cf741a
d902ab6
69653bc
2aee82e
d36920c
a4dae45
34760cc
81b9907
05b2a1a
992df7a
88d2842
b3a605c
de49690
ed1043c
245b5c6
81b7300
7650424
03f7791
764838a
c78f04d
0b33846
c946f8d
2b6337a
af53027
36db829
7177671
1bb8d65
de2fa80
b1c321f
de913c7
0cfad9e
ff0d4c1
06f5619
1f3cd85
e462015
6d1b752
d9f6d32
1f5429a
c36f843
6323061
d51a496
b8c92f5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
using Markdown.Parser.Nodes; | ||
|
||
namespace Markdown.Generator; | ||
|
||
/// <summary>
/// Final stage of the pipeline: receives the root node produced by the parser
/// and returns HTML text. The real generation is not implemented yet.
/// </summary>
public class HTMLGenerator
{
    /// <summary>
    /// Returns the HTML for the given tree.
    /// </summary>
    /// <param name="astRoot">Root node of the parsed document; not read by the current stub.</param>
    /// <returns>A fixed placeholder heading, regardless of the input.</returns>
    public string GenerateHTML(Node astRoot)
    {
        // Placeholder output until the tree is actually walked.
        const string placeholderHtml = "<h1>Hello world</h1>";
        return placeholderHtml;
    }
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
<Project Sdk="Microsoft.NET.Sdk"> | ||
|
||
<PropertyGroup> | ||
<OutputType>Exe</OutputType> | ||
<TargetFramework>net8.0</TargetFramework> | ||
<ImplicitUsings>enable</ImplicitUsings> | ||
<Nullable>enable</Nullable> | ||
</PropertyGroup> | ||
|
||
</Project> |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
namespace Markdown.Parser.Nodes; | ||
|
||
public class Node | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Допиши, что тут должно быть плиз, не совсем понимаю, чем от токена отличается( There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. По существу, токен - язык автомата. Вместо того чтобы читать посимвольно, я объединяю группы каких-то символов в токены, даю им тайпы и уже работаю с ними. Ноды же это результаты работы рулов. По сути в нодах будет лежать непрерывная последовательность токенов, которая будет являться каким либо тегом. А древовидность нод позволит описывать вложение одного тега в другой |
||
{ | ||
|
||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
using Markdown.Parser.Nodes; | ||
using Markdown.Tokenizer.Tokens; | ||
|
||
namespace Markdown.Parser.Rules; | ||
|
||
/// <summary>
/// Parsing rule used as the entry point by the token parser.
/// Matching is not implemented yet.
/// </summary>
public class BodyRule : IParsingRule
{
    /// <summary>Not implemented: always throws.</summary>
    /// <param name="tokens">Tokens to match against.</param>
    /// <exception cref="NotImplementedException">Always.</exception>
    public Node Match(List<Token> tokens)
        => throw new NotImplementedException();
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
using Markdown.Parser.Nodes; | ||
using Markdown.Tokenizer.Tokens; | ||
|
||
namespace Markdown.Parser.Rules; | ||
|
||
public class BoldRule : IParsingRule | ||
{ | ||
public Node Match(List<Token> tokens) | ||
{ | ||
throw new NotImplementedException(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. А хде... |
||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
using Markdown.Parser.Nodes; | ||
using Markdown.Tokenizer.Tokens; | ||
|
||
namespace Markdown.Parser.Rules; | ||
|
||
/// <summary>
/// Parsing rule for headlines. Matching is not implemented yet.
/// </summary>
public class HeadlineRule : IParsingRule
{
    /// <summary>Not implemented: always throws.</summary>
    /// <param name="tokens">Tokens to match against.</param>
    /// <exception cref="NotImplementedException">Always.</exception>
    public Node Match(List<Token> tokens)
        => throw new NotImplementedException();
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
using Markdown.Parser.Nodes; | ||
using Markdown.Tokenizer.Tokens; | ||
|
||
namespace Markdown.Parser.Rules; | ||
|
||
/// <summary>
/// Contract shared by all parsing rules: take a list of tokens and produce a node.
/// </summary>
public interface IParsingRule
{
    /// <summary>Applies the rule to the given tokens.</summary>
    /// <param name="tokens">Tokens produced by the tokenizer.</param>
    /// <returns>The node built by the rule.</returns>
    // Interface members are public by default, so no modifier is needed.
    Node Match(List<Token> tokens);
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
using Markdown.Parser.Nodes; | ||
using Markdown.Tokenizer.Tokens; | ||
|
||
namespace Markdown.Parser.Rules; | ||
|
||
public class ItalicRule : IParsingRule | ||
{ | ||
public Node Match(List<Token> tokens) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Из этого как-то должен получиться автомат 🤔 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Из этого действительно не получится автомат: параметров уже больше. На данный момент основная задача рулов - найти свой тег в листе токенов. Я начну описание с примитивных тегов, типо текста и италика. Далее, при описании тега выделения, я хочу переиспользовать ранее написанные теги, чтобы получить типо такого Говоря иначе, я хочу написать автомат, который строится на регвырах. Сами же регвыры я буду переиспользовать, чтобы написать еще более сложный регвыр. Финальный же автомат будет выглядить как or по всем получившимся регвырам. |
||
{ | ||
throw new NotImplementedException(); | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
using Markdown.Parser.Nodes; | ||
using Markdown.Parser.Rules; | ||
using Markdown.Tokenizer.Tokens; | ||
|
||
namespace Markdown.Parser; | ||
|
||
/// <summary>
/// Turns a token list into a node tree by delegating to the top-level rule.
/// </summary>
public class TokenParser
{
    /// <summary>Parses the whole token list.</summary>
    /// <param name="tokens">Tokens of the complete document.</param>
    /// <returns>The node returned by <see cref="BodyRule"/>.</returns>
    public Node Parse(List<Token> tokens)
    {
        // A fresh rule instance is created for every parse, as before.
        var rootRule = new BodyRule();
        return rootRule.Match(tokens);
    }
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
// See https://aka.ms/new-console-template for more information | ||
|
||
using Markdown.Generator; | ||
using Markdown.Parser; | ||
using Markdown.Tokenizer; | ||
|
||
namespace Markdown; | ||
|
||
internal class Program | ||
{ | ||
public static void Main(string[] args) | ||
{ | ||
var markdown = "This _is_ a __sample__ markdown _file_.\n"; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Советы категории Б: можешь вынести эту строчку в константу, чтобы потом, при добавлении новых примеров, можно было легко между ними переключаться :)
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Окей, сделаю :) |
||
|
||
var tokens = new MarkdownTokenizer().Tokenize(markdown); | ||
var astRoot = new TokenParser().Parse(tokens); | ||
|
||
Console.WriteLine(new HTMLGenerator().GenerateHTML(astRoot)); | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
using Markdown.Tokenizer.Scanners; | ||
using Markdown.Tokenizer.Tokens; | ||
|
||
namespace Markdown.Tokenizer; | ||
|
||
public class MarkdownTokenizer | ||
{ | ||
private readonly ITokenScanner[] scanners = [ | ||
new SpecScanner(), new NumberScanner(), new TextScanner() | ||
]; | ||
|
||
public List<Token> Tokenize(string markdown) | ||
{ | ||
var begin = 0; | ||
var tokenList = new List<Token>(); | ||
|
||
while (begin < markdown.Length) | ||
{ | ||
var token = scanners | ||
.Select(sc => sc.Scan(markdown, begin)) | ||
.First(token => token is not null); | ||
begin += token!.Length; tokenList.Add(token); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Перенеси на новую строчку плиз, глазкам больно ахаха There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ахахахаха, ладно перенесу |
||
} | ||
return tokenList; | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
using Markdown.Tokenizer.Tokens; | ||
|
||
namespace Markdown.Tokenizer.Scanners; | ||
|
||
/// <summary>
/// A scanner tries to read one token from the text at a given offset.
/// </summary>
public interface ITokenScanner
{
    /// <summary>Attempts to read a token starting at <paramref name="begin"/>.</summary>
    /// <param name="markdown">Full source text.</param>
    /// <param name="begin">Offset at which scanning starts; defaults to 0.</param>
    /// <returns>The scanned token, or <c>null</c> when this scanner does not match there.</returns>
    // Interface members are public by default, so no modifier is needed.
    Token? Scan(string markdown, int begin = 0);
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
using Markdown.Tokenizer.Tokens; | ||
|
||
namespace Markdown.Tokenizer.Scanners; | ||
|
||
/// <summary>
/// Scans a run of consecutive digit characters into a single <c>NUMBER</c> token.
/// </summary>
public class NumberScanner : ITokenScanner
{
    /// <summary>
    /// Reads the longest run of digits starting at <paramref name="begin"/>.
    /// </summary>
    /// <param name="markdown">Full source text.</param>
    /// <param name="begin">Offset at which scanning starts.</param>
    /// <returns>
    /// A <c>NUMBER</c> token covering the run, or <c>null</c> when the character at
    /// <paramref name="begin"/> is not a digit or the offset is past the end of the text.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="markdown"/> is null.</exception>
    public Token? Scan(string markdown, int begin = 0)
    {
        ArgumentNullException.ThrowIfNull(markdown);

        // Index directly instead of markdown.Skip(begin): Skip walks the string from
        // the start on every call, which made tokenizing quadratic in the text length.
        // Skip treated a negative offset as 0, so the scan start is clamped the same way.
        var scanStart = Math.Max(begin, 0);
        var scanEnd = scanStart;
        while (scanEnd < markdown.Length && CanScan(markdown[scanEnd]))
        {
            scanEnd++;
        }

        var numberLen = scanEnd - scanStart;
        return numberLen == 0 ? null : new Token(TokenType.NUMBER, begin, numberLen, markdown);
    }

    /// <summary>Tells whether <paramref name="symbol"/> can be part of a number token.</summary>
    public static bool CanScan(char symbol) => char.IsDigit(symbol);
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
using Markdown.Tokenizer.Tokens; | ||
|
||
namespace Markdown.Tokenizer.Scanners; | ||
|
||
public class SpecScanner : ITokenScanner | ||
{ | ||
public Token? Scan(string markdown, int begin = 0) | ||
{ | ||
var tokenType = GetTokenType(markdown[begin]); | ||
if (tokenType is null) return null; | ||
|
||
var notNullType = (TokenType)tokenType; | ||
return new Token(notNullType, begin, 1, markdown); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. А как будешь разбираться с bold? Это, если что, There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Как я описывал выше: токены это язык, который мой автомат собирается распознавать. Поэтому болд для меня это +- такая вещь There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Не. я к тому, что сейчас все заточено под 1 символ (абсолютно все). bold в 2 символа может добавить проблем, если сразу не подумать над ним. |
||
} | ||
|
||
public static bool CanScan(char symbol) | ||
=> GetTokenType(symbol) != null; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. А если у нас будет текст вида There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Оно не должно сломаться, так как автомат не найдет закрывающей |
||
|
||
private static TokenType? GetTokenType(char symbol) => symbol switch | ||
{ | ||
' ' => TokenType.SPACE, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. NIT: мб словарем сделать? Удобнее будет :) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Словарь так словарь: ща поправлю :) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Не, NIT - это буквально "докопаться". Такие треды можешь не исправлять, если не хочешь/считаешь свое решение лучше. Я тут просто варианты накидываю) |
||
'\n' => TokenType.NEW_LINE, | ||
'_' => TokenType.UNDERSCORE, | ||
_ => null | ||
}; | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
using Markdown.Tokenizer.Tokens; | ||
|
||
namespace Markdown.Tokenizer.Scanners; | ||
|
||
/// <summary>
/// Scans a run of characters that neither <see cref="SpecScanner"/> nor
/// <see cref="NumberScanner"/> accepts into a single <c>TEXT</c> token.
/// </summary>
public class TextScanner : ITokenScanner
{
    /// <summary>
    /// Reads the longest run of plain-text characters starting at <paramref name="begin"/>.
    /// </summary>
    /// <param name="markdown">Full source text.</param>
    /// <param name="begin">Offset at which scanning starts.</param>
    /// <returns>
    /// A <c>TEXT</c> token covering the run, or <c>null</c> when the character at
    /// <paramref name="begin"/> belongs to another scanner or the offset is past the end of the text.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="markdown"/> is null.</exception>
    public Token? Scan(string markdown, int begin = 0)
    {
        ArgumentNullException.ThrowIfNull(markdown);

        // Index directly instead of markdown.Skip(begin): Skip walks the string from
        // the start on every call, which made tokenizing quadratic in the text length.
        // Skip treated a negative offset as 0, so the scan start is clamped the same way.
        var scanStart = Math.Max(begin, 0);
        var scanEnd = scanStart;
        while (scanEnd < markdown.Length && CanScan(markdown[scanEnd]))
        {
            scanEnd++;
        }

        var valueLen = scanEnd - scanStart;
        return valueLen == 0 ? null : new Token(TokenType.TEXT, begin, valueLen, markdown);
    }

    // Text is whatever the other two scanners do not claim.
    private static bool CanScan(char symbol)
        => !SpecScanner.CanScan(symbol) && !NumberScanner.CanScan(symbol);
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
namespace Markdown.Tokenizer.Tokens; | ||
|
||
public class Token(TokenType tokenType, int begin, int length, string sourceText) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Это DataClass, в нем нет логики (совсем чуть-чуть :D), только данные. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. И для чего сюда передавать сразу весь текст There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Рекод сделаю :) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
{ | ||
private string? value; | ||
|
||
public int Begin { get; } = begin; | ||
public int Length { get; } = length; | ||
public TokenType TokenType { get; } = tokenType; | ||
|
||
public string GetValue() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Мб свойством? Тут логики не особо много) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Будет свойство, согл |
||
{ | ||
return value ??= sourceText.Substring(Begin, Length); | ||
} | ||
|
||
public override string ToString() | ||
{ | ||
return $"Token {TokenType} | Value \"{GetValue()}\""; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. NIT: Лучше не переопределять p.s. а если хочешь оставить что-то подобное для дебага, то можно заюзать Externsion метод по типу There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Да да, знаю, что переопределять ту стринг это харам. Но это осмысленный харам, так как я хотел, чтобы мне дебагер писал, что лежит в листе, когда я по шагам гуляю. А если делать как предложил ты, то среда не подтянет этот метод, когда я буду в дебаге смотреть код |
||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
namespace Markdown.Tokenizer.Tokens; | ||
|
||
public enum TokenType | ||
{ | ||
TEXT, | ||
UNDERSCORE, | ||
SPACE, | ||
NEW_LINE, | ||
NUMBER, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. А чем There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. И похожий вопрос на засыпку: для чего хочешь использовать There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. NUMBER и TEXT отличаются тем, что число с |
||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
<Project Sdk="Microsoft.NET.Sdk"> | ||
|
||
<PropertyGroup> | ||
<TargetFramework>net8.0</TargetFramework> | ||
<ImplicitUsings>enable</ImplicitUsings> | ||
<Nullable>enable</Nullable> | ||
|
||
<IsPackable>false</IsPackable> | ||
<IsTestProject>true</IsTestProject> | ||
</PropertyGroup> | ||
|
||
<ItemGroup> | ||
<PackageReference Include="coverlet.collector" Version="6.0.0"/> | ||
<PackageReference Include="FluentAssertions" Version="7.0.0-alpha.5" /> | ||
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="17.8.0"/> | ||
<PackageReference Include="NUnit" Version="3.14.0"/> | ||
<PackageReference Include="NUnit.Analyzers" Version="3.9.0"/> | ||
<PackageReference Include="NUnit3TestAdapter" Version="4.5.0"/> | ||
</ItemGroup> | ||
|
||
<ItemGroup> | ||
<Using Include="NUnit.Framework"/> | ||
</ItemGroup> | ||
|
||
<ItemGroup> | ||
<ProjectReference Include="..\Markdown\Markdown.csproj" /> | ||
</ItemGroup> | ||
|
||
</Project> |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
using System.Text; | ||
using FluentAssertions; | ||
using Markdown.Tokenizer; | ||
|
||
namespace MarkdownTests.Tokenizer; | ||
|
||
/// <summary>
/// Structural checks for <see cref="MarkdownTokenizer"/>: tokens must cover the whole
/// input, must not overlap or leave gaps, and must reproduce the input when concatenated.
/// </summary>
[TestFixture]
public class MarkdownTokenizerTest
{
    /// <summary>The token lengths add up to the input length.</summary>
    [TestCase("Text with numbers 321")]
    [TestCase("Some not specific text")]
    [TestCase("Text with __markdown__ characters")]
    [TestCase("_A_ __lot__ of _characters_ in _markdown_\n")]
    public void MarkdownTokenizer_Tokenize_TransformAllTextToTokens(string markdown)
    {
        var tokenizer = new MarkdownTokenizer();

        var tokens = tokenizer.Tokenize(markdown);

        var totalLength = tokens.Sum(token => token.Length);
        totalLength.Should().Be(markdown.Length);
    }

    /// <summary>Each token starts exactly where the previous one ends.</summary>
    [TestCase("Hello world!")]
    [TestCase("0123456789 - this is all digits")]
    [TestCase("Some _wonderful __text with_ intersects__")]
    public void MarkdownTokenizer_Tokenize_AllTokensAreNotIntersect(string markdown)
    {
        var tokenizer = new MarkdownTokenizer();

        var tokens = tokenizer.Tokenize(markdown);

        // Pair every token with its successor; Zip stops at the shorter sequence,
        // so no index arithmetic is needed and an empty list simply yields no pairs.
        var pairs = tokens.Zip(tokens.Skip(1), (prev, next) => (next, prev));
        pairs.Should().OnlyContain(pair => pair.next.Begin - pair.prev.Begin == pair.prev.Length);
    }

    /// <summary>Concatenating the token values in order gives back the input.</summary>
    [TestCase("Text with numbers 321")]
    [TestCase("Some not specific text")]
    [TestCase("Text with __markdown__ characters")]
    [TestCase("_A_ __lot__ of _characters_ in _markdown_\n")]
    public void MarkdownTokenizer_Tokenize_TokensPresentInCorrectOrder(string markdown)
    {
        var tokenizer = new MarkdownTokenizer();

        var tokens = tokenizer.Tokenize(markdown);

        var rebuilt = string.Concat(tokens.Select(token => token.GetValue()));
        rebuilt.Should().Be(markdown);
    }
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
using FluentAssertions; | ||
using Markdown.Tokenizer.Scanners; | ||
using Markdown.Tokenizer.Tokens; | ||
|
||
namespace MarkdownTests.Tokenizer.Scanners; | ||
|
||
[TestFixture] | ||
public class NumberScannerTest | ||
{ | ||
[TestCase("1", 0)] | ||
[TestCase("12345", 0)] | ||
public void NumberScanner_Scan_TokenShouldHaveNumberType(string text, int begin) | ||
{ | ||
var scanner = new NumberScanner(); | ||
|
||
var token = scanner.Scan(text, begin); | ||
|
||
token.Should().NotBeNull(); | ||
token.TokenType.Should().Be(TokenType.NUMBER); | ||
} | ||
|
||
[TestCase(" 123", 0)] | ||
[TestCase("_\n ", 0)] | ||
[TestCase("abcdifgh", 0)] | ||
public void NumberScanner_Scan_ShouldScanNullFromText(string text, int begin) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. NIT: можно поставить There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Я с этим уже разобрался в своей "приватной" ветке )) |
||
{ | ||
var scanner = new NumberScanner(); | ||
var token = scanner.Scan(text, begin); | ||
token.Should().BeNull(); | ||
} | ||
} |
This comment was marked as resolved.
Sorry, something went wrong.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Согласен: сделаю, как начну генераторы пилить )