From e647089554ad6fdd5673a02dbcb2342d76190d85 Mon Sep 17 00:00:00 2001 From: Emiya Date: Fri, 29 Jul 2016 00:26:13 +0300 Subject: [PATCH] Add support for github style tables --- CommonMark.Tests/CommonMark.Tests.csproj | 1 + CommonMark.Tests/TableTests.cs | 336 +++++++++++++++++ CommonMark/CommonMark.Base.csproj | 1 + CommonMark/CommonMarkAdditionalFeatures.cs | 14 + CommonMark/CommonMarkConverter.cs | 4 +- CommonMark/Formatters/HtmlFormatterSlim.cs | 99 +++++ CommonMark/Parser/BlockMethods.cs | 415 +++++++++++++++++++-- CommonMark/Parser/TabTextReader.cs | 1 + CommonMark/Syntax/Block.cs | 25 +- CommonMark/Syntax/BlockTag.cs | 17 +- CommonMark/Syntax/TableHeaderAlignment.cs | 28 ++ 11 files changed, 884 insertions(+), 57 deletions(-) create mode 100644 CommonMark.Tests/TableTests.cs create mode 100644 CommonMark/Syntax/TableHeaderAlignment.cs diff --git a/CommonMark.Tests/CommonMark.Tests.csproj b/CommonMark.Tests/CommonMark.Tests.csproj index 4d0bd9d..d867031 100644 --- a/CommonMark.Tests/CommonMark.Tests.csproj +++ b/CommonMark.Tests/CommonMark.Tests.csproj @@ -65,6 +65,7 @@ + diff --git a/CommonMark.Tests/TableTests.cs b/CommonMark.Tests/TableTests.cs new file mode 100644 index 0000000..2009145 --- /dev/null +++ b/CommonMark.Tests/TableTests.cs @@ -0,0 +1,336 @@ +using CommonMark.Syntax; +using Microsoft.VisualStudio.TestTools.UnitTesting; +using System.IO; + +namespace CommonMark.Tests +{ + [TestClass] + public class TableTests + { + static CommonMarkSettings ReadSettings; + static CommonMarkSettings WriteSettings; + + static TableTests() + { + ReadSettings = CommonMarkSettings.Default.Clone(); + ReadSettings.AdditionalFeatures = CommonMarkAdditionalFeatures.GithubStyleTables; + ReadSettings.TrackSourcePosition = true; + + WriteSettings = CommonMarkSettings.Default.Clone(); + WriteSettings.AdditionalFeatures = CommonMarkAdditionalFeatures.GithubStyleTables; + } + + [TestMethod] + public void SimpleTable() + { + var markdown = "First Header | Second Header\n------------- | -------------\nContent Cell | Content Cell\nContent Cell | Content Cell\n"; + + var ast = + CommonMarkConverter.Parse( + markdown, + ReadSettings + ); + + string html; + using (var str = new StringWriter()) + { + CommonMarkConverter.ProcessStage3(ast, str, WriteSettings); + html = str.ToString(); + } + Assert.AreEqual("
First HeaderSecond Header
Content CellContent Cell
Content CellContent Cell
\r\n", html); + + var firstChild = ast.FirstChild; + Assert.AreEqual(BlockTag.Table, firstChild.Tag); + Assert.AreEqual(markdown, markdown.Substring(firstChild.SourcePosition, firstChild.SourceLength)); + Assert.IsNotNull(firstChild.TableHeaderAlignments); + Assert.AreEqual(2, firstChild.TableHeaderAlignments.Count); + Assert.AreEqual(TableHeaderAlignment.None, firstChild.TableHeaderAlignments[0]); + Assert.AreEqual(TableHeaderAlignment.None, firstChild.TableHeaderAlignments[1]); + + var headerRow = firstChild.FirstChild; + Assert.AreEqual(BlockTag.TableRow, headerRow.Tag); + Assert.AreEqual("First Header | Second Header\n", markdown.Substring(headerRow.SourcePosition, headerRow.SourceLength)); + + var headerCell1 = headerRow.FirstChild; + Assert.AreEqual(BlockTag.TableCell, headerCell1.Tag); + Assert.AreEqual("First Header", markdown.Substring(headerCell1.SourcePosition, headerCell1.SourceLength)); + + var headerCell2 = headerCell1.NextSibling; + Assert.AreEqual(BlockTag.TableCell, headerCell2.Tag); + Assert.AreEqual("Second Header", markdown.Substring(headerCell2.SourcePosition, headerCell2.SourceLength)); + Assert.IsNull(headerCell2.NextSibling); + + var firstRow = headerRow.NextSibling; + Assert.AreEqual(BlockTag.TableRow, firstRow.Tag); + Assert.AreEqual("Content Cell | Content Cell\n", markdown.Substring(firstRow.SourcePosition, firstRow.SourceLength)); + + var firstRowCell1 = firstRow.FirstChild; + Assert.AreEqual(BlockTag.TableCell, firstRowCell1.Tag); + Assert.AreEqual("Content Cell", markdown.Substring(firstRowCell1.SourcePosition, firstRowCell1.SourceLength)); + + var firstRowCell2 = firstRowCell1.NextSibling; + Assert.AreEqual(BlockTag.TableCell, firstRowCell2.Tag); + Assert.AreEqual("Content Cell", markdown.Substring(firstRowCell2.SourcePosition, firstRowCell2.SourceLength)); + Assert.IsNull(firstRowCell2.NextSibling); + + var secondRow = firstRow.NextSibling; + Assert.AreEqual(BlockTag.TableRow, secondRow.Tag); + Assert.AreEqual("Content Cell | Content Cell\n", markdown.Substring(secondRow.SourcePosition, secondRow.SourceLength)); + Assert.IsNull(secondRow.NextSibling); + + var secondRowCell1 = secondRow.FirstChild; + Assert.AreEqual(BlockTag.TableCell, secondRowCell1.Tag); + Assert.AreEqual("Content Cell", markdown.Substring(secondRowCell1.SourcePosition, secondRowCell1.SourceLength)); + + var secondRowCell2 = secondRowCell1.NextSibling; + Assert.AreEqual(BlockTag.TableCell, secondRowCell2.Tag); + Assert.AreEqual("Content Cell", markdown.Substring(secondRowCell2.SourcePosition, secondRowCell2.SourceLength)); + Assert.IsNull(secondRowCell2.NextSibling); + } + + [TestMethod] + public void SplitTable() + { + var markdown = +@"First Header | Second Header +------------- | ------------- +Content Cell1 | Content Cell2 +Content Cell3 | Content Cell4 +Hello world +"; + markdown = markdown.Replace("\r\n", "\n"); + + var ast = + CommonMarkConverter.Parse( + markdown, + ReadSettings + ); + + string html; + using (var str = new StringWriter()) + { + CommonMarkConverter.ProcessStage3(ast, str, WriteSettings); + html = str.ToString(); + } + Assert.AreEqual("
First HeaderSecond Header
Content Cell1Content Cell2
Content Cell3Content Cell4
\r\n

Hello world

\r\n\r\n", html); + + var firstChild = ast.FirstChild; + var secondChild = firstChild.NextSibling; + Assert.AreEqual(BlockTag.Table, firstChild.Tag); + var firstMarkdown = markdown.Substring(firstChild.SourcePosition, firstChild.SourceLength); + var shouldMatch = @"First Header | Second Header +------------- | ------------- +Content Cell1 | Content Cell2 +Content Cell3 | Content Cell4 +"; + shouldMatch = shouldMatch.Replace("\r\n", "\n"); + + Assert.AreEqual(shouldMatch, firstMarkdown); + Assert.IsNotNull(firstChild.TableHeaderAlignments); + Assert.AreEqual(2, firstChild.TableHeaderAlignments.Count); + Assert.AreEqual(TableHeaderAlignment.None, firstChild.TableHeaderAlignments[0]); + Assert.AreEqual(TableHeaderAlignment.None, firstChild.TableHeaderAlignments[1]); + + var headerRow = firstChild.FirstChild; + Assert.AreEqual(BlockTag.TableRow, headerRow.Tag); + Assert.AreEqual("First Header | Second Header\n", markdown.Substring(headerRow.SourcePosition, headerRow.SourceLength)); + + var headerCell1 = headerRow.FirstChild; + Assert.AreEqual(BlockTag.TableCell, headerCell1.Tag); + Assert.AreEqual("First Header", markdown.Substring(headerCell1.SourcePosition, headerCell1.SourceLength)); + + var headerCell2 = headerCell1.NextSibling; + Assert.AreEqual(BlockTag.TableCell, headerCell2.Tag); + Assert.AreEqual("Second Header", markdown.Substring(headerCell2.SourcePosition, headerCell2.SourceLength)); + Assert.IsNull(headerCell2.NextSibling); + + var firstRow = headerRow.NextSibling; + Assert.AreEqual(BlockTag.TableRow, firstRow.Tag); + Assert.AreEqual("Content Cell1 | Content Cell2\n", markdown.Substring(firstRow.SourcePosition, firstRow.SourceLength)); + + var firstRowCell1 = firstRow.FirstChild; + Assert.AreEqual(BlockTag.TableCell, firstRowCell1.Tag); + Assert.AreEqual("Content Cell1", markdown.Substring(firstRowCell1.SourcePosition, firstRowCell1.SourceLength)); + + var firstRowCell2 = firstRowCell1.NextSibling; + Assert.AreEqual(BlockTag.TableCell, firstRowCell2.Tag); + Assert.AreEqual("Content Cell2", markdown.Substring(firstRowCell2.SourcePosition, firstRowCell2.SourceLength)); + Assert.IsNull(firstRowCell2.NextSibling); + + var secondRow = firstRow.NextSibling; + Assert.AreEqual(BlockTag.TableRow, secondRow.Tag); + Assert.AreEqual("Content Cell3 | Content Cell4\n", markdown.Substring(secondRow.SourcePosition, secondRow.SourceLength)); + Assert.IsNull(secondRow.NextSibling); + + var secondRowCell1 = secondRow.FirstChild; + Assert.AreEqual(BlockTag.TableCell, secondRowCell1.Tag); + Assert.AreEqual("Content Cell3", markdown.Substring(secondRowCell1.SourcePosition, secondRowCell1.SourceLength)); + + var secondRowCell2 = secondRowCell1.NextSibling; + Assert.AreEqual(BlockTag.TableCell, secondRowCell2.Tag); + Assert.AreEqual("Content Cell4", markdown.Substring(secondRowCell2.SourcePosition, secondRowCell2.SourceLength)); + Assert.IsNull(secondRowCell2.NextSibling); + + Assert.AreEqual(BlockTag.Paragraph, secondChild.Tag); + var secondMarkdown = markdown.Substring(secondChild.SourcePosition, secondChild.SourceLength); + Assert.AreEqual("Hello world\n", secondMarkdown); + } + + [TestMethod] + public void WrappedTable() + { + var markdown = +@"Nope nope. + +First Header | Second Header +------------- | ------------- +Content Cell | Content Cell +Content Cell | Content Cell +Hello world +"; + + var ast = + CommonMarkConverter.Parse( + markdown, + ReadSettings + ); + + string html; + using (var str = new StringWriter()) + { + CommonMarkConverter.ProcessStage3(ast, str, WriteSettings); + html = str.ToString(); + } + Assert.AreEqual("

Nope nope.

\r\n
First HeaderSecond Header
Content CellContent Cell
Content CellContent Cell
\r\n

Hello world

\r\n\r\n", html); + + Assert.AreEqual(BlockTag.Paragraph, ast.FirstChild.Tag); + Assert.AreEqual(BlockTag.Table, ast.FirstChild.NextSibling.Tag); + Assert.AreEqual(BlockTag.Paragraph, ast.FirstChild.NextSibling.NextSibling.Tag); + } + + [TestMethod] + public void TableWithInlines() + { + var markdown = +@" Name | Description + ------------- | ----------- + Help | **Display the** [help](/help) window. + Close | _Closes_ a window "; + + var ast = + CommonMarkConverter.Parse( + markdown, + ReadSettings + ); + string html; + using (var str = new StringWriter()) + { + CommonMarkConverter.ProcessStage3(ast, str, WriteSettings); + html = str.ToString(); + } + Assert.AreEqual("
NameDescription
HelpDisplay the help window.
CloseCloses a window
\r\n", html); + } + + [TestMethod] + public void TableWithExtraPipes() + { + var markdown = "| First Header | Second Header |\n| ------------- | ------------- |\n| cell #11 | cell #12 |\n| cell #21 | cell #22 |\n"; + + var ast = + CommonMarkConverter.Parse( + markdown, + ReadSettings + ); + + var firstChild = ast.FirstChild; + Assert.AreEqual(BlockTag.Table, firstChild.Tag); + Assert.AreEqual(markdown, markdown.Substring(firstChild.SourcePosition, firstChild.SourceLength)); + + var headerRow = firstChild.FirstChild; + Assert.AreEqual(BlockTag.TableRow, headerRow.Tag); + Assert.AreEqual("| First Header | Second Header |\n", markdown.Substring(headerRow.SourcePosition, headerRow.SourceLength)); + + var headerCell1 = headerRow.FirstChild; + Assert.AreEqual(BlockTag.TableCell, headerCell1.Tag); + Assert.AreEqual("First Header", markdown.Substring(headerCell1.SourcePosition, headerCell1.SourceLength)); + + var headerCell2 = headerCell1.NextSibling; + Assert.AreEqual(BlockTag.TableCell, headerCell2.Tag); + Assert.AreEqual("Second Header", markdown.Substring(headerCell2.SourcePosition, headerCell2.SourceLength)); + Assert.IsNull(headerCell2.NextSibling); + + var firstRow = headerRow.NextSibling; + Assert.AreEqual(BlockTag.TableRow, firstRow.Tag); + Assert.AreEqual("| cell #11 | cell #12 |\n", markdown.Substring(firstRow.SourcePosition, firstRow.SourceLength)); + + var firstRowCell1 = firstRow.FirstChild; + Assert.AreEqual(BlockTag.TableCell, firstRowCell1.Tag); + Assert.AreEqual("cell #11", markdown.Substring(firstRowCell1.SourcePosition, firstRowCell1.SourceLength)); + + var firstRowCell2 = firstRowCell1.NextSibling; + Assert.AreEqual(BlockTag.TableCell, firstRowCell2.Tag); + Assert.AreEqual("cell #12", markdown.Substring(firstRowCell2.SourcePosition, firstRowCell2.SourceLength)); + Assert.IsNull(firstRowCell2.NextSibling); + + var secondRow = firstRow.NextSibling; + Assert.AreEqual(BlockTag.TableRow, secondRow.Tag); + Assert.AreEqual("| cell #21 | cell #22 |\n", markdown.Substring(secondRow.SourcePosition, secondRow.SourceLength)); + Assert.IsNull(secondRow.NextSibling); + + var secondRowCell1 = secondRow.FirstChild; + Assert.AreEqual(BlockTag.TableCell, secondRowCell1.Tag); + Assert.AreEqual("cell #21", markdown.Substring(secondRowCell1.SourcePosition, secondRowCell1.SourceLength)); + + var secondRowCell2 = secondRowCell1.NextSibling; + Assert.AreEqual(BlockTag.TableCell, secondRowCell2.Tag); + Assert.AreEqual("cell #22", markdown.Substring(secondRowCell2.SourcePosition, secondRowCell2.SourceLength)); + Assert.IsNull(secondRowCell2.NextSibling); + } + + [TestMethod] + public void TableCellMismatch() + { + var markdown = +@"| First Header | Second Header | +| ------------- | ------------- | +| 11 | +| 21 | 22 | 23 +"; + + var ast = CommonMarkConverter.Parse(markdown, ReadSettings); + string html; + using (var str = new StringWriter()) + { + CommonMarkConverter.ProcessStage3(ast, str, WriteSettings); + html = str.ToString(); + } + Assert.AreEqual("
First HeaderSecond Header
11
2122
\r\n", html); + } + + [TestMethod] + public void TableAlignment() + { + var markdown = +@"| H1 | H2 | H3 | H4 + --- | :-- | ---:| :-: | +|1|2|3|4| +"; + + var ast = CommonMarkConverter.Parse(markdown, ReadSettings); + var table = ast.FirstChild; + Assert.AreEqual(BlockTag.Table, table.Tag); + Assert.AreEqual(4, table.TableHeaderAlignments.Count); + Assert.AreEqual(TableHeaderAlignment.None, table.TableHeaderAlignments[0]); + Assert.AreEqual(TableHeaderAlignment.Left, table.TableHeaderAlignments[1]); + Assert.AreEqual(TableHeaderAlignment.Right, table.TableHeaderAlignments[2]); + Assert.AreEqual(TableHeaderAlignment.Center, table.TableHeaderAlignments[3]); + string html; + using (var str = new StringWriter()) + { + CommonMarkConverter.ProcessStage3(ast, str, WriteSettings); + html = str.ToString(); + } + Assert.AreEqual("
H1H2H3H4
1234
\r\n", html); + } + } +} \ No newline at end of file diff --git a/CommonMark/CommonMark.Base.csproj b/CommonMark/CommonMark.Base.csproj index c58644b..f05c409 100644 --- a/CommonMark/CommonMark.Base.csproj +++ b/CommonMark/CommonMark.Base.csproj @@ -86,6 +86,7 @@ + diff --git a/CommonMark/CommonMarkAdditionalFeatures.cs b/CommonMark/CommonMarkAdditionalFeatures.cs index f1e1814..07ae2ac 100644 --- a/CommonMark/CommonMarkAdditionalFeatures.cs +++ b/CommonMark/CommonMarkAdditionalFeatures.cs @@ -20,6 +20,20 @@ public enum CommonMarkAdditionalFeatures /// StrikethroughTilde = 1, + /// + /// The parser will recognize + /// + /// First Header | Second Header + /// ------------- | ------------- + /// Content Cell | Content Cell + /// Content Cell | Content Cell + /// + /// style tables. + /// + /// Refer to https://help.github.com/articles/organizing-information-with-tables/ for more examples + /// + GithubStyleTables = 2, + /// /// All additional features are enabled. /// diff --git a/CommonMark/CommonMarkConverter.cs b/CommonMark/CommonMarkConverter.cs index 69f6a3e..7f1d94c 100644 --- a/CommonMark/CommonMarkConverter.cs +++ b/CommonMark/CommonMarkConverter.cs @@ -111,7 +111,7 @@ public static Syntax.Block ProcessStage1(TextReader source, CommonMarkSettings s reader.ReadLine(line); while (line.Line != null) { - BlockMethods.IncorporateLine(line, ref cur); + BlockMethods.IncorporateLine(line, ref cur, settings); reader.ReadLine(line); } } @@ -132,7 +132,7 @@ public static Syntax.Block ProcessStage1(TextReader source, CommonMarkSettings s { do { - BlockMethods.Finalize(cur, line); + BlockMethods.Finalize(cur, line, settings); cur = cur.Parent; } while (cur != null); } diff --git a/CommonMark/Formatters/HtmlFormatterSlim.cs b/CommonMark/Formatters/HtmlFormatterSlim.cs index c9da0c6..b7ac2ad 100644 --- a/CommonMark/Formatters/HtmlFormatterSlim.cs +++ b/CommonMark/Formatters/HtmlFormatterSlim.cs @@ -212,6 +212,101 @@ internal static void PrintPosition(HtmlTextWriter writer, Inline inline) writer.WriteConstant("\""); } + static void WriteTable(Block table, HtmlTextWriter writer, CommonMarkSettings settings, Stack stack) + { + if ((settings.AdditionalFeatures & CommonMarkAdditionalFeatures.GithubStyleTables) == 0) + { + throw new CommonMarkException("Table encountered in AST, but GithubStyleTables are not enabled"); + } + + var header = table.FirstChild; + var firstRow = table.FirstChild.NextSibling; + + writer.WriteConstant(""); + writer.WriteConstant(""); + writer.WriteConstant(""); + + var numHeadings = 0; + + var curHeaderCell = header.FirstChild; + while (curHeaderCell != null) + { + var alignment = table.TableHeaderAlignments[numHeadings]; + + numHeadings++; + + if (alignment == TableHeaderAlignment.None) + { + writer.WriteConstant(""); + + curHeaderCell = curHeaderCell.NextSibling; + } + + writer.WriteConstant(""); + writer.WriteConstant(""); + + writer.WriteConstant(""); + var curRow = firstRow; + while (curRow != null) + { + writer.WriteConstant(""); + var curRowCell = curRow.FirstChild; + + var numCells = 0; + + while (curRowCell != null && numCells < numHeadings) + { + var alignment = table.TableHeaderAlignments[numCells]; + + numCells++; + + if (alignment == TableHeaderAlignment.None) + { + writer.WriteConstant(""); + + curRowCell = curRowCell.NextSibling; + } + + while (numCells < numHeadings) + { + numCells++; + writer.WriteConstant(""); + } + + writer.WriteConstant(""); + + curRow = curRow.NextSibling; + } + writer.WriteConstant(""); + writer.WriteConstant("
"); + } + else + { + switch (alignment) + { + case TableHeaderAlignment.Center: writer.WriteConstant(""); break; + case TableHeaderAlignment.Left: writer.WriteConstant(""); break; + case TableHeaderAlignment.Right: writer.WriteConstant(""); break; + default: throw new CommonMarkException("Unexpected TableHeaderAlignment [" + alignment + "]"); + } + } + InlinesToHtml(writer, curHeaderCell.InlineContent, settings, stack); + writer.WriteConstant("
"); + } + else + { + switch (alignment) + { + case TableHeaderAlignment.Center: writer.WriteConstant(""); break; + case TableHeaderAlignment.Left: writer.WriteConstant(""); break; + case TableHeaderAlignment.Right: writer.WriteConstant(""); break; + default: throw new CommonMarkException("Unexpected TableHeaderAlignment [" + alignment + "]"); + } + } + InlinesToHtml(writer, curRowCell.InlineContent, settings, stack); + writer.WriteConstant("
"); + } + private static void BlocksToHtmlInner(HtmlTextWriter writer, Block block, CommonMarkSettings settings) { var stack = new Stack(); @@ -357,6 +452,10 @@ private static void BlocksToHtmlInner(HtmlTextWriter writer, Block block, Common break; + case BlockTag.Table: + WriteTable(block, writer, settings, inlineStack); + break; + case BlockTag.ReferenceDefinition: break; diff --git a/CommonMark/Parser/BlockMethods.cs b/CommonMark/Parser/BlockMethods.cs index 5b1ce59..19e77e7 100644 --- a/CommonMark/Parser/BlockMethods.cs +++ b/CommonMark/Parser/BlockMethods.cs @@ -1,5 +1,6 @@ using System.Collections.Generic; using System.Globalization; +using System.Text; using CommonMark.Syntax; namespace CommonMark.Parser @@ -79,7 +80,7 @@ private static bool EndsWithBlankLine(Block block) /// /// Break out of all containing lists /// - private static void BreakOutOfLists(ref Block blockRef, LineInfo line) + private static void BreakOutOfLists(ref Block blockRef, LineInfo line, CommonMarkSettings settings) { Block container = blockRef; Block b = container.Top; @@ -92,16 +93,355 @@ private static void BreakOutOfLists(ref Block blockRef, LineInfo line) { while (container != null && container != b) { - Finalize(container, line); + Finalize(container, line, settings); container = container.Parent; } - Finalize(b, line); + Finalize(b, line, settings); blockRef = b.Parent; } } - public static void Finalize(Block b, LineInfo line) + static List ParseTableLine(string line, StringBuilder sb) + { + var ret = new List(); + + var i = 0; + + if (i < line.Length && line[i] == '|') i++; + + while (i < line.Length && char.IsWhiteSpace(line[i])) i++; + + for (; i < line.Length; i++) + { + var c = line[i]; + if (c == '\\') + { + i++; + continue; + } + + if (c == '|') + { + ret.Add(sb.ToString()); + sb.Clear(); + } + else + { + sb.Append(c); + } + } + + if (sb.Length != 0) + { + ret.Add(sb.ToString()); + sb.Clear(); + } + + return ret; + } + + static void MakeTableCells(Block row, StringBuilder sb) + { + var asStr = row.StringContent.ToString(); + + var offset = 0; + + for (var i = 0; i < asStr.Length; i++) + { + var c = asStr[i]; + + if (c == '|') + { + var text = sb.ToString(); + sb.Clear(); + + if (text.Length > 0) + { + var leadingWhiteSpace = 0; + while (leadingWhiteSpace < text.Length && char.IsWhiteSpace(text[leadingWhiteSpace])) leadingWhiteSpace++; + var trailingWhiteSpace = 0; + while (trailingWhiteSpace < text.Length && char.IsWhiteSpace(text[text.Length - trailingWhiteSpace - 1])) trailingWhiteSpace++; + + var cell = new Block(BlockTag.TableCell, row.SourcePosition + offset + leadingWhiteSpace); + cell.SourceLastPosition = cell.SourcePosition + text.Length - trailingWhiteSpace - leadingWhiteSpace; + cell.StringContent = new StringContent(); + cell.StringContent.Append(text, leadingWhiteSpace, text.Length - leadingWhiteSpace - trailingWhiteSpace); + + if (row.LastChild == null) + { + row.FirstChild = row.LastChild = cell; + } + else + { + row.LastChild.NextSibling = cell; + row.LastChild = cell; + } + + cell.IsOpen = false; + } + + offset += text.Length; + + // skip the | + offset++; + continue; + } + + if (c == '\\') + { + sb.Append(c); + if (i + 1 < asStr.Length) + { + sb.Append(asStr[i + 1]); + } + i++; + } + else + { + sb.Append(c); + } + } + + if (sb.Length > 0) + { + var text = sb.ToString(); + sb.Clear(); + + if (text.Length > 0) + { + var leadingWhiteSpace = 0; + while (leadingWhiteSpace < text.Length && char.IsWhiteSpace(text[leadingWhiteSpace])) leadingWhiteSpace++; + var trailingWhiteSpace = 0; + while (trailingWhiteSpace < text.Length && char.IsWhiteSpace(text[text.Length - trailingWhiteSpace - 1])) trailingWhiteSpace++; + + if (text.Length - leadingWhiteSpace - trailingWhiteSpace > 0) + { + var cell = new Block(BlockTag.TableCell, row.SourcePosition + offset + leadingWhiteSpace); + cell.SourceLastPosition = cell.SourcePosition + text.Length - trailingWhiteSpace - leadingWhiteSpace; + cell.StringContent = new StringContent(); + cell.StringContent.Append(text, leadingWhiteSpace, text.Length - leadingWhiteSpace - trailingWhiteSpace); + + if (row.LastChild == null) + { + row.FirstChild = row.LastChild = cell; + } + else + { + row.LastChild.NextSibling = cell; + row.LastChild = cell; + } + + cell.IsOpen = false; + } + } + } + } + + static void MakeTableRows(Block table, StringBuilder sb) + { + var asStr = table.StringContent.ToString(); + var lines = asStr.Split('\n'); + + var offset = 0; + + for (var i = 0; i < lines.Length; i++) + { + var line = lines[i]; + + var lineLength = line.Length; + var hasLineBreak = offset + lineLength < asStr.Length && asStr[offset + lineLength] == '\n'; + if (hasLineBreak) lineLength++; + + // skip the header row + if (i != 1 && !string.IsNullOrWhiteSpace(line)) + { + var rowStartsInDocument = table.SourcePosition + offset; + var row = new Block(BlockTag.TableRow, rowStartsInDocument); + row.SourceLastPosition = rowStartsInDocument + lineLength; + + row.StringContent = new StringContent(); + row.StringContent.Append(asStr, offset, row.SourceLength); + + if (table.LastChild == null) + { + table.FirstChild = row; + table.LastChild = row; + } + else + { + table.LastChild.NextSibling = row; + table.LastChild = row; + } + + MakeTableCells(row, sb); + row.IsOpen = false; + } + + offset += lineLength; + } + } + + static bool TryMakeTable(Block b, LineInfo line, CommonMarkSettings settings) + { + if ((settings.AdditionalFeatures & CommonMarkAdditionalFeatures.GithubStyleTables) == 0) return false; + + var asStr = b.StringContent.ToString(); + var lines = asStr.Split('\n'); + + if (lines.Length < 2) return false; + + var sb = new StringBuilder(); + + var columnsLine = ParseTableLine(lines[0], sb); + if (columnsLine.Count == 1) return false; + + var headerLine = ParseTableLine(lines[1], sb); + if (headerLine.Count == 1) return false; + + var headerAlignment = new List(); + + foreach (var headerPart in headerLine) + { + var trimmed = headerPart.Trim(); + if (trimmed.Length < 3) return false; + + var validateFrom = 0; + var startsWithColon = trimmed[validateFrom] == ':'; + if (startsWithColon) validateFrom++; + + var validateTo = trimmed.Length - 1; + var endsWithColon = trimmed[validateTo] == ':'; + if (endsWithColon) validateTo--; + + for (var i = validateFrom; i <= validateTo; i++) + { + // don't check for escapes, they don't count in header + if (trimmed[i] != '-') return false; + } + + if (!startsWithColon && !endsWithColon) + { + headerAlignment.Add(TableHeaderAlignment.None); + continue; + } + + if (startsWithColon && endsWithColon) + { + headerAlignment.Add(TableHeaderAlignment.Center); + continue; + } + + if (startsWithColon) + { + headerAlignment.Add(TableHeaderAlignment.Left); + } + + if (endsWithColon) + { + headerAlignment.Add(TableHeaderAlignment.Right); + } + } + + while (columnsLine.Count > 0 && string.IsNullOrWhiteSpace(columnsLine[0])) columnsLine.RemoveAt(0); + while (columnsLine.Count > 0 && string.IsNullOrWhiteSpace(columnsLine[columnsLine.Count - 1])) columnsLine.RemoveAt(columnsLine.Count - 1); + while (headerLine.Count > 0 && string.IsNullOrWhiteSpace(headerLine[0])) headerLine.RemoveAt(0); + while (headerLine.Count > 0 && string.IsNullOrWhiteSpace(headerLine[headerLine.Count - 1])) headerLine.RemoveAt(headerLine.Count - 1); + + if (columnsLine.Count < 2) return false; + if (headerLine.Count < columnsLine.Count) return false; + + var lastTableLine = 1; + + // it's a table! + for (var i = 2; i < lines.Length; i++) + { + var hasPipe = false; + for (var j = 0; j < lines[i].Length; j++) + { + var c = lines[i][j]; + if (c == '\\') + { + j++; + continue; + } + + if (c == '|') + { + hasPipe = true; + break; + } + } + if (!hasPipe) break; + + lastTableLine = i; + } + + if (lastTableLine + 1 < lines.Length && string.IsNullOrWhiteSpace(lines[lastTableLine + 1])) + { + lastTableLine++; + } + + var wholeBlockIsTable = lastTableLine == (lines.Length - 1); + + // No need to break, the whole block is a table now + if (wholeBlockIsTable) + { + b.Tag = BlockTag.Table; + b.TableHeaderAlignments = headerAlignment; + + // create table rows + MakeTableRows(b, sb); + return true; + } + + var takingCharsForTable = 0; + for (var i = 0; i <= lastTableLine; i++) + { + takingCharsForTable += lines[i].Length; + var hasFollowingLineBreak = takingCharsForTable < asStr.Length && asStr[takingCharsForTable] == '\n'; + if (hasFollowingLineBreak) + { + takingCharsForTable++; + } + } + + // get the text of the table separate + var tableBlockString = b.StringContent.TakeFromStart(takingCharsForTable, trim: true); + var newBlock = new Block(BlockTag.Paragraph, b.SourcePosition + tableBlockString.Length); + + // create the trailing paragraph, and set it's text and source positions + var newParagraph = b.Clone(); + newParagraph.StringContent = b.StringContent; + if (settings.TrackSourcePosition) + { + newParagraph.SourcePosition = b.SourcePosition + tableBlockString.Length; + newParagraph.SourceLastPosition = newParagraph.SourcePosition + (asStr.Length - tableBlockString.Length); + } + + // update the text of the table block + b.Tag = BlockTag.Table; + b.TableHeaderAlignments = headerAlignment; + b.StringContent = new StringContent(); + b.StringContent.Append(tableBlockString, 0, tableBlockString.Length); + if (settings.TrackSourcePosition) + { + b.SourceLastPosition = b.SourcePosition + tableBlockString.Length; + } + + // create table rows + MakeTableRows(b, sb); + + // put the new paragraph after the table + newParagraph.NextSibling = b.NextSibling; + b.NextSibling = newParagraph; + + Finalize(newParagraph, line, settings); + + return true; + } + + public static void Finalize(Block b, LineInfo line, CommonMarkSettings settings) { // don't do anything if the block is already closed if (!b.IsOpen) @@ -124,22 +464,24 @@ public static void Finalize(Block b, LineInfo line) b.SourceLastPosition = line.CalculateOrigin(0, false); } -#pragma warning disable 0618 - b.EndLine = (line.LineNumber > b.StartLine) ? line.LineNumber - 1 : line.LineNumber; -#pragma warning restore 0618 - switch (b.Tag) { case BlockTag.Paragraph: var sc = b.StringContent; + + if (TryMakeTable(b, line, settings)) + { + break; + } + if (!sc.StartsWith('[')) break; var subj = new Subject(b.Top.Document); sc.FillSubject(subj); var origPos = subj.Position; - while (subj.Position < subj.Buffer.Length - && subj.Buffer[subj.Position] == '[' + while (subj.Position < subj.Buffer.Length + && subj.Buffer[subj.Position] == '[' && 0 != InlineMethods.ParseReference(subj)) { } @@ -208,13 +550,13 @@ public static void Finalize(Block b, LineInfo line) /// Adds a new block as child of another. Return the child. /// /// Original: add_child - public static Block CreateChildBlock(Block parent, LineInfo line, BlockTag blockType, int startColumn) + public static Block CreateChildBlock(Block parent, LineInfo line, CommonMarkSettings settings, BlockTag blockType, int startColumn) { // if 'parent' isn't the kind of block that can accept this child, // then back up til we hit a block that can. while (!CanContain(parent.Tag, blockType)) { - Finalize(parent, line); + Finalize(parent, line, settings); parent = parent.Parent; } @@ -229,9 +571,6 @@ public static Block CreateChildBlock(Block parent, LineInfo line, BlockTag block if (lastChild != null) { lastChild.NextSibling = child; -#pragma warning disable 0618 - child.Previous = lastChild; -#pragma warning restore 0618 } else { @@ -295,7 +634,7 @@ public static void ProcessInlines(Block block, DocumentData data, CommonMarkSett while (block != null) { var tag = block.Tag; - if (tag == BlockTag.Paragraph || tag == BlockTag.AtxHeading || tag == BlockTag.SetextHeading) + if (tag == BlockTag.Paragraph || tag == BlockTag.AtxHeading || tag == BlockTag.SetextHeading || tag == BlockTag.TableCell) { sc = block.StringContent; if (sc != null) @@ -403,7 +742,7 @@ private static bool ListsMatch(ListData listData, ListData itemData) { return (listData.ListType == itemData.ListType && listData.Delimiter == itemData.Delimiter && - // list_data.marker_offset == item_data.marker_offset && + // list_data.marker_offset == item_data.marker_offset && listData.BulletChar == itemData.BulletChar); } @@ -481,12 +820,12 @@ private static void AdvanceOffset(string line, int count, bool columns, ref int // Process one line at a time, modifying a block. // Returns 0 if successful. curptr is changed to point to // the currently open block. - public static void IncorporateLine(LineInfo line, ref Block curptr) + public static void IncorporateLine(LineInfo line, ref Block curptr, CommonMarkSettings settings) { var ln = line.Line; Block last_matched_container; - + // offset is the char position in the line var offset = 0; @@ -646,7 +985,7 @@ public static void IncorporateLine(LineInfo line, ref Block curptr) // check to see if we've hit 2nd blank line, break out of list: if (blank && container.IsLastLineBlank) - BreakOutOfLists(ref container, line); + BreakOutOfLists(ref container, line, settings); var maybeLazy = cur.Tag == BlockTag.Paragraph; @@ -669,21 +1008,21 @@ public static void IncorporateLine(LineInfo line, ref Block curptr) AdvanceOffset(ln, first_nonspace + 1 - offset, false, ref offset, ref column, ref remainingSpaces); AdvanceOptionalSpace(ln, ref offset, ref column, ref remainingSpaces); - container = CreateChildBlock(container, line, BlockTag.BlockQuote, first_nonspace); + container = CreateChildBlock(container, line, settings, BlockTag.BlockQuote, first_nonspace); } else if (!indented && curChar == '#' && 0 != (matched = Scanner.scan_atx_heading_start(ln, first_nonspace, ln.Length, out i))) { AdvanceOffset(ln, first_nonspace + matched - offset, false, ref offset, ref column, ref remainingSpaces); - container = CreateChildBlock(container, line, BlockTag.AtxHeading, first_nonspace); + container = CreateChildBlock(container, line, settings, BlockTag.AtxHeading, first_nonspace); container.Heading = new HeadingData(i); } else if (!indented && (curChar == '`' || curChar == '~') && 0 != (matched = Scanner.scan_open_code_fence(ln, first_nonspace, ln.Length))) { - container = CreateChildBlock(container, line, BlockTag.FencedCode, first_nonspace); + container = CreateChildBlock(container, line, settings, BlockTag.FencedCode, first_nonspace); container.FencedCodeData = new FencedCodeData(); container.FencedCodeData.FenceChar = curChar; container.FencedCodeData.FenceLength = matched; @@ -692,13 +1031,13 @@ public static void IncorporateLine(LineInfo line, ref Block curptr) AdvanceOffset(ln, first_nonspace + matched - offset, false, ref offset, ref column, ref remainingSpaces); } - else if (!indented && curChar == '<' && + else if (!indented && curChar == '<' && (0 != (matched = (int)Scanner.scan_html_block_start(ln, first_nonspace, ln.Length)) || (container.Tag != BlockTag.Paragraph && 0 != (matched = (int)Scanner.scan_html_block_start_7(ln, first_nonspace, ln.Length))) )) { - container = CreateChildBlock(container, line, BlockTag.HtmlBlock, first_nonspace); + container = CreateChildBlock(container, line, settings, BlockTag.HtmlBlock, first_nonspace); container.HtmlBlockType = (HtmlBlockType)matched; // note, we don't adjust offset because the tag is part of the text @@ -712,19 +1051,19 @@ public static void IncorporateLine(LineInfo line, ref Block curptr) AdvanceOffset(ln, ln.Length - 1 - offset, false, ref offset, ref column, ref remainingSpaces); } - else if (!indented - && !(container.Tag == BlockTag.Paragraph && !all_matched) + else if (!indented + && !(container.Tag == BlockTag.Paragraph && !all_matched) && 0 != (Scanner.scan_thematic_break(ln, first_nonspace, ln.Length))) { // it's only now that we know the line is not part of a setext heading: - container = CreateChildBlock(container, line, BlockTag.ThematicBreak, first_nonspace); - Finalize(container, line); + container = CreateChildBlock(container, line, settings, BlockTag.ThematicBreak, first_nonspace); + Finalize(container, line, settings); container = container.Parent; AdvanceOffset(ln, ln.Length - 1 - offset, false, ref offset, ref column, ref remainingSpaces); } - else if ((!indented || container.Tag == BlockTag.List) + else if ((!indented || container.Tag == BlockTag.List) && 0 != (matched = ParseListMarker(ln, first_nonspace, out data))) { @@ -769,18 +1108,18 @@ public static void IncorporateLine(LineInfo line, ref Block curptr) if (container.Tag != BlockTag.List || !ListsMatch(container.ListData, data)) { - container = CreateChildBlock(container, line, BlockTag.List, first_nonspace); + container = CreateChildBlock(container, line, settings, BlockTag.List, first_nonspace); container.ListData = data; } // add the list item - container = CreateChildBlock(container, line, BlockTag.ListItem, first_nonspace); + container = CreateChildBlock(container, line, settings, BlockTag.ListItem, first_nonspace); container.ListData = data; } else if (indented && !maybeLazy && !blank) { AdvanceOffset(ln, CODE_INDENT, true, ref offset, ref column, ref remainingSpaces); - container = CreateChildBlock(container, line, BlockTag.IndentedCode, offset); + container = CreateChildBlock(container, line, settings, BlockTag.IndentedCode, offset); } else { @@ -844,7 +1183,7 @@ public static void IncorporateLine(LineInfo line, ref Block curptr) while (cur != last_matched_container) { - Finalize(cur, line); + Finalize(cur, line, settings); cur = cur.Parent; if (cur == null) @@ -880,7 +1219,7 @@ public static void IncorporateLine(LineInfo line, ref Block curptr) if (Scanner.scan_html_block_end(container.HtmlBlockType, ln, first_nonspace, ln.Length)) { - Finalize(container, line); + Finalize(container, line, settings); container = container.Parent; } @@ -915,7 +1254,7 @@ public static void IncorporateLine(LineInfo line, ref Block curptr) p--; AddLine(container, line, ln, first_nonspace, remainingSpaces, p - first_nonspace + 1); - Finalize(container, line); + Finalize(container, line, settings); container = container.Parent; } @@ -929,7 +1268,7 @@ public static void IncorporateLine(LineInfo line, ref Block curptr) { // create paragraph container for line - container = CreateChildBlock(container, line, BlockTag.Paragraph, first_nonspace); + container = CreateChildBlock(container, line, settings, BlockTag.Paragraph, first_nonspace); AddLine(container, line, ln, first_nonspace, remainingSpaces); } @@ -944,7 +1283,7 @@ public static void IncorporateLine(LineInfo line, ref Block curptr) } } - private static void FindFirstNonspace(string ln, int offset, int column, out int first_nonspace, + private static void FindFirstNonspace(string ln, int offset, int column, out int first_nonspace, out int first_nonspace_column, out char curChar) { var chars_to_tab = TabSize - (column % TabSize); diff --git a/CommonMark/Parser/TabTextReader.cs b/CommonMark/Parser/TabTextReader.cs index f426ff9..f1865c3 100644 --- a/CommonMark/Parser/TabTextReader.cs +++ b/CommonMark/Parser/TabTextReader.cs @@ -57,6 +57,7 @@ public void ReadLine(LineInfo line) if (c == '\r' || c == '\n') goto IL_4A; + if (c == '\0') this._buffer[num] = '\uFFFD'; diff --git a/CommonMark/Syntax/Block.cs b/CommonMark/Syntax/Block.cs index 792f911..73d9232 100644 --- a/CommonMark/Syntax/Block.cs +++ b/CommonMark/Syntax/Block.cs @@ -32,7 +32,6 @@ public Block(BlockTag tag, int startLine, int startColumn, int sourcePosition) { this.Tag = tag; this.StartLine = startLine; - this.EndLine = startLine; this.StartColumn = startColumn; this.SourcePosition = sourcePosition; this.IsOpen = true; @@ -88,14 +87,6 @@ internal static Block CreateDocument() [System.ComponentModel.EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)] public int StartColumn { get; set; } - /// - /// Gets or sets the number of the last line in the source text that contains this element. - /// - [Obsolete("This is deprecated in favor of SourcePosition/SourceLength and will be removed in future. If you have a use case where this property cannot be replaced with the new ones, please log an issue at https://github.com/Knagis/CommonMark.NET", false)] - [System.Diagnostics.DebuggerBrowsable(System.Diagnostics.DebuggerBrowsableState.Never)] - [System.ComponentModel.EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)] - public int EndLine { get; set; } - /// /// Gets or sets the position of the block element within the source data. This position is before /// any opening characters. must be enabled @@ -174,6 +165,11 @@ public int SourceLength /// public FencedCodeData FencedCodeData { get; set; } + /// + /// Gets or sets the alignment specified as part of a table heading in a GithubStyleTables. + /// + public List TableHeaderAlignments { get; set; } + /// /// Gets or sets the additional properties that apply to heading elements. /// @@ -220,12 +216,9 @@ public Dictionary ReferenceMap /// public Block NextSibling { get; set; } - /// - /// Gets or sets the previous sibling of this block element. null if this is the first element. - /// - [Obsolete("This property will be removed in future. If you have a use case where this property is required, please log an issue at https://github.com/Knagis/CommonMark.NET", false)] - [System.Diagnostics.DebuggerBrowsable(System.Diagnostics.DebuggerBrowsableState.Never)] - [System.ComponentModel.EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)] - public Block Previous { get; set; } + internal Block Clone() + { + return (Block)this.MemberwiseClone(); + } } } diff --git a/CommonMark/Syntax/BlockTag.cs b/CommonMark/Syntax/BlockTag.cs index d3e034d..bc242a5 100644 --- a/CommonMark/Syntax/BlockTag.cs +++ b/CommonMark/Syntax/BlockTag.cs @@ -83,6 +83,21 @@ public enum BlockTag : byte /// /// A text block that contains only link reference definitions. /// - ReferenceDefinition + ReferenceDefinition, + + /// + /// A table block + /// + Table, + + /// + /// A table row block + /// + TableRow, + + /// + /// A table cell block + /// + TableCell, } } diff --git a/CommonMark/Syntax/TableHeaderAlignment.cs b/CommonMark/Syntax/TableHeaderAlignment.cs new file mode 100644 index 0000000..810245d --- /dev/null +++ b/CommonMark/Syntax/TableHeaderAlignment.cs @@ -0,0 +1,28 @@ +namespace CommonMark.Syntax +{ + /// + /// Defines the alignment specified in a header column for a GithubStyleTable + /// + public enum TableHeaderAlignment + { + /// + /// No alignment specified + /// + None = 0, + + /// + /// Left alignment + /// + Left = 1, + + /// + /// Right alignment + /// + Right = 2, + + /// + /// Center alignment + /// + Center = 3 + } +} \ No newline at end of file