From 53a6e43316d98e294da9d18767926d399202de9b Mon Sep 17 00:00:00 2001 From: "Fletcher T. Penney" Date: Tue, 22 Mar 2016 11:35:33 -0400 Subject: [PATCH 1/7] FIXED: Don't allow math spans inside strong/emph --- src/parser.leg | 48 ++++++++++++++++++++++++++++++++++++------------ 1 file changed, 36 insertions(+), 12 deletions(-) diff --git a/src/parser.leg b/src/parser.leg index af2d88c..8a1a443 100644 --- a/src/parser.leg +++ b/src/parser.leg @@ -407,18 +407,28 @@ SingleDollarMathStart = '$' !(Spacechar | Newline | '$' ) SingleDollarMathEnd = (!'\\' !'$' Nonspacechar) '$' !NonPunctuation -SingleDollarMath = < SingleDollarMathStart (!SingleDollarMathEnd !(BlankLine BlankLine) .)* SingleDollarMathEnd > +SingleDollarMath = < SingleDollarMathText > { $$ = str(yytext); $$->key = MATHSPAN; } DoubleDollarMathStart = '$' '$' !(Spacechar | Newline) DoubleDollarMathEnd = (!'\\' Nonspacechar) '$' '$' !NonPunctuation -DoubleDollarMath = < DoubleDollarMathStart (!DoubleDollarMathEnd !(BlankLine BlankLine) .)* DoubleDollarMathEnd > +DoubleDollarMath = < DoubleDollarMathText > { $$ = str(yytext); $$->key = MATHSPAN; } DollarMath = SingleDollarMath | DoubleDollarMath + +MathText = &{ !ext(EXT_COMPATIBILITY) } + (MathSpanText | SingleDollarMathText | DoubleDollarMathText) + +MathSpanText = '\\' ( ('\\[' (!'\\\\]' .)* '\\\\]') | ('\\(' (!'\\\\)' .)* '\\\\)') ) + +SingleDollarMathText = SingleDollarMathStart (!SingleDollarMathEnd !(BlankLine BlankLine) .)* SingleDollarMathEnd + +DoubleDollarMathText = DoubleDollarMathStart (!DoubleDollarMathEnd !(BlankLine BlankLine) .)* DoubleDollarMathEnd + # This keeps the parser from getting bogged down on long strings of '*' or '_', # or strings of '*' or '_' with space on each side: UlOrStarLine = (UlLine | StarLine) { $$ = str(yytext); } @@ -439,16 +449,23 @@ Emph = < EmphMatch > EmphMatch = (EmphStar | EmphUl) -EmphStar = '*' !Whitespace +EmphStar = '*' !Whitespace !MathText (StrongMatch | BracketedText | (!'*' !(BlankLine BlankLine) .) ) - ((Whitespace? StrongMatch) | (Whitespace? EmphMatch) | (Whitespace? BracketedText) | - (!'*' !(BlankLine BlankLine) !Whitespace .) | (!'*' !(BlankLine BlankLine) !(Whitespace '*') .) + ( + !MathText + ( + (Whitespace? StrongMatch) | (Whitespace? EmphMatch) | (Whitespace? BracketedText) | + (!'*' !(BlankLine BlankLine) !Whitespace .) | (!'*' !(BlankLine BlankLine) !(Whitespace '*') .) + ) )* '*' -EmphUl = '_' !Whitespace +EmphUl = '_' !Whitespace !MathText (StrongMatch | BracketedText | (!'_' !(BlankLine BlankLine) .) ) - (StrongMatch | EmphMatch | BracketedText | (!'_' !(BlankLine BlankLine) .) )* + ( + !MathText + (StrongMatch | EmphMatch | BracketedText | (!'_' !(BlankLine BlankLine) .)) + )* '_' &('_'* (SpecialChar | Whitespace)) Strong = < StrongMatch > @@ -463,16 +480,23 @@ Strong = < StrongMatch > StrongMatch = (StrongStar | StrongUl) -StrongStar = "**" !Whitespace +StrongStar = "**" !Whitespace !MathText (EmphMatch | BracketedText | (!'*' !(BlankLine BlankLine) .) ) - ((Whitespace? StrongMatch) | (Whitespace? EmphMatch) | (Whitespace? BracketedText) | - (!'*' !(BlankLine BlankLine) !Whitespace .) | (!'*' !(BlankLine BlankLine) !(Whitespace '*') .) + ( + !MathText + ( + (Whitespace? StrongMatch) | (Whitespace? EmphMatch) | (Whitespace? BracketedText) | + (!'*' !(BlankLine BlankLine) !Whitespace .) | (!'*' !(BlankLine BlankLine) !(Whitespace '*') .) + ) )* "**" -StrongUl = "__" !Whitespace +StrongUl = "__" !Whitespace !MathText (EmphMatch | BracketedText | (!'_' !(BlankLine BlankLine) .) ) - (StrongMatch | EmphMatch | BracketedText | (!'_' !(BlankLine BlankLine) .) )* + ( + !MathText + (StrongMatch | EmphMatch | BracketedText | (!'_' !(BlankLine BlankLine) .)) + )* "__" &('_'* (SpecialChar | Whitespace)) Link = ExplicitLink | ReferenceLink | AutoLink From 5fda9dd48bfede1530e020ebe6154c45347f8508 Mon Sep 17 00:00:00 2001 From: "Fletcher T. Penney" Date: Thu, 24 Mar 2016 11:59:14 -0400 Subject: [PATCH 2/7] FIXED: Fix slow parsing of complex strong/emph (Fixes #18) --- src/parser.leg | 84 ++++++++++++++++++++++++++++++++++---------------- 1 file changed, 58 insertions(+), 26 deletions(-) diff --git a/src/parser.leg b/src/parser.leg index 8a1a443..c9f6404 100644 --- a/src/parser.leg +++ b/src/parser.leg @@ -449,24 +449,40 @@ Emph = < EmphMatch > EmphMatch = (EmphStar | EmphUl) -EmphStar = '*' !Whitespace !MathText - (StrongMatch | BracketedText | (!'*' !(BlankLine BlankLine) .) ) +EmphStarOpen = '*' !Whitespace + +EmphStarClose = '*' + +EmphStar = EmphStarOpen + (StrongMatch | BracketedText | MathText | (!'*' .) ) ( - !MathText - ( - (Whitespace? StrongMatch) | (Whitespace? EmphMatch) | (Whitespace? BracketedText) | - (!'*' !(BlankLine BlankLine) !Whitespace .) | (!'*' !(BlankLine BlankLine) !(Whitespace '*') .) - ) + &(Whitespace) ( + Whitespace + (StrongMatch | EmphMatch) + ) | + MathText | + BracketedText | + (!'*' !Whitespace .) | + (!'*' !(BlankLine BlankLine) !(Whitespace '*') .) )* - '*' + EmphStarClose + +EmphUlOpen = '_' !Whitespace + +EmphUlClose = '_' &('__'? !'_' (SpecialChar | Whitespace | Eof)) -EmphUl = '_' !Whitespace !MathText - (StrongMatch | BracketedText | (!'_' !(BlankLine BlankLine) .) ) +EmphUl = EmphUlOpen + (StrongMatch | BracketedText | MathText | (!'_' .) ) ( - !MathText - (StrongMatch | EmphMatch | BracketedText | (!'_' !(BlankLine BlankLine) .)) + &(Whitespace) ( + Whitespace + (StrongMatch | EmphMatch) + ) | + MathText | + BracketedText | + (!'_' !(BlankLine BlankLine) .) )* - '_' &('_'* (SpecialChar | Whitespace)) + EmphUlClose Strong = < StrongMatch > { @@ -480,24 +496,40 @@ Strong = < StrongMatch > StrongMatch = (StrongStar | StrongUl) -StrongStar = "**" !Whitespace !MathText - (EmphMatch | BracketedText | (!'*' !(BlankLine BlankLine) .) ) +StrongStarOpen = '**' !Whitespace + +StrongStarClose = '**' + +StrongStar = StrongStarOpen + (EmphMatch | BracketedText | MathText | (!'*' .) ) ( - !MathText - ( - (Whitespace? StrongMatch) | (Whitespace? EmphMatch) | (Whitespace? BracketedText) | - (!'*' !(BlankLine BlankLine) !Whitespace .) | (!'*' !(BlankLine BlankLine) !(Whitespace '*') .) - ) + &(Whitespace) ( + Whitespace + (StrongMatch | EmphMatch) + ) | + MathText | + BracketedText | + (!'*' !Whitespace .) | + (!'*' !(BlankLine BlankLine) !(Whitespace '*') .) )* - "**" + StrongStarClose + +StrongUlOpen = '__' !Whitespace + +StrongUlClose = '__' &('_'? !'_' (SpecialChar | Whitespace)) -StrongUl = "__" !Whitespace !MathText - (EmphMatch | BracketedText | (!'_' !(BlankLine BlankLine) .) ) +StrongUl = StrongUlOpen + (EmphMatch | BracketedText | MathText | (!'_' .) ) ( - !MathText - (StrongMatch | EmphMatch | BracketedText | (!'_' !(BlankLine BlankLine) .)) + &(Whitespace) ( + Whitespace + (StrongMatch | EmphMatch) + ) | + MathText | + BracketedText | + (!'_' !(BlankLine BlankLine) .) )* - "__" &('_'* (SpecialChar | Whitespace)) + StrongUlClose Link = ExplicitLink | ReferenceLink | AutoLink From be14ec4b81db4e081502056dbcc76c2d52df891d Mon Sep 17 00:00:00 2001 From: "Fletcher T. Penney" Date: Thu, 24 Mar 2016 12:02:47 -0400 Subject: [PATCH 3/7] CHANGED: Update test suite --- submodules/MarkdownTest | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/submodules/MarkdownTest b/submodules/MarkdownTest index 45d54df..8d5ad66 160000 --- a/submodules/MarkdownTest +++ b/submodules/MarkdownTest @@ -1 +1 @@ -Subproject commit 45d54dfaae4c3715366faf161118ecb2422201ee +Subproject commit 8d5ad66e3bbaeb0de8646783bfe964261ddb39c7 From 5d9a834b1f2acfa9b54a2ed23c683eee9987e14f Mon Sep 17 00:00:00 2001 From: "Fletcher T. Penney" Date: Thu, 24 Mar 2016 16:59:31 -0400 Subject: [PATCH 4/7] FIXED: Improve performance; FIXED: Fix additional edge case in emph/strong parser --- src/parser.leg | 43 +++++++++++++++++++++++++++++++------------ 1 file changed, 31 insertions(+), 12 deletions(-) diff --git a/src/parser.leg b/src/parser.leg index c9f6404..588aef6 100644 --- a/src/parser.leg +++ b/src/parser.leg @@ -102,7 +102,7 @@ YAMLStop = ("---"|"...") BlankLine DocForMetaDataOnly = BOM? a:StartList ( &( YAMLStart? MetaDataKey Sp ':' Sp (!Newline)) MetaData { a = cons($$, a); } )? - ( SkipBlock )* + ( . )* BlankLine* { ((parser_data *)G->data)->result = reverse_list(a); @@ -251,8 +251,7 @@ SpecialChar = '*' | '_' | '`' | '&' | '[' | ']' | '(' | ')' | '<' | '!' | '#' ExtendedSpecialChar = &{ ext(EXT_SMART) } ('.' | '-' | '\'' | '"') | &{ ext(EXT_NOTES) } ( '^' ) | &{ ext(EXT_CRITIC) } ( '{' ) - | &{ !ext(EXT_COMPATIBILITY) } ( '~' ) - | &{ !ext(EXT_COMPATIBILITY) } ( '|' ) + | &{ !ext(EXT_COMPATIBILITY) } ( '~' | '|' ) Punctuation = '.' | ',' | '?' | '!' | ';' | ':' | '。' | '、' NonPunctuation = !(Punctuation | SpecialChar | Spacechar | Newline) . @@ -453,15 +452,26 @@ EmphStarOpen = '*' !Whitespace EmphStarClose = '*' +OneStar = '*' !'*' + +TwoStar = '**' !'*' + +OneTwoStar = '*' '*'? !'*' + +ThreeStar = '***' !'*' + EmphStar = EmphStarOpen (StrongMatch | BracketedText | MathText | (!'*' .) ) ( &(Whitespace) ( Whitespace (StrongMatch | EmphMatch) - ) | - MathText | + ) | + &TwoStar ( + StrongStar + ) | BracketedText | + MathText | (!'*' !Whitespace .) | (!'*' !(BlankLine BlankLine) !(Whitespace '*') .) )* @@ -477,9 +487,12 @@ EmphUl = EmphUlOpen &(Whitespace) ( Whitespace (StrongMatch | EmphMatch) - ) | - MathText | + ) | + &OneTwoStar ( + StrongStar | EmphStar + ) | BracketedText | + MathText | (!'_' !(BlankLine BlankLine) .) )* EmphUlClose @@ -506,9 +519,12 @@ StrongStar = StrongStarOpen &(Whitespace) ( Whitespace (StrongMatch | EmphMatch) - ) | + ) | + &OneStar ( + EmphStar + ) | + BracketedText | MathText | - BracketedText | (!'*' !Whitespace .) | (!'*' !(BlankLine BlankLine) !(Whitespace '*') .) )* @@ -525,8 +541,11 @@ StrongUl = StrongUlOpen Whitespace (StrongMatch | EmphMatch) ) | + &OneTwoStar ( + StrongStar | EmphStar + ) | + BracketedText | MathText | - BracketedText | (!'_' !(BlankLine BlankLine) .) )* StrongUlClose @@ -1541,9 +1560,9 @@ TOCPlain = ( (y:Fenced { free_node_tree(y); } ) | (!BlankLine !Heading z:Line { # Critic Markup -CriticMarkup = CriticAddition | CriticDeletion | CriticSubstitution | CriticHighlight | CriticComment +CriticMarkup = &'{' (CriticAddition | CriticDeletion | CriticSubstitution | CriticHighlight | CriticComment) -CriticClean = CleanAddition | CleanDeletion | CleanSubstitution | CleanHighlight | CleanComment +CriticClean = &'{' (CleanAddition | CleanDeletion | CleanSubstitution | CleanHighlight | CleanComment) AddStart = '{++' From 0af336712ab2008824283a34439e494a453bd63a Mon Sep 17 00:00:00 2001 From: "Fletcher T. Penney" Date: Wed, 8 Jun 2016 09:58:22 -0400 Subject: [PATCH 5/7] Improve accuracy and performance of strong/emph --- src/parser.leg | 48 ++++++++++++++++++++++++++++++++++-------------- 1 file changed, 34 insertions(+), 14 deletions(-) diff --git a/src/parser.leg b/src/parser.leg index 588aef6..3c2acf8 100644 --- a/src/parser.leg +++ b/src/parser.leg @@ -314,22 +314,27 @@ Inlines = a:StartList ( !Endline Inline { a = cons($$, a); } Inline = #&{ check_timeout((parser_data *)G->data) } # TODO: the check_timeout function still slows us down -- do we still need it?? &{ ext(EXT_CRITIC) } CriticMarkup - | &{ !ext(EXT_COMPATIBILITY) } DollarMath + | &'$' &{ !ext(EXT_COMPATIBILITY) } DollarMath | Str - | &{ !ext(EXT_COMPATIBILITY) } MathSpan + | &'\\\\' &{ !ext(EXT_COMPATIBILITY) } MathSpan | Endline | UlOrStarLine | Space | Strong | Emph - | &{ !ext(EXT_COMPATIBILITY) } CitationReference - | &{ !ext(EXT_COMPATIBILITY) } Variable - | Image - | Link - | NoteReference + | &'[' ( + &{ !ext(EXT_COMPATIBILITY) } CitationReference + | &{ !ext(EXT_COMPATIBILITY) } Variable + | NoteReference + | Link + ) + | &'![' Image | Code - | MarkdownHtmlTagOpen - | RawHtml + | &'<' ( + Link + | MarkdownHtmlTagOpen + | RawHtml + ) | Entity | EscapedChar | Smart @@ -479,7 +484,7 @@ EmphStar = EmphStarOpen EmphUlOpen = '_' !Whitespace -EmphUlClose = '_' &('__'? !'_' (SpecialChar | Whitespace | Eof)) +EmphUlClose = '_' &('__'? !'_' ( Whitespace | Punctuation | SpecialChar | Eof)) EmphUl = EmphUlOpen (StrongMatch | BracketedText | MathText | (!'_' .) ) @@ -532,7 +537,7 @@ StrongStar = StrongStarOpen StrongUlOpen = '__' !Whitespace -StrongUlClose = '__' &('_'? !'_' (SpecialChar | Whitespace)) +StrongUlClose = '__' &('_'? !'_' ( Whitespace | Punctuation | SpecialChar | Eof)) StrongUl = StrongUlOpen (EmphMatch | BracketedText | MathText | (!'_' .) ) @@ -572,7 +577,7 @@ ReferenceLinkSingle = ( a:Label < (Spnl "[]")? > ) CitationReference = CitationReferenceDouble | CitationReferenceSingle -CitationReferenceDouble = !"[]" a:Label < Spnl > !"[]" b:RawCitationReference +CitationReferenceDouble = !"[]" !"[#" a:Label < Spnl > !"[]" b:RawCitationReference { char *label; label = label_from_node_tree(a); @@ -737,8 +742,19 @@ Image = '!' ( !AutoLink Link ) $$->key = IMAGE; } +Label = &( '[' ) + < BracketedText > + { + yytext[strlen(yytext) - 1] = '\0'; + $$ = markdown_chunk_to_node(&yytext[1], ((parser_data *)G->data)->extensions); + if ($$ == NULL) { + $$ = str(""); + } else { + $$->key = LIST; + } + } -Label = < "[" (&"[%" | !'[' ( !'^' !'#' &{ ext(EXT_NOTES) } | &. &{ !ext(EXT_NOTES) } )) +Label2 = < "[" (&"[%" | !'[' ( !'^' !'#' &{ ext(EXT_NOTES) } | &. &{ !ext(EXT_NOTES) } )) a:StartList ( !']' Inline { a = cons($$, a); } )* ']'> @@ -1457,7 +1473,11 @@ RightAlign = ('-'+ | '='+)':' &(!'-' !'=' !':') CellDivider = '|' -BracketedText = '[' ((!'[' !']' .) | BracketedText )* ']' # Matches text within [...], but allows for recursive brackets +BracketedTextz = &('[' (!(Endline Endline) !']' .)* ']') BracketedText2 + +BracketedText = &('[' (!(BlankLine BlankLine) !']' .)* ']') BracketedText2 + +BracketedText2 = '[' ( ('\\' .) | (!'[' !']' .) | BracketedText2 )* ']' # Matches text within [...], but allows for recursive brackets TableCaption = b:StartList a:StartList (< BracketedText > { From 0739aa53481afeebf194a0774c7d4a32e6c3b9b2 Mon Sep 17 00:00:00 2001 From: "Fletcher T. Penney" Date: Wed, 8 Jun 2016 10:01:59 -0400 Subject: [PATCH 6/7] CHANGED: Update greg --- submodules/greg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/submodules/greg b/submodules/greg index 98ecf13..265003c 160000 --- a/submodules/greg +++ b/submodules/greg @@ -1 +1 @@ -Subproject commit 98ecf13c3502b47e342b09fb7b0cad453e1bb797 +Subproject commit 265003c8cb3022183f48df3e2684336b788af39d From e212934fa48be8be76a5dcb1c7053ec85ee305d6 Mon Sep 17 00:00:00 2001 From: "Fletcher T. Penney" Date: Wed, 8 Jun 2016 10:49:46 -0400 Subject: [PATCH 7/7] 5.3.0 --- CHANGELOG.md | 10 ++++++++++ CMakeLists.txt | 4 ++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 72df146..f09acb7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,14 @@ # MultiMarkdown Change Log # +## [5.3.0] - 2016-06-08 ## + +* CHANGED: Update test suite +* FIXED: Don't allow math spans inside strong/emph +* FIXED: Fix additional edge case in emph/strong parser +* FIXED: Fix slow parsing of complex strong/emph (Fixes #18) +* FIXED: Improve accuracy and performance of strong/emph +* FIXED: Improve performance + ## [5.2.0] - 2016-03-16 ## @@ -96,3 +105,4 @@ [5.0.1]: https://github.com/fletcher/MultiMarkdown-5/releases/tag/5.0.1 [5.1.0]: https://github.com/fletcher/MultiMarkdown-5/releases/tag/5.1.0 [5.2.0]: https://github.com/fletcher/MultiMarkdown-5/releases/tag/5.2.0 +[5.3.0]: https://github.com/fletcher/MultiMarkdown-5/releases/tag/5.3.0 diff --git a/CMakeLists.txt b/CMakeLists.txt index 8fbd90a..1547de8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -8,9 +8,9 @@ cmake_minimum_required (VERSION 2.6) set (My_Project_Title "MultiMarkdown") set (My_Project_Description "MultiMarkdown - lightweight markup processor") set (My_Project_Author "Fletcher T. Penney") -set (My_Project_Revised_Date "2016-03-16") +set (My_Project_Revised_Date "2016-06-08") set (My_Project_Version_Major 5) -set (My_Project_Version_Minor 2) +set (My_Project_Version_Minor 3) set (My_Project_Version_Patch 0) set (My_Project_Version "${My_Project_Version_Major}.${My_Project_Version_Minor}.${My_Project_Version_Patch}")