From 7f2d008d461d77df15370c7020843f37500a853b Mon Sep 17 00:00:00 2001 From: Michael Howell Date: Sat, 28 Oct 2023 17:16:47 -0700 Subject: [PATCH] Fix incorrectly parsing links with nested `[]` This markdown input: ```markdown [[]](https://haskell.org) [[][]](https://haskell.org) [[[][]](https://haskell.org) [[][][]](https://haskell.org) ``` In commonmark-hs, [it used to make this HTML](https://pandoc.org/try/?params=%7B%22text%22%3A%22%5B%5B%5D%5D%28https%3A%2F%2Fhaskell.org%29%5Cn%5Cn%5B%5B%5D%5B%5D%5D%28https%3A%2F%2Fhaskell.org%29%5Cn%5Cn%5B%5B%5B%5D%5B%5D%5D%28https%3A%2F%2Fhaskell.org%29%5Cn%5Cn%5B%5B%5D%5B%5D%5B%5D%5D%28https%3A%2F%2Fhaskell.org%29%22%2C%22to%22%3A%22html5%22%2C%22from%22%3A%22commonmark_x%22%2C%22standalone%22%3Afalse%2C%22embed-resources%22%3Afalse%2C%22table-of-contents%22%3Afalse%2C%22number-sections%22%3Afalse%2C%22citeproc%22%3Afalse%2C%22html-math-method%22%3A%22plain%22%2C%22wrap%22%3A%22auto%22%2C%22highlight-style%22%3Anull%2C%22files%22%3A%7B%7D%2C%22template%22%3Anull%7D): ```html

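<p><a href="https://haskell.org">[]</a></p>
<p><a href="https://haskell.org">[][]</a></p>
<p>[<a href="https://haskell.org">[][]</a></p>
<p>[[]<a href="https://haskell.org">][]</a></p>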
``` In commonmark.js, [it makes this instead](https://spec.commonmark.org/dingus/?text=%5B%5B%5D%5D(https%3A%2F%2Fhaskell.org)%0A%0A%5B%5B%5D%5B%5D%5D(https%3A%2F%2Fhaskell.org)%0A%0A%5B%5B%5B%5D%5B%5D%5D(https%3A%2F%2Fhaskell.org)%0A%0A%5B%5B%5D%5B%5D%5B%5D%5D(https%3A%2F%2Fhaskell.org)): ```html

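<p><a href="https://haskell.org">[]</a></p>
<p><a href="https://haskell.org">[][]</a></p>
<p>[<a href="https://haskell.org">[][]</a></p>
<p><a href="https://haskell.org">[][][]</a></p>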
``` The commonmark.js output seems to be correct according to the specification: > Brackets are allowed in the [link text](https://spec.commonmark.org/0.30/#link-text) > only if (a) they are backslash-escaped or (b) they appear as a > matched pair of brackets, with an open bracket `[`, a sequence of > zero or more inlines, and a close bracket `]`. --- commonmark/src/Commonmark/Inlines.hs | 26 ++++++++++++++++++++++---- commonmark/test/regression.md | 16 ++++++++++++++++ 2 files changed, 38 insertions(+), 4 deletions(-) diff --git a/commonmark/src/Commonmark/Inlines.hs b/commonmark/src/Commonmark/Inlines.hs index 475a71d..560dc05 100644 --- a/commonmark/src/Commonmark/Inlines.hs +++ b/commonmark/src/Commonmark/Inlines.hs @@ -686,6 +686,14 @@ delimsMatch (Chunk open@Delim{} _ opents) (Chunk close@Delim{} _ closets) = opents /= closets delimsMatch _ _ = False +-- check for balanced `[]` brackets +bracketChunkToNumber :: Chunk a -> Int +bracketChunkToNumber (Chunk Delim{ delimType = '[' } _ _) = 1 +bracketChunkToNumber (Chunk Delim{ delimType = ']' } _ _) = -1 +bracketChunkToNumber _ = 0 +bracketMatchedCount :: [Chunk a] -> Int +bracketMatchedCount chunksinside = sum $ map bracketChunkToNumber chunksinside + processBrackets :: IsInline a => [BracketedSpec a] -> ReferenceMap -> [Chunk a] -> [Chunk a] processBrackets bracketedSpecs rm xs = @@ -783,7 +791,7 @@ processBs bracketedSpecs st = suffixPos = incSourceColumn closePos 1 - in case parse + in case (bracketMatchedCount chunksinside, parse (withRaw (do setPosition suffixPos (spec, constructor) <- choice $ @@ -791,13 +799,13 @@ processBs bracketedSpecs st = specs pos <- getPosition return (spec, constructor, pos))) - "" suffixToks of - Left _ -> -- match but no link/image + "" suffixToks) of + (0, Left _) -> -- match but no link/image processBs bracketedSpecs st{ leftCursor = moveLeft (leftCursor st) , rightCursor = fixSingleQuote $ moveRight (rightCursor st) } - Right ((spec, constructor, newpos), desttoks) -> + (0, 
Right ((spec, constructor, newpos), desttoks)) -> let left' = case bracketedPrefix spec of Just _ -> moveLeft left Nothing -> left @@ -854,6 +862,16 @@ processBs bracketedSpecs st = (chunkPos opener) $ stackBottoms st } + -- Bracket matched count /= 0 + -- + -- Links § 6.3 ¶ 2 • 2 + -- Brackets are allowed in the link text only if (a) they are + -- backslash-escaped or (b) they appear as a matched pair of + -- brackets, with an open bracket [, a sequence of zero or more + -- inlines, and a close bracket ]. + _ -> + processBs bracketedSpecs + st{ leftCursor = moveLeft left } (_, Just (Chunk Delim{ delimType = ']' } _ _)) diff --git a/commonmark/test/regression.md b/commonmark/test/regression.md index 4b9c3bb..b4183cc 100644 --- a/commonmark/test/regression.md +++ b/commonmark/test/regression.md @@ -219,3 +219,19 @@ x
x
 
```````````````````````````````` + +Issue #119 +```````````````````````````````` example +[[]](https://haskell.org) + +[[][]](https://haskell.org) + +[[[][]](https://haskell.org) + +[[][][]](https://haskell.org) +. +

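<p><a href="https://haskell.org">[]</a></p>
+<p><a href="https://haskell.org">[][]</a></p>
+<p>[<a href="https://haskell.org">[][]</a></p>
+<p><a href="https://haskell.org">[][][]</a></p>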
+````````````````````````````````