diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 000000000..4c0ad9406 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,83 @@ +name: CI tests + +on: [push, workflow_dispatch] + +jobs: + linux: + + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + cmake_opts: + - '-DCMARK_SHARED=ON' + - '' + compiler: + - c: 'clang' + cpp: 'clang++' + - c: 'gcc' + cpp: 'g++' + env: + CMAKE_OPTIONS: ${{ matrix.cmake_opts }} + CC: ${{ matrix.compiler.c }} + CXX: ${{ matrix.compiler.cpp }} + + steps: + - uses: actions/checkout@v1 + - name: Install valgrind + run: | + sudo apt install -y valgrind + - name: Build and test + run: | + make + make test + make leakcheck + + macos: + + runs-on: macOS-latest + strategy: + fail-fast: false + matrix: + cmake_opts: + - '-DCMARK_SHARED=ON' + - '' + compiler: + - c: 'clang' + cpp: 'clang++' + - c: 'gcc' + cpp: 'g++' + env: + CMAKE_OPTIONS: ${{ matrix.cmake_opts }} + CC: ${{ matrix.compiler.c }} + CXX: ${{ matrix.compiler.cpp }} + + steps: + - uses: actions/checkout@v1 + - name: Build and test + env: + CMAKE_OPTIONS: -DCMARK_SHARED=OFF + run: | + make + make test + + windows: + + runs-on: windows-latest + strategy: + fail-fast: false + matrix: + cmake_opts: + - '-DCMARK_SHARED=ON' + - '' + env: + CMAKE_OPTIONS: ${{ matrix.cmake_opts }} + + steps: + - uses: actions/checkout@v1 + - uses: ilammy/msvc-dev-cmd@v1 + - name: Build and test + run: | + chcp 65001 + nmake.exe /nologo /f Makefile.nmake test + shell: cmd diff --git a/CMakeLists.txt b/CMakeLists.txt index e30278cb2..bb4976a27 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,7 +4,7 @@ project(cmark-gfm) set(PROJECT_VERSION_MAJOR 0) set(PROJECT_VERSION_MINOR 29) set(PROJECT_VERSION_PATCH 0) -set(PROJECT_VERSION_GFM 0) +set(PROJECT_VERSION_GFM 2) set(PROJECT_VERSION ${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH}.gfm.${PROJECT_VERSION_GFM}) include("FindAsan.cmake") diff --git a/changelog.txt b/changelog.txt index b86a41a22..a8f7bb1b1 100644 --- a/changelog.txt +++ b/changelog.txt @@ -1,3 +1,18 @@ +[0.29.0.gfm.2] + * Fixed issues with footnote rendering when used with the autolinker (#121), + and when footnotes are adjacent (#139). + * We now allow footnotes to be referenced from inside a footnote definition, + we use the footnote label for the fnref href text when rendering html, and + we insert multiple backrefs when a footnote has been referenced multiple + times (#229, #230) + * We added new data- attributes to footnote html rendering to make them + easier to style (#234) + +[0.29.0.gfm.1] + + * Fixed denial of service bug in GFM's table extension + per https://github.com/github/cmark-gfm/security/advisories/GHSA-7gc6-9qr5-hc85 + [0.29.0] * Update spec to 0.29. diff --git a/src/blocks.c b/src/blocks.c index 6e87f19cb..99da25eb3 100644 --- a/src/blocks.c +++ b/src/blocks.c @@ -494,7 +494,6 @@ static void process_footnotes(cmark_parser *parser) { while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) { cur = cmark_iter_get_node(iter); if (ev_type == CMARK_EVENT_EXIT && cur->type == CMARK_NODE_FOOTNOTE_DEFINITION) { - cmark_node_unlink(cur); cmark_footnote_create(map, cur); } } @@ -511,6 +510,15 @@ static void process_footnotes(cmark_parser *parser) { if (!footnote->ix) footnote->ix = ++ix; + // store a reference to this footnote reference's footnote definition + // this is used by renderers when generating label ids + cur->parent_footnote_def = footnote->node; + + // keep track of a) count of how many times this footnote def has been + // referenced, and b) which reference index this footnote ref is at. + // this is used by renderers when generating links and backreferences. + cur->footnote.ref_ix = ++footnote->node->footnote.def_count; + char n[32]; snprintf(n, sizeof(n), "%d", footnote->ix); cmark_chunk_free(parser->mem, &cur->as.literal); @@ -541,13 +549,16 @@ static void process_footnotes(cmark_parser *parser) { qsort(map->sorted, map->size, sizeof(cmark_map_entry *), sort_footnote_by_ix); for (unsigned int i = 0; i < map->size; ++i) { cmark_footnote *footnote = (cmark_footnote *)map->sorted[i]; - if (!footnote->ix) + if (!footnote->ix) { + cmark_node_unlink(footnote->node); continue; + } cmark_node_append_child(parser->root, footnote->node); footnote->node = NULL; } } + cmark_unlink_footnotes_map(map); cmark_map_free(map); } diff --git a/src/commonmark.c b/src/commonmark.c index 94fd4388f..328da12a3 100644 --- a/src/commonmark.c +++ b/src/commonmark.c @@ -488,7 +488,13 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node, case CMARK_NODE_FOOTNOTE_REFERENCE: if (entering) { LIT("[^"); - OUT(cmark_chunk_to_cstr(renderer->mem, &node->as.literal), false, LITERAL); + + char *footnote_label = renderer->mem->calloc(node->parent_footnote_def->as.literal.len + 1, sizeof(char)); + memmove(footnote_label, node->parent_footnote_def->as.literal.data, node->parent_footnote_def->as.literal.len); + + OUT(footnote_label, false, LITERAL); + renderer->mem->free(footnote_label); + LIT("]"); } break; @@ -497,9 +503,13 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node, if (entering) { renderer->footnote_ix += 1; LIT("[^"); - char n[32]; - snprintf(n, sizeof(n), "%d", renderer->footnote_ix); - OUT(n, false, LITERAL); + + char *footnote_label = renderer->mem->calloc(node->as.literal.len + 1, sizeof(char)); + memmove(footnote_label, node->as.literal.data, node->as.literal.len); + + OUT(footnote_label, false, LITERAL); + renderer->mem->free(footnote_label); + LIT("]:\n"); cmark_strbuf_puts(renderer->prefix, " "); diff --git a/src/footnotes.c b/src/footnotes.c index f2d2765f4..c2b745f79 100644 --- a/src/footnotes.c +++ b/src/footnotes.c @@ -38,3 +38,26 @@ void cmark_footnote_create(cmark_map *map, cmark_node *node) { cmark_map *cmark_footnote_map_new(cmark_mem *mem) { return cmark_map_new(mem, footnote_free); } + +// Before calling `cmark_map_free` on a map with `cmark_footnotes`, first +// unlink all of the footnote nodes before freeing their memory. +// +// Sometimes, two (unused) footnote nodes can end up referencing each other, +// which as they get freed up by calling `cmark_map_free` -> `footnote_free` -> +// etc, can lead to a use-after-free error. +// +// Better to `unlink` every footnote node first, setting their next, prev, and +// parent pointers to NULL, and only then walk thru & free them up. +void cmark_unlink_footnotes_map(cmark_map *map) { + cmark_map_entry *ref; + cmark_map_entry *next; + + ref = map->refs; + while(ref) { + next = ref->next; + if (((cmark_footnote *)ref)->node) { + cmark_node_unlink(((cmark_footnote *)ref)->node); + } + ref = next; + } +} diff --git a/src/html.c b/src/html.c index 5959d7a0b..96daa18e2 100644 --- a/src/html.c +++ b/src/html.c @@ -59,16 +59,30 @@ static void filter_html_block(cmark_html_renderer *renderer, uint8_t *data, size cmark_strbuf_put(html, data, (bufsize_t)len); } -static bool S_put_footnote_backref(cmark_html_renderer *renderer, cmark_strbuf *html) { +static bool S_put_footnote_backref(cmark_html_renderer *renderer, cmark_strbuf *html, cmark_node *node) { if (renderer->written_footnote_ix >= renderer->footnote_ix) return false; renderer->written_footnote_ix = renderer->footnote_ix; - cmark_strbuf_puts(html, "footnote_ix); - cmark_strbuf_puts(html, n); - cmark_strbuf_puts(html, "\" class=\"footnote-backref\">↩"); + cmark_strbuf_puts(html, "as.literal.data, node->as.literal.len); + cmark_strbuf_puts(html, "\" class=\"footnote-backref\" data-footnote-backref aria-label=\"Back to content\">↩"); + + if (node->footnote.def_count > 1) + { + for(int i = 2; i <= node->footnote.def_count; i++) { + char n[32]; + snprintf(n, sizeof(n), "%d", i); + + cmark_strbuf_puts(html, " as.literal.data, node->as.literal.len); + cmark_strbuf_puts(html, "-"); + cmark_strbuf_puts(html, n); + cmark_strbuf_puts(html, "\" class=\"footnote-backref\" data-footnote-backref aria-label=\"Back to content\">↩"); + cmark_strbuf_puts(html, n); + cmark_strbuf_puts(html, ""); + } + } return true; } @@ -273,7 +287,7 @@ static int S_render_node(cmark_html_renderer *renderer, cmark_node *node, } else { if (parent->type == CMARK_NODE_FOOTNOTE_DEFINITION && node->next == NULL) { cmark_strbuf_putc(html, ' '); - S_put_footnote_backref(renderer, html); + S_put_footnote_backref(renderer, html, parent); } cmark_strbuf_puts(html, "

\n"); } @@ -405,16 +419,15 @@ static int S_render_node(cmark_html_renderer *renderer, cmark_node *node, case CMARK_NODE_FOOTNOTE_DEFINITION: if (entering) { if (renderer->footnote_ix == 0) { - cmark_strbuf_puts(html, "
\n
    \n"); + cmark_strbuf_puts(html, "
    \n
      \n"); } ++renderer->footnote_ix; - cmark_strbuf_puts(html, "
    1. footnote_ix); - cmark_strbuf_puts(html, n); + + cmark_strbuf_puts(html, "
    2. as.literal.data, node->as.literal.len); cmark_strbuf_puts(html, "\">\n"); } else { - if (S_put_footnote_backref(renderer, html)) { + if (S_put_footnote_backref(renderer, html, node)) { cmark_strbuf_putc(html, '\n'); } cmark_strbuf_puts(html, "
    3. \n"); @@ -423,12 +436,20 @@ static int S_render_node(cmark_html_renderer *renderer, cmark_node *node, case CMARK_NODE_FOOTNOTE_REFERENCE: if (entering) { - cmark_strbuf_puts(html, "as.literal.data, node->as.literal.len); - cmark_strbuf_puts(html, "\" id=\"fnref"); - cmark_strbuf_put(html, node->as.literal.data, node->as.literal.len); - cmark_strbuf_puts(html, "\">"); - cmark_strbuf_put(html, node->as.literal.data, node->as.literal.len); + cmark_strbuf_puts(html, "parent_footnote_def->as.literal.data, node->parent_footnote_def->as.literal.len); + cmark_strbuf_puts(html, "\" id=\"fnref-"); + houdini_escape_href(html, node->parent_footnote_def->as.literal.data, node->parent_footnote_def->as.literal.len); + + if (node->footnote.ref_ix > 1) { + char n[32]; + snprintf(n, sizeof(n), "%d", node->footnote.ref_ix); + cmark_strbuf_puts(html, "-"); + cmark_strbuf_puts(html, n); + } + + cmark_strbuf_puts(html, "\" data-footnote-ref>"); + houdini_escape_href(html, node->as.literal.data, node->as.literal.len); cmark_strbuf_puts(html, ""); } break; diff --git a/src/include/footnotes.h b/src/include/footnotes.h index 43dd64ff6..64e2901e3 100644 --- a/src/include/footnotes.h +++ b/src/include/footnotes.h @@ -18,6 +18,8 @@ typedef struct cmark_footnote cmark_footnote; void cmark_footnote_create(cmark_map *map, cmark_node *node); cmark_map *cmark_footnote_map_new(cmark_mem *mem); +void cmark_unlink_footnotes_map(cmark_map *map); + #ifdef __cplusplus } #endif diff --git a/src/include/node.h b/src/include/node.h index e2c0216d1..0f9482fb3 100644 --- a/src/include/node.h +++ b/src/include/node.h @@ -81,6 +81,13 @@ struct cmark_node { cmark_syntax_extension *extension; + union { + int ref_ix; + int def_count; + } footnote; + + cmark_node *parent_footnote_def; + union { cmark_chunk literal; cmark_list list; diff --git a/src/inlines.c b/src/inlines.c index ddfb7814c..8020a73e2 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -1283,19 +1283,77 @@ static cmark_node *handle_close_bracket(cmark_parser *parser, subject *subj) { // What if we're a footnote link? if (parser->options & CMARK_OPT_FOOTNOTES && opener->inl_text->next && - opener->inl_text->next->type == CMARK_NODE_TEXT && - !opener->inl_text->next->next) { + opener->inl_text->next->type == CMARK_NODE_TEXT) { + cmark_chunk *literal = &opener->inl_text->next->as.literal; - if (literal->len > 1 && literal->data[0] == '^') { - inl = make_simple(subj->mem, CMARK_NODE_FOOTNOTE_REFERENCE); - inl->as.literal = cmark_chunk_dup(literal, 1, literal->len - 1); - inl->start_line = inl->end_line = subj->line; - inl->start_column = opener->inl_text->start_column; - inl->end_column = subj->pos + subj->column_offset + subj->block_offset; - cmark_node_insert_before(opener->inl_text, inl); - cmark_node_free(opener->inl_text->next); - cmark_node_free(opener->inl_text); + + // look back to the opening '[', and skip ahead to the next character + // if we're looking at a '[^' sequence, and there is other text or nodes + // after the ^, let's call it a footnote reference. + if ((literal->len > 0 && literal->data[0] == '^') && (literal->len > 1 || opener->inl_text->next->next)) { + + // Before we got this far, the `handle_close_bracket` function may have + // advanced the current state beyond our footnote's actual closing + // bracket, ie if it went looking for a `link_label`. + // Let's just rewind the subject's position: + subj->pos = initial_pos; + + cmark_node *fnref = make_simple(subj->mem, CMARK_NODE_FOOTNOTE_REFERENCE); + + // the start and end of the footnote ref is the opening and closing brace + // i.e. the subject's current position, and the opener's start_column + int fnref_end_column = subj->pos + subj->column_offset + subj->block_offset; + int fnref_start_column = opener->inl_text->start_column; + + // any given node delineates a substring of the line being processed, + // with the remainder of the line being pointed to thru its 'literal' + // struct member. + // here, we copy the literal's pointer, moving it past the '^' character + // for a length equal to the size of footnote reference text. + // i.e. end_col minus start_col, minus the [ and the ^ characters + // + // this copies the footnote reference string, even if between the + // `opener` and the subject's current position there are other nodes + // + // (first, check for underflows) + if ((fnref_start_column + 2) <= fnref_end_column) { + fnref->as.literal = cmark_chunk_dup(literal, 1, (fnref_end_column - fnref_start_column) - 2); + } else { + fnref->as.literal = cmark_chunk_dup(literal, 1, 0); + } + + fnref->start_line = fnref->end_line = subj->line; + fnref->start_column = fnref_start_column; + fnref->end_column = fnref_end_column; + + // we then replace the opener with this new fnref node, the net effect + // being replacing the opening '[' text node with a `^footnote-ref]` node. + cmark_node_insert_before(opener->inl_text, fnref); + process_emphasis(parser, subj, opener->previous_delimiter); + // sometimes, the footnote reference text gets parsed into multiple nodes + // i.e. '[^example]' parsed into '[', '^exam', 'ple]'. + // this happens for ex with the autolink extension. when the autolinker + // finds the 'w' character, it will split the text into multiple nodes + // in hopes of being able to match a 'www.' substring. + // + // because this function is called one character at a time via the + // `parse_inlines` function, and the current subj->pos is pointing at the + // closing ] brace, and because we copy all the text between the [ ] + // braces, we should be able to safely ignore and delete any nodes after + // the opener->inl_text->next. + // + // therefore, here we walk thru the list and free them all up + cmark_node *next_node; + cmark_node *current_node = opener->inl_text->next; + while(current_node) { + next_node = current_node->next; + cmark_node_free(current_node); + current_node = next_node; + } + + cmark_node_free(opener->inl_text); + pop_bracket(subj); return NULL; } diff --git a/test/extensions.txt b/test/extensions.txt index 0d9993782..37033b1ca 100644 --- a/test/extensions.txt +++ b/test/extensions.txt @@ -672,31 +672,68 @@ Hi! [^unused]: This is unused. . -

      This is some text!1. Other text.2.

      -

      Here's a thing3.

      -

      And another thing4.

      +

      This is some text!1. Other text.2.

      +

      Here's a thing3.

      +

      And another thing4.

      This doesn't have a referent[^nope].

      Hi!

      -
      +
        -
      1. -

        Some bolded footnote definition.

        +
      2. +

        Some bolded footnote definition.

      3. -
      4. +
      5. Blockquotes can be in a footnote.

        as well as code blocks
         
        -

        or, naturally, simple paragraphs.

        +

        or, naturally, simple paragraphs.

      6. -
      7. -

        no code block here (spaces are stripped away)

        +
      8. +

        no code block here (spaces are stripped away)

      9. -
      10. +
      11. this is now a code block (8 spaces indentation)
         
        - + +
      12. +
      +
      +```````````````````````````````` + +## When a footnote is used multiple times, we insert multiple backrefs. + +```````````````````````````````` example +This is some text. It has a footnote[^a-footnote]. + +This footnote is referenced[^a-footnote] multiple times, in lots of different places.[^a-footnote] + +[^a-footnote]: This footnote definition should have three backrefs. +. +

      This is some text. It has a footnote1.

      +

      This footnote is referenced1 multiple times, in lots of different places.1

      +
      +
        +
      1. +

        This footnote definition should have three backrefs. 2 3

        +
      2. +
      +
      +```````````````````````````````` + +## Footnote reference labels are href escaped + +```````````````````````````````` example +Hello[^">] + +[^">]: pwned +. +

      Hello1

      +
      +
        +
      1. +

        pwned

      diff --git a/test/normalize.py b/test/normalize.py index 6073bf011..b7fd9b245 100644 --- a/test/normalize.py +++ b/test/normalize.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- from html.parser import HTMLParser import urllib +import html try: from html.parser import HTMLParseError @@ -13,7 +14,6 @@ class HTMLParseError(Exception): from html.entities import name2codepoint import sys import re -import cgi # Normalization code, adapted from # https://github.com/karlcow/markdown-testsuite/ @@ -66,7 +66,7 @@ def handle_starttag(self, tag, attrs): self.output += ("=" + '"' + urllib.quote(urllib.unquote(v), safe='/') + '"') elif v != None: - self.output += ("=" + '"' + cgi.escape(v,quote=True) + '"') + self.output += ("=" + '"' + html.escape(v,quote=True) + '"') self.output += ">" self.last_tag = tag self.last = "starttag" diff --git a/test/regression.txt b/test/regression.txt index d033a4003..d4fbe3102 100644 --- a/test/regression.txt +++ b/test/regression.txt @@ -4,7 +4,8 @@ Issue #113: EOL character weirdness on Windows (Important: first line ends with CR + CR + LF) ```````````````````````````````` example -line1 +line1 + line2 .

      line1

      @@ -175,7 +176,7 @@ A footnote in a paragraph[^1] [^1]: a footnote . -

      A footnote in a paragraph1

      +

      A footnote in a paragraph1

      @@ -185,15 +186,15 @@ A footnote in a paragraph[^1] - +
      foot 1foot 1 note
      -
      +
        -
      1. -

        a footnote

        +
      2. +

        a footnote 2

      @@ -269,3 +270,99 @@ Pull request #128 - Buffer overread in tables extension

      | -|

      ```````````````````````````````` + +Footnotes may be nested inside other footnotes. + +```````````````````````````````` example footnotes +This is some text. It has a citation.[^citation] + +[^another-citation]: My second citation. + +[^citation]: This is a long winded parapgraph that also has another citation.[^another-citation] +. +

      This is some text. It has a citation.1

      +
      +
        +
      1. +

        This is a long winded parapgraph that also has another citation.2

        +
      2. +
      3. +

        My second citation.

        +
      4. +
      +
      +```````````````````````````````` + +Footnotes are similar to, but should not be confused with, link references + +```````````````````````````````` example footnotes +This is some text. It has two footnotes references, side-by-side without any spaces,[^footnote1][^footnote2] which are definitely not link references. + +[^footnote1]: Hello. + +[^footnote2]: Goodbye. +. +

      This is some text. It has two footnotes references, side-by-side without any spaces,12 which are definitely not link references.

      +
      +
        +
      1. +

        Hello.

        +
      2. +
      3. +

        Goodbye.

        +
      4. +
      +
      +```````````````````````````````` + +Footnotes may begin with or have a 'w' or a '_' in their reference label. + +```````````````````````````````` example footnotes autolink +This is some text. Sometimes the autolinker splits up text into multiple nodes, hoping it will find a hyperlink, so this text has a footnote whose reference label begins with a `w`.[^widely-cited] + +It has another footnote that contains many different characters (the autolinker was also breaking on `_`).[^sphinx-of-black-quartz_judge-my-vow-0123456789] + +[^sphinx-of-black-quartz_judge-my-vow-0123456789]: so does this. + +[^widely-cited]: this renders properly. +. +

      This is some text. Sometimes the autolinker splits up text into multiple nodes, hoping it will find a hyperlink, so this text has a footnote whose reference label begins with a w.1

      +

      It has another footnote that contains many different characters (the autolinker was also breaking on _).2

      +
      +
        +
      1. +

        this renders properly.

        +
      2. +
      3. +

        so does this.

        +
      4. +
      +
      +```````````````````````````````` + +Footnotes interacting with strikethrough should not lead to a use-after-free + +```````````````````````````````` example footnotes autolink strikethrough table +|Tot.....[^_a_]| +. +

      |Tot.....[^_a_]|

      +```````````````````````````````` + +Footnotes interacting with strikethrough should not lead to a use-after-free pt2 + +```````````````````````````````` example footnotes autolink strikethrough table +[^~~is~~1] +. +

      [^~~is~~1]

      +```````````````````````````````` + +Adjacent unused footnotes definitions should not lead to a use after free + +```````````````````````````````` example footnotes autolink strikethrough table +Hello world + + +[^a]:[^b]: +. +

      Hello world

      +````````````````````````````````