From 74631f8610d1067570d107e7c701f3e602e6c32e Mon Sep 17 00:00:00 2001
From: Phill MV
Date: Tue, 3 Aug 2021 12:15:37 -0400
Subject: [PATCH 01/36] Copied over the actions CI workflows that @jgm added in
commonmark/cmark.
---
.github/workflows/ci.yml | 102 +++++++++++++++++++++++++++++++++++++
.github/workflows/fuzz.yml | 23 +++++++++
2 files changed, 125 insertions(+)
create mode 100644 .github/workflows/ci.yml
create mode 100644 .github/workflows/fuzz.yml
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 000000000..8a0d2c1ca
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,102 @@
+name: CI tests
+
+on: [push, pull_request, workflow_dispatch]
+
+jobs:
+
+ linter:
+
+ runs-on: ubuntu-latest
+
+ steps:
+
+ - uses: actions/checkout@v1
+ - name: Install clang-tidy
+ run: |
+ sudo apt-get install -y clang-tidy-9
+ sudo update-alternatives --install /usr/bin/clang-tidy clang-tidy /usr/bin/clang-tidy-9 100
+ - name: lint with clang-tidy
+ run: |
+ make lint
+ env:
+ CC: clang
+ CXX: clang++
+
+ linux:
+
+ runs-on: ubuntu-latest
+ strategy:
+ fail-fast: false
+ matrix:
+ cmake_opts:
+ - '-DCMARK_SHARED=ON'
+ - ''
+ compiler:
+ - c: 'clang'
+ cpp: 'clang++'
+ - c: 'gcc'
+ cpp: 'g++'
+ env:
+ CMAKE_OPTIONS: ${{ matrix.cmake_opts }}
+ CC: ${{ matrix.compiler.c }}
+ CXX: ${{ matrix.compiler.cpp }}
+
+ steps:
+ - uses: actions/checkout@v1
+ - name: Install valgrind
+ run: |
+ sudo apt install -y valgrind
+ - name: Build and test
+ run: |
+ make
+ make test
+ make leakcheck
+
+ macos:
+
+ runs-on: macOS-latest
+ strategy:
+ fail-fast: false
+ matrix:
+ cmake_opts:
+ - '-DCMARK_SHARED=ON'
+ - ''
+ compiler:
+ - c: 'clang'
+ cpp: 'clang++'
+ - c: 'gcc'
+ cpp: 'g++'
+ env:
+ CMAKE_OPTIONS: ${{ matrix.cmake_opts }}
+ CC: ${{ matrix.compiler.c }}
+ CXX: ${{ matrix.compiler.cpp }}
+
+ steps:
+ - uses: actions/checkout@v1
+ - name: Build and test
+ env:
+ CMAKE_OPTIONS: -DCMARK_SHARED=OFF
+ run: |
+ make
+ make test
+
+ windows:
+
+ runs-on: windows-latest
+ strategy:
+ fail-fast: false
+ matrix:
+ cmake_opts:
+ - '-DCMARK_SHARED=ON'
+ - ''
+ env:
+ CMAKE_OPTIONS: ${{ matrix.cmake_opts }}
+
+ steps:
+ - uses: actions/checkout@v1
+ - uses: ilammy/msvc-dev-cmd@v1
+ - name: Build and test
+ run: |
+ chcp 65001
+ nmake.exe /nologo /f Makefile.nmake test
+ shell: cmd
diff --git a/.github/workflows/fuzz.yml b/.github/workflows/fuzz.yml
new file mode 100644
index 000000000..c918fd81c
--- /dev/null
+++ b/.github/workflows/fuzz.yml
@@ -0,0 +1,23 @@
+name: CIFuzz
+on: [pull_request, workflow_dispatch]
+jobs:
+ Fuzzing:
+ runs-on: ubuntu-latest
+ steps:
+ - name: Build Fuzzers
+ uses: google/oss-fuzz/infra/cifuzz/actions/build_fuzzers@master
+ with:
+ oss-fuzz-project-name: 'cmark'
+ dry-run: false
+ - name: Run Fuzzers
+ uses: google/oss-fuzz/infra/cifuzz/actions/run_fuzzers@master
+ with:
+ oss-fuzz-project-name: 'cmark'
+ fuzz-seconds: 600
+ dry-run: false
+ - name: Upload Crash
+ uses: actions/upload-artifact@v1
+ if: failure()
+ with:
+ name: artifacts
+ path: ./out/artifacts
From 7346f8dedc8ba382180a4b87caa6df49769bc4e4 Mon Sep 17 00:00:00 2001
From: Phill MV
Date: Tue, 3 Aug 2021 12:19:00 -0400
Subject: [PATCH 02/36] Turns out we don't have a lint task in our Makefile
(yet).
---
.github/workflows/ci.yml | 19 -------------------
1 file changed, 19 deletions(-)
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 8a0d2c1ca..7b7dd1d67 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -3,25 +3,6 @@ name: CI tests
on: [push, pull_request, workflow_dispatch]
jobs:
-
- linter:
-
- runs-on: ubuntu-latest
-
- steps:
-
- - uses: actions/checkout@v1
- - name: Install clang-tidy
- run: |
- sudo apt-get install -y clang-tidy-9
- sudo update-alternatives --install /usr/bin/clang-tidy clang-tidy /usr/bin/clang-tidy-9 100
- - name: lint with clang-tidy
- run: |
- make lint
- env:
- CC: clang
- CXX: clang++
-
linux:
runs-on: ubuntu-latest
From 2b2ffafb675fb9d36072a84f826f5cc658371a12 Mon Sep 17 00:00:00 2001
From: Phill MV
Date: Tue, 3 Aug 2021 12:22:35 -0400
Subject: [PATCH 03/36] Let's drop the fuzzer for now, looks like we have to
set something up with oss-fuzz first.
---
.github/workflows/fuzz.yml | 23 -----------------------
1 file changed, 23 deletions(-)
delete mode 100644 .github/workflows/fuzz.yml
diff --git a/.github/workflows/fuzz.yml b/.github/workflows/fuzz.yml
deleted file mode 100644
index c918fd81c..000000000
--- a/.github/workflows/fuzz.yml
+++ /dev/null
@@ -1,23 +0,0 @@
-name: CIFuzz
-on: [pull_request, workflow_dispatch]
-jobs:
- Fuzzing:
- runs-on: ubuntu-latest
- steps:
- - name: Build Fuzzers
- uses: google/oss-fuzz/infra/cifuzz/actions/build_fuzzers@master
- with:
- oss-fuzz-project-name: 'cmark'
- dry-run: false
- - name: Run Fuzzers
- uses: google/oss-fuzz/infra/cifuzz/actions/run_fuzzers@master
- with:
- oss-fuzz-project-name: 'cmark'
- fuzz-seconds: 600
- dry-run: false
- - name: Upload Crash
- uses: actions/upload-artifact@v1
- if: failure()
- with:
- name: artifacts
- path: ./out/artifacts
From 4e0a81fff706e76a1f2ad84a7d3e033f97ac424f Mon Sep 17 00:00:00 2001
From: Keith Packard
Date: Mon, 9 Mar 2020 13:54:22 -0700
Subject: [PATCH 04/36] Make normalize test compatible with python 3.8
Python 3.8 removed the cgi.escape function, which had been
deprecated since version 3.2. html.escape is its replacement, so
use that instead.
Signed-off-by: Keith Packard
---
test/normalize.py | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/test/normalize.py b/test/normalize.py
index 6073bf011..e9e6320b8 100644
--- a/test/normalize.py
+++ b/test/normalize.py
@@ -1,6 +1,7 @@
# -*- coding: utf-8 -*-
from html.parser import HTMLParser
import urllib
+import html
try:
from html.parser import HTMLParseError
@@ -66,7 +67,7 @@ def handle_starttag(self, tag, attrs):
self.output += ("=" + '"' +
urllib.quote(urllib.unquote(v), safe='/') + '"')
elif v != None:
- self.output += ("=" + '"' + cgi.escape(v,quote=True) + '"')
+ self.output += ("=" + '"' + html.escape(v,quote=True) + '"')
self.output += ">"
self.last_tag = tag
self.last = "starttag"
From 4bab14a5e2537704868acaf6998a34d2d969b498 Mon Sep 17 00:00:00 2001
From: Phill MV
Date: Thu, 12 Aug 2021 09:21:08 -0400
Subject: [PATCH 05/36] Removed outdated build status badges.
---
README.md | 3 ---
1 file changed, 3 deletions(-)
diff --git a/README.md b/README.md
index a36d6f9b3..aafe55ed5 100644
--- a/README.md
+++ b/README.md
@@ -1,9 +1,6 @@
cmark-gfm
=========
-[![Build Status]](https://travis-ci.org/github/cmark-gfm)
-[![Windows Build Status]](https://ci.appveyor.com/project/github/cmark)
-
`cmark-gfm` is an extended version of the C reference implementation of
[CommonMark], a rationalized version of Markdown syntax with a spec. This
repository adds GitHub Flavored Markdown extensions to
From 4a7186e3eb95d40c1bbbdbc3cda5912aa671320a Mon Sep 17 00:00:00 2001
From: Phill MV
Date: Thu, 12 Aug 2021 09:28:54 -0400
Subject: [PATCH 06/36] Run ci build only on push, thanks.
---
.github/workflows/ci.yml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 7b7dd1d67..4c0ad9406 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -1,6 +1,6 @@
name: CI tests
-on: [push, pull_request, workflow_dispatch]
+on: [push, workflow_dispatch]
jobs:
linux:
From 71e27f25f11c9a34f0532dba459940e1fb5f6316 Mon Sep 17 00:00:00 2001
From: Phill MV
Date: Thu, 19 Aug 2021 09:15:56 -0400
Subject: [PATCH 07/36] Footnotes now support being nested, i.e. a footnote may
reference another footnote.
---
src/blocks.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/src/blocks.c b/src/blocks.c
index 53e882f19..ec5bbe98c 100644
--- a/src/blocks.c
+++ b/src/blocks.c
@@ -468,7 +468,6 @@ static void process_footnotes(cmark_parser *parser) {
while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
cur = cmark_iter_get_node(iter);
if (ev_type == CMARK_EVENT_EXIT && cur->type == CMARK_NODE_FOOTNOTE_DEFINITION) {
- cmark_node_unlink(cur);
cmark_footnote_create(map, cur);
}
}
@@ -515,8 +514,10 @@ static void process_footnotes(cmark_parser *parser) {
qsort(map->sorted, map->size, sizeof(cmark_map_entry *), sort_footnote_by_ix);
for (unsigned int i = 0; i < map->size; ++i) {
cmark_footnote *footnote = (cmark_footnote *)map->sorted[i];
- if (!footnote->ix)
+ if (!footnote->ix) {
+ cmark_node_unlink(footnote->node);
continue;
+ }
cmark_node_append_child(parser->root, footnote->node);
footnote->node = NULL;
}
From 1f026ef38bea7636b49c31925ecf95af9bb8834f Mon Sep 17 00:00:00 2001
From: Phill MV
Date: Thu, 19 Aug 2021 09:22:54 -0400
Subject: [PATCH 08/36] Fix for footnotes being confused for link references.
When two footnote references are adjacent, the handle_close_bracket
function first tries to match the closing bracket to a link
reference, which can advance the subject's position past the first
footnote's closing bracket. We now rewind the subject's position, so
that the parser correctly picks up both footnote references.
---
src/inlines.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/src/inlines.c b/src/inlines.c
index c21430bde..bf85dc5a2 100644
--- a/src/inlines.c
+++ b/src/inlines.c
@@ -1141,6 +1141,13 @@ static cmark_node *handle_close_bracket(cmark_parser *parser, subject *subj) {
!opener->inl_text->next->next) {
cmark_chunk *literal = &opener->inl_text->next->as.literal;
if (literal->len > 1 && literal->data[0] == '^') {
+
+ // Before we got this far, the `handle_close_bracket` function may have
+ // advanced the current state beyond our footnote's actual closing
+ // bracket, ie if it went looking for a `link_label`.
+ // Let's just rewind the subject's position:
+ subj->pos = initial_pos;
+
inl = make_simple(subj->mem, CMARK_NODE_FOOTNOTE_REFERENCE);
inl->as.literal = cmark_chunk_dup(literal, 1, literal->len - 1);
inl->start_line = inl->end_line = subj->line;
From bb117ffa7f0dcccdc4d7773f8585ef07e2402f36 Mon Sep 17 00:00:00 2001
From: Phill MV
Date: Thu, 19 Aug 2021 09:31:02 -0400
Subject: [PATCH 09/36] Fix for when footnote reference labels get broken up
into multiple cmark_nodes.
Sometimes the autolinker greedily splits the input into multiple
text nodes in the hope of matching a hyperlink. This broke footnotes,
which expected a single text node. Instead of relying on the
tokenizing to have worked perfectly, when handling footnote references
we now build the reference from the text between the opening and
closing brackets, and delete the now-superfluous text nodes.
---
src/inlines.c | 65 +++++++++++++++++++++++++++++++++++++++++++--------
1 file changed, 55 insertions(+), 10 deletions(-)
diff --git a/src/inlines.c b/src/inlines.c
index c21430bde..02b75fa7f 100644
--- a/src/inlines.c
+++ b/src/inlines.c
@@ -1137,17 +1137,62 @@ static cmark_node *handle_close_bracket(cmark_parser *parser, subject *subj) {
// What if we're a footnote link?
if (parser->options & CMARK_OPT_FOOTNOTES &&
opener->inl_text->next &&
- opener->inl_text->next->type == CMARK_NODE_TEXT &&
- !opener->inl_text->next->next) {
+ opener->inl_text->next->type == CMARK_NODE_TEXT) {
+
cmark_chunk *literal = &opener->inl_text->next->as.literal;
- if (literal->len > 1 && literal->data[0] == '^') {
- inl = make_simple(subj->mem, CMARK_NODE_FOOTNOTE_REFERENCE);
- inl->as.literal = cmark_chunk_dup(literal, 1, literal->len - 1);
- inl->start_line = inl->end_line = subj->line;
- inl->start_column = opener->inl_text->start_column;
- inl->end_column = subj->pos + subj->column_offset + subj->block_offset;
- cmark_node_insert_before(opener->inl_text, inl);
- cmark_node_free(opener->inl_text->next);
+
+ // look back to the opening '[', and skip ahead to the next character
+ // if we're looking at a '[^' sequence, and there is other text or nodes
+ // after the ^, let's call it a footnote reference.
+ if (literal->data[0] == '^' && (literal->len > 1 || opener->inl_text->next->next)) {
+
+ cmark_node *fnref = make_simple(subj->mem, CMARK_NODE_FOOTNOTE_REFERENCE);
+
+ // the start and end of the footnote ref is the opening and closing brace
+ // i.e. the subject's current position, and the opener's start_column
+ int fnref_end_column = subj->pos + subj->column_offset + subj->block_offset;
+ int fnref_start_column = opener->inl_text->start_column;
+
+ // any given node delineates a substring of the line being processed,
+ // with the remainder of the line being pointed to thru its 'literal'
+ // struct member.
+ // here, we copy the literal's pointer, moving it past the '^' character
+ // for a length equal to the size of footnote reference text.
+ // i.e. end_col minus start_col, minus the [ and the ^ characters
+ //
+ // this copies the footnote reference string, even if between the
+ // `opener` and the subject's current position there are other nodes
+ fnref->as.literal = cmark_chunk_dup(literal, 1, (fnref_end_column - fnref_start_column) - 2);
+
+ fnref->start_line = fnref->end_line = subj->line;
+ fnref->start_column = fnref_start_column;
+ fnref->end_column = fnref_end_column;
+
+ // we then replace the opener with this new fnref node, the net effect
+ // being replacing the opening '[' text node with a `^footnote-ref]` node.
+ cmark_node_insert_before(opener->inl_text, fnref);
+
+ // sometimes, the footnote reference text gets parsed into multiple nodes
+ // i.e. '[^example]' parsed into '[', '^exam', 'ple]'.
+ // this happens for ex with the autolink extension. when the autolinker
+ // finds the 'w' character, it will split the text into multiple nodes
+ // in hopes of being able to match a 'www.' substring.
+ //
+ // because this function is called one character at a time via the
+ // `parse_inlines` function, and the current subj->pos is pointing at the
+ // closing ] brace, and because we copy all the text between the [ ]
+ // braces, we should be able to safely ignore and delete any nodes after
+ // the opener->inl_text->next.
+ //
+ // therefore, here we walk thru the list and free them all up
+ cmark_node *next_node;
+ cmark_node *current_node = opener->inl_text->next;
+ while(current_node) {
+ next_node = current_node->next;
+ cmark_node_free(current_node);
+ current_node = next_node;
+ }
+
cmark_node_free(opener->inl_text);
process_emphasis(parser, subj, opener->previous_delimiter);
pop_bracket(subj);
From bf76871f492b2e76236bcd817b2ef5d501fa0beb Mon Sep 17 00:00:00 2001
From: Phill MV
Date: Thu, 19 Aug 2021 09:44:19 -0400
Subject: [PATCH 10/36] Fix footnote reference label text, and add multiple
backrefs.
When a footnote is referenced multiple times, we now insert multiple
backrefs, one linking back to each reference. To do this, footnote
ref link labels can no longer be a bare incrementing index; they now
use the footnote reference label text *plus* a per-reference index.
---
src/blocks.c | 11 +++++++++++
src/commonmark.c | 10 ++++++----
src/html.c | 51 ++++++++++++++++++++++++++++++++++--------------
src/node.h | 5 +++++
4 files changed, 58 insertions(+), 19 deletions(-)
diff --git a/src/blocks.c b/src/blocks.c
index 53e882f19..d70551c08 100644
--- a/src/blocks.c
+++ b/src/blocks.c
@@ -485,6 +485,17 @@ static void process_footnotes(cmark_parser *parser) {
if (!footnote->ix)
footnote->ix = ++ix;
+ // keep track of a) how many times this footnote def has been
+ // referenced, and b) which reference count this footnote ref is at
+ // this is used by renderers when generating links and backreferences.
+ cur->footnote.ix = ++footnote->node->footnote.count;
+
+ // store the footnote reference text label in the footnote ref's node's
+ // `user_data`, so that renderers can use the label when generating
+ // links and backreferences.
+ cur->user_data = parser->mem->calloc(1, (sizeof(char) * cur->as.literal.len) + 1);
+ memmove(cur->user_data, cur->as.literal.data, cur->as.literal.len);
+
char n[32];
snprintf(n, sizeof(n), "%d", footnote->ix);
cmark_chunk_free(parser->mem, &cur->as.literal);
diff --git a/src/commonmark.c b/src/commonmark.c
index f272d4d29..a368474f3 100644
--- a/src/commonmark.c
+++ b/src/commonmark.c
@@ -477,7 +477,7 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node,
case CMARK_NODE_FOOTNOTE_REFERENCE:
if (entering) {
LIT("[^");
- OUT(cmark_chunk_to_cstr(renderer->mem, &node->as.literal), false, LITERAL);
+ OUT(node->user_data, false, LITERAL);
LIT("]");
}
break;
@@ -486,9 +486,11 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node,
if (entering) {
renderer->footnote_ix += 1;
LIT("[^");
- char n[32];
- snprintf(n, sizeof(n), "%d", renderer->footnote_ix);
- OUT(n, false, LITERAL);
+
+ char *str = renderer->mem->calloc(1, (sizeof(char) * node->as.literal.len) + 1);
+ memmove(str, node->as.literal.data, node->as.literal.len);
+
+ OUT(str, false, LITERAL);
LIT("]:\n");
cmark_strbuf_puts(renderer->prefix, " ");
diff --git a/src/html.c b/src/html.c
index ea1f6e189..a70271336 100644
--- a/src/html.c
+++ b/src/html.c
@@ -59,17 +59,31 @@ static void filter_html_block(cmark_html_renderer *renderer, uint8_t *data, size
cmark_strbuf_put(html, data, (bufsize_t)len);
}
-static bool S_put_footnote_backref(cmark_html_renderer *renderer, cmark_strbuf *html) {
+static bool S_put_footnote_backref(cmark_html_renderer *renderer, cmark_strbuf *html, cmark_node *node) {
if (renderer->written_footnote_ix >= renderer->footnote_ix)
return false;
renderer->written_footnote_ix = renderer->footnote_ix;
- cmark_strbuf_puts(html, "footnote_ix);
- cmark_strbuf_puts(html, n);
+ cmark_strbuf_puts(html, "as.literal.data, node->as.literal.len);
cmark_strbuf_puts(html, "\" class=\"footnote-backref\">↩");
+ if (node->footnote.count > 1)
+ {
+ for(int i = 2; i <= node->footnote.count; i++) {
+ char n[32];
+ snprintf(n, sizeof(n), "%d", i);
+
+ cmark_strbuf_puts(html, " as.literal.data, node->as.literal.len);
+ cmark_strbuf_puts(html, ":");
+ cmark_strbuf_put(html, (const unsigned char *)n, strlen(n));
+ cmark_strbuf_puts(html, "\" class=\"footnote-backref\">↩");
+ }
+ }
+
return true;
}
@@ -273,7 +287,7 @@ static int S_render_node(cmark_html_renderer *renderer, cmark_node *node,
} else {
if (parent->type == CMARK_NODE_FOOTNOTE_DEFINITION && node->next == NULL) {
cmark_strbuf_putc(html, ' ');
- S_put_footnote_backref(renderer, html);
+ S_put_footnote_backref(renderer, html, parent);
}
cmark_strbuf_puts(html, "
\n");
}
@@ -395,13 +409,12 @@ static int S_render_node(cmark_html_renderer *renderer, cmark_node *node,
cmark_strbuf_puts(html, "
````````````````````````````````
+## When a footnote is used multiple times, we insert multiple backrefs.
+
+```````````````````````````````` example
+This is some text. It has a footnote[^a-footnote].
+
+This footnote is referenced[^a-footnote] multiple times, in lots of different places.[^a-footnote]
+
+[^a-footnote]: This footnote definition should have three backrefs.
+.
+This is some text. It has a footnote.
+This footnote is referenced multiple times, in lots of different places.
+
+````````````````````````````````
+
## Interop
Autolink and strikethrough.
From 8ccdaa7d1cf65501735220366c1316474ff7e893 Mon Sep 17 00:00:00 2001
From: Phill MV
Date: Thu, 19 Aug 2021 09:56:10 -0400
Subject: [PATCH 13/36] Converted regression test to expect new footnote ref
link labels.
---
test/regression.txt | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/test/regression.txt b/test/regression.txt
index d033a4003..ae0c8be40 100644
--- a/test/regression.txt
+++ b/test/regression.txt
@@ -175,7 +175,7 @@ A footnote in a paragraph[^1]
[^1]: a footnote
.
-A footnote in a paragraph
+A footnote in a paragraph
@@ -185,15 +185,15 @@ A footnote in a paragraph[^1]
-foot |
+foot |
note |
From a0de7d891c350b306d0e8188f49a1a4c72824a9a Mon Sep 17 00:00:00 2001
From: Phill MV
Date: Fri, 20 Aug 2021 10:15:40 -0400
Subject: [PATCH 14/36] Added regression test that exercises nested footnotes.
---
test/regression.txt | 23 +++++++++++++++++++++++
1 file changed, 23 insertions(+)
diff --git a/test/regression.txt b/test/regression.txt
index d033a4003..7cc10c0d2 100644
--- a/test/regression.txt
+++ b/test/regression.txt
@@ -269,3 +269,26 @@ Pull request #128 - Buffer overread in tables extension
|
-|
````````````````````````````````
+
+Footnotes may be nested inside other footnotes.
+
+
+```````````````````````````````` example footnotes
+This is some text. It has a citation.[^citation]
+
+[^another-citation]: My second citation.
+
+[^citation]: This is a long winded parapgraph that also has another citation.[^another-citation]
+.
+This is some text. It has a citation.
+
+````````````````````````````````
From 7fa237247371bb3067bcb8fcb4ce74bc95ac04e0 Mon Sep 17 00:00:00 2001
From: Phill MV
Date: Fri, 20 Aug 2021 10:18:16 -0400
Subject: [PATCH 15/36] Added test that properly exercises footnotes whose
reference labels contain 'w' or '_'.
---
test/regression.txt | 25 +++++++++++++++++++++++++
1 file changed, 25 insertions(+)
diff --git a/test/regression.txt b/test/regression.txt
index d033a4003..f0e99b4a6 100644
--- a/test/regression.txt
+++ b/test/regression.txt
@@ -269,3 +269,28 @@ Pull request #128 - Buffer overread in tables extension
|
-|
````````````````````````````````
+
+Footnotes may begin with or have a 'w' or a '_' in their reference label.
+
+```````````````````````````````` example footnotes autolink
+This is some text. Sometimes the autolinker splits up text into multiple nodes, hoping it will find a hyperlink, so this text has a footnote whose reference label begins with a `w`.[^widely-cited]
+
+It has another footnote that contains many different characters (the autolinker was also breaking on `_`).[^sphinx-of-black-quartz_judge-my-vow-0123456789]
+
+[^sphinx-of-black-quartz_judge-my-vow-0123456789]: so does this.
+
+[^widely-cited]: this renders properly.
+.
+This is some text. Sometimes the autolinker splits up text into multiple nodes, hoping it will find a hyperlink, so this text has a footnote whose reference label begins with a w
.
+It has another footnote that contains many different characters (the autolinker was also breaking on _
).
+
+````````````````````````````````
From 740b98704fbd93d1a7dee46077964db7df14263d Mon Sep 17 00:00:00 2001
From: Phill MV
Date: Fri, 20 Aug 2021 10:21:45 -0400
Subject: [PATCH 16/36] Added test that exercises whether footnotes are
confused for link references.
---
test/regression.txt | 22 ++++++++++++++++++++++
1 file changed, 22 insertions(+)
diff --git a/test/regression.txt b/test/regression.txt
index d033a4003..85c79507b 100644
--- a/test/regression.txt
+++ b/test/regression.txt
@@ -269,3 +269,25 @@ Pull request #128 - Buffer overread in tables extension
|
-|
````````````````````````````````
+
+## Footnotes are similar to, but should not be confused with, link references
+
+```````````````````````````````` example footnotes
+This is some text. It has two footnotes references, side-by-side without any spaces,[^footnote1][^footnote2] which are definitely not link references.
+
+[^footnote1]: Hello.
+
+[^footnote2]: Goodbye.
+.
+This is some text. It has two footnotes references, side-by-side without any spaces, which are definitely not link references.
+
+````````````````````````````````
From 7b5d45d05678504d8b62d80bec8e78cf0358b343 Mon Sep 17 00:00:00 2001
From: Phill MV
Date: Fri, 20 Aug 2021 11:42:05 -0400
Subject: [PATCH 17/36] Adapted existing regression tests to conform to new
footnote ref label.
---
test/regression.txt | 32 ++++++++++++++++----------------
1 file changed, 16 insertions(+), 16 deletions(-)
diff --git a/test/regression.txt b/test/regression.txt
index 37d795394..47bd104f5 100644
--- a/test/regression.txt
+++ b/test/regression.txt
@@ -279,14 +279,14 @@ This is some text. It has a citation.[^citation]
[^citation]: This is a long winded parapgraph that also has another citation.[^another-citation]
.
-This is some text. It has a citation.
+This is some text. It has a citation.
@@ -301,14 +301,14 @@ This is some text. It has two footnotes references, side-by-side without any spa
[^footnote2]: Goodbye.
.
-This is some text. It has two footnotes references, side-by-side without any spaces, which are definitely not link references.
+This is some text. It has two footnotes references, side-by-side without any spaces, which are definitely not link references.
@@ -325,15 +325,15 @@ It has another footnote that contains many different characters (the autolinker
[^widely-cited]: this renders properly.
.
-This is some text. Sometimes the autolinker splits up text into multiple nodes, hoping it will find a hyperlink, so this text has a footnote whose reference label begins with a w
.
-It has another footnote that contains many different characters (the autolinker was also breaking on _
).
+This is some text. Sometimes the autolinker splits up text into multiple nodes, hoping it will find a hyperlink, so this text has a footnote whose reference label begins with a w
.
+It has another footnote that contains many different characters (the autolinker was also breaking on _
).
From fdd7851bbc7238022a6c3d6221ad2d9541435352 Mon Sep 17 00:00:00 2001
From: Phill MV
Date: Mon, 23 Aug 2021 14:35:47 -0400
Subject: [PATCH 18/36] Added Actions CI badge.
---
README.md | 2 ++
1 file changed, 2 insertions(+)
diff --git a/README.md b/README.md
index aafe55ed5..0b9f719f6 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,8 @@
cmark-gfm
=========
+![Actions CI](https://github.com/github/cmark-gfm/actions/workflows/ci.yml/badge.svg)
+
`cmark-gfm` is an extended version of the C reference implementation of
[CommonMark], a rationalized version of Markdown syntax with a spec. This
repository adds GitHub Flavored Markdown extensions to
From 02776382422fd97986681458aa6c156a7fdcc652 Mon Sep 17 00:00:00 2001
From: Phill MV
Date: Mon, 23 Aug 2021 14:37:16 -0400
Subject: [PATCH 19/36] Removed redundant 'import cgi' from normalize.py
---
test/normalize.py | 1 -
1 file changed, 1 deletion(-)
diff --git a/test/normalize.py b/test/normalize.py
index e9e6320b8..b7fd9b245 100644
--- a/test/normalize.py
+++ b/test/normalize.py
@@ -14,7 +14,6 @@ class HTMLParseError(Exception):
from html.entities import name2codepoint
import sys
import re
-import cgi
# Normalization code, adapted from
# https://github.com/karlcow/markdown-testsuite/
From b790eca090237255ab1e60c1dfb2ec5f67ba5dcd Mon Sep 17 00:00:00 2001
From: Phill MV
Date: Wed, 25 Aug 2021 10:41:58 -0400
Subject: [PATCH 20/36] Bumped version to 0.29.0.gfm.1
---
CMakeLists.txt | 2 +-
changelog.txt | 5 +++++
2 files changed, 6 insertions(+), 1 deletion(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index c92bde52d..34773819f 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -4,7 +4,7 @@ project(cmark-gfm)
set(PROJECT_VERSION_MAJOR 0)
set(PROJECT_VERSION_MINOR 29)
set(PROJECT_VERSION_PATCH 0)
-set(PROJECT_VERSION_GFM 0)
+set(PROJECT_VERSION_GFM 1)
set(PROJECT_VERSION ${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH}.gfm.${PROJECT_VERSION_GFM})
include("FindAsan.cmake")
diff --git a/changelog.txt b/changelog.txt
index b86a41a22..5ba1e62b3 100644
--- a/changelog.txt
+++ b/changelog.txt
@@ -1,3 +1,8 @@
+[0.29.0.gfm.1]
+
+ * Fixed denial of service bug in GFM's table extension
+ per https://github.com/github/cmark-gfm/security/advisories/GHSA-7gc6-9qr5-hc85
+
[0.29.0]
* Update spec to 0.29.
From 32ffc7719e72c39422ebb3c161f28aa553799f30 Mon Sep 17 00:00:00 2001
From: Phill MV
Date: Wed, 1 Sep 2021 10:48:27 -0400
Subject: [PATCH 21/36] Renamed cmark_node->footnote.{ix,count} to
{ref_ix,def_count} to make intent more obvious.
---
src/blocks.c | 6 +++---
src/html.c | 8 ++++----
src/node.h | 4 ++--
3 files changed, 9 insertions(+), 9 deletions(-)
diff --git a/src/blocks.c b/src/blocks.c
index 836de2c8e..5d43ce339 100644
--- a/src/blocks.c
+++ b/src/blocks.c
@@ -484,10 +484,10 @@ static void process_footnotes(cmark_parser *parser) {
if (!footnote->ix)
footnote->ix = ++ix;
- // keep track of a) how many times this footnote def has been
- // referenced, and b) which reference count this footnote ref is at
+ // keep track of a) count of how many times this footnote def has been
+ // referenced, and b) which reference index this footnote ref is at.
// this is used by renderers when generating links and backreferences.
- cur->footnote.ix = ++footnote->node->footnote.count;
+ cur->footnote.ref_ix = ++footnote->node->footnote.def_count;
// store the footnote reference text label in the footnote ref's node's
// `user_data`, so that renderers can use the label when generating
diff --git a/src/html.c b/src/html.c
index a70271336..0e3ecb36d 100644
--- a/src/html.c
+++ b/src/html.c
@@ -68,9 +68,9 @@ static bool S_put_footnote_backref(cmark_html_renderer *renderer, cmark_strbuf *
cmark_strbuf_put(html, node->as.literal.data, node->as.literal.len);
cmark_strbuf_puts(html, "\" class=\"footnote-backref\">↩");
- if (node->footnote.count > 1)
+ if (node->footnote.def_count > 1)
{
- for(int i = 2; i <= node->footnote.count; i++) {
+ for(int i = 2; i <= node->footnote.def_count; i++) {
char n[32];
snprintf(n, sizeof(n), "%d", i);
@@ -428,9 +428,9 @@ static int S_render_node(cmark_html_renderer *renderer, cmark_node *node,
cmark_strbuf_puts(html, "\" id=\"fnref:");
cmark_strbuf_puts(html, node->user_data);
- if (node->footnote.ix > 1) {
+ if (node->footnote.ref_ix > 1) {
char n[32];
- snprintf(n, sizeof(n), "%d", node->footnote.ix);
+ snprintf(n, sizeof(n), "%d", node->footnote.ref_ix);
cmark_strbuf_puts(html, ":");
cmark_strbuf_put(html, (const unsigned char *)n, strlen(n));
}
diff --git a/src/node.h b/src/node.h
index c7cdc0f55..e158e908c 100644
--- a/src/node.h
+++ b/src/node.h
@@ -77,8 +77,8 @@ struct cmark_node {
cmark_syntax_extension *extension;
union {
- int ix;
- int count;
+ int ref_ix;
+ int def_count;
} footnote;
union {
From 17170400954862dc0a626ae7467aaef6f6f78448 Mon Sep 17 00:00:00 2001
From: Phill MV
Date: Wed, 1 Sep 2021 11:52:53 -0400
Subject: [PATCH 22/36] Added cmark_node.parent_footnote_def, removed usage of
'user_data', made sure to free allocated string in commonmark.c
---
src/blocks.c | 10 ++++------
src/commonmark.c | 16 ++++++++++++----
src/html.c | 4 ++--
src/node.h | 2 ++
4 files changed, 20 insertions(+), 12 deletions(-)
diff --git a/src/blocks.c b/src/blocks.c
index 5d43ce339..e19b7e905 100644
--- a/src/blocks.c
+++ b/src/blocks.c
@@ -484,17 +484,15 @@ static void process_footnotes(cmark_parser *parser) {
if (!footnote->ix)
footnote->ix = ++ix;
+ // store a reference to this footnote reference's footnote definition
+ // this is used by renderers when generating label ids
+ cur->parent_footnote_def = footnote->node;
+
// keep track of a) count of how many times this footnote def has been
// referenced, and b) which reference index this footnote ref is at.
// this is used by renderers when generating links and backreferences.
cur->footnote.ref_ix = ++footnote->node->footnote.def_count;
- // store the footnote reference text label in the footnote ref's node's
- // `user_data`, so that renderers can use the label when generating
- // links and backreferences.
- cur->user_data = parser->mem->calloc(1, (sizeof(char) * cur->as.literal.len) + 1);
- memmove(cur->user_data, cur->as.literal.data, cur->as.literal.len);
-
char n[32];
snprintf(n, sizeof(n), "%d", footnote->ix);
cmark_chunk_free(parser->mem, &cur->as.literal);
diff --git a/src/commonmark.c b/src/commonmark.c
index a368474f3..8fe03687b 100644
--- a/src/commonmark.c
+++ b/src/commonmark.c
@@ -477,7 +477,13 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node,
case CMARK_NODE_FOOTNOTE_REFERENCE:
if (entering) {
LIT("[^");
- OUT(node->user_data, false, LITERAL);
+
+ char *footnote_label = renderer->mem->calloc(1, (sizeof(char) * node->parent_footnote_def->as.literal.len) + 1);
+ memmove(footnote_label, node->parent_footnote_def->as.literal.data, node->parent_footnote_def->as.literal.len);
+
+ OUT(footnote_label, false, LITERAL);
+ renderer->mem->free(footnote_label);
+
LIT("]");
}
break;
@@ -487,10 +493,12 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node,
renderer->footnote_ix += 1;
LIT("[^");
- char *str = renderer->mem->calloc(1, (sizeof(char) * node->as.literal.len) + 1);
- memmove(str, node->as.literal.data, node->as.literal.len);
+ char *footnote_label = renderer->mem->calloc(1, (sizeof(char) * node->as.literal.len) + 1);
+ memmove(footnote_label, node->as.literal.data, node->as.literal.len);
+
+ OUT(footnote_label, false, LITERAL);
+ renderer->mem->free(footnote_label);
- OUT(str, false, LITERAL);
LIT("]:\n");
cmark_strbuf_puts(renderer->prefix, " ");
diff --git a/src/html.c b/src/html.c
index 0e3ecb36d..a3fe23802 100644
--- a/src/html.c
+++ b/src/html.c
@@ -424,9 +424,9 @@ static int S_render_node(cmark_html_renderer *renderer, cmark_node *node,
case CMARK_NODE_FOOTNOTE_REFERENCE:
if (entering) {
cmark_strbuf_puts(html, "");
}
diff --git a/test/extensions.txt b/test/extensions.txt
index c93e4dedd..37033b1ca 100644
--- a/test/extensions.txt
+++ b/test/extensions.txt
@@ -672,15 +672,15 @@ Hi!
[^unused]: This is unused.
.
-This is some text!. Other text..
-Here's a thing.
-And another thing.
+This is some text!. Other text..
+Here's a thing.
+And another thing.
This doesn't have a referent[^nope].
Hi!
-