From 3bc2f3e25df0cecc5dc269f7ccae65d0f386f06a Mon Sep 17 00:00:00 2001 From: QuietMisdreavus Date: Tue, 1 Aug 2023 06:07:33 -0700 Subject: [PATCH] integrate changes from cmark-gfm 0.29.0.gfm.12 and gfm.13 (#61) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Normalize nomenclature: marker row vs. delimiter row The code for the table extension used the term 'marker row', but the spec calls it 'delimiter row'. This change normalizes the terminology so that it's consistent. * Update autolink.c ``` ../../../../ext/markly/autolink.c: In function ‘postprocess_text’: ../../../../ext/markly/autolink.c:364:31: warning: passing argument 1 of ‘validate_protocol’ discards ‘const’ qualifier from pointer target type [-Wdiscarded-qualifiers] 364 | if (validate_protocol("mailto:", data + start + offset + max_rewind, rewind, max_rewind)) { | ^~~~~~~~~ ../../../../ext/markly/commonmark.c: In function ‘S_render_node’: ../../../../ext/markly/autolink.c:299:36: note: expected ‘char *’ but argument is of type ‘const char *’ 299 | static bool validate_protocol(char protocol[], uint8_t *data, size_t rewind, size_t max_rewind) { | ~~~~~^~~~~~~~~~ ``` * Update commonmark.c ``` ../../../../ext/markly/commonmark.c:405:18: warning: assignment discards ‘const’ qualifier from pointer target type [-Wdiscarded-qualifiers] 405 | emph_delim = "_"; | ^ ``` * Fix GHSL-2023-119: prevent quadratic performance by not allowing very deeply nested footnote definitions. * Fix GHSL-2023-117: store cell index on node so that it doesn't need to be recomputed during rendering. * Fix GHSL-2023-118: limit number of autocompleted table cells to prevent DOS. * Expose CMARK_NODE_FOOTNOTE_DEFINION literal value. In addition, fix a bug where the length of the literal value was calculated AFTER the actual literal string (null terminated) was allocated. * Update src/node.h Co-authored-by: Phill MV * Rename custom_int -> cell_index. * Add newline * Remove unnecessary scope. * Create codeql.yml * Changelog and version bump for 0.29.0.12 * Fix format specifier for printing a size_t * Changelog and version bump for 0.29.0.13 * move cell index into node_cell_data --------- Co-authored-by: Waldir Pimenta Co-authored-by: Samuel Williams Co-authored-by: Kevin Backhouse Co-authored-by: Phill MV Co-authored-by: Bas Alberts <13686387+anticomputer@users.noreply.github.com> Co-authored-by: Bas Alberts --- .github/workflows/codeql.yml | 77 ++++++++++++++++++ CMakeLists.txt | 2 +- api_test/main.c | 2 +- changelog.txt | 12 +++ extensions/autolink.c | 2 +- extensions/table.c | 152 +++++++++++++++++++++++------------ src/blocks.c | 6 +- src/commonmark.c | 2 +- src/node.c | 1 + test/extensions.txt | 2 +- 10 files changed, 201 insertions(+), 57 deletions(-) create mode 100644 .github/workflows/codeql.yml diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml new file mode 100644 index 000000000..4648c7cf7 --- /dev/null +++ b/.github/workflows/codeql.yml @@ -0,0 +1,77 @@ +# For most projects, this workflow file will not need changing; you simply need +# to commit it to your repository. +# +# You may wish to alter this file to override the set of languages analyzed, +# or to provide custom queries or build logic. +# +# ******** NOTE ******** +# We have attempted to detect the languages in your repository. Please check +# the `language` matrix defined below to confirm you have the correct set of +# supported CodeQL languages. +# +name: "CodeQL" + +on: + push: + branches: [ "master" ] + pull_request: + # The branches below must be a subset of the branches above + branches: [ "master" ] + schedule: + - cron: '45 14 * * 3' + +jobs: + analyze: + name: Analyze + runs-on: ${{ (matrix.language == 'swift' && 'macos-latest') || 'ubuntu-latest' }} + timeout-minutes: ${{ (matrix.language == 'swift' && 120) || 360 }} + permissions: + actions: read + contents: read + security-events: write + + strategy: + fail-fast: false + matrix: + language: [ 'cpp', 'javascript', 'python', 'ruby' ] + # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby', 'swift' ] + # Use only 'java' to analyze code written in Java, Kotlin or both + # Use only 'javascript' to analyze code written in JavaScript, TypeScript or both + # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support + + steps: + - name: Checkout repository + uses: actions/checkout@v3 + + # Initializes the CodeQL tools for scanning. + - name: Initialize CodeQL + uses: github/codeql-action/init@v2 + with: + languages: ${{ matrix.language }} + # If you wish to specify custom queries, you can do so here or in a config file. + # By default, queries listed here will override any specified in a config file. + # Prefix the list here with "+" to use these queries and those in the config file. + + # For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs + # queries: security-extended,security-and-quality + + + # Autobuild attempts to build any compiled languages (C/C++, C#, Go, Java, or Swift). + # If this step fails, then you should remove it and run the build manually (see below) + - name: Autobuild + uses: github/codeql-action/autobuild@v2 + + # ℹ️ Command-line programs to run using the OS shell. + # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun + + # If the Autobuild fails above, remove it and uncomment the following three lines. + # modify them (or add more) to build your code if your project, please refer to the EXAMPLE below for guidance. + + # - run: | + # echo "Run, Build Application using script" + # ./location_of_script_within_repo/buildscript.sh + + - name: Perform CodeQL Analysis + uses: github/codeql-action/analyze@v2 + with: + category: "/language:${{matrix.language}}" diff --git a/CMakeLists.txt b/CMakeLists.txt index ec620b7e2..75ffe79b8 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,7 +4,7 @@ project(cmark-gfm) set(PROJECT_VERSION_MAJOR 0) set(PROJECT_VERSION_MINOR 29) set(PROJECT_VERSION_PATCH 0) -set(PROJECT_VERSION_GFM 11) +set(PROJECT_VERSION_GFM 13) set(PROJECT_VERSION ${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH}.gfm.${PROJECT_VERSION_GFM}) include("FindAsan.cmake") diff --git a/api_test/main.c b/api_test/main.c index e950451ba..e0fe02968 100644 --- a/api_test/main.c +++ b/api_test/main.c @@ -836,7 +836,7 @@ static void test_continuation_byte(test_batch_runner *runner, char *html = cmark_markdown_to_html(buf, strlen(buf), CMARK_OPT_VALIDATE_UTF8); - STR_EQ(runner, html, expected, "invalid utf8 continuation byte %d/%d", pos, + STR_EQ(runner, html, expected, "invalid utf8 continuation byte %zu/%zu", pos, len); free(html); } diff --git a/changelog.txt b/changelog.txt index 848e71029..94e8a19fd 100644 --- a/changelog.txt +++ b/changelog.txt @@ -1,3 +1,15 @@ +[0.29.0.gfm.13] + * Normalized marker row vs. delimiter row nomenclature (#273) + * Exposed CMARK_NODE_FOOTNOTE_DEFINITION literal value (#336) + * Fix format specifier for printing a size_t (#340) + +[0.29.0.gfm.12] + + * Fixed polynomial time complexity issues per + https://github.com/github/cmark-gfm/security/advisories/GHSA-w4qg-3vf7-m9x5 + * Added CodeQL project integration (#337) + * Addressed const qualifier discard compiler warnings (#330, #331) + [0.29.0.gfm.11] * Improved fixes for polynomial time complexity issues per diff --git a/extensions/autolink.c b/extensions/autolink.c index 491d96c32..d898cd2d4 100644 --- a/extensions/autolink.c +++ b/extensions/autolink.c @@ -296,7 +296,7 @@ static cmark_node *match(cmark_syntax_extension *ext, cmark_parser *parser, // inline was finished in inlines.c. } -static bool validate_protocol(char protocol[], uint8_t *data, size_t rewind, size_t max_rewind) { +static bool validate_protocol(const char protocol[], uint8_t *data, size_t rewind, size_t max_rewind) { size_t len = strlen(protocol); if (len > (max_rewind - rewind)) { diff --git a/extensions/table.c b/extensions/table.c index 337dcf02f..6c7a2bbae 100644 --- a/extensions/table.c +++ b/extensions/table.c @@ -11,6 +11,9 @@ #include "table.h" #include "cmark-gfm-core-extensions.h" +// Limit to prevent a malicious input from causing a denial of service. +#define MAX_AUTOCOMPLETED_CELLS 0x80000 + // Custom node flag, initialized in `create_table_extension`. static cmark_node_internal_flags CMARK_NODE__TABLE_VISITED; @@ -19,6 +22,7 @@ cmark_node_type CMARK_NODE_TABLE, CMARK_NODE_TABLE_ROW, typedef struct { unsigned colspan, rowspan; + int cell_index; } node_cell_data; typedef struct { @@ -36,6 +40,8 @@ typedef struct { typedef struct { uint16_t n_columns; uint8_t *alignments; + int n_rows; + int n_nonempty_cells; } node_table; typedef struct { @@ -94,6 +100,33 @@ static int set_n_table_columns(cmark_node *node, uint16_t n_columns) { return 1; } +// Increment the number of rows in the table. Also update n_nonempty_cells, +// which keeps track of the number of cells which were parsed from the +// input file. (If one of the rows is too short, then the trailing cells +// are autocompleted. Autocompleted cells are not counted in n_nonempty_cells.) +// The purpose of this is to prevent a malicious input from generating a very +// large number of autocompleted cells, which could cause a denial of service +// vulnerability. +static int incr_table_row_count(cmark_node *node, int i) { + if (!node || node->type != CMARK_NODE_TABLE) { + return 0; + } + + ((node_table *)node->as.opaque)->n_rows++; + ((node_table *)node->as.opaque)->n_nonempty_cells += i; + return 1; +} + +// Calculate the number of autocompleted cells. +static int get_n_autocompleted_cells(cmark_node *node) { + if (!node || node->type != CMARK_NODE_TABLE) { + return 0; + } + + const node_table *nt = (node_table *)node->as.opaque; + return (nt->n_columns * nt->n_rows) - nt->n_nonempty_cells; +} + static uint8_t *get_table_alignments(cmark_node *node) { if (!node || node->type != CMARK_NODE_TABLE) return 0; @@ -109,6 +142,29 @@ static int set_table_alignments(cmark_node *node, uint8_t *alignments) { return 1; } +static uint8_t get_cell_alignment(cmark_node *node) { + if (!node || node->type != CMARK_NODE_TABLE_CELL) + return 0; + + node_cell_data *data = (node_cell_data *)node->as.opaque; + if (!data) + return 0; + const uint8_t *alignments = get_table_alignments(node->parent->parent); + int i = data->cell_index; + return alignments[i]; +} + +static int set_cell_index(cmark_node *node, int i) { + if (!node || node->type != CMARK_NODE_TABLE_CELL) + return 0; + + node_cell_data *data = (node_cell_data *)node->as.opaque; + if (!data) + return 0; + data->cell_index = i; + return 1; +} + static unsigned get_cell_colspan(cmark_node *node) { if (!node || node->type != CMARK_NODE_TABLE_CELL) return UINT_MAX; @@ -261,10 +317,9 @@ static table_row *row_from_string(cmark_syntax_extension *self, --cell->start_offset; ++cell->internal_offset; } + cell->cell_data = (node_cell_data *)parser->mem->calloc(1, sizeof(node_cell_data)); if (parser->options & CMARK_OPT_TABLE_SPANS) { - cell->cell_data = (node_cell_data *)parser->mem->calloc(1, sizeof(node_cell_data)); - // Check for a column-spanning cell if (row->n_columns > 0 && cmark_strbuf_len(cell->buf) == 0 && cell->start_offset == cell->end_offset) { cell->cell_data->colspan = 0; @@ -294,7 +349,8 @@ static table_row *row_from_string(cmark_syntax_extension *self, } } } else { - cell->cell_data = NULL; + cell->cell_data->colspan = 1; + cell->cell_data->rowspan = 1; } // make sure we never wrap row->n_columns @@ -366,7 +422,7 @@ static cmark_node *try_opening_table_header(cmark_syntax_extension *self, unsigned char *input, int len) { cmark_node *table_header; table_row *header_row = NULL; - table_row *marker_row = NULL; + table_row *delimiter_row = NULL; node_table_row *ntr; const char *parent_string; uint16_t i; @@ -379,16 +435,16 @@ static cmark_node *try_opening_table_header(cmark_syntax_extension *self, return parent_container; } - // Since scan_table_start was successful, we must have a marker row. - marker_row = row_from_string(self, parser, - input + cmark_parser_get_first_nonspace(parser), - len - cmark_parser_get_first_nonspace(parser)); + // Since scan_table_start was successful, we must have a delimiter row. + delimiter_row = row_from_string( + self, parser, input + cmark_parser_get_first_nonspace(parser), + len - cmark_parser_get_first_nonspace(parser)); // assert may be optimized out, don't rely on it for security boundaries - if (!marker_row) { + if (!delimiter_row) { return parent_container; } - assert(marker_row); + assert(delimiter_row); cmark_arena_push(); @@ -398,8 +454,8 @@ static cmark_node *try_opening_table_header(cmark_syntax_extension *self, parent_string = cmark_node_get_string_content(parent_container); header_row = row_from_string(self, parser, (unsigned char *)parent_string, (int)strlen(parent_string)); - if (!header_row || header_row->n_columns != marker_row->n_columns) { - free_table_row(parser->mem, marker_row); + if (!header_row || header_row->n_columns != delimiter_row->n_columns) { + free_table_row(parser->mem, delimiter_row); free_table_row(parser->mem, header_row); cmark_arena_pop(); parent_container->flags |= CMARK_NODE__TABLE_VISITED; @@ -407,14 +463,14 @@ static cmark_node *try_opening_table_header(cmark_syntax_extension *self, } if (cmark_arena_pop()) { - marker_row = row_from_string( + delimiter_row = row_from_string( self, parser, input + cmark_parser_get_first_nonspace(parser), len - cmark_parser_get_first_nonspace(parser)); header_row = row_from_string(self, parser, (unsigned char *)parent_string, (int)strlen(parent_string)); // row_from_string can return NULL, add additional check to ensure n_columns match - if (!marker_row || !header_row || header_row->n_columns != marker_row->n_columns) { - free_table_row(parser->mem, marker_row); + if (!delimiter_row || !header_row || header_row->n_columns != delimiter_row->n_columns) { + free_table_row(parser->mem, delimiter_row); free_table_row(parser->mem, header_row); return parent_container; } @@ -422,7 +478,7 @@ static cmark_node *try_opening_table_header(cmark_syntax_extension *self, if (!cmark_node_set_type(parent_container, CMARK_NODE_TABLE)) { free_table_row(parser->mem, header_row); - free_table_row(parser->mem, marker_row); + free_table_row(parser->mem, delimiter_row); return parent_container; } @@ -435,12 +491,12 @@ static cmark_node *try_opening_table_header(cmark_syntax_extension *self, parent_container->as.opaque = parser->mem->calloc(1, sizeof(node_table)); set_n_table_columns(parent_container, header_row->n_columns); - // allocate alignments based on marker_row->n_columns - // since we populate the alignments array based on marker_row->cells + // allocate alignments based on delimiter_row->n_columns + // since we populate the alignments array based on delimiter_row->cells uint8_t *alignments = - (uint8_t *)parser->mem->calloc(marker_row->n_columns, sizeof(uint8_t)); - for (i = 0; i < marker_row->n_columns; ++i) { - node_cell *node = &marker_row->cells[i]; + (uint8_t *)parser->mem->calloc(delimiter_row->n_columns, sizeof(uint8_t)); + for (i = 0; i < delimiter_row->n_columns; ++i) { + node_cell *node = &delimiter_row->cells[i]; bool left = node->buf->ptr[0] == ':', right = node->buf->ptr[node->buf->size - 1] == ':'; if (left && right) @@ -462,27 +518,28 @@ static cmark_node *try_opening_table_header(cmark_syntax_extension *self, table_header->as.opaque = ntr = (node_table_row *)parser->mem->calloc(1, sizeof(node_table_row)); ntr->is_header = true; - { - for (i = 0; i < header_row->n_columns; ++i) { - node_cell *cell = &header_row->cells[i]; - cmark_node *header_cell = cmark_parser_add_child(parser, table_header, - CMARK_NODE_TABLE_CELL, parent_container->start_column + cell->start_offset); - header_cell->start_line = header_cell->end_line = parent_container->start_line; - header_cell->internal_offset = cell->internal_offset; - header_cell->end_column = parent_container->start_column + cell->end_offset; - header_cell->as.opaque = cell->cell_data; - cell->cell_data = NULL; - cmark_node_set_string_content(header_cell, (char *) cell->buf->ptr); - cmark_node_set_syntax_extension(header_cell, self); - } + for (i = 0; i < header_row->n_columns; ++i) { + node_cell *cell = &header_row->cells[i]; + cmark_node *header_cell = cmark_parser_add_child(parser, table_header, + CMARK_NODE_TABLE_CELL, parent_container->start_column + cell->start_offset); + header_cell->start_line = header_cell->end_line = parent_container->start_line; + header_cell->internal_offset = cell->internal_offset; + header_cell->end_column = parent_container->start_column + cell->end_offset; + header_cell->as.opaque = cell->cell_data; + cell->cell_data = NULL; + cmark_node_set_string_content(header_cell, (char *) cell->buf->ptr); + cmark_node_set_syntax_extension(header_cell, self); + set_cell_index(header_cell, i); } + incr_table_row_count(parent_container, i); + cmark_parser_advance_offset( parser, (char *)input, (int)strlen((char *)input) - 1 - cmark_parser_get_offset(parser), false); free_table_row(parser->mem, header_row); - free_table_row(parser->mem, marker_row); + free_table_row(parser->mem, delimiter_row); return parent_container; } @@ -496,6 +553,10 @@ static cmark_node *try_opening_table_row(cmark_syntax_extension *self, if (cmark_parser_is_blank(parser)) return NULL; + if (get_n_autocompleted_cells(parent_container) > MAX_AUTOCOMPLETED_CELLS) { + return NULL; + } + table_row_block = cmark_parser_add_child(parser, parent_container, CMARK_NODE_TABLE_ROW, parent_container->start_column); @@ -555,12 +616,16 @@ static cmark_node *try_opening_table_row(cmark_syntax_extension *self, cell->cell_data = NULL; cmark_node_set_string_content(node, (char *) cell->buf->ptr); cmark_node_set_syntax_extension(node, self); + set_cell_index(node, i); } + incr_table_row_count(parent_container, i); + for (; i < table_columns; ++i) { cmark_node *node = cmark_parser_add_child( parser, table_row_block, CMARK_NODE_TABLE_CELL, 0); cmark_node_set_syntax_extension(node, self); + set_cell_index(node, i); } } @@ -759,13 +824,7 @@ static const char *xml_attr(cmark_syntax_extension *extension, cmark_node *node) { if (node->type == CMARK_NODE_TABLE_CELL) { if (cmark_gfm_extensions_get_table_row_is_header(node->parent)) { - uint8_t *alignments = get_table_alignments(node->parent->parent); - int i = 0; - cmark_node *n; - for (n = node->parent->first_child; n; n = n->next, ++i) - if (n == node) - break; - switch (alignments[i]) { + switch (get_cell_alignment(node)) { case 'l': return " align=\"left\""; case 'c': return " align=\"center\""; case 'r': return " align=\"right\""; @@ -886,7 +945,6 @@ static void html_render(cmark_syntax_extension *extension, cmark_event_type ev_type, int options) { bool entering = (ev_type == CMARK_EVENT_ENTER); cmark_strbuf *html = renderer->html; - cmark_node *n; // XXX: we just monopolise renderer->opaque. struct html_table_state *table_state = @@ -939,7 +997,6 @@ static void html_render(cmark_syntax_extension *extension, unsigned rowspan = get_cell_rowspan(node); // A value of zero in either span means that this cell is a "filler" cell. Don't render those. if (colspan > 0 && rowspan > 0) { - uint8_t *alignments = get_table_alignments(node->parent->parent); if (entering) { cmark_html_render_cr(html); if (table_state->in_table_header) { @@ -948,12 +1005,7 @@ static void html_render(cmark_syntax_extension *extension, cmark_strbuf_puts(html, "parent->first_child; n; n = n->next, ++i) - if (n == node) - break; - - switch (alignments[i]) { + switch (get_cell_alignment(node)) { case 'l': html_table_add_align(html, "left", options); break; case 'c': html_table_add_align(html, "center", options); break; case 'r': html_table_add_align(html, "right", options); break; diff --git a/src/blocks.c b/src/blocks.c index a7290652c..f3b20580e 100644 --- a/src/blocks.c +++ b/src/blocks.c @@ -1246,15 +1246,17 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container, parser->first_nonspace + 1); S_advance_offset(parser, input, input->len - 1 - parser->offset, false); } else if (!indented && - parser->options & CMARK_OPT_FOOTNOTES && + (parser->options & CMARK_OPT_FOOTNOTES) && + depth < MAX_LIST_DEPTH && (matched = scan_footnote_definition(input, parser->first_nonspace))) { cmark_chunk c = cmark_chunk_dup(input, parser->first_nonspace + 2, matched - 2); - cmark_chunk_to_cstr(parser->mem, &c); while (c.data[c.len - 1] != ']') --c.len; --c.len; + cmark_chunk_to_cstr(parser->mem, &c); + S_advance_offset(parser, input, parser->first_nonspace + matched - parser->offset, false); *container = add_child(parser, *container, CMARK_NODE_FOOTNOTE_DEFINITION, parser->first_nonspace + matched + 1); (*container)->as.literal = c; diff --git a/src/commonmark.c b/src/commonmark.c index d296567e0..f1698687f 100644 --- a/src/commonmark.c +++ b/src/commonmark.c @@ -166,7 +166,7 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node, char fencechar[2] = {'\0', '\0'}; size_t info_len, code_len; char listmarker[LISTMARKER_SIZE]; - char *emph_delim; + const char *emph_delim; bool first_in_list_item; bufsize_t marker_width; bool allow_wrap = renderer->width > 0 && !(CMARK_OPT_NOBREAKS & options) && diff --git a/src/node.c b/src/node.c index d93ed307d..49c371ff2 100644 --- a/src/node.c +++ b/src/node.c @@ -417,6 +417,7 @@ const char *cmark_node_get_literal(cmark_node *node) { case CMARK_NODE_HTML_INLINE: case CMARK_NODE_CODE: case CMARK_NODE_FOOTNOTE_REFERENCE: + case CMARK_NODE_FOOTNOTE_DEFINITION: return cmark_chunk_to_cstr(NODE_MEM(node), &node->as.literal); case CMARK_NODE_CODE_BLOCK: diff --git a/test/extensions.txt b/test/extensions.txt index fe98fe548..19c06d5fa 100644 --- a/test/extensions.txt +++ b/test/extensions.txt @@ -208,7 +208,7 @@ fff | ggg | hhh | iii | jjj ### Table cell count mismatches -The header and marker row must match. +The header and delimiter row must match. ```````````````````````````````` example | a | b | c |