From 78e2b78ee1b71bf0b4a3790a72a8f76538980976 Mon Sep 17 00:00:00 2001 From: Nick Wellnhofer Date: Mon, 24 Aug 2020 20:06:10 +0200 Subject: [PATCH] Avoid quadratic output growth with reference links Keep track of the number of bytes added through expansion of reference links and limit the total to the size of the input document. Always allow a minimum of 100KB. Unfortunately, cmark has no error handling, so all we can do is to stop expanding reference links without returning an error. This should never be an issue in practice though. The 100KB minimum alone should cover all real-world cases. See issue #354. --- src/blocks.c | 14 ++++++++++++++ src/parser.h | 1 + src/references.c | 17 ++++++++++++++++- src/references.h | 3 +++ 4 files changed, 34 insertions(+), 1 deletion(-) diff --git a/src/blocks.c b/src/blocks.c index df24d6002..f2eeb05e4 100644 --- a/src/blocks.c +++ b/src/blocks.c @@ -8,6 +8,7 @@ #include <stdlib.h> #include <assert.h> #include <stdio.h> +#include <limits.h> #include "cmark_ctype.h" #include "config.h" @@ -518,6 +519,14 @@ static cmark_node *finalize_document(cmark_parser *parser) { } finalize(parser, parser->root); + + // Limit total size of extra content created from reference links to + // document size to avoid superlinear growth. Always allow 100KB. 
+ if (parser->total_size > 100000) + parser->refmap->max_ref_size = parser->total_size; + else + parser->refmap->max_ref_size = 100000; + process_inlines(parser->mem, parser->root, parser->refmap, parser->options); cmark_strbuf_free(&parser->content); @@ -564,6 +573,11 @@ static void S_parser_feed(cmark_parser *parser, const unsigned char *buffer, const unsigned char *end = buffer + len; static const uint8_t repl[] = {239, 191, 189}; + if (len > UINT_MAX - parser->total_size) + parser->total_size = UINT_MAX; + else + parser->total_size += len; + // Skip UTF-8 BOM if present; see #334 if (parser->line_number == 0 && parser->column == 0 && len >= 3 && *buffer == 0xEF && *(buffer + 1) == 0xBB && diff --git a/src/parser.h b/src/parser.h index 8f6c694f7..f546ace11 100644 --- a/src/parser.h +++ b/src/parser.h @@ -32,6 +32,7 @@ struct cmark_parser { cmark_strbuf content; int options; bool last_buffer_ended_with_cr; + unsigned int total_size; }; #ifdef __cplusplus diff --git a/src/references.c b/src/references.c index 623006a73..39b4f2bfa 100644 --- a/src/references.c +++ b/src/references.c @@ -62,6 +62,11 @@ void cmark_reference_create(cmark_reference_map *map, cmark_chunk *label, ref->age = map->size; ref->next = map->refs; + if (ref->url != NULL) + ref->size += strlen((char*)ref->url); + if (ref->title != NULL) + ref->size += strlen((char*)ref->title); + map->refs = ref; map->size++; } @@ -110,6 +115,7 @@ static void sort_references(cmark_reference_map *map) { cmark_reference *cmark_reference_lookup(cmark_reference_map *map, cmark_chunk *label) { cmark_reference **ref = NULL; + cmark_reference *r = NULL; unsigned char *norm; if (label->len < 1 || label->len > MAX_LINK_LABEL_LENGTH) @@ -128,7 +134,16 @@ cmark_reference *cmark_reference_lookup(cmark_reference_map *map, ref = (cmark_reference **)bsearch(norm, map->sorted, map->size, sizeof(cmark_reference *), refsearch); map->mem->free(norm); - return ref ? 
ref[0] : NULL; + + if (ref != NULL) { + r = ref[0]; + /* Check for expansion limit */ + if (map->max_ref_size && r->size > map->max_ref_size - map->ref_size) + return NULL; + map->ref_size += r->size; + } + + return r; } void cmark_reference_map_free(cmark_reference_map *map) { diff --git a/src/references.h b/src/references.h index cc5950949..b069d920c 100644 --- a/src/references.h +++ b/src/references.h @@ -13,6 +13,7 @@ struct cmark_reference { unsigned char *url; unsigned char *title; unsigned int age; + unsigned int size; }; typedef struct cmark_reference cmark_reference; @@ -22,6 +23,8 @@ struct cmark_reference_map { cmark_reference *refs; cmark_reference **sorted; unsigned int size; + unsigned int ref_size; + unsigned int max_ref_size; }; typedef struct cmark_reference_map cmark_reference_map;