diff --git a/NEWS b/NEWS index e819dac..d02019b 100644 --- a/NEWS +++ b/NEWS @@ -1,5 +1,12 @@ NEWS file for libxml2 +v2.11.4: May 18 2023 + +Fixes a serious regression. + +- parser: Fix regression when push parsing UTF-8 sequences + + v2.11.3: May 11 2023 Fixes more regressions. diff --git a/README.md b/README.md index 776e1cc..f9b4ba7 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ libxml2 Windows build with Visual Studio. -This version is libxml2-2.11.3. +This version is libxml2-2.11.4. To build, simply open the required solution file, and you know how to use Visual Studio, right? diff --git a/THIS_VERSION_IS_2.11.3 b/THIS_VERSION_IS_2.11.4 similarity index 100% rename from THIS_VERSION_IS_2.11.3 rename to THIS_VERSION_IS_2.11.4 diff --git a/distfiles/download.url b/distfiles/download.url index 170efb5..c87c7ae 100644 --- a/distfiles/download.url +++ b/distfiles/download.url @@ -1 +1 @@ -https://download.gnome.org/sources/libxml2/2.11/libxml2-2.11.3.tar.xz +https://download.gnome.org/sources/libxml2/2.11/libxml2-2.11.4.tar.xz diff --git a/distfiles/libxml2-2.11.3.tar.xz b/distfiles/libxml2-2.11.3.tar.xz deleted file mode 100644 index 5da9f38..0000000 Binary files a/distfiles/libxml2-2.11.3.tar.xz and /dev/null differ diff --git a/distfiles/libxml2-2.11.3-import.lst b/distfiles/libxml2-2.11.4-import.lst similarity index 100% rename from distfiles/libxml2-2.11.3-import.lst rename to distfiles/libxml2-2.11.4-import.lst diff --git a/distfiles/libxml2-2.11.3-import.md5 b/distfiles/libxml2-2.11.4-import.md5 similarity index 98% rename from distfiles/libxml2-2.11.3-import.md5 rename to distfiles/libxml2-2.11.4-import.md5 index 9a55e81..fbc5b78 100644 --- a/distfiles/libxml2-2.11.3-import.md5 +++ b/distfiles/libxml2-2.11.4-import.md5 @@ -2,7 +2,7 @@ 5aa30e4781d7c802b78ad4346632485d HTMLparser.c 7bcee4dcda730f1f34029529f3cc24a4 HTMLtree.c dd63184811cb2ff705c3e466364d3773 INSTALL -39aea02633e5243ad3798ee268a34a9d NEWS +201a96e1d38a965133ab1e20247dc32a NEWS 
c67f840acaa24a4752ec5a09c5b7c9ab README.libxml2.md 2774afd7cdd927d278dcc64f73112e0a SAX.c 5d0ea4ec1675df4c37ed947df682ff39 SAX2.c @@ -86,8 +86,8 @@ e857d2ed47c0848d6f469ebc16ef31f1 include/wsockcompat.h 678e168c1dacffd4da8d528e4e733d05 list.c a591291901040a0c5a8a4c187154d1de nanoftp.c e0bf81a96bd2674a5c51710b76b3b20a nanohttp.c -e73e07a7e57282942ac51b34094e45c9 parser.c -f3a661d72d9c0335fcd5a5ee948ddbee parserInternals.c +524e02fec3580c1095a6ed1650ec1b6d parser.c +f22c5bb3a2fa2ed3567e83a25dbcc774 parserInternals.c 247aeee5286dc32f9212a71764c8916b pattern.c be5da77f293be48c4611cc4dd456954d relaxng.c b171015076b703294c403dd53c4ef090 runsuite.c diff --git a/distfiles/libxml2-2.11.4.tar.xz b/distfiles/libxml2-2.11.4.tar.xz new file mode 100644 index 0000000..83ac0a3 Binary files /dev/null and b/distfiles/libxml2-2.11.4.tar.xz differ diff --git a/include/libxml/xmlversion.h b/include/libxml/xmlversion.h index 13d6299..c552d52 100644 --- a/include/libxml/xmlversion.h +++ b/include/libxml/xmlversion.h @@ -29,21 +29,21 @@ XMLPUBFUN void xmlCheckVersion(int version); * * the version string like "1.2.3" */ -#define LIBXML_DOTTED_VERSION "2.11.3" +#define LIBXML_DOTTED_VERSION "2.11.4" /** * LIBXML_VERSION: * * the version number: 1.2.3 value is 10203 */ -#define LIBXML_VERSION 21103 +#define LIBXML_VERSION 21104 /** * LIBXML_VERSION_STRING: * * the version number string, 1.2.3 value is "10203" */ -#define LIBXML_VERSION_STRING "21103" +#define LIBXML_VERSION_STRING "21104" /** * LIBXML_VERSION_EXTRA: @@ -58,7 +58,7 @@ XMLPUBFUN void xmlCheckVersion(int version); * Macro to check that the libxml version in use is compatible with * the version the software has been compiled against */ -#define LIBXML_TEST_VERSION xmlCheckVersion(21103); +#define LIBXML_TEST_VERSION xmlCheckVersion(21104); #ifndef VMS #if 0 diff --git a/parser.c b/parser.c index c6383f6..d3f30b2 100644 --- a/parser.c +++ b/parser.c @@ -4332,7 +4332,7 @@ xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) { return(buf); 
} -static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt); +static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial); /* * used for the test in the inner loop of the char data testing @@ -4373,17 +4373,13 @@ static const unsigned char test_char_data[256] = { }; /** - * xmlParseCharData: + * xmlParseCharDataInternal: * @ctxt: an XML parser context - * @cdata: unused - * - * DEPRECATED: Internal function, don't use. + * @partial: buffer may contain partial UTF-8 sequences * * Parse character data. Always makes progress if the first char isn't * '<' or '&'. * - * if we are within a CDATA section ']]>' marks an end of section. - * * The right angle bracket (>) may be represented using the string "&gt;", * and must, for compatibility, be escaped using "&gt;" or a character * reference when it appears in the string "]]>" in content, when that @@ -4391,9 +4387,8 @@ static const unsigned char test_char_data[256] = { * * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) */ - -void -xmlParseCharData(xmlParserCtxtPtr ctxt, ATTRIBUTE_UNUSED int cdata) { +static void +xmlParseCharDataInternal(xmlParserCtxtPtr ctxt, int partial) { const xmlChar *in; int nbchar = 0; int line = ctxt->input->line; @@ -4526,7 +4521,7 @@ xmlParseCharData(xmlParserCtxtPtr ctxt, ATTRIBUTE_UNUSED int cdata) { (*in == 0x09) || (*in == 0x0a)); ctxt->input->line = line; ctxt->input->col = col; - xmlParseCharDataComplex(ctxt); + xmlParseCharDataComplex(ctxt, partial); } /** @@ -4541,7 +4536,7 @@ xmlParseCharData(xmlParserCtxtPtr ctxt, ATTRIBUTE_UNUSED int cdata) { * of non-ASCII characters. 
*/ static void -xmlParseCharDataComplex(xmlParserCtxtPtr ctxt) { +xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial) { xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5]; int nbchar = 0; int cur, l; @@ -4604,15 +4599,42 @@ xmlParseCharDataComplex(xmlParserCtxtPtr ctxt) { } } } - if ((ctxt->input->cur < ctxt->input->end) && (!IS_CHAR(cur))) { - /* Generate the error and skip the offending character */ - xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, - "PCDATA invalid Char value %d\n", - cur ? cur : CUR); - NEXT; + /* + * cur == 0 can mean + * + * - XML_PARSER_EOF or memory error. This is checked above. + * - An actual 0 character. + * - End of buffer. + * - An incomplete UTF-8 sequence. This is allowed if partial is set. + */ + if (ctxt->input->cur < ctxt->input->end) { + if ((cur == 0) && (CUR != 0)) { + if (partial == 0) { + xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, + "Incomplete UTF-8 sequence starting with %02X\n", CUR); + NEXTL(1); + } + } else if ((cur != '<') && (cur != '&')) { + /* Generate the error and skip the offending character */ + xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, + "PCDATA invalid Char value %d\n", cur); + NEXTL(l); + } } } +/** + * xmlParseCharData: + * @ctxt: an XML parser context + * @cdata: unused + * + * DEPRECATED: Internal function, don't use. + */ +void +xmlParseCharData(xmlParserCtxtPtr ctxt, ATTRIBUTE_UNUSED int cdata) { + xmlParseCharDataInternal(ctxt, 0); +} + /** * xmlParseExternalID: * @ctxt: an XML parser context @@ -9875,7 +9897,7 @@ xmlParseContentInternal(xmlParserCtxtPtr ctxt) { * Last case, text. Note that References are handled directly. 
*/ else { - xmlParseCharData(ctxt, 0); + xmlParseCharDataInternal(ctxt, 0); } SHRINK; @@ -11662,7 +11684,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { goto done; } ctxt->checkIndex = 0; - xmlParseCharData(ctxt, 0); + xmlParseCharDataInternal(ctxt, !terminate); } break; } diff --git a/parserInternals.c b/parserInternals.c index 946df94..b92f642 100644 --- a/parserInternals.c +++ b/parserInternals.c @@ -715,14 +715,20 @@ xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) { avail = ctxt->input->end - ctxt->input->cur; - if ((avail < 2) || (cur[1] & 0xc0) != 0x80) + if (avail < 2) + goto incomplete_sequence; + if ((cur[1] & 0xc0) != 0x80) goto encoding_error; if ((c & 0xe0) == 0xe0) { - if ((avail < 3) || (cur[2] & 0xc0) != 0x80) + if (avail < 3) + goto incomplete_sequence; + if ((cur[2] & 0xc0) != 0x80) goto encoding_error; if ((c & 0xf0) == 0xf0) { + if (avail < 4) + goto incomplete_sequence; if (((c & 0xf8) != 0xf0) || - (avail < 4) || ((cur[3] & 0xc0) != 0x80)) + ((cur[3] & 0xc0) != 0x80)) goto encoding_error; /* 4-byte code */ *len = 4; @@ -784,17 +790,8 @@ xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) { return(0xA); } return(*ctxt->input->cur); -encoding_error: - /* - * An encoding problem may arise from a truncated input buffer - * splitting a character in the middle. In that case do not raise - * an error but return 0 to indicate an end of stream problem - */ - if (ctxt->input->end - ctxt->input->cur < 4) { - *len = 0; - return(0); - } +encoding_error: /* * If we detect an UTF8 error that probably mean that the * input encoding didn't get properly advertised in the @@ -802,7 +799,11 @@ xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) { * to ISO-Latin-1 (if you don't like this policy, just declare the * encoding !) 
*/ - { + if (ctxt->input->end - ctxt->input->cur < 4) { + __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, + "Input is not proper UTF-8, indicate encoding !\n", + NULL, NULL); + } else { char buffer[150]; snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", @@ -815,6 +816,16 @@ xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) { ctxt->charset = XML_CHAR_ENCODING_8859_1; *len = 1; return(*ctxt->input->cur); + +incomplete_sequence: + /* + * An encoding problem may arise from a truncated input buffer + * splitting a character in the middle. In that case do not raise + * an error but return 0. This should only happen when push parsing + * char data. + */ + *len = 0; + return(0); } /** diff --git a/win32/rcVersion.h b/win32/rcVersion.h index d09cb2d..40691c7 100644 --- a/win32/rcVersion.h +++ b/win32/rcVersion.h @@ -1,4 +1,4 @@ #define LIBXML_MAJOR_VERSION 2 #define LIBXML_MINOR_VERSION 11 -#define LIBXML_MICRO_VERSION 3 -#define LIBXML_DOTTED_VERSION "2.11.3" +#define LIBXML_MICRO_VERSION 4 +#define LIBXML_DOTTED_VERSION "2.11.4"