diff --git a/JSBSim.iss.in b/JSBSim.iss.in index 7a3ceacc3..4ab9030df 100644 --- a/JSBSim.iss.in +++ b/JSBSim.iss.in @@ -21,7 +21,8 @@ Name: "devfiles\matlab"; Description: "Files to build a JSBSim S-Function for Ma [Files] Source: "${CMAKE_BINARY_DIR}\src\JSBSim.exe"; DestDir: "{app}"; Components: JSBSim -Source: "${CMAKE_SOURCE_DIR}\COPYING"; DestDir: "{app}"; Components: JSBSim devfiles +Source: "${CMAKE_SOURCE_DIR}\COPYING"; DestDir: "{app}"; Components: JSBSim +Source: "${CMAKE_SOURCE_DIR}\src\simgear\xml\Expat.COPYING"; DestDir: "{app}"; Components: JSBSim Source: "${CMAKE_BINARY_DIR}\utils\aeromatic++\aeromatic.exe"; DestDir: "{app}\aeromatic++"; Components: aeromatic Source: "${CMAKE_BINARY_DIR}\msvcp*.dll"; DestDir: "{app}"; Components: JSBSim aeromatic Source: "${CMAKE_SOURCE_DIR}\aircraft\*.xml"; DestDir: "{app}\aircraft"; Flags: recursesubdirs; Components: data diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index af451a7ad..9edc3fa1e 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -43,9 +43,12 @@ file(RELATIVE_PATH libJSBSim_PATH ${CMAKE_SOURCE_DIR} ${libJSBSim_DIRECTORY}) file(COPY ${CMAKE_SOURCE_DIR}/README.md DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) file(COPY ${CMAKE_SOURCE_DIR}/COPYING DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) file(RENAME ${CMAKE_CURRENT_BINARY_DIR}/COPYING ${CMAKE_CURRENT_BINARY_DIR}/LICENSE.txt) -configure_file(${CMAKE_SOURCE_DIR}/src/simgear/xml/xmltok_impl.c + +set(SIMGEAR_SOURCE_DIR ${CMAKE_SOURCE_DIR}/src/simgear/xml) +file(COPY ${SIMGEAR_SOURCE_DIR}/Expat.COPYING DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) +configure_file(${SIMGEAR_SOURCE_DIR}/xmltok_impl.c ${CMAKE_CURRENT_BINARY_DIR}/src/simgear/xml/xmltok_impl.c COPYONLY) -configure_file(${CMAKE_SOURCE_DIR}/src/simgear/xml/xmltok_ns.c +configure_file(${SIMGEAR_SOURCE_DIR}/xmltok_ns.c ${CMAKE_CURRENT_BINARY_DIR}/src/simgear/xml/xmltok_ns.c COPYONLY) foreach(_FILE "MANIFEST.in" "ExceptionManagement.h" "fpectl/fpectlmodule.h" "pyproject.toml") diff --git a/python/MANIFEST.in b/python/MANIFEST.in index b0cd02d16..859086ece 100644 --- a/python/MANIFEST.in +++ b/python/MANIFEST.in @@ -1,4 +1,5 @@ include LICENSE.txt +include Expat.COPYING recursive-include . *.h* include *.pyx include *.pxd diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 5d6473b97..f5cdd084c 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -75,8 +75,9 @@ if(EXPAT_FOUND) set(ALL_LINK_LIBRARIES ${EXPAT_LIBRARIES}) endif() else() - list(APPEND COMPILE_DEFINITIONS HAVE_EXPAT_CONFIG_H) -endif() + list(APPEND COMPILE_DEFINITIONS HAVE_EXPAT_CONFIG_H XML_DEV_URANDOM) + list(APPEND MSVC_COMPILE_DEFINITIONS XML_STATIC) +endif(EXPAT_FOUND) # Manage compile definitions set(JSBSIM_COMPILE_DEFINITIONS ${COMPILE_DEFINITIONS} PARENT_SCOPE) diff --git a/src/simgear/xml/CMakeLists.txt b/src/simgear/xml/CMakeLists.txt index 052f099c6..e63f9d9fd 100644 --- a/src/simgear/xml/CMakeLists.txt +++ b/src/simgear/xml/CMakeLists.txt @@ -21,7 +21,8 @@ if(NOT EXPAT_FOUND) xmlrole.h xmltok.h xmltok_impl.h - expat_config.h) + expat_config.h + siphash.h) endif() add_library(Xml OBJECT ${HEADERS} ${SOURCES}) diff --git a/src/simgear/xml/Expat.COPYING b/src/simgear/xml/Expat.COPYING new file mode 100644 index 000000000..3c0142e71 --- /dev/null +++ b/src/simgear/xml/Expat.COPYING @@ -0,0 +1,21 @@ +Copyright (c) 1998-2000 Thai Open Source Software Center Ltd and Clark Cooper +Copyright (c) 2001-2019 Expat maintainers + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be included +in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/src/simgear/xml/ascii.h b/src/simgear/xml/ascii.h index 337e5bb7e..1f594d2e5 100644 --- a/src/simgear/xml/ascii.h +++ b/src/simgear/xml/ascii.h @@ -1,5 +1,36 @@ -/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd - See the file COPYING for copying permission. +/* + __ __ _ + ___\ \/ /_ __ __ _| |_ + / _ \\ /| '_ \ / _` | __| + | __// \| |_) | (_| | |_ + \___/_/\_\ .__/ \__,_|\__| + |_| XML parser + + Copyright (c) 1999-2000 Thai Open Source Software Center Ltd + Copyright (c) 2000 Clark Cooper + Copyright (c) 2002 Fred L. Drake, Jr. + Copyright (c) 2007 Karl Waclawek + Copyright (c) 2017 Sebastian Pipping + Licensed under the MIT license: + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to permit + persons to whom the Software is furnished to do so, subject to the + following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN + NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, + DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + USE OR OTHER DEALINGS IN THE SOFTWARE. */ #define ASCII_A 0x41 @@ -83,3 +114,10 @@ #define ASCII_LSQB 0x5B #define ASCII_RSQB 0x5D #define ASCII_UNDERSCORE 0x5F +#define ASCII_LPAREN 0x28 +#define ASCII_RPAREN 0x29 +#define ASCII_FF 0x0C +#define ASCII_SLASH 0x2F +#define ASCII_HASH 0x23 +#define ASCII_PIPE 0x7C +#define ASCII_COMMA 0x2C diff --git a/src/simgear/xml/asciitab.h b/src/simgear/xml/asciitab.h index 79a15c28c..af766fb24 100644 --- a/src/simgear/xml/asciitab.h +++ b/src/simgear/xml/asciitab.h @@ -1,36 +1,66 @@ -/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd - See the file COPYING for copying permission. +/* + __ __ _ + ___\ \/ /_ __ __ _| |_ + / _ \\ /| '_ \ / _` | __| + | __// \| |_) | (_| | |_ + \___/_/\_\ .__/ \__,_|\__| + |_| XML parser + + Copyright (c) 1997-2000 Thai Open Source Software Center Ltd + Copyright (c) 2000 Clark Cooper + Copyright (c) 2002 Fred L. Drake, Jr. + Copyright (c) 2017 Sebastian Pipping + Licensed under the MIT license: + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to permit + persons to whom the Software is furnished to do so, subject to the + following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN + NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, + DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + USE OR OTHER DEALINGS IN THE SOFTWARE. */ /* 0x00 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, -/* 0x04 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, -/* 0x08 */ BT_NONXML, BT_S, BT_LF, BT_NONXML, -/* 0x0C */ BT_NONXML, BT_CR, BT_NONXML, BT_NONXML, -/* 0x10 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, -/* 0x14 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, -/* 0x18 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, -/* 0x1C */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, -/* 0x20 */ BT_S, BT_EXCL, BT_QUOT, BT_NUM, -/* 0x24 */ BT_OTHER, BT_PERCNT, BT_AMP, BT_APOS, -/* 0x28 */ BT_LPAR, BT_RPAR, BT_AST, BT_PLUS, -/* 0x2C */ BT_COMMA, BT_MINUS, BT_NAME, BT_SOL, -/* 0x30 */ BT_DIGIT, BT_DIGIT, BT_DIGIT, BT_DIGIT, -/* 0x34 */ BT_DIGIT, BT_DIGIT, BT_DIGIT, BT_DIGIT, -/* 0x38 */ BT_DIGIT, BT_DIGIT, BT_COLON, BT_SEMI, -/* 0x3C */ BT_LT, BT_EQUALS, BT_GT, BT_QUEST, -/* 0x40 */ BT_OTHER, BT_HEX, BT_HEX, BT_HEX, -/* 0x44 */ BT_HEX, BT_HEX, BT_HEX, BT_NMSTRT, -/* 0x48 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, -/* 0x4C */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, -/* 0x50 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, -/* 0x54 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, -/* 0x58 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_LSQB, -/* 0x5C */ BT_OTHER, BT_RSQB, BT_OTHER, BT_NMSTRT, -/* 0x60 */ BT_OTHER, BT_HEX, BT_HEX, BT_HEX, -/* 0x64 */ BT_HEX, BT_HEX, BT_HEX, BT_NMSTRT, -/* 0x68 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, -/* 0x6C */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, -/* 0x70 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, -/* 0x74 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, -/* 0x78 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_OTHER, -/* 0x7C */ BT_VERBAR, BT_OTHER, BT_OTHER, BT_OTHER, + /* 0x04 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, + /* 0x08 */ BT_NONXML, BT_S, BT_LF, BT_NONXML, + /* 0x0C */ BT_NONXML, BT_CR, BT_NONXML, BT_NONXML, + /* 0x10 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, + /* 0x14 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, + /* 0x18 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, + /* 0x1C */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, + /* 0x20 */ BT_S, BT_EXCL, BT_QUOT, BT_NUM, + /* 0x24 */ BT_OTHER, BT_PERCNT, BT_AMP, BT_APOS, + /* 0x28 */ BT_LPAR, BT_RPAR, BT_AST, BT_PLUS, + /* 0x2C */ BT_COMMA, BT_MINUS, BT_NAME, BT_SOL, + /* 0x30 */ BT_DIGIT, BT_DIGIT, BT_DIGIT, BT_DIGIT, + /* 0x34 */ BT_DIGIT, BT_DIGIT, BT_DIGIT, BT_DIGIT, + /* 0x38 */ BT_DIGIT, BT_DIGIT, BT_COLON, BT_SEMI, + /* 0x3C */ BT_LT, BT_EQUALS, BT_GT, BT_QUEST, + /* 0x40 */ BT_OTHER, BT_HEX, BT_HEX, BT_HEX, + /* 0x44 */ BT_HEX, BT_HEX, BT_HEX, BT_NMSTRT, + /* 0x48 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0x4C */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0x50 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0x54 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0x58 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_LSQB, + /* 0x5C */ BT_OTHER, BT_RSQB, BT_OTHER, BT_NMSTRT, + /* 0x60 */ BT_OTHER, BT_HEX, BT_HEX, BT_HEX, + /* 0x64 */ BT_HEX, BT_HEX, BT_HEX, BT_NMSTRT, + /* 0x68 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0x6C */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0x70 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0x74 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0x78 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_OTHER, + /* 0x7C */ BT_VERBAR, BT_OTHER, BT_OTHER, BT_OTHER, diff --git a/src/simgear/xml/expat.h b/src/simgear/xml/expat.h index cb07c1c92..e2020a4a2 100644 --- a/src/simgear/xml/expat.h +++ b/src/simgear/xml/expat.h @@ -1,29 +1,59 @@ -/* Copyright (c) 1998, 1999, 2000 Thai Open Source Software Center Ltd - See the file COPYING for copying permission. +/* + __ __ _ + ___\ \/ /_ __ __ _| |_ + / _ \\ /| '_ \ / _` | __| + | __// \| |_) | (_| | |_ + \___/_/\_\ .__/ \__,_|\__| + |_| XML parser + + Copyright (c) 1997-2000 Thai Open Source Software Center Ltd + Copyright (c) 2000 Clark Cooper + Copyright (c) 2000-2005 Fred L. Drake, Jr. + Copyright (c) 2001-2002 Greg Stein + Copyright (c) 2002-2016 Karl Waclawek + Copyright (c) 2016-2022 Sebastian Pipping + Copyright (c) 2016 Cristian Rodríguez + Copyright (c) 2016 Thomas Beutlich + Copyright (c) 2017 Rhodri James + Copyright (c) 2022 Thijs Schreijer + Licensed under the MIT license: + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to permit + persons to whom the Software is furnished to do so, subject to the + following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN + NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, + DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#ifndef XmlParse_INCLUDED -#define XmlParse_INCLUDED 1 - -#ifdef __VMS -/* 0 1 2 3 0 1 2 3 - 1234567890123456789012345678901 1234567890123456789012345678901 */ -#define XML_SetProcessingInstructionHandler XML_SetProcessingInstrHandler -#define XML_SetUnparsedEntityDeclHandler XML_SetUnparsedEntDeclHandler -#define XML_SetStartNamespaceDeclHandler XML_SetStartNamespcDeclHandler -#define XML_SetExternalEntityRefHandlerArg XML_SetExternalEntRefHandlerArg -#endif +#ifndef Expat_INCLUDED +#define Expat_INCLUDED 1 #include #include "expat_external.h" +#ifdef __cplusplus +extern "C" { +#endif + struct XML_ParserStruct; typedef struct XML_ParserStruct *XML_Parser; -/* Should this be defined using stdbool.h when C99 is available? */ typedef unsigned char XML_Bool; -#define XML_TRUE ((XML_Bool) 1) -#define XML_FALSE ((XML_Bool) 0) +#define XML_TRUE ((XML_Bool)1) +#define XML_FALSE ((XML_Bool)0) /* The XML_Status enum gives the possible return values for several API functions. The preprocessor #defines are included so this @@ -43,7 +73,7 @@ enum XML_Status { #define XML_STATUS_ERROR XML_STATUS_ERROR XML_STATUS_OK = 1, #define XML_STATUS_OK XML_STATUS_OK - XML_STATUS_SUSPENDED = 2, + XML_STATUS_SUSPENDED = 2 #define XML_STATUS_SUSPENDED XML_STATUS_SUSPENDED }; @@ -87,7 +117,17 @@ enum XML_Error { XML_ERROR_NOT_SUSPENDED, XML_ERROR_ABORTED, XML_ERROR_FINISHED, - XML_ERROR_SUSPEND_PE + XML_ERROR_SUSPEND_PE, + /* Added in 2.0. */ + XML_ERROR_RESERVED_PREFIX_XML, + XML_ERROR_RESERVED_PREFIX_XMLNS, + XML_ERROR_RESERVED_NAMESPACE_URI, + /* Added in 2.2.1. */ + XML_ERROR_INVALID_ARGUMENT, + /* Added in 2.3.0. */ + XML_ERROR_NO_BUFFER, + /* Added in 2.4.0. */ + XML_ERROR_AMPLIFICATION_LIMIT_BREACH }; enum XML_Content_Type { @@ -127,25 +167,25 @@ enum XML_Content_Quant { typedef struct XML_cp XML_Content; struct XML_cp { - enum XML_Content_Type type; - enum XML_Content_Quant quant; - XML_Char * name; - unsigned int numchildren; - XML_Content * children; + enum XML_Content_Type type; + enum XML_Content_Quant quant; + XML_Char *name; + unsigned int numchildren; + XML_Content *children; }; - /* This is called for an element declaration. See above for - description of the model argument. It's the caller's responsibility - to free model when finished with it. + description of the model argument. It's the user code's responsibility + to free model when finished with it. See XML_FreeContentModel. + There is no need to free the model from the handler, it can be kept + around and freed at a later stage. */ -typedef void (XMLCALL *XML_ElementDeclHandler) (void *userData, - const XML_Char *name, - XML_Content *model); +typedef void(XMLCALL *XML_ElementDeclHandler)(void *userData, + const XML_Char *name, + XML_Content *model); XMLPARSEAPI(void) -XML_SetElementDeclHandler(XML_Parser parser, - XML_ElementDeclHandler eldecl); +XML_SetElementDeclHandler(XML_Parser parser, XML_ElementDeclHandler eldecl); /* The Attlist declaration handler is called for *each* attribute. So a single Attlist declaration with multiple attributes declared will @@ -155,17 +195,12 @@ XML_SetElementDeclHandler(XML_Parser parser, value will be NULL in the case of "#REQUIRED". If "isrequired" is true and default is non-NULL, then this is a "#FIXED" default. */ -typedef void (XMLCALL *XML_AttlistDeclHandler) ( - void *userData, - const XML_Char *elname, - const XML_Char *attname, - const XML_Char *att_type, - const XML_Char *dflt, - int isrequired); +typedef void(XMLCALL *XML_AttlistDeclHandler)( + void *userData, const XML_Char *elname, const XML_Char *attname, + const XML_Char *att_type, const XML_Char *dflt, int isrequired); XMLPARSEAPI(void) -XML_SetAttlistDeclHandler(XML_Parser parser, - XML_AttlistDeclHandler attdecl); +XML_SetAttlistDeclHandler(XML_Parser parser, XML_AttlistDeclHandler attdecl); /* The XML declaration handler is called for *both* XML declarations and text declarations. The way to distinguish is that the version @@ -175,15 +210,13 @@ XML_SetAttlistDeclHandler(XML_Parser parser, was no standalone parameter in the declaration, that it was given as no, or that it was given as yes. */ -typedef void (XMLCALL *XML_XmlDeclHandler) (void *userData, - const XML_Char *version, - const XML_Char *encoding, - int standalone); +typedef void(XMLCALL *XML_XmlDeclHandler)(void *userData, + const XML_Char *version, + const XML_Char *encoding, + int standalone); XMLPARSEAPI(void) -XML_SetXmlDeclHandler(XML_Parser parser, - XML_XmlDeclHandler xmldecl); - +XML_SetXmlDeclHandler(XML_Parser parser, XML_XmlDeclHandler xmldecl); typedef struct { void *(*malloc_fcn)(size_t size); @@ -205,13 +238,23 @@ XML_ParserCreate(const XML_Char *encoding); URI, the namespace separator character, and the local part of the name. If the namespace separator is '\0' then the namespace URI and the local part will be concatenated without any separator. - When a namespace is not declared, the name and prefix will be - passed through without expansion. + It is a programming error to use the separator '\0' with namespace + triplets (see XML_SetReturnNSTriplet). + If a namespace separator is chosen that can be part of a URI or + part of an XML name, splitting an expanded name back into its + 1, 2 or 3 original parts on application level in the element handler + may end up vulnerable, so these are advised against; sane choices for + a namespace separator are e.g. '\n' (line feed) and '|' (pipe). + + Note that Expat does not validate namespace URIs (beyond encoding) + against RFC 3986 today (and is not required to do so with regard to + the XML 1.0 namespaces specification) but it may start doing that + in future releases. Before that, an application using Expat must + be ready to receive namespace URIs containing non-URI characters. */ XMLPARSEAPI(XML_Parser) XML_ParserCreateNS(const XML_Char *encoding, XML_Char namespaceSeparator); - /* Constructs a new parser using the memory management suite referred to by memsuite. If memsuite is NULL, then use the standard library memory suite. If namespaceSeparator is non-NULL it creates a parser with @@ -227,7 +270,7 @@ XML_ParserCreate_MM(const XML_Char *encoding, const XML_Char *namespaceSeparator); /* Prepare a parser object to be re-used. This is particularly - valuable when memory allocation overhead is disproportionatly high, + valuable when memory allocation overhead is disproportionately high, such as when a large number of small documnents need to be parsed. All handlers are cleared from the parser, except for the unknownEncodingHandler. The parser's external state is re-initialized @@ -241,31 +284,27 @@ XML_ParserReset(XML_Parser parser, const XML_Char *encoding); /* atts is array of name/value pairs, terminated by 0; names and values are 0 terminated. */ -typedef void (XMLCALL *XML_StartElementHandler) (void *userData, - const XML_Char *name, - const XML_Char **atts); - -typedef void (XMLCALL *XML_EndElementHandler) (void *userData, - const XML_Char *name); +typedef void(XMLCALL *XML_StartElementHandler)(void *userData, + const XML_Char *name, + const XML_Char **atts); +typedef void(XMLCALL *XML_EndElementHandler)(void *userData, + const XML_Char *name); /* s is not 0 terminated. */ -typedef void (XMLCALL *XML_CharacterDataHandler) (void *userData, - const XML_Char *s, - int len); +typedef void(XMLCALL *XML_CharacterDataHandler)(void *userData, + const XML_Char *s, int len); /* target and data are 0 terminated */ -typedef void (XMLCALL *XML_ProcessingInstructionHandler) ( - void *userData, - const XML_Char *target, - const XML_Char *data); +typedef void(XMLCALL *XML_ProcessingInstructionHandler)(void *userData, + const XML_Char *target, + const XML_Char *data); /* data is 0 terminated */ -typedef void (XMLCALL *XML_CommentHandler) (void *userData, - const XML_Char *data); +typedef void(XMLCALL *XML_CommentHandler)(void *userData, const XML_Char *data); -typedef void (XMLCALL *XML_StartCdataSectionHandler) (void *userData); -typedef void (XMLCALL *XML_EndCdataSectionHandler) (void *userData); +typedef void(XMLCALL *XML_StartCdataSectionHandler)(void *userData); +typedef void(XMLCALL *XML_EndCdataSectionHandler)(void *userData); /* This is called for any characters in the XML document for which there is no applicable handler. This includes both characters that @@ -280,25 +319,23 @@ typedef void (XMLCALL *XML_EndCdataSectionHandler) (void *userData); default handler: for example, a comment might be split between multiple calls. */ -typedef void (XMLCALL *XML_DefaultHandler) (void *userData, - const XML_Char *s, - int len); +typedef void(XMLCALL *XML_DefaultHandler)(void *userData, const XML_Char *s, + int len); /* This is called for the start of the DOCTYPE declaration, before any DTD or internal subset is parsed. */ -typedef void (XMLCALL *XML_StartDoctypeDeclHandler) ( - void *userData, - const XML_Char *doctypeName, - const XML_Char *sysid, - const XML_Char *pubid, - int has_internal_subset); +typedef void(XMLCALL *XML_StartDoctypeDeclHandler)(void *userData, + const XML_Char *doctypeName, + const XML_Char *sysid, + const XML_Char *pubid, + int has_internal_subset); -/* This is called for the start of the DOCTYPE declaration when the +/* This is called for the end of the DOCTYPE declaration when the closing > is encountered, but after processing any external subset. */ -typedef void (XMLCALL *XML_EndDoctypeDeclHandler)(void *userData); +typedef void(XMLCALL *XML_EndDoctypeDeclHandler)(void *userData); /* This is called for entity declarations. The is_parameter_entity argument will be non-zero if the entity is a parameter entity, zero @@ -306,7 +343,7 @@ typedef void (XMLCALL *XML_EndDoctypeDeclHandler)(void *userData); For internal entities (), value will be non-NULL and systemId, publicID, and notationName will be NULL. - The value string is NOT nul-terminated; the length is provided in + The value string is NOT null-terminated; the length is provided in the value_length argument. Since it is legal to have zero-length values, do not use this argument to test for internal entities. @@ -318,23 +355,17 @@ typedef void (XMLCALL *XML_EndDoctypeDeclHandler)(void *userData); Note that is_parameter_entity can't be changed to XML_Bool, since that would break binary compatibility. */ -typedef void (XMLCALL *XML_EntityDeclHandler) ( - void *userData, - const XML_Char *entityName, - int is_parameter_entity, - const XML_Char *value, - int value_length, - const XML_Char *base, - const XML_Char *systemId, - const XML_Char *publicId, - const XML_Char *notationName); +typedef void(XMLCALL *XML_EntityDeclHandler)( + void *userData, const XML_Char *entityName, int is_parameter_entity, + const XML_Char *value, int value_length, const XML_Char *base, + const XML_Char *systemId, const XML_Char *publicId, + const XML_Char *notationName); XMLPARSEAPI(void) -XML_SetEntityDeclHandler(XML_Parser parser, - XML_EntityDeclHandler handler); +XML_SetEntityDeclHandler(XML_Parser parser, XML_EntityDeclHandler handler); /* OBSOLETE -- OBSOLETE -- OBSOLETE - This handler has been superceded by the EntityDeclHandler above. + This handler has been superseded by the EntityDeclHandler above. It is provided here for backward compatibility. This is called for a declaration of an unparsed (NDATA) entity. @@ -342,24 +373,20 @@ XML_SetEntityDeclHandler(XML_Parser parser, entityName, systemId and notationName arguments will never be NULL. The other arguments may be. */ -typedef void (XMLCALL *XML_UnparsedEntityDeclHandler) ( - void *userData, - const XML_Char *entityName, - const XML_Char *base, - const XML_Char *systemId, - const XML_Char *publicId, - const XML_Char *notationName); +typedef void(XMLCALL *XML_UnparsedEntityDeclHandler)( + void *userData, const XML_Char *entityName, const XML_Char *base, + const XML_Char *systemId, const XML_Char *publicId, + const XML_Char *notationName); /* This is called for a declaration of notation. The base argument is whatever was set by XML_SetBase. The notationName will never be NULL. The other arguments can be. */ -typedef void (XMLCALL *XML_NotationDeclHandler) ( - void *userData, - const XML_Char *notationName, - const XML_Char *base, - const XML_Char *systemId, - const XML_Char *publicId); +typedef void(XMLCALL *XML_NotationDeclHandler)(void *userData, + const XML_Char *notationName, + const XML_Char *base, + const XML_Char *systemId, + const XML_Char *publicId); /* When namespace processing is enabled, these are called once for each namespace declaration. The call to the start and end element @@ -367,14 +394,12 @@ typedef void (XMLCALL *XML_NotationDeclHandler) ( declaration handlers. For an xmlns attribute, prefix will be NULL. For an xmlns="" attribute, uri will be NULL. */ -typedef void (XMLCALL *XML_StartNamespaceDeclHandler) ( - void *userData, - const XML_Char *prefix, - const XML_Char *uri); +typedef void(XMLCALL *XML_StartNamespaceDeclHandler)(void *userData, + const XML_Char *prefix, + const XML_Char *uri); -typedef void (XMLCALL *XML_EndNamespaceDeclHandler) ( - void *userData, - const XML_Char *prefix); +typedef void(XMLCALL *XML_EndNamespaceDeclHandler)(void *userData, + const XML_Char *prefix); /* This is called if the document is not standalone, that is, it has an external subset or a reference to a parameter entity, but does not @@ -385,7 +410,7 @@ typedef void (XMLCALL *XML_EndNamespaceDeclHandler) ( conditions above this handler will only be called if the referenced entity was actually read. */ -typedef int (XMLCALL *XML_NotStandaloneHandler) (void *userData); +typedef int(XMLCALL *XML_NotStandaloneHandler)(void *userData); /* This is called for a reference to an external parsed general entity. The referenced entity is not automatically parsed. The @@ -421,12 +446,11 @@ typedef int (XMLCALL *XML_NotStandaloneHandler) (void *userData); Note that unlike other handlers the first argument is the parser, not userData. */ -typedef int (XMLCALL *XML_ExternalEntityRefHandler) ( - XML_Parser parser, - const XML_Char *context, - const XML_Char *base, - const XML_Char *systemId, - const XML_Char *publicId); +typedef int(XMLCALL *XML_ExternalEntityRefHandler)(XML_Parser parser, + const XML_Char *context, + const XML_Char *base, + const XML_Char *systemId, + const XML_Char *publicId); /* This is called in two situations: 1) An entity reference is encountered for which no declaration @@ -438,10 +462,9 @@ typedef int (XMLCALL *XML_ExternalEntityRefHandler) ( the event would be out of sync with the reporting of the declarations or attribute values */ -typedef void (XMLCALL *XML_SkippedEntityHandler) ( - void *userData, - const XML_Char *entityName, - int is_parameter_entity); +typedef void(XMLCALL *XML_SkippedEntityHandler)(void *userData, + const XML_Char *entityName, + int is_parameter_entity); /* This structure is filled in by the XML_UnknownEncodingHandler to provide information to the parser about encodings that are unknown @@ -498,8 +521,8 @@ typedef void (XMLCALL *XML_SkippedEntityHandler) ( typedef struct { int map[256]; void *data; - int (XMLCALL *convert)(void *data, const char *s); - void (XMLCALL *release)(void *data); + int(XMLCALL *convert)(void *data, const char *s); + void(XMLCALL *release)(void *data); } XML_Encoding; /* This is called for an encoding that is unknown to the parser. @@ -515,25 +538,21 @@ typedef struct { Otherwise it must return XML_STATUS_ERROR. If info does not describe a suitable encoding, then the parser will - return an XML_UNKNOWN_ENCODING error. + return an XML_ERROR_UNKNOWN_ENCODING error. */ -typedef int (XMLCALL *XML_UnknownEncodingHandler) ( - void *encodingHandlerData, - const XML_Char *name, - XML_Encoding *info); +typedef int(XMLCALL *XML_UnknownEncodingHandler)(void *encodingHandlerData, + const XML_Char *name, + XML_Encoding *info); XMLPARSEAPI(void) -XML_SetElementHandler(XML_Parser parser, - XML_StartElementHandler start, +XML_SetElementHandler(XML_Parser parser, XML_StartElementHandler start, XML_EndElementHandler end); XMLPARSEAPI(void) -XML_SetStartElementHandler(XML_Parser parser, - XML_StartElementHandler handler); +XML_SetStartElementHandler(XML_Parser parser, XML_StartElementHandler handler); XMLPARSEAPI(void) -XML_SetEndElementHandler(XML_Parser parser, - XML_EndElementHandler handler); +XML_SetEndElementHandler(XML_Parser parser, XML_EndElementHandler handler); XMLPARSEAPI(void) XML_SetCharacterDataHandler(XML_Parser parser, @@ -543,8 +562,7 @@ XMLPARSEAPI(void) XML_SetProcessingInstructionHandler(XML_Parser parser, XML_ProcessingInstructionHandler handler); XMLPARSEAPI(void) -XML_SetCommentHandler(XML_Parser parser, - XML_CommentHandler handler); +XML_SetCommentHandler(XML_Parser parser, XML_CommentHandler handler); XMLPARSEAPI(void) XML_SetCdataSectionHandler(XML_Parser parser, @@ -564,20 +582,17 @@ XML_SetEndCdataSectionHandler(XML_Parser parser, default handler, or to the skipped entity handler, if one is set. */ XMLPARSEAPI(void) -XML_SetDefaultHandler(XML_Parser parser, - XML_DefaultHandler handler); +XML_SetDefaultHandler(XML_Parser parser, XML_DefaultHandler handler); /* This sets the default handler but does not inhibit expansion of internal entities. The entity reference will not be passed to the default handler. */ XMLPARSEAPI(void) -XML_SetDefaultHandlerExpand(XML_Parser parser, - XML_DefaultHandler handler); +XML_SetDefaultHandlerExpand(XML_Parser parser, XML_DefaultHandler handler); XMLPARSEAPI(void) -XML_SetDoctypeDeclHandler(XML_Parser parser, - XML_StartDoctypeDeclHandler start, +XML_SetDoctypeDeclHandler(XML_Parser parser, XML_StartDoctypeDeclHandler start, XML_EndDoctypeDeclHandler end); XMLPARSEAPI(void) @@ -585,16 +600,14 @@ XML_SetStartDoctypeDeclHandler(XML_Parser parser, XML_StartDoctypeDeclHandler start); XMLPARSEAPI(void) -XML_SetEndDoctypeDeclHandler(XML_Parser parser, - XML_EndDoctypeDeclHandler end); +XML_SetEndDoctypeDeclHandler(XML_Parser parser, XML_EndDoctypeDeclHandler end); XMLPARSEAPI(void) XML_SetUnparsedEntityDeclHandler(XML_Parser parser, XML_UnparsedEntityDeclHandler handler); XMLPARSEAPI(void) -XML_SetNotationDeclHandler(XML_Parser parser, - XML_NotationDeclHandler handler); +XML_SetNotationDeclHandler(XML_Parser parser, XML_NotationDeclHandler handler); XMLPARSEAPI(void) XML_SetNamespaceDeclHandler(XML_Parser parser, @@ -622,8 +635,7 @@ XML_SetExternalEntityRefHandler(XML_Parser parser, instead of the parser object. */ XMLPARSEAPI(void) -XML_SetExternalEntityRefHandlerArg(XML_Parser parser, - void *arg); +XML_SetExternalEntityRefHandlerArg(XML_Parser parser, void *arg); XMLPARSEAPI(void) XML_SetSkippedEntityHandler(XML_Parser parser, @@ -698,11 +710,11 @@ XML_UseParserAsHandlerArg(XML_Parser parser); be called, despite an external subset being parsed. Note: If XML_DTD is not defined when Expat is compiled, returns XML_ERROR_FEATURE_REQUIRES_XML_DTD. + Note: If parser == NULL, returns XML_ERROR_INVALID_ARGUMENT. */ XMLPARSEAPI(enum XML_Error) XML_UseForeignDTD(XML_Parser parser, XML_Bool useDTD); - /* Sets the base to be used for resolving relative URIs in system identifiers in declarations. Resolving relative identifiers is left to the application: this value will be passed through as the @@ -720,20 +732,44 @@ XML_GetBase(XML_Parser parser); /* Returns the number of the attribute/value pairs passed in last call to the XML_StartElementHandler that were specified in the start-tag rather than defaulted. Each attribute/value pair counts as 2; thus - this correspondds to an index into the atts array passed to the - XML_StartElementHandler. + this corresponds to an index into the atts array passed to the + XML_StartElementHandler. Returns -1 if parser == NULL. */ XMLPARSEAPI(int) XML_GetSpecifiedAttributeCount(XML_Parser parser); /* Returns the index of the ID attribute passed in the last call to - XML_StartElementHandler, or -1 if there is no ID attribute. Each - attribute/value pair counts as 2; thus this correspondds to an - index into the atts array passed to the XML_StartElementHandler. + XML_StartElementHandler, or -1 if there is no ID attribute or + parser == NULL. Each attribute/value pair counts as 2; thus this + corresponds to an index into the atts array passed to the + XML_StartElementHandler. */ XMLPARSEAPI(int) XML_GetIdAttributeIndex(XML_Parser parser); +#ifdef XML_ATTR_INFO +/* Source file byte offsets for the start and end of attribute names and values. + The value indices are exclusive of surrounding quotes; thus in a UTF-8 source + file an attribute value of "blah" will yield: + info->valueEnd - info->valueStart = 4 bytes. +*/ +typedef struct { + XML_Index nameStart; /* Offset to beginning of the attribute name. */ + XML_Index nameEnd; /* Offset after the attribute name's last byte. */ + XML_Index valueStart; /* Offset to beginning of the attribute value. */ + XML_Index valueEnd; /* Offset after the attribute value's last byte. */ +} XML_AttrInfo; + +/* Returns an array of XML_AttrInfo structures for the attribute/value pairs + passed in last call to the XML_StartElementHandler that were specified + in the start-tag rather than defaulted. Each attribute/value pair counts + as 1; thus the number of entries in the array is + XML_GetSpecifiedAttributeCount(parser) / 2. +*/ +XMLPARSEAPI(const XML_AttrInfo *) +XML_GetAttributeInfo(XML_Parser parser); +#endif + /* Parses some input. Returns XML_STATUS_ERROR if a fatal error is detected. The last call to XML_Parse must have isFinal true; len may be zero for this call (or any other). @@ -757,20 +793,20 @@ XML_ParseBuffer(XML_Parser parser, int len, int isFinal); (resumable = 0) an already suspended parser. Some call-backs may still follow because they would otherwise get lost. Examples: - endElementHandler() for empty elements when stopped in - startElementHandler(), - - endNameSpaceDeclHandler() when stopped in endElementHandler(), + startElementHandler(), + - endNameSpaceDeclHandler() when stopped in endElementHandler(), and possibly others. Can be called from most handlers, including DTD related call-backs, except when parsing an external parameter entity and resumable != 0. Returns XML_STATUS_OK when successful, XML_STATUS_ERROR otherwise. - Possible error codes: + Possible error codes: - XML_ERROR_SUSPENDED: when suspending an already suspended parser. - XML_ERROR_FINISHED: when the parser has already finished. - XML_ERROR_SUSPEND_PE: when suspending while parsing an external PE. - When resumable != 0 (true) then parsing is suspended, that is, - XML_Parse() and XML_ParseBuffer() return XML_STATUS_SUSPENDED. + When resumable != 0 (true) then parsing is suspended, that is, + XML_Parse() and XML_ParseBuffer() return XML_STATUS_SUSPENDED. Otherwise, parsing is aborted, that is, XML_Parse() and XML_ParseBuffer() return XML_STATUS_ERROR with error code XML_ERROR_ABORTED. @@ -781,7 +817,7 @@ XML_ParseBuffer(XML_Parser parser, int len, int isFinal); the externalEntityRefHandler() to call XML_StopParser() on the parent parser (recursively), if one wants to stop parsing altogether. - When suspended, parsing can be resumed by calling XML_ResumeParser(). + When suspended, parsing can be resumed by calling XML_ResumeParser(). */ XMLPARSEAPI(enum XML_Status) XML_StopParser(XML_Parser parser, XML_Bool resumable); @@ -789,7 +825,7 @@ XML_StopParser(XML_Parser parser, XML_Bool resumable); /* Resumes parsing after it has been suspended with XML_StopParser(). Must not be called from within a handler call-back. Returns same status codes as XML_Parse() or XML_ParseBuffer(). - Additional error code XML_ERROR_NOT_SUSPENDED possible. + Additional error code XML_ERROR_NOT_SUSPENDED possible. *Note*: This must be called on the most deeply nested child parser instance @@ -801,12 +837,7 @@ XML_StopParser(XML_Parser parser, XML_Bool resumable); XMLPARSEAPI(enum XML_Status) XML_ResumeParser(XML_Parser parser); -enum XML_Parsing { - XML_INITIALIZED, - XML_PARSING, - XML_FINISHED, - XML_SUSPENDED -}; +enum XML_Parsing { XML_INITIALIZED, XML_PARSING, XML_FINISHED, XML_SUSPENDED }; typedef struct { enum XML_Parsing parsing; @@ -838,8 +869,7 @@ XML_GetParsingStatus(XML_Parser parser, XML_ParsingStatus *status); Otherwise returns a new XML_Parser object. */ XMLPARSEAPI(XML_Parser) -XML_ExternalEntityParserCreate(XML_Parser parser, - const XML_Char *context, +XML_ExternalEntityParserCreate(XML_Parser parser, const XML_Char *context, const XML_Char *encoding); enum XML_ParamEntityParsing { @@ -870,11 +900,21 @@ enum XML_ParamEntityParsing { entities is requested; otherwise it will return non-zero. Note: If XML_SetParamEntityParsing is called after XML_Parse or XML_ParseBuffer, then it has no effect and will always return 0. + Note: If parser == NULL, the function will do nothing and return 0. */ XMLPARSEAPI(int) XML_SetParamEntityParsing(XML_Parser parser, enum XML_ParamEntityParsing parsing); +/* Sets the hash salt to use for internal hash calculations. + Helps in preventing DoS attacks based on predicting hash + function behavior. This must be called before parsing is started. + Returns 1 if successful, 0 when called after parsing has started. + Note: If parser == NULL, the function will do nothing and return 0. +*/ +XMLPARSEAPI(int) +XML_SetHashSalt(XML_Parser parser, unsigned long hash_salt); + /* If XML_Parse or XML_ParseBuffer have returned XML_STATUS_ERROR, then XML_GetErrorCode returns information about the error. */ @@ -890,16 +930,20 @@ XML_GetErrorCode(XML_Parser parser); be within the relevant markup. When called outside of the callback functions, the position indicated will be just past the last parse event (regardless of whether there was an associated callback). - + They may also be called after returning from a call to XML_Parse or XML_ParseBuffer. If the return value is XML_STATUS_ERROR then the location is the location of the character at which the error was detected; otherwise the location is the location of the last parse event, as described above. + + Note: XML_GetCurrentLineNumber and XML_GetCurrentColumnNumber + return 0 to indicate an error. + Note: XML_GetCurrentByteIndex returns -1 to indicate an error. */ -XMLPARSEAPI(int) XML_GetCurrentLineNumber(XML_Parser parser); -XMLPARSEAPI(int) XML_GetCurrentColumnNumber(XML_Parser parser); -XMLPARSEAPI(long) XML_GetCurrentByteIndex(XML_Parser parser); +XMLPARSEAPI(XML_Size) XML_GetCurrentLineNumber(XML_Parser parser); +XMLPARSEAPI(XML_Size) XML_GetCurrentColumnNumber(XML_Parser parser); +XMLPARSEAPI(XML_Index) XML_GetCurrentByteIndex(XML_Parser parser); /* Return the number of bytes in the current event. Returns 0 if the event is in an internal entity. @@ -918,14 +962,12 @@ XML_GetCurrentByteCount(XML_Parser parser); the handler that makes the call. */ XMLPARSEAPI(const char *) -XML_GetInputContext(XML_Parser parser, - int *offset, - int *size); +XML_GetInputContext(XML_Parser parser, int *offset, int *size); /* For backwards compatibility with previous versions. */ -#define XML_GetErrorLineNumber XML_GetCurrentLineNumber +#define XML_GetErrorLineNumber XML_GetCurrentLineNumber #define XML_GetErrorColumnNumber XML_GetCurrentColumnNumber -#define XML_GetErrorByteIndex XML_GetCurrentByteIndex +#define XML_GetErrorByteIndex XML_GetCurrentByteIndex /* Frees the content model passed to the element declaration handler */ XMLPARSEAPI(void) @@ -933,9 +975,12 @@ XML_FreeContentModel(XML_Parser parser, XML_Content *model); /* Exposing the memory handling functions used in Expat */ XMLPARSEAPI(void *) +XML_ATTR_MALLOC +XML_ATTR_ALLOC_SIZE(2) XML_MemMalloc(XML_Parser parser, size_t size); XMLPARSEAPI(void *) +XML_ATTR_ALLOC_SIZE(3) XML_MemRealloc(XML_Parser parser, void *ptr, size_t size); XMLPARSEAPI(void) @@ -974,31 +1019,46 @@ enum XML_FeatureEnum { XML_FEATURE_CONTEXT_BYTES, XML_FEATURE_MIN_SIZE, XML_FEATURE_SIZEOF_XML_CHAR, - XML_FEATURE_SIZEOF_XML_LCHAR + XML_FEATURE_SIZEOF_XML_LCHAR, + XML_FEATURE_NS, + XML_FEATURE_LARGE_SIZE, + XML_FEATURE_ATTR_INFO, + /* Added in Expat 2.4.0. */ + XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT, + XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT /* Additional features must be added to the end of this enum. */ }; typedef struct { - enum XML_FeatureEnum feature; - const XML_LChar *name; - long int value; + enum XML_FeatureEnum feature; + const XML_LChar *name; + long int value; } XML_Feature; XMLPARSEAPI(const XML_Feature *) XML_GetFeatureList(void); +#ifdef XML_DTD +/* Added in Expat 2.4.0. */ +XMLPARSEAPI(XML_Bool) +XML_SetBillionLaughsAttackProtectionMaximumAmplification( + XML_Parser parser, float maximumAmplificationFactor); + +/* Added in Expat 2.4.0. */ +XMLPARSEAPI(XML_Bool) +XML_SetBillionLaughsAttackProtectionActivationThreshold( + XML_Parser parser, unsigned long long activationThresholdBytes); +#endif -/* Expat follows the GNU/Linux convention of odd number minor version for - beta/development releases and even number minor version for stable - releases. Micro is bumped with each release, and set to 0 with each - change to major or minor version. +/* Expat follows the semantic versioning convention. + See http://semver.org. */ -#define XML_MAJOR_VERSION 1 -#define XML_MINOR_VERSION 95 +#define XML_MAJOR_VERSION 2 +#define XML_MINOR_VERSION 4 #define XML_MICRO_VERSION 8 #ifdef __cplusplus } #endif -#endif /* not XmlParse_INCLUDED */ +#endif /* not Expat_INCLUDED */ diff --git a/src/simgear/xml/expat_external.h b/src/simgear/xml/expat_external.h index 4cff8e044..8829f7709 100644 --- a/src/simgear/xml/expat_external.h +++ b/src/simgear/xml/expat_external.h @@ -1,13 +1,46 @@ -/* Copyright (c) 1998, 1999, 2000 Thai Open Source Software Center Ltd - See the file COPYING for copying permission. +/* + __ __ _ + ___\ \/ /_ __ __ _| |_ + / _ \\ /| '_ \ / _` | __| + | __// \| |_) | (_| | |_ + \___/_/\_\ .__/ \__,_|\__| + |_| XML parser + + Copyright (c) 1997-2000 Thai Open Source Software Center Ltd + Copyright (c) 2000 Clark Cooper + Copyright (c) 2000-2004 Fred L. Drake, Jr. + Copyright (c) 2001-2002 Greg Stein + Copyright (c) 2002-2006 Karl Waclawek + Copyright (c) 2016 Cristian Rodríguez + Copyright (c) 2016-2019 Sebastian Pipping + Copyright (c) 2017 Rhodri James + Copyright (c) 2018 Yury Gribov + Licensed under the MIT license: + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to permit + persons to whom the Software is furnished to do so, subject to the + following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN + NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, + DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/* External API definitions */ +#ifndef Expat_External_INCLUDED +#define Expat_External_INCLUDED 1 -// These lines commented out. They cause problems with MSVC++ -// #if defined(_MSC_EXTENSIONS) && !defined(__BEOS__) && !defined(__CYGWIN__) -// #define XML_USE_MSC_EXTENSIONS 1 -// #endif +/* External API definitions */ /* Expat tries very hard to make the API boundary very specifically defined. There are two macros defined to control this boundary; @@ -32,11 +65,11 @@ system headers may assume the cdecl convention. */ #ifndef XMLCALL -#if defined(XML_USE_MSC_EXTENSIONS) -#define XMLCALL __cdecl -#elif defined(__GNUC__) && defined(__i386) -#define XMLCALL __attribute__((cdecl)) -#else +# if defined(_MSC_VER) +# define XMLCALL __cdecl +# elif defined(__GNUC__) && defined(__i386) && ! defined(__INTEL_COMPILER) +# define XMLCALL __attribute__((cdecl)) +# else /* For any platform which uses this definition and supports more than one calling convention, we need to extend this definition to declare the convention used on that platform, if it's possible to @@ -47,27 +80,47 @@ pre-processor and how to specify the same calling convention as the platform's malloc() implementation. */ -#define XMLCALL -#endif -#endif /* not defined XMLCALL */ - +# define XMLCALL +# endif +#endif /* not defined XMLCALL */ -#if !defined(XML_STATIC) && !defined(XMLIMPORT) -#ifndef XML_BUILDING_EXPAT +#if ! defined(XML_STATIC) && ! defined(XMLIMPORT) +# ifndef XML_BUILDING_EXPAT /* using Expat from an application */ -#ifdef XML_USE_MSC_EXTENSIONS -#define XMLIMPORT __declspec(dllimport) +# if defined(_MSC_EXTENSIONS) && ! defined(__BEOS__) && ! defined(__CYGWIN__) +# define XMLIMPORT __declspec(dllimport) +# endif + +# endif +#endif /* not defined XML_STATIC */ + +#ifndef XML_ENABLE_VISIBILITY +# define XML_ENABLE_VISIBILITY 0 #endif +#if ! defined(XMLIMPORT) && XML_ENABLE_VISIBILITY +# define XMLIMPORT __attribute__((visibility("default"))) #endif -#endif /* not defined XML_STATIC */ /* If we didn't define it above, define it away: */ #ifndef XMLIMPORT -#define XMLIMPORT +# define XMLIMPORT +#endif + +#if defined(__GNUC__) \ + && (__GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 96)) +# define XML_ATTR_MALLOC __attribute__((__malloc__)) +#else +# define XML_ATTR_MALLOC #endif +#if defined(__GNUC__) \ + && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)) +# define XML_ATTR_ALLOC_SIZE(x) __attribute__((__alloc_size__(x))) +#else +# define XML_ATTR_ALLOC_SIZE(x) +#endif #define XMLPARSEAPI(type) XMLIMPORT type XMLCALL @@ -76,18 +129,37 @@ extern "C" { #endif #ifdef XML_UNICODE_WCHAR_T -#define XML_UNICODE +# ifndef XML_UNICODE +# define XML_UNICODE +# endif +# if defined(__SIZEOF_WCHAR_T__) && (__SIZEOF_WCHAR_T__ != 2) +# error "sizeof(wchar_t) != 2; Need -fshort-wchar for both Expat and libc" +# endif #endif -#ifdef XML_UNICODE /* Information is UTF-16 encoded. */ -#ifdef XML_UNICODE_WCHAR_T +#ifdef XML_UNICODE /* Information is UTF-16 encoded. */ +# ifdef XML_UNICODE_WCHAR_T typedef wchar_t XML_Char; typedef wchar_t XML_LChar; -#else +# else typedef unsigned short XML_Char; typedef char XML_LChar; -#endif /* XML_UNICODE_WCHAR_T */ -#else /* Information is UTF-8 encoded. */ +# endif /* XML_UNICODE_WCHAR_T */ +#else /* Information is UTF-8 encoded. */ typedef char XML_Char; typedef char XML_LChar; -#endif /* XML_UNICODE */ +#endif /* XML_UNICODE */ + +#ifdef XML_LARGE_SIZE /* Use large integers for file/stream positions. */ +typedef long long XML_Index; +typedef unsigned long long XML_Size; +#else +typedef long XML_Index; +typedef unsigned long XML_Size; +#endif /* XML_LARGE_SIZE */ + +#ifdef __cplusplus +} +#endif + +#endif /* not Expat_External_INCLUDED */ diff --git a/src/simgear/xml/iasciitab.h b/src/simgear/xml/iasciitab.h index 24a1d5ccc..5d8646f2a 100644 --- a/src/simgear/xml/iasciitab.h +++ b/src/simgear/xml/iasciitab.h @@ -1,37 +1,67 @@ -/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd - See the file COPYING for copying permission. +/* + __ __ _ + ___\ \/ /_ __ __ _| |_ + / _ \\ /| '_ \ / _` | __| + | __// \| |_) | (_| | |_ + \___/_/\_\ .__/ \__,_|\__| + |_| XML parser + + Copyright (c) 1997-2000 Thai Open Source Software Center Ltd + Copyright (c) 2000 Clark Cooper + Copyright (c) 2002 Fred L. Drake, Jr. + Copyright (c) 2017 Sebastian Pipping + Licensed under the MIT license: + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to permit + persons to whom the Software is furnished to do so, subject to the + following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN + NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, + DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + USE OR OTHER DEALINGS IN THE SOFTWARE. */ /* Like asciitab.h, except that 0xD has code BT_S rather than BT_CR */ /* 0x00 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, -/* 0x04 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, -/* 0x08 */ BT_NONXML, BT_S, BT_LF, BT_NONXML, -/* 0x0C */ BT_NONXML, BT_S, BT_NONXML, BT_NONXML, -/* 0x10 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, -/* 0x14 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, -/* 0x18 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, -/* 0x1C */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, -/* 0x20 */ BT_S, BT_EXCL, BT_QUOT, BT_NUM, -/* 0x24 */ BT_OTHER, BT_PERCNT, BT_AMP, BT_APOS, -/* 0x28 */ BT_LPAR, BT_RPAR, BT_AST, BT_PLUS, -/* 0x2C */ BT_COMMA, BT_MINUS, BT_NAME, BT_SOL, -/* 0x30 */ BT_DIGIT, BT_DIGIT, BT_DIGIT, BT_DIGIT, -/* 0x34 */ BT_DIGIT, BT_DIGIT, BT_DIGIT, BT_DIGIT, -/* 0x38 */ BT_DIGIT, BT_DIGIT, BT_COLON, BT_SEMI, -/* 0x3C */ BT_LT, BT_EQUALS, BT_GT, BT_QUEST, -/* 0x40 */ BT_OTHER, BT_HEX, BT_HEX, BT_HEX, -/* 0x44 */ BT_HEX, BT_HEX, BT_HEX, BT_NMSTRT, -/* 0x48 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, -/* 0x4C */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, -/* 0x50 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, -/* 0x54 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, -/* 0x58 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_LSQB, -/* 0x5C */ BT_OTHER, BT_RSQB, BT_OTHER, BT_NMSTRT, -/* 0x60 */ BT_OTHER, BT_HEX, BT_HEX, BT_HEX, -/* 0x64 */ BT_HEX, BT_HEX, BT_HEX, BT_NMSTRT, -/* 0x68 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, -/* 0x6C */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, -/* 0x70 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, -/* 0x74 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, -/* 0x78 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_OTHER, -/* 0x7C */ BT_VERBAR, BT_OTHER, BT_OTHER, BT_OTHER, + /* 0x04 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, + /* 0x08 */ BT_NONXML, BT_S, BT_LF, BT_NONXML, + /* 0x0C */ BT_NONXML, BT_S, BT_NONXML, BT_NONXML, + /* 0x10 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, + /* 0x14 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, + /* 0x18 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, + /* 0x1C */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, + /* 0x20 */ BT_S, BT_EXCL, BT_QUOT, BT_NUM, + /* 0x24 */ BT_OTHER, BT_PERCNT, BT_AMP, BT_APOS, + /* 0x28 */ BT_LPAR, BT_RPAR, BT_AST, BT_PLUS, + /* 0x2C */ BT_COMMA, BT_MINUS, BT_NAME, BT_SOL, + /* 0x30 */ BT_DIGIT, BT_DIGIT, BT_DIGIT, BT_DIGIT, + /* 0x34 */ BT_DIGIT, BT_DIGIT, BT_DIGIT, BT_DIGIT, + /* 0x38 */ BT_DIGIT, BT_DIGIT, BT_COLON, BT_SEMI, + /* 0x3C */ BT_LT, BT_EQUALS, BT_GT, BT_QUEST, + /* 0x40 */ BT_OTHER, BT_HEX, BT_HEX, BT_HEX, + /* 0x44 */ BT_HEX, BT_HEX, BT_HEX, BT_NMSTRT, + /* 0x48 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0x4C */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0x50 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0x54 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0x58 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_LSQB, + /* 0x5C */ BT_OTHER, BT_RSQB, BT_OTHER, BT_NMSTRT, + /* 0x60 */ BT_OTHER, BT_HEX, BT_HEX, BT_HEX, + /* 0x64 */ BT_HEX, BT_HEX, BT_HEX, BT_NMSTRT, + /* 0x68 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0x6C */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0x70 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0x74 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0x78 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_OTHER, + /* 0x7C */ BT_VERBAR, BT_OTHER, BT_OTHER, BT_OTHER, diff --git a/src/simgear/xml/internal.h b/src/simgear/xml/internal.h index ff056c659..444eba0fb 100644 --- a/src/simgear/xml/internal.h +++ b/src/simgear/xml/internal.h @@ -18,9 +18,42 @@ Note: Use of these macros is based on judgement, not hard rules, and therefore subject to change. + __ __ _ + ___\ \/ /_ __ __ _| |_ + / _ \\ /| '_ \ / _` | __| + | __// \| |_) | (_| | |_ + \___/_/\_\ .__/ \__,_|\__| + |_| XML parser + + Copyright (c) 2002-2003 Fred L. Drake, Jr. + Copyright (c) 2002-2006 Karl Waclawek + Copyright (c) 2003 Greg Stein + Copyright (c) 2016-2021 Sebastian Pipping + Copyright (c) 2018 Yury Gribov + Copyright (c) 2019 David Loffredo + Licensed under the MIT license: + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to permit + persons to whom the Software is furnished to do so, subject to the + following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN + NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, + DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#if defined(__GNUC__) && defined(__i386__) +#if defined(__GNUC__) && defined(__i386__) && ! defined(__MINGW32__) /* We'll use this version by default only where we know it helps. regparm() generates warnings on Solaris boxes. See SF bug #692878. @@ -30,8 +63,8 @@ #define FASTCALL __attribute__((stdcall, regparm(3))) and let's try this: */ -#define FASTCALL __attribute__((regparm(3))) -#define PTRFASTCALL __attribute__((regparm(3))) +# define FASTCALL __attribute__((regparm(3))) +# define PTRFASTCALL __attribute__((regparm(3))) #endif /* Using __fastcall seems to have an unexpected negative effect under @@ -45,29 +78,86 @@ /* Make sure all of these are defined if they aren't already. */ #ifndef FASTCALL -#define FASTCALL +# define FASTCALL #endif #ifndef PTRCALL -#define PTRCALL +# define PTRCALL #endif #ifndef PTRFASTCALL -#define PTRFASTCALL +# define PTRFASTCALL #endif #ifndef XML_MIN_SIZE -#if !defined(__cplusplus) && !defined(inline) -#ifdef __GNUC__ -#define inline __inline -#endif /* __GNUC__ */ -#endif +# if ! defined(__cplusplus) && ! defined(inline) +# ifdef __GNUC__ +# define inline __inline +# endif /* __GNUC__ */ +# endif #endif /* XML_MIN_SIZE */ #ifdef __cplusplus -#define inline inline +# define inline inline #else -#ifndef inline -#define inline +# ifndef inline +# define inline +# endif #endif + +#include // ULONG_MAX + +#if defined(_WIN32) && ! defined(__USE_MINGW_ANSI_STDIO) +# define EXPAT_FMT_ULL(midpart) "%" midpart "I64u" +# if defined(_WIN64) // Note: modifiers "td" and "zu" do not work for MinGW +# define EXPAT_FMT_PTRDIFF_T(midpart) "%" midpart "I64d" +# define EXPAT_FMT_SIZE_T(midpart) "%" midpart "I64u" +# else +# define EXPAT_FMT_PTRDIFF_T(midpart) "%" midpart "d" +# define EXPAT_FMT_SIZE_T(midpart) "%" midpart "u" +# endif +#else +# define EXPAT_FMT_ULL(midpart) "%" midpart "llu" +# if ! defined(ULONG_MAX) +# error Compiler did not define ULONG_MAX for us +# elif ULONG_MAX == 18446744073709551615u // 2^64-1 +# define EXPAT_FMT_PTRDIFF_T(midpart) "%" midpart "ld" +# define EXPAT_FMT_SIZE_T(midpart) "%" midpart "lu" +# else +# define EXPAT_FMT_PTRDIFF_T(midpart) "%" midpart "d" +# define EXPAT_FMT_SIZE_T(midpart) "%" midpart "u" +# endif +#endif + +#ifndef UNUSED_P +# define UNUSED_P(p) (void)p +#endif + +/* NOTE BEGIN If you ever patch these defaults to greater values + for non-attack XML payload in your environment, + please file a bug report with libexpat. Thank you! +*/ +#define EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT \ + 100.0f +#define EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT \ + 8388608 // 8 MiB, 2^23 +/* NOTE END */ + +#include "expat.h" // so we can use type XML_Parser below + +#ifdef __cplusplus +extern "C" { +#endif + +void _INTERNAL_trim_to_complete_utf8_characters(const char *from, + const char **fromLimRef); + +#if defined(XML_DTD) +unsigned long long testingAccountingGetCountBytesDirect(XML_Parser parser); +unsigned long long testingAccountingGetCountBytesIndirect(XML_Parser parser); +const char *unsignedCharToPrintable(unsigned char c); +#endif + +#ifdef __cplusplus +} #endif diff --git a/src/simgear/xml/latin1tab.h b/src/simgear/xml/latin1tab.h index 53c25d76b..b681d278a 100644 --- a/src/simgear/xml/latin1tab.h +++ b/src/simgear/xml/latin1tab.h @@ -1,36 +1,66 @@ -/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd - See the file COPYING for copying permission. +/* + __ __ _ + ___\ \/ /_ __ __ _| |_ + / _ \\ /| '_ \ / _` | __| + | __// \| |_) | (_| | |_ + \___/_/\_\ .__/ \__,_|\__| + |_| XML parser + + Copyright (c) 1997-2000 Thai Open Source Software Center Ltd + Copyright (c) 2000 Clark Cooper + Copyright (c) 2002 Fred L. Drake, Jr. + Copyright (c) 2017 Sebastian Pipping + Licensed under the MIT license: + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to permit + persons to whom the Software is furnished to do so, subject to the + following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN + NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, + DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + USE OR OTHER DEALINGS IN THE SOFTWARE. */ /* 0x80 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, -/* 0x84 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, -/* 0x88 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, -/* 0x8C */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, -/* 0x90 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, -/* 0x94 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, -/* 0x98 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, -/* 0x9C */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, -/* 0xA0 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, -/* 0xA4 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, -/* 0xA8 */ BT_OTHER, BT_OTHER, BT_NMSTRT, BT_OTHER, -/* 0xAC */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, -/* 0xB0 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, -/* 0xB4 */ BT_OTHER, BT_NMSTRT, BT_OTHER, BT_NAME, -/* 0xB8 */ BT_OTHER, BT_OTHER, BT_NMSTRT, BT_OTHER, -/* 0xBC */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, -/* 0xC0 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, -/* 0xC4 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, -/* 0xC8 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, -/* 0xCC */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, -/* 0xD0 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, -/* 0xD4 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_OTHER, -/* 0xD8 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, -/* 0xDC */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, -/* 0xE0 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, -/* 0xE4 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, -/* 0xE8 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, -/* 0xEC */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, -/* 0xF0 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, -/* 0xF4 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_OTHER, -/* 0xF8 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, -/* 0xFC */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0x84 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, + /* 0x88 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, + /* 0x8C */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, + /* 0x90 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, + /* 0x94 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, + /* 0x98 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, + /* 0x9C */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, + /* 0xA0 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, + /* 0xA4 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, + /* 0xA8 */ BT_OTHER, BT_OTHER, BT_NMSTRT, BT_OTHER, + /* 0xAC */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, + /* 0xB0 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, + /* 0xB4 */ BT_OTHER, BT_NMSTRT, BT_OTHER, BT_NAME, + /* 0xB8 */ BT_OTHER, BT_OTHER, BT_NMSTRT, BT_OTHER, + /* 0xBC */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, + /* 0xC0 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0xC4 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0xC8 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0xCC */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0xD0 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0xD4 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_OTHER, + /* 0xD8 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0xDC */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0xE0 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0xE4 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0xE8 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0xEC */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0xF0 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0xF4 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_OTHER, + /* 0xF8 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0xFC */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, diff --git a/src/simgear/xml/nametab.h b/src/simgear/xml/nametab.h index b05e62c77..63485446b 100644 --- a/src/simgear/xml/nametab.h +++ b/src/simgear/xml/nametab.h @@ -1,150 +1,136 @@ +/* + __ __ _ + ___\ \/ /_ __ __ _| |_ + / _ \\ /| '_ \ / _` | __| + | __// \| |_) | (_| | |_ + \___/_/\_\ .__/ \__,_|\__| + |_| XML parser + + Copyright (c) 2000 Clark Cooper + Copyright (c) 2017 Sebastian Pipping + Licensed under the MIT license: + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to permit + persons to whom the Software is furnished to do so, subject to the + following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN + NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, + DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + static const unsigned namingBitmap[] = { -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, -0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, -0x00000000, 0x04000000, 0x87FFFFFE, 0x07FFFFFE, -0x00000000, 0x00000000, 0xFF7FFFFF, 0xFF7FFFFF, -0xFFFFFFFF, 0x7FF3FFFF, 0xFFFFFDFE, 0x7FFFFFFF, -0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE00F, 0xFC31FFFF, -0x00FFFFFF, 0x00000000, 0xFFFF0000, 0xFFFFFFFF, -0xFFFFFFFF, 0xF80001FF, 0x00000003, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0xFFFFD740, 0xFFFFFFFB, 0x547F7FFF, 0x000FFFFD, -0xFFFFDFFE, 0xFFFFFFFF, 0xDFFEFFFF, 0xFFFFFFFF, -0xFFFF0003, 0xFFFFFFFF, 0xFFFF199F, 0x033FCFFF, -0x00000000, 0xFFFE0000, 0x027FFFFF, 0xFFFFFFFE, -0x0000007F, 0x00000000, 0xFFFF0000, 0x000707FF, -0x00000000, 0x07FFFFFE, 0x000007FE, 0xFFFE0000, -0xFFFFFFFF, 0x7CFFFFFF, 0x002F7FFF, 0x00000060, -0xFFFFFFE0, 0x23FFFFFF, 0xFF000000, 0x00000003, -0xFFF99FE0, 0x03C5FDFF, 0xB0000000, 0x00030003, -0xFFF987E0, 0x036DFDFF, 0x5E000000, 0x001C0000, -0xFFFBAFE0, 0x23EDFDFF, 0x00000000, 0x00000001, -0xFFF99FE0, 0x23CDFDFF, 0xB0000000, 0x00000003, -0xD63DC7E0, 0x03BFC718, 0x00000000, 0x00000000, -0xFFFDDFE0, 0x03EFFDFF, 0x00000000, 0x00000003, -0xFFFDDFE0, 0x03EFFDFF, 0x40000000, 0x00000003, -0xFFFDDFE0, 0x03FFFDFF, 0x00000000, 0x00000003, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0xFFFFFFFE, 0x000D7FFF, 0x0000003F, 0x00000000, -0xFEF02596, 0x200D6CAE, 0x0000001F, 0x00000000, -0x00000000, 0x00000000, 0xFFFFFEFF, 0x000003FF, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0xFFFFFFFF, 0xFFFF003F, 0x007FFFFF, -0x0007DAED, 0x50000000, 0x82315001, 0x002C62AB, -0x40000000, 0xF580C900, 0x00000007, 0x02010800, -0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, -0x0FFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x03FFFFFF, -0x3F3FFFFF, 0xFFFFFFFF, 0xAAFF3F3F, 0x3FFFFFFF, -0xFFFFFFFF, 0x5FDFFFFF, 0x0FCF1FDC, 0x1FDC1FFF, -0x00000000, 0x00004C40, 0x00000000, 0x00000000, -0x00000007, 0x00000000, 0x00000000, 0x00000000, -0x00000080, 0x000003FE, 0xFFFFFFFE, 0xFFFFFFFF, -0x001FFFFF, 0xFFFFFFFE, 0xFFFFFFFF, 0x07FFFFFF, -0xFFFFFFE0, 0x00001FFF, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, -0xFFFFFFFF, 0x0000003F, 0x00000000, 0x00000000, -0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, -0xFFFFFFFF, 0x0000000F, 0x00000000, 0x00000000, -0x00000000, 0x07FF6000, 0x87FFFFFE, 0x07FFFFFE, -0x00000000, 0x00800000, 0xFF7FFFFF, 0xFF7FFFFF, -0x00FFFFFF, 0x00000000, 0xFFFF0000, 0xFFFFFFFF, -0xFFFFFFFF, 0xF80001FF, 0x00030003, 0x00000000, -0xFFFFFFFF, 0xFFFFFFFF, 0x0000003F, 0x00000003, -0xFFFFD7C0, 0xFFFFFFFB, 0x547F7FFF, 0x000FFFFD, -0xFFFFDFFE, 0xFFFFFFFF, 0xDFFEFFFF, 0xFFFFFFFF, -0xFFFF007B, 0xFFFFFFFF, 0xFFFF199F, 0x033FCFFF, -0x00000000, 0xFFFE0000, 0x027FFFFF, 0xFFFFFFFE, -0xFFFE007F, 0xBBFFFFFB, 0xFFFF0016, 0x000707FF, -0x00000000, 0x07FFFFFE, 0x0007FFFF, 0xFFFF03FF, -0xFFFFFFFF, 0x7CFFFFFF, 0xFFEF7FFF, 0x03FF3DFF, -0xFFFFFFEE, 0xF3FFFFFF, 0xFF1E3FFF, 0x0000FFCF, -0xFFF99FEE, 0xD3C5FDFF, 0xB080399F, 0x0003FFCF, -0xFFF987E4, 0xD36DFDFF, 0x5E003987, 0x001FFFC0, -0xFFFBAFEE, 0xF3EDFDFF, 0x00003BBF, 0x0000FFC1, -0xFFF99FEE, 0xF3CDFDFF, 0xB0C0398F, 0x0000FFC3, -0xD63DC7EC, 0xC3BFC718, 0x00803DC7, 0x0000FF80, -0xFFFDDFEE, 0xC3EFFDFF, 0x00603DDF, 0x0000FFC3, -0xFFFDDFEC, 0xC3EFFDFF, 0x40603DDF, 0x0000FFC3, -0xFFFDDFEC, 0xC3FFFDFF, 0x00803DCF, 0x0000FFC3, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0xFFFFFFFE, 0x07FF7FFF, 0x03FF7FFF, 0x00000000, -0xFEF02596, 0x3BFF6CAE, 0x03FF3F5F, 0x00000000, -0x03000000, 0xC2A003FF, 0xFFFFFEFF, 0xFFFE03FF, -0xFEBF0FDF, 0x02FE3FFF, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x1FFF0000, 0x00000002, -0x000000A0, 0x003EFFFE, 0xFFFFFFFE, 0xFFFFFFFF, -0x661FFFFF, 0xFFFFFFFE, 0xFFFFFFFF, 0x77FFFFFF, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x04000000, + 0x87FFFFFE, 0x07FFFFFE, 0x00000000, 0x00000000, 0xFF7FFFFF, 0xFF7FFFFF, + 0xFFFFFFFF, 0x7FF3FFFF, 0xFFFFFDFE, 0x7FFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFE00F, 0xFC31FFFF, 0x00FFFFFF, 0x00000000, 0xFFFF0000, 0xFFFFFFFF, + 0xFFFFFFFF, 0xF80001FF, 0x00000003, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0xFFFFD740, 0xFFFFFFFB, 0x547F7FFF, 0x000FFFFD, + 0xFFFFDFFE, 0xFFFFFFFF, 0xDFFEFFFF, 0xFFFFFFFF, 0xFFFF0003, 0xFFFFFFFF, + 0xFFFF199F, 0x033FCFFF, 0x00000000, 0xFFFE0000, 0x027FFFFF, 0xFFFFFFFE, + 0x0000007F, 0x00000000, 0xFFFF0000, 0x000707FF, 0x00000000, 0x07FFFFFE, + 0x000007FE, 0xFFFE0000, 0xFFFFFFFF, 0x7CFFFFFF, 0x002F7FFF, 0x00000060, + 0xFFFFFFE0, 0x23FFFFFF, 0xFF000000, 0x00000003, 0xFFF99FE0, 0x03C5FDFF, + 0xB0000000, 0x00030003, 0xFFF987E0, 0x036DFDFF, 0x5E000000, 0x001C0000, + 0xFFFBAFE0, 0x23EDFDFF, 0x00000000, 0x00000001, 0xFFF99FE0, 0x23CDFDFF, + 0xB0000000, 0x00000003, 0xD63DC7E0, 0x03BFC718, 0x00000000, 0x00000000, + 0xFFFDDFE0, 0x03EFFDFF, 0x00000000, 0x00000003, 0xFFFDDFE0, 0x03EFFDFF, + 0x40000000, 0x00000003, 0xFFFDDFE0, 0x03FFFDFF, 0x00000000, 0x00000003, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xFFFFFFFE, 0x000D7FFF, + 0x0000003F, 0x00000000, 0xFEF02596, 0x200D6CAE, 0x0000001F, 0x00000000, + 0x00000000, 0x00000000, 0xFFFFFEFF, 0x000003FF, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0xFFFFFFFF, 0xFFFF003F, 0x007FFFFF, 0x0007DAED, 0x50000000, + 0x82315001, 0x002C62AB, 0x40000000, 0xF580C900, 0x00000007, 0x02010800, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x0FFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0x03FFFFFF, 0x3F3FFFFF, 0xFFFFFFFF, 0xAAFF3F3F, 0x3FFFFFFF, + 0xFFFFFFFF, 0x5FDFFFFF, 0x0FCF1FDC, 0x1FDC1FFF, 0x00000000, 0x00004C40, + 0x00000000, 0x00000000, 0x00000007, 0x00000000, 0x00000000, 0x00000000, + 0x00000080, 0x000003FE, 0xFFFFFFFE, 0xFFFFFFFF, 0x001FFFFF, 0xFFFFFFFE, + 0xFFFFFFFF, 0x07FFFFFF, 0xFFFFFFE0, 0x00001FFF, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x0000003F, 0x00000000, 0x00000000, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x0000000F, + 0x00000000, 0x00000000, 0x00000000, 0x07FF6000, 0x87FFFFFE, 0x07FFFFFE, + 0x00000000, 0x00800000, 0xFF7FFFFF, 0xFF7FFFFF, 0x00FFFFFF, 0x00000000, + 0xFFFF0000, 0xFFFFFFFF, 0xFFFFFFFF, 0xF80001FF, 0x00030003, 0x00000000, + 0xFFFFFFFF, 0xFFFFFFFF, 0x0000003F, 0x00000003, 0xFFFFD7C0, 0xFFFFFFFB, + 0x547F7FFF, 0x000FFFFD, 0xFFFFDFFE, 0xFFFFFFFF, 0xDFFEFFFF, 0xFFFFFFFF, + 0xFFFF007B, 0xFFFFFFFF, 0xFFFF199F, 0x033FCFFF, 0x00000000, 0xFFFE0000, + 0x027FFFFF, 0xFFFFFFFE, 0xFFFE007F, 0xBBFFFFFB, 0xFFFF0016, 0x000707FF, + 0x00000000, 0x07FFFFFE, 0x0007FFFF, 0xFFFF03FF, 0xFFFFFFFF, 0x7CFFFFFF, + 0xFFEF7FFF, 0x03FF3DFF, 0xFFFFFFEE, 0xF3FFFFFF, 0xFF1E3FFF, 0x0000FFCF, + 0xFFF99FEE, 0xD3C5FDFF, 0xB080399F, 0x0003FFCF, 0xFFF987E4, 0xD36DFDFF, + 0x5E003987, 0x001FFFC0, 0xFFFBAFEE, 0xF3EDFDFF, 0x00003BBF, 0x0000FFC1, + 0xFFF99FEE, 0xF3CDFDFF, 0xB0C0398F, 0x0000FFC3, 0xD63DC7EC, 0xC3BFC718, + 0x00803DC7, 0x0000FF80, 0xFFFDDFEE, 0xC3EFFDFF, 0x00603DDF, 0x0000FFC3, + 0xFFFDDFEC, 0xC3EFFDFF, 0x40603DDF, 0x0000FFC3, 0xFFFDDFEC, 0xC3FFFDFF, + 0x00803DCF, 0x0000FFC3, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0xFFFFFFFE, 0x07FF7FFF, 0x03FF7FFF, 0x00000000, 0xFEF02596, 0x3BFF6CAE, + 0x03FF3F5F, 0x00000000, 0x03000000, 0xC2A003FF, 0xFFFFFEFF, 0xFFFE03FF, + 0xFEBF0FDF, 0x02FE3FFF, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1FFF0000, 0x00000002, + 0x000000A0, 0x003EFFFE, 0xFFFFFFFE, 0xFFFFFFFF, 0x661FFFFF, 0xFFFFFFFE, + 0xFFFFFFFF, 0x77FFFFFF, }; static const unsigned char nmstrtPages[] = { -0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x00, -0x00, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, -0x10, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x12, 0x13, -0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x15, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x17, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x18, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x00, 0x00, 0x09, 0x0A, 0x0B, + 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x12, 0x13, 0x00, 0x14, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x15, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x17, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x18, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, }; static const unsigned char namePages[] = { -0x19, 0x03, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x00, -0x00, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, -0x10, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x12, 0x13, -0x26, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x27, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x17, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x18, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x19, 0x03, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x00, 0x00, 0x1F, 0x20, 0x21, + 0x22, 0x23, 0x24, 0x25, 0x10, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x12, 0x13, 0x26, 0x14, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x27, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x17, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x18, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, }; diff --git a/src/simgear/xml/siphash.h b/src/simgear/xml/siphash.h new file mode 100644 index 000000000..e5406d7ee --- /dev/null +++ b/src/simgear/xml/siphash.h @@ -0,0 +1,393 @@ +/* ========================================================================== + * siphash.h - SipHash-2-4 in a single header file + * -------------------------------------------------------------------------- + * Derived by William Ahern from the reference implementation[1] published[2] + * by Jean-Philippe Aumasson and Daniel J. Berstein. + * Minimal changes by Sebastian Pipping and Victor Stinner on top, see below. + * Licensed under the CC0 Public Domain Dedication license. + * + * 1. https://www.131002.net/siphash/siphash24.c + * 2. https://www.131002.net/siphash/ + * -------------------------------------------------------------------------- + * HISTORY: + * + * 2020-10-03 (Sebastian Pipping) + * - Drop support for Visual Studio 9.0/2008 and earlier + * + * 2019-08-03 (Sebastian Pipping) + * - Mark part of sip24_valid as to be excluded from clang-format + * - Re-format code using clang-format 9 + * + * 2018-07-08 (Anton Maklakov) + * - Add "fall through" markers for GCC's -Wimplicit-fallthrough + * + * 2017-11-03 (Sebastian Pipping) + * - Hide sip_tobin and sip_binof unless SIPHASH_TOBIN macro is defined + * + * 2017-07-25 (Vadim Zeitlin) + * - Fix use of SIPHASH_MAIN macro + * + * 2017-07-05 (Sebastian Pipping) + * - Use _SIP_ULL macro to not require a C++11 compiler if compiled as C++ + * - Add const qualifiers at two places + * - Ensure <=80 characters line length (assuming tab width 4) + * + * 2017-06-23 (Victor Stinner) + * - Address Win64 compile warnings + * + * 2017-06-18 (Sebastian Pipping) + * - Clarify license note in the header + * - Address C89 issues: + * - Stop using inline keyword (and let compiler decide) + * - Replace _Bool by int + * - Turn macro siphash24 into a function + * - Address invalid conversion (void pointer) by explicit cast + * - Address lack of stdint.h for Visual Studio 2003 to 2008 + * - Always expose sip24_valid (for self-tests) + * + * 2012-11-04 - Born. (William Ahern) + * -------------------------------------------------------------------------- + * USAGE: + * + * SipHash-2-4 takes as input two 64-bit words as the key, some number of + * message bytes, and outputs a 64-bit word as the message digest. This + * implementation employs two data structures: a struct sipkey for + * representing the key, and a struct siphash for representing the hash + * state. + * + * For converting a 16-byte unsigned char array to a key, use either the + * macro sip_keyof or the routine sip_tokey. The former instantiates a + * compound literal key, while the latter requires a key object as a + * parameter. + * + * unsigned char secret[16]; + * arc4random_buf(secret, sizeof secret); + * struct sipkey *key = sip_keyof(secret); + * + * For hashing a message, use either the convenience macro siphash24 or the + * routines sip24_init, sip24_update, and sip24_final. + * + * struct siphash state; + * void *msg; + * size_t len; + * uint64_t hash; + * + * sip24_init(&state, key); + * sip24_update(&state, msg, len); + * hash = sip24_final(&state); + * + * or + * + * hash = siphash24(msg, len, key); + * + * To convert the 64-bit hash value to a canonical 8-byte little-endian + * binary representation, use either the macro sip_binof or the routine + * sip_tobin. The former instantiates and returns a compound literal array, + * while the latter requires an array object as a parameter. + * -------------------------------------------------------------------------- + * NOTES: + * + * o Neither sip_keyof, sip_binof, nor siphash24 will work with compilers + * lacking compound literal support. Instead, you must use the lower-level + * interfaces which take as parameters the temporary state objects. + * + * o Uppercase macros may evaluate parameters more than once. Lowercase + * macros should not exhibit any such side effects. + * ========================================================================== + */ +#ifndef SIPHASH_H +#define SIPHASH_H + +#include /* size_t */ +#include /* uint64_t uint32_t uint8_t */ + +/* + * Workaround to not require a C++11 compiler for using ULL suffix + * if this code is included and compiled as C++; related GCC warning is: + * warning: use of C++11 long long integer constant [-Wlong-long] + */ +#define _SIP_ULL(high, low) (((uint64_t)high << 32) | low) + +#define SIP_ROTL(x, b) (uint64_t)(((x) << (b)) | ((x) >> (64 - (b)))) + +#define SIP_U32TO8_LE(p, v) \ + (p)[0] = (uint8_t)((v) >> 0); \ + (p)[1] = (uint8_t)((v) >> 8); \ + (p)[2] = (uint8_t)((v) >> 16); \ + (p)[3] = (uint8_t)((v) >> 24); + +#define SIP_U64TO8_LE(p, v) \ + SIP_U32TO8_LE((p) + 0, (uint32_t)((v) >> 0)); \ + SIP_U32TO8_LE((p) + 4, (uint32_t)((v) >> 32)); + +#define SIP_U8TO64_LE(p) \ + (((uint64_t)((p)[0]) << 0) | ((uint64_t)((p)[1]) << 8) \ + | ((uint64_t)((p)[2]) << 16) | ((uint64_t)((p)[3]) << 24) \ + | ((uint64_t)((p)[4]) << 32) | ((uint64_t)((p)[5]) << 40) \ + | ((uint64_t)((p)[6]) << 48) | ((uint64_t)((p)[7]) << 56)) + +#define SIPHASH_INITIALIZER \ + { 0, 0, 0, 0, {0}, 0, 0 } + +struct siphash { + uint64_t v0, v1, v2, v3; + + unsigned char buf[8], *p; + uint64_t c; +}; /* struct siphash */ + +#define SIP_KEYLEN 16 + +struct sipkey { + uint64_t k[2]; +}; /* struct sipkey */ + +#define sip_keyof(k) sip_tokey(&(struct sipkey){{0}}, (k)) + +static struct sipkey * +sip_tokey(struct sipkey *key, const void *src) { + key->k[0] = SIP_U8TO64_LE((const unsigned char *)src); + key->k[1] = SIP_U8TO64_LE((const unsigned char *)src + 8); + return key; +} /* sip_tokey() */ + +#ifdef SIPHASH_TOBIN + +# define sip_binof(v) sip_tobin((unsigned char[8]){0}, (v)) + +static void * +sip_tobin(void *dst, uint64_t u64) { + SIP_U64TO8_LE((unsigned char *)dst, u64); + return dst; +} /* sip_tobin() */ + +#endif /* SIPHASH_TOBIN */ + +static void +sip_round(struct siphash *H, const int rounds) { + int i; + + for (i = 0; i < rounds; i++) { + H->v0 += H->v1; + H->v1 = SIP_ROTL(H->v1, 13); + H->v1 ^= H->v0; + H->v0 = SIP_ROTL(H->v0, 32); + + H->v2 += H->v3; + H->v3 = SIP_ROTL(H->v3, 16); + H->v3 ^= H->v2; + + H->v0 += H->v3; + H->v3 = SIP_ROTL(H->v3, 21); + H->v3 ^= H->v0; + + H->v2 += H->v1; + H->v1 = SIP_ROTL(H->v1, 17); + H->v1 ^= H->v2; + H->v2 = SIP_ROTL(H->v2, 32); + } +} /* sip_round() */ + +static struct siphash * +sip24_init(struct siphash *H, const struct sipkey *key) { + H->v0 = _SIP_ULL(0x736f6d65U, 0x70736575U) ^ key->k[0]; + H->v1 = _SIP_ULL(0x646f7261U, 0x6e646f6dU) ^ key->k[1]; + H->v2 = _SIP_ULL(0x6c796765U, 0x6e657261U) ^ key->k[0]; + H->v3 = _SIP_ULL(0x74656462U, 0x79746573U) ^ key->k[1]; + + H->p = H->buf; + H->c = 0; + + return H; +} /* sip24_init() */ + +#define sip_endof(a) (&(a)[sizeof(a) / sizeof *(a)]) + +static struct siphash * +sip24_update(struct siphash *H, const void *src, size_t len) { + const unsigned char *p = (const unsigned char *)src, *pe = p + len; + uint64_t m; + + do { + while (p < pe && H->p < sip_endof(H->buf)) + *H->p++ = *p++; + + if (H->p < sip_endof(H->buf)) + break; + + m = SIP_U8TO64_LE(H->buf); + H->v3 ^= m; + sip_round(H, 2); + H->v0 ^= m; + + H->p = H->buf; + H->c += 8; + } while (p < pe); + + return H; +} /* sip24_update() */ + +static uint64_t +sip24_final(struct siphash *H) { + const char left = (char)(H->p - H->buf); + uint64_t b = (H->c + left) << 56; + + switch (left) { + case 7: + b |= (uint64_t)H->buf[6] << 48; + /* fall through */ + case 6: + b |= (uint64_t)H->buf[5] << 40; + /* fall through */ + case 5: + b |= (uint64_t)H->buf[4] << 32; + /* fall through */ + case 4: + b |= (uint64_t)H->buf[3] << 24; + /* fall through */ + case 3: + b |= (uint64_t)H->buf[2] << 16; + /* fall through */ + case 2: + b |= (uint64_t)H->buf[1] << 8; + /* fall through */ + case 1: + b |= (uint64_t)H->buf[0] << 0; + /* fall through */ + case 0: + break; + } + + H->v3 ^= b; + sip_round(H, 2); + H->v0 ^= b; + H->v2 ^= 0xff; + sip_round(H, 4); + + return H->v0 ^ H->v1 ^ H->v2 ^ H->v3; +} /* sip24_final() */ + +static uint64_t +siphash24(const void *src, size_t len, const struct sipkey *key) { + struct siphash state = SIPHASH_INITIALIZER; + return sip24_final(sip24_update(sip24_init(&state, key), src, len)); +} /* siphash24() */ + +/* + * SipHash-2-4 output with + * k = 00 01 02 ... + * and + * in = (empty string) + * in = 00 (1 byte) + * in = 00 01 (2 bytes) + * in = 00 01 02 (3 bytes) + * ... + * in = 00 01 02 ... 3e (63 bytes) + */ +static int +sip24_valid(void) { + /* clang-format off */ + static const unsigned char vectors[64][8] = { + { 0x31, 0x0e, 0x0e, 0xdd, 0x47, 0xdb, 0x6f, 0x72, }, + { 0xfd, 0x67, 0xdc, 0x93, 0xc5, 0x39, 0xf8, 0x74, }, + { 0x5a, 0x4f, 0xa9, 0xd9, 0x09, 0x80, 0x6c, 0x0d, }, + { 0x2d, 0x7e, 0xfb, 0xd7, 0x96, 0x66, 0x67, 0x85, }, + { 0xb7, 0x87, 0x71, 0x27, 0xe0, 0x94, 0x27, 0xcf, }, + { 0x8d, 0xa6, 0x99, 0xcd, 0x64, 0x55, 0x76, 0x18, }, + { 0xce, 0xe3, 0xfe, 0x58, 0x6e, 0x46, 0xc9, 0xcb, }, + { 0x37, 0xd1, 0x01, 0x8b, 0xf5, 0x00, 0x02, 0xab, }, + { 0x62, 0x24, 0x93, 0x9a, 0x79, 0xf5, 0xf5, 0x93, }, + { 0xb0, 0xe4, 0xa9, 0x0b, 0xdf, 0x82, 0x00, 0x9e, }, + { 0xf3, 0xb9, 0xdd, 0x94, 0xc5, 0xbb, 0x5d, 0x7a, }, + { 0xa7, 0xad, 0x6b, 0x22, 0x46, 0x2f, 0xb3, 0xf4, }, + { 0xfb, 0xe5, 0x0e, 0x86, 0xbc, 0x8f, 0x1e, 0x75, }, + { 0x90, 0x3d, 0x84, 0xc0, 0x27, 0x56, 0xea, 0x14, }, + { 0xee, 0xf2, 0x7a, 0x8e, 0x90, 0xca, 0x23, 0xf7, }, + { 0xe5, 0x45, 0xbe, 0x49, 0x61, 0xca, 0x29, 0xa1, }, + { 0xdb, 0x9b, 0xc2, 0x57, 0x7f, 0xcc, 0x2a, 0x3f, }, + { 0x94, 0x47, 0xbe, 0x2c, 0xf5, 0xe9, 0x9a, 0x69, }, + { 0x9c, 0xd3, 0x8d, 0x96, 0xf0, 0xb3, 0xc1, 0x4b, }, + { 0xbd, 0x61, 0x79, 0xa7, 0x1d, 0xc9, 0x6d, 0xbb, }, + { 0x98, 0xee, 0xa2, 0x1a, 0xf2, 0x5c, 0xd6, 0xbe, }, + { 0xc7, 0x67, 0x3b, 0x2e, 0xb0, 0xcb, 0xf2, 0xd0, }, + { 0x88, 0x3e, 0xa3, 0xe3, 0x95, 0x67, 0x53, 0x93, }, + { 0xc8, 0xce, 0x5c, 0xcd, 0x8c, 0x03, 0x0c, 0xa8, }, + { 0x94, 0xaf, 0x49, 0xf6, 0xc6, 0x50, 0xad, 0xb8, }, + { 0xea, 0xb8, 0x85, 0x8a, 0xde, 0x92, 0xe1, 0xbc, }, + { 0xf3, 0x15, 0xbb, 0x5b, 0xb8, 0x35, 0xd8, 0x17, }, + { 0xad, 0xcf, 0x6b, 0x07, 0x63, 0x61, 0x2e, 0x2f, }, + { 0xa5, 0xc9, 0x1d, 0xa7, 0xac, 0xaa, 0x4d, 0xde, }, + { 0x71, 0x65, 0x95, 0x87, 0x66, 0x50, 0xa2, 0xa6, }, + { 0x28, 0xef, 0x49, 0x5c, 0x53, 0xa3, 0x87, 0xad, }, + { 0x42, 0xc3, 0x41, 0xd8, 0xfa, 0x92, 0xd8, 0x32, }, + { 0xce, 0x7c, 0xf2, 0x72, 0x2f, 0x51, 0x27, 0x71, }, + { 0xe3, 0x78, 0x59, 0xf9, 0x46, 0x23, 0xf3, 0xa7, }, + { 0x38, 0x12, 0x05, 0xbb, 0x1a, 0xb0, 0xe0, 0x12, }, + { 0xae, 0x97, 0xa1, 0x0f, 0xd4, 0x34, 0xe0, 0x15, }, + { 0xb4, 0xa3, 0x15, 0x08, 0xbe, 0xff, 0x4d, 0x31, }, + { 0x81, 0x39, 0x62, 0x29, 0xf0, 0x90, 0x79, 0x02, }, + { 0x4d, 0x0c, 0xf4, 0x9e, 0xe5, 0xd4, 0xdc, 0xca, }, + { 0x5c, 0x73, 0x33, 0x6a, 0x76, 0xd8, 0xbf, 0x9a, }, + { 0xd0, 0xa7, 0x04, 0x53, 0x6b, 0xa9, 0x3e, 0x0e, }, + { 0x92, 0x59, 0x58, 0xfc, 0xd6, 0x42, 0x0c, 0xad, }, + { 0xa9, 0x15, 0xc2, 0x9b, 0xc8, 0x06, 0x73, 0x18, }, + { 0x95, 0x2b, 0x79, 0xf3, 0xbc, 0x0a, 0xa6, 0xd4, }, + { 0xf2, 0x1d, 0xf2, 0xe4, 0x1d, 0x45, 0x35, 0xf9, }, + { 0x87, 0x57, 0x75, 0x19, 0x04, 0x8f, 0x53, 0xa9, }, + { 0x10, 0xa5, 0x6c, 0xf5, 0xdf, 0xcd, 0x9a, 0xdb, }, + { 0xeb, 0x75, 0x09, 0x5c, 0xcd, 0x98, 0x6c, 0xd0, }, + { 0x51, 0xa9, 0xcb, 0x9e, 0xcb, 0xa3, 0x12, 0xe6, }, + { 0x96, 0xaf, 0xad, 0xfc, 0x2c, 0xe6, 0x66, 0xc7, }, + { 0x72, 0xfe, 0x52, 0x97, 0x5a, 0x43, 0x64, 0xee, }, + { 0x5a, 0x16, 0x45, 0xb2, 0x76, 0xd5, 0x92, 0xa1, }, + { 0xb2, 0x74, 0xcb, 0x8e, 0xbf, 0x87, 0x87, 0x0a, }, + { 0x6f, 0x9b, 0xb4, 0x20, 0x3d, 0xe7, 0xb3, 0x81, }, + { 0xea, 0xec, 0xb2, 0xa3, 0x0b, 0x22, 0xa8, 0x7f, }, + { 0x99, 0x24, 0xa4, 0x3c, 0xc1, 0x31, 0x57, 0x24, }, + { 0xbd, 0x83, 0x8d, 0x3a, 0xaf, 0xbf, 0x8d, 0xb7, }, + { 0x0b, 0x1a, 0x2a, 0x32, 0x65, 0xd5, 0x1a, 0xea, }, + { 0x13, 0x50, 0x79, 0xa3, 0x23, 0x1c, 0xe6, 0x60, }, + { 0x93, 0x2b, 0x28, 0x46, 0xe4, 0xd7, 0x06, 0x66, }, + { 0xe1, 0x91, 0x5f, 0x5c, 0xb1, 0xec, 0xa4, 0x6c, }, + { 0xf3, 0x25, 0x96, 0x5c, 0xa1, 0x6d, 0x62, 0x9f, }, + { 0x57, 0x5f, 0xf2, 0x8e, 0x60, 0x38, 0x1b, 0xe5, }, + { 0x72, 0x45, 0x06, 0xeb, 0x4c, 0x32, 0x8a, 0x95, } + }; + /* clang-format on */ + + unsigned char in[64]; + struct sipkey k; + size_t i; + + sip_tokey(&k, "\000\001\002\003\004\005\006\007\010\011" + "\012\013\014\015\016\017"); + + for (i = 0; i < sizeof in; ++i) { + in[i] = (unsigned char)i; + + if (siphash24(in, i, &k) != SIP_U8TO64_LE(vectors[i])) + return 0; + } + + return 1; +} /* sip24_valid() */ + +#ifdef SIPHASH_MAIN + +# include + +int +main(void) { + const int ok = sip24_valid(); + + if (ok) + puts("OK"); + else + puts("FAIL"); + + return ! ok; +} /* main() */ + +#endif /* SIPHASH_MAIN */ + +#endif /* SIPHASH_H */ diff --git a/src/simgear/xml/utf8tab.h b/src/simgear/xml/utf8tab.h index 7bb3e7760..88efcf91c 100644 --- a/src/simgear/xml/utf8tab.h +++ b/src/simgear/xml/utf8tab.h @@ -1,37 +1,66 @@ -/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd - See the file COPYING for copying permission. -*/ +/* + __ __ _ + ___\ \/ /_ __ __ _| |_ + / _ \\ /| '_ \ / _` | __| + | __// \| |_) | (_| | |_ + \___/_/\_\ .__/ \__,_|\__| + |_| XML parser + + Copyright (c) 1997-2000 Thai Open Source Software Center Ltd + Copyright (c) 2000 Clark Cooper + Copyright (c) 2002 Fred L. Drake, Jr. + Copyright (c) 2017 Sebastian Pipping + Licensed under the MIT license: + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to permit + persons to whom the Software is furnished to do so, subject to the + following conditions: + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN + NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, + DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ /* 0x80 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, -/* 0x84 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, -/* 0x88 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, -/* 0x8C */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, -/* 0x90 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, -/* 0x94 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, -/* 0x98 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, -/* 0x9C */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, -/* 0xA0 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, -/* 0xA4 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, -/* 0xA8 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, -/* 0xAC */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, -/* 0xB0 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, -/* 0xB4 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, -/* 0xB8 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, -/* 0xBC */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, -/* 0xC0 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2, -/* 0xC4 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2, -/* 0xC8 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2, -/* 0xCC */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2, -/* 0xD0 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2, -/* 0xD4 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2, -/* 0xD8 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2, -/* 0xDC */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2, -/* 0xE0 */ BT_LEAD3, BT_LEAD3, BT_LEAD3, BT_LEAD3, -/* 0xE4 */ BT_LEAD3, BT_LEAD3, BT_LEAD3, BT_LEAD3, -/* 0xE8 */ BT_LEAD3, BT_LEAD3, BT_LEAD3, BT_LEAD3, -/* 0xEC */ BT_LEAD3, BT_LEAD3, BT_LEAD3, BT_LEAD3, -/* 0xF0 */ BT_LEAD4, BT_LEAD4, BT_LEAD4, BT_LEAD4, -/* 0xF4 */ BT_LEAD4, BT_NONXML, BT_NONXML, BT_NONXML, -/* 0xF8 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, -/* 0xFC */ BT_NONXML, BT_NONXML, BT_MALFORM, BT_MALFORM, + /* 0x84 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, + /* 0x88 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, + /* 0x8C */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, + /* 0x90 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, + /* 0x94 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, + /* 0x98 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, + /* 0x9C */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, + /* 0xA0 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, + /* 0xA4 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, + /* 0xA8 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, + /* 0xAC */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, + /* 0xB0 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, + /* 0xB4 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, + /* 0xB8 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, + /* 0xBC */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, + /* 0xC0 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2, + /* 0xC4 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2, + /* 0xC8 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2, + /* 0xCC */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2, + /* 0xD0 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2, + /* 0xD4 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2, + /* 0xD8 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2, + /* 0xDC */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2, + /* 0xE0 */ BT_LEAD3, BT_LEAD3, BT_LEAD3, BT_LEAD3, + /* 0xE4 */ BT_LEAD3, BT_LEAD3, BT_LEAD3, BT_LEAD3, + /* 0xE8 */ BT_LEAD3, BT_LEAD3, BT_LEAD3, BT_LEAD3, + /* 0xEC */ BT_LEAD3, BT_LEAD3, BT_LEAD3, BT_LEAD3, + /* 0xF0 */ BT_LEAD4, BT_LEAD4, BT_LEAD4, BT_LEAD4, + /* 0xF4 */ BT_LEAD4, BT_NONXML, BT_NONXML, BT_NONXML, + /* 0xF8 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, + /* 0xFC */ BT_NONXML, BT_NONXML, BT_MALFORM, BT_MALFORM, diff --git a/src/simgear/xml/winconfig.h b/src/simgear/xml/winconfig.h index c1b791d62..2ecd61b5b 100644 --- a/src/simgear/xml/winconfig.h +++ b/src/simgear/xml/winconfig.h @@ -1,10 +1,35 @@ -/*================================================================ -** Copyright 2000, Clark Cooper -** All rights reserved. -** -** This is free software. You are permitted to copy, distribute, or modify -** it under the terms of the MIT/X license (contained in the COPYING file -** with this distribution.) +/* + __ __ _ + ___\ \/ /_ __ __ _| |_ + / _ \\ /| '_ \ / _` | __| + | __// \| |_) | (_| | |_ + \___/_/\_\ .__/ \__,_|\__| + |_| XML parser + + Copyright (c) 2000 Clark Cooper + Copyright (c) 2002 Greg Stein + Copyright (c) 2005 Karl Waclawek + Copyright (c) 2017-2021 Sebastian Pipping + Licensed under the MIT license: + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to permit + persons to whom the Software is furnished to do so, subject to the + following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN + NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, + DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + USE OR OTHER DEALINGS IN THE SOFTWARE. */ #ifndef WINCONFIG_H @@ -17,14 +42,4 @@ #include #include -#define XML_NS 1 -#define XML_DTD 1 -#define XML_CONTEXT_BYTES 1024 - -/* we will assume all Windows platforms are little endian */ -#define BYTEORDER 1234 - -/* Windows has memmove() available. */ -#define HAVE_MEMMOVE - #endif /* ndef WINCONFIG_H */ diff --git a/src/simgear/xml/xmlparse.c b/src/simgear/xml/xmlparse.c index 301462297..e986156ec 100644 --- a/src/simgear/xml/xmlparse.c +++ b/src/simgear/xml/xmlparse.c @@ -1,81 +1,199 @@ -/* Copyright (c) 1998, 1999, 2000 Thai Open Source Software Center Ltd - See the file COPYING for copying permission. +/* 2722de33b8d95adcfb16db05afdec6ed1d40d51565cda2176c61806b5350eafe (2.4.8+) + __ __ _ + ___\ \/ /_ __ __ _| |_ + / _ \\ /| '_ \ / _` | __| + | __// \| |_) | (_| | |_ + \___/_/\_\ .__/ \__,_|\__| + |_| XML parser + + Copyright (c) 1997-2000 Thai Open Source Software Center Ltd + Copyright (c) 2000 Clark Cooper + Copyright (c) 2000-2006 Fred L. Drake, Jr. + Copyright (c) 2001-2002 Greg Stein + Copyright (c) 2002-2016 Karl Waclawek + Copyright (c) 2005-2009 Steven Solie + Copyright (c) 2016 Eric Rahm + Copyright (c) 2016-2022 Sebastian Pipping + Copyright (c) 2016 Gaurav + Copyright (c) 2016 Thomas Beutlich + Copyright (c) 2016 Gustavo Grieco + Copyright (c) 2016 Pascal Cuoq + Copyright (c) 2016 Ed Schouten + Copyright (c) 2017-2018 Rhodri James + Copyright (c) 2017 Václav Slavík + Copyright (c) 2017 Viktor Szakats + Copyright (c) 2017 Chanho Park + Copyright (c) 2017 Rolf Eike Beer + Copyright (c) 2017 Hans Wennborg + Copyright (c) 2018 Anton Maklakov + Copyright (c) 2018 Benjamin Peterson + Copyright (c) 2018 Marco Maggi + Copyright (c) 2018 Mariusz Zaborski + Copyright (c) 2019 David Loffredo + Copyright (c) 2019-2020 Ben Wagner + Copyright (c) 2019 Vadim Zeitlin + Copyright (c) 2021 Dong-hee Na + Copyright (c) 2022 Samanta Navarro + Copyright (c) 2022 Jeffrey Walton + Licensed under the MIT license: + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to permit + persons to whom the Software is furnished to do so, subject to the + following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN + NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, + DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#define NDEBUG + +#define XML_BUILDING_EXPAT 1 + +#include + +#if ! defined(_GNU_SOURCE) +# define _GNU_SOURCE 1 /* syscall prototype */ +#endif + +#ifdef _WIN32 +/* force stdlib to define rand_s() */ +# if ! defined(_CRT_RAND_S) +# define _CRT_RAND_S +# endif +#endif #include -#include /* memset(), memcpy() */ +#include /* memset(), memcpy() */ #include +#include /* UINT_MAX */ +#include /* fprintf */ +#include /* getenv, rand_s */ +#include /* uintptr_t */ +#include /* isnan */ + +#ifdef _WIN32 +# define getpid GetCurrentProcessId +#else +# include /* gettimeofday() */ +# include /* getpid() */ +# include /* getpid() */ +# include /* O_RDONLY */ +# include +#endif -#define XML_BUILDING_EXPAT 1 - -#ifdef COMPILED_FROM_DSP -#include "winconfig.h" -#elif defined(MACOS_CLASSIC) -#include "macconfig.h" -#elif defined(HAVE_EXPAT_CONFIG_H) -#include "expat_config.h" -#endif /* ndef COMPILED_FROM_DSP */ +#ifdef _WIN32 +# include "winconfig.h" +#endif +#include "ascii.h" #include "expat.h" +#include "siphash.h" + +#if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) +# if defined(HAVE_GETRANDOM) +# include /* getrandom */ +# else +# include /* syscall */ +# include /* SYS_getrandom */ +# endif +# if ! defined(GRND_NONBLOCK) +# define GRND_NONBLOCK 0x0001 +# endif /* defined(GRND_NONBLOCK) */ +#endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */ + +#if defined(HAVE_LIBBSD) \ + && (defined(HAVE_ARC4RANDOM_BUF) || defined(HAVE_ARC4RANDOM)) +# include +#endif + +#if defined(_WIN32) && ! defined(LOAD_LIBRARY_SEARCH_SYSTEM32) +# define LOAD_LIBRARY_SEARCH_SYSTEM32 0x00000800 +#endif + +#if ! defined(HAVE_GETRANDOM) && ! defined(HAVE_SYSCALL_GETRANDOM) \ + && ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) \ + && ! defined(XML_DEV_URANDOM) && ! defined(_WIN32) \ + && ! defined(XML_POOR_ENTROPY) +# error You do not have support for any sources of high quality entropy \ + enabled. For end user security, that is probably not what you want. \ + \ + Your options include: \ + * Linux >=3.17 + glibc >=2.25 (getrandom): HAVE_GETRANDOM, \ + * Linux >=3.17 + glibc (including <2.25) (syscall SYS_getrandom): HAVE_SYSCALL_GETRANDOM, \ + * BSD / macOS >=10.7 (arc4random_buf): HAVE_ARC4RANDOM_BUF, \ + * BSD / macOS (including <10.7) (arc4random): HAVE_ARC4RANDOM, \ + * libbsd (arc4random_buf): HAVE_ARC4RANDOM_BUF + HAVE_LIBBSD, \ + * libbsd (arc4random): HAVE_ARC4RANDOM + HAVE_LIBBSD, \ + * Linux (including <3.17) / BSD / macOS (including <10.7) / Solaris >=8 (/dev/urandom): XML_DEV_URANDOM, \ + * Windows >=Vista (rand_s): _WIN32. \ + \ + If insist on not using any of these, bypass this error by defining \ + XML_POOR_ENTROPY; you have been warned. \ + \ + If you have reasons to patch this detection code away or need changes \ + to the build system, please open a bug. Thank you! +#endif #ifdef XML_UNICODE -#define XML_ENCODE_MAX XML_UTF16_ENCODE_MAX -#define XmlConvert XmlUtf16Convert -#define XmlGetInternalEncoding XmlGetUtf16InternalEncoding -#define XmlGetInternalEncodingNS XmlGetUtf16InternalEncodingNS -#define XmlEncode XmlUtf16Encode -#define MUST_CONVERT(enc, s) (!(enc)->isUtf16 || (((unsigned long)s) & 1)) +# define XML_ENCODE_MAX XML_UTF16_ENCODE_MAX +# define XmlConvert XmlUtf16Convert +# define XmlGetInternalEncoding XmlGetUtf16InternalEncoding +# define XmlGetInternalEncodingNS XmlGetUtf16InternalEncodingNS +# define XmlEncode XmlUtf16Encode +# define MUST_CONVERT(enc, s) (! (enc)->isUtf16 || (((uintptr_t)(s)) & 1)) typedef unsigned short ICHAR; #else -#define XML_ENCODE_MAX XML_UTF8_ENCODE_MAX -#define XmlConvert XmlUtf8Convert -#define XmlGetInternalEncoding XmlGetUtf8InternalEncoding -#define XmlGetInternalEncodingNS XmlGetUtf8InternalEncodingNS -#define XmlEncode XmlUtf8Encode -#define MUST_CONVERT(enc, s) (!(enc)->isUtf8) +# define XML_ENCODE_MAX XML_UTF8_ENCODE_MAX +# define XmlConvert XmlUtf8Convert +# define XmlGetInternalEncoding XmlGetUtf8InternalEncoding +# define XmlGetInternalEncodingNS XmlGetUtf8InternalEncodingNS +# define XmlEncode XmlUtf8Encode +# define MUST_CONVERT(enc, s) (! (enc)->isUtf8) typedef char ICHAR; #endif - #ifndef XML_NS -#define XmlInitEncodingNS XmlInitEncoding -#define XmlInitUnknownEncodingNS XmlInitUnknownEncoding -#undef XmlGetInternalEncodingNS -#define XmlGetInternalEncodingNS XmlGetInternalEncoding -#define XmlParseXmlDeclNS XmlParseXmlDecl +# define XmlInitEncodingNS XmlInitEncoding +# define XmlInitUnknownEncodingNS XmlInitUnknownEncoding +# undef XmlGetInternalEncodingNS +# define XmlGetInternalEncodingNS XmlGetInternalEncoding +# define XmlParseXmlDeclNS XmlParseXmlDecl #endif #ifdef XML_UNICODE -#ifdef XML_UNICODE_WCHAR_T -#define XML_T(x) (const wchar_t)x -#define XML_L(x) L ## x -#else -#define XML_T(x) (const unsigned short)x -#define XML_L(x) x -#endif +# ifdef XML_UNICODE_WCHAR_T +# define XML_T(x) (const wchar_t) x +# define XML_L(x) L##x +# else +# define XML_T(x) (const unsigned short)x +# define XML_L(x) x +# endif #else -#define XML_T(x) x -#define XML_L(x) x +# define XML_T(x) x +# define XML_L(x) x #endif /* Round up n to be a multiple of sz, where sz is a power of 2. */ -#define ROUND_UP(n, sz) (((n) + ((sz) - 1)) & ~((sz) - 1)) +#define ROUND_UP(n, sz) (((n) + ((sz)-1)) & ~((sz)-1)) -/* Handle the case where memmove() doesn't exist. */ -#ifndef HAVE_MEMMOVE -#ifdef HAVE_BCOPY -#define memmove(d,s,l) bcopy((s),(d),(l)) -#else -#error memmove does not exist on this platform, nor is a substitute available -#endif /* HAVE_BCOPY */ -#endif /* HAVE_MEMMOVE */ +/* Do safe (NULL-aware) pointer arithmetic */ +#define EXPAT_SAFE_PTR_DIFF(p, q) (((p) && (q)) ? ((p) - (q)) : 0) #include "internal.h" #include "xmltok.h" @@ -95,17 +213,9 @@ typedef struct { const XML_Memory_Handling_Suite *mem; } HASH_TABLE; -/* Basic character hash algorithm, taken from Python's string hash: - h = h * 1000003 ^ character, the constant being a prime number. +static size_t keylen(KEY s); -*/ -#ifdef XML_UNICODE -#define CHAR_HASH(h, c) \ - (((h) * 0xF4243) ^ (unsigned short)(c)) -#else -#define CHAR_HASH(h, c) \ - (((h) * 0xF4243) ^ (unsigned char)(c)) -#endif +static void copy_salt_to_sipkey(XML_Parser parser, struct sipkey *key); /* For probing (after a collision) we need a step size relative prime to the hash table size, which is a power of 2. We use double-hashing, @@ -115,9 +225,9 @@ typedef struct { We limit the maximum step size to table->size / 4 (mask >> 2) and make it odd, since odd numbers are always relative prime to a power of 2. */ -#define SECOND_HASH(hash, mask, power) \ - ((((hash) & ~(mask)) >> ((power) - 1)) & ((mask) >> 2)) -#define PROBE_STEP(hash, mask, power) \ +#define SECOND_HASH(hash, mask, power) \ + ((((hash) & ~(mask)) >> ((power)-1)) & ((mask) >> 2)) +#define PROBE_STEP(hash, mask, power) \ ((unsigned char)((SECOND_HASH(hash, mask, power)) | 1)) typedef struct { @@ -125,7 +235,7 @@ typedef struct { NAMED **end; } HASH_TABLE_ITER; -#define INIT_TAG_BUF_SIZE 32 /* must be a multiple of sizeof(XML_Char) */ +#define INIT_TAG_BUF_SIZE 32 /* must be a multiple of sizeof(XML_Char) */ #define INIT_DATA_BUF_SIZE 1024 #define INIT_ATTS_SIZE 16 #define INIT_ATTS_VERSION 0xFFFFFFFF @@ -172,20 +282,20 @@ typedef struct { TAG objects in a free list. */ typedef struct tag { - struct tag *parent; /* parent of this element */ - const char *rawName; /* tagName in the original encoding */ + struct tag *parent; /* parent of this element */ + const char *rawName; /* tagName in the original encoding */ int rawNameLength; - TAG_NAME name; /* tagName in the API encoding */ - char *buf; /* buffer for name components */ - char *bufEnd; /* end of the buffer */ + TAG_NAME name; /* tagName in the API encoding */ + char *buf; /* buffer for name components */ + char *bufEnd; /* end of the buffer */ BINDING *bindings; } TAG; typedef struct { const XML_Char *name; const XML_Char *textPtr; - int textLen; /* length in XML_Chars */ - int processed; /* # of processed bytes - when suspended */ + int textLen; /* length in XML_Chars */ + int processed; /* # of processed bytes - when suspended */ const XML_Char *systemId; const XML_Char *base; const XML_Char *publicId; @@ -196,13 +306,13 @@ typedef struct { } ENTITY; typedef struct { - enum XML_Content_Type type; - enum XML_Content_Quant quant; - const XML_Char * name; - int firstchild; - int lastchild; - int childcnt; - int nextsib; + enum XML_Content_Type type; + enum XML_Content_Quant quant; + const XML_Char *name; + int firstchild; + int lastchild; + int childcnt; + int nextsib; } CONTENT_SCAFFOLD; #define INIT_SCAFFOLD_ELEMENTS 32 @@ -290,10 +400,33 @@ typedef struct open_internal_entity { XML_Bool betweenDecl; /* WFC: PE Between Declarations */ } OPEN_INTERNAL_ENTITY; -typedef enum XML_Error PTRCALL Processor(XML_Parser parser, - const char *start, - const char *end, - const char **endPtr); +enum XML_Account { + XML_ACCOUNT_DIRECT, /* bytes directly passed to the Expat parser */ + XML_ACCOUNT_ENTITY_EXPANSION, /* intermediate bytes produced during entity + expansion */ + XML_ACCOUNT_NONE /* i.e. do not account, was accounted already */ +}; + +#ifdef XML_DTD +typedef unsigned long long XmlBigCount; +typedef struct accounting { + XmlBigCount countBytesDirect; + XmlBigCount countBytesIndirect; + int debugLevel; + float maximumAmplificationFactor; // >=1.0 + unsigned long long activationThresholdBytes; +} ACCOUNTING; + +typedef struct entity_stats { + unsigned int countEverOpened; + unsigned int currentDepth; + unsigned int maximumDepthSeen; + int debugLevel; +} ENTITY_STATS; +#endif /* XML_DTD */ + +typedef enum XML_Error PTRCALL Processor(XML_Parser parser, const char *start, + const char *end, const char **endPtr); static Processor prologProcessor; static Processor prologInitProcessor; @@ -314,123 +447,147 @@ static Processor externalEntityInitProcessor3; static Processor externalEntityContentProcessor; static Processor internalEntityProcessor; -static enum XML_Error -handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName); -static enum XML_Error -processXmlDecl(XML_Parser parser, int isGeneralTextEntity, - const char *s, const char *next); -static enum XML_Error -initializeEncoding(XML_Parser parser); -static enum XML_Error -doProlog(XML_Parser parser, const ENCODING *enc, const char *s, - const char *end, int tok, const char *next, const char **nextPtr, - XML_Bool haveMore); -static enum XML_Error -processInternalEntity(XML_Parser parser, ENTITY *entity, - XML_Bool betweenDecl); -static enum XML_Error -doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc, - const char *start, const char *end, const char **endPtr, - XML_Bool haveMore); -static enum XML_Error -doCdataSection(XML_Parser parser, const ENCODING *, const char **startPtr, - const char *end, const char **nextPtr, XML_Bool haveMore); +static enum XML_Error handleUnknownEncoding(XML_Parser parser, + const XML_Char *encodingName); +static enum XML_Error processXmlDecl(XML_Parser parser, int isGeneralTextEntity, + const char *s, const char *next); +static enum XML_Error initializeEncoding(XML_Parser parser); +static enum XML_Error doProlog(XML_Parser parser, const ENCODING *enc, + const char *s, const char *end, int tok, + const char *next, const char **nextPtr, + XML_Bool haveMore, XML_Bool allowClosingDoctype, + enum XML_Account account); +static enum XML_Error processInternalEntity(XML_Parser parser, ENTITY *entity, + XML_Bool betweenDecl); +static enum XML_Error doContent(XML_Parser parser, int startTagLevel, + const ENCODING *enc, const char *start, + const char *end, const char **endPtr, + XML_Bool haveMore, enum XML_Account account); +static enum XML_Error doCdataSection(XML_Parser parser, const ENCODING *, + const char **startPtr, const char *end, + const char **nextPtr, XML_Bool haveMore, + enum XML_Account account); #ifdef XML_DTD -static enum XML_Error -doIgnoreSection(XML_Parser parser, const ENCODING *, const char **startPtr, - const char *end, const char **nextPtr, XML_Bool haveMore); +static enum XML_Error doIgnoreSection(XML_Parser parser, const ENCODING *, + const char **startPtr, const char *end, + const char **nextPtr, XML_Bool haveMore); #endif /* XML_DTD */ -static enum XML_Error -storeAtts(XML_Parser parser, const ENCODING *, const char *s, - TAG_NAME *tagNamePtr, BINDING **bindingsPtr); -static enum XML_Error -addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, - const XML_Char *uri, BINDING **bindingsPtr); -static int -defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *, XML_Bool isCdata, - XML_Bool isId, const XML_Char *dfltValue, XML_Parser parser); -static enum XML_Error -storeAttributeValue(XML_Parser parser, const ENCODING *, XML_Bool isCdata, - const char *, const char *, STRING_POOL *); -static enum XML_Error -appendAttributeValue(XML_Parser parser, const ENCODING *, XML_Bool isCdata, - const char *, const char *, STRING_POOL *); -static ATTRIBUTE_ID * -getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start, - const char *end); -static int -setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *); -static enum XML_Error -storeEntityValue(XML_Parser parser, const ENCODING *enc, const char *start, - const char *end); -static int -reportProcessingInstruction(XML_Parser parser, const ENCODING *enc, - const char *start, const char *end); -static int -reportComment(XML_Parser parser, const ENCODING *enc, const char *start, - const char *end); -static void -reportDefault(XML_Parser parser, const ENCODING *enc, const char *start, - const char *end); - -static const XML_Char * getContext(XML_Parser parser); -static XML_Bool -setContext(XML_Parser parser, const XML_Char *context); +static void freeBindings(XML_Parser parser, BINDING *bindings); +static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *, + const char *s, TAG_NAME *tagNamePtr, + BINDING **bindingsPtr, + enum XML_Account account); +static enum XML_Error addBinding(XML_Parser parser, PREFIX *prefix, + const ATTRIBUTE_ID *attId, const XML_Char *uri, + BINDING **bindingsPtr); +static int defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *, XML_Bool isCdata, + XML_Bool isId, const XML_Char *dfltValue, + XML_Parser parser); +static enum XML_Error storeAttributeValue(XML_Parser parser, const ENCODING *, + XML_Bool isCdata, const char *, + const char *, STRING_POOL *, + enum XML_Account account); +static enum XML_Error appendAttributeValue(XML_Parser parser, const ENCODING *, + XML_Bool isCdata, const char *, + const char *, STRING_POOL *, + enum XML_Account account); +static ATTRIBUTE_ID *getAttributeId(XML_Parser parser, const ENCODING *enc, + const char *start, const char *end); +static int setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *); +static enum XML_Error storeEntityValue(XML_Parser parser, const ENCODING *enc, + const char *start, const char *end, + enum XML_Account account); +static int reportProcessingInstruction(XML_Parser parser, const ENCODING *enc, + const char *start, const char *end); +static int reportComment(XML_Parser parser, const ENCODING *enc, + const char *start, const char *end); +static void reportDefault(XML_Parser parser, const ENCODING *enc, + const char *start, const char *end); + +static const XML_Char *getContext(XML_Parser parser); +static XML_Bool setContext(XML_Parser parser, const XML_Char *context); static void FASTCALL normalizePublicId(XML_Char *s); -static DTD * dtdCreate(const XML_Memory_Handling_Suite *ms); -/* do not call if parentParser != NULL */ +static DTD *dtdCreate(const XML_Memory_Handling_Suite *ms); +/* do not call if m_parentParser != NULL */ static void dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms); -static void -dtdDestroy(DTD *p, XML_Bool isDocEntity, const XML_Memory_Handling_Suite *ms); -static int -dtdCopy(DTD *newDtd, const DTD *oldDtd, const XML_Memory_Handling_Suite *ms); -static int -copyEntityTable(HASH_TABLE *, STRING_POOL *, const HASH_TABLE *); - -static NAMED * -lookup(HASH_TABLE *table, KEY name, size_t createSize); -static void FASTCALL -hashTableInit(HASH_TABLE *, const XML_Memory_Handling_Suite *ms); +static void dtdDestroy(DTD *p, XML_Bool isDocEntity, + const XML_Memory_Handling_Suite *ms); +static int dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd, + const XML_Memory_Handling_Suite *ms); +static int copyEntityTable(XML_Parser oldParser, HASH_TABLE *, STRING_POOL *, + const HASH_TABLE *); +static NAMED *lookup(XML_Parser parser, HASH_TABLE *table, KEY name, + size_t createSize); +static void FASTCALL hashTableInit(HASH_TABLE *, + const XML_Memory_Handling_Suite *ms); static void FASTCALL hashTableClear(HASH_TABLE *); static void FASTCALL hashTableDestroy(HASH_TABLE *); -static void FASTCALL -hashTableIterInit(HASH_TABLE_ITER *, const HASH_TABLE *); -static NAMED * FASTCALL hashTableIterNext(HASH_TABLE_ITER *); +static void FASTCALL hashTableIterInit(HASH_TABLE_ITER *, const HASH_TABLE *); +static NAMED *FASTCALL hashTableIterNext(HASH_TABLE_ITER *); -static void FASTCALL -poolInit(STRING_POOL *, const XML_Memory_Handling_Suite *ms); +static void FASTCALL poolInit(STRING_POOL *, + const XML_Memory_Handling_Suite *ms); static void FASTCALL poolClear(STRING_POOL *); static void FASTCALL poolDestroy(STRING_POOL *); -static XML_Char * -poolAppend(STRING_POOL *pool, const ENCODING *enc, - const char *ptr, const char *end); -static XML_Char * -poolStoreString(STRING_POOL *pool, const ENCODING *enc, - const char *ptr, const char *end); +static XML_Char *poolAppend(STRING_POOL *pool, const ENCODING *enc, + const char *ptr, const char *end); +static XML_Char *poolStoreString(STRING_POOL *pool, const ENCODING *enc, + const char *ptr, const char *end); static XML_Bool FASTCALL poolGrow(STRING_POOL *pool); -static const XML_Char * FASTCALL -poolCopyString(STRING_POOL *pool, const XML_Char *s); -static const XML_Char * -poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n); -static const XML_Char * FASTCALL -poolAppendString(STRING_POOL *pool, const XML_Char *s); +static const XML_Char *FASTCALL poolCopyString(STRING_POOL *pool, + const XML_Char *s); +static const XML_Char *poolCopyStringN(STRING_POOL *pool, const XML_Char *s, + int n); +static const XML_Char *FASTCALL poolAppendString(STRING_POOL *pool, + const XML_Char *s); static int FASTCALL nextScaffoldPart(XML_Parser parser); -static XML_Content * build_model(XML_Parser parser); -static ELEMENT_TYPE * -getElementType(XML_Parser parser, const ENCODING *enc, - const char *ptr, const char *end); +static XML_Content *build_model(XML_Parser parser); +static ELEMENT_TYPE *getElementType(XML_Parser parser, const ENCODING *enc, + const char *ptr, const char *end); -static XML_Parser -parserCreate(const XML_Char *encodingName, - const XML_Memory_Handling_Suite *memsuite, - const XML_Char *nameSep, - DTD *dtd); -static void -parserInit(XML_Parser parser, const XML_Char *encodingName); +static XML_Char *copyString(const XML_Char *s, + const XML_Memory_Handling_Suite *memsuite); + +static unsigned long generate_hash_secret_salt(XML_Parser parser); +static XML_Bool startParsing(XML_Parser parser); + +static XML_Parser parserCreate(const XML_Char *encodingName, + const XML_Memory_Handling_Suite *memsuite, + const XML_Char *nameSep, DTD *dtd); + +static void parserInit(XML_Parser parser, const XML_Char *encodingName); + +#ifdef XML_DTD +static float accountingGetCurrentAmplification(XML_Parser rootParser); +static void accountingReportStats(XML_Parser originParser, const char *epilog); +static void accountingOnAbort(XML_Parser originParser); +static void accountingReportDiff(XML_Parser rootParser, + unsigned int levelsAwayFromRootParser, + const char *before, const char *after, + ptrdiff_t bytesMore, int source_line, + enum XML_Account account); +static XML_Bool accountingDiffTolerated(XML_Parser originParser, int tok, + const char *before, const char *after, + int source_line, + enum XML_Account account); + +static void entityTrackingReportStats(XML_Parser parser, ENTITY *entity, + const char *action, int sourceLine); +static void entityTrackingOnOpen(XML_Parser parser, ENTITY *entity, + int sourceLine); +static void entityTrackingOnClose(XML_Parser parser, ENTITY *entity, + int sourceLine); + +static XML_Parser getRootParserOf(XML_Parser parser, + unsigned int *outLevelDiff); +#endif /* XML_DTD */ + +static unsigned long getDebugLevel(const char *variableName, + unsigned long defaultDebugLevel); #define poolStart(pool) ((pool)->start) #define poolEnd(pool) ((pool)->ptr) @@ -439,13 +596,13 @@ parserInit(XML_Parser parser, const XML_Char *encodingName); #define poolLastChar(pool) (((pool)->ptr)[-1]) #define poolDiscard(pool) ((pool)->ptr = (pool)->start) #define poolFinish(pool) ((pool)->start = (pool)->ptr) -#define poolAppendChar(pool, c) \ - (((pool)->ptr == (pool)->end && !poolGrow(pool)) \ - ? 0 \ - : ((*((pool)->ptr)++ = c), 1)) +#define poolAppendChar(pool, c) \ + (((pool)->ptr == (pool)->end && ! poolGrow(pool)) \ + ? 0 \ + : ((*((pool)->ptr)++ = c), 1)) struct XML_ParserStruct { - /* The first member must be userData so that the XML_GetUserData + /* The first member must be m_userData so that the XML_GetUserData macro works. */ void *m_userData; void *m_handlerArg; @@ -455,9 +612,9 @@ struct XML_ParserStruct { const char *m_bufferPtr; /* past last character to be parsed */ char *m_bufferEnd; - /* allocated end of buffer */ + /* allocated end of m_buffer */ const char *m_bufferLim; - long m_parseEndByteIndex; + XML_Index m_parseEndByteIndex; const char *m_parseEndPtr; XML_Char *m_dataBuf; XML_Char *m_dataBufEnd; @@ -493,7 +650,7 @@ struct XML_ParserStruct { void *m_unknownEncodingMem; void *m_unknownEncodingData; void *m_unknownEncodingHandlerData; - void (XMLCALL *m_unknownEncodingRelease)(void *); + void(XMLCALL *m_unknownEncodingRelease)(void *); PROLOG_STATE m_prologState; Processor *m_processor; enum XML_Error m_errorCode; @@ -528,6 +685,9 @@ struct XML_ParserStruct { NS_ATT *m_nsAtts; unsigned long m_nsAttsVersion; unsigned char m_nsAttsPower; +#ifdef XML_ATTR_INFO + XML_AttrInfo *m_attInfo; +#endif POSITION m_position; STRING_POOL m_tempPool; STRING_POOL m_temp2Pool; @@ -540,174 +700,290 @@ struct XML_ParserStruct { XML_Bool m_isParamEntity; XML_Bool m_useForeignDTD; enum XML_ParamEntityParsing m_paramEntityParsing; +#endif + unsigned long m_hash_secret_salt; +#ifdef XML_DTD + ACCOUNTING m_accounting; + ENTITY_STATS m_entity_stats; #endif }; -#define MALLOC(s) (parser->m_mem.malloc_fcn((s))) -#define REALLOC(p,s) (parser->m_mem.realloc_fcn((p),(s))) -#define FREE(p) (parser->m_mem.free_fcn((p))) - -#define userData (parser->m_userData) -#define handlerArg (parser->m_handlerArg) -#define startElementHandler (parser->m_startElementHandler) -#define endElementHandler (parser->m_endElementHandler) -#define characterDataHandler (parser->m_characterDataHandler) -#define processingInstructionHandler \ - (parser->m_processingInstructionHandler) -#define commentHandler (parser->m_commentHandler) -#define startCdataSectionHandler \ - (parser->m_startCdataSectionHandler) -#define endCdataSectionHandler (parser->m_endCdataSectionHandler) -#define defaultHandler (parser->m_defaultHandler) -#define startDoctypeDeclHandler (parser->m_startDoctypeDeclHandler) -#define endDoctypeDeclHandler (parser->m_endDoctypeDeclHandler) -#define unparsedEntityDeclHandler \ - (parser->m_unparsedEntityDeclHandler) -#define notationDeclHandler (parser->m_notationDeclHandler) -#define startNamespaceDeclHandler \ - (parser->m_startNamespaceDeclHandler) -#define endNamespaceDeclHandler (parser->m_endNamespaceDeclHandler) -#define notStandaloneHandler (parser->m_notStandaloneHandler) -#define externalEntityRefHandler \ - (parser->m_externalEntityRefHandler) -#define externalEntityRefHandlerArg \ - (parser->m_externalEntityRefHandlerArg) -#define internalEntityRefHandler \ - (parser->m_internalEntityRefHandler) -#define skippedEntityHandler (parser->m_skippedEntityHandler) -#define unknownEncodingHandler (parser->m_unknownEncodingHandler) -#define elementDeclHandler (parser->m_elementDeclHandler) -#define attlistDeclHandler (parser->m_attlistDeclHandler) -#define entityDeclHandler (parser->m_entityDeclHandler) -#define xmlDeclHandler (parser->m_xmlDeclHandler) -#define encoding (parser->m_encoding) -#define initEncoding (parser->m_initEncoding) -#define internalEncoding (parser->m_internalEncoding) -#define unknownEncodingMem (parser->m_unknownEncodingMem) -#define unknownEncodingData (parser->m_unknownEncodingData) -#define unknownEncodingHandlerData \ - (parser->m_unknownEncodingHandlerData) -#define unknownEncodingRelease (parser->m_unknownEncodingRelease) -#define protocolEncodingName (parser->m_protocolEncodingName) -#define ns (parser->m_ns) -#define ns_triplets (parser->m_ns_triplets) -#define prologState (parser->m_prologState) -#define processor (parser->m_processor) -#define errorCode (parser->m_errorCode) -#define eventPtr (parser->m_eventPtr) -#define eventEndPtr (parser->m_eventEndPtr) -#define positionPtr (parser->m_positionPtr) -#define position (parser->m_position) -#define openInternalEntities (parser->m_openInternalEntities) -#define freeInternalEntities (parser->m_freeInternalEntities) -#define defaultExpandInternalEntities \ - (parser->m_defaultExpandInternalEntities) -#define tagLevel (parser->m_tagLevel) -#define buffer (parser->m_buffer) -#define bufferPtr (parser->m_bufferPtr) -#define bufferEnd (parser->m_bufferEnd) -#define parseEndByteIndex (parser->m_parseEndByteIndex) -#define parseEndPtr (parser->m_parseEndPtr) -#define bufferLim (parser->m_bufferLim) -#define dataBuf (parser->m_dataBuf) -#define dataBufEnd (parser->m_dataBufEnd) -#define _dtd (parser->m_dtd) -#define curBase (parser->m_curBase) -#define declEntity (parser->m_declEntity) -#define doctypeName (parser->m_doctypeName) -#define doctypeSysid (parser->m_doctypeSysid) -#define doctypePubid (parser->m_doctypePubid) -#define declAttributeType (parser->m_declAttributeType) -#define declNotationName (parser->m_declNotationName) -#define declNotationPublicId (parser->m_declNotationPublicId) -#define declElementType (parser->m_declElementType) -#define declAttributeId (parser->m_declAttributeId) -#define declAttributeIsCdata (parser->m_declAttributeIsCdata) -#define declAttributeIsId (parser->m_declAttributeIsId) -#define freeTagList (parser->m_freeTagList) -#define freeBindingList (parser->m_freeBindingList) -#define inheritedBindings (parser->m_inheritedBindings) -#define tagStack (parser->m_tagStack) -#define atts (parser->m_atts) -#define attsSize (parser->m_attsSize) -#define nSpecifiedAtts (parser->m_nSpecifiedAtts) -#define idAttIndex (parser->m_idAttIndex) -#define nsAtts (parser->m_nsAtts) -#define nsAttsVersion (parser->m_nsAttsVersion) -#define nsAttsPower (parser->m_nsAttsPower) -#define tempPool (parser->m_tempPool) -#define temp2Pool (parser->m_temp2Pool) -#define groupConnector (parser->m_groupConnector) -#define groupSize (parser->m_groupSize) -#define namespaceSeparator (parser->m_namespaceSeparator) -#define parentParser (parser->m_parentParser) -#define parsing (parser->m_parsingStatus.parsing) -#define finalBuffer (parser->m_parsingStatus.finalBuffer) -#ifdef XML_DTD -#define isParamEntity (parser->m_isParamEntity) -#define useForeignDTD (parser->m_useForeignDTD) -#define paramEntityParsing (parser->m_paramEntityParsing) -#endif /* XML_DTD */ +#define MALLOC(parser, s) (parser->m_mem.malloc_fcn((s))) +#define REALLOC(parser, p, s) (parser->m_mem.realloc_fcn((p), (s))) +#define FREE(parser, p) (parser->m_mem.free_fcn((p))) XML_Parser XMLCALL -XML_ParserCreate(const XML_Char *encodingName) -{ +XML_ParserCreate(const XML_Char *encodingName) { return XML_ParserCreate_MM(encodingName, NULL, NULL); } XML_Parser XMLCALL -XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep) -{ - XML_Char tmp[2]; - *tmp = nsSep; +XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep) { + XML_Char tmp[2] = {nsSep, 0}; return XML_ParserCreate_MM(encodingName, NULL, tmp); } -static const XML_Char implicitContext[] = { - 'x', 'm', 'l', '=', 'h', 't', 't', 'p', ':', '/', '/', - 'w', 'w', 'w', '.', 'w', '3', '.', 'o', 'r', 'g', '/', - 'X', 'M', 'L', '/', '1', '9', '9', '8', '/', - 'n', 'a', 'm', 'e', 's', 'p', 'a', 'c', 'e', '\0' -}; +// "xml=http://www.w3.org/XML/1998/namespace" +static const XML_Char implicitContext[] + = {ASCII_x, ASCII_m, ASCII_l, ASCII_EQUALS, ASCII_h, + ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH, + ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD, + ASCII_w, ASCII_3, ASCII_PERIOD, ASCII_o, ASCII_r, + ASCII_g, ASCII_SLASH, ASCII_X, ASCII_M, ASCII_L, + ASCII_SLASH, ASCII_1, ASCII_9, ASCII_9, ASCII_8, + ASCII_SLASH, ASCII_n, ASCII_a, ASCII_m, ASCII_e, + ASCII_s, ASCII_p, ASCII_a, ASCII_c, ASCII_e, + '\0'}; -XML_Parser XMLCALL -XML_ParserCreate_MM(const XML_Char *encodingName, - const XML_Memory_Handling_Suite *memsuite, - const XML_Char *nameSep) -{ - XML_Parser parser = parserCreate(encodingName, memsuite, nameSep, NULL); - if (parser != NULL && ns) { +/* To avoid warnings about unused functions: */ +#if ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) + +# if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) + +/* Obtain entropy on Linux 3.17+ */ +static int +writeRandomBytes_getrandom_nonblock(void *target, size_t count) { + int success = 0; /* full count bytes written? */ + size_t bytesWrittenTotal = 0; + const unsigned int getrandomFlags = GRND_NONBLOCK; + + do { + void *const currentTarget = (void *)((char *)target + bytesWrittenTotal); + const size_t bytesToWrite = count - bytesWrittenTotal; + + const int bytesWrittenMore = +# if defined(HAVE_GETRANDOM) + getrandom(currentTarget, bytesToWrite, getrandomFlags); +# else + syscall(SYS_getrandom, currentTarget, bytesToWrite, getrandomFlags); +# endif + + if (bytesWrittenMore > 0) { + bytesWrittenTotal += bytesWrittenMore; + if (bytesWrittenTotal >= count) + success = 1; + } + } while (! success && (errno == EINTR)); + + return success; +} + +# endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */ + +# if ! defined(_WIN32) && defined(XML_DEV_URANDOM) + +/* Extract entropy from /dev/urandom */ +static int +writeRandomBytes_dev_urandom(void *target, size_t count) { + int success = 0; /* full count bytes written? */ + size_t bytesWrittenTotal = 0; + + const int fd = open("/dev/urandom", O_RDONLY); + if (fd < 0) { + return 0; + } + + do { + void *const currentTarget = (void *)((char *)target + bytesWrittenTotal); + const size_t bytesToWrite = count - bytesWrittenTotal; + + const ssize_t bytesWrittenMore = read(fd, currentTarget, bytesToWrite); + + if (bytesWrittenMore > 0) { + bytesWrittenTotal += bytesWrittenMore; + if (bytesWrittenTotal >= count) + success = 1; + } + } while (! success && (errno == EINTR)); + + close(fd); + return success; +} + +# endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */ + +#endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */ + +#if defined(HAVE_ARC4RANDOM) && ! defined(HAVE_ARC4RANDOM_BUF) + +static void +writeRandomBytes_arc4random(void *target, size_t count) { + size_t bytesWrittenTotal = 0; + + while (bytesWrittenTotal < count) { + const uint32_t random32 = arc4random(); + size_t i = 0; + + for (; (i < sizeof(random32)) && (bytesWrittenTotal < count); + i++, bytesWrittenTotal++) { + const uint8_t random8 = (uint8_t)(random32 >> (i * 8)); + ((uint8_t *)target)[bytesWrittenTotal] = random8; + } + } +} + +#endif /* defined(HAVE_ARC4RANDOM) && ! defined(HAVE_ARC4RANDOM_BUF) */ + +#ifdef _WIN32 + +/* Provide declaration of rand_s() for MinGW-32 (not 64, which has it), + as it didn't declare it in its header prior to version 5.3.0 of its + runtime package (mingwrt, containing stdlib.h). The upstream fix + was introduced at https://osdn.net/projects/mingw/ticket/39658 . */ +# if defined(__MINGW32__) && defined(__MINGW32_VERSION) \ + && __MINGW32_VERSION < 5003000L && ! defined(__MINGW64_VERSION_MAJOR) +__declspec(dllimport) int rand_s(unsigned int *); +# endif + +/* Obtain entropy on Windows using the rand_s() function which + * generates cryptographically secure random numbers. Internally it + * uses RtlGenRandom API which is present in Windows XP and later. + */ +static int +writeRandomBytes_rand_s(void *target, size_t count) { + size_t bytesWrittenTotal = 0; + + while (bytesWrittenTotal < count) { + unsigned int random32 = 0; + size_t i = 0; + + if (rand_s(&random32)) + return 0; /* failure */ + + for (; (i < sizeof(random32)) && (bytesWrittenTotal < count); + i++, bytesWrittenTotal++) { + const uint8_t random8 = (uint8_t)(random32 >> (i * 8)); + ((uint8_t *)target)[bytesWrittenTotal] = random8; + } + } + return 1; /* success */ +} + +#endif /* _WIN32 */ + +#if ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) + +static unsigned long +gather_time_entropy(void) { +# ifdef _WIN32 + FILETIME ft; + GetSystemTimeAsFileTime(&ft); /* never fails */ + return ft.dwHighDateTime ^ ft.dwLowDateTime; +# else + struct timeval tv; + int gettimeofday_res; + + gettimeofday_res = gettimeofday(&tv, NULL); + +# if defined(NDEBUG) + (void)gettimeofday_res; +# else + assert(gettimeofday_res == 0); +# endif /* defined(NDEBUG) */ + + /* Microseconds time is <20 bits entropy */ + return tv.tv_usec; +# endif +} + +#endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */ + +static unsigned long +ENTROPY_DEBUG(const char *label, unsigned long entropy) { + if (getDebugLevel("EXPAT_ENTROPY_DEBUG", 0) >= 1u) { + fprintf(stderr, "expat: Entropy: %s --> 0x%0*lx (%lu bytes)\n", label, + (int)sizeof(entropy) * 2, entropy, (unsigned long)sizeof(entropy)); + } + return entropy; +} + +static unsigned long +generate_hash_secret_salt(XML_Parser parser) { + unsigned long entropy; + (void)parser; + + /* "Failproof" high quality providers: */ +#if defined(HAVE_ARC4RANDOM_BUF) + arc4random_buf(&entropy, sizeof(entropy)); + return ENTROPY_DEBUG("arc4random_buf", entropy); +#elif defined(HAVE_ARC4RANDOM) + writeRandomBytes_arc4random((void *)&entropy, sizeof(entropy)); + return ENTROPY_DEBUG("arc4random", entropy); +#else + /* Try high quality providers first .. */ +# ifdef _WIN32 + if (writeRandomBytes_rand_s((void *)&entropy, sizeof(entropy))) { + return ENTROPY_DEBUG("rand_s", entropy); + } +# elif defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) + if (writeRandomBytes_getrandom_nonblock((void *)&entropy, sizeof(entropy))) { + return ENTROPY_DEBUG("getrandom", entropy); + } +# endif +# if ! defined(_WIN32) && defined(XML_DEV_URANDOM) + if (writeRandomBytes_dev_urandom((void *)&entropy, sizeof(entropy))) { + return ENTROPY_DEBUG("/dev/urandom", entropy); + } +# endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */ + /* .. and self-made low quality for backup: */ + + /* Process ID is 0 bits entropy if attacker has local access */ + entropy = gather_time_entropy() ^ getpid(); + + /* Factors are 2^31-1 and 2^61-1 (Mersenne primes M31 and M61) */ + if (sizeof(unsigned long) == 4) { + return ENTROPY_DEBUG("fallback(4)", entropy * 2147483647); + } else { + return ENTROPY_DEBUG("fallback(8)", + entropy * (unsigned long)2305843009213693951ULL); + } +#endif +} + +static unsigned long +get_hash_secret_salt(XML_Parser parser) { + if (parser->m_parentParser != NULL) + return get_hash_secret_salt(parser->m_parentParser); + return parser->m_hash_secret_salt; +} + +static XML_Bool /* only valid for root parser */ +startParsing(XML_Parser parser) { + /* hash functions must be initialized before setContext() is called */ + if (parser->m_hash_secret_salt == 0) + parser->m_hash_secret_salt = generate_hash_secret_salt(parser); + if (parser->m_ns) { /* implicit context only set for root parser, since child parsers (i.e. external entity parsers) will inherit it */ - if (!setContext(parser, implicitContext)) { - XML_ParserFree(parser); - return NULL; - } + return setContext(parser, implicitContext); } - return parser; + return XML_TRUE; +} + +XML_Parser XMLCALL +XML_ParserCreate_MM(const XML_Char *encodingName, + const XML_Memory_Handling_Suite *memsuite, + const XML_Char *nameSep) { + return parserCreate(encodingName, memsuite, nameSep, NULL); } static XML_Parser parserCreate(const XML_Char *encodingName, - const XML_Memory_Handling_Suite *memsuite, - const XML_Char *nameSep, - DTD *dtd) -{ + const XML_Memory_Handling_Suite *memsuite, const XML_Char *nameSep, + DTD *dtd) { XML_Parser parser; if (memsuite) { XML_Memory_Handling_Suite *mtemp; - parser = (XML_Parser) - memsuite->malloc_fcn(sizeof(struct XML_ParserStruct)); + parser = memsuite->malloc_fcn(sizeof(struct XML_ParserStruct)); if (parser != NULL) { mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem); mtemp->malloc_fcn = memsuite->malloc_fcn; mtemp->realloc_fcn = memsuite->realloc_fcn; mtemp->free_fcn = memsuite->free_fcn; } - } - else { + } else { XML_Memory_Handling_Suite *mtemp; parser = (XML_Parser)malloc(sizeof(struct XML_ParserStruct)); if (parser != NULL) { @@ -718,266 +994,348 @@ parserCreate(const XML_Char *encodingName, } } - if (!parser) + if (! parser) return parser; - buffer = NULL; - bufferLim = NULL; + parser->m_buffer = NULL; + parser->m_bufferLim = NULL; - attsSize = INIT_ATTS_SIZE; - atts = (ATTRIBUTE *)MALLOC(attsSize * sizeof(ATTRIBUTE)); - if (atts == NULL) { - FREE(parser); + parser->m_attsSize = INIT_ATTS_SIZE; + parser->m_atts + = (ATTRIBUTE *)MALLOC(parser, parser->m_attsSize * sizeof(ATTRIBUTE)); + if (parser->m_atts == NULL) { + FREE(parser, parser); + return NULL; + } +#ifdef XML_ATTR_INFO + parser->m_attInfo = (XML_AttrInfo *)MALLOC( + parser, parser->m_attsSize * sizeof(XML_AttrInfo)); + if (parser->m_attInfo == NULL) { + FREE(parser, parser->m_atts); + FREE(parser, parser); return NULL; } - dataBuf = (XML_Char *)MALLOC(INIT_DATA_BUF_SIZE * sizeof(XML_Char)); - if (dataBuf == NULL) { - FREE(atts); - FREE(parser); +#endif + parser->m_dataBuf + = (XML_Char *)MALLOC(parser, INIT_DATA_BUF_SIZE * sizeof(XML_Char)); + if (parser->m_dataBuf == NULL) { + FREE(parser, parser->m_atts); +#ifdef XML_ATTR_INFO + FREE(parser, parser->m_attInfo); +#endif + FREE(parser, parser); return NULL; } - dataBufEnd = dataBuf + INIT_DATA_BUF_SIZE; + parser->m_dataBufEnd = parser->m_dataBuf + INIT_DATA_BUF_SIZE; if (dtd) - _dtd = dtd; + parser->m_dtd = dtd; else { - _dtd = dtdCreate(&parser->m_mem); - if (_dtd == NULL) { - FREE(dataBuf); - FREE(atts); - FREE(parser); + parser->m_dtd = dtdCreate(&parser->m_mem); + if (parser->m_dtd == NULL) { + FREE(parser, parser->m_dataBuf); + FREE(parser, parser->m_atts); +#ifdef XML_ATTR_INFO + FREE(parser, parser->m_attInfo); +#endif + FREE(parser, parser); return NULL; } } - freeBindingList = NULL; - freeTagList = NULL; - freeInternalEntities = NULL; + parser->m_freeBindingList = NULL; + parser->m_freeTagList = NULL; + parser->m_freeInternalEntities = NULL; + + parser->m_groupSize = 0; + parser->m_groupConnector = NULL; - groupSize = 0; - groupConnector = NULL; + parser->m_unknownEncodingHandler = NULL; + parser->m_unknownEncodingHandlerData = NULL; - unknownEncodingHandler = NULL; - unknownEncodingHandlerData = NULL; + parser->m_namespaceSeparator = ASCII_EXCL; + parser->m_ns = XML_FALSE; + parser->m_ns_triplets = XML_FALSE; - namespaceSeparator = '!'; - ns = XML_FALSE; - ns_triplets = XML_FALSE; + parser->m_nsAtts = NULL; + parser->m_nsAttsVersion = 0; + parser->m_nsAttsPower = 0; - nsAtts = NULL; - nsAttsVersion = 0; - nsAttsPower = 0; + parser->m_protocolEncodingName = NULL; - poolInit(&tempPool, &(parser->m_mem)); - poolInit(&temp2Pool, &(parser->m_mem)); + poolInit(&parser->m_tempPool, &(parser->m_mem)); + poolInit(&parser->m_temp2Pool, &(parser->m_mem)); parserInit(parser, encodingName); - if (encodingName && !protocolEncodingName) { + if (encodingName && ! parser->m_protocolEncodingName) { XML_ParserFree(parser); return NULL; } if (nameSep) { - ns = XML_TRUE; - internalEncoding = XmlGetInternalEncodingNS(); - namespaceSeparator = *nameSep; - } - else { - internalEncoding = XmlGetInternalEncoding(); + parser->m_ns = XML_TRUE; + parser->m_internalEncoding = XmlGetInternalEncodingNS(); + parser->m_namespaceSeparator = *nameSep; + } else { + parser->m_internalEncoding = XmlGetInternalEncoding(); } return parser; } static void -parserInit(XML_Parser parser, const XML_Char *encodingName) -{ - processor = prologInitProcessor; - XmlPrologStateInit(&prologState); - protocolEncodingName = (encodingName != NULL - ? poolCopyString(&tempPool, encodingName) - : NULL); - curBase = NULL; - XmlInitEncoding(&initEncoding, &encoding, 0); - userData = NULL; - handlerArg = NULL; - startElementHandler = NULL; - endElementHandler = NULL; - characterDataHandler = NULL; - processingInstructionHandler = NULL; - commentHandler = NULL; - startCdataSectionHandler = NULL; - endCdataSectionHandler = NULL; - defaultHandler = NULL; - startDoctypeDeclHandler = NULL; - endDoctypeDeclHandler = NULL; - unparsedEntityDeclHandler = NULL; - notationDeclHandler = NULL; - startNamespaceDeclHandler = NULL; - endNamespaceDeclHandler = NULL; - notStandaloneHandler = NULL; - externalEntityRefHandler = NULL; - externalEntityRefHandlerArg = parser; - skippedEntityHandler = NULL; - elementDeclHandler = NULL; - attlistDeclHandler = NULL; - entityDeclHandler = NULL; - xmlDeclHandler = NULL; - bufferPtr = buffer; - bufferEnd = buffer; - parseEndByteIndex = 0; - parseEndPtr = NULL; - declElementType = NULL; - declAttributeId = NULL; - declEntity = NULL; - doctypeName = NULL; - doctypeSysid = NULL; - doctypePubid = NULL; - declAttributeType = NULL; - declNotationName = NULL; - declNotationPublicId = NULL; - declAttributeIsCdata = XML_FALSE; - declAttributeIsId = XML_FALSE; - memset(&position, 0, sizeof(POSITION)); - errorCode = XML_ERROR_NONE; - eventPtr = NULL; - eventEndPtr = NULL; - positionPtr = NULL; - openInternalEntities = NULL; - defaultExpandInternalEntities = XML_TRUE; - tagLevel = 0; - tagStack = NULL; - inheritedBindings = NULL; - nSpecifiedAtts = 0; - unknownEncodingMem = NULL; - unknownEncodingRelease = NULL; - unknownEncodingData = NULL; - parentParser = NULL; - parsing = XML_INITIALIZED; +parserInit(XML_Parser parser, const XML_Char *encodingName) { + parser->m_processor = prologInitProcessor; + XmlPrologStateInit(&parser->m_prologState); + if (encodingName != NULL) { + parser->m_protocolEncodingName = copyString(encodingName, &(parser->m_mem)); + } + parser->m_curBase = NULL; + XmlInitEncoding(&parser->m_initEncoding, &parser->m_encoding, 0); + parser->m_userData = NULL; + parser->m_handlerArg = NULL; + parser->m_startElementHandler = NULL; + parser->m_endElementHandler = NULL; + parser->m_characterDataHandler = NULL; + parser->m_processingInstructionHandler = NULL; + parser->m_commentHandler = NULL; + parser->m_startCdataSectionHandler = NULL; + parser->m_endCdataSectionHandler = NULL; + parser->m_defaultHandler = NULL; + parser->m_startDoctypeDeclHandler = NULL; + parser->m_endDoctypeDeclHandler = NULL; + parser->m_unparsedEntityDeclHandler = NULL; + parser->m_notationDeclHandler = NULL; + parser->m_startNamespaceDeclHandler = NULL; + parser->m_endNamespaceDeclHandler = NULL; + parser->m_notStandaloneHandler = NULL; + parser->m_externalEntityRefHandler = NULL; + parser->m_externalEntityRefHandlerArg = parser; + parser->m_skippedEntityHandler = NULL; + parser->m_elementDeclHandler = NULL; + parser->m_attlistDeclHandler = NULL; + parser->m_entityDeclHandler = NULL; + parser->m_xmlDeclHandler = NULL; + parser->m_bufferPtr = parser->m_buffer; + parser->m_bufferEnd = parser->m_buffer; + parser->m_parseEndByteIndex = 0; + parser->m_parseEndPtr = NULL; + parser->m_declElementType = NULL; + parser->m_declAttributeId = NULL; + parser->m_declEntity = NULL; + parser->m_doctypeName = NULL; + parser->m_doctypeSysid = NULL; + parser->m_doctypePubid = NULL; + parser->m_declAttributeType = NULL; + parser->m_declNotationName = NULL; + parser->m_declNotationPublicId = NULL; + parser->m_declAttributeIsCdata = XML_FALSE; + parser->m_declAttributeIsId = XML_FALSE; + memset(&parser->m_position, 0, sizeof(POSITION)); + parser->m_errorCode = XML_ERROR_NONE; + parser->m_eventPtr = NULL; + parser->m_eventEndPtr = NULL; + parser->m_positionPtr = NULL; + parser->m_openInternalEntities = NULL; + parser->m_defaultExpandInternalEntities = XML_TRUE; + parser->m_tagLevel = 0; + parser->m_tagStack = NULL; + parser->m_inheritedBindings = NULL; + parser->m_nSpecifiedAtts = 0; + parser->m_unknownEncodingMem = NULL; + parser->m_unknownEncodingRelease = NULL; + parser->m_unknownEncodingData = NULL; + parser->m_parentParser = NULL; + parser->m_parsingStatus.parsing = XML_INITIALIZED; +#ifdef XML_DTD + parser->m_isParamEntity = XML_FALSE; + parser->m_useForeignDTD = XML_FALSE; + parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER; +#endif + parser->m_hash_secret_salt = 0; + #ifdef XML_DTD - isParamEntity = XML_FALSE; - useForeignDTD = XML_FALSE; - paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER; + memset(&parser->m_accounting, 0, sizeof(ACCOUNTING)); + parser->m_accounting.debugLevel = getDebugLevel("EXPAT_ACCOUNTING_DEBUG", 0u); + parser->m_accounting.maximumAmplificationFactor + = EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT; + parser->m_accounting.activationThresholdBytes + = EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT; + + memset(&parser->m_entity_stats, 0, sizeof(ENTITY_STATS)); + parser->m_entity_stats.debugLevel = getDebugLevel("EXPAT_ENTITY_DEBUG", 0u); #endif } -/* moves list of bindings to freeBindingList */ +/* moves list of bindings to m_freeBindingList */ static void FASTCALL -moveToFreeBindingList(XML_Parser parser, BINDING *bindings) -{ +moveToFreeBindingList(XML_Parser parser, BINDING *bindings) { while (bindings) { BINDING *b = bindings; bindings = bindings->nextTagBinding; - b->nextTagBinding = freeBindingList; - freeBindingList = b; + b->nextTagBinding = parser->m_freeBindingList; + parser->m_freeBindingList = b; } } XML_Bool XMLCALL -XML_ParserReset(XML_Parser parser, const XML_Char *encodingName) -{ +XML_ParserReset(XML_Parser parser, const XML_Char *encodingName) { TAG *tStk; OPEN_INTERNAL_ENTITY *openEntityList; - if (parentParser) + + if (parser == NULL) + return XML_FALSE; + + if (parser->m_parentParser) return XML_FALSE; - /* move tagStack to freeTagList */ - tStk = tagStack; + /* move m_tagStack to m_freeTagList */ + tStk = parser->m_tagStack; while (tStk) { TAG *tag = tStk; tStk = tStk->parent; - tag->parent = freeTagList; + tag->parent = parser->m_freeTagList; moveToFreeBindingList(parser, tag->bindings); tag->bindings = NULL; - freeTagList = tag; + parser->m_freeTagList = tag; } - /* move openInternalEntities to freeInternalEntities */ - openEntityList = openInternalEntities; + /* move m_openInternalEntities to m_freeInternalEntities */ + openEntityList = parser->m_openInternalEntities; while (openEntityList) { OPEN_INTERNAL_ENTITY *openEntity = openEntityList; openEntityList = openEntity->next; - openEntity->next = freeInternalEntities; - freeInternalEntities = openEntity; - } - moveToFreeBindingList(parser, inheritedBindings); - FREE(unknownEncodingMem); - if (unknownEncodingRelease) - unknownEncodingRelease(unknownEncodingData); - poolClear(&tempPool); - poolClear(&temp2Pool); + openEntity->next = parser->m_freeInternalEntities; + parser->m_freeInternalEntities = openEntity; + } + moveToFreeBindingList(parser, parser->m_inheritedBindings); + FREE(parser, parser->m_unknownEncodingMem); + if (parser->m_unknownEncodingRelease) + parser->m_unknownEncodingRelease(parser->m_unknownEncodingData); + poolClear(&parser->m_tempPool); + poolClear(&parser->m_temp2Pool); + FREE(parser, (void *)parser->m_protocolEncodingName); + parser->m_protocolEncodingName = NULL; parserInit(parser, encodingName); - dtdReset(_dtd, &parser->m_mem); - return setContext(parser, implicitContext); + dtdReset(parser->m_dtd, &parser->m_mem); + return XML_TRUE; } enum XML_Status XMLCALL -XML_SetEncoding(XML_Parser parser, const XML_Char *encodingName) -{ +XML_SetEncoding(XML_Parser parser, const XML_Char *encodingName) { + if (parser == NULL) + return XML_STATUS_ERROR; /* Block after XML_Parse()/XML_ParseBuffer() has been called. XXX There's no way for the caller to determine which of the XXX possible error cases caused the XML_STATUS_ERROR return. */ - if (parsing == XML_PARSING || parsing == XML_SUSPENDED) + if (parser->m_parsingStatus.parsing == XML_PARSING + || parser->m_parsingStatus.parsing == XML_SUSPENDED) return XML_STATUS_ERROR; + + /* Get rid of any previous encoding name */ + FREE(parser, (void *)parser->m_protocolEncodingName); + if (encodingName == NULL) - protocolEncodingName = NULL; + /* No new encoding name */ + parser->m_protocolEncodingName = NULL; else { - protocolEncodingName = poolCopyString(&tempPool, encodingName); - if (!protocolEncodingName) + /* Copy the new encoding name into allocated memory */ + parser->m_protocolEncodingName = copyString(encodingName, &(parser->m_mem)); + if (! parser->m_protocolEncodingName) return XML_STATUS_ERROR; } return XML_STATUS_OK; } XML_Parser XMLCALL -XML_ExternalEntityParserCreate(XML_Parser oldParser, - const XML_Char *context, - const XML_Char *encodingName) -{ +XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context, + const XML_Char *encodingName) { XML_Parser parser = oldParser; DTD *newDtd = NULL; - DTD *oldDtd = _dtd; - XML_StartElementHandler oldStartElementHandler = startElementHandler; - XML_EndElementHandler oldEndElementHandler = endElementHandler; - XML_CharacterDataHandler oldCharacterDataHandler = characterDataHandler; - XML_ProcessingInstructionHandler oldProcessingInstructionHandler - = processingInstructionHandler; - XML_CommentHandler oldCommentHandler = commentHandler; - XML_StartCdataSectionHandler oldStartCdataSectionHandler - = startCdataSectionHandler; - XML_EndCdataSectionHandler oldEndCdataSectionHandler - = endCdataSectionHandler; - XML_DefaultHandler oldDefaultHandler = defaultHandler; - XML_UnparsedEntityDeclHandler oldUnparsedEntityDeclHandler - = unparsedEntityDeclHandler; - XML_NotationDeclHandler oldNotationDeclHandler = notationDeclHandler; - XML_StartNamespaceDeclHandler oldStartNamespaceDeclHandler - = startNamespaceDeclHandler; - XML_EndNamespaceDeclHandler oldEndNamespaceDeclHandler - = endNamespaceDeclHandler; - XML_NotStandaloneHandler oldNotStandaloneHandler = notStandaloneHandler; - XML_ExternalEntityRefHandler oldExternalEntityRefHandler - = externalEntityRefHandler; - XML_SkippedEntityHandler oldSkippedEntityHandler = skippedEntityHandler; - XML_UnknownEncodingHandler oldUnknownEncodingHandler - = unknownEncodingHandler; - XML_ElementDeclHandler oldElementDeclHandler = elementDeclHandler; - XML_AttlistDeclHandler oldAttlistDeclHandler = attlistDeclHandler; - XML_EntityDeclHandler oldEntityDeclHandler = entityDeclHandler; - XML_XmlDeclHandler oldXmlDeclHandler = xmlDeclHandler; - ELEMENT_TYPE * oldDeclElementType = declElementType; - - void *oldUserData = userData; - void *oldHandlerArg = handlerArg; - XML_Bool oldDefaultExpandInternalEntities = defaultExpandInternalEntities; - XML_Parser oldExternalEntityRefHandlerArg = externalEntityRefHandlerArg; + DTD *oldDtd; + XML_StartElementHandler oldStartElementHandler; + XML_EndElementHandler oldEndElementHandler; + XML_CharacterDataHandler oldCharacterDataHandler; + XML_ProcessingInstructionHandler oldProcessingInstructionHandler; + XML_CommentHandler oldCommentHandler; + XML_StartCdataSectionHandler oldStartCdataSectionHandler; + XML_EndCdataSectionHandler oldEndCdataSectionHandler; + XML_DefaultHandler oldDefaultHandler; + XML_UnparsedEntityDeclHandler oldUnparsedEntityDeclHandler; + XML_NotationDeclHandler oldNotationDeclHandler; + XML_StartNamespaceDeclHandler oldStartNamespaceDeclHandler; + XML_EndNamespaceDeclHandler oldEndNamespaceDeclHandler; + XML_NotStandaloneHandler oldNotStandaloneHandler; + XML_ExternalEntityRefHandler oldExternalEntityRefHandler; + XML_SkippedEntityHandler oldSkippedEntityHandler; + XML_UnknownEncodingHandler oldUnknownEncodingHandler; + XML_ElementDeclHandler oldElementDeclHandler; + XML_AttlistDeclHandler oldAttlistDeclHandler; + XML_EntityDeclHandler oldEntityDeclHandler; + XML_XmlDeclHandler oldXmlDeclHandler; + ELEMENT_TYPE *oldDeclElementType; + + void *oldUserData; + void *oldHandlerArg; + XML_Bool oldDefaultExpandInternalEntities; + XML_Parser oldExternalEntityRefHandlerArg; +#ifdef XML_DTD + enum XML_ParamEntityParsing oldParamEntityParsing; + int oldInEntityValue; +#endif + XML_Bool oldns_triplets; + /* Note that the new parser shares the same hash secret as the old + parser, so that dtdCopy and copyEntityTable can lookup values + from hash tables associated with either parser without us having + to worry which hash secrets each table has. + */ + unsigned long oldhash_secret_salt; + + /* Validate the oldParser parameter before we pull everything out of it */ + if (oldParser == NULL) + return NULL; + + /* Stash the original parser contents on the stack */ + oldDtd = parser->m_dtd; + oldStartElementHandler = parser->m_startElementHandler; + oldEndElementHandler = parser->m_endElementHandler; + oldCharacterDataHandler = parser->m_characterDataHandler; + oldProcessingInstructionHandler = parser->m_processingInstructionHandler; + oldCommentHandler = parser->m_commentHandler; + oldStartCdataSectionHandler = parser->m_startCdataSectionHandler; + oldEndCdataSectionHandler = parser->m_endCdataSectionHandler; + oldDefaultHandler = parser->m_defaultHandler; + oldUnparsedEntityDeclHandler = parser->m_unparsedEntityDeclHandler; + oldNotationDeclHandler = parser->m_notationDeclHandler; + oldStartNamespaceDeclHandler = parser->m_startNamespaceDeclHandler; + oldEndNamespaceDeclHandler = parser->m_endNamespaceDeclHandler; + oldNotStandaloneHandler = parser->m_notStandaloneHandler; + oldExternalEntityRefHandler = parser->m_externalEntityRefHandler; + oldSkippedEntityHandler = parser->m_skippedEntityHandler; + oldUnknownEncodingHandler = parser->m_unknownEncodingHandler; + oldElementDeclHandler = parser->m_elementDeclHandler; + oldAttlistDeclHandler = parser->m_attlistDeclHandler; + oldEntityDeclHandler = parser->m_entityDeclHandler; + oldXmlDeclHandler = parser->m_xmlDeclHandler; + oldDeclElementType = parser->m_declElementType; + + oldUserData = parser->m_userData; + oldHandlerArg = parser->m_handlerArg; + oldDefaultExpandInternalEntities = parser->m_defaultExpandInternalEntities; + oldExternalEntityRefHandlerArg = parser->m_externalEntityRefHandlerArg; #ifdef XML_DTD - enum XML_ParamEntityParsing oldParamEntityParsing = paramEntityParsing; - int oldInEntityValue = prologState.inEntityValue; + oldParamEntityParsing = parser->m_paramEntityParsing; + oldInEntityValue = parser->m_prologState.inEntityValue; #endif - XML_Bool oldns_triplets = ns_triplets; + oldns_triplets = parser->m_ns_triplets; + /* Note that the new parser shares the same hash secret as the old + parser, so that dtdCopy and copyEntityTable can lookup values + from hash tables associated with either parser without us having + to worry which hash secrets each table has. + */ + oldhash_secret_salt = parser->m_hash_secret_salt; #ifdef XML_DTD - if (!context) + if (! context) newDtd = oldDtd; #endif /* XML_DTD */ @@ -986,545 +1344,620 @@ XML_ExternalEntityParserCreate(XML_Parser oldParser, here. This makes this function more painful to follow than it would be otherwise. */ - if (ns) { - XML_Char tmp[2]; - *tmp = namespaceSeparator; + if (parser->m_ns) { + XML_Char tmp[2] = {parser->m_namespaceSeparator, 0}; parser = parserCreate(encodingName, &parser->m_mem, tmp, newDtd); - } - else { + } else { parser = parserCreate(encodingName, &parser->m_mem, NULL, newDtd); } - if (!parser) + if (! parser) return NULL; - startElementHandler = oldStartElementHandler; - endElementHandler = oldEndElementHandler; - characterDataHandler = oldCharacterDataHandler; - processingInstructionHandler = oldProcessingInstructionHandler; - commentHandler = oldCommentHandler; - startCdataSectionHandler = oldStartCdataSectionHandler; - endCdataSectionHandler = oldEndCdataSectionHandler; - defaultHandler = oldDefaultHandler; - unparsedEntityDeclHandler = oldUnparsedEntityDeclHandler; - notationDeclHandler = oldNotationDeclHandler; - startNamespaceDeclHandler = oldStartNamespaceDeclHandler; - endNamespaceDeclHandler = oldEndNamespaceDeclHandler; - notStandaloneHandler = oldNotStandaloneHandler; - externalEntityRefHandler = oldExternalEntityRefHandler; - skippedEntityHandler = oldSkippedEntityHandler; - unknownEncodingHandler = oldUnknownEncodingHandler; - elementDeclHandler = oldElementDeclHandler; - attlistDeclHandler = oldAttlistDeclHandler; - entityDeclHandler = oldEntityDeclHandler; - xmlDeclHandler = oldXmlDeclHandler; - declElementType = oldDeclElementType; - userData = oldUserData; + parser->m_startElementHandler = oldStartElementHandler; + parser->m_endElementHandler = oldEndElementHandler; + parser->m_characterDataHandler = oldCharacterDataHandler; + parser->m_processingInstructionHandler = oldProcessingInstructionHandler; + parser->m_commentHandler = oldCommentHandler; + parser->m_startCdataSectionHandler = oldStartCdataSectionHandler; + parser->m_endCdataSectionHandler = oldEndCdataSectionHandler; + parser->m_defaultHandler = oldDefaultHandler; + parser->m_unparsedEntityDeclHandler = oldUnparsedEntityDeclHandler; + parser->m_notationDeclHandler = oldNotationDeclHandler; + parser->m_startNamespaceDeclHandler = oldStartNamespaceDeclHandler; + parser->m_endNamespaceDeclHandler = oldEndNamespaceDeclHandler; + parser->m_notStandaloneHandler = oldNotStandaloneHandler; + parser->m_externalEntityRefHandler = oldExternalEntityRefHandler; + parser->m_skippedEntityHandler = oldSkippedEntityHandler; + parser->m_unknownEncodingHandler = oldUnknownEncodingHandler; + parser->m_elementDeclHandler = oldElementDeclHandler; + parser->m_attlistDeclHandler = oldAttlistDeclHandler; + parser->m_entityDeclHandler = oldEntityDeclHandler; + parser->m_xmlDeclHandler = oldXmlDeclHandler; + parser->m_declElementType = oldDeclElementType; + parser->m_userData = oldUserData; if (oldUserData == oldHandlerArg) - handlerArg = userData; + parser->m_handlerArg = parser->m_userData; else - handlerArg = parser; + parser->m_handlerArg = parser; if (oldExternalEntityRefHandlerArg != oldParser) - externalEntityRefHandlerArg = oldExternalEntityRefHandlerArg; - defaultExpandInternalEntities = oldDefaultExpandInternalEntities; - ns_triplets = oldns_triplets; - parentParser = oldParser; + parser->m_externalEntityRefHandlerArg = oldExternalEntityRefHandlerArg; + parser->m_defaultExpandInternalEntities = oldDefaultExpandInternalEntities; + parser->m_ns_triplets = oldns_triplets; + parser->m_hash_secret_salt = oldhash_secret_salt; + parser->m_parentParser = oldParser; #ifdef XML_DTD - paramEntityParsing = oldParamEntityParsing; - prologState.inEntityValue = oldInEntityValue; + parser->m_paramEntityParsing = oldParamEntityParsing; + parser->m_prologState.inEntityValue = oldInEntityValue; if (context) { #endif /* XML_DTD */ - if (!dtdCopy(_dtd, oldDtd, &parser->m_mem) - || !setContext(parser, context)) { + if (! dtdCopy(oldParser, parser->m_dtd, oldDtd, &parser->m_mem) + || ! setContext(parser, context)) { XML_ParserFree(parser); return NULL; } - processor = externalEntityInitProcessor; + parser->m_processor = externalEntityInitProcessor; #ifdef XML_DTD - } - else { - /* The DTD instance referenced by _dtd is shared between the document's - root parser and external PE parsers, therefore one does not need to - call setContext. In addition, one also *must* not call setContext, - because this would overwrite existing prefix->binding pointers in - _dtd with ones that get destroyed with the external PE parser. - This would leave those prefixes with dangling pointers. + } else { + /* The DTD instance referenced by parser->m_dtd is shared between the + document's root parser and external PE parsers, therefore one does not + need to call setContext. In addition, one also *must* not call + setContext, because this would overwrite existing prefix->binding + pointers in parser->m_dtd with ones that get destroyed with the external + PE parser. This would leave those prefixes with dangling pointers. */ - isParamEntity = XML_TRUE; - XmlPrologStateInitExternalEntity(&prologState); - processor = externalParEntInitProcessor; + parser->m_isParamEntity = XML_TRUE; + XmlPrologStateInitExternalEntity(&parser->m_prologState); + parser->m_processor = externalParEntInitProcessor; } #endif /* XML_DTD */ return parser; } static void FASTCALL -destroyBindings(BINDING *bindings, XML_Parser parser) -{ +destroyBindings(BINDING *bindings, XML_Parser parser) { for (;;) { BINDING *b = bindings; - if (!b) + if (! b) break; bindings = b->nextTagBinding; - FREE(b->uri); - FREE(b); + FREE(parser, b->uri); + FREE(parser, b); } } void XMLCALL -XML_ParserFree(XML_Parser parser) -{ +XML_ParserFree(XML_Parser parser) { TAG *tagList; OPEN_INTERNAL_ENTITY *entityList; if (parser == NULL) return; - /* free tagStack and freeTagList */ - tagList = tagStack; + /* free m_tagStack and m_freeTagList */ + tagList = parser->m_tagStack; for (;;) { TAG *p; if (tagList == NULL) { - if (freeTagList == NULL) + if (parser->m_freeTagList == NULL) break; - tagList = freeTagList; - freeTagList = NULL; + tagList = parser->m_freeTagList; + parser->m_freeTagList = NULL; } p = tagList; tagList = tagList->parent; - FREE(p->buf); + FREE(parser, p->buf); destroyBindings(p->bindings, parser); - FREE(p); + FREE(parser, p); } - /* free openInternalEntities and freeInternalEntities */ - entityList = openInternalEntities; + /* free m_openInternalEntities and m_freeInternalEntities */ + entityList = parser->m_openInternalEntities; for (;;) { OPEN_INTERNAL_ENTITY *openEntity; if (entityList == NULL) { - if (freeInternalEntities == NULL) + if (parser->m_freeInternalEntities == NULL) break; - entityList = freeInternalEntities; - freeInternalEntities = NULL; + entityList = parser->m_freeInternalEntities; + parser->m_freeInternalEntities = NULL; } openEntity = entityList; entityList = entityList->next; - FREE(openEntity); + FREE(parser, openEntity); } - destroyBindings(freeBindingList, parser); - destroyBindings(inheritedBindings, parser); - poolDestroy(&tempPool); - poolDestroy(&temp2Pool); + destroyBindings(parser->m_freeBindingList, parser); + destroyBindings(parser->m_inheritedBindings, parser); + poolDestroy(&parser->m_tempPool); + poolDestroy(&parser->m_temp2Pool); + FREE(parser, (void *)parser->m_protocolEncodingName); #ifdef XML_DTD /* external parameter entity parsers share the DTD structure parser->m_dtd with the root parser, so we must not destroy it */ - if (!isParamEntity && _dtd) + if (! parser->m_isParamEntity && parser->m_dtd) #else - if (_dtd) + if (parser->m_dtd) #endif /* XML_DTD */ - dtdDestroy(_dtd, (XML_Bool)!parentParser, &parser->m_mem); - FREE((void *)atts); - FREE(groupConnector); - FREE(buffer); - FREE(dataBuf); - FREE(nsAtts); - FREE(unknownEncodingMem); - if (unknownEncodingRelease) - unknownEncodingRelease(unknownEncodingData); - FREE(parser); + dtdDestroy(parser->m_dtd, (XML_Bool)! parser->m_parentParser, + &parser->m_mem); + FREE(parser, (void *)parser->m_atts); +#ifdef XML_ATTR_INFO + FREE(parser, (void *)parser->m_attInfo); +#endif + FREE(parser, parser->m_groupConnector); + FREE(parser, parser->m_buffer); + FREE(parser, parser->m_dataBuf); + FREE(parser, parser->m_nsAtts); + FREE(parser, parser->m_unknownEncodingMem); + if (parser->m_unknownEncodingRelease) + parser->m_unknownEncodingRelease(parser->m_unknownEncodingData); + FREE(parser, parser); } void XMLCALL -XML_UseParserAsHandlerArg(XML_Parser parser) -{ - handlerArg = parser; +XML_UseParserAsHandlerArg(XML_Parser parser) { + if (parser != NULL) + parser->m_handlerArg = parser; } enum XML_Error XMLCALL -XML_UseForeignDTD(XML_Parser parser, XML_Bool useDTD) -{ +XML_UseForeignDTD(XML_Parser parser, XML_Bool useDTD) { + if (parser == NULL) + return XML_ERROR_INVALID_ARGUMENT; #ifdef XML_DTD /* block after XML_Parse()/XML_ParseBuffer() has been called */ - if (parsing == XML_PARSING || parsing == XML_SUSPENDED) + if (parser->m_parsingStatus.parsing == XML_PARSING + || parser->m_parsingStatus.parsing == XML_SUSPENDED) return XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING; - useForeignDTD = useDTD; + parser->m_useForeignDTD = useDTD; return XML_ERROR_NONE; #else + UNUSED_P(useDTD); return XML_ERROR_FEATURE_REQUIRES_XML_DTD; #endif } void XMLCALL -XML_SetReturnNSTriplet(XML_Parser parser, int do_nst) -{ +XML_SetReturnNSTriplet(XML_Parser parser, int do_nst) { + if (parser == NULL) + return; /* block after XML_Parse()/XML_ParseBuffer() has been called */ - if (parsing == XML_PARSING || parsing == XML_SUSPENDED) + if (parser->m_parsingStatus.parsing == XML_PARSING + || parser->m_parsingStatus.parsing == XML_SUSPENDED) return; - ns_triplets = do_nst ? XML_TRUE : XML_FALSE; + parser->m_ns_triplets = do_nst ? XML_TRUE : XML_FALSE; } void XMLCALL -XML_SetUserData(XML_Parser parser, void *p) -{ - if (handlerArg == userData) - handlerArg = userData = p; +XML_SetUserData(XML_Parser parser, void *p) { + if (parser == NULL) + return; + if (parser->m_handlerArg == parser->m_userData) + parser->m_handlerArg = parser->m_userData = p; else - userData = p; + parser->m_userData = p; } enum XML_Status XMLCALL -XML_SetBase(XML_Parser parser, const XML_Char *p) -{ +XML_SetBase(XML_Parser parser, const XML_Char *p) { + if (parser == NULL) + return XML_STATUS_ERROR; if (p) { - p = poolCopyString(&_dtd->pool, p); - if (!p) + p = poolCopyString(&parser->m_dtd->pool, p); + if (! p) return XML_STATUS_ERROR; - curBase = p; - } - else - curBase = NULL; + parser->m_curBase = p; + } else + parser->m_curBase = NULL; return XML_STATUS_OK; } -const XML_Char * XMLCALL -XML_GetBase(XML_Parser parser) -{ - return curBase; +const XML_Char *XMLCALL +XML_GetBase(XML_Parser parser) { + if (parser == NULL) + return NULL; + return parser->m_curBase; } int XMLCALL -XML_GetSpecifiedAttributeCount(XML_Parser parser) -{ - return nSpecifiedAtts; +XML_GetSpecifiedAttributeCount(XML_Parser parser) { + if (parser == NULL) + return -1; + return parser->m_nSpecifiedAtts; } int XMLCALL -XML_GetIdAttributeIndex(XML_Parser parser) -{ - return idAttIndex; +XML_GetIdAttributeIndex(XML_Parser parser) { + if (parser == NULL) + return -1; + return parser->m_idAttIndex; +} + +#ifdef XML_ATTR_INFO +const XML_AttrInfo *XMLCALL +XML_GetAttributeInfo(XML_Parser parser) { + if (parser == NULL) + return NULL; + return parser->m_attInfo; } +#endif void XMLCALL -XML_SetElementHandler(XML_Parser parser, - XML_StartElementHandler start, - XML_EndElementHandler end) -{ - startElementHandler = start; - endElementHandler = end; +XML_SetElementHandler(XML_Parser parser, XML_StartElementHandler start, + XML_EndElementHandler end) { + if (parser == NULL) + return; + parser->m_startElementHandler = start; + parser->m_endElementHandler = end; } void XMLCALL -XML_SetStartElementHandler(XML_Parser parser, - XML_StartElementHandler start) { - startElementHandler = start; +XML_SetStartElementHandler(XML_Parser parser, XML_StartElementHandler start) { + if (parser != NULL) + parser->m_startElementHandler = start; } void XMLCALL -XML_SetEndElementHandler(XML_Parser parser, - XML_EndElementHandler end) { - endElementHandler = end; +XML_SetEndElementHandler(XML_Parser parser, XML_EndElementHandler end) { + if (parser != NULL) + parser->m_endElementHandler = end; } void XMLCALL XML_SetCharacterDataHandler(XML_Parser parser, - XML_CharacterDataHandler handler) -{ - characterDataHandler = handler; + XML_CharacterDataHandler handler) { + if (parser != NULL) + parser->m_characterDataHandler = handler; } void XMLCALL XML_SetProcessingInstructionHandler(XML_Parser parser, - XML_ProcessingInstructionHandler handler) -{ - processingInstructionHandler = handler; + XML_ProcessingInstructionHandler handler) { + if (parser != NULL) + parser->m_processingInstructionHandler = handler; } void XMLCALL -XML_SetCommentHandler(XML_Parser parser, - XML_CommentHandler handler) -{ - commentHandler = handler; +XML_SetCommentHandler(XML_Parser parser, XML_CommentHandler handler) { + if (parser != NULL) + parser->m_commentHandler = handler; } void XMLCALL XML_SetCdataSectionHandler(XML_Parser parser, XML_StartCdataSectionHandler start, - XML_EndCdataSectionHandler end) -{ - startCdataSectionHandler = start; - endCdataSectionHandler = end; + XML_EndCdataSectionHandler end) { + if (parser == NULL) + return; + parser->m_startCdataSectionHandler = start; + parser->m_endCdataSectionHandler = end; } void XMLCALL XML_SetStartCdataSectionHandler(XML_Parser parser, XML_StartCdataSectionHandler start) { - startCdataSectionHandler = start; + if (parser != NULL) + parser->m_startCdataSectionHandler = start; } void XMLCALL XML_SetEndCdataSectionHandler(XML_Parser parser, XML_EndCdataSectionHandler end) { - endCdataSectionHandler = end; + if (parser != NULL) + parser->m_endCdataSectionHandler = end; } void XMLCALL -XML_SetDefaultHandler(XML_Parser parser, - XML_DefaultHandler handler) -{ - defaultHandler = handler; - defaultExpandInternalEntities = XML_FALSE; +XML_SetDefaultHandler(XML_Parser parser, XML_DefaultHandler handler) { + if (parser == NULL) + return; + parser->m_defaultHandler = handler; + parser->m_defaultExpandInternalEntities = XML_FALSE; } void XMLCALL -XML_SetDefaultHandlerExpand(XML_Parser parser, - XML_DefaultHandler handler) -{ - defaultHandler = handler; - defaultExpandInternalEntities = XML_TRUE; +XML_SetDefaultHandlerExpand(XML_Parser parser, XML_DefaultHandler handler) { + if (parser == NULL) + return; + parser->m_defaultHandler = handler; + parser->m_defaultExpandInternalEntities = XML_TRUE; } void XMLCALL -XML_SetDoctypeDeclHandler(XML_Parser parser, - XML_StartDoctypeDeclHandler start, - XML_EndDoctypeDeclHandler end) -{ - startDoctypeDeclHandler = start; - endDoctypeDeclHandler = end; +XML_SetDoctypeDeclHandler(XML_Parser parser, XML_StartDoctypeDeclHandler start, + XML_EndDoctypeDeclHandler end) { + if (parser == NULL) + return; + parser->m_startDoctypeDeclHandler = start; + parser->m_endDoctypeDeclHandler = end; } void XMLCALL XML_SetStartDoctypeDeclHandler(XML_Parser parser, XML_StartDoctypeDeclHandler start) { - startDoctypeDeclHandler = start; + if (parser != NULL) + parser->m_startDoctypeDeclHandler = start; } void XMLCALL -XML_SetEndDoctypeDeclHandler(XML_Parser parser, - XML_EndDoctypeDeclHandler end) { - endDoctypeDeclHandler = end; +XML_SetEndDoctypeDeclHandler(XML_Parser parser, XML_EndDoctypeDeclHandler end) { + if (parser != NULL) + parser->m_endDoctypeDeclHandler = end; } void XMLCALL XML_SetUnparsedEntityDeclHandler(XML_Parser parser, - XML_UnparsedEntityDeclHandler handler) -{ - unparsedEntityDeclHandler = handler; + XML_UnparsedEntityDeclHandler handler) { + if (parser != NULL) + parser->m_unparsedEntityDeclHandler = handler; } void XMLCALL -XML_SetNotationDeclHandler(XML_Parser parser, - XML_NotationDeclHandler handler) -{ - notationDeclHandler = handler; +XML_SetNotationDeclHandler(XML_Parser parser, XML_NotationDeclHandler handler) { + if (parser != NULL) + parser->m_notationDeclHandler = handler; } void XMLCALL XML_SetNamespaceDeclHandler(XML_Parser parser, XML_StartNamespaceDeclHandler start, - XML_EndNamespaceDeclHandler end) -{ - startNamespaceDeclHandler = start; - endNamespaceDeclHandler = end; + XML_EndNamespaceDeclHandler end) { + if (parser == NULL) + return; + parser->m_startNamespaceDeclHandler = start; + parser->m_endNamespaceDeclHandler = end; } void XMLCALL XML_SetStartNamespaceDeclHandler(XML_Parser parser, XML_StartNamespaceDeclHandler start) { - startNamespaceDeclHandler = start; + if (parser != NULL) + parser->m_startNamespaceDeclHandler = start; } void XMLCALL XML_SetEndNamespaceDeclHandler(XML_Parser parser, XML_EndNamespaceDeclHandler end) { - endNamespaceDeclHandler = end; + if (parser != NULL) + parser->m_endNamespaceDeclHandler = end; } void XMLCALL XML_SetNotStandaloneHandler(XML_Parser parser, - XML_NotStandaloneHandler handler) -{ - notStandaloneHandler = handler; + XML_NotStandaloneHandler handler) { + if (parser != NULL) + parser->m_notStandaloneHandler = handler; } void XMLCALL XML_SetExternalEntityRefHandler(XML_Parser parser, - XML_ExternalEntityRefHandler handler) -{ - externalEntityRefHandler = handler; + XML_ExternalEntityRefHandler handler) { + if (parser != NULL) + parser->m_externalEntityRefHandler = handler; } void XMLCALL -XML_SetExternalEntityRefHandlerArg(XML_Parser parser, void *arg) -{ +XML_SetExternalEntityRefHandlerArg(XML_Parser parser, void *arg) { + if (parser == NULL) + return; if (arg) - externalEntityRefHandlerArg = (XML_Parser)arg; + parser->m_externalEntityRefHandlerArg = (XML_Parser)arg; else - externalEntityRefHandlerArg = parser; + parser->m_externalEntityRefHandlerArg = parser; } void XMLCALL XML_SetSkippedEntityHandler(XML_Parser parser, - XML_SkippedEntityHandler handler) -{ - skippedEntityHandler = handler; + XML_SkippedEntityHandler handler) { + if (parser != NULL) + parser->m_skippedEntityHandler = handler; } void XMLCALL XML_SetUnknownEncodingHandler(XML_Parser parser, - XML_UnknownEncodingHandler handler, - void *data) -{ - unknownEncodingHandler = handler; - unknownEncodingHandlerData = data; + XML_UnknownEncodingHandler handler, void *data) { + if (parser == NULL) + return; + parser->m_unknownEncodingHandler = handler; + parser->m_unknownEncodingHandlerData = data; } void XMLCALL -XML_SetElementDeclHandler(XML_Parser parser, - XML_ElementDeclHandler eldecl) -{ - elementDeclHandler = eldecl; +XML_SetElementDeclHandler(XML_Parser parser, XML_ElementDeclHandler eldecl) { + if (parser != NULL) + parser->m_elementDeclHandler = eldecl; } void XMLCALL -XML_SetAttlistDeclHandler(XML_Parser parser, - XML_AttlistDeclHandler attdecl) -{ - attlistDeclHandler = attdecl; +XML_SetAttlistDeclHandler(XML_Parser parser, XML_AttlistDeclHandler attdecl) { + if (parser != NULL) + parser->m_attlistDeclHandler = attdecl; } void XMLCALL -XML_SetEntityDeclHandler(XML_Parser parser, - XML_EntityDeclHandler handler) -{ - entityDeclHandler = handler; +XML_SetEntityDeclHandler(XML_Parser parser, XML_EntityDeclHandler handler) { + if (parser != NULL) + parser->m_entityDeclHandler = handler; } void XMLCALL -XML_SetXmlDeclHandler(XML_Parser parser, - XML_XmlDeclHandler handler) { - xmlDeclHandler = handler; +XML_SetXmlDeclHandler(XML_Parser parser, XML_XmlDeclHandler handler) { + if (parser != NULL) + parser->m_xmlDeclHandler = handler; } int XMLCALL XML_SetParamEntityParsing(XML_Parser parser, - enum XML_ParamEntityParsing peParsing) -{ + enum XML_ParamEntityParsing peParsing) { + if (parser == NULL) + return 0; /* block after XML_Parse()/XML_ParseBuffer() has been called */ - if (parsing == XML_PARSING || parsing == XML_SUSPENDED) + if (parser->m_parsingStatus.parsing == XML_PARSING + || parser->m_parsingStatus.parsing == XML_SUSPENDED) return 0; #ifdef XML_DTD - paramEntityParsing = peParsing; + parser->m_paramEntityParsing = peParsing; return 1; #else return peParsing == XML_PARAM_ENTITY_PARSING_NEVER; #endif } +int XMLCALL +XML_SetHashSalt(XML_Parser parser, unsigned long hash_salt) { + if (parser == NULL) + return 0; + if (parser->m_parentParser) + return XML_SetHashSalt(parser->m_parentParser, hash_salt); + /* block after XML_Parse()/XML_ParseBuffer() has been called */ + if (parser->m_parsingStatus.parsing == XML_PARSING + || parser->m_parsingStatus.parsing == XML_SUSPENDED) + return 0; + parser->m_hash_secret_salt = hash_salt; + return 1; +} + enum XML_Status XMLCALL -XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) -{ - switch (parsing) { +XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) { + if ((parser == NULL) || (len < 0) || ((s == NULL) && (len != 0))) { + if (parser != NULL) + parser->m_errorCode = XML_ERROR_INVALID_ARGUMENT; + return XML_STATUS_ERROR; + } + switch (parser->m_parsingStatus.parsing) { case XML_SUSPENDED: - errorCode = XML_ERROR_SUSPENDED; + parser->m_errorCode = XML_ERROR_SUSPENDED; return XML_STATUS_ERROR; case XML_FINISHED: - errorCode = XML_ERROR_FINISHED; + parser->m_errorCode = XML_ERROR_FINISHED; return XML_STATUS_ERROR; + case XML_INITIALIZED: + if (parser->m_parentParser == NULL && ! startParsing(parser)) { + parser->m_errorCode = XML_ERROR_NO_MEMORY; + return XML_STATUS_ERROR; + } + /* fall through */ default: - parsing = XML_PARSING; + parser->m_parsingStatus.parsing = XML_PARSING; } if (len == 0) { - finalBuffer = (XML_Bool)isFinal; - if (!isFinal) + parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal; + if (! isFinal) return XML_STATUS_OK; - positionPtr = bufferPtr; - parseEndPtr = bufferEnd; + parser->m_positionPtr = parser->m_bufferPtr; + parser->m_parseEndPtr = parser->m_bufferEnd; /* If data are left over from last buffer, and we now know that these data are the final chunk of input, then we have to check them again - to detect errors based on this information. + to detect errors based on that fact. */ - errorCode = processor(parser, bufferPtr, parseEndPtr, &bufferPtr); + parser->m_errorCode + = parser->m_processor(parser, parser->m_bufferPtr, + parser->m_parseEndPtr, &parser->m_bufferPtr); - if (errorCode == XML_ERROR_NONE) { - switch (parsing) { + if (parser->m_errorCode == XML_ERROR_NONE) { + switch (parser->m_parsingStatus.parsing) { case XML_SUSPENDED: - XmlUpdatePosition(encoding, positionPtr, bufferPtr, &position); - positionPtr = bufferPtr; + /* It is hard to be certain, but it seems that this case + * cannot occur. This code is cleaning up a previous parse + * with no new data (since len == 0). Changing the parsing + * state requires getting to execute a handler function, and + * there doesn't seem to be an opportunity for that while in + * this circumstance. + * + * Given the uncertainty, we retain the code but exclude it + * from coverage tests. + * + * LCOV_EXCL_START + */ + XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, + parser->m_bufferPtr, &parser->m_position); + parser->m_positionPtr = parser->m_bufferPtr; return XML_STATUS_SUSPENDED; + /* LCOV_EXCL_STOP */ case XML_INITIALIZED: case XML_PARSING: - parsing = XML_FINISHED; + parser->m_parsingStatus.parsing = XML_FINISHED; /* fall through */ default: return XML_STATUS_OK; } } - eventEndPtr = eventPtr; - processor = errorProcessor; + parser->m_eventEndPtr = parser->m_eventPtr; + parser->m_processor = errorProcessor; return XML_STATUS_ERROR; } #ifndef XML_CONTEXT_BYTES - else if (bufferPtr == bufferEnd) { + else if (parser->m_bufferPtr == parser->m_bufferEnd) { const char *end; int nLeftOver; - enum XML_Error result; - parseEndByteIndex += len; - positionPtr = s; - finalBuffer = (XML_Bool)isFinal; + enum XML_Status result; + /* Detect overflow (a+b > MAX <==> b > MAX-a) */ + if ((XML_Size)len > ((XML_Size)-1) / 2 - parser->m_parseEndByteIndex) { + parser->m_errorCode = XML_ERROR_NO_MEMORY; + parser->m_eventPtr = parser->m_eventEndPtr = NULL; + parser->m_processor = errorProcessor; + return XML_STATUS_ERROR; + } + parser->m_parseEndByteIndex += len; + parser->m_positionPtr = s; + parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal; - errorCode = processor(parser, s, parseEndPtr = s + len, &end); + parser->m_errorCode + = parser->m_processor(parser, s, parser->m_parseEndPtr = s + len, &end); - if (errorCode != XML_ERROR_NONE) { - eventEndPtr = eventPtr; - processor = errorProcessor; + if (parser->m_errorCode != XML_ERROR_NONE) { + parser->m_eventEndPtr = parser->m_eventPtr; + parser->m_processor = errorProcessor; return XML_STATUS_ERROR; - } - else { - switch (parsing) { + } else { + switch (parser->m_parsingStatus.parsing) { case XML_SUSPENDED: result = XML_STATUS_SUSPENDED; break; case XML_INITIALIZED: case XML_PARSING: - result = XML_STATUS_OK; if (isFinal) { - parsing = XML_FINISHED; - return result; + parser->m_parsingStatus.parsing = XML_FINISHED; + return XML_STATUS_OK; } + /* fall through */ + default: + result = XML_STATUS_OK; } } - XmlUpdatePosition(encoding, positionPtr, end, &position); - positionPtr = end; + XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, end, + &parser->m_position); nLeftOver = s + len - end; if (nLeftOver) { - if (buffer == NULL || nLeftOver > bufferLim - buffer) { - /* FIXME avoid integer overflow */ - char *temp; - temp = (buffer == NULL - ? (char *)MALLOC(len * 2) - : (char *)REALLOC(buffer, len * 2)); - if (temp == NULL) { - errorCode = XML_ERROR_NO_MEMORY; - return XML_STATUS_ERROR; + if (parser->m_buffer == NULL + || nLeftOver > parser->m_bufferLim - parser->m_buffer) { + /* avoid _signed_ integer overflow */ + char *temp = NULL; + const int bytesToAllocate = (int)((unsigned)len * 2U); + if (bytesToAllocate > 0) { + temp = (char *)REALLOC(parser, parser->m_buffer, bytesToAllocate); } - buffer = temp; - if (!buffer) { - errorCode = XML_ERROR_NO_MEMORY; - eventPtr = eventEndPtr = NULL; - processor = errorProcessor; + if (temp == NULL) { + parser->m_errorCode = XML_ERROR_NO_MEMORY; + parser->m_eventPtr = parser->m_eventEndPtr = NULL; + parser->m_processor = errorProcessor; return XML_STATUS_ERROR; } - bufferLim = buffer + len * 2; + parser->m_buffer = temp; + parser->m_bufferLim = parser->m_buffer + bytesToAllocate; } - memcpy(buffer, end, nLeftOver); - bufferPtr = buffer; - bufferEnd = buffer + nLeftOver; + memcpy(parser->m_buffer, end, nLeftOver); } + parser->m_bufferPtr = parser->m_buffer; + parser->m_bufferEnd = parser->m_buffer + nLeftOver; + parser->m_positionPtr = parser->m_bufferPtr; + parser->m_parseEndPtr = parser->m_bufferEnd; + parser->m_eventPtr = parser->m_bufferPtr; + parser->m_eventEndPtr = parser->m_bufferPtr; return result; } -#endif /* not defined XML_CONTEXT_BYTES */ +#endif /* not defined XML_CONTEXT_BYTES */ else { void *buff = XML_GetBuffer(parser, len); if (buff == NULL) @@ -1537,354 +1970,489 @@ XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) } enum XML_Status XMLCALL -XML_ParseBuffer(XML_Parser parser, int len, int isFinal) -{ +XML_ParseBuffer(XML_Parser parser, int len, int isFinal) { const char *start; - enum XML_Error result = XML_STATUS_OK; + enum XML_Status result = XML_STATUS_OK; - switch (parsing) { + if (parser == NULL) + return XML_STATUS_ERROR; + switch (parser->m_parsingStatus.parsing) { case XML_SUSPENDED: - errorCode = XML_ERROR_SUSPENDED; + parser->m_errorCode = XML_ERROR_SUSPENDED; return XML_STATUS_ERROR; case XML_FINISHED: - errorCode = XML_ERROR_FINISHED; + parser->m_errorCode = XML_ERROR_FINISHED; return XML_STATUS_ERROR; + case XML_INITIALIZED: + /* Has someone called XML_GetBuffer successfully before? */ + if (! parser->m_bufferPtr) { + parser->m_errorCode = XML_ERROR_NO_BUFFER; + return XML_STATUS_ERROR; + } + + if (parser->m_parentParser == NULL && ! startParsing(parser)) { + parser->m_errorCode = XML_ERROR_NO_MEMORY; + return XML_STATUS_ERROR; + } + /* fall through */ default: - parsing = XML_PARSING; + parser->m_parsingStatus.parsing = XML_PARSING; } - start = bufferPtr; - positionPtr = start; - bufferEnd += len; - parseEndPtr = bufferEnd; - parseEndByteIndex += len; - finalBuffer = (XML_Bool)isFinal; + start = parser->m_bufferPtr; + parser->m_positionPtr = start; + parser->m_bufferEnd += len; + parser->m_parseEndPtr = parser->m_bufferEnd; + parser->m_parseEndByteIndex += len; + parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal; - errorCode = processor(parser, start, parseEndPtr, &bufferPtr); + parser->m_errorCode = parser->m_processor( + parser, start, parser->m_parseEndPtr, &parser->m_bufferPtr); - if (errorCode != XML_ERROR_NONE) { - eventEndPtr = eventPtr; - processor = errorProcessor; + if (parser->m_errorCode != XML_ERROR_NONE) { + parser->m_eventEndPtr = parser->m_eventPtr; + parser->m_processor = errorProcessor; return XML_STATUS_ERROR; - } - else { - switch (parsing) { + } else { + switch (parser->m_parsingStatus.parsing) { case XML_SUSPENDED: result = XML_STATUS_SUSPENDED; break; case XML_INITIALIZED: case XML_PARSING: if (isFinal) { - parsing = XML_FINISHED; + parser->m_parsingStatus.parsing = XML_FINISHED; return result; } - default: ; /* should not happen */ + default:; /* should not happen */ } } - XmlUpdatePosition(encoding, positionPtr, bufferPtr, &position); - positionPtr = bufferPtr; + XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, + parser->m_bufferPtr, &parser->m_position); + parser->m_positionPtr = parser->m_bufferPtr; return result; } -void * XMLCALL -XML_GetBuffer(XML_Parser parser, int len) -{ - switch (parsing) { +void *XMLCALL +XML_GetBuffer(XML_Parser parser, int len) { + if (parser == NULL) + return NULL; + if (len < 0) { + parser->m_errorCode = XML_ERROR_NO_MEMORY; + return NULL; + } + switch (parser->m_parsingStatus.parsing) { case XML_SUSPENDED: - errorCode = XML_ERROR_SUSPENDED; + parser->m_errorCode = XML_ERROR_SUSPENDED; return NULL; case XML_FINISHED: - errorCode = XML_ERROR_FINISHED; + parser->m_errorCode = XML_ERROR_FINISHED; return NULL; - default: ; + default:; } - if (len > bufferLim - bufferEnd) { - /* FIXME avoid integer overflow */ - int neededSize = len + (bufferEnd - bufferPtr); + if (len > EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd)) { #ifdef XML_CONTEXT_BYTES - int keep = bufferPtr - buffer; - + int keep; +#endif /* defined XML_CONTEXT_BYTES */ + /* Do not invoke signed arithmetic overflow: */ + int neededSize = (int)((unsigned)len + + (unsigned)EXPAT_SAFE_PTR_DIFF( + parser->m_bufferEnd, parser->m_bufferPtr)); + if (neededSize < 0) { + parser->m_errorCode = XML_ERROR_NO_MEMORY; + return NULL; + } +#ifdef XML_CONTEXT_BYTES + keep = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer); if (keep > XML_CONTEXT_BYTES) keep = XML_CONTEXT_BYTES; + /* Detect and prevent integer overflow */ + if (keep > INT_MAX - neededSize) { + parser->m_errorCode = XML_ERROR_NO_MEMORY; + return NULL; + } neededSize += keep; -#endif /* defined XML_CONTEXT_BYTES */ - if (neededSize <= bufferLim - buffer) { +#endif /* defined XML_CONTEXT_BYTES */ + if (neededSize + <= EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer)) { #ifdef XML_CONTEXT_BYTES - if (keep < bufferPtr - buffer) { - int offset = (bufferPtr - buffer) - keep; - memmove(buffer, &buffer[offset], bufferEnd - bufferPtr + keep); - bufferEnd -= offset; - bufferPtr -= offset; + if (keep < EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer)) { + int offset + = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer) + - keep; + /* The buffer pointers cannot be NULL here; we have at least some bytes + * in the buffer */ + memmove(parser->m_buffer, &parser->m_buffer[offset], + parser->m_bufferEnd - parser->m_bufferPtr + keep); + parser->m_bufferEnd -= offset; + parser->m_bufferPtr -= offset; } #else - memmove(buffer, bufferPtr, bufferEnd - bufferPtr); - bufferEnd = buffer + (bufferEnd - bufferPtr); - bufferPtr = buffer; -#endif /* not defined XML_CONTEXT_BYTES */ - } - else { + if (parser->m_buffer && parser->m_bufferPtr) { + memmove(parser->m_buffer, parser->m_bufferPtr, + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)); + parser->m_bufferEnd + = parser->m_buffer + + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr); + parser->m_bufferPtr = parser->m_buffer; + } +#endif /* not defined XML_CONTEXT_BYTES */ + } else { char *newBuf; - int bufferSize = bufferLim - bufferPtr; + int bufferSize + = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferPtr); if (bufferSize == 0) bufferSize = INIT_BUFFER_SIZE; do { - bufferSize *= 2; - } while (bufferSize < neededSize); - newBuf = (char *)MALLOC(bufferSize); + /* Do not invoke signed arithmetic overflow: */ + bufferSize = (int)(2U * (unsigned)bufferSize); + } while (bufferSize < neededSize && bufferSize > 0); + if (bufferSize <= 0) { + parser->m_errorCode = XML_ERROR_NO_MEMORY; + return NULL; + } + newBuf = (char *)MALLOC(parser, bufferSize); if (newBuf == 0) { - errorCode = XML_ERROR_NO_MEMORY; + parser->m_errorCode = XML_ERROR_NO_MEMORY; return NULL; } - bufferLim = newBuf + bufferSize; + parser->m_bufferLim = newBuf + bufferSize; #ifdef XML_CONTEXT_BYTES - if (bufferPtr) { - int keep = bufferPtr - buffer; - if (keep > XML_CONTEXT_BYTES) - keep = XML_CONTEXT_BYTES; - memcpy(newBuf, &bufferPtr[-keep], bufferEnd - bufferPtr + keep); - FREE(buffer); - buffer = newBuf; - bufferEnd = buffer + (bufferEnd - bufferPtr) + keep; - bufferPtr = buffer + keep; - } - else { - bufferEnd = newBuf + (bufferEnd - bufferPtr); - bufferPtr = buffer = newBuf; + if (parser->m_bufferPtr) { + memcpy(newBuf, &parser->m_bufferPtr[-keep], + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr) + + keep); + FREE(parser, parser->m_buffer); + parser->m_buffer = newBuf; + parser->m_bufferEnd + = parser->m_buffer + + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr) + + keep; + parser->m_bufferPtr = parser->m_buffer + keep; + } else { + /* This must be a brand new buffer with no data in it yet */ + parser->m_bufferEnd = newBuf; + parser->m_bufferPtr = parser->m_buffer = newBuf; } #else - if (bufferPtr) { - memcpy(newBuf, bufferPtr, bufferEnd - bufferPtr); - FREE(buffer); + if (parser->m_bufferPtr) { + memcpy(newBuf, parser->m_bufferPtr, + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)); + FREE(parser, parser->m_buffer); + parser->m_bufferEnd + = newBuf + + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr); + } else { + /* This must be a brand new buffer with no data in it yet */ + parser->m_bufferEnd = newBuf; } - bufferEnd = newBuf + (bufferEnd - bufferPtr); - bufferPtr = buffer = newBuf; -#endif /* not defined XML_CONTEXT_BYTES */ + parser->m_bufferPtr = parser->m_buffer = newBuf; +#endif /* not defined XML_CONTEXT_BYTES */ } + parser->m_eventPtr = parser->m_eventEndPtr = NULL; + parser->m_positionPtr = NULL; } - return bufferEnd; + return parser->m_bufferEnd; } enum XML_Status XMLCALL -XML_StopParser(XML_Parser parser, XML_Bool resumable) -{ - switch (parsing) { +XML_StopParser(XML_Parser parser, XML_Bool resumable) { + if (parser == NULL) + return XML_STATUS_ERROR; + switch (parser->m_parsingStatus.parsing) { case XML_SUSPENDED: if (resumable) { - errorCode = XML_ERROR_SUSPENDED; + parser->m_errorCode = XML_ERROR_SUSPENDED; return XML_STATUS_ERROR; } - parsing = XML_FINISHED; + parser->m_parsingStatus.parsing = XML_FINISHED; break; case XML_FINISHED: - errorCode = XML_ERROR_FINISHED; + parser->m_errorCode = XML_ERROR_FINISHED; return XML_STATUS_ERROR; default: if (resumable) { #ifdef XML_DTD - if (isParamEntity) { - errorCode = XML_ERROR_SUSPEND_PE; + if (parser->m_isParamEntity) { + parser->m_errorCode = XML_ERROR_SUSPEND_PE; return XML_STATUS_ERROR; } #endif - parsing = XML_SUSPENDED; - } - else - parsing = XML_FINISHED; + parser->m_parsingStatus.parsing = XML_SUSPENDED; + } else + parser->m_parsingStatus.parsing = XML_FINISHED; } return XML_STATUS_OK; } enum XML_Status XMLCALL -XML_ResumeParser(XML_Parser parser) -{ - enum XML_Error result = XML_STATUS_OK; +XML_ResumeParser(XML_Parser parser) { + enum XML_Status result = XML_STATUS_OK; - if (parsing != XML_SUSPENDED) { - errorCode = XML_ERROR_NOT_SUSPENDED; + if (parser == NULL) + return XML_STATUS_ERROR; + if (parser->m_parsingStatus.parsing != XML_SUSPENDED) { + parser->m_errorCode = XML_ERROR_NOT_SUSPENDED; return XML_STATUS_ERROR; } - parsing = XML_PARSING; + parser->m_parsingStatus.parsing = XML_PARSING; - errorCode = processor(parser, bufferPtr, parseEndPtr, &bufferPtr); + parser->m_errorCode = parser->m_processor( + parser, parser->m_bufferPtr, parser->m_parseEndPtr, &parser->m_bufferPtr); - if (errorCode != XML_ERROR_NONE) { - eventEndPtr = eventPtr; - processor = errorProcessor; + if (parser->m_errorCode != XML_ERROR_NONE) { + parser->m_eventEndPtr = parser->m_eventPtr; + parser->m_processor = errorProcessor; return XML_STATUS_ERROR; - } - else { - switch (parsing) { + } else { + switch (parser->m_parsingStatus.parsing) { case XML_SUSPENDED: result = XML_STATUS_SUSPENDED; break; case XML_INITIALIZED: case XML_PARSING: - if (finalBuffer) { - parsing = XML_FINISHED; + if (parser->m_parsingStatus.finalBuffer) { + parser->m_parsingStatus.parsing = XML_FINISHED; return result; } - default: ; + default:; } } - XmlUpdatePosition(encoding, positionPtr, bufferPtr, &position); - positionPtr = bufferPtr; + XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, + parser->m_bufferPtr, &parser->m_position); + parser->m_positionPtr = parser->m_bufferPtr; return result; } void XMLCALL -XML_GetParsingStatus(XML_Parser parser, XML_ParsingStatus *status) -{ +XML_GetParsingStatus(XML_Parser parser, XML_ParsingStatus *status) { + if (parser == NULL) + return; assert(status != NULL); *status = parser->m_parsingStatus; } enum XML_Error XMLCALL -XML_GetErrorCode(XML_Parser parser) -{ - return errorCode; +XML_GetErrorCode(XML_Parser parser) { + if (parser == NULL) + return XML_ERROR_INVALID_ARGUMENT; + return parser->m_errorCode; } -long XMLCALL -XML_GetCurrentByteIndex(XML_Parser parser) -{ - if (eventPtr) - return parseEndByteIndex - (parseEndPtr - eventPtr); +XML_Index XMLCALL +XML_GetCurrentByteIndex(XML_Parser parser) { + if (parser == NULL) + return -1; + if (parser->m_eventPtr) + return (XML_Index)(parser->m_parseEndByteIndex + - (parser->m_parseEndPtr - parser->m_eventPtr)); return -1; } int XMLCALL -XML_GetCurrentByteCount(XML_Parser parser) -{ - if (eventEndPtr && eventPtr) - return eventEndPtr - eventPtr; +XML_GetCurrentByteCount(XML_Parser parser) { + if (parser == NULL) + return 0; + if (parser->m_eventEndPtr && parser->m_eventPtr) + return (int)(parser->m_eventEndPtr - parser->m_eventPtr); return 0; } -const char * XMLCALL -XML_GetInputContext(XML_Parser parser, int *offset, int *size) -{ +const char *XMLCALL +XML_GetInputContext(XML_Parser parser, int *offset, int *size) { #ifdef XML_CONTEXT_BYTES - if (eventPtr && buffer) { - *offset = eventPtr - buffer; - *size = bufferEnd - buffer; - return buffer; + if (parser == NULL) + return NULL; + if (parser->m_eventPtr && parser->m_buffer) { + if (offset != NULL) + *offset = (int)(parser->m_eventPtr - parser->m_buffer); + if (size != NULL) + *size = (int)(parser->m_bufferEnd - parser->m_buffer); + return parser->m_buffer; } +#else + (void)parser; + (void)offset; + (void)size; #endif /* defined XML_CONTEXT_BYTES */ - return (char *) 0; + return (const char *)0; } -int XMLCALL -XML_GetCurrentLineNumber(XML_Parser parser) -{ - if (eventPtr && eventPtr >= positionPtr) { - XmlUpdatePosition(encoding, positionPtr, eventPtr, &position); - positionPtr = eventPtr; +XML_Size XMLCALL +XML_GetCurrentLineNumber(XML_Parser parser) { + if (parser == NULL) + return 0; + if (parser->m_eventPtr && parser->m_eventPtr >= parser->m_positionPtr) { + XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, + parser->m_eventPtr, &parser->m_position); + parser->m_positionPtr = parser->m_eventPtr; } - return position.lineNumber + 1; + return parser->m_position.lineNumber + 1; } -int XMLCALL -XML_GetCurrentColumnNumber(XML_Parser parser) -{ - if (eventPtr && eventPtr >= positionPtr) { - XmlUpdatePosition(encoding, positionPtr, eventPtr, &position); - positionPtr = eventPtr; +XML_Size XMLCALL +XML_GetCurrentColumnNumber(XML_Parser parser) { + if (parser == NULL) + return 0; + if (parser->m_eventPtr && parser->m_eventPtr >= parser->m_positionPtr) { + XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, + parser->m_eventPtr, &parser->m_position); + parser->m_positionPtr = parser->m_eventPtr; } - return position.columnNumber; + return parser->m_position.columnNumber; } void XMLCALL -XML_FreeContentModel(XML_Parser parser, XML_Content *model) -{ - FREE(model); +XML_FreeContentModel(XML_Parser parser, XML_Content *model) { + if (parser != NULL) + FREE(parser, model); } -void * XMLCALL -XML_MemMalloc(XML_Parser parser, size_t size) -{ - return MALLOC(size); +void *XMLCALL +XML_MemMalloc(XML_Parser parser, size_t size) { + if (parser == NULL) + return NULL; + return MALLOC(parser, size); } -void * XMLCALL -XML_MemRealloc(XML_Parser parser, void *ptr, size_t size) -{ - return REALLOC(ptr, size); +void *XMLCALL +XML_MemRealloc(XML_Parser parser, void *ptr, size_t size) { + if (parser == NULL) + return NULL; + return REALLOC(parser, ptr, size); } void XMLCALL -XML_MemFree(XML_Parser parser, void *ptr) -{ - FREE(ptr); +XML_MemFree(XML_Parser parser, void *ptr) { + if (parser != NULL) + FREE(parser, ptr); } void XMLCALL -XML_DefaultCurrent(XML_Parser parser) -{ - if (defaultHandler) { - if (openInternalEntities) - reportDefault(parser, - internalEncoding, - openInternalEntities->internalEventPtr, - openInternalEntities->internalEventEndPtr); +XML_DefaultCurrent(XML_Parser parser) { + if (parser == NULL) + return; + if (parser->m_defaultHandler) { + if (parser->m_openInternalEntities) + reportDefault(parser, parser->m_internalEncoding, + parser->m_openInternalEntities->internalEventPtr, + parser->m_openInternalEntities->internalEventEndPtr); else - reportDefault(parser, encoding, eventPtr, eventEndPtr); - } -} - -const XML_LChar * XMLCALL -XML_ErrorString(enum XML_Error code) -{ - static const XML_LChar *message[] = { - 0, - XML_L("out of memory"), - XML_L("syntax error"), - XML_L("no element found"), - XML_L("not well-formed (invalid token)"), - XML_L("unclosed token"), - XML_L("partial character"), - XML_L("mismatched tag"), - XML_L("duplicate attribute"), - XML_L("junk after document element"), - XML_L("illegal parameter entity reference"), - XML_L("undefined entity"), - XML_L("recursive entity reference"), - XML_L("asynchronous entity"), - XML_L("reference to invalid character number"), - XML_L("reference to binary entity"), - XML_L("reference to external entity in attribute"), - XML_L("xml declaration not at start of external entity"), - XML_L("unknown encoding"), - XML_L("encoding specified in XML declaration is incorrect"), - XML_L("unclosed CDATA section"), - XML_L("error in processing external entity reference"), - XML_L("document is not standalone"), - XML_L("unexpected parser state - please send a bug report"), - XML_L("entity declared in parameter entity"), - XML_L("requested feature requires XML_DTD support in Expat"), - XML_L("cannot change setting once parsing has begun"), - XML_L("unbound prefix"), - XML_L("must not undeclare prefix"), - XML_L("incomplete markup in parameter entity"), - XML_L("XML declaration not well-formed"), - XML_L("text declaration not well-formed"), - XML_L("illegal character(s) in public id"), - XML_L("parser suspended"), - XML_L("parser not suspended"), - XML_L("parsing aborted"), - XML_L("parsing finished"), - XML_L("cannot suspend in external parameter entity") - }; - if (code > 0 && code < sizeof(message)/sizeof(message[0])) - return message[code]; + reportDefault(parser, parser->m_encoding, parser->m_eventPtr, + parser->m_eventEndPtr); + } +} + +const XML_LChar *XMLCALL +XML_ErrorString(enum XML_Error code) { + switch (code) { + case XML_ERROR_NONE: + return NULL; + case XML_ERROR_NO_MEMORY: + return XML_L("out of memory"); + case XML_ERROR_SYNTAX: + return XML_L("syntax error"); + case XML_ERROR_NO_ELEMENTS: + return XML_L("no element found"); + case XML_ERROR_INVALID_TOKEN: + return XML_L("not well-formed (invalid token)"); + case XML_ERROR_UNCLOSED_TOKEN: + return XML_L("unclosed token"); + case XML_ERROR_PARTIAL_CHAR: + return XML_L("partial character"); + case XML_ERROR_TAG_MISMATCH: + return XML_L("mismatched tag"); + case XML_ERROR_DUPLICATE_ATTRIBUTE: + return XML_L("duplicate attribute"); + case XML_ERROR_JUNK_AFTER_DOC_ELEMENT: + return XML_L("junk after document element"); + case XML_ERROR_PARAM_ENTITY_REF: + return XML_L("illegal parameter entity reference"); + case XML_ERROR_UNDEFINED_ENTITY: + return XML_L("undefined entity"); + case XML_ERROR_RECURSIVE_ENTITY_REF: + return XML_L("recursive entity reference"); + case XML_ERROR_ASYNC_ENTITY: + return XML_L("asynchronous entity"); + case XML_ERROR_BAD_CHAR_REF: + return XML_L("reference to invalid character number"); + case XML_ERROR_BINARY_ENTITY_REF: + return XML_L("reference to binary entity"); + case XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF: + return XML_L("reference to external entity in attribute"); + case XML_ERROR_MISPLACED_XML_PI: + return XML_L("XML or text declaration not at start of entity"); + case XML_ERROR_UNKNOWN_ENCODING: + return XML_L("unknown encoding"); + case XML_ERROR_INCORRECT_ENCODING: + return XML_L("encoding specified in XML declaration is incorrect"); + case XML_ERROR_UNCLOSED_CDATA_SECTION: + return XML_L("unclosed CDATA section"); + case XML_ERROR_EXTERNAL_ENTITY_HANDLING: + return XML_L("error in processing external entity reference"); + case XML_ERROR_NOT_STANDALONE: + return XML_L("document is not standalone"); + case XML_ERROR_UNEXPECTED_STATE: + return XML_L("unexpected parser state - please send a bug report"); + case XML_ERROR_ENTITY_DECLARED_IN_PE: + return XML_L("entity declared in parameter entity"); + case XML_ERROR_FEATURE_REQUIRES_XML_DTD: + return XML_L("requested feature requires XML_DTD support in Expat"); + case XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING: + return XML_L("cannot change setting once parsing has begun"); + /* Added in 1.95.7. */ + case XML_ERROR_UNBOUND_PREFIX: + return XML_L("unbound prefix"); + /* Added in 1.95.8. */ + case XML_ERROR_UNDECLARING_PREFIX: + return XML_L("must not undeclare prefix"); + case XML_ERROR_INCOMPLETE_PE: + return XML_L("incomplete markup in parameter entity"); + case XML_ERROR_XML_DECL: + return XML_L("XML declaration not well-formed"); + case XML_ERROR_TEXT_DECL: + return XML_L("text declaration not well-formed"); + case XML_ERROR_PUBLICID: + return XML_L("illegal character(s) in public id"); + case XML_ERROR_SUSPENDED: + return XML_L("parser suspended"); + case XML_ERROR_NOT_SUSPENDED: + return XML_L("parser not suspended"); + case XML_ERROR_ABORTED: + return XML_L("parsing aborted"); + case XML_ERROR_FINISHED: + return XML_L("parsing finished"); + case XML_ERROR_SUSPEND_PE: + return XML_L("cannot suspend in external parameter entity"); + /* Added in 2.0.0. */ + case XML_ERROR_RESERVED_PREFIX_XML: + return XML_L( + "reserved prefix (xml) must not be undeclared or bound to another namespace name"); + case XML_ERROR_RESERVED_PREFIX_XMLNS: + return XML_L("reserved prefix (xmlns) must not be declared or undeclared"); + case XML_ERROR_RESERVED_NAMESPACE_URI: + return XML_L( + "prefix must not be bound to one of the reserved namespace names"); + /* Added in 2.2.5. */ + case XML_ERROR_INVALID_ARGUMENT: /* Constant added in 2.2.1, already */ + return XML_L("invalid argument"); + /* Added in 2.3.0. */ + case XML_ERROR_NO_BUFFER: + return XML_L( + "a successful prior call to function XML_GetBuffer is required"); + /* Added in 2.4.0. */ + case XML_ERROR_AMPLIFICATION_LIMIT_BREACH: + return XML_L( + "limit on input amplification factor (from DTD and entities) breached"); + } return NULL; } -const XML_LChar * XMLCALL +const XML_LChar *XMLCALL XML_ExpatVersion(void) { - /* V1 is used to string-ize the version number. However, it would string-ize the actual version macro *names* unless we get them substituted before being passed to V1. CPP is defined to expand @@ -1893,8 +2461,8 @@ XML_ExpatVersion(void) { with the correct numerals. */ /* ### I'm assuming cpp is portable in this respect... */ -#define V1(a,b,c) XML_L(#a)XML_L(".")XML_L(#b)XML_L(".")XML_L(#c) -#define V2(a,b,c) XML_L("expat_")V1(a,b,c) +#define V1(a, b, c) XML_L(#a) XML_L(".") XML_L(#b) XML_L(".") XML_L(#c) +#define V2(a, b, c) XML_L("expat_") V1(a, b, c) return V2(XML_MAJOR_VERSION, XML_MINOR_VERSION, XML_MICRO_VERSION); @@ -1903,8 +2471,7 @@ XML_ExpatVersion(void) { } XML_Expat_Version XMLCALL -XML_ExpatVersionInfo(void) -{ +XML_ExpatVersionInfo(void) { XML_Expat_Version version; version.major = XML_MAJOR_VERSION; @@ -1914,50 +2481,91 @@ XML_ExpatVersionInfo(void) return version; } -const XML_Feature * XMLCALL -XML_GetFeatureList(void) -{ - static XML_Feature features[] = { - {XML_FEATURE_SIZEOF_XML_CHAR, XML_L("sizeof(XML_Char)"), 0}, - {XML_FEATURE_SIZEOF_XML_LCHAR, XML_L("sizeof(XML_LChar)"), 0}, +const XML_Feature *XMLCALL +XML_GetFeatureList(void) { + static const XML_Feature features[] = { + {XML_FEATURE_SIZEOF_XML_CHAR, XML_L("sizeof(XML_Char)"), + sizeof(XML_Char)}, + {XML_FEATURE_SIZEOF_XML_LCHAR, XML_L("sizeof(XML_LChar)"), + sizeof(XML_LChar)}, #ifdef XML_UNICODE - {XML_FEATURE_UNICODE, XML_L("XML_UNICODE"), 0}, + {XML_FEATURE_UNICODE, XML_L("XML_UNICODE"), 0}, #endif #ifdef XML_UNICODE_WCHAR_T - {XML_FEATURE_UNICODE_WCHAR_T, XML_L("XML_UNICODE_WCHAR_T"), 0}, + {XML_FEATURE_UNICODE_WCHAR_T, XML_L("XML_UNICODE_WCHAR_T"), 0}, #endif #ifdef XML_DTD - {XML_FEATURE_DTD, XML_L("XML_DTD"), 0}, + {XML_FEATURE_DTD, XML_L("XML_DTD"), 0}, #endif #ifdef XML_CONTEXT_BYTES - {XML_FEATURE_CONTEXT_BYTES, XML_L("XML_CONTEXT_BYTES"), - XML_CONTEXT_BYTES}, + {XML_FEATURE_CONTEXT_BYTES, XML_L("XML_CONTEXT_BYTES"), + XML_CONTEXT_BYTES}, #endif #ifdef XML_MIN_SIZE - {XML_FEATURE_MIN_SIZE, XML_L("XML_MIN_SIZE"), 0}, + {XML_FEATURE_MIN_SIZE, XML_L("XML_MIN_SIZE"), 0}, +#endif +#ifdef XML_NS + {XML_FEATURE_NS, XML_L("XML_NS"), 0}, +#endif +#ifdef XML_LARGE_SIZE + {XML_FEATURE_LARGE_SIZE, XML_L("XML_LARGE_SIZE"), 0}, #endif - {XML_FEATURE_END, NULL, 0} - }; +#ifdef XML_ATTR_INFO + {XML_FEATURE_ATTR_INFO, XML_L("XML_ATTR_INFO"), 0}, +#endif +#ifdef XML_DTD + /* Added in Expat 2.4.0. */ + {XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT, + XML_L("XML_BLAP_MAX_AMP"), + (long int) + EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT}, + {XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT, + XML_L("XML_BLAP_ACT_THRES"), + EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT}, +#endif + {XML_FEATURE_END, NULL, 0}}; - features[0].value = sizeof(XML_Char); - features[1].value = sizeof(XML_LChar); return features; } +#ifdef XML_DTD +XML_Bool XMLCALL +XML_SetBillionLaughsAttackProtectionMaximumAmplification( + XML_Parser parser, float maximumAmplificationFactor) { + if ((parser == NULL) || (parser->m_parentParser != NULL) + || isnan(maximumAmplificationFactor) + || (maximumAmplificationFactor < 1.0f)) { + return XML_FALSE; + } + parser->m_accounting.maximumAmplificationFactor = maximumAmplificationFactor; + return XML_TRUE; +} + +XML_Bool XMLCALL +XML_SetBillionLaughsAttackProtectionActivationThreshold( + XML_Parser parser, unsigned long long activationThresholdBytes) { + if ((parser == NULL) || (parser->m_parentParser != NULL)) { + return XML_FALSE; + } + parser->m_accounting.activationThresholdBytes = activationThresholdBytes; + return XML_TRUE; +} +#endif /* XML_DTD */ + /* Initially tag->rawName always points into the parse buffer; for those TAG instances opened while the current parse buffer was processed, and not yet closed, we need to store tag->rawName in a more permanent location, since the parse buffer is about to be discarded. */ static XML_Bool -storeRawNames(XML_Parser parser) -{ - TAG *tag = tagStack; +storeRawNames(XML_Parser parser) { + TAG *tag = parser->m_tagStack; while (tag) { int bufSize; int nameLen = sizeof(XML_Char) * (tag->name.strLen + 1); + size_t rawNameLen; char *rawNameBuf = tag->buf + nameLen; - /* Stop if already stored. Since tagStack is a stack, we can stop + /* Stop if already stored. Since m_tagStack is a stack, we can stop at the first entry that has already been copied; everything below it in the stack is already been accounted for in a previous call to this function. @@ -1967,9 +2575,13 @@ storeRawNames(XML_Parser parser) /* For re-use purposes we need to ensure that the size of tag->buf is a multiple of sizeof(XML_Char). */ - bufSize = nameLen + ROUND_UP(tag->rawNameLength, sizeof(XML_Char)); + rawNameLen = ROUND_UP(tag->rawNameLength, sizeof(XML_Char)); + /* Detect and prevent integer overflow. */ + if (rawNameLen > (size_t)INT_MAX - nameLen) + return XML_FALSE; + bufSize = nameLen + (int)rawNameLen; if (bufSize > tag->bufEnd - tag->buf) { - char *temp = (char *)REALLOC(tag->buf, bufSize); + char *temp = (char *)REALLOC(parser, tag->buf, bufSize); if (temp == NULL) return XML_FALSE; /* if tag->name.str points to tag->buf (only when namespace @@ -1981,8 +2593,8 @@ storeRawNames(XML_Parser parser) then update it as well, since it will always point into tag->buf */ if (tag->name.localPart) - tag->name.localPart = (XML_Char *)temp + (tag->name.localPart - - (XML_Char *)tag->buf); + tag->name.localPart + = (XML_Char *)temp + (tag->name.localPart - (XML_Char *)tag->buf); tag->buf = temp; tag->bufEnd = temp + bufSize; rawNameBuf = temp + nameLen; @@ -1995,163 +2607,166 @@ storeRawNames(XML_Parser parser) } static enum XML_Error PTRCALL -contentProcessor(XML_Parser parser, - const char *start, - const char *end, - const char **endPtr) -{ - enum XML_Error result = doContent(parser, 0, encoding, start, end, - endPtr, (XML_Bool)!finalBuffer); +contentProcessor(XML_Parser parser, const char *start, const char *end, + const char **endPtr) { + enum XML_Error result = doContent( + parser, 0, parser->m_encoding, start, end, endPtr, + (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_ACCOUNT_DIRECT); if (result == XML_ERROR_NONE) { - if (!storeRawNames(parser)) + if (! storeRawNames(parser)) return XML_ERROR_NO_MEMORY; } return result; } static enum XML_Error PTRCALL -externalEntityInitProcessor(XML_Parser parser, - const char *start, - const char *end, - const char **endPtr) -{ +externalEntityInitProcessor(XML_Parser parser, const char *start, + const char *end, const char **endPtr) { enum XML_Error result = initializeEncoding(parser); if (result != XML_ERROR_NONE) return result; - processor = externalEntityInitProcessor2; + parser->m_processor = externalEntityInitProcessor2; return externalEntityInitProcessor2(parser, start, end, endPtr); } static enum XML_Error PTRCALL -externalEntityInitProcessor2(XML_Parser parser, - const char *start, - const char *end, - const char **endPtr) -{ +externalEntityInitProcessor2(XML_Parser parser, const char *start, + const char *end, const char **endPtr) { const char *next = start; /* XmlContentTok doesn't always set the last arg */ - int tok = XmlContentTok(encoding, start, end, &next); + int tok = XmlContentTok(parser->m_encoding, start, end, &next); switch (tok) { case XML_TOK_BOM: +#ifdef XML_DTD + if (! accountingDiffTolerated(parser, tok, start, next, __LINE__, + XML_ACCOUNT_DIRECT)) { + accountingOnAbort(parser); + return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; + } +#endif /* XML_DTD */ + /* If we are at the end of the buffer, this would cause the next stage, i.e. externalEntityInitProcessor3, to pass control directly to doContent (by detecting XML_TOK_NONE) without processing any xml text declaration - causing the error XML_ERROR_MISPLACED_XML_PI in doContent. */ - if (next == end && !finalBuffer) { + if (next == end && ! parser->m_parsingStatus.finalBuffer) { *endPtr = next; return XML_ERROR_NONE; } start = next; break; case XML_TOK_PARTIAL: - if (!finalBuffer) { + if (! parser->m_parsingStatus.finalBuffer) { *endPtr = start; return XML_ERROR_NONE; } - eventPtr = start; + parser->m_eventPtr = start; return XML_ERROR_UNCLOSED_TOKEN; case XML_TOK_PARTIAL_CHAR: - if (!finalBuffer) { + if (! parser->m_parsingStatus.finalBuffer) { *endPtr = start; return XML_ERROR_NONE; } - eventPtr = start; + parser->m_eventPtr = start; return XML_ERROR_PARTIAL_CHAR; } - processor = externalEntityInitProcessor3; + parser->m_processor = externalEntityInitProcessor3; return externalEntityInitProcessor3(parser, start, end, endPtr); } static enum XML_Error PTRCALL -externalEntityInitProcessor3(XML_Parser parser, - const char *start, - const char *end, - const char **endPtr) -{ +externalEntityInitProcessor3(XML_Parser parser, const char *start, + const char *end, const char **endPtr) { int tok; const char *next = start; /* XmlContentTok doesn't always set the last arg */ - eventPtr = start; - tok = XmlContentTok(encoding, start, end, &next); - eventEndPtr = next; + parser->m_eventPtr = start; + tok = XmlContentTok(parser->m_encoding, start, end, &next); + /* Note: These bytes are accounted later in: + - processXmlDecl + - externalEntityContentProcessor + */ + parser->m_eventEndPtr = next; switch (tok) { - case XML_TOK_XML_DECL: - { - enum XML_Error result; - result = processXmlDecl(parser, 1, start, next); - if (result != XML_ERROR_NONE) - return result; - switch (parsing) { - case XML_SUSPENDED: - *endPtr = next; - return XML_ERROR_NONE; - case XML_FINISHED: - return XML_ERROR_ABORTED; - default: - start = next; - } + case XML_TOK_XML_DECL: { + enum XML_Error result; + result = processXmlDecl(parser, 1, start, next); + if (result != XML_ERROR_NONE) + return result; + switch (parser->m_parsingStatus.parsing) { + case XML_SUSPENDED: + *endPtr = next; + return XML_ERROR_NONE; + case XML_FINISHED: + return XML_ERROR_ABORTED; + default: + start = next; } - break; + } break; case XML_TOK_PARTIAL: - if (!finalBuffer) { + if (! parser->m_parsingStatus.finalBuffer) { *endPtr = start; return XML_ERROR_NONE; } return XML_ERROR_UNCLOSED_TOKEN; case XML_TOK_PARTIAL_CHAR: - if (!finalBuffer) { + if (! parser->m_parsingStatus.finalBuffer) { *endPtr = start; return XML_ERROR_NONE; } return XML_ERROR_PARTIAL_CHAR; } - processor = externalEntityContentProcessor; - tagLevel = 1; + parser->m_processor = externalEntityContentProcessor; + parser->m_tagLevel = 1; return externalEntityContentProcessor(parser, start, end, endPtr); } static enum XML_Error PTRCALL -externalEntityContentProcessor(XML_Parser parser, - const char *start, - const char *end, - const char **endPtr) -{ - enum XML_Error result = doContent(parser, 1, encoding, start, end, - endPtr, (XML_Bool)!finalBuffer); +externalEntityContentProcessor(XML_Parser parser, const char *start, + const char *end, const char **endPtr) { + enum XML_Error result + = doContent(parser, 1, parser->m_encoding, start, end, endPtr, + (XML_Bool)! parser->m_parsingStatus.finalBuffer, + XML_ACCOUNT_ENTITY_EXPANSION); if (result == XML_ERROR_NONE) { - if (!storeRawNames(parser)) + if (! storeRawNames(parser)) return XML_ERROR_NO_MEMORY; } return result; } static enum XML_Error -doContent(XML_Parser parser, - int startTagLevel, - const ENCODING *enc, - const char *s, - const char *end, - const char **nextPtr, - XML_Bool haveMore) -{ +doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc, + const char *s, const char *end, const char **nextPtr, + XML_Bool haveMore, enum XML_Account account) { /* save one level of indirection */ - DTD * const dtd = _dtd; + DTD *const dtd = parser->m_dtd; const char **eventPP; const char **eventEndPP; - if (enc == encoding) { - eventPP = &eventPtr; - eventEndPP = &eventEndPtr; - } - else { - eventPP = &(openInternalEntities->internalEventPtr); - eventEndPP = &(openInternalEntities->internalEventEndPtr); + if (enc == parser->m_encoding) { + eventPP = &parser->m_eventPtr; + eventEndPP = &parser->m_eventEndPtr; + } else { + eventPP = &(parser->m_openInternalEntities->internalEventPtr); + eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr); } *eventPP = s; for (;;) { const char *next = s; /* XmlContentTok doesn't always set the last arg */ int tok = XmlContentTok(enc, s, end, &next); +#ifdef XML_DTD + const char *accountAfter + = ((tok == XML_TOK_TRAILING_RSQB) || (tok == XML_TOK_TRAILING_CR)) + ? (haveMore ? s /* i.e. 0 bytes */ : end) + : next; + if (! accountingDiffTolerated(parser, tok, s, accountAfter, __LINE__, + account)) { + accountingOnAbort(parser); + return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; + } +#endif *eventEndPP = next; switch (tok) { case XML_TOK_TRAILING_CR: @@ -2160,18 +2775,17 @@ doContent(XML_Parser parser, return XML_ERROR_NONE; } *eventEndPP = end; - if (characterDataHandler) { + if (parser->m_characterDataHandler) { XML_Char c = 0xA; - characterDataHandler(handlerArg, &c, 1); - } - else if (defaultHandler) + parser->m_characterDataHandler(parser->m_handlerArg, &c, 1); + } else if (parser->m_defaultHandler) reportDefault(parser, enc, s, end); /* We are at the end of the final buffer, should we check for XML_SUSPENDED, XML_FINISHED? */ if (startTagLevel == 0) return XML_ERROR_NO_ELEMENTS; - if (tagLevel != startTagLevel) + if (parser->m_tagLevel != startTagLevel) return XML_ERROR_ASYNC_ENTITY; *nextPtr = end; return XML_ERROR_NONE; @@ -2181,7 +2795,7 @@ doContent(XML_Parser parser, return XML_ERROR_NONE; } if (startTagLevel > 0) { - if (tagLevel != startTagLevel) + if (parser->m_tagLevel != startTagLevel) return XML_ERROR_ASYNC_ENTITY; *nextPtr = s; return XML_ERROR_NONE; @@ -2202,321 +2816,325 @@ doContent(XML_Parser parser, return XML_ERROR_NONE; } return XML_ERROR_PARTIAL_CHAR; - case XML_TOK_ENTITY_REF: - { - const XML_Char *name; - ENTITY *entity; - XML_Char ch = (XML_Char) XmlPredefinedEntityName(enc, - s + enc->minBytesPerChar, - next - enc->minBytesPerChar); - if (ch) { - if (characterDataHandler) - characterDataHandler(handlerArg, &ch, 1); - else if (defaultHandler) + case XML_TOK_ENTITY_REF: { + const XML_Char *name; + ENTITY *entity; + XML_Char ch = (XML_Char)XmlPredefinedEntityName( + enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar); + if (ch) { +#ifdef XML_DTD + /* NOTE: We are replacing 4-6 characters original input for 1 character + * so there is no amplification and hence recording without + * protection. */ + accountingDiffTolerated(parser, tok, (char *)&ch, + ((char *)&ch) + sizeof(XML_Char), __LINE__, + XML_ACCOUNT_ENTITY_EXPANSION); +#endif /* XML_DTD */ + if (parser->m_characterDataHandler) + parser->m_characterDataHandler(parser->m_handlerArg, &ch, 1); + else if (parser->m_defaultHandler) + reportDefault(parser, enc, s, next); + break; + } + name = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar, + next - enc->minBytesPerChar); + if (! name) + return XML_ERROR_NO_MEMORY; + entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0); + poolDiscard(&dtd->pool); + /* First, determine if a check for an existing declaration is needed; + if yes, check that the entity exists, and that it is internal, + otherwise call the skipped entity or default handler. + */ + if (! dtd->hasParamEntityRefs || dtd->standalone) { + if (! entity) + return XML_ERROR_UNDEFINED_ENTITY; + else if (! entity->is_internal) + return XML_ERROR_ENTITY_DECLARED_IN_PE; + } else if (! entity) { + if (parser->m_skippedEntityHandler) + parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0); + else if (parser->m_defaultHandler) + reportDefault(parser, enc, s, next); + break; + } + if (entity->open) + return XML_ERROR_RECURSIVE_ENTITY_REF; + if (entity->notation) + return XML_ERROR_BINARY_ENTITY_REF; + if (entity->textPtr) { + enum XML_Error result; + if (! parser->m_defaultExpandInternalEntities) { + if (parser->m_skippedEntityHandler) + parser->m_skippedEntityHandler(parser->m_handlerArg, entity->name, + 0); + else if (parser->m_defaultHandler) reportDefault(parser, enc, s, next); break; } - name = poolStoreString(&dtd->pool, enc, - s + enc->minBytesPerChar, - next - enc->minBytesPerChar); - if (!name) + result = processInternalEntity(parser, entity, XML_FALSE); + if (result != XML_ERROR_NONE) + return result; + } else if (parser->m_externalEntityRefHandler) { + const XML_Char *context; + entity->open = XML_TRUE; + context = getContext(parser); + entity->open = XML_FALSE; + if (! context) return XML_ERROR_NO_MEMORY; - entity = (ENTITY *)lookup(&dtd->generalEntities, name, 0); - poolDiscard(&dtd->pool); - /* First, determine if a check for an existing declaration is needed; - if yes, check that the entity exists, and that it is internal, - otherwise call the skipped entity or default handler. - */ - if (!dtd->hasParamEntityRefs || dtd->standalone) { - if (!entity) - return XML_ERROR_UNDEFINED_ENTITY; - else if (!entity->is_internal) - return XML_ERROR_ENTITY_DECLARED_IN_PE; - } - else if (!entity) { - if (skippedEntityHandler) - skippedEntityHandler(handlerArg, name, 0); - else if (defaultHandler) - reportDefault(parser, enc, s, next); - break; - } - if (entity->open) - return XML_ERROR_RECURSIVE_ENTITY_REF; - if (entity->notation) - return XML_ERROR_BINARY_ENTITY_REF; - if (entity->textPtr) { - enum XML_Error result; - if (!defaultExpandInternalEntities) { - if (skippedEntityHandler) - skippedEntityHandler(handlerArg, entity->name, 0); - else if (defaultHandler) - reportDefault(parser, enc, s, next); - break; - } - result = processInternalEntity(parser, entity, XML_FALSE); - if (result != XML_ERROR_NONE) - return result; - } - else if (externalEntityRefHandler) { - const XML_Char *context; - entity->open = XML_TRUE; - context = getContext(parser); - entity->open = XML_FALSE; - if (!context) - return XML_ERROR_NO_MEMORY; - if (!externalEntityRefHandler(externalEntityRefHandlerArg, - context, - entity->base, - entity->systemId, - entity->publicId)) - return XML_ERROR_EXTERNAL_ENTITY_HANDLING; - poolDiscard(&tempPool); - } - else if (defaultHandler) - reportDefault(parser, enc, s, next); - break; - } + if (! parser->m_externalEntityRefHandler( + parser->m_externalEntityRefHandlerArg, context, entity->base, + entity->systemId, entity->publicId)) + return XML_ERROR_EXTERNAL_ENTITY_HANDLING; + poolDiscard(&parser->m_tempPool); + } else if (parser->m_defaultHandler) + reportDefault(parser, enc, s, next); + break; + } case XML_TOK_START_TAG_NO_ATTS: /* fall through */ - case XML_TOK_START_TAG_WITH_ATTS: - { - TAG *tag; - enum XML_Error result; - XML_Char *toPtr; - if (freeTagList) { - tag = freeTagList; - freeTagList = freeTagList->parent; + case XML_TOK_START_TAG_WITH_ATTS: { + TAG *tag; + enum XML_Error result; + XML_Char *toPtr; + if (parser->m_freeTagList) { + tag = parser->m_freeTagList; + parser->m_freeTagList = parser->m_freeTagList->parent; + } else { + tag = (TAG *)MALLOC(parser, sizeof(TAG)); + if (! tag) + return XML_ERROR_NO_MEMORY; + tag->buf = (char *)MALLOC(parser, INIT_TAG_BUF_SIZE); + if (! tag->buf) { + FREE(parser, tag); + return XML_ERROR_NO_MEMORY; } - else { - tag = (TAG *)MALLOC(sizeof(TAG)); - if (!tag) - return XML_ERROR_NO_MEMORY; - tag->buf = (char *)MALLOC(INIT_TAG_BUF_SIZE); - if (!tag->buf) { - FREE(tag); - return XML_ERROR_NO_MEMORY; + tag->bufEnd = tag->buf + INIT_TAG_BUF_SIZE; + } + tag->bindings = NULL; + tag->parent = parser->m_tagStack; + parser->m_tagStack = tag; + tag->name.localPart = NULL; + tag->name.prefix = NULL; + tag->rawName = s + enc->minBytesPerChar; + tag->rawNameLength = XmlNameLength(enc, tag->rawName); + ++parser->m_tagLevel; + { + const char *rawNameEnd = tag->rawName + tag->rawNameLength; + const char *fromPtr = tag->rawName; + toPtr = (XML_Char *)tag->buf; + for (;;) { + int bufSize; + int convLen; + const enum XML_Convert_Result convert_res + = XmlConvert(enc, &fromPtr, rawNameEnd, (ICHAR **)&toPtr, + (ICHAR *)tag->bufEnd - 1); + convLen = (int)(toPtr - (XML_Char *)tag->buf); + if ((fromPtr >= rawNameEnd) + || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) { + tag->name.strLen = convLen; + break; } - tag->bufEnd = tag->buf + INIT_TAG_BUF_SIZE; - } - tag->bindings = NULL; - tag->parent = tagStack; - tagStack = tag; - tag->name.localPart = NULL; - tag->name.prefix = NULL; - tag->rawName = s + enc->minBytesPerChar; - tag->rawNameLength = XmlNameLength(enc, tag->rawName); - ++tagLevel; - { - const char *rawNameEnd = tag->rawName + tag->rawNameLength; - const char *fromPtr = tag->rawName; - toPtr = (XML_Char *)tag->buf; - for (;;) { - int bufSize; - int convLen; - XmlConvert(enc, - &fromPtr, rawNameEnd, - (ICHAR **)&toPtr, (ICHAR *)tag->bufEnd - 1); - convLen = toPtr - (XML_Char *)tag->buf; - if (fromPtr == rawNameEnd) { - tag->name.strLen = convLen; - break; - } - bufSize = (tag->bufEnd - tag->buf) << 1; - { - char *temp = (char *)REALLOC(tag->buf, bufSize); - if (temp == NULL) - return XML_ERROR_NO_MEMORY; - tag->buf = temp; - tag->bufEnd = temp + bufSize; - toPtr = (XML_Char *)temp + convLen; - } + bufSize = (int)(tag->bufEnd - tag->buf) << 1; + { + char *temp = (char *)REALLOC(parser, tag->buf, bufSize); + if (temp == NULL) + return XML_ERROR_NO_MEMORY; + tag->buf = temp; + tag->bufEnd = temp + bufSize; + toPtr = (XML_Char *)temp + convLen; } } - tag->name.str = (XML_Char *)tag->buf; - *toPtr = XML_T('\0'); - result = storeAtts(parser, enc, s, &(tag->name), &(tag->bindings)); - if (result) - return result; - if (startElementHandler) - startElementHandler(handlerArg, tag->name.str, - (const XML_Char **)atts); - else if (defaultHandler) - reportDefault(parser, enc, s, next); - poolClear(&tempPool); - break; } + tag->name.str = (XML_Char *)tag->buf; + *toPtr = XML_T('\0'); + result + = storeAtts(parser, enc, s, &(tag->name), &(tag->bindings), account); + if (result) + return result; + if (parser->m_startElementHandler) + parser->m_startElementHandler(parser->m_handlerArg, tag->name.str, + (const XML_Char **)parser->m_atts); + else if (parser->m_defaultHandler) + reportDefault(parser, enc, s, next); + poolClear(&parser->m_tempPool); + break; + } case XML_TOK_EMPTY_ELEMENT_NO_ATTS: /* fall through */ - case XML_TOK_EMPTY_ELEMENT_WITH_ATTS: - { - const char *rawName = s + enc->minBytesPerChar; - enum XML_Error result; - BINDING *bindings = NULL; - XML_Bool noElmHandlers = XML_TRUE; - TAG_NAME name; - name.str = poolStoreString(&tempPool, enc, rawName, - rawName + XmlNameLength(enc, rawName)); - if (!name.str) - return XML_ERROR_NO_MEMORY; - poolFinish(&tempPool); - result = storeAtts(parser, enc, s, &name, &bindings); - if (result) - return result; - poolFinish(&tempPool); - if (startElementHandler) { - startElementHandler(handlerArg, name.str, (const XML_Char **)atts); - noElmHandlers = XML_FALSE; - } - if (endElementHandler) { - if (startElementHandler) - *eventPP = *eventEndPP; - endElementHandler(handlerArg, name.str); - noElmHandlers = XML_FALSE; - } - if (noElmHandlers && defaultHandler) - reportDefault(parser, enc, s, next); - poolClear(&tempPool); - while (bindings) { - BINDING *b = bindings; - if (endNamespaceDeclHandler) - endNamespaceDeclHandler(handlerArg, b->prefix->name); - bindings = bindings->nextTagBinding; - b->nextTagBinding = freeBindingList; - freeBindingList = b; - b->prefix->binding = b->prevPrefixBinding; - } + case XML_TOK_EMPTY_ELEMENT_WITH_ATTS: { + const char *rawName = s + enc->minBytesPerChar; + enum XML_Error result; + BINDING *bindings = NULL; + XML_Bool noElmHandlers = XML_TRUE; + TAG_NAME name; + name.str = poolStoreString(&parser->m_tempPool, enc, rawName, + rawName + XmlNameLength(enc, rawName)); + if (! name.str) + return XML_ERROR_NO_MEMORY; + poolFinish(&parser->m_tempPool); + result = storeAtts(parser, enc, s, &name, &bindings, + XML_ACCOUNT_NONE /* token spans whole start tag */); + if (result != XML_ERROR_NONE) { + freeBindings(parser, bindings); + return result; + } + poolFinish(&parser->m_tempPool); + if (parser->m_startElementHandler) { + parser->m_startElementHandler(parser->m_handlerArg, name.str, + (const XML_Char **)parser->m_atts); + noElmHandlers = XML_FALSE; + } + if (parser->m_endElementHandler) { + if (parser->m_startElementHandler) + *eventPP = *eventEndPP; + parser->m_endElementHandler(parser->m_handlerArg, name.str); + noElmHandlers = XML_FALSE; + } + if (noElmHandlers && parser->m_defaultHandler) + reportDefault(parser, enc, s, next); + poolClear(&parser->m_tempPool); + freeBindings(parser, bindings); + } + if ((parser->m_tagLevel == 0) + && (parser->m_parsingStatus.parsing != XML_FINISHED)) { + if (parser->m_parsingStatus.parsing == XML_SUSPENDED) + parser->m_processor = epilogProcessor; + else + return epilogProcessor(parser, next, end, nextPtr); } - if (tagLevel == 0) - return epilogProcessor(parser, next, end, nextPtr); break; case XML_TOK_END_TAG: - if (tagLevel == startTagLevel) + if (parser->m_tagLevel == startTagLevel) return XML_ERROR_ASYNC_ENTITY; else { int len; const char *rawName; - TAG *tag = tagStack; - tagStack = tag->parent; - tag->parent = freeTagList; - freeTagList = tag; - rawName = s + enc->minBytesPerChar*2; + TAG *tag = parser->m_tagStack; + parser->m_tagStack = tag->parent; + tag->parent = parser->m_freeTagList; + parser->m_freeTagList = tag; + rawName = s + enc->minBytesPerChar * 2; len = XmlNameLength(enc, rawName); if (len != tag->rawNameLength || memcmp(tag->rawName, rawName, len) != 0) { *eventPP = rawName; return XML_ERROR_TAG_MISMATCH; } - --tagLevel; - if (endElementHandler) { + --parser->m_tagLevel; + if (parser->m_endElementHandler) { const XML_Char *localPart; const XML_Char *prefix; XML_Char *uri; localPart = tag->name.localPart; - if (ns && localPart) { + if (parser->m_ns && localPart) { /* localPart and prefix may have been overwritten in tag->name.str, since this points to the binding->uri buffer which gets re-used; so we have to add them again */ uri = (XML_Char *)tag->name.str + tag->name.uriLen; /* don't need to check for space - already done in storeAtts() */ - while (*localPart) *uri++ = *localPart++; + while (*localPart) + *uri++ = *localPart++; prefix = (XML_Char *)tag->name.prefix; - if (ns_triplets && prefix) { - *uri++ = namespaceSeparator; - while (*prefix) *uri++ = *prefix++; - } + if (parser->m_ns_triplets && prefix) { + *uri++ = parser->m_namespaceSeparator; + while (*prefix) + *uri++ = *prefix++; + } *uri = XML_T('\0'); } - endElementHandler(handlerArg, tag->name.str); - } - else if (defaultHandler) + parser->m_endElementHandler(parser->m_handlerArg, tag->name.str); + } else if (parser->m_defaultHandler) reportDefault(parser, enc, s, next); while (tag->bindings) { BINDING *b = tag->bindings; - if (endNamespaceDeclHandler) - endNamespaceDeclHandler(handlerArg, b->prefix->name); + if (parser->m_endNamespaceDeclHandler) + parser->m_endNamespaceDeclHandler(parser->m_handlerArg, + b->prefix->name); tag->bindings = tag->bindings->nextTagBinding; - b->nextTagBinding = freeBindingList; - freeBindingList = b; + b->nextTagBinding = parser->m_freeBindingList; + parser->m_freeBindingList = b; b->prefix->binding = b->prevPrefixBinding; } - if (tagLevel == 0) - return epilogProcessor(parser, next, end, nextPtr); - } - break; - case XML_TOK_CHAR_REF: - { - int n = XmlCharRefNumber(enc, s); - if (n < 0) - return XML_ERROR_BAD_CHAR_REF; - if (characterDataHandler) { - XML_Char buf[XML_ENCODE_MAX]; - characterDataHandler(handlerArg, buf, XmlEncode(n, (ICHAR *)buf)); + if ((parser->m_tagLevel == 0) + && (parser->m_parsingStatus.parsing != XML_FINISHED)) { + if (parser->m_parsingStatus.parsing == XML_SUSPENDED) + parser->m_processor = epilogProcessor; + else + return epilogProcessor(parser, next, end, nextPtr); } - else if (defaultHandler) - reportDefault(parser, enc, s, next); } break; + case XML_TOK_CHAR_REF: { + int n = XmlCharRefNumber(enc, s); + if (n < 0) + return XML_ERROR_BAD_CHAR_REF; + if (parser->m_characterDataHandler) { + XML_Char buf[XML_ENCODE_MAX]; + parser->m_characterDataHandler(parser->m_handlerArg, buf, + XmlEncode(n, (ICHAR *)buf)); + } else if (parser->m_defaultHandler) + reportDefault(parser, enc, s, next); + } break; case XML_TOK_XML_DECL: return XML_ERROR_MISPLACED_XML_PI; case XML_TOK_DATA_NEWLINE: - if (characterDataHandler) { + if (parser->m_characterDataHandler) { XML_Char c = 0xA; - characterDataHandler(handlerArg, &c, 1); - } - else if (defaultHandler) + parser->m_characterDataHandler(parser->m_handlerArg, &c, 1); + } else if (parser->m_defaultHandler) reportDefault(parser, enc, s, next); break; - case XML_TOK_CDATA_SECT_OPEN: - { - enum XML_Error result; - if (startCdataSectionHandler) - startCdataSectionHandler(handlerArg); -#if 0 - /* Suppose you doing a transformation on a document that involves - changing only the character data. You set up a defaultHandler - and a characterDataHandler. The defaultHandler simply copies - characters through. The characterDataHandler does the - transformation and writes the characters out escaping them as - necessary. This case will fail to work if we leave out the - following two lines (because & and < inside CDATA sections will - be incorrectly escaped). - - However, now we have a start/endCdataSectionHandler, so it seems - easier to let the user deal with this. - */ - else if (characterDataHandler) - characterDataHandler(handlerArg, dataBuf, 0); -#endif - else if (defaultHandler) - reportDefault(parser, enc, s, next); - result = doCdataSection(parser, enc, &next, end, nextPtr, haveMore); - if (result != XML_ERROR_NONE) - return result; - else if (!next) { - processor = cdataSectionProcessor; - return result; - } + case XML_TOK_CDATA_SECT_OPEN: { + enum XML_Error result; + if (parser->m_startCdataSectionHandler) + parser->m_startCdataSectionHandler(parser->m_handlerArg); + /* BEGIN disabled code */ + /* Suppose you doing a transformation on a document that involves + changing only the character data. You set up a defaultHandler + and a characterDataHandler. The defaultHandler simply copies + characters through. The characterDataHandler does the + transformation and writes the characters out escaping them as + necessary. This case will fail to work if we leave out the + following two lines (because & and < inside CDATA sections will + be incorrectly escaped). + + However, now we have a start/endCdataSectionHandler, so it seems + easier to let the user deal with this. + */ + else if (0 && parser->m_characterDataHandler) + parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf, + 0); + /* END disabled code */ + else if (parser->m_defaultHandler) + reportDefault(parser, enc, s, next); + result + = doCdataSection(parser, enc, &next, end, nextPtr, haveMore, account); + if (result != XML_ERROR_NONE) + return result; + else if (! next) { + parser->m_processor = cdataSectionProcessor; + return result; } - break; + } break; case XML_TOK_TRAILING_RSQB: if (haveMore) { *nextPtr = s; return XML_ERROR_NONE; } - if (characterDataHandler) { + if (parser->m_characterDataHandler) { if (MUST_CONVERT(enc, s)) { - ICHAR *dataPtr = (ICHAR *)dataBuf; - XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)dataBufEnd); - characterDataHandler(handlerArg, dataBuf, - dataPtr - (ICHAR *)dataBuf); - } - else - characterDataHandler(handlerArg, - (XML_Char *)s, - (XML_Char *)end - (XML_Char *)s); - } - else if (defaultHandler) + ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf; + XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)parser->m_dataBufEnd); + parser->m_characterDataHandler( + parser->m_handlerArg, parser->m_dataBuf, + (int)(dataPtr - (ICHAR *)parser->m_dataBuf)); + } else + parser->m_characterDataHandler( + parser->m_handlerArg, (XML_Char *)s, + (int)((XML_Char *)end - (XML_Char *)s)); + } else if (parser->m_defaultHandler) reportDefault(parser, enc, s, end); /* We are at the end of the final buffer, should we check for XML_SUSPENDED, XML_FINISHED? @@ -2525,60 +3143,90 @@ doContent(XML_Parser parser, *eventPP = end; return XML_ERROR_NO_ELEMENTS; } - if (tagLevel != startTagLevel) { + if (parser->m_tagLevel != startTagLevel) { *eventPP = end; return XML_ERROR_ASYNC_ENTITY; } *nextPtr = end; return XML_ERROR_NONE; - case XML_TOK_DATA_CHARS: - if (characterDataHandler) { + case XML_TOK_DATA_CHARS: { + XML_CharacterDataHandler charDataHandler = parser->m_characterDataHandler; + if (charDataHandler) { if (MUST_CONVERT(enc, s)) { for (;;) { - ICHAR *dataPtr = (ICHAR *)dataBuf; - XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd); + ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf; + const enum XML_Convert_Result convert_res = XmlConvert( + enc, &s, next, &dataPtr, (ICHAR *)parser->m_dataBufEnd); *eventEndPP = s; - characterDataHandler(handlerArg, dataBuf, - dataPtr - (ICHAR *)dataBuf); - if (s == next) + charDataHandler(parser->m_handlerArg, parser->m_dataBuf, + (int)(dataPtr - (ICHAR *)parser->m_dataBuf)); + if ((convert_res == XML_CONVERT_COMPLETED) + || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) break; *eventPP = s; } - } - else - characterDataHandler(handlerArg, - (XML_Char *)s, - (XML_Char *)next - (XML_Char *)s); - } - else if (defaultHandler) + } else + charDataHandler(parser->m_handlerArg, (XML_Char *)s, + (int)((XML_Char *)next - (XML_Char *)s)); + } else if (parser->m_defaultHandler) reportDefault(parser, enc, s, next); - break; + } break; case XML_TOK_PI: - if (!reportProcessingInstruction(parser, enc, s, next)) + if (! reportProcessingInstruction(parser, enc, s, next)) return XML_ERROR_NO_MEMORY; break; case XML_TOK_COMMENT: - if (!reportComment(parser, enc, s, next)) + if (! reportComment(parser, enc, s, next)) return XML_ERROR_NO_MEMORY; break; default: - if (defaultHandler) + /* All of the tokens produced by XmlContentTok() have their own + * explicit cases, so this default is not strictly necessary. + * However it is a useful safety net, so we retain the code and + * simply exclude it from the coverage tests. + * + * LCOV_EXCL_START + */ + if (parser->m_defaultHandler) reportDefault(parser, enc, s, next); break; + /* LCOV_EXCL_STOP */ } *eventPP = s = next; - switch (parsing) { + switch (parser->m_parsingStatus.parsing) { case XML_SUSPENDED: *nextPtr = next; return XML_ERROR_NONE; case XML_FINISHED: return XML_ERROR_ABORTED; - default: ; + default:; } } /* not reached */ } +/* This function does not call free() on the allocated memory, merely + * moving it to the parser's m_freeBindingList where it can be freed or + * reused as appropriate. + */ +static void +freeBindings(XML_Parser parser, BINDING *bindings) { + while (bindings) { + BINDING *b = bindings; + + /* m_startNamespaceDeclHandler will have been called for this + * binding in addBindings(), so call the end handler now. + */ + if (parser->m_endNamespaceDeclHandler) + parser->m_endNamespaceDeclHandler(parser->m_handlerArg, b->prefix->name); + + bindings = bindings->nextTagBinding; + b->nextTagBinding = parser->m_freeBindingList; + parser->m_freeBindingList = b; + b->prefix->binding = b->prevPrefixBinding; + } +} + /* Precondition: all arguments must be non-NULL; Purpose: - normalize attributes @@ -2590,14 +3238,13 @@ doContent(XML_Parser parser, - generate namespace aware element name (URI, prefix) */ static enum XML_Error -storeAtts(XML_Parser parser, const ENCODING *enc, - const char *attStr, TAG_NAME *tagNamePtr, - BINDING **bindingsPtr) -{ - DTD * const dtd = _dtd; /* save one level of indirection */ +storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr, + TAG_NAME *tagNamePtr, BINDING **bindingsPtr, + enum XML_Account account) { + DTD *const dtd = parser->m_dtd; /* save one level of indirection */ ELEMENT_TYPE *elementType; int nDefaultAtts; - const XML_Char **appAtts; /* the attribute list for the application */ + const XML_Char **appAtts; /* the attribute list for the application */ int attIndex = 0; int prefixLen; int i; @@ -2608,54 +3255,120 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const XML_Char *localPart; /* lookup the element type name */ - elementType = (ELEMENT_TYPE *)lookup(&dtd->elementTypes, tagNamePtr->str,0); - if (!elementType) { + elementType + = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, tagNamePtr->str, 0); + if (! elementType) { const XML_Char *name = poolCopyString(&dtd->pool, tagNamePtr->str); - if (!name) + if (! name) return XML_ERROR_NO_MEMORY; - elementType = (ELEMENT_TYPE *)lookup(&dtd->elementTypes, name, + elementType = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, name, sizeof(ELEMENT_TYPE)); - if (!elementType) + if (! elementType) return XML_ERROR_NO_MEMORY; - if (ns && !setElementTypePrefix(parser, elementType)) + if (parser->m_ns && ! setElementTypePrefix(parser, elementType)) return XML_ERROR_NO_MEMORY; } nDefaultAtts = elementType->nDefaultAtts; /* get the attributes from the tokenizer */ - n = XmlGetAttributes(enc, attStr, attsSize, atts); - if (n + nDefaultAtts > attsSize) { - int oldAttsSize = attsSize; + n = XmlGetAttributes(enc, attStr, parser->m_attsSize, parser->m_atts); + + /* Detect and prevent integer overflow */ + if (n > INT_MAX - nDefaultAtts) { + return XML_ERROR_NO_MEMORY; + } + + if (n + nDefaultAtts > parser->m_attsSize) { + int oldAttsSize = parser->m_attsSize; ATTRIBUTE *temp; - attsSize = n + nDefaultAtts + INIT_ATTS_SIZE; - temp = (ATTRIBUTE *)REALLOC((void *)atts, attsSize * sizeof(ATTRIBUTE)); - if (temp == NULL) +#ifdef XML_ATTR_INFO + XML_AttrInfo *temp2; +#endif + + /* Detect and prevent integer overflow */ + if ((nDefaultAtts > INT_MAX - INIT_ATTS_SIZE) + || (n > INT_MAX - (nDefaultAtts + INIT_ATTS_SIZE))) { return XML_ERROR_NO_MEMORY; - atts = temp; + } + + parser->m_attsSize = n + nDefaultAtts + INIT_ATTS_SIZE; + + /* Detect and prevent integer overflow. + * The preprocessor guard addresses the "always false" warning + * from -Wtype-limits on platforms where + * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ +#if UINT_MAX >= SIZE_MAX + if ((unsigned)parser->m_attsSize > (size_t)(-1) / sizeof(ATTRIBUTE)) { + parser->m_attsSize = oldAttsSize; + return XML_ERROR_NO_MEMORY; + } +#endif + + temp = (ATTRIBUTE *)REALLOC(parser, (void *)parser->m_atts, + parser->m_attsSize * sizeof(ATTRIBUTE)); + if (temp == NULL) { + parser->m_attsSize = oldAttsSize; + return XML_ERROR_NO_MEMORY; + } + parser->m_atts = temp; +#ifdef XML_ATTR_INFO + /* Detect and prevent integer overflow. + * The preprocessor guard addresses the "always false" warning + * from -Wtype-limits on platforms where + * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ +# if UINT_MAX >= SIZE_MAX + if ((unsigned)parser->m_attsSize > (size_t)(-1) / sizeof(XML_AttrInfo)) { + parser->m_attsSize = oldAttsSize; + return XML_ERROR_NO_MEMORY; + } +# endif + + temp2 = (XML_AttrInfo *)REALLOC(parser, (void *)parser->m_attInfo, + parser->m_attsSize * sizeof(XML_AttrInfo)); + if (temp2 == NULL) { + parser->m_attsSize = oldAttsSize; + return XML_ERROR_NO_MEMORY; + } + parser->m_attInfo = temp2; +#endif if (n > oldAttsSize) - XmlGetAttributes(enc, attStr, n, atts); + XmlGetAttributes(enc, attStr, n, parser->m_atts); } - appAtts = (const XML_Char **)atts; + appAtts = (const XML_Char **)parser->m_atts; for (i = 0; i < n; i++) { + ATTRIBUTE *currAtt = &parser->m_atts[i]; +#ifdef XML_ATTR_INFO + XML_AttrInfo *currAttInfo = &parser->m_attInfo[i]; +#endif /* add the name and value to the attribute list */ - ATTRIBUTE_ID *attId = getAttributeId(parser, enc, atts[i].name, - atts[i].name - + XmlNameLength(enc, atts[i].name)); - if (!attId) + ATTRIBUTE_ID *attId + = getAttributeId(parser, enc, currAtt->name, + currAtt->name + XmlNameLength(enc, currAtt->name)); + if (! attId) return XML_ERROR_NO_MEMORY; +#ifdef XML_ATTR_INFO + currAttInfo->nameStart + = parser->m_parseEndByteIndex - (parser->m_parseEndPtr - currAtt->name); + currAttInfo->nameEnd + = currAttInfo->nameStart + XmlNameLength(enc, currAtt->name); + currAttInfo->valueStart = parser->m_parseEndByteIndex + - (parser->m_parseEndPtr - currAtt->valuePtr); + currAttInfo->valueEnd = parser->m_parseEndByteIndex + - (parser->m_parseEndPtr - currAtt->valueEnd); +#endif /* Detect duplicate attributes by their QNames. This does not work when namespace processing is turned on and different prefixes for the same namespace are used. For this case we have a check further down. */ if ((attId->name)[-1]) { - if (enc == encoding) - eventPtr = atts[i].name; + if (enc == parser->m_encoding) + parser->m_eventPtr = parser->m_atts[i].name; return XML_ERROR_DUPLICATE_ATTRIBUTE; } (attId->name)[-1] = 1; appAtts[attIndex++] = attId->name; - if (!atts[i].normalized) { + if (! parser->m_atts[i].normalized) { enum XML_Error result; XML_Bool isCdata = XML_TRUE; @@ -2671,21 +3384,21 @@ storeAtts(XML_Parser parser, const ENCODING *enc, } /* normalize the attribute value */ - result = storeAttributeValue(parser, enc, isCdata, - atts[i].valuePtr, atts[i].valueEnd, - &tempPool); + result = storeAttributeValue( + parser, enc, isCdata, parser->m_atts[i].valuePtr, + parser->m_atts[i].valueEnd, &parser->m_tempPool, account); if (result) return result; - appAtts[attIndex] = poolStart(&tempPool); - poolFinish(&tempPool); - } - else { + appAtts[attIndex] = poolStart(&parser->m_tempPool); + poolFinish(&parser->m_tempPool); + } else { /* the value did not need normalizing */ - appAtts[attIndex] = poolStoreString(&tempPool, enc, atts[i].valuePtr, - atts[i].valueEnd); + appAtts[attIndex] = poolStoreString(&parser->m_tempPool, enc, + parser->m_atts[i].valuePtr, + parser->m_atts[i].valueEnd); if (appAtts[attIndex] == 0) return XML_ERROR_NO_MEMORY; - poolFinish(&tempPool); + poolFinish(&parser->m_tempPool); } /* handle prefixed attribute names */ if (attId->prefix) { @@ -2696,49 +3409,44 @@ storeAtts(XML_Parser parser, const ENCODING *enc, if (result) return result; --attIndex; - } - else { + } else { /* deal with other prefixed names later */ attIndex++; nPrefixes++; (attId->name)[-1] = 2; } - } - else + } else attIndex++; } /* set-up for XML_GetSpecifiedAttributeCount and XML_GetIdAttributeIndex */ - nSpecifiedAtts = attIndex; + parser->m_nSpecifiedAtts = attIndex; if (elementType->idAtt && (elementType->idAtt->name)[-1]) { for (i = 0; i < attIndex; i += 2) if (appAtts[i] == elementType->idAtt->name) { - idAttIndex = i; + parser->m_idAttIndex = i; break; } - } - else - idAttIndex = -1; + } else + parser->m_idAttIndex = -1; /* do attribute defaulting */ for (i = 0; i < nDefaultAtts; i++) { const DEFAULT_ATTRIBUTE *da = elementType->defaultAtts + i; - if (!(da->id->name)[-1] && da->value) { + if (! (da->id->name)[-1] && da->value) { if (da->id->prefix) { if (da->id->xmlns) { enum XML_Error result = addBinding(parser, da->id->prefix, da->id, da->value, bindingsPtr); if (result) return result; - } - else { + } else { (da->id->name)[-1] = 2; nPrefixes++; appAtts[attIndex++] = da->id->name; appAtts[attIndex++] = da->value; } - } - else { + } else { (da->id->name)[-1] = 1; appAtts[attIndex++] = da->id->name; appAtts[attIndex++] = da->value; @@ -2751,106 +3459,169 @@ storeAtts(XML_Parser parser, const ENCODING *enc, and clear flags that say whether attributes were specified */ i = 0; if (nPrefixes) { - int j; /* hash table index */ - unsigned long version = nsAttsVersion; - int nsAttsSize = (int)1 << nsAttsPower; + int j; /* hash table index */ + unsigned long version = parser->m_nsAttsVersion; + + /* Detect and prevent invalid shift */ + if (parser->m_nsAttsPower >= sizeof(unsigned int) * 8 /* bits per byte */) { + return XML_ERROR_NO_MEMORY; + } + + unsigned int nsAttsSize = 1u << parser->m_nsAttsPower; + unsigned char oldNsAttsPower = parser->m_nsAttsPower; /* size of hash table must be at least 2 * (# of prefixed attributes) */ - if ((nPrefixes << 1) >> nsAttsPower) { /* true for nsAttsPower = 0 */ + if ((nPrefixes << 1) + >> parser->m_nsAttsPower) { /* true for m_nsAttsPower = 0 */ NS_ATT *temp; /* hash table size must also be a power of 2 and >= 8 */ - while (nPrefixes >> nsAttsPower++); - if (nsAttsPower < 3) - nsAttsPower = 3; - nsAttsSize = (int)1 << nsAttsPower; - temp = (NS_ATT *)REALLOC(nsAtts, nsAttsSize * sizeof(NS_ATT)); - if (!temp) + while (nPrefixes >> parser->m_nsAttsPower++) + ; + if (parser->m_nsAttsPower < 3) + parser->m_nsAttsPower = 3; + + /* Detect and prevent invalid shift */ + if (parser->m_nsAttsPower >= sizeof(nsAttsSize) * 8 /* bits per byte */) { + /* Restore actual size of memory in m_nsAtts */ + parser->m_nsAttsPower = oldNsAttsPower; return XML_ERROR_NO_MEMORY; - nsAtts = temp; - version = 0; /* force re-initialization of nsAtts hash table */ + } + + nsAttsSize = 1u << parser->m_nsAttsPower; + + /* Detect and prevent integer overflow. + * The preprocessor guard addresses the "always false" warning + * from -Wtype-limits on platforms where + * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ +#if UINT_MAX >= SIZE_MAX + if (nsAttsSize > (size_t)(-1) / sizeof(NS_ATT)) { + /* Restore actual size of memory in m_nsAtts */ + parser->m_nsAttsPower = oldNsAttsPower; + return XML_ERROR_NO_MEMORY; + } +#endif + + temp = (NS_ATT *)REALLOC(parser, parser->m_nsAtts, + nsAttsSize * sizeof(NS_ATT)); + if (! temp) { + /* Restore actual size of memory in m_nsAtts */ + parser->m_nsAttsPower = oldNsAttsPower; + return XML_ERROR_NO_MEMORY; + } + parser->m_nsAtts = temp; + version = 0; /* force re-initialization of m_nsAtts hash table */ } - /* using a version flag saves us from initializing nsAtts every time */ - if (!version) { /* initialize version flags when version wraps around */ + /* using a version flag saves us from initializing m_nsAtts every time */ + if (! version) { /* initialize version flags when version wraps around */ version = INIT_ATTS_VERSION; - for (j = nsAttsSize; j != 0; ) - nsAtts[--j].version = version; + for (j = nsAttsSize; j != 0;) + parser->m_nsAtts[--j].version = version; } - nsAttsVersion = --version; + parser->m_nsAttsVersion = --version; /* expand prefixed names and check for duplicates */ for (; i < attIndex; i += 2) { const XML_Char *s = appAtts[i]; - if (s[-1] == 2) { /* prefixed */ + if (s[-1] == 2) { /* prefixed */ ATTRIBUTE_ID *id; const BINDING *b; - unsigned long uriHash = 0; - ((XML_Char *)s)[-1] = 0; /* clear flag */ - id = (ATTRIBUTE_ID *)lookup(&dtd->attributeIds, s, 0); + unsigned long uriHash; + struct siphash sip_state; + struct sipkey sip_key; + + copy_salt_to_sipkey(parser, &sip_key); + sip24_init(&sip_state, &sip_key); + + ((XML_Char *)s)[-1] = 0; /* clear flag */ + id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, s, 0); + if (! id || ! id->prefix) { + /* This code is walking through the appAtts array, dealing + * with (in this case) a prefixed attribute name. To be in + * the array, the attribute must have already been bound, so + * has to have passed through the hash table lookup once + * already. That implies that an entry for it already + * exists, so the lookup above will return a pointer to + * already allocated memory. There is no opportunaity for + * the allocator to fail, so the condition above cannot be + * fulfilled. + * + * Since it is difficult to be certain that the above + * analysis is complete, we retain the test and merely + * remove the code from coverage tests. + */ + return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */ + } b = id->prefix->binding; - if (!b) + if (! b) return XML_ERROR_UNBOUND_PREFIX; - /* as we expand the name we also calculate its hash value */ for (j = 0; j < b->uriLen; j++) { const XML_Char c = b->uri[j]; - if (!poolAppendChar(&tempPool, c)) + if (! poolAppendChar(&parser->m_tempPool, c)) return XML_ERROR_NO_MEMORY; - uriHash = CHAR_HASH(uriHash, c); } - while (*s++ != XML_T(':')) + + sip24_update(&sip_state, b->uri, b->uriLen * sizeof(XML_Char)); + + while (*s++ != XML_T(ASCII_COLON)) ; - do { /* copies null terminator */ - const XML_Char c = *s; - if (!poolAppendChar(&tempPool, *s)) + + sip24_update(&sip_state, s, keylen(s) * sizeof(XML_Char)); + + do { /* copies null terminator */ + if (! poolAppendChar(&parser->m_tempPool, *s)) return XML_ERROR_NO_MEMORY; - uriHash = CHAR_HASH(uriHash, c); } while (*s++); + uriHash = (unsigned long)sip24_final(&sip_state); + { /* Check hash table for duplicate of expanded name (uriName). - Derived from code in lookup(HASH_TABLE *table, ...). + Derived from code in lookup(parser, HASH_TABLE *table, ...). */ unsigned char step = 0; unsigned long mask = nsAttsSize - 1; - j = uriHash & mask; /* index into hash table */ - while (nsAtts[j].version == version) { + j = uriHash & mask; /* index into hash table */ + while (parser->m_nsAtts[j].version == version) { /* for speed we compare stored hash values first */ - if (uriHash == nsAtts[j].hash) { - const XML_Char *s1 = poolStart(&tempPool); - const XML_Char *s2 = nsAtts[j].uriName; + if (uriHash == parser->m_nsAtts[j].hash) { + const XML_Char *s1 = poolStart(&parser->m_tempPool); + const XML_Char *s2 = parser->m_nsAtts[j].uriName; /* s1 is null terminated, but not s2 */ - for (; *s1 == *s2 && *s1 != 0; s1++, s2++); + for (; *s1 == *s2 && *s1 != 0; s1++, s2++) + ; if (*s1 == 0) return XML_ERROR_DUPLICATE_ATTRIBUTE; } - if (!step) - step = PROBE_STEP(uriHash, mask, nsAttsPower); - j < step ? ( j += nsAttsSize - step) : (j -= step); + if (! step) + step = PROBE_STEP(uriHash, mask, parser->m_nsAttsPower); + j < step ? (j += nsAttsSize - step) : (j -= step); } } - if (ns_triplets) { /* append namespace separator and prefix */ - tempPool.ptr[-1] = namespaceSeparator; + if (parser->m_ns_triplets) { /* append namespace separator and prefix */ + parser->m_tempPool.ptr[-1] = parser->m_namespaceSeparator; s = b->prefix->name; do { - if (!poolAppendChar(&tempPool, *s)) + if (! poolAppendChar(&parser->m_tempPool, *s)) return XML_ERROR_NO_MEMORY; } while (*s++); } /* store expanded name in attribute list */ - s = poolStart(&tempPool); - poolFinish(&tempPool); + s = poolStart(&parser->m_tempPool); + poolFinish(&parser->m_tempPool); appAtts[i] = s; /* fill empty slot with new version, uriName and hash value */ - nsAtts[j].version = version; - nsAtts[j].hash = uriHash; - nsAtts[j].uriName = s; + parser->m_nsAtts[j].version = version; + parser->m_nsAtts[j].hash = uriHash; + parser->m_nsAtts[j].uriName = s; - if (!--nPrefixes) + if (! --nPrefixes) { + i += 2; break; - } - else /* not prefixed */ - ((XML_Char *)s)[-1] = 0; /* clear flag */ + } + } else /* not prefixed */ + ((XML_Char *)s)[-1] = 0; /* clear flag */ } } /* clear flags for the remaining attributes */ @@ -2859,68 +3630,224 @@ storeAtts(XML_Parser parser, const ENCODING *enc, for (binding = *bindingsPtr; binding; binding = binding->nextTagBinding) binding->attId->name[-1] = 0; - if (!ns) + if (! parser->m_ns) return XML_ERROR_NONE; /* expand the element type name */ if (elementType->prefix) { binding = elementType->prefix->binding; - if (!binding) + if (! binding) return XML_ERROR_UNBOUND_PREFIX; localPart = tagNamePtr->str; - while (*localPart++ != XML_T(':')) + while (*localPart++ != XML_T(ASCII_COLON)) ; - } - else if (dtd->defaultPrefix.binding) { + } else if (dtd->defaultPrefix.binding) { binding = dtd->defaultPrefix.binding; localPart = tagNamePtr->str; - } - else + } else return XML_ERROR_NONE; prefixLen = 0; - if (ns_triplets && binding->prefix->name) { + if (parser->m_ns_triplets && binding->prefix->name) { for (; binding->prefix->name[prefixLen++];) - ; + ; /* prefixLen includes null terminator */ } tagNamePtr->localPart = localPart; tagNamePtr->uriLen = binding->uriLen; tagNamePtr->prefix = binding->prefix->name; tagNamePtr->prefixLen = prefixLen; for (i = 0; localPart[i++];) - ; + ; /* i includes null terminator */ + + /* Detect and prevent integer overflow */ + if (binding->uriLen > INT_MAX - prefixLen + || i > INT_MAX - (binding->uriLen + prefixLen)) { + return XML_ERROR_NO_MEMORY; + } + n = i + binding->uriLen + prefixLen; if (n > binding->uriAlloc) { TAG *p; - uri = (XML_Char *)MALLOC((n + EXPAND_SPARE) * sizeof(XML_Char)); - if (!uri) + + /* Detect and prevent integer overflow */ + if (n > INT_MAX - EXPAND_SPARE) { + return XML_ERROR_NO_MEMORY; + } + /* Detect and prevent integer overflow. + * The preprocessor guard addresses the "always false" warning + * from -Wtype-limits on platforms where + * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ +#if UINT_MAX >= SIZE_MAX + if ((unsigned)(n + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) { + return XML_ERROR_NO_MEMORY; + } +#endif + + uri = (XML_Char *)MALLOC(parser, (n + EXPAND_SPARE) * sizeof(XML_Char)); + if (! uri) return XML_ERROR_NO_MEMORY; binding->uriAlloc = n + EXPAND_SPARE; memcpy(uri, binding->uri, binding->uriLen * sizeof(XML_Char)); - for (p = tagStack; p; p = p->parent) + for (p = parser->m_tagStack; p; p = p->parent) if (p->name.str == binding->uri) p->name.str = uri; - FREE(binding->uri); + FREE(parser, binding->uri); binding->uri = uri; } + /* if m_namespaceSeparator != '\0' then uri includes it already */ uri = binding->uri + binding->uriLen; memcpy(uri, localPart, i * sizeof(XML_Char)); + /* we always have a namespace separator between localPart and prefix */ if (prefixLen) { - uri = uri + (i - 1); - if (namespaceSeparator) - *uri = namespaceSeparator; + uri += i - 1; + *uri = parser->m_namespaceSeparator; /* replace null terminator */ memcpy(uri + 1, binding->prefix->name, prefixLen * sizeof(XML_Char)); } tagNamePtr->str = binding->uri; return XML_ERROR_NONE; } +static XML_Bool +is_rfc3986_uri_char(XML_Char candidate) { + // For the RFC 3986 ANBF grammar see + // https://datatracker.ietf.org/doc/html/rfc3986#appendix-A + + switch (candidate) { + // From rule "ALPHA" (uppercase half) + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + + // From rule "ALPHA" (lowercase half) + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': + + // From rule "DIGIT" + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + + // From rule "pct-encoded" + case '%': + + // From rule "unreserved" + case '-': + case '.': + case '_': + case '~': + + // From rule "gen-delims" + case ':': + case '/': + case '?': + case '#': + case '[': + case ']': + case '@': + + // From rule "sub-delims" + case '!': + case '$': + case '&': + case '\'': + case '(': + case ')': + case '*': + case '+': + case ',': + case ';': + case '=': + return XML_TRUE; + + default: + return XML_FALSE; + } +} + /* addBinding() overwrites the value of prefix->binding without checking. Therefore one must keep track of the old value outside of addBinding(). */ static enum XML_Error addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, - const XML_Char *uri, BINDING **bindingsPtr) -{ + const XML_Char *uri, BINDING **bindingsPtr) { + // "http://www.w3.org/XML/1998/namespace" + static const XML_Char xmlNamespace[] + = {ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, + ASCII_SLASH, ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w, + ASCII_PERIOD, ASCII_w, ASCII_3, ASCII_PERIOD, ASCII_o, + ASCII_r, ASCII_g, ASCII_SLASH, ASCII_X, ASCII_M, + ASCII_L, ASCII_SLASH, ASCII_1, ASCII_9, ASCII_9, + ASCII_8, ASCII_SLASH, ASCII_n, ASCII_a, ASCII_m, + ASCII_e, ASCII_s, ASCII_p, ASCII_a, ASCII_c, + ASCII_e, '\0'}; + static const int xmlLen = (int)sizeof(xmlNamespace) / sizeof(XML_Char) - 1; + // "http://www.w3.org/2000/xmlns/" + static const XML_Char xmlnsNamespace[] + = {ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH, + ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD, ASCII_w, + ASCII_3, ASCII_PERIOD, ASCII_o, ASCII_r, ASCII_g, ASCII_SLASH, + ASCII_2, ASCII_0, ASCII_0, ASCII_0, ASCII_SLASH, ASCII_x, + ASCII_m, ASCII_l, ASCII_n, ASCII_s, ASCII_SLASH, '\0'}; + static const int xmlnsLen + = (int)sizeof(xmlnsNamespace) / sizeof(XML_Char) - 1; + + XML_Bool mustBeXML = XML_FALSE; + XML_Bool isXML = XML_TRUE; + XML_Bool isXMLNS = XML_TRUE; + BINDING *b; int len; @@ -2928,51 +3855,132 @@ addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, if (*uri == XML_T('\0') && prefix->name) return XML_ERROR_UNDECLARING_PREFIX; - for (len = 0; uri[len]; len++) - ; - if (namespaceSeparator) + if (prefix->name && prefix->name[0] == XML_T(ASCII_x) + && prefix->name[1] == XML_T(ASCII_m) + && prefix->name[2] == XML_T(ASCII_l)) { + /* Not allowed to bind xmlns */ + if (prefix->name[3] == XML_T(ASCII_n) && prefix->name[4] == XML_T(ASCII_s) + && prefix->name[5] == XML_T('\0')) + return XML_ERROR_RESERVED_PREFIX_XMLNS; + + if (prefix->name[3] == XML_T('\0')) + mustBeXML = XML_TRUE; + } + + for (len = 0; uri[len]; len++) { + if (isXML && (len > xmlLen || uri[len] != xmlNamespace[len])) + isXML = XML_FALSE; + + if (! mustBeXML && isXMLNS + && (len > xmlnsLen || uri[len] != xmlnsNamespace[len])) + isXMLNS = XML_FALSE; + + // NOTE: While Expat does not validate namespace URIs against RFC 3986 + // today (and is not REQUIRED to do so with regard to the XML 1.0 + // namespaces specification) we have to at least make sure, that + // the application on top of Expat (that is likely splitting expanded + // element names ("qualified names") of form + // "[uri sep] local [sep prefix] '\0'" back into 1, 2 or 3 pieces + // in its element handler code) cannot be confused by an attacker + // putting additional namespace separator characters into namespace + // declarations. That would be ambiguous and not to be expected. + // + // While the HTML API docs of function XML_ParserCreateNS have been + // advising against use of a namespace separator character that can + // appear in a URI for >20 years now, some widespread applications + // are using URI characters (':' (colon) in particular) for a + // namespace separator, in practice. To keep these applications + // functional, we only reject namespaces URIs containing the + // application-chosen namespace separator if the chosen separator + // is a non-URI character with regard to RFC 3986. + if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator) + && ! is_rfc3986_uri_char(uri[len])) { + return XML_ERROR_SYNTAX; + } + } + isXML = isXML && len == xmlLen; + isXMLNS = isXMLNS && len == xmlnsLen; + + if (mustBeXML != isXML) + return mustBeXML ? XML_ERROR_RESERVED_PREFIX_XML + : XML_ERROR_RESERVED_NAMESPACE_URI; + + if (isXMLNS) + return XML_ERROR_RESERVED_NAMESPACE_URI; + + if (parser->m_namespaceSeparator) len++; - if (freeBindingList) { - b = freeBindingList; + if (parser->m_freeBindingList) { + b = parser->m_freeBindingList; if (len > b->uriAlloc) { - XML_Char *temp = (XML_Char *)REALLOC(b->uri, - sizeof(XML_Char) * (len + EXPAND_SPARE)); + /* Detect and prevent integer overflow */ + if (len > INT_MAX - EXPAND_SPARE) { + return XML_ERROR_NO_MEMORY; + } + + /* Detect and prevent integer overflow. + * The preprocessor guard addresses the "always false" warning + * from -Wtype-limits on platforms where + * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ +#if UINT_MAX >= SIZE_MAX + if ((unsigned)(len + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) { + return XML_ERROR_NO_MEMORY; + } +#endif + + XML_Char *temp = (XML_Char *)REALLOC( + parser, b->uri, sizeof(XML_Char) * (len + EXPAND_SPARE)); if (temp == NULL) return XML_ERROR_NO_MEMORY; b->uri = temp; b->uriAlloc = len + EXPAND_SPARE; } - freeBindingList = b->nextTagBinding; - } - else { - b = (BINDING *)MALLOC(sizeof(BINDING)); - if (!b) + parser->m_freeBindingList = b->nextTagBinding; + } else { + b = (BINDING *)MALLOC(parser, sizeof(BINDING)); + if (! b) + return XML_ERROR_NO_MEMORY; + + /* Detect and prevent integer overflow */ + if (len > INT_MAX - EXPAND_SPARE) { + return XML_ERROR_NO_MEMORY; + } + /* Detect and prevent integer overflow. + * The preprocessor guard addresses the "always false" warning + * from -Wtype-limits on platforms where + * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ +#if UINT_MAX >= SIZE_MAX + if ((unsigned)(len + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) { return XML_ERROR_NO_MEMORY; - b->uri = (XML_Char *)MALLOC(sizeof(XML_Char) * (len + EXPAND_SPARE)); - if (!b->uri) { - FREE(b); + } +#endif + + b->uri + = (XML_Char *)MALLOC(parser, sizeof(XML_Char) * (len + EXPAND_SPARE)); + if (! b->uri) { + FREE(parser, b); return XML_ERROR_NO_MEMORY; } b->uriAlloc = len + EXPAND_SPARE; } b->uriLen = len; memcpy(b->uri, uri, len * sizeof(XML_Char)); - if (namespaceSeparator) - b->uri[len - 1] = namespaceSeparator; + if (parser->m_namespaceSeparator) + b->uri[len - 1] = parser->m_namespaceSeparator; b->prefix = prefix; b->attId = attId; b->prevPrefixBinding = prefix->binding; /* NULL binding when default namespace undeclared */ - if (*uri == XML_T('\0') && prefix == &_dtd->defaultPrefix) + if (*uri == XML_T('\0') && prefix == &parser->m_dtd->defaultPrefix) prefix->binding = NULL; else prefix->binding = b; b->nextTagBinding = *bindingsPtr; *bindingsPtr = b; /* if attId == NULL then we are not starting a namespace scope */ - if (attId && startNamespaceDeclHandler) - startNamespaceDeclHandler(handlerArg, prefix->name, - prefix->binding ? uri : 0); + if (attId && parser->m_startNamespaceDeclHandler) + parser->m_startNamespaceDeclHandler(parser->m_handlerArg, prefix->name, + prefix->binding ? uri : 0); return XML_ERROR_NONE; } @@ -2980,22 +3988,19 @@ addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, the whole file is parsed with one call. */ static enum XML_Error PTRCALL -cdataSectionProcessor(XML_Parser parser, - const char *start, - const char *end, - const char **endPtr) -{ - enum XML_Error result = doCdataSection(parser, encoding, &start, end, - endPtr, (XML_Bool)!finalBuffer); +cdataSectionProcessor(XML_Parser parser, const char *start, const char *end, + const char **endPtr) { + enum XML_Error result = doCdataSection( + parser, parser->m_encoding, &start, end, endPtr, + (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_ACCOUNT_DIRECT); if (result != XML_ERROR_NONE) return result; if (start) { - if (parentParser) { /* we are parsing an external entity */ - processor = externalEntityContentProcessor; + if (parser->m_parentParser) { /* we are parsing an external entity */ + parser->m_processor = externalEntityContentProcessor; return externalEntityContentProcessor(parser, start, end, endPtr); - } - else { - processor = contentProcessor; + } else { + parser->m_processor = contentProcessor; return contentProcessor(parser, start, end, endPtr); } } @@ -3006,79 +4011,82 @@ cdataSectionProcessor(XML_Parser parser, the section is not yet closed. */ static enum XML_Error -doCdataSection(XML_Parser parser, - const ENCODING *enc, - const char **startPtr, - const char *end, - const char **nextPtr, - XML_Bool haveMore) -{ +doCdataSection(XML_Parser parser, const ENCODING *enc, const char **startPtr, + const char *end, const char **nextPtr, XML_Bool haveMore, + enum XML_Account account) { const char *s = *startPtr; const char **eventPP; const char **eventEndPP; - if (enc == encoding) { - eventPP = &eventPtr; + if (enc == parser->m_encoding) { + eventPP = &parser->m_eventPtr; *eventPP = s; - eventEndPP = &eventEndPtr; - } - else { - eventPP = &(openInternalEntities->internalEventPtr); - eventEndPP = &(openInternalEntities->internalEventEndPtr); + eventEndPP = &parser->m_eventEndPtr; + } else { + eventPP = &(parser->m_openInternalEntities->internalEventPtr); + eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr); } *eventPP = s; *startPtr = NULL; for (;;) { - const char *next; + const char *next = s; /* in case of XML_TOK_NONE or XML_TOK_PARTIAL */ int tok = XmlCdataSectionTok(enc, s, end, &next); +#ifdef XML_DTD + if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, account)) { + accountingOnAbort(parser); + return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; + } +#else + UNUSED_P(account); +#endif *eventEndPP = next; switch (tok) { case XML_TOK_CDATA_SECT_CLOSE: - if (endCdataSectionHandler) - endCdataSectionHandler(handlerArg); -#if 0 + if (parser->m_endCdataSectionHandler) + parser->m_endCdataSectionHandler(parser->m_handlerArg); + /* BEGIN disabled code */ /* see comment under XML_TOK_CDATA_SECT_OPEN */ - else if (characterDataHandler) - characterDataHandler(handlerArg, dataBuf, 0); -#endif - else if (defaultHandler) + else if (0 && parser->m_characterDataHandler) + parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf, + 0); + /* END disabled code */ + else if (parser->m_defaultHandler) reportDefault(parser, enc, s, next); *startPtr = next; *nextPtr = next; - if (parsing == XML_FINISHED) + if (parser->m_parsingStatus.parsing == XML_FINISHED) return XML_ERROR_ABORTED; else return XML_ERROR_NONE; case XML_TOK_DATA_NEWLINE: - if (characterDataHandler) { + if (parser->m_characterDataHandler) { XML_Char c = 0xA; - characterDataHandler(handlerArg, &c, 1); - } - else if (defaultHandler) + parser->m_characterDataHandler(parser->m_handlerArg, &c, 1); + } else if (parser->m_defaultHandler) reportDefault(parser, enc, s, next); break; - case XML_TOK_DATA_CHARS: - if (characterDataHandler) { + case XML_TOK_DATA_CHARS: { + XML_CharacterDataHandler charDataHandler = parser->m_characterDataHandler; + if (charDataHandler) { if (MUST_CONVERT(enc, s)) { for (;;) { - ICHAR *dataPtr = (ICHAR *)dataBuf; - XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd); + ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf; + const enum XML_Convert_Result convert_res = XmlConvert( + enc, &s, next, &dataPtr, (ICHAR *)parser->m_dataBufEnd); *eventEndPP = next; - characterDataHandler(handlerArg, dataBuf, - dataPtr - (ICHAR *)dataBuf); - if (s == next) + charDataHandler(parser->m_handlerArg, parser->m_dataBuf, + (int)(dataPtr - (ICHAR *)parser->m_dataBuf)); + if ((convert_res == XML_CONVERT_COMPLETED) + || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) break; *eventPP = s; } - } - else - characterDataHandler(handlerArg, - (XML_Char *)s, - (XML_Char *)next - (XML_Char *)s); - } - else if (defaultHandler) + } else + charDataHandler(parser->m_handlerArg, (XML_Char *)s, + (int)((XML_Char *)next - (XML_Char *)s)); + } else if (parser->m_defaultHandler) reportDefault(parser, enc, s, next); - break; + } break; case XML_TOK_INVALID: *eventPP = next; return XML_ERROR_INVALID_TOKEN; @@ -3096,18 +4104,26 @@ doCdataSection(XML_Parser parser, } return XML_ERROR_UNCLOSED_CDATA_SECTION; default: + /* Every token returned by XmlCdataSectionTok() has its own + * explicit case, so this default case will never be executed. + * We retain it as a safety net and exclude it from the coverage + * statistics. + * + * LCOV_EXCL_START + */ *eventPP = next; return XML_ERROR_UNEXPECTED_STATE; + /* LCOV_EXCL_STOP */ } *eventPP = s = next; - switch (parsing) { + switch (parser->m_parsingStatus.parsing) { case XML_SUSPENDED: *nextPtr = next; return XML_ERROR_NONE; case XML_FINISHED: return XML_ERROR_ABORTED; - default: ; + default:; } } /* not reached */ @@ -3119,17 +4135,15 @@ doCdataSection(XML_Parser parser, the whole file is parsed with one call. */ static enum XML_Error PTRCALL -ignoreSectionProcessor(XML_Parser parser, - const char *start, - const char *end, - const char **endPtr) -{ - enum XML_Error result = doIgnoreSection(parser, encoding, &start, end, - endPtr, (XML_Bool)!finalBuffer); +ignoreSectionProcessor(XML_Parser parser, const char *start, const char *end, + const char **endPtr) { + enum XML_Error result + = doIgnoreSection(parser, parser->m_encoding, &start, end, endPtr, + (XML_Bool)! parser->m_parsingStatus.finalBuffer); if (result != XML_ERROR_NONE) return result; if (start) { - processor = prologProcessor; + parser->m_processor = prologProcessor; return prologProcessor(parser, start, end, endPtr); } return result; @@ -3139,38 +4153,51 @@ ignoreSectionProcessor(XML_Parser parser, if the section is not yet closed. */ static enum XML_Error -doIgnoreSection(XML_Parser parser, - const ENCODING *enc, - const char **startPtr, - const char *end, - const char **nextPtr, - XML_Bool haveMore) -{ - const char *next; +doIgnoreSection(XML_Parser parser, const ENCODING *enc, const char **startPtr, + const char *end, const char **nextPtr, XML_Bool haveMore) { + const char *next = *startPtr; /* in case of XML_TOK_NONE or XML_TOK_PARTIAL */ int tok; const char *s = *startPtr; const char **eventPP; const char **eventEndPP; - if (enc == encoding) { - eventPP = &eventPtr; + if (enc == parser->m_encoding) { + eventPP = &parser->m_eventPtr; *eventPP = s; - eventEndPP = &eventEndPtr; - } - else { - eventPP = &(openInternalEntities->internalEventPtr); - eventEndPP = &(openInternalEntities->internalEventEndPtr); + eventEndPP = &parser->m_eventEndPtr; + } else { + /* It's not entirely clear, but it seems the following two lines + * of code cannot be executed. The only occasions on which 'enc' + * is not 'encoding' are when this function is called + * from the internal entity processing, and IGNORE sections are an + * error in internal entities. + * + * Since it really isn't clear that this is true, we keep the code + * and just remove it from our coverage tests. + * + * LCOV_EXCL_START + */ + eventPP = &(parser->m_openInternalEntities->internalEventPtr); + eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr); + /* LCOV_EXCL_STOP */ } *eventPP = s; *startPtr = NULL; tok = XmlIgnoreSectionTok(enc, s, end, &next); +# ifdef XML_DTD + if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, + XML_ACCOUNT_DIRECT)) { + accountingOnAbort(parser); + return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; + } +# endif *eventEndPP = next; switch (tok) { case XML_TOK_IGNORE_SECT: - if (defaultHandler) + if (parser->m_defaultHandler) reportDefault(parser, enc, s, next); *startPtr = next; *nextPtr = next; - if (parsing == XML_FINISHED) + if (parser->m_parsingStatus.parsing == XML_FINISHED) return XML_ERROR_ABORTED; else return XML_ERROR_NONE; @@ -3191,8 +4218,16 @@ doIgnoreSection(XML_Parser parser, } return XML_ERROR_SYNTAX; /* XML_ERROR_UNCLOSED_IGNORE_SECTION */ default: + /* All of the tokens that XmlIgnoreSectionTok() returns have + * explicit cases to handle them, so this default case is never + * executed. We keep it as a safety net anyway, and remove it + * from our test coverage statistics. + * + * LCOV_EXCL_START + */ *eventPP = next; return XML_ERROR_UNEXPECTED_STATE; + /* LCOV_EXCL_STOP */ } /* not reached */ } @@ -3200,38 +4235,38 @@ doIgnoreSection(XML_Parser parser, #endif /* XML_DTD */ static enum XML_Error -initializeEncoding(XML_Parser parser) -{ +initializeEncoding(XML_Parser parser) { const char *s; #ifdef XML_UNICODE char encodingBuf[128]; - if (!protocolEncodingName) + /* See comments about `protocolEncodingName` in parserInit() */ + if (! parser->m_protocolEncodingName) s = NULL; else { int i; - for (i = 0; protocolEncodingName[i]; i++) { + for (i = 0; parser->m_protocolEncodingName[i]; i++) { if (i == sizeof(encodingBuf) - 1 - || (protocolEncodingName[i] & ~0x7f) != 0) { + || (parser->m_protocolEncodingName[i] & ~0x7f) != 0) { encodingBuf[0] = '\0'; break; } - encodingBuf[i] = (char)protocolEncodingName[i]; + encodingBuf[i] = (char)parser->m_protocolEncodingName[i]; } encodingBuf[i] = '\0'; s = encodingBuf; } #else - s = protocolEncodingName; + s = parser->m_protocolEncodingName; #endif - if ((ns ? XmlInitEncodingNS : XmlInitEncoding)(&initEncoding, &encoding, s)) + if ((parser->m_ns ? XmlInitEncodingNS : XmlInitEncoding)( + &parser->m_initEncoding, &parser->m_encoding, s)) return XML_ERROR_NONE; - return handleUnknownEncoding(parser, protocolEncodingName); + return handleUnknownEncoding(parser, parser->m_protocolEncodingName); } static enum XML_Error -processXmlDecl(XML_Parser parser, int isGeneralTextEntity, - const char *s, const char *next) -{ +processXmlDecl(XML_Parser parser, int isGeneralTextEntity, const char *s, + const char *next) { const char *encodingName = NULL; const XML_Char *storedEncName = NULL; const ENCODING *newEncoding = NULL; @@ -3239,88 +4274,91 @@ processXmlDecl(XML_Parser parser, int isGeneralTextEntity, const char *versionend; const XML_Char *storedversion = NULL; int standalone = -1; - if (!(ns - ? XmlParseXmlDeclNS - : XmlParseXmlDecl)(isGeneralTextEntity, - encoding, - s, - next, - &eventPtr, - &version, - &versionend, - &encodingName, - &newEncoding, - &standalone)) { + +#ifdef XML_DTD + if (! accountingDiffTolerated(parser, XML_TOK_XML_DECL, s, next, __LINE__, + XML_ACCOUNT_DIRECT)) { + accountingOnAbort(parser); + return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; + } +#endif + + if (! (parser->m_ns ? XmlParseXmlDeclNS : XmlParseXmlDecl)( + isGeneralTextEntity, parser->m_encoding, s, next, &parser->m_eventPtr, + &version, &versionend, &encodingName, &newEncoding, &standalone)) { if (isGeneralTextEntity) return XML_ERROR_TEXT_DECL; else return XML_ERROR_XML_DECL; } - if (!isGeneralTextEntity && standalone == 1) { - _dtd->standalone = XML_TRUE; + if (! isGeneralTextEntity && standalone == 1) { + parser->m_dtd->standalone = XML_TRUE; #ifdef XML_DTD - if (paramEntityParsing == XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE) - paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER; + if (parser->m_paramEntityParsing + == XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE) + parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER; #endif /* XML_DTD */ } - if (xmlDeclHandler) { + if (parser->m_xmlDeclHandler) { if (encodingName != NULL) { - storedEncName = poolStoreString(&temp2Pool, - encoding, - encodingName, - encodingName - + XmlNameLength(encoding, encodingName)); - if (!storedEncName) - return XML_ERROR_NO_MEMORY; - poolFinish(&temp2Pool); + storedEncName = poolStoreString( + &parser->m_temp2Pool, parser->m_encoding, encodingName, + encodingName + XmlNameLength(parser->m_encoding, encodingName)); + if (! storedEncName) + return XML_ERROR_NO_MEMORY; + poolFinish(&parser->m_temp2Pool); } if (version) { - storedversion = poolStoreString(&temp2Pool, - encoding, - version, - versionend - encoding->minBytesPerChar); - if (!storedversion) + storedversion + = poolStoreString(&parser->m_temp2Pool, parser->m_encoding, version, + versionend - parser->m_encoding->minBytesPerChar); + if (! storedversion) return XML_ERROR_NO_MEMORY; } - xmlDeclHandler(handlerArg, storedversion, storedEncName, standalone); - } - else if (defaultHandler) - reportDefault(parser, encoding, s, next); - if (protocolEncodingName == NULL) { + parser->m_xmlDeclHandler(parser->m_handlerArg, storedversion, storedEncName, + standalone); + } else if (parser->m_defaultHandler) + reportDefault(parser, parser->m_encoding, s, next); + if (parser->m_protocolEncodingName == NULL) { if (newEncoding) { - if (newEncoding->minBytesPerChar != encoding->minBytesPerChar) { - eventPtr = encodingName; + /* Check that the specified encoding does not conflict with what + * the parser has already deduced. Do we have the same number + * of bytes in the smallest representation of a character? If + * this is UTF-16, is it the same endianness? + */ + if (newEncoding->minBytesPerChar != parser->m_encoding->minBytesPerChar + || (newEncoding->minBytesPerChar == 2 + && newEncoding != parser->m_encoding)) { + parser->m_eventPtr = encodingName; return XML_ERROR_INCORRECT_ENCODING; } - encoding = newEncoding; - } - else if (encodingName) { + parser->m_encoding = newEncoding; + } else if (encodingName) { enum XML_Error result; - if (!storedEncName) { + if (! storedEncName) { storedEncName = poolStoreString( - &temp2Pool, encoding, encodingName, - encodingName + XmlNameLength(encoding, encodingName)); - if (!storedEncName) + &parser->m_temp2Pool, parser->m_encoding, encodingName, + encodingName + XmlNameLength(parser->m_encoding, encodingName)); + if (! storedEncName) return XML_ERROR_NO_MEMORY; } result = handleUnknownEncoding(parser, storedEncName); - poolClear(&temp2Pool); + poolClear(&parser->m_temp2Pool); if (result == XML_ERROR_UNKNOWN_ENCODING) - eventPtr = encodingName; + parser->m_eventPtr = encodingName; return result; } } if (storedEncName || storedversion) - poolClear(&temp2Pool); + poolClear(&parser->m_temp2Pool); return XML_ERROR_NONE; } static enum XML_Error -handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName) -{ - if (unknownEncodingHandler) { +handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName) { + if (parser->m_unknownEncodingHandler) { XML_Encoding info; int i; for (i = 0; i < 256; i++) @@ -3328,25 +4366,21 @@ handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName) info.convert = NULL; info.data = NULL; info.release = NULL; - if (unknownEncodingHandler(unknownEncodingHandlerData, encodingName, - &info)) { + if (parser->m_unknownEncodingHandler(parser->m_unknownEncodingHandlerData, + encodingName, &info)) { ENCODING *enc; - unknownEncodingMem = MALLOC(XmlSizeOfUnknownEncoding()); - if (!unknownEncodingMem) { + parser->m_unknownEncodingMem = MALLOC(parser, XmlSizeOfUnknownEncoding()); + if (! parser->m_unknownEncodingMem) { if (info.release) info.release(info.data); return XML_ERROR_NO_MEMORY; } - enc = (ns - ? XmlInitUnknownEncodingNS - : XmlInitUnknownEncoding)(unknownEncodingMem, - info.map, - info.convert, - info.data); + enc = (parser->m_ns ? XmlInitUnknownEncodingNS : XmlInitUnknownEncoding)( + parser->m_unknownEncodingMem, info.map, info.convert, info.data); if (enc) { - unknownEncodingData = info.data; - unknownEncodingRelease = info.release; - encoding = enc; + parser->m_unknownEncodingData = info.data; + parser->m_unknownEncodingRelease = info.release; + parser->m_encoding = enc; return XML_ERROR_NONE; } } @@ -3357,60 +4391,54 @@ handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName) } static enum XML_Error PTRCALL -prologInitProcessor(XML_Parser parser, - const char *s, - const char *end, - const char **nextPtr) -{ +prologInitProcessor(XML_Parser parser, const char *s, const char *end, + const char **nextPtr) { enum XML_Error result = initializeEncoding(parser); if (result != XML_ERROR_NONE) return result; - processor = prologProcessor; + parser->m_processor = prologProcessor; return prologProcessor(parser, s, end, nextPtr); } #ifdef XML_DTD static enum XML_Error PTRCALL -externalParEntInitProcessor(XML_Parser parser, - const char *s, - const char *end, - const char **nextPtr) -{ +externalParEntInitProcessor(XML_Parser parser, const char *s, const char *end, + const char **nextPtr) { enum XML_Error result = initializeEncoding(parser); if (result != XML_ERROR_NONE) return result; /* we know now that XML_Parse(Buffer) has been called, so we consider the external parameter entity read */ - _dtd->paramEntityRead = XML_TRUE; + parser->m_dtd->paramEntityRead = XML_TRUE; - if (prologState.inEntityValue) { - processor = entityValueInitProcessor; + if (parser->m_prologState.inEntityValue) { + parser->m_processor = entityValueInitProcessor; return entityValueInitProcessor(parser, s, end, nextPtr); - } - else { - processor = externalParEntProcessor; + } else { + parser->m_processor = externalParEntProcessor; return externalParEntProcessor(parser, s, end, nextPtr); } } static enum XML_Error PTRCALL -entityValueInitProcessor(XML_Parser parser, - const char *s, - const char *end, - const char **nextPtr) -{ +entityValueInitProcessor(XML_Parser parser, const char *s, const char *end, + const char **nextPtr) { int tok; const char *start = s; const char *next = start; - eventPtr = start; + parser->m_eventPtr = start; for (;;) { - tok = XmlPrologTok(encoding, start, end, &next); - eventEndPtr = next; + tok = XmlPrologTok(parser->m_encoding, start, end, &next); + /* Note: Except for XML_TOK_BOM below, these bytes are accounted later in: + - storeEntityValue + - processXmlDecl + */ + parser->m_eventEndPtr = next; if (tok <= 0) { - if (!finalBuffer && tok != XML_TOK_INVALID) { + if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) { *nextPtr = s; return XML_ERROR_NONE; } @@ -3421,29 +4449,28 @@ entityValueInitProcessor(XML_Parser parser, return XML_ERROR_UNCLOSED_TOKEN; case XML_TOK_PARTIAL_CHAR: return XML_ERROR_PARTIAL_CHAR; - case XML_TOK_NONE: /* start == end */ + case XML_TOK_NONE: /* start == end */ default: break; } /* found end of entity value - can store it now */ - return storeEntityValue(parser, encoding, s, end); - } - else if (tok == XML_TOK_XML_DECL) { + return storeEntityValue(parser, parser->m_encoding, s, end, + XML_ACCOUNT_DIRECT); + } else if (tok == XML_TOK_XML_DECL) { enum XML_Error result; result = processXmlDecl(parser, 0, start, next); if (result != XML_ERROR_NONE) return result; - switch (parsing) { - case XML_SUSPENDED: - *nextPtr = next; - return XML_ERROR_NONE; - case XML_FINISHED: + /* At this point, m_parsingStatus.parsing cannot be XML_SUSPENDED. For + * that to happen, a parameter entity parsing handler must have attempted + * to suspend the parser, which fails and raises an error. The parser can + * be aborted, but can't be suspended. + */ + if (parser->m_parsingStatus.parsing == XML_FINISHED) return XML_ERROR_ABORTED; - default: - *nextPtr = next; - } + *nextPtr = next; /* stop scanning for text declaration - we found one */ - processor = entityValueProcessor; + parser->m_processor = entityValueProcessor; return entityValueProcessor(parser, next, end, nextPtr); } /* If we are at the end of the buffer, this would cause XmlPrologTok to @@ -3453,27 +4480,41 @@ entityValueInitProcessor(XML_Parser parser, then, when this routine is entered the next time, XmlPrologTok will return XML_TOK_INVALID, since the BOM is still in the buffer */ - else if (tok == XML_TOK_BOM && next == end && !finalBuffer) { + else if (tok == XML_TOK_BOM && next == end + && ! parser->m_parsingStatus.finalBuffer) { +# ifdef XML_DTD + if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, + XML_ACCOUNT_DIRECT)) { + accountingOnAbort(parser); + return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; + } +# endif + *nextPtr = next; return XML_ERROR_NONE; } + /* If we get this token, we have the start of what might be a + normal tag, but not a declaration (i.e. it doesn't begin with + "m_eventPtr = start; } } static enum XML_Error PTRCALL -externalParEntProcessor(XML_Parser parser, - const char *s, - const char *end, - const char **nextPtr) -{ +externalParEntProcessor(XML_Parser parser, const char *s, const char *end, + const char **nextPtr) { const char *next = s; int tok; - tok = XmlPrologTok(encoding, s, end, &next); + tok = XmlPrologTok(parser->m_encoding, s, end, &next); if (tok <= 0) { - if (!finalBuffer && tok != XML_TOK_INVALID) { + if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) { *nextPtr = s; return XML_ERROR_NONE; } @@ -3484,40 +4525,48 @@ externalParEntProcessor(XML_Parser parser, return XML_ERROR_UNCLOSED_TOKEN; case XML_TOK_PARTIAL_CHAR: return XML_ERROR_PARTIAL_CHAR; - case XML_TOK_NONE: /* start == end */ + case XML_TOK_NONE: /* start == end */ default: break; } } /* This would cause the next stage, i.e. doProlog to be passed XML_TOK_BOM. However, when parsing an external subset, doProlog will not accept a BOM - as valid, and report a syntax error, so we have to skip the BOM + as valid, and report a syntax error, so we have to skip the BOM, and + account for the BOM bytes. */ else if (tok == XML_TOK_BOM) { + if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, + XML_ACCOUNT_DIRECT)) { + accountingOnAbort(parser); + return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; + } + s = next; - tok = XmlPrologTok(encoding, s, end, &next); + tok = XmlPrologTok(parser->m_encoding, s, end, &next); } - processor = prologProcessor; - return doProlog(parser, encoding, s, end, tok, next, - nextPtr, (XML_Bool)!finalBuffer); + parser->m_processor = prologProcessor; + return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr, + (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE, + XML_ACCOUNT_DIRECT); } static enum XML_Error PTRCALL -entityValueProcessor(XML_Parser parser, - const char *s, - const char *end, - const char **nextPtr) -{ +entityValueProcessor(XML_Parser parser, const char *s, const char *end, + const char **nextPtr) { const char *start = s; const char *next = s; - const ENCODING *enc = encoding; + const ENCODING *enc = parser->m_encoding; int tok; for (;;) { tok = XmlPrologTok(enc, start, end, &next); + /* Note: These bytes are accounted later in: + - storeEntityValue + */ if (tok <= 0) { - if (!finalBuffer && tok != XML_TOK_INVALID) { + if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) { *nextPtr = s; return XML_ERROR_NONE; } @@ -3528,12 +4577,12 @@ entityValueProcessor(XML_Parser parser, return XML_ERROR_UNCLOSED_TOKEN; case XML_TOK_PARTIAL_CHAR: return XML_ERROR_PARTIAL_CHAR; - case XML_TOK_NONE: /* start == end */ + case XML_TOK_NONE: /* start == end */ default: break; } /* found end of entity value - can store it now */ - return storeEntityValue(parser, enc, s, end); + return storeEntityValue(parser, enc, s, end, XML_ACCOUNT_DIRECT); } start = next; } @@ -3542,60 +4591,62 @@ entityValueProcessor(XML_Parser parser, #endif /* XML_DTD */ static enum XML_Error PTRCALL -prologProcessor(XML_Parser parser, - const char *s, - const char *end, - const char **nextPtr) -{ +prologProcessor(XML_Parser parser, const char *s, const char *end, + const char **nextPtr) { const char *next = s; - int tok = XmlPrologTok(encoding, s, end, &next); - return doProlog(parser, encoding, s, end, tok, next, - nextPtr, (XML_Bool)!finalBuffer); + int tok = XmlPrologTok(parser->m_encoding, s, end, &next); + return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr, + (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE, + XML_ACCOUNT_DIRECT); } static enum XML_Error -doProlog(XML_Parser parser, - const ENCODING *enc, - const char *s, - const char *end, - int tok, - const char *next, - const char **nextPtr, - XML_Bool haveMore) -{ +doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end, + int tok, const char *next, const char **nextPtr, XML_Bool haveMore, + XML_Bool allowClosingDoctype, enum XML_Account account) { #ifdef XML_DTD - static const XML_Char externalSubsetName[] = { '#' , '\0' }; + static const XML_Char externalSubsetName[] = {ASCII_HASH, '\0'}; #endif /* XML_DTD */ - static const XML_Char atypeCDATA[] = { 'C', 'D', 'A', 'T', 'A', '\0' }; - static const XML_Char atypeID[] = { 'I', 'D', '\0' }; - static const XML_Char atypeIDREF[] = { 'I', 'D', 'R', 'E', 'F', '\0' }; - static const XML_Char atypeIDREFS[] = { 'I', 'D', 'R', 'E', 'F', 'S', '\0' }; - static const XML_Char atypeENTITY[] = { 'E', 'N', 'T', 'I', 'T', 'Y', '\0' }; - static const XML_Char atypeENTITIES[] = - { 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S', '\0' }; - static const XML_Char atypeNMTOKEN[] = { - 'N', 'M', 'T', 'O', 'K', 'E', 'N', '\0' }; - static const XML_Char atypeNMTOKENS[] = { - 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S', '\0' }; - static const XML_Char notationPrefix[] = { - 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N', '(', '\0' }; - static const XML_Char enumValueSep[] = { '|', '\0' }; - static const XML_Char enumValueStart[] = { '(', '\0' }; + static const XML_Char atypeCDATA[] + = {ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0'}; + static const XML_Char atypeID[] = {ASCII_I, ASCII_D, '\0'}; + static const XML_Char atypeIDREF[] + = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0'}; + static const XML_Char atypeIDREFS[] + = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0'}; + static const XML_Char atypeENTITY[] + = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0'}; + static const XML_Char atypeENTITIES[] + = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, + ASCII_I, ASCII_E, ASCII_S, '\0'}; + static const XML_Char atypeNMTOKEN[] + = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0'}; + static const XML_Char atypeNMTOKENS[] + = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, + ASCII_E, ASCII_N, ASCII_S, '\0'}; + static const XML_Char notationPrefix[] + = {ASCII_N, ASCII_O, ASCII_T, ASCII_A, ASCII_T, + ASCII_I, ASCII_O, ASCII_N, ASCII_LPAREN, '\0'}; + static const XML_Char enumValueSep[] = {ASCII_PIPE, '\0'}; + static const XML_Char enumValueStart[] = {ASCII_LPAREN, '\0'}; + +#ifndef XML_DTD + UNUSED_P(account); +#endif /* save one level of indirection */ - DTD * const dtd = _dtd; + DTD *const dtd = parser->m_dtd; const char **eventPP; const char **eventEndPP; enum XML_Content_Quant quant; - if (enc == encoding) { - eventPP = &eventPtr; - eventEndPP = &eventEndPtr; - } - else { - eventPP = &(openInternalEntities->internalEventPtr); - eventEndPP = &(openInternalEntities->internalEventEndPtr); + if (enc == parser->m_encoding) { + eventPP = &parser->m_eventPtr; + eventEndPP = &parser->m_eventEndPtr; + } else { + eventPP = &(parser->m_openInternalEntities->internalEventPtr); + eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr); } for (;;) { @@ -3616,10 +4667,14 @@ doProlog(XML_Parser parser, return XML_ERROR_UNCLOSED_TOKEN; case XML_TOK_PARTIAL_CHAR: return XML_ERROR_PARTIAL_CHAR; + case -XML_TOK_PROLOG_S: + tok = -tok; + break; case XML_TOK_NONE: #ifdef XML_DTD /* for internal PE NOT referenced between declarations */ - if (enc != encoding && !openInternalEntities->betweenDecl) { + if (enc != parser->m_encoding + && ! parser->m_openInternalEntities->betweenDecl) { *nextPtr = s; return XML_ERROR_NONE; } @@ -3627,8 +4682,8 @@ doProlog(XML_Parser parser, complete markup, not only for external PEs, but also for internal PEs if the reference occurs between declarations. */ - if (isParamEntity || enc != encoding) { - if (XmlTokenRole(&prologState, XML_TOK_NONE, end, end, enc) + if (parser->m_isParamEntity || enc != parser->m_encoding) { + if (XmlTokenRole(&parser->m_prologState, XML_TOK_NONE, end, end, enc) == XML_ROLE_ERROR) return XML_ERROR_INCOMPLETE_PE; *nextPtr = s; @@ -3642,131 +4697,161 @@ doProlog(XML_Parser parser, break; } } - role = XmlTokenRole(&prologState, tok, s, next, enc); + role = XmlTokenRole(&parser->m_prologState, tok, s, next, enc); +#ifdef XML_DTD switch (role) { - case XML_ROLE_XML_DECL: - { - enum XML_Error result = processXmlDecl(parser, 0, s, next); - if (result != XML_ERROR_NONE) - return result; - enc = encoding; - handleDefault = XML_FALSE; - } + case XML_ROLE_INSTANCE_START: // bytes accounted in contentProcessor + case XML_ROLE_XML_DECL: // bytes accounted in processXmlDecl + case XML_ROLE_TEXT_DECL: // bytes accounted in processXmlDecl break; + default: + if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, account)) { + accountingOnAbort(parser); + return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; + } + } +#endif + switch (role) { + case XML_ROLE_XML_DECL: { + enum XML_Error result = processXmlDecl(parser, 0, s, next); + if (result != XML_ERROR_NONE) + return result; + enc = parser->m_encoding; + handleDefault = XML_FALSE; + } break; case XML_ROLE_DOCTYPE_NAME: - if (startDoctypeDeclHandler) { - doctypeName = poolStoreString(&tempPool, enc, s, next); - if (!doctypeName) + if (parser->m_startDoctypeDeclHandler) { + parser->m_doctypeName + = poolStoreString(&parser->m_tempPool, enc, s, next); + if (! parser->m_doctypeName) return XML_ERROR_NO_MEMORY; - poolFinish(&tempPool); - doctypePubid = NULL; + poolFinish(&parser->m_tempPool); + parser->m_doctypePubid = NULL; handleDefault = XML_FALSE; } - doctypeSysid = NULL; /* always initialize to NULL */ + parser->m_doctypeSysid = NULL; /* always initialize to NULL */ break; case XML_ROLE_DOCTYPE_INTERNAL_SUBSET: - if (startDoctypeDeclHandler) { - startDoctypeDeclHandler(handlerArg, doctypeName, doctypeSysid, - doctypePubid, 1); - doctypeName = NULL; - poolClear(&tempPool); + if (parser->m_startDoctypeDeclHandler) { + parser->m_startDoctypeDeclHandler( + parser->m_handlerArg, parser->m_doctypeName, parser->m_doctypeSysid, + parser->m_doctypePubid, 1); + parser->m_doctypeName = NULL; + poolClear(&parser->m_tempPool); handleDefault = XML_FALSE; } break; #ifdef XML_DTD - case XML_ROLE_TEXT_DECL: - { - enum XML_Error result = processXmlDecl(parser, 1, s, next); - if (result != XML_ERROR_NONE) - return result; - enc = encoding; - handleDefault = XML_FALSE; - } - break; + case XML_ROLE_TEXT_DECL: { + enum XML_Error result = processXmlDecl(parser, 1, s, next); + if (result != XML_ERROR_NONE) + return result; + enc = parser->m_encoding; + handleDefault = XML_FALSE; + } break; #endif /* XML_DTD */ case XML_ROLE_DOCTYPE_PUBLIC_ID: #ifdef XML_DTD - useForeignDTD = XML_FALSE; - declEntity = (ENTITY *)lookup(&dtd->paramEntities, - externalSubsetName, - sizeof(ENTITY)); - if (!declEntity) + parser->m_useForeignDTD = XML_FALSE; + parser->m_declEntity = (ENTITY *)lookup( + parser, &dtd->paramEntities, externalSubsetName, sizeof(ENTITY)); + if (! parser->m_declEntity) return XML_ERROR_NO_MEMORY; #endif /* XML_DTD */ dtd->hasParamEntityRefs = XML_TRUE; - if (startDoctypeDeclHandler) { - if (!XmlIsPublicId(enc, s, next, eventPP)) + if (parser->m_startDoctypeDeclHandler) { + XML_Char *pubId; + if (! XmlIsPublicId(enc, s, next, eventPP)) return XML_ERROR_PUBLICID; - doctypePubid = poolStoreString(&tempPool, enc, - s + enc->minBytesPerChar, - next - enc->minBytesPerChar); - if (!doctypePubid) + pubId = poolStoreString(&parser->m_tempPool, enc, + s + enc->minBytesPerChar, + next - enc->minBytesPerChar); + if (! pubId) return XML_ERROR_NO_MEMORY; - normalizePublicId((XML_Char *)doctypePubid); - poolFinish(&tempPool); + normalizePublicId(pubId); + poolFinish(&parser->m_tempPool); + parser->m_doctypePubid = pubId; handleDefault = XML_FALSE; goto alreadyChecked; } /* fall through */ case XML_ROLE_ENTITY_PUBLIC_ID: - if (!XmlIsPublicId(enc, s, next, eventPP)) + if (! XmlIsPublicId(enc, s, next, eventPP)) return XML_ERROR_PUBLICID; alreadyChecked: - if (dtd->keepProcessing && declEntity) { - XML_Char *tem = poolStoreString(&dtd->pool, - enc, - s + enc->minBytesPerChar, - next - enc->minBytesPerChar); - if (!tem) + if (dtd->keepProcessing && parser->m_declEntity) { + XML_Char *tem + = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar, + next - enc->minBytesPerChar); + if (! tem) return XML_ERROR_NO_MEMORY; normalizePublicId(tem); - declEntity->publicId = tem; + parser->m_declEntity->publicId = tem; poolFinish(&dtd->pool); - if (entityDeclHandler) + /* Don't suppress the default handler if we fell through from + * the XML_ROLE_DOCTYPE_PUBLIC_ID case. + */ + if (parser->m_entityDeclHandler && role == XML_ROLE_ENTITY_PUBLIC_ID) handleDefault = XML_FALSE; } break; case XML_ROLE_DOCTYPE_CLOSE: - if (doctypeName) { - startDoctypeDeclHandler(handlerArg, doctypeName, - doctypeSysid, doctypePubid, 0); - poolClear(&tempPool); + if (allowClosingDoctype != XML_TRUE) { + /* Must not close doctype from within expanded parameter entities */ + return XML_ERROR_INVALID_TOKEN; + } + + if (parser->m_doctypeName) { + parser->m_startDoctypeDeclHandler( + parser->m_handlerArg, parser->m_doctypeName, parser->m_doctypeSysid, + parser->m_doctypePubid, 0); + poolClear(&parser->m_tempPool); handleDefault = XML_FALSE; } - /* doctypeSysid will be non-NULL in the case of a previous - XML_ROLE_DOCTYPE_SYSTEM_ID, even if startDoctypeDeclHandler + /* parser->m_doctypeSysid will be non-NULL in the case of a previous + XML_ROLE_DOCTYPE_SYSTEM_ID, even if parser->m_startDoctypeDeclHandler was not set, indicating an external subset */ #ifdef XML_DTD - if (doctypeSysid || useForeignDTD) { - dtd->hasParamEntityRefs = XML_TRUE; /* when docTypeSysid == NULL */ - if (paramEntityParsing && externalEntityRefHandler) { - ENTITY *entity = (ENTITY *)lookup(&dtd->paramEntities, - externalSubsetName, - sizeof(ENTITY)); - if (!entity) - return XML_ERROR_NO_MEMORY; - if (useForeignDTD) - entity->base = curBase; + if (parser->m_doctypeSysid || parser->m_useForeignDTD) { + XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs; + dtd->hasParamEntityRefs = XML_TRUE; + if (parser->m_paramEntityParsing + && parser->m_externalEntityRefHandler) { + ENTITY *entity = (ENTITY *)lookup(parser, &dtd->paramEntities, + externalSubsetName, sizeof(ENTITY)); + if (! entity) { + /* The external subset name "#" will have already been + * inserted into the hash table at the start of the + * external entity parsing, so no allocation will happen + * and lookup() cannot fail. + */ + return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */ + } + if (parser->m_useForeignDTD) + entity->base = parser->m_curBase; dtd->paramEntityRead = XML_FALSE; - if (!externalEntityRefHandler(externalEntityRefHandlerArg, - 0, - entity->base, - entity->systemId, - entity->publicId)) + if (! parser->m_externalEntityRefHandler( + parser->m_externalEntityRefHandlerArg, 0, entity->base, + entity->systemId, entity->publicId)) return XML_ERROR_EXTERNAL_ENTITY_HANDLING; - if (dtd->paramEntityRead && - !dtd->standalone && - notStandaloneHandler && - !notStandaloneHandler(handlerArg)) - return XML_ERROR_NOT_STANDALONE; + if (dtd->paramEntityRead) { + if (! dtd->standalone && parser->m_notStandaloneHandler + && ! parser->m_notStandaloneHandler(parser->m_handlerArg)) + return XML_ERROR_NOT_STANDALONE; + } + /* if we didn't read the foreign DTD then this means that there + is no external subset and we must reset dtd->hasParamEntityRefs + */ + else if (! parser->m_doctypeSysid) + dtd->hasParamEntityRefs = hadParamEntityRefs; /* end of DTD - no need to update dtd->keepProcessing */ } - useForeignDTD = XML_FALSE; + parser->m_useForeignDTD = XML_FALSE; } #endif /* XML_DTD */ - if (endDoctypeDeclHandler) { - endDoctypeDeclHandler(handlerArg); + if (parser->m_endDoctypeDeclHandler) { + parser->m_endDoctypeDeclHandler(parser->m_handlerArg); handleDefault = XML_FALSE; } break; @@ -3775,118 +4860,122 @@ doProlog(XML_Parser parser, /* if there is no DOCTYPE declaration then now is the last chance to read the foreign DTD */ - if (useForeignDTD) { + if (parser->m_useForeignDTD) { + XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs; dtd->hasParamEntityRefs = XML_TRUE; - if (paramEntityParsing && externalEntityRefHandler) { - ENTITY *entity = (ENTITY *)lookup(&dtd->paramEntities, - externalSubsetName, - sizeof(ENTITY)); - if (!entity) + if (parser->m_paramEntityParsing + && parser->m_externalEntityRefHandler) { + ENTITY *entity = (ENTITY *)lookup(parser, &dtd->paramEntities, + externalSubsetName, sizeof(ENTITY)); + if (! entity) return XML_ERROR_NO_MEMORY; - entity->base = curBase; + entity->base = parser->m_curBase; dtd->paramEntityRead = XML_FALSE; - if (!externalEntityRefHandler(externalEntityRefHandlerArg, - 0, - entity->base, - entity->systemId, - entity->publicId)) + if (! parser->m_externalEntityRefHandler( + parser->m_externalEntityRefHandlerArg, 0, entity->base, + entity->systemId, entity->publicId)) return XML_ERROR_EXTERNAL_ENTITY_HANDLING; - if (dtd->paramEntityRead && - !dtd->standalone && - notStandaloneHandler && - !notStandaloneHandler(handlerArg)) - return XML_ERROR_NOT_STANDALONE; + if (dtd->paramEntityRead) { + if (! dtd->standalone && parser->m_notStandaloneHandler + && ! parser->m_notStandaloneHandler(parser->m_handlerArg)) + return XML_ERROR_NOT_STANDALONE; + } + /* if we didn't read the foreign DTD then this means that there + is no external subset and we must reset dtd->hasParamEntityRefs + */ + else + dtd->hasParamEntityRefs = hadParamEntityRefs; /* end of DTD - no need to update dtd->keepProcessing */ } } #endif /* XML_DTD */ - processor = contentProcessor; + parser->m_processor = contentProcessor; return contentProcessor(parser, s, end, nextPtr); case XML_ROLE_ATTLIST_ELEMENT_NAME: - declElementType = getElementType(parser, enc, s, next); - if (!declElementType) + parser->m_declElementType = getElementType(parser, enc, s, next); + if (! parser->m_declElementType) return XML_ERROR_NO_MEMORY; goto checkAttListDeclHandler; case XML_ROLE_ATTRIBUTE_NAME: - declAttributeId = getAttributeId(parser, enc, s, next); - if (!declAttributeId) + parser->m_declAttributeId = getAttributeId(parser, enc, s, next); + if (! parser->m_declAttributeId) return XML_ERROR_NO_MEMORY; - declAttributeIsCdata = XML_FALSE; - declAttributeType = NULL; - declAttributeIsId = XML_FALSE; + parser->m_declAttributeIsCdata = XML_FALSE; + parser->m_declAttributeType = NULL; + parser->m_declAttributeIsId = XML_FALSE; goto checkAttListDeclHandler; case XML_ROLE_ATTRIBUTE_TYPE_CDATA: - declAttributeIsCdata = XML_TRUE; - declAttributeType = atypeCDATA; + parser->m_declAttributeIsCdata = XML_TRUE; + parser->m_declAttributeType = atypeCDATA; goto checkAttListDeclHandler; case XML_ROLE_ATTRIBUTE_TYPE_ID: - declAttributeIsId = XML_TRUE; - declAttributeType = atypeID; + parser->m_declAttributeIsId = XML_TRUE; + parser->m_declAttributeType = atypeID; goto checkAttListDeclHandler; case XML_ROLE_ATTRIBUTE_TYPE_IDREF: - declAttributeType = atypeIDREF; + parser->m_declAttributeType = atypeIDREF; goto checkAttListDeclHandler; case XML_ROLE_ATTRIBUTE_TYPE_IDREFS: - declAttributeType = atypeIDREFS; + parser->m_declAttributeType = atypeIDREFS; goto checkAttListDeclHandler; case XML_ROLE_ATTRIBUTE_TYPE_ENTITY: - declAttributeType = atypeENTITY; + parser->m_declAttributeType = atypeENTITY; goto checkAttListDeclHandler; case XML_ROLE_ATTRIBUTE_TYPE_ENTITIES: - declAttributeType = atypeENTITIES; + parser->m_declAttributeType = atypeENTITIES; goto checkAttListDeclHandler; case XML_ROLE_ATTRIBUTE_TYPE_NMTOKEN: - declAttributeType = atypeNMTOKEN; + parser->m_declAttributeType = atypeNMTOKEN; goto checkAttListDeclHandler; case XML_ROLE_ATTRIBUTE_TYPE_NMTOKENS: - declAttributeType = atypeNMTOKENS; + parser->m_declAttributeType = atypeNMTOKENS; checkAttListDeclHandler: - if (dtd->keepProcessing && attlistDeclHandler) + if (dtd->keepProcessing && parser->m_attlistDeclHandler) handleDefault = XML_FALSE; break; case XML_ROLE_ATTRIBUTE_ENUM_VALUE: case XML_ROLE_ATTRIBUTE_NOTATION_VALUE: - if (dtd->keepProcessing && attlistDeclHandler) { + if (dtd->keepProcessing && parser->m_attlistDeclHandler) { const XML_Char *prefix; - if (declAttributeType) { + if (parser->m_declAttributeType) { prefix = enumValueSep; + } else { + prefix = (role == XML_ROLE_ATTRIBUTE_NOTATION_VALUE ? notationPrefix + : enumValueStart); } - else { - prefix = (role == XML_ROLE_ATTRIBUTE_NOTATION_VALUE - ? notationPrefix - : enumValueStart); - } - if (!poolAppendString(&tempPool, prefix)) + if (! poolAppendString(&parser->m_tempPool, prefix)) return XML_ERROR_NO_MEMORY; - if (!poolAppend(&tempPool, enc, s, next)) + if (! poolAppend(&parser->m_tempPool, enc, s, next)) return XML_ERROR_NO_MEMORY; - declAttributeType = tempPool.start; + parser->m_declAttributeType = parser->m_tempPool.start; handleDefault = XML_FALSE; } break; case XML_ROLE_IMPLIED_ATTRIBUTE_VALUE: case XML_ROLE_REQUIRED_ATTRIBUTE_VALUE: if (dtd->keepProcessing) { - if (!defineAttribute(declElementType, declAttributeId, - declAttributeIsCdata, declAttributeIsId, - 0, parser)) + if (! defineAttribute(parser->m_declElementType, + parser->m_declAttributeId, + parser->m_declAttributeIsCdata, + parser->m_declAttributeIsId, 0, parser)) return XML_ERROR_NO_MEMORY; - if (attlistDeclHandler && declAttributeType) { - if (*declAttributeType == XML_T('(') - || (*declAttributeType == XML_T('N') - && declAttributeType[1] == XML_T('O'))) { + if (parser->m_attlistDeclHandler && parser->m_declAttributeType) { + if (*parser->m_declAttributeType == XML_T(ASCII_LPAREN) + || (*parser->m_declAttributeType == XML_T(ASCII_N) + && parser->m_declAttributeType[1] == XML_T(ASCII_O))) { /* Enumerated or Notation type */ - if (!poolAppendChar(&tempPool, XML_T(')')) - || !poolAppendChar(&tempPool, XML_T('\0'))) + if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_RPAREN)) + || ! poolAppendChar(&parser->m_tempPool, XML_T('\0'))) return XML_ERROR_NO_MEMORY; - declAttributeType = tempPool.start; - poolFinish(&tempPool); + parser->m_declAttributeType = parser->m_tempPool.start; + poolFinish(&parser->m_tempPool); } *eventEndPP = s; - attlistDeclHandler(handlerArg, declElementType->name, - declAttributeId->name, declAttributeType, - 0, role == XML_ROLE_REQUIRED_ATTRIBUTE_VALUE); - poolClear(&tempPool); + parser->m_attlistDeclHandler( + parser->m_handlerArg, parser->m_declElementType->name, + parser->m_declAttributeId->name, parser->m_declAttributeType, 0, + role == XML_ROLE_REQUIRED_ATTRIBUTE_VALUE); + poolClear(&parser->m_tempPool); handleDefault = XML_FALSE; } } @@ -3895,61 +4984,59 @@ doProlog(XML_Parser parser, case XML_ROLE_FIXED_ATTRIBUTE_VALUE: if (dtd->keepProcessing) { const XML_Char *attVal; - enum XML_Error result = - storeAttributeValue(parser, enc, declAttributeIsCdata, - s + enc->minBytesPerChar, - next - enc->minBytesPerChar, - &dtd->pool); + enum XML_Error result = storeAttributeValue( + parser, enc, parser->m_declAttributeIsCdata, + s + enc->minBytesPerChar, next - enc->minBytesPerChar, &dtd->pool, + XML_ACCOUNT_NONE); if (result) return result; attVal = poolStart(&dtd->pool); poolFinish(&dtd->pool); /* ID attributes aren't allowed to have a default */ - if (!defineAttribute(declElementType, declAttributeId, - declAttributeIsCdata, XML_FALSE, attVal, parser)) + if (! defineAttribute( + parser->m_declElementType, parser->m_declAttributeId, + parser->m_declAttributeIsCdata, XML_FALSE, attVal, parser)) return XML_ERROR_NO_MEMORY; - if (attlistDeclHandler && declAttributeType) { - if (*declAttributeType == XML_T('(') - || (*declAttributeType == XML_T('N') - && declAttributeType[1] == XML_T('O'))) { + if (parser->m_attlistDeclHandler && parser->m_declAttributeType) { + if (*parser->m_declAttributeType == XML_T(ASCII_LPAREN) + || (*parser->m_declAttributeType == XML_T(ASCII_N) + && parser->m_declAttributeType[1] == XML_T(ASCII_O))) { /* Enumerated or Notation type */ - if (!poolAppendChar(&tempPool, XML_T(')')) - || !poolAppendChar(&tempPool, XML_T('\0'))) + if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_RPAREN)) + || ! poolAppendChar(&parser->m_tempPool, XML_T('\0'))) return XML_ERROR_NO_MEMORY; - declAttributeType = tempPool.start; - poolFinish(&tempPool); + parser->m_declAttributeType = parser->m_tempPool.start; + poolFinish(&parser->m_tempPool); } *eventEndPP = s; - attlistDeclHandler(handlerArg, declElementType->name, - declAttributeId->name, declAttributeType, - attVal, - role == XML_ROLE_FIXED_ATTRIBUTE_VALUE); - poolClear(&tempPool); + parser->m_attlistDeclHandler( + parser->m_handlerArg, parser->m_declElementType->name, + parser->m_declAttributeId->name, parser->m_declAttributeType, + attVal, role == XML_ROLE_FIXED_ATTRIBUTE_VALUE); + poolClear(&parser->m_tempPool); handleDefault = XML_FALSE; } } break; case XML_ROLE_ENTITY_VALUE: if (dtd->keepProcessing) { - enum XML_Error result = storeEntityValue(parser, enc, - s + enc->minBytesPerChar, - next - enc->minBytesPerChar); - if (declEntity) { - declEntity->textPtr = poolStart(&dtd->entityValuePool); - declEntity->textLen = poolLength(&dtd->entityValuePool); + enum XML_Error result + = storeEntityValue(parser, enc, s + enc->minBytesPerChar, + next - enc->minBytesPerChar, XML_ACCOUNT_NONE); + if (parser->m_declEntity) { + parser->m_declEntity->textPtr = poolStart(&dtd->entityValuePool); + parser->m_declEntity->textLen + = (int)(poolLength(&dtd->entityValuePool)); poolFinish(&dtd->entityValuePool); - if (entityDeclHandler) { + if (parser->m_entityDeclHandler) { *eventEndPP = s; - entityDeclHandler(handlerArg, - declEntity->name, - declEntity->is_param, - declEntity->textPtr, - declEntity->textLen, - curBase, 0, 0, 0); + parser->m_entityDeclHandler( + parser->m_handlerArg, parser->m_declEntity->name, + parser->m_declEntity->is_param, parser->m_declEntity->textPtr, + parser->m_declEntity->textLen, parser->m_curBase, 0, 0, 0); handleDefault = XML_FALSE; } - } - else + } else poolDiscard(&dtd->entityValuePool); if (result != XML_ERROR_NONE) return result; @@ -3957,226 +5044,212 @@ doProlog(XML_Parser parser, break; case XML_ROLE_DOCTYPE_SYSTEM_ID: #ifdef XML_DTD - useForeignDTD = XML_FALSE; + parser->m_useForeignDTD = XML_FALSE; #endif /* XML_DTD */ dtd->hasParamEntityRefs = XML_TRUE; - if (startDoctypeDeclHandler) { - doctypeSysid = poolStoreString(&tempPool, enc, - s + enc->minBytesPerChar, - next - enc->minBytesPerChar); - if (doctypeSysid == NULL) + if (parser->m_startDoctypeDeclHandler) { + parser->m_doctypeSysid = poolStoreString(&parser->m_tempPool, enc, + s + enc->minBytesPerChar, + next - enc->minBytesPerChar); + if (parser->m_doctypeSysid == NULL) return XML_ERROR_NO_MEMORY; - poolFinish(&tempPool); + poolFinish(&parser->m_tempPool); handleDefault = XML_FALSE; } #ifdef XML_DTD else - /* use externalSubsetName to make doctypeSysid non-NULL - for the case where no startDoctypeDeclHandler is set */ - doctypeSysid = externalSubsetName; + /* use externalSubsetName to make parser->m_doctypeSysid non-NULL + for the case where no parser->m_startDoctypeDeclHandler is set */ + parser->m_doctypeSysid = externalSubsetName; #endif /* XML_DTD */ - if (!dtd->standalone + if (! dtd->standalone #ifdef XML_DTD - && !paramEntityParsing + && ! parser->m_paramEntityParsing #endif /* XML_DTD */ - && notStandaloneHandler - && !notStandaloneHandler(handlerArg)) + && parser->m_notStandaloneHandler + && ! parser->m_notStandaloneHandler(parser->m_handlerArg)) return XML_ERROR_NOT_STANDALONE; #ifndef XML_DTD break; -#else /* XML_DTD */ - if (!declEntity) { - declEntity = (ENTITY *)lookup(&dtd->paramEntities, - externalSubsetName, - sizeof(ENTITY)); - if (!declEntity) +#else /* XML_DTD */ + if (! parser->m_declEntity) { + parser->m_declEntity = (ENTITY *)lookup( + parser, &dtd->paramEntities, externalSubsetName, sizeof(ENTITY)); + if (! parser->m_declEntity) return XML_ERROR_NO_MEMORY; - declEntity->publicId = NULL; + parser->m_declEntity->publicId = NULL; } - /* fall through */ #endif /* XML_DTD */ + /* fall through */ case XML_ROLE_ENTITY_SYSTEM_ID: - if (dtd->keepProcessing && declEntity) { - declEntity->systemId = poolStoreString(&dtd->pool, enc, - s + enc->minBytesPerChar, - next - enc->minBytesPerChar); - if (!declEntity->systemId) + if (dtd->keepProcessing && parser->m_declEntity) { + parser->m_declEntity->systemId + = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar, + next - enc->minBytesPerChar); + if (! parser->m_declEntity->systemId) return XML_ERROR_NO_MEMORY; - declEntity->base = curBase; + parser->m_declEntity->base = parser->m_curBase; poolFinish(&dtd->pool); - if (entityDeclHandler) + /* Don't suppress the default handler if we fell through from + * the XML_ROLE_DOCTYPE_SYSTEM_ID case. + */ + if (parser->m_entityDeclHandler && role == XML_ROLE_ENTITY_SYSTEM_ID) handleDefault = XML_FALSE; } break; case XML_ROLE_ENTITY_COMPLETE: - if (dtd->keepProcessing && declEntity && entityDeclHandler) { + if (dtd->keepProcessing && parser->m_declEntity + && parser->m_entityDeclHandler) { *eventEndPP = s; - entityDeclHandler(handlerArg, - declEntity->name, - declEntity->is_param, - 0,0, - declEntity->base, - declEntity->systemId, - declEntity->publicId, - 0); + parser->m_entityDeclHandler( + parser->m_handlerArg, parser->m_declEntity->name, + parser->m_declEntity->is_param, 0, 0, parser->m_declEntity->base, + parser->m_declEntity->systemId, parser->m_declEntity->publicId, 0); handleDefault = XML_FALSE; } break; case XML_ROLE_ENTITY_NOTATION_NAME: - if (dtd->keepProcessing && declEntity) { - declEntity->notation = poolStoreString(&dtd->pool, enc, s, next); - if (!declEntity->notation) + if (dtd->keepProcessing && parser->m_declEntity) { + parser->m_declEntity->notation + = poolStoreString(&dtd->pool, enc, s, next); + if (! parser->m_declEntity->notation) return XML_ERROR_NO_MEMORY; poolFinish(&dtd->pool); - if (unparsedEntityDeclHandler) { + if (parser->m_unparsedEntityDeclHandler) { *eventEndPP = s; - unparsedEntityDeclHandler(handlerArg, - declEntity->name, - declEntity->base, - declEntity->systemId, - declEntity->publicId, - declEntity->notation); + parser->m_unparsedEntityDeclHandler( + parser->m_handlerArg, parser->m_declEntity->name, + parser->m_declEntity->base, parser->m_declEntity->systemId, + parser->m_declEntity->publicId, parser->m_declEntity->notation); handleDefault = XML_FALSE; - } - else if (entityDeclHandler) { + } else if (parser->m_entityDeclHandler) { *eventEndPP = s; - entityDeclHandler(handlerArg, - declEntity->name, - 0,0,0, - declEntity->base, - declEntity->systemId, - declEntity->publicId, - declEntity->notation); + parser->m_entityDeclHandler( + parser->m_handlerArg, parser->m_declEntity->name, 0, 0, 0, + parser->m_declEntity->base, parser->m_declEntity->systemId, + parser->m_declEntity->publicId, parser->m_declEntity->notation); handleDefault = XML_FALSE; } } break; - case XML_ROLE_GENERAL_ENTITY_NAME: - { - if (XmlPredefinedEntityName(enc, s, next)) { - declEntity = NULL; - break; - } - if (dtd->keepProcessing) { - const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next); - if (!name) - return XML_ERROR_NO_MEMORY; - declEntity = (ENTITY *)lookup(&dtd->generalEntities, name, - sizeof(ENTITY)); - if (!declEntity) - return XML_ERROR_NO_MEMORY; - if (declEntity->name != name) { - poolDiscard(&dtd->pool); - declEntity = NULL; - } - else { - poolFinish(&dtd->pool); - declEntity->publicId = NULL; - declEntity->is_param = XML_FALSE; - /* if we have a parent parser or are reading an internal parameter - entity, then the entity declaration is not considered "internal" - */ - declEntity->is_internal = !(parentParser || openInternalEntities); - if (entityDeclHandler) - handleDefault = XML_FALSE; - } - } - else { + case XML_ROLE_GENERAL_ENTITY_NAME: { + if (XmlPredefinedEntityName(enc, s, next)) { + parser->m_declEntity = NULL; + break; + } + if (dtd->keepProcessing) { + const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next); + if (! name) + return XML_ERROR_NO_MEMORY; + parser->m_declEntity = (ENTITY *)lookup(parser, &dtd->generalEntities, + name, sizeof(ENTITY)); + if (! parser->m_declEntity) + return XML_ERROR_NO_MEMORY; + if (parser->m_declEntity->name != name) { poolDiscard(&dtd->pool); - declEntity = NULL; + parser->m_declEntity = NULL; + } else { + poolFinish(&dtd->pool); + parser->m_declEntity->publicId = NULL; + parser->m_declEntity->is_param = XML_FALSE; + /* if we have a parent parser or are reading an internal parameter + entity, then the entity declaration is not considered "internal" + */ + parser->m_declEntity->is_internal + = ! (parser->m_parentParser || parser->m_openInternalEntities); + if (parser->m_entityDeclHandler) + handleDefault = XML_FALSE; } + } else { + poolDiscard(&dtd->pool); + parser->m_declEntity = NULL; } - break; + } break; case XML_ROLE_PARAM_ENTITY_NAME: #ifdef XML_DTD if (dtd->keepProcessing) { const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next); - if (!name) + if (! name) return XML_ERROR_NO_MEMORY; - declEntity = (ENTITY *)lookup(&dtd->paramEntities, - name, sizeof(ENTITY)); - if (!declEntity) + parser->m_declEntity = (ENTITY *)lookup(parser, &dtd->paramEntities, + name, sizeof(ENTITY)); + if (! parser->m_declEntity) return XML_ERROR_NO_MEMORY; - if (declEntity->name != name) { + if (parser->m_declEntity->name != name) { poolDiscard(&dtd->pool); - declEntity = NULL; - } - else { + parser->m_declEntity = NULL; + } else { poolFinish(&dtd->pool); - declEntity->publicId = NULL; - declEntity->is_param = XML_TRUE; + parser->m_declEntity->publicId = NULL; + parser->m_declEntity->is_param = XML_TRUE; /* if we have a parent parser or are reading an internal parameter entity, then the entity declaration is not considered "internal" */ - declEntity->is_internal = !(parentParser || openInternalEntities); - if (entityDeclHandler) + parser->m_declEntity->is_internal + = ! (parser->m_parentParser || parser->m_openInternalEntities); + if (parser->m_entityDeclHandler) handleDefault = XML_FALSE; } - } - else { + } else { poolDiscard(&dtd->pool); - declEntity = NULL; + parser->m_declEntity = NULL; } -#else /* not XML_DTD */ - declEntity = NULL; +#else /* not XML_DTD */ + parser->m_declEntity = NULL; #endif /* XML_DTD */ break; case XML_ROLE_NOTATION_NAME: - declNotationPublicId = NULL; - declNotationName = NULL; - if (notationDeclHandler) { - declNotationName = poolStoreString(&tempPool, enc, s, next); - if (!declNotationName) + parser->m_declNotationPublicId = NULL; + parser->m_declNotationName = NULL; + if (parser->m_notationDeclHandler) { + parser->m_declNotationName + = poolStoreString(&parser->m_tempPool, enc, s, next); + if (! parser->m_declNotationName) return XML_ERROR_NO_MEMORY; - poolFinish(&tempPool); + poolFinish(&parser->m_tempPool); handleDefault = XML_FALSE; } break; case XML_ROLE_NOTATION_PUBLIC_ID: - if (!XmlIsPublicId(enc, s, next, eventPP)) + if (! XmlIsPublicId(enc, s, next, eventPP)) return XML_ERROR_PUBLICID; - if (declNotationName) { /* means notationDeclHandler != NULL */ - XML_Char *tem = poolStoreString(&tempPool, - enc, + if (parser + ->m_declNotationName) { /* means m_notationDeclHandler != NULL */ + XML_Char *tem = poolStoreString(&parser->m_tempPool, enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar); - if (!tem) + if (! tem) return XML_ERROR_NO_MEMORY; normalizePublicId(tem); - declNotationPublicId = tem; - poolFinish(&tempPool); + parser->m_declNotationPublicId = tem; + poolFinish(&parser->m_tempPool); handleDefault = XML_FALSE; } break; case XML_ROLE_NOTATION_SYSTEM_ID: - if (declNotationName && notationDeclHandler) { - const XML_Char *systemId - = poolStoreString(&tempPool, enc, - s + enc->minBytesPerChar, - next - enc->minBytesPerChar); - if (!systemId) + if (parser->m_declNotationName && parser->m_notationDeclHandler) { + const XML_Char *systemId = poolStoreString(&parser->m_tempPool, enc, + s + enc->minBytesPerChar, + next - enc->minBytesPerChar); + if (! systemId) return XML_ERROR_NO_MEMORY; *eventEndPP = s; - notationDeclHandler(handlerArg, - declNotationName, - curBase, - systemId, - declNotationPublicId); + parser->m_notationDeclHandler( + parser->m_handlerArg, parser->m_declNotationName, parser->m_curBase, + systemId, parser->m_declNotationPublicId); handleDefault = XML_FALSE; } - poolClear(&tempPool); + poolClear(&parser->m_tempPool); break; case XML_ROLE_NOTATION_NO_SYSTEM_ID: - if (declNotationPublicId && notationDeclHandler) { + if (parser->m_declNotationPublicId && parser->m_notationDeclHandler) { *eventEndPP = s; - notationDeclHandler(handlerArg, - declNotationName, - curBase, - 0, - declNotationPublicId); + parser->m_notationDeclHandler( + parser->m_handlerArg, parser->m_declNotationName, parser->m_curBase, + 0, parser->m_declNotationPublicId); handleDefault = XML_FALSE; } - poolClear(&tempPool); + poolClear(&parser->m_tempPool); break; case XML_ROLE_ERROR: switch (tok) { @@ -4190,111 +5263,151 @@ doProlog(XML_Parser parser, return XML_ERROR_SYNTAX; } #ifdef XML_DTD - case XML_ROLE_IGNORE_SECT: - { - enum XML_Error result; - if (defaultHandler) - reportDefault(parser, enc, s, next); - handleDefault = XML_FALSE; - result = doIgnoreSection(parser, enc, &next, end, nextPtr, haveMore); - if (result != XML_ERROR_NONE) - return result; - else if (!next) { - processor = ignoreSectionProcessor; - return result; - } + case XML_ROLE_IGNORE_SECT: { + enum XML_Error result; + if (parser->m_defaultHandler) + reportDefault(parser, enc, s, next); + handleDefault = XML_FALSE; + result = doIgnoreSection(parser, enc, &next, end, nextPtr, haveMore); + if (result != XML_ERROR_NONE) + return result; + else if (! next) { + parser->m_processor = ignoreSectionProcessor; + return result; } - break; + } break; #endif /* XML_DTD */ case XML_ROLE_GROUP_OPEN: - if (prologState.level >= groupSize) { - if (groupSize) { - char *temp = (char *)REALLOC(groupConnector, groupSize *= 2); - if (temp == NULL) - return XML_ERROR_NO_MEMORY; - groupConnector = temp; + if (parser->m_prologState.level >= parser->m_groupSize) { + if (parser->m_groupSize) { + { + /* Detect and prevent integer overflow */ + if (parser->m_groupSize > (unsigned int)(-1) / 2u) { + return XML_ERROR_NO_MEMORY; + } + + char *const new_connector = (char *)REALLOC( + parser, parser->m_groupConnector, parser->m_groupSize *= 2); + if (new_connector == NULL) { + parser->m_groupSize /= 2; + return XML_ERROR_NO_MEMORY; + } + parser->m_groupConnector = new_connector; + } + if (dtd->scaffIndex) { - int *temp = (int *)REALLOC(dtd->scaffIndex, - groupSize * sizeof(int)); - if (temp == NULL) + /* Detect and prevent integer overflow. + * The preprocessor guard addresses the "always false" warning + * from -Wtype-limits on platforms where + * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ +#if UINT_MAX >= SIZE_MAX + if (parser->m_groupSize > (size_t)(-1) / sizeof(int)) { + return XML_ERROR_NO_MEMORY; + } +#endif + + int *const new_scaff_index = (int *)REALLOC( + parser, dtd->scaffIndex, parser->m_groupSize * sizeof(int)); + if (new_scaff_index == NULL) return XML_ERROR_NO_MEMORY; - dtd->scaffIndex = temp; + dtd->scaffIndex = new_scaff_index; } - } - else { - groupConnector = (char *)MALLOC(groupSize = 32); - if (!groupConnector) + } else { + parser->m_groupConnector + = (char *)MALLOC(parser, parser->m_groupSize = 32); + if (! parser->m_groupConnector) { + parser->m_groupSize = 0; return XML_ERROR_NO_MEMORY; + } } } - groupConnector[prologState.level] = 0; + parser->m_groupConnector[parser->m_prologState.level] = 0; if (dtd->in_eldecl) { int myindex = nextScaffoldPart(parser); if (myindex < 0) return XML_ERROR_NO_MEMORY; + assert(dtd->scaffIndex != NULL); dtd->scaffIndex[dtd->scaffLevel] = myindex; dtd->scaffLevel++; dtd->scaffold[myindex].type = XML_CTYPE_SEQ; - if (elementDeclHandler) + if (parser->m_elementDeclHandler) handleDefault = XML_FALSE; } break; case XML_ROLE_GROUP_SEQUENCE: - if (groupConnector[prologState.level] == '|') + if (parser->m_groupConnector[parser->m_prologState.level] == ASCII_PIPE) return XML_ERROR_SYNTAX; - groupConnector[prologState.level] = ','; - if (dtd->in_eldecl && elementDeclHandler) + parser->m_groupConnector[parser->m_prologState.level] = ASCII_COMMA; + if (dtd->in_eldecl && parser->m_elementDeclHandler) handleDefault = XML_FALSE; break; case XML_ROLE_GROUP_CHOICE: - if (groupConnector[prologState.level] == ',') + if (parser->m_groupConnector[parser->m_prologState.level] == ASCII_COMMA) return XML_ERROR_SYNTAX; if (dtd->in_eldecl - && !groupConnector[prologState.level] + && ! parser->m_groupConnector[parser->m_prologState.level] && (dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type - != XML_CTYPE_MIXED) - ) { + != XML_CTYPE_MIXED)) { dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type = XML_CTYPE_CHOICE; - if (elementDeclHandler) + if (parser->m_elementDeclHandler) handleDefault = XML_FALSE; } - groupConnector[prologState.level] = '|'; + parser->m_groupConnector[parser->m_prologState.level] = ASCII_PIPE; break; case XML_ROLE_PARAM_ENTITY_REF: #ifdef XML_DTD case XML_ROLE_INNER_PARAM_ENTITY_REF: dtd->hasParamEntityRefs = XML_TRUE; - if (!paramEntityParsing) + if (! parser->m_paramEntityParsing) dtd->keepProcessing = dtd->standalone; else { const XML_Char *name; ENTITY *entity; - name = poolStoreString(&dtd->pool, enc, - s + enc->minBytesPerChar, - next - enc->minBytesPerChar); - if (!name) + name = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar, + next - enc->minBytesPerChar); + if (! name) return XML_ERROR_NO_MEMORY; - entity = (ENTITY *)lookup(&dtd->paramEntities, name, 0); + entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0); poolDiscard(&dtd->pool); /* first, determine if a check for an existing declaration is needed; if yes, check that the entity exists, and that it is internal, otherwise call the skipped entity handler */ - if (prologState.documentEntity && - (dtd->standalone - ? !openInternalEntities - : !dtd->hasParamEntityRefs)) { - if (!entity) + if (parser->m_prologState.documentEntity + && (dtd->standalone ? ! parser->m_openInternalEntities + : ! dtd->hasParamEntityRefs)) { + if (! entity) return XML_ERROR_UNDEFINED_ENTITY; - else if (!entity->is_internal) - return XML_ERROR_ENTITY_DECLARED_IN_PE; - } - else if (!entity) { + else if (! entity->is_internal) { + /* It's hard to exhaustively search the code to be sure, + * but there doesn't seem to be a way of executing the + * following line. There are two cases: + * + * If 'standalone' is false, the DTD must have no + * parameter entities or we wouldn't have passed the outer + * 'if' statement. That measn the only entity in the hash + * table is the external subset name "#" which cannot be + * given as a parameter entity name in XML syntax, so the + * lookup must have returned NULL and we don't even reach + * the test for an internal entity. + * + * If 'standalone' is true, it does not seem to be + * possible to create entities taking this code path that + * are not internal entities, so fail the test above. + * + * Because this analysis is very uncertain, the code is + * being left in place and merely removed from the + * coverage test statistics. + */ + return XML_ERROR_ENTITY_DECLARED_IN_PE; /* LCOV_EXCL_LINE */ + } + } else if (! entity) { dtd->keepProcessing = dtd->standalone; /* cannot report skipped entities in declarations */ - if ((role == XML_ROLE_PARAM_ENTITY_REF) && skippedEntityHandler) { - skippedEntityHandler(handlerArg, name, 1); + if ((role == XML_ROLE_PARAM_ENTITY_REF) + && parser->m_skippedEntityHandler) { + parser->m_skippedEntityHandler(parser->m_handlerArg, name, 1); handleDefault = XML_FALSE; } break; @@ -4303,50 +5416,49 @@ doProlog(XML_Parser parser, return XML_ERROR_RECURSIVE_ENTITY_REF; if (entity->textPtr) { enum XML_Error result; - XML_Bool betweenDecl = - (role == XML_ROLE_PARAM_ENTITY_REF ? XML_TRUE : XML_FALSE); + XML_Bool betweenDecl + = (role == XML_ROLE_PARAM_ENTITY_REF ? XML_TRUE : XML_FALSE); result = processInternalEntity(parser, entity, betweenDecl); if (result != XML_ERROR_NONE) return result; handleDefault = XML_FALSE; break; } - if (externalEntityRefHandler) { + if (parser->m_externalEntityRefHandler) { dtd->paramEntityRead = XML_FALSE; entity->open = XML_TRUE; - if (!externalEntityRefHandler(externalEntityRefHandlerArg, - 0, - entity->base, - entity->systemId, - entity->publicId)) { + entityTrackingOnOpen(parser, entity, __LINE__); + if (! parser->m_externalEntityRefHandler( + parser->m_externalEntityRefHandlerArg, 0, entity->base, + entity->systemId, entity->publicId)) { + entityTrackingOnClose(parser, entity, __LINE__); entity->open = XML_FALSE; return XML_ERROR_EXTERNAL_ENTITY_HANDLING; } + entityTrackingOnClose(parser, entity, __LINE__); entity->open = XML_FALSE; handleDefault = XML_FALSE; - if (!dtd->paramEntityRead) { + if (! dtd->paramEntityRead) { dtd->keepProcessing = dtd->standalone; break; } - } - else { + } else { dtd->keepProcessing = dtd->standalone; break; } } #endif /* XML_DTD */ - if (!dtd->standalone && - notStandaloneHandler && - !notStandaloneHandler(handlerArg)) + if (! dtd->standalone && parser->m_notStandaloneHandler + && ! parser->m_notStandaloneHandler(parser->m_handlerArg)) return XML_ERROR_NOT_STANDALONE; break; - /* Element declaration stuff */ + /* Element declaration stuff */ case XML_ROLE_ELEMENT_NAME: - if (elementDeclHandler) { - declElementType = getElementType(parser, enc, s, next); - if (!declElementType) + if (parser->m_elementDeclHandler) { + parser->m_declElementType = getElementType(parser, enc, s, next); + if (! parser->m_declElementType) return XML_ERROR_NO_MEMORY; dtd->scaffLevel = 0; dtd->scaffCount = 0; @@ -4358,19 +5470,20 @@ doProlog(XML_Parser parser, case XML_ROLE_CONTENT_ANY: case XML_ROLE_CONTENT_EMPTY: if (dtd->in_eldecl) { - if (elementDeclHandler) { - XML_Content * content = (XML_Content *) MALLOC(sizeof(XML_Content)); - if (!content) + if (parser->m_elementDeclHandler) { + XML_Content *content + = (XML_Content *)MALLOC(parser, sizeof(XML_Content)); + if (! content) return XML_ERROR_NO_MEMORY; content->quant = XML_CQUANT_NONE; content->name = NULL; content->numchildren = 0; content->children = NULL; - content->type = ((role == XML_ROLE_CONTENT_ANY) ? - XML_CTYPE_ANY : - XML_CTYPE_EMPTY); + content->type = ((role == XML_ROLE_CONTENT_ANY) ? XML_CTYPE_ANY + : XML_CTYPE_EMPTY); *eventEndPP = s; - elementDeclHandler(handlerArg, declElementType->name, content); + parser->m_elementDeclHandler( + parser->m_handlerArg, parser->m_declElementType->name, content); handleDefault = XML_FALSE; } dtd->in_eldecl = XML_FALSE; @@ -4381,7 +5494,7 @@ doProlog(XML_Parser parser, if (dtd->in_eldecl) { dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type = XML_CTYPE_MIXED; - if (elementDeclHandler) + if (parser->m_elementDeclHandler) handleDefault = XML_FALSE; } break; @@ -4401,24 +5514,30 @@ doProlog(XML_Parser parser, if (dtd->in_eldecl) { ELEMENT_TYPE *el; const XML_Char *name; - int nameLen; - const char *nxt = (quant == XML_CQUANT_NONE - ? next - : next - enc->minBytesPerChar); + size_t nameLen; + const char *nxt + = (quant == XML_CQUANT_NONE ? next : next - enc->minBytesPerChar); int myindex = nextScaffoldPart(parser); if (myindex < 0) return XML_ERROR_NO_MEMORY; dtd->scaffold[myindex].type = XML_CTYPE_NAME; dtd->scaffold[myindex].quant = quant; el = getElementType(parser, enc, s, nxt); - if (!el) + if (! el) return XML_ERROR_NO_MEMORY; name = el->name; dtd->scaffold[myindex].name = name; nameLen = 0; - for (; name[nameLen++]; ); - dtd->contentStringLen += nameLen; - if (elementDeclHandler) + for (; name[nameLen++];) + ; + + /* Detect and prevent integer overflow */ + if (nameLen > UINT_MAX - dtd->contentStringLen) { + return XML_ERROR_NO_MEMORY; + } + + dtd->contentStringLen += (unsigned)nameLen; + if (parser->m_elementDeclHandler) handleDefault = XML_FALSE; } break; @@ -4436,17 +5555,18 @@ doProlog(XML_Parser parser, quant = XML_CQUANT_PLUS; closeGroup: if (dtd->in_eldecl) { - if (elementDeclHandler) + if (parser->m_elementDeclHandler) handleDefault = XML_FALSE; dtd->scaffLevel--; dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel]].quant = quant; if (dtd->scaffLevel == 0) { - if (!handleDefault) { + if (! handleDefault) { XML_Content *model = build_model(parser); - if (!model) + if (! model) return XML_ERROR_NO_MEMORY; *eventEndPP = s; - elementDeclHandler(handlerArg, declElementType->name, model); + parser->m_elementDeclHandler( + parser->m_handlerArg, parser->m_declElementType->name, model); } dtd->in_eldecl = XML_FALSE; dtd->contentStringLen = 0; @@ -4456,12 +5576,12 @@ doProlog(XML_Parser parser, /* End element declaration stuff */ case XML_ROLE_PI: - if (!reportProcessingInstruction(parser, enc, s, next)) + if (! reportProcessingInstruction(parser, enc, s, next)) return XML_ERROR_NO_MEMORY; handleDefault = XML_FALSE; break; case XML_ROLE_COMMENT: - if (!reportComment(parser, enc, s, next)) + if (! reportComment(parser, enc, s, next)) return XML_ERROR_NO_MEMORY; handleDefault = XML_FALSE; break; @@ -4473,31 +5593,31 @@ doProlog(XML_Parser parser, } break; case XML_ROLE_DOCTYPE_NONE: - if (startDoctypeDeclHandler) + if (parser->m_startDoctypeDeclHandler) handleDefault = XML_FALSE; break; case XML_ROLE_ENTITY_NONE: - if (dtd->keepProcessing && entityDeclHandler) + if (dtd->keepProcessing && parser->m_entityDeclHandler) handleDefault = XML_FALSE; break; case XML_ROLE_NOTATION_NONE: - if (notationDeclHandler) + if (parser->m_notationDeclHandler) handleDefault = XML_FALSE; break; case XML_ROLE_ATTLIST_NONE: - if (dtd->keepProcessing && attlistDeclHandler) + if (dtd->keepProcessing && parser->m_attlistDeclHandler) handleDefault = XML_FALSE; break; case XML_ROLE_ELEMENT_NONE: - if (elementDeclHandler) + if (parser->m_elementDeclHandler) handleDefault = XML_FALSE; break; } /* end of big switch */ - if (handleDefault && defaultHandler) + if (handleDefault && parser->m_defaultHandler) reportDefault(parser, enc, s, next); - switch (parsing) { + switch (parser->m_parsingStatus.parsing) { case XML_SUSPENDED: *nextPtr = next; return XML_ERROR_NONE; @@ -4512,23 +5632,27 @@ doProlog(XML_Parser parser, } static enum XML_Error PTRCALL -epilogProcessor(XML_Parser parser, - const char *s, - const char *end, - const char **nextPtr) -{ - processor = epilogProcessor; - eventPtr = s; +epilogProcessor(XML_Parser parser, const char *s, const char *end, + const char **nextPtr) { + parser->m_processor = epilogProcessor; + parser->m_eventPtr = s; for (;;) { const char *next = NULL; - int tok = XmlPrologTok(encoding, s, end, &next); - eventEndPtr = next; + int tok = XmlPrologTok(parser->m_encoding, s, end, &next); +#ifdef XML_DTD + if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, + XML_ACCOUNT_DIRECT)) { + accountingOnAbort(parser); + return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; + } +#endif + parser->m_eventEndPtr = next; switch (tok) { /* report partial linebreak - it might be the last token */ case -XML_TOK_PROLOG_S: - if (defaultHandler) { - reportDefault(parser, encoding, s, next); - if (parsing == XML_FINISHED) + if (parser->m_defaultHandler) { + reportDefault(parser, parser->m_encoding, s, next); + if (parser->m_parsingStatus.parsing == XML_FINISHED) return XML_ERROR_ABORTED; } *nextPtr = next; @@ -4537,28 +5661,28 @@ epilogProcessor(XML_Parser parser, *nextPtr = s; return XML_ERROR_NONE; case XML_TOK_PROLOG_S: - if (defaultHandler) - reportDefault(parser, encoding, s, next); + if (parser->m_defaultHandler) + reportDefault(parser, parser->m_encoding, s, next); break; case XML_TOK_PI: - if (!reportProcessingInstruction(parser, encoding, s, next)) + if (! reportProcessingInstruction(parser, parser->m_encoding, s, next)) return XML_ERROR_NO_MEMORY; break; case XML_TOK_COMMENT: - if (!reportComment(parser, encoding, s, next)) + if (! reportComment(parser, parser->m_encoding, s, next)) return XML_ERROR_NO_MEMORY; break; case XML_TOK_INVALID: - eventPtr = next; + parser->m_eventPtr = next; return XML_ERROR_INVALID_TOKEN; case XML_TOK_PARTIAL: - if (!finalBuffer) { + if (! parser->m_parsingStatus.finalBuffer) { *nextPtr = s; return XML_ERROR_NONE; } return XML_ERROR_UNCLOSED_TOKEN; case XML_TOK_PARTIAL_CHAR: - if (!finalBuffer) { + if (! parser->m_parsingStatus.finalBuffer) { *nextPtr = s; return XML_ERROR_NONE; } @@ -4566,209 +5690,232 @@ epilogProcessor(XML_Parser parser, default: return XML_ERROR_JUNK_AFTER_DOC_ELEMENT; } - eventPtr = s = next; - switch (parsing) { + parser->m_eventPtr = s = next; + switch (parser->m_parsingStatus.parsing) { case XML_SUSPENDED: *nextPtr = next; return XML_ERROR_NONE; case XML_FINISHED: return XML_ERROR_ABORTED; - default: ; + default:; } } } static enum XML_Error -processInternalEntity(XML_Parser parser, ENTITY *entity, - XML_Bool betweenDecl) -{ +processInternalEntity(XML_Parser parser, ENTITY *entity, XML_Bool betweenDecl) { const char *textStart, *textEnd; const char *next; enum XML_Error result; OPEN_INTERNAL_ENTITY *openEntity; - if (freeInternalEntities) { - openEntity = freeInternalEntities; - freeInternalEntities = openEntity->next; - } - else { - openEntity = (OPEN_INTERNAL_ENTITY *)MALLOC(sizeof(OPEN_INTERNAL_ENTITY)); - if (!openEntity) + if (parser->m_freeInternalEntities) { + openEntity = parser->m_freeInternalEntities; + parser->m_freeInternalEntities = openEntity->next; + } else { + openEntity + = (OPEN_INTERNAL_ENTITY *)MALLOC(parser, sizeof(OPEN_INTERNAL_ENTITY)); + if (! openEntity) return XML_ERROR_NO_MEMORY; } entity->open = XML_TRUE; +#ifdef XML_DTD + entityTrackingOnOpen(parser, entity, __LINE__); +#endif entity->processed = 0; - openEntity->next = openInternalEntities; - openInternalEntities = openEntity; + openEntity->next = parser->m_openInternalEntities; + parser->m_openInternalEntities = openEntity; openEntity->entity = entity; - openEntity->startTagLevel = tagLevel; + openEntity->startTagLevel = parser->m_tagLevel; openEntity->betweenDecl = betweenDecl; openEntity->internalEventPtr = NULL; openEntity->internalEventEndPtr = NULL; - textStart = (char *)entity->textPtr; - textEnd = (char *)(entity->textPtr + entity->textLen); + textStart = (const char *)entity->textPtr; + textEnd = (const char *)(entity->textPtr + entity->textLen); + /* Set a safe default value in case 'next' does not get set */ + next = textStart; #ifdef XML_DTD if (entity->is_param) { - int tok = XmlPrologTok(internalEncoding, textStart, textEnd, &next); - result = doProlog(parser, internalEncoding, textStart, textEnd, tok, - next, &next, XML_FALSE); - } - else + int tok + = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next); + result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd, + tok, next, &next, XML_FALSE, XML_FALSE, + XML_ACCOUNT_ENTITY_EXPANSION); + } else #endif /* XML_DTD */ - result = doContent(parser, tagLevel, internalEncoding, textStart, - textEnd, &next, XML_FALSE); + result = doContent(parser, parser->m_tagLevel, parser->m_internalEncoding, + textStart, textEnd, &next, XML_FALSE, + XML_ACCOUNT_ENTITY_EXPANSION); if (result == XML_ERROR_NONE) { - if (textEnd != next && parsing == XML_SUSPENDED) { - entity->processed = next - textStart; - processor = internalEntityProcessor; - } - else { + if (textEnd != next && parser->m_parsingStatus.parsing == XML_SUSPENDED) { + entity->processed = (int)(next - textStart); + parser->m_processor = internalEntityProcessor; + } else { +#ifdef XML_DTD + entityTrackingOnClose(parser, entity, __LINE__); +#endif /* XML_DTD */ entity->open = XML_FALSE; - openInternalEntities = openEntity->next; + parser->m_openInternalEntities = openEntity->next; /* put openEntity back in list of free instances */ - openEntity->next = freeInternalEntities; - freeInternalEntities = openEntity; + openEntity->next = parser->m_freeInternalEntities; + parser->m_freeInternalEntities = openEntity; } } return result; } static enum XML_Error PTRCALL -internalEntityProcessor(XML_Parser parser, - const char *s, - const char *end, - const char **nextPtr) -{ +internalEntityProcessor(XML_Parser parser, const char *s, const char *end, + const char **nextPtr) { ENTITY *entity; const char *textStart, *textEnd; const char *next; enum XML_Error result; - OPEN_INTERNAL_ENTITY *openEntity = openInternalEntities; - if (!openEntity) + OPEN_INTERNAL_ENTITY *openEntity = parser->m_openInternalEntities; + if (! openEntity) return XML_ERROR_UNEXPECTED_STATE; entity = openEntity->entity; - textStart = ((char *)entity->textPtr) + entity->processed; - textEnd = (char *)(entity->textPtr + entity->textLen); + textStart = ((const char *)entity->textPtr) + entity->processed; + textEnd = (const char *)(entity->textPtr + entity->textLen); + /* Set a safe default value in case 'next' does not get set */ + next = textStart; #ifdef XML_DTD if (entity->is_param) { - int tok = XmlPrologTok(internalEncoding, textStart, textEnd, &next); - result = doProlog(parser, internalEncoding, textStart, textEnd, tok, - next, &next, XML_FALSE); - } - else + int tok + = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next); + result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd, + tok, next, &next, XML_FALSE, XML_TRUE, + XML_ACCOUNT_ENTITY_EXPANSION); + } else #endif /* XML_DTD */ - result = doContent(parser, openEntity->startTagLevel, internalEncoding, - textStart, textEnd, &next, XML_FALSE); + result = doContent(parser, openEntity->startTagLevel, + parser->m_internalEncoding, textStart, textEnd, &next, + XML_FALSE, XML_ACCOUNT_ENTITY_EXPANSION); if (result != XML_ERROR_NONE) return result; - else if (textEnd != next && parsing == XML_SUSPENDED) { - entity->processed = next - (char *)entity->textPtr; + else if (textEnd != next + && parser->m_parsingStatus.parsing == XML_SUSPENDED) { + entity->processed = (int)(next - (const char *)entity->textPtr); return result; - } - else { + } else { +#ifdef XML_DTD + entityTrackingOnClose(parser, entity, __LINE__); +#endif entity->open = XML_FALSE; - openInternalEntities = openEntity->next; + parser->m_openInternalEntities = openEntity->next; /* put openEntity back in list of free instances */ - openEntity->next = freeInternalEntities; - freeInternalEntities = openEntity; + openEntity->next = parser->m_freeInternalEntities; + parser->m_freeInternalEntities = openEntity; } #ifdef XML_DTD if (entity->is_param) { int tok; - processor = prologProcessor; - tok = XmlPrologTok(encoding, s, end, &next); - return doProlog(parser, encoding, s, end, tok, next, nextPtr, - (XML_Bool)!finalBuffer); - } - else + parser->m_processor = prologProcessor; + tok = XmlPrologTok(parser->m_encoding, s, end, &next); + return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr, + (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE, + XML_ACCOUNT_DIRECT); + } else #endif /* XML_DTD */ { - processor = contentProcessor; + parser->m_processor = contentProcessor; /* see externalEntityContentProcessor vs contentProcessor */ - return doContent(parser, parentParser ? 1 : 0, encoding, s, end, - nextPtr, (XML_Bool)!finalBuffer); + return doContent(parser, parser->m_parentParser ? 1 : 0, parser->m_encoding, + s, end, nextPtr, + (XML_Bool)! parser->m_parsingStatus.finalBuffer, + XML_ACCOUNT_DIRECT); } } static enum XML_Error PTRCALL -errorProcessor(XML_Parser parser, - const char *s, - const char *end, - const char **nextPtr) -{ - return errorCode; +errorProcessor(XML_Parser parser, const char *s, const char *end, + const char **nextPtr) { + UNUSED_P(s); + UNUSED_P(end); + UNUSED_P(nextPtr); + return parser->m_errorCode; } static enum XML_Error storeAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata, - const char *ptr, const char *end, - STRING_POOL *pool) -{ - enum XML_Error result = appendAttributeValue(parser, enc, isCdata, ptr, - end, pool); + const char *ptr, const char *end, STRING_POOL *pool, + enum XML_Account account) { + enum XML_Error result + = appendAttributeValue(parser, enc, isCdata, ptr, end, pool, account); if (result) return result; - if (!isCdata && poolLength(pool) && poolLastChar(pool) == 0x20) + if (! isCdata && poolLength(pool) && poolLastChar(pool) == 0x20) poolChop(pool); - if (!poolAppendChar(pool, XML_T('\0'))) + if (! poolAppendChar(pool, XML_T('\0'))) return XML_ERROR_NO_MEMORY; return XML_ERROR_NONE; } static enum XML_Error appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata, - const char *ptr, const char *end, - STRING_POOL *pool) -{ - DTD * const dtd = _dtd; /* save one level of indirection */ + const char *ptr, const char *end, STRING_POOL *pool, + enum XML_Account account) { + DTD *const dtd = parser->m_dtd; /* save one level of indirection */ +#ifndef XML_DTD + UNUSED_P(account); +#endif + for (;;) { - const char *next; + const char *next + = ptr; /* XmlAttributeValueTok doesn't always set the last arg */ int tok = XmlAttributeValueTok(enc, ptr, end, &next); +#ifdef XML_DTD + if (! accountingDiffTolerated(parser, tok, ptr, next, __LINE__, account)) { + accountingOnAbort(parser); + return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; + } +#endif switch (tok) { case XML_TOK_NONE: return XML_ERROR_NONE; case XML_TOK_INVALID: - if (enc == encoding) - eventPtr = next; + if (enc == parser->m_encoding) + parser->m_eventPtr = next; return XML_ERROR_INVALID_TOKEN; case XML_TOK_PARTIAL: - if (enc == encoding) - eventPtr = ptr; + if (enc == parser->m_encoding) + parser->m_eventPtr = ptr; return XML_ERROR_INVALID_TOKEN; - case XML_TOK_CHAR_REF: - { - XML_Char buf[XML_ENCODE_MAX]; - int i; - int n = XmlCharRefNumber(enc, ptr); - if (n < 0) { - if (enc == encoding) - eventPtr = ptr; - return XML_ERROR_BAD_CHAR_REF; - } - if (!isCdata - && n == 0x20 /* space */ - && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20)) - break; - n = XmlEncode(n, (ICHAR *)buf); - if (!n) { - if (enc == encoding) - eventPtr = ptr; - return XML_ERROR_BAD_CHAR_REF; - } - for (i = 0; i < n; i++) { - if (!poolAppendChar(pool, buf[i])) - return XML_ERROR_NO_MEMORY; - } + case XML_TOK_CHAR_REF: { + XML_Char buf[XML_ENCODE_MAX]; + int i; + int n = XmlCharRefNumber(enc, ptr); + if (n < 0) { + if (enc == parser->m_encoding) + parser->m_eventPtr = ptr; + return XML_ERROR_BAD_CHAR_REF; } - break; + if (! isCdata && n == 0x20 /* space */ + && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20)) + break; + n = XmlEncode(n, (ICHAR *)buf); + /* The XmlEncode() functions can never return 0 here. That + * error return happens if the code point passed in is either + * negative or greater than or equal to 0x110000. The + * XmlCharRefNumber() functions will all return a number + * strictly less than 0x110000 or a negative value if an error + * occurred. The negative value is intercepted above, so + * XmlEncode() is never passed a value it might return an + * error for. + */ + for (i = 0; i < n; i++) { + if (! poolAppendChar(pool, buf[i])) + return XML_ERROR_NO_MEMORY; + } + } break; case XML_TOK_DATA_CHARS: - if (!poolAppend(pool, enc, ptr, next)) + if (! poolAppend(pool, enc, ptr, next)) return XML_ERROR_NO_MEMORY; break; case XML_TOK_TRAILING_CR: @@ -4776,93 +5923,134 @@ appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata, /* fall through */ case XML_TOK_ATTRIBUTE_VALUE_S: case XML_TOK_DATA_NEWLINE: - if (!isCdata && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20)) + if (! isCdata && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20)) break; - if (!poolAppendChar(pool, 0x20)) + if (! poolAppendChar(pool, 0x20)) return XML_ERROR_NO_MEMORY; break; - case XML_TOK_ENTITY_REF: - { - const XML_Char *name; - ENTITY *entity; - char checkEntityDecl; - XML_Char ch = (XML_Char) XmlPredefinedEntityName(enc, - ptr + enc->minBytesPerChar, - next - enc->minBytesPerChar); - if (ch) { - if (!poolAppendChar(pool, ch)) - return XML_ERROR_NO_MEMORY; - break; - } - name = poolStoreString(&temp2Pool, enc, - ptr + enc->minBytesPerChar, - next - enc->minBytesPerChar); - if (!name) + case XML_TOK_ENTITY_REF: { + const XML_Char *name; + ENTITY *entity; + char checkEntityDecl; + XML_Char ch = (XML_Char)XmlPredefinedEntityName( + enc, ptr + enc->minBytesPerChar, next - enc->minBytesPerChar); + if (ch) { +#ifdef XML_DTD + /* NOTE: We are replacing 4-6 characters original input for 1 character + * so there is no amplification and hence recording without + * protection. */ + accountingDiffTolerated(parser, tok, (char *)&ch, + ((char *)&ch) + sizeof(XML_Char), __LINE__, + XML_ACCOUNT_ENTITY_EXPANSION); +#endif /* XML_DTD */ + if (! poolAppendChar(pool, ch)) return XML_ERROR_NO_MEMORY; - entity = (ENTITY *)lookup(&dtd->generalEntities, name, 0); - poolDiscard(&temp2Pool); - /* first, determine if a check for an existing declaration is needed; - if yes, check that the entity exists, and that it is internal, - otherwise call the default handler (if called from content) - */ - if (pool == &dtd->pool) /* are we called from prolog? */ - checkEntityDecl = + break; + } + name = poolStoreString(&parser->m_temp2Pool, enc, + ptr + enc->minBytesPerChar, + next - enc->minBytesPerChar); + if (! name) + return XML_ERROR_NO_MEMORY; + entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0); + poolDiscard(&parser->m_temp2Pool); + /* First, determine if a check for an existing declaration is needed; + if yes, check that the entity exists, and that it is internal. + */ + if (pool == &dtd->pool) /* are we called from prolog? */ + checkEntityDecl = #ifdef XML_DTD - prologState.documentEntity && + parser->m_prologState.documentEntity && #endif /* XML_DTD */ - (dtd->standalone - ? !openInternalEntities - : !dtd->hasParamEntityRefs); - else /* if (pool == &tempPool): we are called from content */ - checkEntityDecl = !dtd->hasParamEntityRefs || dtd->standalone; - if (checkEntityDecl) { - if (!entity) - return XML_ERROR_UNDEFINED_ENTITY; - else if (!entity->is_internal) - return XML_ERROR_ENTITY_DECLARED_IN_PE; - } - else if (!entity) { - /* cannot report skipped entity here - see comments on - skippedEntityHandler - if (skippedEntityHandler) - skippedEntityHandler(handlerArg, name, 0); - */ - if ((pool == &tempPool) && defaultHandler) - reportDefault(parser, enc, ptr, next); - break; - } - if (entity->open) { - if (enc == encoding) - eventPtr = ptr; - return XML_ERROR_RECURSIVE_ENTITY_REF; - } - if (entity->notation) { - if (enc == encoding) - eventPtr = ptr; - return XML_ERROR_BINARY_ENTITY_REF; - } - if (!entity->textPtr) { - if (enc == encoding) - eventPtr = ptr; - return XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF; - } - else { - enum XML_Error result; - const XML_Char *textEnd = entity->textPtr + entity->textLen; - entity->open = XML_TRUE; - result = appendAttributeValue(parser, internalEncoding, isCdata, - (char *)entity->textPtr, - (char *)textEnd, pool); - entity->open = XML_FALSE; - if (result) - return result; + (dtd->standalone ? ! parser->m_openInternalEntities + : ! dtd->hasParamEntityRefs); + else /* if (pool == &parser->m_tempPool): we are called from content */ + checkEntityDecl = ! dtd->hasParamEntityRefs || dtd->standalone; + if (checkEntityDecl) { + if (! entity) + return XML_ERROR_UNDEFINED_ENTITY; + else if (! entity->is_internal) + return XML_ERROR_ENTITY_DECLARED_IN_PE; + } else if (! entity) { + /* Cannot report skipped entity here - see comments on + parser->m_skippedEntityHandler. + if (parser->m_skippedEntityHandler) + parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0); + */ + /* Cannot call the default handler because this would be + out of sync with the call to the startElementHandler. + if ((pool == &parser->m_tempPool) && parser->m_defaultHandler) + reportDefault(parser, enc, ptr, next); + */ + break; + } + if (entity->open) { + if (enc == parser->m_encoding) { + /* It does not appear that this line can be executed. + * + * The "if (entity->open)" check catches recursive entity + * definitions. In order to be called with an open + * entity, it must have gone through this code before and + * been through the recursive call to + * appendAttributeValue() some lines below. That call + * sets the local encoding ("enc") to the parser's + * internal encoding (internal_utf8 or internal_utf16), + * which can never be the same as the principle encoding. + * It doesn't appear there is another code path that gets + * here with entity->open being TRUE. + * + * Since it is not certain that this logic is watertight, + * we keep the line and merely exclude it from coverage + * tests. + */ + parser->m_eventPtr = ptr; /* LCOV_EXCL_LINE */ } + return XML_ERROR_RECURSIVE_ENTITY_REF; } - break; + if (entity->notation) { + if (enc == parser->m_encoding) + parser->m_eventPtr = ptr; + return XML_ERROR_BINARY_ENTITY_REF; + } + if (! entity->textPtr) { + if (enc == parser->m_encoding) + parser->m_eventPtr = ptr; + return XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF; + } else { + enum XML_Error result; + const XML_Char *textEnd = entity->textPtr + entity->textLen; + entity->open = XML_TRUE; +#ifdef XML_DTD + entityTrackingOnOpen(parser, entity, __LINE__); +#endif + result = appendAttributeValue(parser, parser->m_internalEncoding, + isCdata, (const char *)entity->textPtr, + (const char *)textEnd, pool, + XML_ACCOUNT_ENTITY_EXPANSION); +#ifdef XML_DTD + entityTrackingOnClose(parser, entity, __LINE__); +#endif + entity->open = XML_FALSE; + if (result) + return result; + } + } break; default: - if (enc == encoding) - eventPtr = ptr; + /* The only token returned by XmlAttributeValueTok() that does + * not have an explicit case here is XML_TOK_PARTIAL_CHAR. + * Getting that would require an entity name to contain an + * incomplete XML character (e.g. \xE2\x82); however previous + * tokenisers will have already recognised and rejected such + * names before XmlAttributeValueTok() gets a look-in. This + * default case should be retained as a safety net, but the code + * excluded from coverage tests. + * + * LCOV_EXCL_START + */ + if (enc == parser->m_encoding) + parser->m_eventPtr = ptr; return XML_ERROR_UNEXPECTED_STATE; + /* LCOV_EXCL_STOP */ } ptr = next; } @@ -4870,87 +6058,98 @@ appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata, } static enum XML_Error -storeEntityValue(XML_Parser parser, - const ENCODING *enc, - const char *entityTextPtr, - const char *entityTextEnd) -{ - DTD * const dtd = _dtd; /* save one level of indirection */ +storeEntityValue(XML_Parser parser, const ENCODING *enc, + const char *entityTextPtr, const char *entityTextEnd, + enum XML_Account account) { + DTD *const dtd = parser->m_dtd; /* save one level of indirection */ STRING_POOL *pool = &(dtd->entityValuePool); enum XML_Error result = XML_ERROR_NONE; #ifdef XML_DTD - int oldInEntityValue = prologState.inEntityValue; - prologState.inEntityValue = 1; + int oldInEntityValue = parser->m_prologState.inEntityValue; + parser->m_prologState.inEntityValue = 1; +#else + UNUSED_P(account); #endif /* XML_DTD */ /* never return Null for the value argument in EntityDeclHandler, since this would indicate an external entity; therefore we have to make sure that entityValuePool.start is not null */ - if (!pool->blocks) { - if (!poolGrow(pool)) + if (! pool->blocks) { + if (! poolGrow(pool)) return XML_ERROR_NO_MEMORY; } for (;;) { - const char *next; + const char *next + = entityTextPtr; /* XmlEntityValueTok doesn't always set the last arg */ int tok = XmlEntityValueTok(enc, entityTextPtr, entityTextEnd, &next); + +#ifdef XML_DTD + if (! accountingDiffTolerated(parser, tok, entityTextPtr, next, __LINE__, + account)) { + accountingOnAbort(parser); + result = XML_ERROR_AMPLIFICATION_LIMIT_BREACH; + goto endEntityValue; + } +#endif + switch (tok) { case XML_TOK_PARAM_ENTITY_REF: #ifdef XML_DTD - if (isParamEntity || enc != encoding) { + if (parser->m_isParamEntity || enc != parser->m_encoding) { const XML_Char *name; ENTITY *entity; - name = poolStoreString(&tempPool, enc, + name = poolStoreString(&parser->m_tempPool, enc, entityTextPtr + enc->minBytesPerChar, next - enc->minBytesPerChar); - if (!name) { + if (! name) { result = XML_ERROR_NO_MEMORY; goto endEntityValue; } - entity = (ENTITY *)lookup(&dtd->paramEntities, name, 0); - poolDiscard(&tempPool); - if (!entity) { + entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0); + poolDiscard(&parser->m_tempPool); + if (! entity) { /* not a well-formedness error - see XML 1.0: WFC Entity Declared */ /* cannot report skipped entity here - see comments on - skippedEntityHandler - if (skippedEntityHandler) - skippedEntityHandler(handlerArg, name, 0); + parser->m_skippedEntityHandler + if (parser->m_skippedEntityHandler) + parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0); */ dtd->keepProcessing = dtd->standalone; goto endEntityValue; } if (entity->open) { - if (enc == encoding) - eventPtr = entityTextPtr; + if (enc == parser->m_encoding) + parser->m_eventPtr = entityTextPtr; result = XML_ERROR_RECURSIVE_ENTITY_REF; goto endEntityValue; } if (entity->systemId) { - if (externalEntityRefHandler) { + if (parser->m_externalEntityRefHandler) { dtd->paramEntityRead = XML_FALSE; entity->open = XML_TRUE; - if (!externalEntityRefHandler(externalEntityRefHandlerArg, - 0, - entity->base, - entity->systemId, - entity->publicId)) { + entityTrackingOnOpen(parser, entity, __LINE__); + if (! parser->m_externalEntityRefHandler( + parser->m_externalEntityRefHandlerArg, 0, entity->base, + entity->systemId, entity->publicId)) { + entityTrackingOnClose(parser, entity, __LINE__); entity->open = XML_FALSE; result = XML_ERROR_EXTERNAL_ENTITY_HANDLING; goto endEntityValue; } + entityTrackingOnClose(parser, entity, __LINE__); entity->open = XML_FALSE; - if (!dtd->paramEntityRead) + if (! dtd->paramEntityRead) dtd->keepProcessing = dtd->standalone; - } - else + } else dtd->keepProcessing = dtd->standalone; - } - else { + } else { entity->open = XML_TRUE; - result = storeEntityValue(parser, - internalEncoding, - (char *)entity->textPtr, - (char *)(entity->textPtr - + entity->textLen)); + entityTrackingOnOpen(parser, entity, __LINE__); + result = storeEntityValue( + parser, parser->m_internalEncoding, (const char *)entity->textPtr, + (const char *)(entity->textPtr + entity->textLen), + XML_ACCOUNT_ENTITY_EXPANSION); + entityTrackingOnClose(parser, entity, __LINE__); entity->open = XML_FALSE; if (result) goto endEntityValue; @@ -4960,7 +6159,7 @@ storeEntityValue(XML_Parser parser, #endif /* XML_DTD */ /* In the internal subset, PE references are not legal within markup declarations, e.g entity values in this case. */ - eventPtr = entityTextPtr; + parser->m_eventPtr = entityTextPtr; result = XML_ERROR_PARAM_ENTITY_REF; goto endEntityValue; case XML_TOK_NONE: @@ -4968,7 +6167,7 @@ storeEntityValue(XML_Parser parser, goto endEntityValue; case XML_TOK_ENTITY_REF: case XML_TOK_DATA_CHARS: - if (!poolAppend(pool, enc, entityTextPtr, next)) { + if (! poolAppend(pool, enc, entityTextPtr, next)) { result = XML_ERROR_NO_MEMORY; goto endEntityValue; } @@ -4977,67 +6176,75 @@ storeEntityValue(XML_Parser parser, next = entityTextPtr + enc->minBytesPerChar; /* fall through */ case XML_TOK_DATA_NEWLINE: - if (pool->end == pool->ptr && !poolGrow(pool)) { - result = XML_ERROR_NO_MEMORY; + if (pool->end == pool->ptr && ! poolGrow(pool)) { + result = XML_ERROR_NO_MEMORY; goto endEntityValue; } *(pool->ptr)++ = 0xA; break; - case XML_TOK_CHAR_REF: - { - XML_Char buf[XML_ENCODE_MAX]; - int i; - int n = XmlCharRefNumber(enc, entityTextPtr); - if (n < 0) { - if (enc == encoding) - eventPtr = entityTextPtr; - result = XML_ERROR_BAD_CHAR_REF; - goto endEntityValue; - } - n = XmlEncode(n, (ICHAR *)buf); - if (!n) { - if (enc == encoding) - eventPtr = entityTextPtr; - result = XML_ERROR_BAD_CHAR_REF; + case XML_TOK_CHAR_REF: { + XML_Char buf[XML_ENCODE_MAX]; + int i; + int n = XmlCharRefNumber(enc, entityTextPtr); + if (n < 0) { + if (enc == parser->m_encoding) + parser->m_eventPtr = entityTextPtr; + result = XML_ERROR_BAD_CHAR_REF; + goto endEntityValue; + } + n = XmlEncode(n, (ICHAR *)buf); + /* The XmlEncode() functions can never return 0 here. That + * error return happens if the code point passed in is either + * negative or greater than or equal to 0x110000. The + * XmlCharRefNumber() functions will all return a number + * strictly less than 0x110000 or a negative value if an error + * occurred. The negative value is intercepted above, so + * XmlEncode() is never passed a value it might return an + * error for. + */ + for (i = 0; i < n; i++) { + if (pool->end == pool->ptr && ! poolGrow(pool)) { + result = XML_ERROR_NO_MEMORY; goto endEntityValue; } - for (i = 0; i < n; i++) { - if (pool->end == pool->ptr && !poolGrow(pool)) { - result = XML_ERROR_NO_MEMORY; - goto endEntityValue; - } - *(pool->ptr)++ = buf[i]; - } + *(pool->ptr)++ = buf[i]; } - break; + } break; case XML_TOK_PARTIAL: - if (enc == encoding) - eventPtr = entityTextPtr; + if (enc == parser->m_encoding) + parser->m_eventPtr = entityTextPtr; result = XML_ERROR_INVALID_TOKEN; goto endEntityValue; case XML_TOK_INVALID: - if (enc == encoding) - eventPtr = next; + if (enc == parser->m_encoding) + parser->m_eventPtr = next; result = XML_ERROR_INVALID_TOKEN; goto endEntityValue; default: - if (enc == encoding) - eventPtr = entityTextPtr; + /* This default case should be unnecessary -- all the tokens + * that XmlEntityValueTok() can return have their own explicit + * cases -- but should be retained for safety. We do however + * exclude it from the coverage statistics. + * + * LCOV_EXCL_START + */ + if (enc == parser->m_encoding) + parser->m_eventPtr = entityTextPtr; result = XML_ERROR_UNEXPECTED_STATE; goto endEntityValue; + /* LCOV_EXCL_STOP */ } entityTextPtr = next; } endEntityValue: #ifdef XML_DTD - prologState.inEntityValue = oldInEntityValue; + parser->m_prologState.inEntityValue = oldInEntityValue; #endif /* XML_DTD */ return result; } static void FASTCALL -normalizeLines(XML_Char *s) -{ +normalizeLines(XML_Char *s) { XML_Char *p; for (;; s++) { if (*s == XML_T('\0')) @@ -5051,8 +6258,7 @@ normalizeLines(XML_Char *s) *p++ = 0xA; if (*++s == 0xA) s++; - } - else + } else *p++ = *s++; } while (*s); *p = XML_T('\0'); @@ -5060,87 +6266,100 @@ normalizeLines(XML_Char *s) static int reportProcessingInstruction(XML_Parser parser, const ENCODING *enc, - const char *start, const char *end) -{ + const char *start, const char *end) { const XML_Char *target; XML_Char *data; const char *tem; - if (!processingInstructionHandler) { - if (defaultHandler) + if (! parser->m_processingInstructionHandler) { + if (parser->m_defaultHandler) reportDefault(parser, enc, start, end); return 1; } start += enc->minBytesPerChar * 2; tem = start + XmlNameLength(enc, start); - target = poolStoreString(&tempPool, enc, start, tem); - if (!target) + target = poolStoreString(&parser->m_tempPool, enc, start, tem); + if (! target) return 0; - poolFinish(&tempPool); - data = poolStoreString(&tempPool, enc, - XmlSkipS(enc, tem), - end - enc->minBytesPerChar*2); - if (!data) + poolFinish(&parser->m_tempPool); + data = poolStoreString(&parser->m_tempPool, enc, XmlSkipS(enc, tem), + end - enc->minBytesPerChar * 2); + if (! data) return 0; normalizeLines(data); - processingInstructionHandler(handlerArg, target, data); - poolClear(&tempPool); + parser->m_processingInstructionHandler(parser->m_handlerArg, target, data); + poolClear(&parser->m_tempPool); return 1; } static int -reportComment(XML_Parser parser, const ENCODING *enc, - const char *start, const char *end) -{ +reportComment(XML_Parser parser, const ENCODING *enc, const char *start, + const char *end) { XML_Char *data; - if (!commentHandler) { - if (defaultHandler) + if (! parser->m_commentHandler) { + if (parser->m_defaultHandler) reportDefault(parser, enc, start, end); return 1; } - data = poolStoreString(&tempPool, - enc, + data = poolStoreString(&parser->m_tempPool, enc, start + enc->minBytesPerChar * 4, end - enc->minBytesPerChar * 3); - if (!data) + if (! data) return 0; normalizeLines(data); - commentHandler(handlerArg, data); - poolClear(&tempPool); + parser->m_commentHandler(parser->m_handlerArg, data); + poolClear(&parser->m_tempPool); return 1; } static void -reportDefault(XML_Parser parser, const ENCODING *enc, - const char *s, const char *end) -{ +reportDefault(XML_Parser parser, const ENCODING *enc, const char *s, + const char *end) { if (MUST_CONVERT(enc, s)) { + enum XML_Convert_Result convert_res; const char **eventPP; const char **eventEndPP; - if (enc == encoding) { - eventPP = &eventPtr; - eventEndPP = &eventEndPtr; - } - else { - eventPP = &(openInternalEntities->internalEventPtr); - eventEndPP = &(openInternalEntities->internalEventEndPtr); + if (enc == parser->m_encoding) { + eventPP = &parser->m_eventPtr; + eventEndPP = &parser->m_eventEndPtr; + } else { + /* To get here, two things must be true; the parser must be + * using a character encoding that is not the same as the + * encoding passed in, and the encoding passed in must need + * conversion to the internal format (UTF-8 unless XML_UNICODE + * is defined). The only occasions on which the encoding passed + * in is not the same as the parser's encoding are when it is + * the internal encoding (e.g. a previously defined parameter + * entity, already converted to internal format). This by + * definition doesn't need conversion, so the whole branch never + * gets executed. + * + * For safety's sake we don't delete these lines and merely + * exclude them from coverage statistics. + * + * LCOV_EXCL_START + */ + eventPP = &(parser->m_openInternalEntities->internalEventPtr); + eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr); + /* LCOV_EXCL_STOP */ } do { - ICHAR *dataPtr = (ICHAR *)dataBuf; - XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)dataBufEnd); + ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf; + convert_res + = XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)parser->m_dataBufEnd); *eventEndPP = s; - defaultHandler(handlerArg, dataBuf, dataPtr - (ICHAR *)dataBuf); + parser->m_defaultHandler(parser->m_handlerArg, parser->m_dataBuf, + (int)(dataPtr - (ICHAR *)parser->m_dataBuf)); *eventPP = s; - } while (s != end); - } - else - defaultHandler(handlerArg, (XML_Char *)s, (XML_Char *)end - (XML_Char *)s); + } while ((convert_res != XML_CONVERT_COMPLETED) + && (convert_res != XML_CONVERT_INPUT_INCOMPLETE)); + } else + parser->m_defaultHandler(parser->m_handlerArg, (XML_Char *)s, + (int)((XML_Char *)end - (XML_Char *)s)); } - static int defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, XML_Bool isCdata, - XML_Bool isId, const XML_Char *value, XML_Parser parser) -{ + XML_Bool isId, const XML_Char *value, XML_Parser parser) { DEFAULT_ATTRIBUTE *att; if (value || isId) { /* The handling of default attributes gets messed up if we have @@ -5149,22 +6368,40 @@ defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, XML_Bool isCdata, for (i = 0; i < type->nDefaultAtts; i++) if (attId == type->defaultAtts[i].id) return 1; - if (isId && !type->idAtt && !attId->xmlns) + if (isId && ! type->idAtt && ! attId->xmlns) type->idAtt = attId; } if (type->nDefaultAtts == type->allocDefaultAtts) { if (type->allocDefaultAtts == 0) { type->allocDefaultAtts = 8; - type->defaultAtts = (DEFAULT_ATTRIBUTE *)MALLOC(type->allocDefaultAtts - * sizeof(DEFAULT_ATTRIBUTE)); - if (!type->defaultAtts) + type->defaultAtts = (DEFAULT_ATTRIBUTE *)MALLOC( + parser, type->allocDefaultAtts * sizeof(DEFAULT_ATTRIBUTE)); + if (! type->defaultAtts) { + type->allocDefaultAtts = 0; return 0; - } - else { + } + } else { DEFAULT_ATTRIBUTE *temp; + + /* Detect and prevent integer overflow */ + if (type->allocDefaultAtts > INT_MAX / 2) { + return 0; + } + int count = type->allocDefaultAtts * 2; - temp = (DEFAULT_ATTRIBUTE *) - REALLOC(type->defaultAtts, (count * sizeof(DEFAULT_ATTRIBUTE))); + + /* Detect and prevent integer overflow. + * The preprocessor guard addresses the "always false" warning + * from -Wtype-limits on platforms where + * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ +#if UINT_MAX >= SIZE_MAX + if ((unsigned)count > (size_t)(-1) / sizeof(DEFAULT_ATTRIBUTE)) { + return 0; + } +#endif + + temp = (DEFAULT_ATTRIBUTE *)REALLOC(parser, type->defaultAtts, + (count * sizeof(DEFAULT_ATTRIBUTE))); if (temp == NULL) return 0; type->allocDefaultAtts = count; @@ -5175,91 +6412,90 @@ defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, XML_Bool isCdata, att->id = attId; att->value = value; att->isCdata = isCdata; - if (!isCdata) + if (! isCdata) attId->maybeTokenized = XML_TRUE; type->nDefaultAtts += 1; return 1; } static int -setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType) -{ - DTD * const dtd = _dtd; /* save one level of indirection */ +setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType) { + DTD *const dtd = parser->m_dtd; /* save one level of indirection */ const XML_Char *name; for (name = elementType->name; *name; name++) { - if (*name == XML_T(':')) { + if (*name == XML_T(ASCII_COLON)) { PREFIX *prefix; const XML_Char *s; for (s = elementType->name; s != name; s++) { - if (!poolAppendChar(&dtd->pool, *s)) + if (! poolAppendChar(&dtd->pool, *s)) return 0; } - if (!poolAppendChar(&dtd->pool, XML_T('\0'))) + if (! poolAppendChar(&dtd->pool, XML_T('\0'))) return 0; - prefix = (PREFIX *)lookup(&dtd->prefixes, poolStart(&dtd->pool), + prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&dtd->pool), sizeof(PREFIX)); - if (!prefix) + if (! prefix) return 0; if (prefix->name == poolStart(&dtd->pool)) poolFinish(&dtd->pool); else poolDiscard(&dtd->pool); elementType->prefix = prefix; - + break; } } return 1; } static ATTRIBUTE_ID * -getAttributeId(XML_Parser parser, const ENCODING *enc, - const char *start, const char *end) -{ - DTD * const dtd = _dtd; /* save one level of indirection */ +getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start, + const char *end) { + DTD *const dtd = parser->m_dtd; /* save one level of indirection */ ATTRIBUTE_ID *id; const XML_Char *name; - if (!poolAppendChar(&dtd->pool, XML_T('\0'))) + if (! poolAppendChar(&dtd->pool, XML_T('\0'))) return NULL; name = poolStoreString(&dtd->pool, enc, start, end); - if (!name) + if (! name) return NULL; /* skip quotation mark - its storage will be re-used (like in name[-1]) */ ++name; - id = (ATTRIBUTE_ID *)lookup(&dtd->attributeIds, name, sizeof(ATTRIBUTE_ID)); - if (!id) + id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, name, + sizeof(ATTRIBUTE_ID)); + if (! id) return NULL; if (id->name != name) poolDiscard(&dtd->pool); else { poolFinish(&dtd->pool); - if (!ns) + if (! parser->m_ns) ; - else if (name[0] == XML_T('x') - && name[1] == XML_T('m') - && name[2] == XML_T('l') - && name[3] == XML_T('n') - && name[4] == XML_T('s') - && (name[5] == XML_T('\0') || name[5] == XML_T(':'))) { + else if (name[0] == XML_T(ASCII_x) && name[1] == XML_T(ASCII_m) + && name[2] == XML_T(ASCII_l) && name[3] == XML_T(ASCII_n) + && name[4] == XML_T(ASCII_s) + && (name[5] == XML_T('\0') || name[5] == XML_T(ASCII_COLON))) { if (name[5] == XML_T('\0')) id->prefix = &dtd->defaultPrefix; else - id->prefix = (PREFIX *)lookup(&dtd->prefixes, name + 6, sizeof(PREFIX)); + id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes, name + 6, + sizeof(PREFIX)); id->xmlns = XML_TRUE; - } - else { + } else { int i; for (i = 0; name[i]; i++) { /* attributes without prefix are *not* in the default namespace */ - if (name[i] == XML_T(':')) { + if (name[i] == XML_T(ASCII_COLON)) { int j; for (j = 0; j < i; j++) { - if (!poolAppendChar(&dtd->pool, name[j])) + if (! poolAppendChar(&dtd->pool, name[j])) return NULL; } - if (!poolAppendChar(&dtd->pool, XML_T('\0'))) + if (! poolAppendChar(&dtd->pool, XML_T('\0'))) + return NULL; + id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes, + poolStart(&dtd->pool), sizeof(PREFIX)); + if (! id->prefix) return NULL; - id->prefix = (PREFIX *)lookup(&dtd->prefixes, poolStart(&dtd->pool), - sizeof(PREFIX)); if (id->prefix->name == poolStart(&dtd->pool)) poolFinish(&dtd->pool); else @@ -5272,26 +6508,47 @@ getAttributeId(XML_Parser parser, const ENCODING *enc, return id; } -#define CONTEXT_SEP XML_T('\f') +#define CONTEXT_SEP XML_T(ASCII_FF) static const XML_Char * -getContext(XML_Parser parser) -{ - DTD * const dtd = _dtd; /* save one level of indirection */ +getContext(XML_Parser parser) { + DTD *const dtd = parser->m_dtd; /* save one level of indirection */ HASH_TABLE_ITER iter; XML_Bool needSep = XML_FALSE; if (dtd->defaultPrefix.binding) { int i; int len; - if (!poolAppendChar(&tempPool, XML_T('='))) + if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_EQUALS))) return NULL; len = dtd->defaultPrefix.binding->uriLen; - if (namespaceSeparator != XML_T('\0')) + if (parser->m_namespaceSeparator) len--; - for (i = 0; i < len; i++) - if (!poolAppendChar(&tempPool, dtd->defaultPrefix.binding->uri[i])) - return NULL; + for (i = 0; i < len; i++) { + if (! poolAppendChar(&parser->m_tempPool, + dtd->defaultPrefix.binding->uri[i])) { + /* Because of memory caching, I don't believe this line can be + * executed. + * + * This is part of a loop copying the default prefix binding + * URI into the parser's temporary string pool. Previously, + * that URI was copied into the same string pool, with a + * terminating NUL character, as part of setContext(). When + * the pool was cleared, that leaves a block definitely big + * enough to hold the URI on the free block list of the pool. + * The URI copy in getContext() therefore cannot run out of + * memory. + * + * If the pool is used between the setContext() and + * getContext() calls, the worst it can do is leave a bigger + * block on the front of the free list. Given that this is + * all somewhat inobvious and program logic can be changed, we + * don't delete the line but we do exclude it from the test + * coverage statistics. + */ + return NULL; /* LCOV_EXCL_LINE */ + } + } needSep = XML_TRUE; } @@ -5301,102 +6558,107 @@ getContext(XML_Parser parser) int len; const XML_Char *s; PREFIX *prefix = (PREFIX *)hashTableIterNext(&iter); - if (!prefix) + if (! prefix) break; - if (!prefix->binding) - continue; - if (needSep && !poolAppendChar(&tempPool, CONTEXT_SEP)) + if (! prefix->binding) { + /* This test appears to be (justifiable) paranoia. There does + * not seem to be a way of injecting a prefix without a binding + * that doesn't get errored long before this function is called. + * The test should remain for safety's sake, so we instead + * exclude the following line from the coverage statistics. + */ + continue; /* LCOV_EXCL_LINE */ + } + if (needSep && ! poolAppendChar(&parser->m_tempPool, CONTEXT_SEP)) return NULL; for (s = prefix->name; *s; s++) - if (!poolAppendChar(&tempPool, *s)) + if (! poolAppendChar(&parser->m_tempPool, *s)) return NULL; - if (!poolAppendChar(&tempPool, XML_T('='))) + if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_EQUALS))) return NULL; len = prefix->binding->uriLen; - if (namespaceSeparator != XML_T('\0')) + if (parser->m_namespaceSeparator) len--; for (i = 0; i < len; i++) - if (!poolAppendChar(&tempPool, prefix->binding->uri[i])) + if (! poolAppendChar(&parser->m_tempPool, prefix->binding->uri[i])) return NULL; needSep = XML_TRUE; } - hashTableIterInit(&iter, &(dtd->generalEntities)); for (;;) { const XML_Char *s; ENTITY *e = (ENTITY *)hashTableIterNext(&iter); - if (!e) + if (! e) break; - if (!e->open) + if (! e->open) continue; - if (needSep && !poolAppendChar(&tempPool, CONTEXT_SEP)) + if (needSep && ! poolAppendChar(&parser->m_tempPool, CONTEXT_SEP)) return NULL; for (s = e->name; *s; s++) - if (!poolAppendChar(&tempPool, *s)) + if (! poolAppendChar(&parser->m_tempPool, *s)) return 0; needSep = XML_TRUE; } - if (!poolAppendChar(&tempPool, XML_T('\0'))) + if (! poolAppendChar(&parser->m_tempPool, XML_T('\0'))) return NULL; - return tempPool.start; + return parser->m_tempPool.start; } static XML_Bool -setContext(XML_Parser parser, const XML_Char *context) -{ - DTD * const dtd = _dtd; /* save one level of indirection */ +setContext(XML_Parser parser, const XML_Char *context) { + DTD *const dtd = parser->m_dtd; /* save one level of indirection */ const XML_Char *s = context; while (*context != XML_T('\0')) { if (*s == CONTEXT_SEP || *s == XML_T('\0')) { ENTITY *e; - if (!poolAppendChar(&tempPool, XML_T('\0'))) + if (! poolAppendChar(&parser->m_tempPool, XML_T('\0'))) return XML_FALSE; - e = (ENTITY *)lookup(&dtd->generalEntities, poolStart(&tempPool), 0); + e = (ENTITY *)lookup(parser, &dtd->generalEntities, + poolStart(&parser->m_tempPool), 0); if (e) e->open = XML_TRUE; if (*s != XML_T('\0')) s++; context = s; - poolDiscard(&tempPool); - } - else if (*s == XML_T('=')) { + poolDiscard(&parser->m_tempPool); + } else if (*s == XML_T(ASCII_EQUALS)) { PREFIX *prefix; - if (poolLength(&tempPool) == 0) + if (poolLength(&parser->m_tempPool) == 0) prefix = &dtd->defaultPrefix; else { - if (!poolAppendChar(&tempPool, XML_T('\0'))) + if (! poolAppendChar(&parser->m_tempPool, XML_T('\0'))) return XML_FALSE; - prefix = (PREFIX *)lookup(&dtd->prefixes, poolStart(&tempPool), - sizeof(PREFIX)); - if (!prefix) + prefix + = (PREFIX *)lookup(parser, &dtd->prefixes, + poolStart(&parser->m_tempPool), sizeof(PREFIX)); + if (! prefix) return XML_FALSE; - if (prefix->name == poolStart(&tempPool)) { + if (prefix->name == poolStart(&parser->m_tempPool)) { prefix->name = poolCopyString(&dtd->pool, prefix->name); - if (!prefix->name) + if (! prefix->name) return XML_FALSE; } - poolDiscard(&tempPool); + poolDiscard(&parser->m_tempPool); } - for (context = s + 1; - *context != CONTEXT_SEP && *context != XML_T('\0'); + for (context = s + 1; *context != CONTEXT_SEP && *context != XML_T('\0'); context++) - if (!poolAppendChar(&tempPool, *context)) + if (! poolAppendChar(&parser->m_tempPool, *context)) return XML_FALSE; - if (!poolAppendChar(&tempPool, XML_T('\0'))) + if (! poolAppendChar(&parser->m_tempPool, XML_T('\0'))) return XML_FALSE; - if (addBinding(parser, prefix, NULL, poolStart(&tempPool), - &inheritedBindings) != XML_ERROR_NONE) + if (addBinding(parser, prefix, NULL, poolStart(&parser->m_tempPool), + &parser->m_inheritedBindings) + != XML_ERROR_NONE) return XML_FALSE; - poolDiscard(&tempPool); + poolDiscard(&parser->m_tempPool); if (*context != XML_T('\0')) ++context; s = context; - } - else { - if (!poolAppendChar(&tempPool, *s)) + } else { + if (! poolAppendChar(&parser->m_tempPool, *s)) return XML_FALSE; s++; } @@ -5405,8 +6667,7 @@ setContext(XML_Parser parser, const XML_Char *context) } static void FASTCALL -normalizePublicId(XML_Char *publicId) -{ +normalizePublicId(XML_Char *publicId) { XML_Char *p = publicId; XML_Char *s; for (s = publicId; *s; s++) { @@ -5427,9 +6688,8 @@ normalizePublicId(XML_Char *publicId) } static DTD * -dtdCreate(const XML_Memory_Handling_Suite *ms) -{ - DTD *p = (DTD *)ms->malloc_fcn(sizeof(DTD)); +dtdCreate(const XML_Memory_Handling_Suite *ms) { + DTD *p = ms->malloc_fcn(sizeof(DTD)); if (p == NULL) return p; poolInit(&(p->pool), ms); @@ -5460,13 +6720,12 @@ dtdCreate(const XML_Memory_Handling_Suite *ms) } static void -dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms) -{ +dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms) { HASH_TABLE_ITER iter; hashTableIterInit(&iter, &(p->elementTypes)); for (;;) { ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter); - if (!e) + if (! e) break; if (e->allocDefaultAtts != 0) ms->free_fcn(e->defaultAtts); @@ -5502,13 +6761,12 @@ dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms) } static void -dtdDestroy(DTD *p, XML_Bool isDocEntity, const XML_Memory_Handling_Suite *ms) -{ +dtdDestroy(DTD *p, XML_Bool isDocEntity, const XML_Memory_Handling_Suite *ms) { HASH_TABLE_ITER iter; hashTableIterInit(&iter, &(p->elementTypes)); for (;;) { ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter); - if (!e) + if (! e) break; if (e->allocDefaultAtts != 0) ms->free_fcn(e->defaultAtts); @@ -5533,8 +6791,8 @@ dtdDestroy(DTD *p, XML_Bool isDocEntity, const XML_Memory_Handling_Suite *ms) The new DTD has already been initialized. */ static int -dtdCopy(DTD *newDtd, const DTD *oldDtd, const XML_Memory_Handling_Suite *ms) -{ +dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd, + const XML_Memory_Handling_Suite *ms) { HASH_TABLE_ITER iter; /* Copy the prefix table. */ @@ -5543,12 +6801,12 @@ dtdCopy(DTD *newDtd, const DTD *oldDtd, const XML_Memory_Handling_Suite *ms) for (;;) { const XML_Char *name; const PREFIX *oldP = (PREFIX *)hashTableIterNext(&iter); - if (!oldP) + if (! oldP) break; name = poolCopyString(&(newDtd->pool), oldP->name); - if (!name) + if (! name) return 0; - if (!lookup(&(newDtd->prefixes), name, sizeof(PREFIX))) + if (! lookup(oldParser, &(newDtd->prefixes), name, sizeof(PREFIX))) return 0; } @@ -5561,18 +6819,18 @@ dtdCopy(DTD *newDtd, const DTD *oldDtd, const XML_Memory_Handling_Suite *ms) const XML_Char *name; const ATTRIBUTE_ID *oldA = (ATTRIBUTE_ID *)hashTableIterNext(&iter); - if (!oldA) + if (! oldA) break; /* Remember to allocate the scratch byte before the name. */ - if (!poolAppendChar(&(newDtd->pool), XML_T('\0'))) + if (! poolAppendChar(&(newDtd->pool), XML_T('\0'))) return 0; name = poolCopyString(&(newDtd->pool), oldA->name); - if (!name) + if (! name) return 0; ++name; - newA = (ATTRIBUTE_ID *)lookup(&(newDtd->attributeIds), name, + newA = (ATTRIBUTE_ID *)lookup(oldParser, &(newDtd->attributeIds), name, sizeof(ATTRIBUTE_ID)); - if (!newA) + if (! newA) return 0; newA->maybeTokenized = oldA->maybeTokenized; if (oldA->prefix) { @@ -5580,7 +6838,7 @@ dtdCopy(DTD *newDtd, const DTD *oldDtd, const XML_Memory_Handling_Suite *ms) if (oldA->prefix == &oldDtd->defaultPrefix) newA->prefix = &newDtd->defaultPrefix; else - newA->prefix = (PREFIX *)lookup(&(newDtd->prefixes), + newA->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes), oldA->prefix->name, 0); } } @@ -5594,56 +6852,52 @@ dtdCopy(DTD *newDtd, const DTD *oldDtd, const XML_Memory_Handling_Suite *ms) ELEMENT_TYPE *newE; const XML_Char *name; const ELEMENT_TYPE *oldE = (ELEMENT_TYPE *)hashTableIterNext(&iter); - if (!oldE) + if (! oldE) break; name = poolCopyString(&(newDtd->pool), oldE->name); - if (!name) + if (! name) return 0; - newE = (ELEMENT_TYPE *)lookup(&(newDtd->elementTypes), name, + newE = (ELEMENT_TYPE *)lookup(oldParser, &(newDtd->elementTypes), name, sizeof(ELEMENT_TYPE)); - if (!newE) + if (! newE) return 0; if (oldE->nDefaultAtts) { - newE->defaultAtts = (DEFAULT_ATTRIBUTE *) - ms->malloc_fcn(oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE)); - if (!newE->defaultAtts) { - ms->free_fcn(newE); + newE->defaultAtts + = ms->malloc_fcn(oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE)); + if (! newE->defaultAtts) { return 0; } } if (oldE->idAtt) - newE->idAtt = (ATTRIBUTE_ID *) - lookup(&(newDtd->attributeIds), oldE->idAtt->name, 0); + newE->idAtt = (ATTRIBUTE_ID *)lookup(oldParser, &(newDtd->attributeIds), + oldE->idAtt->name, 0); newE->allocDefaultAtts = newE->nDefaultAtts = oldE->nDefaultAtts; if (oldE->prefix) - newE->prefix = (PREFIX *)lookup(&(newDtd->prefixes), + newE->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes), oldE->prefix->name, 0); for (i = 0; i < newE->nDefaultAtts; i++) { - newE->defaultAtts[i].id = (ATTRIBUTE_ID *) - lookup(&(newDtd->attributeIds), oldE->defaultAtts[i].id->name, 0); + newE->defaultAtts[i].id = (ATTRIBUTE_ID *)lookup( + oldParser, &(newDtd->attributeIds), oldE->defaultAtts[i].id->name, 0); newE->defaultAtts[i].isCdata = oldE->defaultAtts[i].isCdata; if (oldE->defaultAtts[i].value) { newE->defaultAtts[i].value = poolCopyString(&(newDtd->pool), oldE->defaultAtts[i].value); - if (!newE->defaultAtts[i].value) + if (! newE->defaultAtts[i].value) return 0; - } - else + } else newE->defaultAtts[i].value = NULL; } } /* Copy the entity tables. */ - if (!copyEntityTable(&(newDtd->generalEntities), - &(newDtd->pool), - &(oldDtd->generalEntities))) - return 0; + if (! copyEntityTable(oldParser, &(newDtd->generalEntities), &(newDtd->pool), + &(oldDtd->generalEntities))) + return 0; #ifdef XML_DTD - if (!copyEntityTable(&(newDtd->paramEntities), - &(newDtd->pool), - &(oldDtd->paramEntities))) - return 0; + if (! copyEntityTable(oldParser, &(newDtd->paramEntities), &(newDtd->pool), + &(oldDtd->paramEntities))) + return 0; newDtd->paramEntityRead = oldDtd->paramEntityRead; #endif /* XML_DTD */ @@ -5660,13 +6914,11 @@ dtdCopy(DTD *newDtd, const DTD *oldDtd, const XML_Memory_Handling_Suite *ms) newDtd->scaffIndex = oldDtd->scaffIndex; return 1; -} /* End dtdCopy */ +} /* End dtdCopy */ static int -copyEntityTable(HASH_TABLE *newTable, - STRING_POOL *newPool, - const HASH_TABLE *oldTable) -{ +copyEntityTable(XML_Parser oldParser, HASH_TABLE *newTable, + STRING_POOL *newPool, const HASH_TABLE *oldTable) { HASH_TABLE_ITER iter; const XML_Char *cachedOldBase = NULL; const XML_Char *cachedNewBase = NULL; @@ -5677,17 +6929,17 @@ copyEntityTable(HASH_TABLE *newTable, ENTITY *newE; const XML_Char *name; const ENTITY *oldE = (ENTITY *)hashTableIterNext(&iter); - if (!oldE) + if (! oldE) break; name = poolCopyString(newPool, oldE->name); - if (!name) + if (! name) return 0; - newE = (ENTITY *)lookup(newTable, name, sizeof(ENTITY)); - if (!newE) + newE = (ENTITY *)lookup(oldParser, newTable, name, sizeof(ENTITY)); + if (! newE) return 0; if (oldE->systemId) { const XML_Char *tem = poolCopyString(newPool, oldE->systemId); - if (!tem) + if (! tem) return 0; newE->systemId = tem; if (oldE->base) { @@ -5696,29 +6948,28 @@ copyEntityTable(HASH_TABLE *newTable, else { cachedOldBase = oldE->base; tem = poolCopyString(newPool, cachedOldBase); - if (!tem) + if (! tem) return 0; cachedNewBase = newE->base = tem; } } if (oldE->publicId) { tem = poolCopyString(newPool, oldE->publicId); - if (!tem) + if (! tem) return 0; newE->publicId = tem; } - } - else { - const XML_Char *tem = poolCopyStringN(newPool, oldE->textPtr, - oldE->textLen); - if (!tem) + } else { + const XML_Char *tem + = poolCopyStringN(newPool, oldE->textPtr, oldE->textLen); + if (! tem) return 0; newE->textPtr = tem; newE->textLen = oldE->textLen; } if (oldE->notation) { const XML_Char *tem = poolCopyString(newPool, oldE->notation); - if (!tem) + if (! tem) return 0; newE->notation = tem; } @@ -5731,75 +6982,100 @@ copyEntityTable(HASH_TABLE *newTable, #define INIT_POWER 6 static XML_Bool FASTCALL -keyeq(KEY s1, KEY s2) -{ +keyeq(KEY s1, KEY s2) { for (; *s1 == *s2; s1++, s2++) if (*s1 == 0) return XML_TRUE; return XML_FALSE; } +static size_t +keylen(KEY s) { + size_t len = 0; + for (; *s; s++, len++) + ; + return len; +} + +static void +copy_salt_to_sipkey(XML_Parser parser, struct sipkey *key) { + key->k[0] = 0; + key->k[1] = get_hash_secret_salt(parser); +} + static unsigned long FASTCALL -hash(KEY s) -{ - unsigned long h = 0; - while (*s) - h = CHAR_HASH(h, *s++); - return h; +hash(XML_Parser parser, KEY s) { + struct siphash state; + struct sipkey key; + (void)sip24_valid; + copy_salt_to_sipkey(parser, &key); + sip24_init(&state, &key); + sip24_update(&state, s, keylen(s) * sizeof(XML_Char)); + return (unsigned long)sip24_final(&state); } static NAMED * -lookup(HASH_TABLE *table, KEY name, size_t createSize) -{ +lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize) { size_t i; if (table->size == 0) { size_t tsize; - if (!createSize) + if (! createSize) return NULL; table->power = INIT_POWER; /* table->size is a power of 2 */ table->size = (size_t)1 << INIT_POWER; tsize = table->size * sizeof(NAMED *); - table->v = (NAMED **)table->mem->malloc_fcn(tsize); - if (!table->v) { + table->v = table->mem->malloc_fcn(tsize); + if (! table->v) { table->size = 0; return NULL; } memset(table->v, 0, tsize); - i = hash(name) & ((unsigned long)table->size - 1); - } - else { - unsigned long h = hash(name); + i = hash(parser, name) & ((unsigned long)table->size - 1); + } else { + unsigned long h = hash(parser, name); unsigned long mask = (unsigned long)table->size - 1; unsigned char step = 0; i = h & mask; while (table->v[i]) { if (keyeq(name, table->v[i]->name)) return table->v[i]; - if (!step) + if (! step) step = PROBE_STEP(h, mask, table->power); i < step ? (i += table->size - step) : (i -= step); } - if (!createSize) + if (! createSize) return NULL; /* check for overflow (table is half full) */ if (table->used >> (table->power - 1)) { unsigned char newPower = table->power + 1; + + /* Detect and prevent invalid shift */ + if (newPower >= sizeof(unsigned long) * 8 /* bits per byte */) { + return NULL; + } + size_t newSize = (size_t)1 << newPower; unsigned long newMask = (unsigned long)newSize - 1; + + /* Detect and prevent integer overflow */ + if (newSize > (size_t)(-1) / sizeof(NAMED *)) { + return NULL; + } + size_t tsize = newSize * sizeof(NAMED *); - NAMED **newV = (NAMED **)table->mem->malloc_fcn(tsize); - if (!newV) + NAMED **newV = table->mem->malloc_fcn(tsize); + if (! newV) return NULL; memset(newV, 0, tsize); for (i = 0; i < table->size; i++) if (table->v[i]) { - unsigned long newHash = hash(table->v[i]->name); + unsigned long newHash = hash(parser, table->v[i]->name); size_t j = newHash & newMask; step = 0; while (newV[j]) { - if (!step) + if (! step) step = PROBE_STEP(newHash, newMask, newPower); j < step ? (j += newSize - step) : (j -= step); } @@ -5812,14 +7088,14 @@ lookup(HASH_TABLE *table, KEY name, size_t createSize) i = h & newMask; step = 0; while (table->v[i]) { - if (!step) + if (! step) step = PROBE_STEP(h, newMask, newPower); i < step ? (i += newSize - step) : (i -= step); } } } - table->v[i] = (NAMED *)table->mem->malloc_fcn(createSize); - if (!table->v[i]) + table->v[i] = table->mem->malloc_fcn(createSize); + if (! table->v[i]) return NULL; memset(table->v[i], 0, createSize); table->v[i]->name = name; @@ -5828,8 +7104,7 @@ lookup(HASH_TABLE *table, KEY name, size_t createSize) } static void FASTCALL -hashTableClear(HASH_TABLE *table) -{ +hashTableClear(HASH_TABLE *table) { size_t i; for (i = 0; i < table->size; i++) { table->mem->free_fcn(table->v[i]); @@ -5839,8 +7114,7 @@ hashTableClear(HASH_TABLE *table) } static void FASTCALL -hashTableDestroy(HASH_TABLE *table) -{ +hashTableDestroy(HASH_TABLE *table) { size_t i; for (i = 0; i < table->size; i++) table->mem->free_fcn(table->v[i]); @@ -5848,8 +7122,7 @@ hashTableDestroy(HASH_TABLE *table) } static void FASTCALL -hashTableInit(HASH_TABLE *p, const XML_Memory_Handling_Suite *ms) -{ +hashTableInit(HASH_TABLE *p, const XML_Memory_Handling_Suite *ms) { p->power = 0; p->size = 0; p->used = 0; @@ -5858,15 +7131,13 @@ hashTableInit(HASH_TABLE *p, const XML_Memory_Handling_Suite *ms) } static void FASTCALL -hashTableIterInit(HASH_TABLE_ITER *iter, const HASH_TABLE *table) -{ +hashTableIterInit(HASH_TABLE_ITER *iter, const HASH_TABLE *table) { iter->p = table->v; - iter->end = iter->p + table->size; + iter->end = iter->p ? iter->p + table->size : NULL; } -static NAMED * FASTCALL -hashTableIterNext(HASH_TABLE_ITER *iter) -{ +static NAMED *FASTCALL +hashTableIterNext(HASH_TABLE_ITER *iter) { while (iter->p != iter->end) { NAMED *tem = *(iter->p)++; if (tem) @@ -5876,8 +7147,7 @@ hashTableIterNext(HASH_TABLE_ITER *iter) } static void FASTCALL -poolInit(STRING_POOL *pool, const XML_Memory_Handling_Suite *ms) -{ +poolInit(STRING_POOL *pool, const XML_Memory_Handling_Suite *ms) { pool->blocks = NULL; pool->freeBlocks = NULL; pool->start = NULL; @@ -5887,9 +7157,8 @@ poolInit(STRING_POOL *pool, const XML_Memory_Handling_Suite *ms) } static void FASTCALL -poolClear(STRING_POOL *pool) -{ - if (!pool->freeBlocks) +poolClear(STRING_POOL *pool) { + if (! pool->freeBlocks) pool->freeBlocks = pool->blocks; else { BLOCK *p = pool->blocks; @@ -5907,8 +7176,7 @@ poolClear(STRING_POOL *pool) } static void FASTCALL -poolDestroy(STRING_POOL *pool) -{ +poolDestroy(STRING_POOL *pool) { BLOCK *p = pool->blocks; while (p) { BLOCK *tem = p->next; @@ -5924,26 +7192,26 @@ poolDestroy(STRING_POOL *pool) } static XML_Char * -poolAppend(STRING_POOL *pool, const ENCODING *enc, - const char *ptr, const char *end) -{ - if (!pool->ptr && !poolGrow(pool)) +poolAppend(STRING_POOL *pool, const ENCODING *enc, const char *ptr, + const char *end) { + if (! pool->ptr && ! poolGrow(pool)) return NULL; for (;;) { - XmlConvert(enc, &ptr, end, (ICHAR **)&(pool->ptr), (ICHAR *)pool->end); - if (ptr == end) + const enum XML_Convert_Result convert_res = XmlConvert( + enc, &ptr, end, (ICHAR **)&(pool->ptr), (ICHAR *)pool->end); + if ((convert_res == XML_CONVERT_COMPLETED) + || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) break; - if (!poolGrow(pool)) + if (! poolGrow(pool)) return NULL; } return pool->start; } -static const XML_Char * FASTCALL -poolCopyString(STRING_POOL *pool, const XML_Char *s) -{ +static const XML_Char *FASTCALL +poolCopyString(STRING_POOL *pool, const XML_Char *s) { do { - if (!poolAppendChar(pool, *s)) + if (! poolAppendChar(pool, *s)) return NULL; } while (*s++); s = pool->start; @@ -5952,12 +7220,23 @@ poolCopyString(STRING_POOL *pool, const XML_Char *s) } static const XML_Char * -poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n) -{ - if (!pool->ptr && !poolGrow(pool)) - return NULL; +poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n) { + if (! pool->ptr && ! poolGrow(pool)) { + /* The following line is unreachable given the current usage of + * poolCopyStringN(). Currently it is called from exactly one + * place to copy the text of a simple general entity. By that + * point, the name of the entity is already stored in the pool, so + * pool->ptr cannot be NULL. + * + * If poolCopyStringN() is used elsewhere as it well might be, + * this line may well become executable again. Regardless, this + * sort of check shouldn't be removed lightly, so we just exclude + * it from the coverage statistics. + */ + return NULL; /* LCOV_EXCL_LINE */ + } for (; n > 0; --n, s++) { - if (!poolAppendChar(pool, *s)) + if (! poolAppendChar(pool, *s)) return NULL; } s = pool->start; @@ -5965,11 +7244,10 @@ poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n) return s; } -static const XML_Char * FASTCALL -poolAppendString(STRING_POOL *pool, const XML_Char *s) -{ +static const XML_Char *FASTCALL +poolAppendString(STRING_POOL *pool, const XML_Char *s) { while (*s) { - if (!poolAppendChar(pool, *s)) + if (! poolAppendChar(pool, *s)) return NULL; s++; } @@ -5977,20 +7255,46 @@ poolAppendString(STRING_POOL *pool, const XML_Char *s) } static XML_Char * -poolStoreString(STRING_POOL *pool, const ENCODING *enc, - const char *ptr, const char *end) -{ - if (!poolAppend(pool, enc, ptr, end)) +poolStoreString(STRING_POOL *pool, const ENCODING *enc, const char *ptr, + const char *end) { + if (! poolAppend(pool, enc, ptr, end)) return NULL; - if (pool->ptr == pool->end && !poolGrow(pool)) + if (pool->ptr == pool->end && ! poolGrow(pool)) return NULL; *(pool->ptr)++ = 0; return pool->start; } +static size_t +poolBytesToAllocateFor(int blockSize) { + /* Unprotected math would be: + ** return offsetof(BLOCK, s) + blockSize * sizeof(XML_Char); + ** + ** Detect overflow, avoiding _signed_ overflow undefined behavior + ** For a + b * c we check b * c in isolation first, so that addition of a + ** on top has no chance of making us accept a small non-negative number + */ + const size_t stretch = sizeof(XML_Char); /* can be 4 bytes */ + + if (blockSize <= 0) + return 0; + + if (blockSize > (int)(INT_MAX / stretch)) + return 0; + + { + const int stretchedBlockSize = blockSize * (int)stretch; + const int bytesToAllocate + = (int)(offsetof(BLOCK, s) + (unsigned)stretchedBlockSize); + if (bytesToAllocate < 0) + return 0; + + return (size_t)bytesToAllocate; + } +} + static XML_Bool FASTCALL -poolGrow(STRING_POOL *pool) -{ +poolGrow(STRING_POOL *pool) { if (pool->freeBlocks) { if (pool->start == 0) { pool->blocks = pool->freeBlocks; @@ -6015,35 +7319,77 @@ poolGrow(STRING_POOL *pool) } } if (pool->blocks && pool->start == pool->blocks->s) { - int blockSize = (pool->end - pool->start)*2; - pool->blocks = (BLOCK *) - pool->mem->realloc_fcn(pool->blocks, - (offsetof(BLOCK, s) - + blockSize * sizeof(XML_Char))); - if (pool->blocks == NULL) + BLOCK *temp; + int blockSize = (int)((unsigned)(pool->end - pool->start) * 2U); + size_t bytesToAllocate; + + /* NOTE: Needs to be calculated prior to calling `realloc` + to avoid dangling pointers: */ + const ptrdiff_t offsetInsideBlock = pool->ptr - pool->start; + + if (blockSize < 0) { + /* This condition traps a situation where either more than + * INT_MAX/2 bytes have already been allocated. This isn't + * readily testable, since it is unlikely that an average + * machine will have that much memory, so we exclude it from the + * coverage statistics. + */ + return XML_FALSE; /* LCOV_EXCL_LINE */ + } + + bytesToAllocate = poolBytesToAllocateFor(blockSize); + if (bytesToAllocate == 0) + return XML_FALSE; + + temp = (BLOCK *)pool->mem->realloc_fcn(pool->blocks, + (unsigned)bytesToAllocate); + if (temp == NULL) return XML_FALSE; + pool->blocks = temp; pool->blocks->size = blockSize; - pool->ptr = pool->blocks->s + (pool->ptr - pool->start); + pool->ptr = pool->blocks->s + offsetInsideBlock; pool->start = pool->blocks->s; pool->end = pool->start + blockSize; - } - else { + } else { BLOCK *tem; - int blockSize = pool->end - pool->start; + int blockSize = (int)(pool->end - pool->start); + size_t bytesToAllocate; + + if (blockSize < 0) { + /* This condition traps a situation where either more than + * INT_MAX bytes have already been allocated (which is prevented + * by various pieces of program logic, not least this one, never + * mind the unlikelihood of actually having that much memory) or + * the pool control fields have been corrupted (which could + * conceivably happen in an extremely buggy user handler + * function). Either way it isn't readily testable, so we + * exclude it from the coverage statistics. + */ + return XML_FALSE; /* LCOV_EXCL_LINE */ + } + if (blockSize < INIT_BLOCK_SIZE) blockSize = INIT_BLOCK_SIZE; - else + else { + /* Detect overflow, avoiding _signed_ overflow undefined behavior */ + if ((int)((unsigned)blockSize * 2U) < 0) { + return XML_FALSE; + } blockSize *= 2; - tem = (BLOCK *)pool->mem->malloc_fcn(offsetof(BLOCK, s) - + blockSize * sizeof(XML_Char)); - if (!tem) + } + + bytesToAllocate = poolBytesToAllocateFor(blockSize); + if (bytesToAllocate == 0) + return XML_FALSE; + + tem = pool->mem->malloc_fcn(bytesToAllocate); + if (! tem) return XML_FALSE; tem->size = blockSize; tem->next = pool->blocks; pool->blocks = tem; if (pool->ptr != pool->start) - memcpy(tem->s, pool->start, - (pool->ptr - pool->start) * sizeof(XML_Char)); + memcpy(tem->s, pool->start, (pool->ptr - pool->start) * sizeof(XML_Char)); pool->ptr = tem->s + (pool->ptr - pool->start); pool->start = tem->s; pool->end = tem->s + blockSize; @@ -6052,15 +7398,14 @@ poolGrow(STRING_POOL *pool) } static int FASTCALL -nextScaffoldPart(XML_Parser parser) -{ - DTD * const dtd = _dtd; /* save one level of indirection */ - CONTENT_SCAFFOLD * me; +nextScaffoldPart(XML_Parser parser) { + DTD *const dtd = parser->m_dtd; /* save one level of indirection */ + CONTENT_SCAFFOLD *me; int next; - if (!dtd->scaffIndex) { - dtd->scaffIndex = (int *)MALLOC(groupSize * sizeof(int)); - if (!dtd->scaffIndex) + if (! dtd->scaffIndex) { + dtd->scaffIndex = (int *)MALLOC(parser, parser->m_groupSize * sizeof(int)); + if (! dtd->scaffIndex) return -1; dtd->scaffIndex[0] = 0; } @@ -6068,15 +7413,28 @@ nextScaffoldPart(XML_Parser parser) if (dtd->scaffCount >= dtd->scaffSize) { CONTENT_SCAFFOLD *temp; if (dtd->scaffold) { - temp = (CONTENT_SCAFFOLD *) - REALLOC(dtd->scaffold, dtd->scaffSize * 2 * sizeof(CONTENT_SCAFFOLD)); + /* Detect and prevent integer overflow */ + if (dtd->scaffSize > UINT_MAX / 2u) { + return -1; + } + /* Detect and prevent integer overflow. + * The preprocessor guard addresses the "always false" warning + * from -Wtype-limits on platforms where + * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ +#if UINT_MAX >= SIZE_MAX + if (dtd->scaffSize > (size_t)(-1) / 2u / sizeof(CONTENT_SCAFFOLD)) { + return -1; + } +#endif + + temp = (CONTENT_SCAFFOLD *)REALLOC( + parser, dtd->scaffold, dtd->scaffSize * 2 * sizeof(CONTENT_SCAFFOLD)); if (temp == NULL) return -1; dtd->scaffSize *= 2; - } - else { - temp = (CONTENT_SCAFFOLD *)MALLOC(INIT_SCAFFOLD_ELEMENTS - * sizeof(CONTENT_SCAFFOLD)); + } else { + temp = (CONTENT_SCAFFOLD *)MALLOC(parser, INIT_SCAFFOLD_ELEMENTS + * sizeof(CONTENT_SCAFFOLD)); if (temp == NULL) return -1; dtd->scaffSize = INIT_SCAFFOLD_ELEMENTS; @@ -6086,11 +7444,12 @@ nextScaffoldPart(XML_Parser parser) next = dtd->scaffCount++; me = &dtd->scaffold[next]; if (dtd->scaffLevel) { - CONTENT_SCAFFOLD *parent = &dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel-1]]; + CONTENT_SCAFFOLD *parent + = &dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]]; if (parent->lastchild) { dtd->scaffold[parent->lastchild].nextsib = next; } - if (!parent->childcnt) + if (! parent->childcnt) parent->firstchild = next; parent->lastchild = next; parent->childcnt++; @@ -6099,86 +7458,925 @@ nextScaffoldPart(XML_Parser parser) return next; } -static void -build_node(XML_Parser parser, - int src_node, - XML_Content *dest, - XML_Content **contpos, - XML_Char **strpos) -{ - DTD * const dtd = _dtd; /* save one level of indirection */ - dest->type = dtd->scaffold[src_node].type; - dest->quant = dtd->scaffold[src_node].quant; - if (dest->type == XML_CTYPE_NAME) { - const XML_Char *src; - dest->name = *strpos; - src = dtd->scaffold[src_node].name; - for (;;) { - *(*strpos)++ = *src; - if (!*src) - break; - src++; - } - dest->numchildren = 0; - dest->children = NULL; +static XML_Content * +build_model(XML_Parser parser) { + /* Function build_model transforms the existing parser->m_dtd->scaffold + * array of CONTENT_SCAFFOLD tree nodes into a new array of + * XML_Content tree nodes followed by a gapless list of zero-terminated + * strings. */ + DTD *const dtd = parser->m_dtd; /* save one level of indirection */ + XML_Content *ret; + XML_Char *str; /* the current string writing location */ + + /* Detect and prevent integer overflow. + * The preprocessor guard addresses the "always false" warning + * from -Wtype-limits on platforms where + * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ +#if UINT_MAX >= SIZE_MAX + if (dtd->scaffCount > (size_t)(-1) / sizeof(XML_Content)) { + return NULL; } - else { - unsigned int i; - int cn; - dest->numchildren = dtd->scaffold[src_node].childcnt; - dest->children = *contpos; - *contpos += dest->numchildren; - for (i = 0, cn = dtd->scaffold[src_node].firstchild; - i < dest->numchildren; - i++, cn = dtd->scaffold[cn].nextsib) { - build_node(parser, cn, &(dest->children[i]), contpos, strpos); - } - dest->name = NULL; + if (dtd->contentStringLen > (size_t)(-1) / sizeof(XML_Char)) { + return NULL; + } +#endif + if (dtd->scaffCount * sizeof(XML_Content) + > (size_t)(-1) - dtd->contentStringLen * sizeof(XML_Char)) { + return NULL; } -} -static XML_Content * -build_model (XML_Parser parser) -{ - DTD * const dtd = _dtd; /* save one level of indirection */ - XML_Content *ret; - XML_Content *cpos; - XML_Char * str; - int allocsize = (dtd->scaffCount * sizeof(XML_Content) - + (dtd->contentStringLen * sizeof(XML_Char))); + const size_t allocsize = (dtd->scaffCount * sizeof(XML_Content) + + (dtd->contentStringLen * sizeof(XML_Char))); - ret = (XML_Content *)MALLOC(allocsize); - if (!ret) + ret = (XML_Content *)MALLOC(parser, allocsize); + if (! ret) return NULL; - str = (XML_Char *) (&ret[dtd->scaffCount]); - cpos = &ret[1]; + /* What follows is an iterative implementation (of what was previously done + * recursively in a dedicated function called "build_node". The old recursive + * build_node could be forced into stack exhaustion from input as small as a + * few megabyte, and so that was a security issue. Hence, a function call + * stack is avoided now by resolving recursion.) + * + * The iterative approach works as follows: + * + * - We have two writing pointers, both walking up the result array; one does + * the work, the other creates "jobs" for its colleague to do, and leads + * the way: + * + * - The faster one, pointer jobDest, always leads and writes "what job + * to do" by the other, once they reach that place in the + * array: leader "jobDest" stores the source node array index (relative + * to array dtd->scaffold) in field "numchildren". + * + * - The slower one, pointer dest, looks at the value stored in the + * "numchildren" field (which actually holds a source node array index + * at that time) and puts the real data from dtd->scaffold in. + * + * - Before the loop starts, jobDest writes source array index 0 + * (where the root node is located) so that dest will have something to do + * when it starts operation. + * + * - Whenever nodes with children are encountered, jobDest appends + * them as new jobs, in order. As a result, tree node siblings are + * adjacent in the resulting array, for example: + * + * [0] root, has two children + * [1] first child of 0, has three children + * [3] first child of 1, does not have children + * [4] second child of 1, does not have children + * [5] third child of 1, does not have children + * [2] second child of 0, does not have children + * + * Or (the same data) presented in flat array view: + * + * [0] root, has two children + * + * [1] first child of 0, has three children + * [2] second child of 0, does not have children + * + * [3] first child of 1, does not have children + * [4] second child of 1, does not have children + * [5] third child of 1, does not have children + * + * - The algorithm repeats until all target array indices have been processed. + */ + XML_Content *dest = ret; /* tree node writing location, moves upwards */ + XML_Content *const destLimit = &ret[dtd->scaffCount]; + XML_Content *jobDest = ret; /* next free writing location in target array */ + str = (XML_Char *)&ret[dtd->scaffCount]; + + /* Add the starting job, the root node (index 0) of the source tree */ + (jobDest++)->numchildren = 0; + + for (; dest < destLimit; dest++) { + /* Retrieve source tree array index from job storage */ + const int src_node = (int)dest->numchildren; + + /* Convert item */ + dest->type = dtd->scaffold[src_node].type; + dest->quant = dtd->scaffold[src_node].quant; + if (dest->type == XML_CTYPE_NAME) { + const XML_Char *src; + dest->name = str; + src = dtd->scaffold[src_node].name; + for (;;) { + *str++ = *src; + if (! *src) + break; + src++; + } + dest->numchildren = 0; + dest->children = NULL; + } else { + unsigned int i; + int cn; + dest->name = NULL; + dest->numchildren = dtd->scaffold[src_node].childcnt; + dest->children = jobDest; + + /* Append scaffold indices of children to array */ + for (i = 0, cn = dtd->scaffold[src_node].firstchild; + i < dest->numchildren; i++, cn = dtd->scaffold[cn].nextsib) + (jobDest++)->numchildren = (unsigned int)cn; + } + } - build_node(parser, 0, ret, &cpos, &str); return ret; } static ELEMENT_TYPE * -getElementType(XML_Parser parser, - const ENCODING *enc, - const char *ptr, - const char *end) -{ - DTD * const dtd = _dtd; /* save one level of indirection */ +getElementType(XML_Parser parser, const ENCODING *enc, const char *ptr, + const char *end) { + DTD *const dtd = parser->m_dtd; /* save one level of indirection */ const XML_Char *name = poolStoreString(&dtd->pool, enc, ptr, end); ELEMENT_TYPE *ret; - if (!name) + if (! name) return NULL; - ret = (ELEMENT_TYPE *) lookup(&dtd->elementTypes, name, sizeof(ELEMENT_TYPE)); - if (!ret) + ret = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, name, + sizeof(ELEMENT_TYPE)); + if (! ret) return NULL; if (ret->name != name) poolDiscard(&dtd->pool); else { poolFinish(&dtd->pool); - if (!setElementTypePrefix(parser, ret)) + if (! setElementTypePrefix(parser, ret)) return NULL; } return ret; } + +static XML_Char * +copyString(const XML_Char *s, const XML_Memory_Handling_Suite *memsuite) { + size_t charsRequired = 0; + XML_Char *result; + + /* First determine how long the string is */ + while (s[charsRequired] != 0) { + charsRequired++; + } + /* Include the terminator */ + charsRequired++; + + /* Now allocate space for the copy */ + result = memsuite->malloc_fcn(charsRequired * sizeof(XML_Char)); + if (result == NULL) + return NULL; + /* Copy the original into place */ + memcpy(result, s, charsRequired * sizeof(XML_Char)); + return result; +} + +#ifdef XML_DTD + +static float +accountingGetCurrentAmplification(XML_Parser rootParser) { + const XmlBigCount countBytesOutput + = rootParser->m_accounting.countBytesDirect + + rootParser->m_accounting.countBytesIndirect; + const float amplificationFactor + = rootParser->m_accounting.countBytesDirect + ? (countBytesOutput + / (float)(rootParser->m_accounting.countBytesDirect)) + : 1.0f; + assert(! rootParser->m_parentParser); + return amplificationFactor; +} + +static void +accountingReportStats(XML_Parser originParser, const char *epilog) { + const XML_Parser rootParser = getRootParserOf(originParser, NULL); + assert(! rootParser->m_parentParser); + + if (rootParser->m_accounting.debugLevel < 1) { + return; + } + + const float amplificationFactor + = accountingGetCurrentAmplification(rootParser); + fprintf(stderr, + "expat: Accounting(%p): Direct " EXPAT_FMT_ULL( + "10") ", indirect " EXPAT_FMT_ULL("10") ", amplification %8.2f%s", + (void *)rootParser, rootParser->m_accounting.countBytesDirect, + rootParser->m_accounting.countBytesIndirect, + (double)amplificationFactor, epilog); +} + +static void +accountingOnAbort(XML_Parser originParser) { + accountingReportStats(originParser, " ABORTING\n"); +} + +static void +accountingReportDiff(XML_Parser rootParser, + unsigned int levelsAwayFromRootParser, const char *before, + const char *after, ptrdiff_t bytesMore, int source_line, + enum XML_Account account) { + assert(! rootParser->m_parentParser); + + fprintf(stderr, + " (+" EXPAT_FMT_PTRDIFF_T("6") " bytes %s|%d, xmlparse.c:%d) %*s\"", + bytesMore, (account == XML_ACCOUNT_DIRECT) ? "DIR" : "EXP", + levelsAwayFromRootParser, source_line, 10, ""); + + const char ellipis[] = "[..]"; + const size_t ellipsisLength = sizeof(ellipis) /* because compile-time */ - 1; + const unsigned int contextLength = 10; + + /* Note: Performance is of no concern here */ + const char *walker = before; + if ((rootParser->m_accounting.debugLevel >= 3) + || (after - before) + <= (ptrdiff_t)(contextLength + ellipsisLength + contextLength)) { + for (; walker < after; walker++) { + fprintf(stderr, "%s", unsignedCharToPrintable(walker[0])); + } + } else { + for (; walker < before + contextLength; walker++) { + fprintf(stderr, "%s", unsignedCharToPrintable(walker[0])); + } + fprintf(stderr, ellipis); + walker = after - contextLength; + for (; walker < after; walker++) { + fprintf(stderr, "%s", unsignedCharToPrintable(walker[0])); + } + } + fprintf(stderr, "\"\n"); +} + +static XML_Bool +accountingDiffTolerated(XML_Parser originParser, int tok, const char *before, + const char *after, int source_line, + enum XML_Account account) { + /* Note: We need to check the token type *first* to be sure that + * we can even access variable , safely. + * E.g. for XML_TOK_NONE may hold an invalid pointer. */ + switch (tok) { + case XML_TOK_INVALID: + case XML_TOK_PARTIAL: + case XML_TOK_PARTIAL_CHAR: + case XML_TOK_NONE: + return XML_TRUE; + } + + if (account == XML_ACCOUNT_NONE) + return XML_TRUE; /* because these bytes have been accounted for, already */ + + unsigned int levelsAwayFromRootParser; + const XML_Parser rootParser + = getRootParserOf(originParser, &levelsAwayFromRootParser); + assert(! rootParser->m_parentParser); + + const int isDirect + = (account == XML_ACCOUNT_DIRECT) && (originParser == rootParser); + const ptrdiff_t bytesMore = after - before; + + XmlBigCount *const additionTarget + = isDirect ? &rootParser->m_accounting.countBytesDirect + : &rootParser->m_accounting.countBytesIndirect; + + /* Detect and avoid integer overflow */ + if (*additionTarget > (XmlBigCount)(-1) - (XmlBigCount)bytesMore) + return XML_FALSE; + *additionTarget += bytesMore; + + const XmlBigCount countBytesOutput + = rootParser->m_accounting.countBytesDirect + + rootParser->m_accounting.countBytesIndirect; + const float amplificationFactor + = accountingGetCurrentAmplification(rootParser); + const XML_Bool tolerated + = (countBytesOutput < rootParser->m_accounting.activationThresholdBytes) + || (amplificationFactor + <= rootParser->m_accounting.maximumAmplificationFactor); + + if (rootParser->m_accounting.debugLevel >= 2) { + accountingReportStats(rootParser, ""); + accountingReportDiff(rootParser, levelsAwayFromRootParser, before, after, + bytesMore, source_line, account); + } + + return tolerated; +} + +unsigned long long +testingAccountingGetCountBytesDirect(XML_Parser parser) { + if (! parser) + return 0; + return parser->m_accounting.countBytesDirect; +} + +unsigned long long +testingAccountingGetCountBytesIndirect(XML_Parser parser) { + if (! parser) + return 0; + return parser->m_accounting.countBytesIndirect; +} + +static void +entityTrackingReportStats(XML_Parser rootParser, ENTITY *entity, + const char *action, int sourceLine) { + assert(! rootParser->m_parentParser); + if (rootParser->m_entity_stats.debugLevel < 1) + return; + +# if defined(XML_UNICODE) + const char *const entityName = "[..]"; +# else + const char *const entityName = entity->name; +# endif + + fprintf( + stderr, + "expat: Entities(%p): Count %9d, depth %2d/%2d %*s%s%s; %s length %d (xmlparse.c:%d)\n", + (void *)rootParser, rootParser->m_entity_stats.countEverOpened, + rootParser->m_entity_stats.currentDepth, + rootParser->m_entity_stats.maximumDepthSeen, + (rootParser->m_entity_stats.currentDepth - 1) * 2, "", + entity->is_param ? "%" : "&", entityName, action, entity->textLen, + sourceLine); +} + +static void +entityTrackingOnOpen(XML_Parser originParser, ENTITY *entity, int sourceLine) { + const XML_Parser rootParser = getRootParserOf(originParser, NULL); + assert(! rootParser->m_parentParser); + + rootParser->m_entity_stats.countEverOpened++; + rootParser->m_entity_stats.currentDepth++; + if (rootParser->m_entity_stats.currentDepth + > rootParser->m_entity_stats.maximumDepthSeen) { + rootParser->m_entity_stats.maximumDepthSeen++; + } + + entityTrackingReportStats(rootParser, entity, "OPEN ", sourceLine); +} + +static void +entityTrackingOnClose(XML_Parser originParser, ENTITY *entity, int sourceLine) { + const XML_Parser rootParser = getRootParserOf(originParser, NULL); + assert(! rootParser->m_parentParser); + + entityTrackingReportStats(rootParser, entity, "CLOSE", sourceLine); + rootParser->m_entity_stats.currentDepth--; +} + +static XML_Parser +getRootParserOf(XML_Parser parser, unsigned int *outLevelDiff) { + XML_Parser rootParser = parser; + unsigned int stepsTakenUpwards = 0; + while (rootParser->m_parentParser) { + rootParser = rootParser->m_parentParser; + stepsTakenUpwards++; + } + assert(! rootParser->m_parentParser); + if (outLevelDiff != NULL) { + *outLevelDiff = stepsTakenUpwards; + } + return rootParser; +} + +const char * +unsignedCharToPrintable(unsigned char c) { + switch (c) { + case 0: + return "\\0"; + case 1: + return "\\x1"; + case 2: + return "\\x2"; + case 3: + return "\\x3"; + case 4: + return "\\x4"; + case 5: + return "\\x5"; + case 6: + return "\\x6"; + case 7: + return "\\x7"; + case 8: + return "\\x8"; + case 9: + return "\\t"; + case 10: + return "\\n"; + case 11: + return "\\xB"; + case 12: + return "\\xC"; + case 13: + return "\\r"; + case 14: + return "\\xE"; + case 15: + return "\\xF"; + case 16: + return "\\x10"; + case 17: + return "\\x11"; + case 18: + return "\\x12"; + case 19: + return "\\x13"; + case 20: + return "\\x14"; + case 21: + return "\\x15"; + case 22: + return "\\x16"; + case 23: + return "\\x17"; + case 24: + return "\\x18"; + case 25: + return "\\x19"; + case 26: + return "\\x1A"; + case 27: + return "\\x1B"; + case 28: + return "\\x1C"; + case 29: + return "\\x1D"; + case 30: + return "\\x1E"; + case 31: + return "\\x1F"; + case 32: + return " "; + case 33: + return "!"; + case 34: + return "\\\""; + case 35: + return "#"; + case 36: + return "$"; + case 37: + return "%"; + case 38: + return "&"; + case 39: + return "'"; + case 40: + return "("; + case 41: + return ")"; + case 42: + return "*"; + case 43: + return "+"; + case 44: + return ","; + case 45: + return "-"; + case 46: + return "."; + case 47: + return "/"; + case 48: + return "0"; + case 49: + return "1"; + case 50: + return "2"; + case 51: + return "3"; + case 52: + return "4"; + case 53: + return "5"; + case 54: + return "6"; + case 55: + return "7"; + case 56: + return "8"; + case 57: + return "9"; + case 58: + return ":"; + case 59: + return ";"; + case 60: + return "<"; + case 61: + return "="; + case 62: + return ">"; + case 63: + return "?"; + case 64: + return "@"; + case 65: + return "A"; + case 66: + return "B"; + case 67: + return "C"; + case 68: + return "D"; + case 69: + return "E"; + case 70: + return "F"; + case 71: + return "G"; + case 72: + return "H"; + case 73: + return "I"; + case 74: + return "J"; + case 75: + return "K"; + case 76: + return "L"; + case 77: + return "M"; + case 78: + return "N"; + case 79: + return "O"; + case 80: + return "P"; + case 81: + return "Q"; + case 82: + return "R"; + case 83: + return "S"; + case 84: + return "T"; + case 85: + return "U"; + case 86: + return "V"; + case 87: + return "W"; + case 88: + return "X"; + case 89: + return "Y"; + case 90: + return "Z"; + case 91: + return "["; + case 92: + return "\\\\"; + case 93: + return "]"; + case 94: + return "^"; + case 95: + return "_"; + case 96: + return "`"; + case 97: + return "a"; + case 98: + return "b"; + case 99: + return "c"; + case 100: + return "d"; + case 101: + return "e"; + case 102: + return "f"; + case 103: + return "g"; + case 104: + return "h"; + case 105: + return "i"; + case 106: + return "j"; + case 107: + return "k"; + case 108: + return "l"; + case 109: + return "m"; + case 110: + return "n"; + case 111: + return "o"; + case 112: + return "p"; + case 113: + return "q"; + case 114: + return "r"; + case 115: + return "s"; + case 116: + return "t"; + case 117: + return "u"; + case 118: + return "v"; + case 119: + return "w"; + case 120: + return "x"; + case 121: + return "y"; + case 122: + return "z"; + case 123: + return "{"; + case 124: + return "|"; + case 125: + return "}"; + case 126: + return "~"; + case 127: + return "\\x7F"; + case 128: + return "\\x80"; + case 129: + return "\\x81"; + case 130: + return "\\x82"; + case 131: + return "\\x83"; + case 132: + return "\\x84"; + case 133: + return "\\x85"; + case 134: + return "\\x86"; + case 135: + return "\\x87"; + case 136: + return "\\x88"; + case 137: + return "\\x89"; + case 138: + return "\\x8A"; + case 139: + return "\\x8B"; + case 140: + return "\\x8C"; + case 141: + return "\\x8D"; + case 142: + return "\\x8E"; + case 143: + return "\\x8F"; + case 144: + return "\\x90"; + case 145: + return "\\x91"; + case 146: + return "\\x92"; + case 147: + return "\\x93"; + case 148: + return "\\x94"; + case 149: + return "\\x95"; + case 150: + return "\\x96"; + case 151: + return "\\x97"; + case 152: + return "\\x98"; + case 153: + return "\\x99"; + case 154: + return "\\x9A"; + case 155: + return "\\x9B"; + case 156: + return "\\x9C"; + case 157: + return "\\x9D"; + case 158: + return "\\x9E"; + case 159: + return "\\x9F"; + case 160: + return "\\xA0"; + case 161: + return "\\xA1"; + case 162: + return "\\xA2"; + case 163: + return "\\xA3"; + case 164: + return "\\xA4"; + case 165: + return "\\xA5"; + case 166: + return "\\xA6"; + case 167: + return "\\xA7"; + case 168: + return "\\xA8"; + case 169: + return "\\xA9"; + case 170: + return "\\xAA"; + case 171: + return "\\xAB"; + case 172: + return "\\xAC"; + case 173: + return "\\xAD"; + case 174: + return "\\xAE"; + case 175: + return "\\xAF"; + case 176: + return "\\xB0"; + case 177: + return "\\xB1"; + case 178: + return "\\xB2"; + case 179: + return "\\xB3"; + case 180: + return "\\xB4"; + case 181: + return "\\xB5"; + case 182: + return "\\xB6"; + case 183: + return "\\xB7"; + case 184: + return "\\xB8"; + case 185: + return "\\xB9"; + case 186: + return "\\xBA"; + case 187: + return "\\xBB"; + case 188: + return "\\xBC"; + case 189: + return "\\xBD"; + case 190: + return "\\xBE"; + case 191: + return "\\xBF"; + case 192: + return "\\xC0"; + case 193: + return "\\xC1"; + case 194: + return "\\xC2"; + case 195: + return "\\xC3"; + case 196: + return "\\xC4"; + case 197: + return "\\xC5"; + case 198: + return "\\xC6"; + case 199: + return "\\xC7"; + case 200: + return "\\xC8"; + case 201: + return "\\xC9"; + case 202: + return "\\xCA"; + case 203: + return "\\xCB"; + case 204: + return "\\xCC"; + case 205: + return "\\xCD"; + case 206: + return "\\xCE"; + case 207: + return "\\xCF"; + case 208: + return "\\xD0"; + case 209: + return "\\xD1"; + case 210: + return "\\xD2"; + case 211: + return "\\xD3"; + case 212: + return "\\xD4"; + case 213: + return "\\xD5"; + case 214: + return "\\xD6"; + case 215: + return "\\xD7"; + case 216: + return "\\xD8"; + case 217: + return "\\xD9"; + case 218: + return "\\xDA"; + case 219: + return "\\xDB"; + case 220: + return "\\xDC"; + case 221: + return "\\xDD"; + case 222: + return "\\xDE"; + case 223: + return "\\xDF"; + case 224: + return "\\xE0"; + case 225: + return "\\xE1"; + case 226: + return "\\xE2"; + case 227: + return "\\xE3"; + case 228: + return "\\xE4"; + case 229: + return "\\xE5"; + case 230: + return "\\xE6"; + case 231: + return "\\xE7"; + case 232: + return "\\xE8"; + case 233: + return "\\xE9"; + case 234: + return "\\xEA"; + case 235: + return "\\xEB"; + case 236: + return "\\xEC"; + case 237: + return "\\xED"; + case 238: + return "\\xEE"; + case 239: + return "\\xEF"; + case 240: + return "\\xF0"; + case 241: + return "\\xF1"; + case 242: + return "\\xF2"; + case 243: + return "\\xF3"; + case 244: + return "\\xF4"; + case 245: + return "\\xF5"; + case 246: + return "\\xF6"; + case 247: + return "\\xF7"; + case 248: + return "\\xF8"; + case 249: + return "\\xF9"; + case 250: + return "\\xFA"; + case 251: + return "\\xFB"; + case 252: + return "\\xFC"; + case 253: + return "\\xFD"; + case 254: + return "\\xFE"; + case 255: + return "\\xFF"; + default: + assert(0); /* never gets here */ + return "dead code"; + } + assert(0); /* never gets here */ +} + +#endif /* XML_DTD */ + +static unsigned long +getDebugLevel(const char *variableName, unsigned long defaultDebugLevel) { + const char *const valueOrNull = getenv(variableName); + if (valueOrNull == NULL) { + return defaultDebugLevel; + } + const char *const value = valueOrNull; + + errno = 0; + char *afterValue = (char *)value; + unsigned long debugLevel = strtoul(value, &afterValue, 10); + if ((errno != 0) || (afterValue[0] != '\0')) { + errno = 0; + return defaultDebugLevel; + } + + return debugLevel; +} diff --git a/src/simgear/xml/xmlrole.c b/src/simgear/xml/xmlrole.c index deea0eecc..3f0f5c150 100644 --- a/src/simgear/xml/xmlrole.c +++ b/src/simgear/xml/xmlrole.c @@ -1,18 +1,50 @@ -/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd - See the file COPYING for copying permission. +/* + __ __ _ + ___\ \/ /_ __ __ _| |_ + / _ \\ /| '_ \ / _` | __| + | __// \| |_) | (_| | |_ + \___/_/\_\ .__/ \__,_|\__| + |_| XML parser + + Copyright (c) 1997-2000 Thai Open Source Software Center Ltd + Copyright (c) 2000 Clark Cooper + Copyright (c) 2002 Greg Stein + Copyright (c) 2002-2006 Karl Waclawek + Copyright (c) 2002-2003 Fred L. Drake, Jr. + Copyright (c) 2005-2009 Steven Solie + Copyright (c) 2016-2021 Sebastian Pipping + Copyright (c) 2017 Rhodri James + Copyright (c) 2019 David Loffredo + Copyright (c) 2021 Dong-hee Na + Licensed under the MIT license: + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to permit + persons to whom the Software is furnished to do so, subject to the + following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN + NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, + DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + USE OR OTHER DEALINGS IN THE SOFTWARE. */ +#include + #include -#ifdef COMPILED_FROM_DSP -#include "winconfig.h" -#elif defined(MACOS_CLASSIC) -#include "macconfig.h" -#else -#ifdef HAVE_EXPAT_CONFIG_H -#include "expat_config.h" +#ifdef _WIN32 +# include "winconfig.h" #endif -#endif /* ndef COMPILED_FROM_DSP */ #include "expat_external.h" #include "internal.h" @@ -26,103 +58,88 @@ */ -static const char KW_ANY[] = { - ASCII_A, ASCII_N, ASCII_Y, '\0' }; -static const char KW_ATTLIST[] = { - ASCII_A, ASCII_T, ASCII_T, ASCII_L, ASCII_I, ASCII_S, ASCII_T, '\0' }; -static const char KW_CDATA[] = { - ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' }; -static const char KW_DOCTYPE[] = { - ASCII_D, ASCII_O, ASCII_C, ASCII_T, ASCII_Y, ASCII_P, ASCII_E, '\0' }; -static const char KW_ELEMENT[] = { - ASCII_E, ASCII_L, ASCII_E, ASCII_M, ASCII_E, ASCII_N, ASCII_T, '\0' }; -static const char KW_EMPTY[] = { - ASCII_E, ASCII_M, ASCII_P, ASCII_T, ASCII_Y, '\0' }; -static const char KW_ENTITIES[] = { - ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_I, ASCII_E, ASCII_S, - '\0' }; -static const char KW_ENTITY[] = { - ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0' }; -static const char KW_FIXED[] = { - ASCII_F, ASCII_I, ASCII_X, ASCII_E, ASCII_D, '\0' }; -static const char KW_ID[] = { - ASCII_I, ASCII_D, '\0' }; -static const char KW_IDREF[] = { - ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0' }; -static const char KW_IDREFS[] = { - ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0' }; -static const char KW_IGNORE[] = { - ASCII_I, ASCII_G, ASCII_N, ASCII_O, ASCII_R, ASCII_E, '\0' }; -static const char KW_IMPLIED[] = { - ASCII_I, ASCII_M, ASCII_P, ASCII_L, ASCII_I, ASCII_E, ASCII_D, '\0' }; -static const char KW_INCLUDE[] = { - ASCII_I, ASCII_N, ASCII_C, ASCII_L, ASCII_U, ASCII_D, ASCII_E, '\0' }; -static const char KW_NDATA[] = { - ASCII_N, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' }; -static const char KW_NMTOKEN[] = { - ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0' }; -static const char KW_NMTOKENS[] = { - ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, ASCII_S, - '\0' }; -static const char KW_NOTATION[] = - { ASCII_N, ASCII_O, ASCII_T, ASCII_A, ASCII_T, ASCII_I, ASCII_O, ASCII_N, - '\0' }; -static const char KW_PCDATA[] = { - ASCII_P, ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' }; -static const char KW_PUBLIC[] = { - ASCII_P, ASCII_U, ASCII_B, ASCII_L, ASCII_I, ASCII_C, '\0' }; -static const char KW_REQUIRED[] = { - ASCII_R, ASCII_E, ASCII_Q, ASCII_U, ASCII_I, ASCII_R, ASCII_E, ASCII_D, - '\0' }; -static const char KW_SYSTEM[] = { - ASCII_S, ASCII_Y, ASCII_S, ASCII_T, ASCII_E, ASCII_M, '\0' }; +static const char KW_ANY[] = {ASCII_A, ASCII_N, ASCII_Y, '\0'}; +static const char KW_ATTLIST[] + = {ASCII_A, ASCII_T, ASCII_T, ASCII_L, ASCII_I, ASCII_S, ASCII_T, '\0'}; +static const char KW_CDATA[] + = {ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0'}; +static const char KW_DOCTYPE[] + = {ASCII_D, ASCII_O, ASCII_C, ASCII_T, ASCII_Y, ASCII_P, ASCII_E, '\0'}; +static const char KW_ELEMENT[] + = {ASCII_E, ASCII_L, ASCII_E, ASCII_M, ASCII_E, ASCII_N, ASCII_T, '\0'}; +static const char KW_EMPTY[] + = {ASCII_E, ASCII_M, ASCII_P, ASCII_T, ASCII_Y, '\0'}; +static const char KW_ENTITIES[] = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, + ASCII_I, ASCII_E, ASCII_S, '\0'}; +static const char KW_ENTITY[] + = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0'}; +static const char KW_FIXED[] + = {ASCII_F, ASCII_I, ASCII_X, ASCII_E, ASCII_D, '\0'}; +static const char KW_ID[] = {ASCII_I, ASCII_D, '\0'}; +static const char KW_IDREF[] + = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0'}; +static const char KW_IDREFS[] + = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0'}; +#ifdef XML_DTD +static const char KW_IGNORE[] + = {ASCII_I, ASCII_G, ASCII_N, ASCII_O, ASCII_R, ASCII_E, '\0'}; +#endif +static const char KW_IMPLIED[] + = {ASCII_I, ASCII_M, ASCII_P, ASCII_L, ASCII_I, ASCII_E, ASCII_D, '\0'}; +#ifdef XML_DTD +static const char KW_INCLUDE[] + = {ASCII_I, ASCII_N, ASCII_C, ASCII_L, ASCII_U, ASCII_D, ASCII_E, '\0'}; +#endif +static const char KW_NDATA[] + = {ASCII_N, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0'}; +static const char KW_NMTOKEN[] + = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0'}; +static const char KW_NMTOKENS[] = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, + ASCII_E, ASCII_N, ASCII_S, '\0'}; +static const char KW_NOTATION[] = {ASCII_N, ASCII_O, ASCII_T, ASCII_A, ASCII_T, + ASCII_I, ASCII_O, ASCII_N, '\0'}; +static const char KW_PCDATA[] + = {ASCII_P, ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0'}; +static const char KW_PUBLIC[] + = {ASCII_P, ASCII_U, ASCII_B, ASCII_L, ASCII_I, ASCII_C, '\0'}; +static const char KW_REQUIRED[] = {ASCII_R, ASCII_E, ASCII_Q, ASCII_U, ASCII_I, + ASCII_R, ASCII_E, ASCII_D, '\0'}; +static const char KW_SYSTEM[] + = {ASCII_S, ASCII_Y, ASCII_S, ASCII_T, ASCII_E, ASCII_M, '\0'}; #ifndef MIN_BYTES_PER_CHAR -#define MIN_BYTES_PER_CHAR(enc) ((enc)->minBytesPerChar) +# define MIN_BYTES_PER_CHAR(enc) ((enc)->minBytesPerChar) #endif #ifdef XML_DTD -#define setTopLevel(state) \ - ((state)->handler = ((state)->documentEntity \ - ? internalSubset \ - : externalSubset1)) +# define setTopLevel(state) \ + ((state)->handler \ + = ((state)->documentEntity ? internalSubset : externalSubset1)) #else /* not XML_DTD */ -#define setTopLevel(state) ((state)->handler = internalSubset) +# define setTopLevel(state) ((state)->handler = internalSubset) #endif /* not XML_DTD */ -typedef int PTRCALL PROLOG_HANDLER(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, +typedef int PTRCALL PROLOG_HANDLER(PROLOG_STATE *state, int tok, + const char *ptr, const char *end, const ENCODING *enc); -static PROLOG_HANDLER - prolog0, prolog1, prolog2, - doctype0, doctype1, doctype2, doctype3, doctype4, doctype5, - internalSubset, - entity0, entity1, entity2, entity3, entity4, entity5, entity6, - entity7, entity8, entity9, entity10, - notation0, notation1, notation2, notation3, notation4, - attlist0, attlist1, attlist2, attlist3, attlist4, attlist5, attlist6, - attlist7, attlist8, attlist9, - element0, element1, element2, element3, element4, element5, element6, - element7, +static PROLOG_HANDLER prolog0, prolog1, prolog2, doctype0, doctype1, doctype2, + doctype3, doctype4, doctype5, internalSubset, entity0, entity1, entity2, + entity3, entity4, entity5, entity6, entity7, entity8, entity9, entity10, + notation0, notation1, notation2, notation3, notation4, attlist0, attlist1, + attlist2, attlist3, attlist4, attlist5, attlist6, attlist7, attlist8, + attlist9, element0, element1, element2, element3, element4, element5, + element6, element7, #ifdef XML_DTD - externalSubset0, externalSubset1, - condSect0, condSect1, condSect2, + externalSubset0, externalSubset1, condSect0, condSect1, condSect2, #endif /* XML_DTD */ - declClose, - error; + declClose, error; static int FASTCALL common(PROLOG_STATE *state, int tok); static int PTRCALL -prolog0(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +prolog0(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: state->handler = prolog1; @@ -139,10 +156,8 @@ prolog0(PROLOG_STATE *state, case XML_TOK_BOM: return XML_ROLE_NONE; case XML_TOK_DECL_OPEN: - if (!XmlNameMatchesAscii(enc, - ptr + 2 * MIN_BYTES_PER_CHAR(enc), - end, - KW_DOCTYPE)) + if (! XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end, + KW_DOCTYPE)) break; state->handler = doctype0; return XML_ROLE_DOCTYPE_NONE; @@ -154,12 +169,8 @@ prolog0(PROLOG_STATE *state, } static int PTRCALL -prolog1(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +prolog1(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_NONE; @@ -168,12 +179,17 @@ prolog1(PROLOG_STATE *state, case XML_TOK_COMMENT: return XML_ROLE_COMMENT; case XML_TOK_BOM: - return XML_ROLE_NONE; + /* This case can never arise. To reach this role function, the + * parse must have passed through prolog0 and therefore have had + * some form of input, even if only a space. At that point, a + * byte order mark is no longer a valid character (though + * technically it should be interpreted as a non-breaking space), + * so will be rejected by the tokenizing stages. + */ + return XML_ROLE_NONE; /* LCOV_EXCL_LINE */ case XML_TOK_DECL_OPEN: - if (!XmlNameMatchesAscii(enc, - ptr + 2 * MIN_BYTES_PER_CHAR(enc), - end, - KW_DOCTYPE)) + if (! XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end, + KW_DOCTYPE)) break; state->handler = doctype0; return XML_ROLE_DOCTYPE_NONE; @@ -185,12 +201,11 @@ prolog1(PROLOG_STATE *state, } static int PTRCALL -prolog2(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +prolog2(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_NONE; @@ -206,12 +221,11 @@ prolog2(PROLOG_STATE *state, } static int PTRCALL -doctype0(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +doctype0(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_DOCTYPE_NONE; @@ -224,12 +238,8 @@ doctype0(PROLOG_STATE *state, } static int PTRCALL -doctype1(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +doctype1(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_DOCTYPE_NONE; @@ -254,12 +264,11 @@ doctype1(PROLOG_STATE *state, } static int PTRCALL -doctype2(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +doctype2(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_DOCTYPE_NONE; @@ -271,12 +280,11 @@ doctype2(PROLOG_STATE *state, } static int PTRCALL -doctype3(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +doctype3(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_DOCTYPE_NONE; @@ -288,12 +296,11 @@ doctype3(PROLOG_STATE *state, } static int PTRCALL -doctype4(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +doctype4(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_DOCTYPE_NONE; @@ -308,12 +315,11 @@ doctype4(PROLOG_STATE *state, } static int PTRCALL -doctype5(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +doctype5(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_DOCTYPE_NONE; @@ -325,40 +331,28 @@ doctype5(PROLOG_STATE *state, } static int PTRCALL -internalSubset(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +internalSubset(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_NONE; case XML_TOK_DECL_OPEN: - if (XmlNameMatchesAscii(enc, - ptr + 2 * MIN_BYTES_PER_CHAR(enc), - end, + if (XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end, KW_ENTITY)) { state->handler = entity0; return XML_ROLE_ENTITY_NONE; } - if (XmlNameMatchesAscii(enc, - ptr + 2 * MIN_BYTES_PER_CHAR(enc), - end, + if (XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end, KW_ATTLIST)) { state->handler = attlist0; return XML_ROLE_ATTLIST_NONE; } - if (XmlNameMatchesAscii(enc, - ptr + 2 * MIN_BYTES_PER_CHAR(enc), - end, + if (XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end, KW_ELEMENT)) { state->handler = element0; return XML_ROLE_ELEMENT_NONE; } - if (XmlNameMatchesAscii(enc, - ptr + 2 * MIN_BYTES_PER_CHAR(enc), - end, + if (XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end, KW_NOTATION)) { state->handler = notation0; return XML_ROLE_NOTATION_NONE; @@ -382,12 +376,8 @@ internalSubset(PROLOG_STATE *state, #ifdef XML_DTD static int PTRCALL -externalSubset0(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +externalSubset0(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { state->handler = externalSubset1; if (tok == XML_TOK_XML_DECL) return XML_ROLE_TEXT_DECL; @@ -395,12 +385,8 @@ externalSubset0(PROLOG_STATE *state, } static int PTRCALL -externalSubset1(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +externalSubset1(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_COND_SECT_OPEN: state->handler = condSect0; @@ -427,12 +413,11 @@ externalSubset1(PROLOG_STATE *state, #endif /* XML_DTD */ static int PTRCALL -entity0(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +entity0(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_ENTITY_NONE; @@ -447,12 +432,11 @@ entity0(PROLOG_STATE *state, } static int PTRCALL -entity1(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +entity1(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_ENTITY_NONE; @@ -464,12 +448,8 @@ entity1(PROLOG_STATE *state, } static int PTRCALL -entity2(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +entity2(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_ENTITY_NONE; @@ -492,12 +472,11 @@ entity2(PROLOG_STATE *state, } static int PTRCALL -entity3(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +entity3(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_ENTITY_NONE; @@ -509,12 +488,11 @@ entity3(PROLOG_STATE *state, } static int PTRCALL -entity4(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +entity4(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_ENTITY_NONE; @@ -526,12 +504,8 @@ entity4(PROLOG_STATE *state, } static int PTRCALL -entity5(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +entity5(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_ENTITY_NONE; @@ -549,12 +523,11 @@ entity5(PROLOG_STATE *state, } static int PTRCALL -entity6(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +entity6(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_ENTITY_NONE; @@ -567,12 +540,8 @@ entity6(PROLOG_STATE *state, } static int PTRCALL -entity7(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +entity7(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_ENTITY_NONE; @@ -595,12 +564,11 @@ entity7(PROLOG_STATE *state, } static int PTRCALL -entity8(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +entity8(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_ENTITY_NONE; @@ -612,12 +580,11 @@ entity8(PROLOG_STATE *state, } static int PTRCALL -entity9(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +entity9(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_ENTITY_NONE; @@ -629,12 +596,11 @@ entity9(PROLOG_STATE *state, } static int PTRCALL -entity10(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +entity10(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_ENTITY_NONE; @@ -646,12 +612,11 @@ entity10(PROLOG_STATE *state, } static int PTRCALL -notation0(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +notation0(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_NOTATION_NONE; @@ -663,12 +628,8 @@ notation0(PROLOG_STATE *state, } static int PTRCALL -notation1(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +notation1(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_NOTATION_NONE; @@ -687,12 +648,11 @@ notation1(PROLOG_STATE *state, } static int PTRCALL -notation2(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +notation2(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_NOTATION_NONE; @@ -704,12 +664,11 @@ notation2(PROLOG_STATE *state, } static int PTRCALL -notation3(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +notation3(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_NOTATION_NONE; @@ -722,12 +681,11 @@ notation3(PROLOG_STATE *state, } static int PTRCALL -notation4(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +notation4(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_NOTATION_NONE; @@ -743,12 +701,11 @@ notation4(PROLOG_STATE *state, } static int PTRCALL -attlist0(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +attlist0(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_ATTLIST_NONE; @@ -761,12 +718,11 @@ attlist0(PROLOG_STATE *state, } static int PTRCALL -attlist1(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +attlist1(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_ATTLIST_NONE; @@ -782,34 +738,23 @@ attlist1(PROLOG_STATE *state, } static int PTRCALL -attlist2(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +attlist2(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_ATTLIST_NONE; - case XML_TOK_NAME: - { - static const char *types[] = { - KW_CDATA, - KW_ID, - KW_IDREF, - KW_IDREFS, - KW_ENTITY, - KW_ENTITIES, - KW_NMTOKEN, - KW_NMTOKENS, - }; - int i; - for (i = 0; i < (int)(sizeof(types)/sizeof(types[0])); i++) - if (XmlNameMatchesAscii(enc, ptr, end, types[i])) { - state->handler = attlist8; - return XML_ROLE_ATTRIBUTE_TYPE_CDATA + i; - } - } + case XML_TOK_NAME: { + static const char *const types[] = { + KW_CDATA, KW_ID, KW_IDREF, KW_IDREFS, + KW_ENTITY, KW_ENTITIES, KW_NMTOKEN, KW_NMTOKENS, + }; + int i; + for (i = 0; i < (int)(sizeof(types) / sizeof(types[0])); i++) + if (XmlNameMatchesAscii(enc, ptr, end, types[i])) { + state->handler = attlist8; + return XML_ROLE_ATTRIBUTE_TYPE_CDATA + i; + } + } if (XmlNameMatchesAscii(enc, ptr, end, KW_NOTATION)) { state->handler = attlist5; return XML_ROLE_ATTLIST_NONE; @@ -823,12 +768,11 @@ attlist2(PROLOG_STATE *state, } static int PTRCALL -attlist3(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +attlist3(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_ATTLIST_NONE; @@ -842,12 +786,11 @@ attlist3(PROLOG_STATE *state, } static int PTRCALL -attlist4(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +attlist4(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_ATTLIST_NONE; @@ -862,12 +805,11 @@ attlist4(PROLOG_STATE *state, } static int PTRCALL -attlist5(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +attlist5(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_ATTLIST_NONE; @@ -879,12 +821,11 @@ attlist5(PROLOG_STATE *state, } static int PTRCALL -attlist6(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +attlist6(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_ATTLIST_NONE; @@ -896,12 +837,11 @@ attlist6(PROLOG_STATE *state, } static int PTRCALL -attlist7(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +attlist7(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_ATTLIST_NONE; @@ -917,33 +857,23 @@ attlist7(PROLOG_STATE *state, /* default value */ static int PTRCALL -attlist8(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +attlist8(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_ATTLIST_NONE; case XML_TOK_POUND_NAME: - if (XmlNameMatchesAscii(enc, - ptr + MIN_BYTES_PER_CHAR(enc), - end, + if (XmlNameMatchesAscii(enc, ptr + MIN_BYTES_PER_CHAR(enc), end, KW_IMPLIED)) { state->handler = attlist1; return XML_ROLE_IMPLIED_ATTRIBUTE_VALUE; } - if (XmlNameMatchesAscii(enc, - ptr + MIN_BYTES_PER_CHAR(enc), - end, + if (XmlNameMatchesAscii(enc, ptr + MIN_BYTES_PER_CHAR(enc), end, KW_REQUIRED)) { state->handler = attlist1; return XML_ROLE_REQUIRED_ATTRIBUTE_VALUE; } - if (XmlNameMatchesAscii(enc, - ptr + MIN_BYTES_PER_CHAR(enc), - end, + if (XmlNameMatchesAscii(enc, ptr + MIN_BYTES_PER_CHAR(enc), end, KW_FIXED)) { state->handler = attlist9; return XML_ROLE_ATTLIST_NONE; @@ -957,12 +887,11 @@ attlist8(PROLOG_STATE *state, } static int PTRCALL -attlist9(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +attlist9(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_ATTLIST_NONE; @@ -974,12 +903,11 @@ attlist9(PROLOG_STATE *state, } static int PTRCALL -element0(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +element0(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_ELEMENT_NONE; @@ -992,12 +920,8 @@ element0(PROLOG_STATE *state, } static int PTRCALL -element1(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +element1(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_ELEMENT_NONE; @@ -1022,19 +946,13 @@ element1(PROLOG_STATE *state, } static int PTRCALL -element2(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +element2(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_ELEMENT_NONE; case XML_TOK_POUND_NAME: - if (XmlNameMatchesAscii(enc, - ptr + MIN_BYTES_PER_CHAR(enc), - end, + if (XmlNameMatchesAscii(enc, ptr + MIN_BYTES_PER_CHAR(enc), end, KW_PCDATA)) { state->handler = element3; return XML_ROLE_CONTENT_PCDATA; @@ -1062,12 +980,11 @@ element2(PROLOG_STATE *state, } static int PTRCALL -element3(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +element3(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_ELEMENT_NONE; @@ -1087,12 +1004,11 @@ element3(PROLOG_STATE *state, } static int PTRCALL -element4(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +element4(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_ELEMENT_NONE; @@ -1105,12 +1021,11 @@ element4(PROLOG_STATE *state, } static int PTRCALL -element5(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +element5(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_ELEMENT_NONE; @@ -1126,12 +1041,11 @@ element5(PROLOG_STATE *state, } static int PTRCALL -element6(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +element6(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_ELEMENT_NONE; @@ -1156,12 +1070,11 @@ element6(PROLOG_STATE *state, } static int PTRCALL -element7(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +element7(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_ELEMENT_NONE; @@ -1206,12 +1119,8 @@ element7(PROLOG_STATE *state, #ifdef XML_DTD static int PTRCALL -condSect0(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +condSect0(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_NONE; @@ -1230,12 +1139,11 @@ condSect0(PROLOG_STATE *state, } static int PTRCALL -condSect1(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +condSect1(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_NONE; @@ -1248,12 +1156,11 @@ condSect1(PROLOG_STATE *state, } static int PTRCALL -condSect2(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +condSect2(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_NONE; @@ -1267,12 +1174,11 @@ condSect2(PROLOG_STATE *state, #endif /* XML_DTD */ static int PTRCALL -declClose(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +declClose(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: return state->role_none; @@ -1283,30 +1189,52 @@ declClose(PROLOG_STATE *state, return common(state, tok); } +/* This function will only be invoked if the internal logic of the + * parser has broken down. It is used in two cases: + * + * 1: When the XML prolog has been finished. At this point the + * processor (the parser level above these role handlers) should + * switch from prologProcessor to contentProcessor and reinitialise + * the handler function. + * + * 2: When an error has been detected (via common() below). At this + * point again the processor should be switched to errorProcessor, + * which will never call a handler. + * + * The result of this is that error() can only be called if the + * processor switch failed to happen, which is an internal error and + * therefore we shouldn't be able to provoke it simply by using the + * library. It is a necessary backstop, however, so we merely exclude + * it from the coverage statistics. + * + * LCOV_EXCL_START + */ static int PTRCALL -error(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +error(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(state); + UNUSED_P(tok); + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); return XML_ROLE_NONE; } +/* LCOV_EXCL_STOP */ static int FASTCALL -common(PROLOG_STATE *state, int tok) -{ +common(PROLOG_STATE *state, int tok) { #ifdef XML_DTD - if (!state->documentEntity && tok == XML_TOK_PARAM_ENTITY_REF) + if (! state->documentEntity && tok == XML_TOK_PARAM_ENTITY_REF) return XML_ROLE_INNER_PARAM_ENTITY_REF; +#else + UNUSED_P(tok); #endif state->handler = error; return XML_ROLE_ERROR; } void -XmlPrologStateInit(PROLOG_STATE *state) -{ +XmlPrologStateInit(PROLOG_STATE *state) { state->handler = prolog0; #ifdef XML_DTD state->documentEntity = 1; @@ -1318,8 +1246,7 @@ XmlPrologStateInit(PROLOG_STATE *state) #ifdef XML_DTD void -XmlPrologStateInitExternalEntity(PROLOG_STATE *state) -{ +XmlPrologStateInitExternalEntity(PROLOG_STATE *state) { state->handler = externalSubset0; state->documentEntity = 0; state->includeLevel = 0; diff --git a/src/simgear/xml/xmlrole.h b/src/simgear/xml/xmlrole.h index 4dd9f06f9..d6e1fa150 100644 --- a/src/simgear/xml/xmlrole.h +++ b/src/simgear/xml/xmlrole.h @@ -1,5 +1,36 @@ -/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd - See the file COPYING for copying permission. +/* + __ __ _ + ___\ \/ /_ __ __ _| |_ + / _ \\ /| '_ \ / _` | __| + | __// \| |_) | (_| | |_ + \___/_/\_\ .__/ \__,_|\__| + |_| XML parser + + Copyright (c) 1997-2000 Thai Open Source Software Center Ltd + Copyright (c) 2000 Clark Cooper + Copyright (c) 2002 Karl Waclawek + Copyright (c) 2002 Fred L. Drake, Jr. + Copyright (c) 2017 Sebastian Pipping + Licensed under the MIT license: + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to permit + persons to whom the Software is furnished to do so, subject to the + following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN + NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, + DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + USE OR OTHER DEALINGS IN THE SOFTWARE. */ #ifndef XmlRole_INCLUDED @@ -8,7 +39,7 @@ #ifdef __VMS /* 0 1 2 3 0 1 2 3 1234567890123456789012345678901 1234567890123456789012345678901 */ -#define XmlPrologStateInitExternalEntity XmlPrologStateInitExternalEnt +# define XmlPrologStateInitExternalEntity XmlPrologStateInitExternalEnt #endif #include "xmltok.h" @@ -85,11 +116,8 @@ enum { }; typedef struct prolog_state { - int (PTRCALL *handler) (struct prolog_state *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc); + int(PTRCALL *handler)(struct prolog_state *state, int tok, const char *ptr, + const char *end, const ENCODING *enc); unsigned level; int role_none; #ifdef XML_DTD @@ -104,8 +132,8 @@ void XmlPrologStateInit(PROLOG_STATE *); void XmlPrologStateInitExternalEntity(PROLOG_STATE *); #endif /* XML_DTD */ -#define XmlTokenRole(state, tok, ptr, end, enc) \ - (((state)->handler)(state, tok, ptr, end, enc)) +#define XmlTokenRole(state, tok, ptr, end, enc) \ + (((state)->handler)(state, tok, ptr, end, enc)) #ifdef __cplusplus } diff --git a/src/simgear/xml/xmltok.c b/src/simgear/xml/xmltok.c index 2b3b74ff8..c659983b4 100644 --- a/src/simgear/xml/xmltok.c +++ b/src/simgear/xml/xmltok.c @@ -1,18 +1,57 @@ -/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd - See the file COPYING for copying permission. +/* + __ __ _ + ___\ \/ /_ __ __ _| |_ + / _ \\ /| '_ \ / _` | __| + | __// \| |_) | (_| | |_ + \___/_/\_\ .__/ \__,_|\__| + |_| XML parser + + Copyright (c) 1997-2000 Thai Open Source Software Center Ltd + Copyright (c) 2000 Clark Cooper + Copyright (c) 2001-2003 Fred L. Drake, Jr. + Copyright (c) 2002 Greg Stein + Copyright (c) 2002-2016 Karl Waclawek + Copyright (c) 2005-2009 Steven Solie + Copyright (c) 2016-2022 Sebastian Pipping + Copyright (c) 2016 Pascal Cuoq + Copyright (c) 2016 Don Lewis + Copyright (c) 2017 Rhodri James + Copyright (c) 2017 Alexander Bluhm + Copyright (c) 2017 Benbuck Nason + Copyright (c) 2017 José Gutiérrez de la Concha + Copyright (c) 2019 David Loffredo + Copyright (c) 2021 Dong-hee Na + Licensed under the MIT license: + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to permit + persons to whom the Software is furnished to do so, subject to the + following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN + NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, + DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + USE OR OTHER DEALINGS IN THE SOFTWARE. */ +#include + #include +#include /* memcpy */ +#include -#ifdef COMPILED_FROM_DSP -#include "winconfig.h" -#elif defined(MACOS_CLASSIC) -#include "macconfig.h" -#else -#ifdef HAVE_EXPAT_CONFIG_H -#include "expat_config.h" +#ifdef _WIN32 +# include "winconfig.h" #endif -#endif /* ndef COMPILED_FROM_DSP */ #include "expat_external.h" #include "internal.h" @@ -20,59 +59,44 @@ #include "nametab.h" #ifdef XML_DTD -#define IGNORE_SECTION_TOK_VTABLE , PREFIX(ignoreSectionTok) +# define IGNORE_SECTION_TOK_VTABLE , PREFIX(ignoreSectionTok) #else -#define IGNORE_SECTION_TOK_VTABLE /* as nothing */ +# define IGNORE_SECTION_TOK_VTABLE /* as nothing */ #endif -#define VTABLE1 \ - { PREFIX(prologTok), PREFIX(contentTok), \ - PREFIX(cdataSectionTok) IGNORE_SECTION_TOK_VTABLE }, \ - { PREFIX(attributeValueTok), PREFIX(entityValueTok) }, \ - PREFIX(sameName), \ - PREFIX(nameMatchesAscii), \ - PREFIX(nameLength), \ - PREFIX(skipS), \ - PREFIX(getAtts), \ - PREFIX(charRefNumber), \ - PREFIX(predefinedEntityName), \ - PREFIX(updatePosition), \ - PREFIX(isPublicId) +#define VTABLE1 \ + {PREFIX(prologTok), PREFIX(contentTok), \ + PREFIX(cdataSectionTok) IGNORE_SECTION_TOK_VTABLE}, \ + {PREFIX(attributeValueTok), PREFIX(entityValueTok)}, \ + PREFIX(nameMatchesAscii), PREFIX(nameLength), PREFIX(skipS), \ + PREFIX(getAtts), PREFIX(charRefNumber), PREFIX(predefinedEntityName), \ + PREFIX(updatePosition), PREFIX(isPublicId) #define VTABLE VTABLE1, PREFIX(toUtf8), PREFIX(toUtf16) -#define UCS2_GET_NAMING(pages, hi, lo) \ - (namingBitmap[(pages[hi] << 3) + ((lo) >> 5)] & (1 << ((lo) & 0x1F))) +#define UCS2_GET_NAMING(pages, hi, lo) \ + (namingBitmap[(pages[hi] << 3) + ((lo) >> 5)] & (1u << ((lo)&0x1F))) /* A 2 byte UTF-8 representation splits the characters 11 bits between the bottom 5 and 6 bits of the bytes. We need 8 bits to index into pages, 3 bits to add to that index and 5 bits to generate the mask. */ -#define UTF8_GET_NAMING2(pages, byte) \ - (namingBitmap[((pages)[(((byte)[0]) >> 2) & 7] << 3) \ - + ((((byte)[0]) & 3) << 1) \ - + ((((byte)[1]) >> 5) & 1)] \ - & (1 << (((byte)[1]) & 0x1F))) +#define UTF8_GET_NAMING2(pages, byte) \ + (namingBitmap[((pages)[(((byte)[0]) >> 2) & 7] << 3) \ + + ((((byte)[0]) & 3) << 1) + ((((byte)[1]) >> 5) & 1)] \ + & (1u << (((byte)[1]) & 0x1F))) /* A 3 byte UTF-8 representation splits the characters 16 bits between the bottom 4, 6 and 6 bits of the bytes. We need 8 bits to index into pages, 3 bits to add to that index and 5 bits to generate the mask. */ -#define UTF8_GET_NAMING3(pages, byte) \ - (namingBitmap[((pages)[((((byte)[0]) & 0xF) << 4) \ - + ((((byte)[1]) >> 2) & 0xF)] \ - << 3) \ - + ((((byte)[1]) & 3) << 1) \ - + ((((byte)[2]) >> 5) & 1)] \ - & (1 << (((byte)[2]) & 0x1F))) - -#define UTF8_GET_NAMING(pages, p, n) \ - ((n) == 2 \ - ? UTF8_GET_NAMING2(pages, (const unsigned char *)(p)) \ - : ((n) == 3 \ - ? UTF8_GET_NAMING3(pages, (const unsigned char *)(p)) \ - : 0)) +#define UTF8_GET_NAMING3(pages, byte) \ + (namingBitmap \ + [((pages)[((((byte)[0]) & 0xF) << 4) + ((((byte)[1]) >> 2) & 0xF)] \ + << 3) \ + + ((((byte)[1]) & 3) << 1) + ((((byte)[2]) >> 5) & 1)] \ + & (1u << (((byte)[2]) & 0x1F))) /* Detection of invalid UTF-8 sequences is based on Table 3.1B of Unicode 3.2: http://www.unicode.org/unicode/reports/tr28/ @@ -84,88 +108,76 @@ (A & 0xC0) == 0xC0 means A > 0xBF */ -#define UTF8_INVALID2(p) \ +#define UTF8_INVALID2(p) \ ((*p) < 0xC2 || ((p)[1] & 0x80) == 0 || ((p)[1] & 0xC0) == 0xC0) -#define UTF8_INVALID3(p) \ - (((p)[2] & 0x80) == 0 \ - || \ - ((*p) == 0xEF && (p)[1] == 0xBF \ - ? \ - (p)[2] > 0xBD \ - : \ - ((p)[2] & 0xC0) == 0xC0) \ - || \ - ((*p) == 0xE0 \ - ? \ - (p)[1] < 0xA0 || ((p)[1] & 0xC0) == 0xC0 \ - : \ - ((p)[1] & 0x80) == 0 \ - || \ - ((*p) == 0xED ? (p)[1] > 0x9F : ((p)[1] & 0xC0) == 0xC0))) - -#define UTF8_INVALID4(p) \ - (((p)[3] & 0x80) == 0 || ((p)[3] & 0xC0) == 0xC0 \ - || \ - ((p)[2] & 0x80) == 0 || ((p)[2] & 0xC0) == 0xC0 \ - || \ - ((*p) == 0xF0 \ - ? \ - (p)[1] < 0x90 || ((p)[1] & 0xC0) == 0xC0 \ - : \ - ((p)[1] & 0x80) == 0 \ - || \ - ((*p) == 0xF4 ? (p)[1] > 0x8F : ((p)[1] & 0xC0) == 0xC0))) +#define UTF8_INVALID3(p) \ + (((p)[2] & 0x80) == 0 \ + || ((*p) == 0xEF && (p)[1] == 0xBF ? (p)[2] > 0xBD \ + : ((p)[2] & 0xC0) == 0xC0) \ + || ((*p) == 0xE0 \ + ? (p)[1] < 0xA0 || ((p)[1] & 0xC0) == 0xC0 \ + : ((p)[1] & 0x80) == 0 \ + || ((*p) == 0xED ? (p)[1] > 0x9F : ((p)[1] & 0xC0) == 0xC0))) + +#define UTF8_INVALID4(p) \ + (((p)[3] & 0x80) == 0 || ((p)[3] & 0xC0) == 0xC0 || ((p)[2] & 0x80) == 0 \ + || ((p)[2] & 0xC0) == 0xC0 \ + || ((*p) == 0xF0 \ + ? (p)[1] < 0x90 || ((p)[1] & 0xC0) == 0xC0 \ + : ((p)[1] & 0x80) == 0 \ + || ((*p) == 0xF4 ? (p)[1] > 0x8F : ((p)[1] & 0xC0) == 0xC0))) static int PTRFASTCALL -isNever(const ENCODING *enc, const char *p) -{ +isNever(const ENCODING *enc, const char *p) { + UNUSED_P(enc); + UNUSED_P(p); return 0; } static int PTRFASTCALL -utf8_isName2(const ENCODING *enc, const char *p) -{ +utf8_isName2(const ENCODING *enc, const char *p) { + UNUSED_P(enc); return UTF8_GET_NAMING2(namePages, (const unsigned char *)p); } static int PTRFASTCALL -utf8_isName3(const ENCODING *enc, const char *p) -{ +utf8_isName3(const ENCODING *enc, const char *p) { + UNUSED_P(enc); return UTF8_GET_NAMING3(namePages, (const unsigned char *)p); } #define utf8_isName4 isNever static int PTRFASTCALL -utf8_isNmstrt2(const ENCODING *enc, const char *p) -{ +utf8_isNmstrt2(const ENCODING *enc, const char *p) { + UNUSED_P(enc); return UTF8_GET_NAMING2(nmstrtPages, (const unsigned char *)p); } static int PTRFASTCALL -utf8_isNmstrt3(const ENCODING *enc, const char *p) -{ +utf8_isNmstrt3(const ENCODING *enc, const char *p) { + UNUSED_P(enc); return UTF8_GET_NAMING3(nmstrtPages, (const unsigned char *)p); } #define utf8_isNmstrt4 isNever static int PTRFASTCALL -utf8_isInvalid2(const ENCODING *enc, const char *p) -{ +utf8_isInvalid2(const ENCODING *enc, const char *p) { + UNUSED_P(enc); return UTF8_INVALID2((const unsigned char *)p); } static int PTRFASTCALL -utf8_isInvalid3(const ENCODING *enc, const char *p) -{ +utf8_isInvalid3(const ENCODING *enc, const char *p) { + UNUSED_P(enc); return UTF8_INVALID3((const unsigned char *)p); } static int PTRFASTCALL -utf8_isInvalid4(const ENCODING *enc, const char *p) -{ +utf8_isInvalid4(const ENCODING *enc, const char *p) { + UNUSED_P(enc); return UTF8_INVALID4((const unsigned char *)p); } @@ -173,50 +185,44 @@ struct normal_encoding { ENCODING enc; unsigned char type[256]; #ifdef XML_MIN_SIZE - int (PTRFASTCALL *byteType)(const ENCODING *, const char *); - int (PTRFASTCALL *isNameMin)(const ENCODING *, const char *); - int (PTRFASTCALL *isNmstrtMin)(const ENCODING *, const char *); - int (PTRFASTCALL *byteToAscii)(const ENCODING *, const char *); - int (PTRCALL *charMatches)(const ENCODING *, const char *, int); + int(PTRFASTCALL *byteType)(const ENCODING *, const char *); + int(PTRFASTCALL *isNameMin)(const ENCODING *, const char *); + int(PTRFASTCALL *isNmstrtMin)(const ENCODING *, const char *); + int(PTRFASTCALL *byteToAscii)(const ENCODING *, const char *); + int(PTRCALL *charMatches)(const ENCODING *, const char *, int); #endif /* XML_MIN_SIZE */ - int (PTRFASTCALL *isName2)(const ENCODING *, const char *); - int (PTRFASTCALL *isName3)(const ENCODING *, const char *); - int (PTRFASTCALL *isName4)(const ENCODING *, const char *); - int (PTRFASTCALL *isNmstrt2)(const ENCODING *, const char *); - int (PTRFASTCALL *isNmstrt3)(const ENCODING *, const char *); - int (PTRFASTCALL *isNmstrt4)(const ENCODING *, const char *); - int (PTRFASTCALL *isInvalid2)(const ENCODING *, const char *); - int (PTRFASTCALL *isInvalid3)(const ENCODING *, const char *); - int (PTRFASTCALL *isInvalid4)(const ENCODING *, const char *); + int(PTRFASTCALL *isName2)(const ENCODING *, const char *); + int(PTRFASTCALL *isName3)(const ENCODING *, const char *); + int(PTRFASTCALL *isName4)(const ENCODING *, const char *); + int(PTRFASTCALL *isNmstrt2)(const ENCODING *, const char *); + int(PTRFASTCALL *isNmstrt3)(const ENCODING *, const char *); + int(PTRFASTCALL *isNmstrt4)(const ENCODING *, const char *); + int(PTRFASTCALL *isInvalid2)(const ENCODING *, const char *); + int(PTRFASTCALL *isInvalid3)(const ENCODING *, const char *); + int(PTRFASTCALL *isInvalid4)(const ENCODING *, const char *); }; -#define AS_NORMAL_ENCODING(enc) ((const struct normal_encoding *) (enc)) +#define AS_NORMAL_ENCODING(enc) ((const struct normal_encoding *)(enc)) #ifdef XML_MIN_SIZE -#define STANDARD_VTABLE(E) \ - E ## byteType, \ - E ## isNameMin, \ - E ## isNmstrtMin, \ - E ## byteToAscii, \ - E ## charMatches, +# define STANDARD_VTABLE(E) \ + E##byteType, E##isNameMin, E##isNmstrtMin, E##byteToAscii, E##charMatches, #else -#define STANDARD_VTABLE(E) /* as nothing */ +# define STANDARD_VTABLE(E) /* as nothing */ #endif -#define NORMAL_VTABLE(E) \ - E ## isName2, \ - E ## isName3, \ - E ## isName4, \ - E ## isNmstrt2, \ - E ## isNmstrt3, \ - E ## isNmstrt4, \ - E ## isInvalid2, \ - E ## isInvalid3, \ - E ## isInvalid4 +#define NORMAL_VTABLE(E) \ + E##isName2, E##isName3, E##isName4, E##isNmstrt2, E##isNmstrt3, \ + E##isNmstrt4, E##isInvalid2, E##isInvalid3, E##isInvalid4 + +#define NULL_VTABLE \ + /* isName2 */ NULL, /* isName3 */ NULL, /* isName4 */ NULL, \ + /* isNmstrt2 */ NULL, /* isNmstrt3 */ NULL, /* isNmstrt4 */ NULL, \ + /* isInvalid2 */ NULL, /* isInvalid3 */ NULL, /* isInvalid4 */ NULL static int FASTCALL checkCharRefNumber(int); @@ -224,76 +230,79 @@ static int FASTCALL checkCharRefNumber(int); #include "ascii.h" #ifdef XML_MIN_SIZE -#define sb_isNameMin isNever -#define sb_isNmstrtMin isNever +# define sb_isNameMin isNever +# define sb_isNmstrtMin isNever #endif #ifdef XML_MIN_SIZE -#define MINBPC(enc) ((enc)->minBytesPerChar) +# define MINBPC(enc) ((enc)->minBytesPerChar) #else /* minimum bytes per character */ -#define MINBPC(enc) 1 +# define MINBPC(enc) 1 #endif -#define SB_BYTE_TYPE(enc, p) \ +#define SB_BYTE_TYPE(enc, p) \ (((struct normal_encoding *)(enc))->type[(unsigned char)*(p)]) #ifdef XML_MIN_SIZE static int PTRFASTCALL -sb_byteType(const ENCODING *enc, const char *p) -{ +sb_byteType(const ENCODING *enc, const char *p) { return SB_BYTE_TYPE(enc, p); } -#define BYTE_TYPE(enc, p) \ - (AS_NORMAL_ENCODING(enc)->byteType(enc, p)) +# define BYTE_TYPE(enc, p) (AS_NORMAL_ENCODING(enc)->byteType(enc, p)) #else -#define BYTE_TYPE(enc, p) SB_BYTE_TYPE(enc, p) +# define BYTE_TYPE(enc, p) SB_BYTE_TYPE(enc, p) #endif #ifdef XML_MIN_SIZE -#define BYTE_TO_ASCII(enc, p) \ - (AS_NORMAL_ENCODING(enc)->byteToAscii(enc, p)) +# define BYTE_TO_ASCII(enc, p) (AS_NORMAL_ENCODING(enc)->byteToAscii(enc, p)) static int PTRFASTCALL -sb_byteToAscii(const ENCODING *enc, const char *p) -{ +sb_byteToAscii(const ENCODING *enc, const char *p) { + UNUSED_P(enc); return *p; } #else -#define BYTE_TO_ASCII(enc, p) (*(p)) +# define BYTE_TO_ASCII(enc, p) (*(p)) #endif -#define IS_NAME_CHAR(enc, p, n) \ - (AS_NORMAL_ENCODING(enc)->isName ## n(enc, p)) -#define IS_NMSTRT_CHAR(enc, p, n) \ - (AS_NORMAL_ENCODING(enc)->isNmstrt ## n(enc, p)) -#define IS_INVALID_CHAR(enc, p, n) \ - (AS_NORMAL_ENCODING(enc)->isInvalid ## n(enc, p)) +#define IS_NAME_CHAR(enc, p, n) (AS_NORMAL_ENCODING(enc)->isName##n(enc, p)) +#define IS_NMSTRT_CHAR(enc, p, n) (AS_NORMAL_ENCODING(enc)->isNmstrt##n(enc, p)) +#ifdef XML_MIN_SIZE +# define IS_INVALID_CHAR(enc, p, n) \ + (AS_NORMAL_ENCODING(enc)->isInvalid##n \ + && AS_NORMAL_ENCODING(enc)->isInvalid##n(enc, p)) +#else +# define IS_INVALID_CHAR(enc, p, n) \ + (AS_NORMAL_ENCODING(enc)->isInvalid##n(enc, p)) +#endif #ifdef XML_MIN_SIZE -#define IS_NAME_CHAR_MINBPC(enc, p) \ - (AS_NORMAL_ENCODING(enc)->isNameMin(enc, p)) -#define IS_NMSTRT_CHAR_MINBPC(enc, p) \ - (AS_NORMAL_ENCODING(enc)->isNmstrtMin(enc, p)) +# define IS_NAME_CHAR_MINBPC(enc, p) \ + (AS_NORMAL_ENCODING(enc)->isNameMin(enc, p)) +# define IS_NMSTRT_CHAR_MINBPC(enc, p) \ + (AS_NORMAL_ENCODING(enc)->isNmstrtMin(enc, p)) #else -#define IS_NAME_CHAR_MINBPC(enc, p) (0) -#define IS_NMSTRT_CHAR_MINBPC(enc, p) (0) +# define IS_NAME_CHAR_MINBPC(enc, p) (0) +# define IS_NMSTRT_CHAR_MINBPC(enc, p) (0) #endif #ifdef XML_MIN_SIZE -#define CHAR_MATCHES(enc, p, c) \ - (AS_NORMAL_ENCODING(enc)->charMatches(enc, p, c)) +# define CHAR_MATCHES(enc, p, c) \ + (AS_NORMAL_ENCODING(enc)->charMatches(enc, p, c)) static int PTRCALL -sb_charMatches(const ENCODING *enc, const char *p, int c) -{ +sb_charMatches(const ENCODING *enc, const char *p, int c) { + UNUSED_P(enc); return *p == c; } #else /* c is an ASCII character */ -#define CHAR_MATCHES(enc, p, c) (*(p) == c) +# define CHAR_MATCHES(enc, p, c) (*(p) == c) #endif -#define PREFIX(ident) normal_ ## ident +#define PREFIX(ident) normal_##ident +#define XML_TOK_IMPL_C #include "xmltok_impl.c" +#undef XML_TOK_IMPL_C #undef MINBPC #undef BYTE_TYPE @@ -305,223 +314,301 @@ sb_charMatches(const ENCODING *enc, const char *p, int c) #undef IS_NMSTRT_CHAR_MINBPC #undef IS_INVALID_CHAR -enum { /* UTF8_cvalN is value of masked first byte of N byte sequence */ - UTF8_cval1 = 0x00, - UTF8_cval2 = 0xc0, - UTF8_cval3 = 0xe0, - UTF8_cval4 = 0xf0 +enum { /* UTF8_cvalN is value of masked first byte of N byte sequence */ + UTF8_cval1 = 0x00, + UTF8_cval2 = 0xc0, + UTF8_cval3 = 0xe0, + UTF8_cval4 = 0xf0 }; -static void PTRCALL -utf8_toUtf8(const ENCODING *enc, - const char **fromP, const char *fromLim, - char **toP, const char *toLim) -{ - char *to; - const char *from; - if (fromLim - *fromP > toLim - *toP) { - /* Avoid copying partial characters. */ - for (fromLim = *fromP + (toLim - *toP); fromLim > *fromP; fromLim--) - if (((unsigned char)fromLim[-1] & 0xc0) != 0x80) +void +_INTERNAL_trim_to_complete_utf8_characters(const char *from, + const char **fromLimRef) { + const char *fromLim = *fromLimRef; + size_t walked = 0; + for (; fromLim > from; fromLim--, walked++) { + const unsigned char prev = (unsigned char)fromLim[-1]; + if ((prev & 0xf8u) + == 0xf0u) { /* 4-byte character, lead by 0b11110xxx byte */ + if (walked + 1 >= 4) { + fromLim += 4 - 1; + break; + } else { + walked = 0; + } + } else if ((prev & 0xf0u) + == 0xe0u) { /* 3-byte character, lead by 0b1110xxxx byte */ + if (walked + 1 >= 3) { + fromLim += 3 - 1; break; + } else { + walked = 0; + } + } else if ((prev & 0xe0u) + == 0xc0u) { /* 2-byte character, lead by 0b110xxxxx byte */ + if (walked + 1 >= 2) { + fromLim += 2 - 1; + break; + } else { + walked = 0; + } + } else if ((prev & 0x80u) + == 0x00u) { /* 1-byte character, matching 0b0xxxxxxx */ + break; + } } - for (to = *toP, from = *fromP; from != fromLim; from++, to++) - *to = *from; - *fromP = from; - *toP = to; + *fromLimRef = fromLim; } -static void PTRCALL -utf8_toUtf16(const ENCODING *enc, - const char **fromP, const char *fromLim, - unsigned short **toP, const unsigned short *toLim) -{ +static enum XML_Convert_Result PTRCALL +utf8_toUtf8(const ENCODING *enc, const char **fromP, const char *fromLim, + char **toP, const char *toLim) { + bool input_incomplete = false; + bool output_exhausted = false; + + /* Avoid copying partial characters (due to limited space). */ + const ptrdiff_t bytesAvailable = fromLim - *fromP; + const ptrdiff_t bytesStorable = toLim - *toP; + UNUSED_P(enc); + if (bytesAvailable > bytesStorable) { + fromLim = *fromP + bytesStorable; + output_exhausted = true; + } + + /* Avoid copying partial characters (from incomplete input). */ + { + const char *const fromLimBefore = fromLim; + _INTERNAL_trim_to_complete_utf8_characters(*fromP, &fromLim); + if (fromLim < fromLimBefore) { + input_incomplete = true; + } + } + + { + const ptrdiff_t bytesToCopy = fromLim - *fromP; + memcpy(*toP, *fromP, bytesToCopy); + *fromP += bytesToCopy; + *toP += bytesToCopy; + } + + if (output_exhausted) /* needs to go first */ + return XML_CONVERT_OUTPUT_EXHAUSTED; + else if (input_incomplete) + return XML_CONVERT_INPUT_INCOMPLETE; + else + return XML_CONVERT_COMPLETED; +} + +static enum XML_Convert_Result PTRCALL +utf8_toUtf16(const ENCODING *enc, const char **fromP, const char *fromLim, + unsigned short **toP, const unsigned short *toLim) { + enum XML_Convert_Result res = XML_CONVERT_COMPLETED; unsigned short *to = *toP; const char *from = *fromP; - while (from != fromLim && to != toLim) { + while (from < fromLim && to < toLim) { switch (((struct normal_encoding *)enc)->type[(unsigned char)*from]) { case BT_LEAD2: + if (fromLim - from < 2) { + res = XML_CONVERT_INPUT_INCOMPLETE; + goto after; + } *to++ = (unsigned short)(((from[0] & 0x1f) << 6) | (from[1] & 0x3f)); from += 2; break; case BT_LEAD3: - *to++ = (unsigned short)(((from[0] & 0xf) << 12) - | ((from[1] & 0x3f) << 6) | (from[2] & 0x3f)); + if (fromLim - from < 3) { + res = XML_CONVERT_INPUT_INCOMPLETE; + goto after; + } + *to++ = (unsigned short)(((from[0] & 0xf) << 12) | ((from[1] & 0x3f) << 6) + | (from[2] & 0x3f)); from += 3; break; - case BT_LEAD4: - { - unsigned long n; - if (to + 1 == toLim) - goto after; - n = ((from[0] & 0x7) << 18) | ((from[1] & 0x3f) << 12) - | ((from[2] & 0x3f) << 6) | (from[3] & 0x3f); - n -= 0x10000; - to[0] = (unsigned short)((n >> 10) | 0xD800); - to[1] = (unsigned short)((n & 0x3FF) | 0xDC00); - to += 2; - from += 4; + case BT_LEAD4: { + unsigned long n; + if (toLim - to < 2) { + res = XML_CONVERT_OUTPUT_EXHAUSTED; + goto after; } - break; + if (fromLim - from < 4) { + res = XML_CONVERT_INPUT_INCOMPLETE; + goto after; + } + n = ((from[0] & 0x7) << 18) | ((from[1] & 0x3f) << 12) + | ((from[2] & 0x3f) << 6) | (from[3] & 0x3f); + n -= 0x10000; + to[0] = (unsigned short)((n >> 10) | 0xD800); + to[1] = (unsigned short)((n & 0x3FF) | 0xDC00); + to += 2; + from += 4; + } break; default: *to++ = *from++; break; } } + if (from < fromLim) + res = XML_CONVERT_OUTPUT_EXHAUSTED; after: *fromP = from; *toP = to; + return res; } #ifdef XML_NS -static const struct normal_encoding utf8_encoding_ns = { - { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 }, - { -#include "asciitab.h" -#include "utf8tab.h" - }, - STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_) -}; +static const struct normal_encoding utf8_encoding_ns + = {{VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0}, + { +# include "asciitab.h" +# include "utf8tab.h" + }, + STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)}; #endif -static const struct normal_encoding utf8_encoding = { - { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 }, - { +static const struct normal_encoding utf8_encoding + = {{VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0}, + { #define BT_COLON BT_NMSTRT #include "asciitab.h" #undef BT_COLON #include "utf8tab.h" - }, - STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_) -}; + }, + STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)}; #ifdef XML_NS -static const struct normal_encoding internal_utf8_encoding_ns = { - { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 }, - { -#include "iasciitab.h" -#include "utf8tab.h" - }, - STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_) -}; +static const struct normal_encoding internal_utf8_encoding_ns + = {{VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0}, + { +# include "iasciitab.h" +# include "utf8tab.h" + }, + STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)}; #endif -static const struct normal_encoding internal_utf8_encoding = { - { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 }, - { +static const struct normal_encoding internal_utf8_encoding + = {{VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0}, + { #define BT_COLON BT_NMSTRT #include "iasciitab.h" #undef BT_COLON #include "utf8tab.h" - }, - STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_) -}; + }, + STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)}; -static void PTRCALL -latin1_toUtf8(const ENCODING *enc, - const char **fromP, const char *fromLim, - char **toP, const char *toLim) -{ +static enum XML_Convert_Result PTRCALL +latin1_toUtf8(const ENCODING *enc, const char **fromP, const char *fromLim, + char **toP, const char *toLim) { + UNUSED_P(enc); for (;;) { unsigned char c; if (*fromP == fromLim) - break; + return XML_CONVERT_COMPLETED; c = (unsigned char)**fromP; if (c & 0x80) { if (toLim - *toP < 2) - break; + return XML_CONVERT_OUTPUT_EXHAUSTED; *(*toP)++ = (char)((c >> 6) | UTF8_cval2); *(*toP)++ = (char)((c & 0x3f) | 0x80); (*fromP)++; - } - else { + } else { if (*toP == toLim) - break; + return XML_CONVERT_OUTPUT_EXHAUSTED; *(*toP)++ = *(*fromP)++; } } } -static void PTRCALL -latin1_toUtf16(const ENCODING *enc, - const char **fromP, const char *fromLim, - unsigned short **toP, const unsigned short *toLim) -{ - while (*fromP != fromLim && *toP != toLim) +static enum XML_Convert_Result PTRCALL +latin1_toUtf16(const ENCODING *enc, const char **fromP, const char *fromLim, + unsigned short **toP, const unsigned short *toLim) { + UNUSED_P(enc); + while (*fromP < fromLim && *toP < toLim) *(*toP)++ = (unsigned char)*(*fromP)++; + + if ((*toP == toLim) && (*fromP < fromLim)) + return XML_CONVERT_OUTPUT_EXHAUSTED; + else + return XML_CONVERT_COMPLETED; } #ifdef XML_NS -static const struct normal_encoding latin1_encoding_ns = { - { VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0 }, - { -#include "asciitab.h" -#include "latin1tab.h" - }, - STANDARD_VTABLE(sb_) -}; +static const struct normal_encoding latin1_encoding_ns + = {{VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0}, + { +# include "asciitab.h" +# include "latin1tab.h" + }, + STANDARD_VTABLE(sb_) NULL_VTABLE}; #endif -static const struct normal_encoding latin1_encoding = { - { VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0 }, - { +static const struct normal_encoding latin1_encoding + = {{VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0}, + { #define BT_COLON BT_NMSTRT #include "asciitab.h" #undef BT_COLON #include "latin1tab.h" - }, - STANDARD_VTABLE(sb_) -}; - -static void PTRCALL -ascii_toUtf8(const ENCODING *enc, - const char **fromP, const char *fromLim, - char **toP, const char *toLim) -{ - while (*fromP != fromLim && *toP != toLim) + }, + STANDARD_VTABLE(sb_) NULL_VTABLE}; + +static enum XML_Convert_Result PTRCALL +ascii_toUtf8(const ENCODING *enc, const char **fromP, const char *fromLim, + char **toP, const char *toLim) { + UNUSED_P(enc); + while (*fromP < fromLim && *toP < toLim) *(*toP)++ = *(*fromP)++; + + if ((*toP == toLim) && (*fromP < fromLim)) + return XML_CONVERT_OUTPUT_EXHAUSTED; + else + return XML_CONVERT_COMPLETED; } #ifdef XML_NS -static const struct normal_encoding ascii_encoding_ns = { - { VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0 }, - { -#include "asciitab.h" -/* BT_NONXML == 0 */ - }, - STANDARD_VTABLE(sb_) -}; +static const struct normal_encoding ascii_encoding_ns + = {{VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0}, + { +# include "asciitab.h" + /* BT_NONXML == 0 */ + }, + STANDARD_VTABLE(sb_) NULL_VTABLE}; #endif -static const struct normal_encoding ascii_encoding = { - { VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0 }, - { +static const struct normal_encoding ascii_encoding + = {{VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0}, + { #define BT_COLON BT_NMSTRT #include "asciitab.h" #undef BT_COLON -/* BT_NONXML == 0 */ - }, - STANDARD_VTABLE(sb_) -}; + /* BT_NONXML == 0 */ + }, + STANDARD_VTABLE(sb_) NULL_VTABLE}; static int PTRFASTCALL -unicode_byte_type(char hi, char lo) -{ +unicode_byte_type(char hi, char lo) { switch ((unsigned char)hi) { - case 0xD8: case 0xD9: case 0xDA: case 0xDB: + /* 0xD800-0xDBFF first 16-bit code unit or high surrogate (W1) */ + case 0xD8: + case 0xD9: + case 0xDA: + case 0xDB: return BT_LEAD4; - case 0xDC: case 0xDD: case 0xDE: case 0xDF: + /* 0xDC00-0xDFFF second 16-bit code unit or low surrogate (W2) */ + case 0xDC: + case 0xDD: + case 0xDE: + case 0xDF: return BT_TRAIL; case 0xFF: switch ((unsigned char)lo) { - case 0xFF: - case 0xFE: + case 0xFF: /* noncharacter-FFFF */ + case 0xFE: /* noncharacter-FFFE */ return BT_NONXML; } break; @@ -529,85 +616,105 @@ unicode_byte_type(char hi, char lo) return BT_NONASCII; } -#define DEFINE_UTF16_TO_UTF8(E) \ -static void PTRCALL \ -E ## toUtf8(const ENCODING *enc, \ - const char **fromP, const char *fromLim, \ - char **toP, const char *toLim) \ -{ \ - const char *from; \ - for (from = *fromP; from != fromLim; from += 2) { \ - int plane; \ - unsigned char lo2; \ - unsigned char lo = GET_LO(from); \ - unsigned char hi = GET_HI(from); \ - switch (hi) { \ - case 0: \ - if (lo < 0x80) { \ - if (*toP == toLim) { \ - *fromP = from; \ - return; \ - } \ - *(*toP)++ = lo; \ - break; \ - } \ - /* fall through */ \ - case 0x1: case 0x2: case 0x3: \ - case 0x4: case 0x5: case 0x6: case 0x7: \ - if (toLim - *toP < 2) { \ - *fromP = from; \ - return; \ - } \ - *(*toP)++ = ((lo >> 6) | (hi << 2) | UTF8_cval2); \ - *(*toP)++ = ((lo & 0x3f) | 0x80); \ - break; \ - default: \ - if (toLim - *toP < 3) { \ - *fromP = from; \ - return; \ - } \ - /* 16 bits divided 4, 6, 6 amongst 3 bytes */ \ - *(*toP)++ = ((hi >> 4) | UTF8_cval3); \ - *(*toP)++ = (((hi & 0xf) << 2) | (lo >> 6) | 0x80); \ - *(*toP)++ = ((lo & 0x3f) | 0x80); \ - break; \ - case 0xD8: case 0xD9: case 0xDA: case 0xDB: \ - if (toLim - *toP < 4) { \ - *fromP = from; \ - return; \ - } \ - plane = (((hi & 0x3) << 2) | ((lo >> 6) & 0x3)) + 1; \ - *(*toP)++ = ((plane >> 2) | UTF8_cval4); \ - *(*toP)++ = (((lo >> 2) & 0xF) | ((plane & 0x3) << 4) | 0x80); \ - from += 2; \ - lo2 = GET_LO(from); \ - *(*toP)++ = (((lo & 0x3) << 4) \ - | ((GET_HI(from) & 0x3) << 2) \ - | (lo2 >> 6) \ - | 0x80); \ - *(*toP)++ = ((lo2 & 0x3f) | 0x80); \ - break; \ - } \ - } \ - *fromP = from; \ -} +#define DEFINE_UTF16_TO_UTF8(E) \ + static enum XML_Convert_Result PTRCALL E##toUtf8( \ + const ENCODING *enc, const char **fromP, const char *fromLim, \ + char **toP, const char *toLim) { \ + const char *from = *fromP; \ + UNUSED_P(enc); \ + fromLim = from + (((fromLim - from) >> 1) << 1); /* shrink to even */ \ + for (; from < fromLim; from += 2) { \ + int plane; \ + unsigned char lo2; \ + unsigned char lo = GET_LO(from); \ + unsigned char hi = GET_HI(from); \ + switch (hi) { \ + case 0: \ + if (lo < 0x80) { \ + if (*toP == toLim) { \ + *fromP = from; \ + return XML_CONVERT_OUTPUT_EXHAUSTED; \ + } \ + *(*toP)++ = lo; \ + break; \ + } \ + /* fall through */ \ + case 0x1: \ + case 0x2: \ + case 0x3: \ + case 0x4: \ + case 0x5: \ + case 0x6: \ + case 0x7: \ + if (toLim - *toP < 2) { \ + *fromP = from; \ + return XML_CONVERT_OUTPUT_EXHAUSTED; \ + } \ + *(*toP)++ = ((lo >> 6) | (hi << 2) | UTF8_cval2); \ + *(*toP)++ = ((lo & 0x3f) | 0x80); \ + break; \ + default: \ + if (toLim - *toP < 3) { \ + *fromP = from; \ + return XML_CONVERT_OUTPUT_EXHAUSTED; \ + } \ + /* 16 bits divided 4, 6, 6 amongst 3 bytes */ \ + *(*toP)++ = ((hi >> 4) | UTF8_cval3); \ + *(*toP)++ = (((hi & 0xf) << 2) | (lo >> 6) | 0x80); \ + *(*toP)++ = ((lo & 0x3f) | 0x80); \ + break; \ + case 0xD8: \ + case 0xD9: \ + case 0xDA: \ + case 0xDB: \ + if (toLim - *toP < 4) { \ + *fromP = from; \ + return XML_CONVERT_OUTPUT_EXHAUSTED; \ + } \ + if (fromLim - from < 4) { \ + *fromP = from; \ + return XML_CONVERT_INPUT_INCOMPLETE; \ + } \ + plane = (((hi & 0x3) << 2) | ((lo >> 6) & 0x3)) + 1; \ + *(*toP)++ = (char)((plane >> 2) | UTF8_cval4); \ + *(*toP)++ = (((lo >> 2) & 0xF) | ((plane & 0x3) << 4) | 0x80); \ + from += 2; \ + lo2 = GET_LO(from); \ + *(*toP)++ = (((lo & 0x3) << 4) | ((GET_HI(from) & 0x3) << 2) \ + | (lo2 >> 6) | 0x80); \ + *(*toP)++ = ((lo2 & 0x3f) | 0x80); \ + break; \ + } \ + } \ + *fromP = from; \ + if (from < fromLim) \ + return XML_CONVERT_INPUT_INCOMPLETE; \ + else \ + return XML_CONVERT_COMPLETED; \ + } -#define DEFINE_UTF16_TO_UTF16(E) \ -static void PTRCALL \ -E ## toUtf16(const ENCODING *enc, \ - const char **fromP, const char *fromLim, \ - unsigned short **toP, const unsigned short *toLim) \ -{ \ - /* Avoid copying first half only of surrogate */ \ - if (fromLim - *fromP > ((toLim - *toP) << 1) \ - && (GET_HI(fromLim - 2) & 0xF8) == 0xD8) \ - fromLim -= 2; \ - for (; *fromP != fromLim && *toP != toLim; *fromP += 2) \ - *(*toP)++ = (GET_HI(*fromP) << 8) | GET_LO(*fromP); \ -} +#define DEFINE_UTF16_TO_UTF16(E) \ + static enum XML_Convert_Result PTRCALL E##toUtf16( \ + const ENCODING *enc, const char **fromP, const char *fromLim, \ + unsigned short **toP, const unsigned short *toLim) { \ + enum XML_Convert_Result res = XML_CONVERT_COMPLETED; \ + UNUSED_P(enc); \ + fromLim = *fromP + (((fromLim - *fromP) >> 1) << 1); /* shrink to even */ \ + /* Avoid copying first half only of surrogate */ \ + if (fromLim - *fromP > ((toLim - *toP) << 1) \ + && (GET_HI(fromLim - 2) & 0xF8) == 0xD8) { \ + fromLim -= 2; \ + res = XML_CONVERT_INPUT_INCOMPLETE; \ + } \ + for (; *fromP < fromLim && *toP < toLim; *fromP += 2) \ + *(*toP)++ = (GET_HI(*fromP) << 8) | GET_LO(*fromP); \ + if ((*toP == toLim) && (*fromP < fromLim)) \ + return XML_CONVERT_OUTPUT_EXHAUSTED; \ + else \ + return res; \ + } -#define SET2(ptr, ch) \ - (((ptr)[0] = ((ch) & 0xff)), ((ptr)[1] = ((ch) >> 8))) +#define SET2(ptr, ch) (((ptr)[0] = ((ch)&0xff)), ((ptr)[1] = ((ch) >> 8))) #define GET_LO(ptr) ((unsigned char)(ptr)[0]) #define GET_HI(ptr) ((unsigned char)(ptr)[1]) @@ -618,8 +725,7 @@ DEFINE_UTF16_TO_UTF16(little2_) #undef GET_LO #undef GET_HI -#define SET2(ptr, ch) \ - (((ptr)[0] = ((ch) >> 8)), ((ptr)[1] = ((ch) & 0xFF))) +#define SET2(ptr, ch) (((ptr)[0] = ((ch) >> 8)), ((ptr)[1] = ((ch)&0xFF))) #define GET_LO(ptr) ((unsigned char)(ptr)[1]) #define GET_HI(ptr) ((unsigned char)(ptr)[0]) @@ -630,313 +736,307 @@ DEFINE_UTF16_TO_UTF16(big2_) #undef GET_LO #undef GET_HI -#define LITTLE2_BYTE_TYPE(enc, p) \ - ((p)[1] == 0 \ - ? ((struct normal_encoding *)(enc))->type[(unsigned char)*(p)] \ - : unicode_byte_type((p)[1], (p)[0])) -#define LITTLE2_BYTE_TO_ASCII(enc, p) ((p)[1] == 0 ? (p)[0] : -1) -#define LITTLE2_CHAR_MATCHES(enc, p, c) ((p)[1] == 0 && (p)[0] == c) -#define LITTLE2_IS_NAME_CHAR_MINBPC(enc, p) \ +#define LITTLE2_BYTE_TYPE(enc, p) \ + ((p)[1] == 0 ? ((struct normal_encoding *)(enc))->type[(unsigned char)*(p)] \ + : unicode_byte_type((p)[1], (p)[0])) +#define LITTLE2_BYTE_TO_ASCII(p) ((p)[1] == 0 ? (p)[0] : -1) +#define LITTLE2_CHAR_MATCHES(p, c) ((p)[1] == 0 && (p)[0] == c) +#define LITTLE2_IS_NAME_CHAR_MINBPC(p) \ UCS2_GET_NAMING(namePages, (unsigned char)p[1], (unsigned char)p[0]) -#define LITTLE2_IS_NMSTRT_CHAR_MINBPC(enc, p) \ +#define LITTLE2_IS_NMSTRT_CHAR_MINBPC(p) \ UCS2_GET_NAMING(nmstrtPages, (unsigned char)p[1], (unsigned char)p[0]) #ifdef XML_MIN_SIZE static int PTRFASTCALL -little2_byteType(const ENCODING *enc, const char *p) -{ +little2_byteType(const ENCODING *enc, const char *p) { return LITTLE2_BYTE_TYPE(enc, p); } static int PTRFASTCALL -little2_byteToAscii(const ENCODING *enc, const char *p) -{ - return LITTLE2_BYTE_TO_ASCII(enc, p); +little2_byteToAscii(const ENCODING *enc, const char *p) { + UNUSED_P(enc); + return LITTLE2_BYTE_TO_ASCII(p); } static int PTRCALL -little2_charMatches(const ENCODING *enc, const char *p, int c) -{ - return LITTLE2_CHAR_MATCHES(enc, p, c); +little2_charMatches(const ENCODING *enc, const char *p, int c) { + UNUSED_P(enc); + return LITTLE2_CHAR_MATCHES(p, c); } static int PTRFASTCALL -little2_isNameMin(const ENCODING *enc, const char *p) -{ - return LITTLE2_IS_NAME_CHAR_MINBPC(enc, p); +little2_isNameMin(const ENCODING *enc, const char *p) { + UNUSED_P(enc); + return LITTLE2_IS_NAME_CHAR_MINBPC(p); } static int PTRFASTCALL -little2_isNmstrtMin(const ENCODING *enc, const char *p) -{ - return LITTLE2_IS_NMSTRT_CHAR_MINBPC(enc, p); +little2_isNmstrtMin(const ENCODING *enc, const char *p) { + UNUSED_P(enc); + return LITTLE2_IS_NMSTRT_CHAR_MINBPC(p); } -#undef VTABLE -#define VTABLE VTABLE1, little2_toUtf8, little2_toUtf16 +# undef VTABLE +# define VTABLE VTABLE1, little2_toUtf8, little2_toUtf16 #else /* not XML_MIN_SIZE */ -#undef PREFIX -#define PREFIX(ident) little2_ ## ident -#define MINBPC(enc) 2 +# undef PREFIX +# define PREFIX(ident) little2_##ident +# define MINBPC(enc) 2 /* CHAR_MATCHES is guaranteed to have MINBPC bytes available. */ -#define BYTE_TYPE(enc, p) LITTLE2_BYTE_TYPE(enc, p) -#define BYTE_TO_ASCII(enc, p) LITTLE2_BYTE_TO_ASCII(enc, p) -#define CHAR_MATCHES(enc, p, c) LITTLE2_CHAR_MATCHES(enc, p, c) -#define IS_NAME_CHAR(enc, p, n) 0 -#define IS_NAME_CHAR_MINBPC(enc, p) LITTLE2_IS_NAME_CHAR_MINBPC(enc, p) -#define IS_NMSTRT_CHAR(enc, p, n) (0) -#define IS_NMSTRT_CHAR_MINBPC(enc, p) LITTLE2_IS_NMSTRT_CHAR_MINBPC(enc, p) - -#include "xmltok_impl.c" - -#undef MINBPC -#undef BYTE_TYPE -#undef BYTE_TO_ASCII -#undef CHAR_MATCHES -#undef IS_NAME_CHAR -#undef IS_NAME_CHAR_MINBPC -#undef IS_NMSTRT_CHAR -#undef IS_NMSTRT_CHAR_MINBPC -#undef IS_INVALID_CHAR +# define BYTE_TYPE(enc, p) LITTLE2_BYTE_TYPE(enc, p) +# define BYTE_TO_ASCII(enc, p) LITTLE2_BYTE_TO_ASCII(p) +# define CHAR_MATCHES(enc, p, c) LITTLE2_CHAR_MATCHES(p, c) +# define IS_NAME_CHAR(enc, p, n) 0 +# define IS_NAME_CHAR_MINBPC(enc, p) LITTLE2_IS_NAME_CHAR_MINBPC(p) +# define IS_NMSTRT_CHAR(enc, p, n) (0) +# define IS_NMSTRT_CHAR_MINBPC(enc, p) LITTLE2_IS_NMSTRT_CHAR_MINBPC(p) + +# define XML_TOK_IMPL_C +# include "xmltok_impl.c" +# undef XML_TOK_IMPL_C + +# undef MINBPC +# undef BYTE_TYPE +# undef BYTE_TO_ASCII +# undef CHAR_MATCHES +# undef IS_NAME_CHAR +# undef IS_NAME_CHAR_MINBPC +# undef IS_NMSTRT_CHAR +# undef IS_NMSTRT_CHAR_MINBPC +# undef IS_INVALID_CHAR #endif /* not XML_MIN_SIZE */ #ifdef XML_NS -static const struct normal_encoding little2_encoding_ns = { - { VTABLE, 2, 0, -#if BYTEORDER == 1234 - 1 -#else - 0 -#endif - }, - { -#include "asciitab.h" -#include "latin1tab.h" - }, - STANDARD_VTABLE(little2_) -}; +static const struct normal_encoding little2_encoding_ns + = {{VTABLE, 2, 0, +# if BYTEORDER == 1234 + 1 +# else + 0 +# endif + }, + { +# include "asciitab.h" +# include "latin1tab.h" + }, + STANDARD_VTABLE(little2_) NULL_VTABLE}; #endif -static const struct normal_encoding little2_encoding = { - { VTABLE, 2, 0, +static const struct normal_encoding little2_encoding + = {{VTABLE, 2, 0, #if BYTEORDER == 1234 - 1 + 1 #else - 0 + 0 #endif - }, - { + }, + { #define BT_COLON BT_NMSTRT #include "asciitab.h" #undef BT_COLON #include "latin1tab.h" - }, - STANDARD_VTABLE(little2_) -}; + }, + STANDARD_VTABLE(little2_) NULL_VTABLE}; #if BYTEORDER != 4321 -#ifdef XML_NS +# ifdef XML_NS -static const struct normal_encoding internal_little2_encoding_ns = { - { VTABLE, 2, 0, 1 }, - { -#include "iasciitab.h" -#include "latin1tab.h" - }, - STANDARD_VTABLE(little2_) -}; +static const struct normal_encoding internal_little2_encoding_ns + = {{VTABLE, 2, 0, 1}, + { +# include "iasciitab.h" +# include "latin1tab.h" + }, + STANDARD_VTABLE(little2_) NULL_VTABLE}; -#endif +# endif -static const struct normal_encoding internal_little2_encoding = { - { VTABLE, 2, 0, 1 }, - { -#define BT_COLON BT_NMSTRT -#include "iasciitab.h" -#undef BT_COLON -#include "latin1tab.h" - }, - STANDARD_VTABLE(little2_) -}; +static const struct normal_encoding internal_little2_encoding + = {{VTABLE, 2, 0, 1}, + { +# define BT_COLON BT_NMSTRT +# include "iasciitab.h" +# undef BT_COLON +# include "latin1tab.h" + }, + STANDARD_VTABLE(little2_) NULL_VTABLE}; #endif - -#define BIG2_BYTE_TYPE(enc, p) \ - ((p)[0] == 0 \ - ? ((struct normal_encoding *)(enc))->type[(unsigned char)(p)[1]] \ - : unicode_byte_type((p)[0], (p)[1])) -#define BIG2_BYTE_TO_ASCII(enc, p) ((p)[0] == 0 ? (p)[1] : -1) -#define BIG2_CHAR_MATCHES(enc, p, c) ((p)[0] == 0 && (p)[1] == c) -#define BIG2_IS_NAME_CHAR_MINBPC(enc, p) \ +#define BIG2_BYTE_TYPE(enc, p) \ + ((p)[0] == 0 \ + ? ((struct normal_encoding *)(enc))->type[(unsigned char)(p)[1]] \ + : unicode_byte_type((p)[0], (p)[1])) +#define BIG2_BYTE_TO_ASCII(p) ((p)[0] == 0 ? (p)[1] : -1) +#define BIG2_CHAR_MATCHES(p, c) ((p)[0] == 0 && (p)[1] == c) +#define BIG2_IS_NAME_CHAR_MINBPC(p) \ UCS2_GET_NAMING(namePages, (unsigned char)p[0], (unsigned char)p[1]) -#define BIG2_IS_NMSTRT_CHAR_MINBPC(enc, p) \ +#define BIG2_IS_NMSTRT_CHAR_MINBPC(p) \ UCS2_GET_NAMING(nmstrtPages, (unsigned char)p[0], (unsigned char)p[1]) #ifdef XML_MIN_SIZE static int PTRFASTCALL -big2_byteType(const ENCODING *enc, const char *p) -{ +big2_byteType(const ENCODING *enc, const char *p) { return BIG2_BYTE_TYPE(enc, p); } static int PTRFASTCALL -big2_byteToAscii(const ENCODING *enc, const char *p) -{ - return BIG2_BYTE_TO_ASCII(enc, p); +big2_byteToAscii(const ENCODING *enc, const char *p) { + UNUSED_P(enc); + return BIG2_BYTE_TO_ASCII(p); } static int PTRCALL -big2_charMatches(const ENCODING *enc, const char *p, int c) -{ - return BIG2_CHAR_MATCHES(enc, p, c); +big2_charMatches(const ENCODING *enc, const char *p, int c) { + UNUSED_P(enc); + return BIG2_CHAR_MATCHES(p, c); } static int PTRFASTCALL -big2_isNameMin(const ENCODING *enc, const char *p) -{ - return BIG2_IS_NAME_CHAR_MINBPC(enc, p); +big2_isNameMin(const ENCODING *enc, const char *p) { + UNUSED_P(enc); + return BIG2_IS_NAME_CHAR_MINBPC(p); } static int PTRFASTCALL -big2_isNmstrtMin(const ENCODING *enc, const char *p) -{ - return BIG2_IS_NMSTRT_CHAR_MINBPC(enc, p); +big2_isNmstrtMin(const ENCODING *enc, const char *p) { + UNUSED_P(enc); + return BIG2_IS_NMSTRT_CHAR_MINBPC(p); } -#undef VTABLE -#define VTABLE VTABLE1, big2_toUtf8, big2_toUtf16 +# undef VTABLE +# define VTABLE VTABLE1, big2_toUtf8, big2_toUtf16 #else /* not XML_MIN_SIZE */ -#undef PREFIX -#define PREFIX(ident) big2_ ## ident -#define MINBPC(enc) 2 +# undef PREFIX +# define PREFIX(ident) big2_##ident +# define MINBPC(enc) 2 /* CHAR_MATCHES is guaranteed to have MINBPC bytes available. */ -#define BYTE_TYPE(enc, p) BIG2_BYTE_TYPE(enc, p) -#define BYTE_TO_ASCII(enc, p) BIG2_BYTE_TO_ASCII(enc, p) -#define CHAR_MATCHES(enc, p, c) BIG2_CHAR_MATCHES(enc, p, c) -#define IS_NAME_CHAR(enc, p, n) 0 -#define IS_NAME_CHAR_MINBPC(enc, p) BIG2_IS_NAME_CHAR_MINBPC(enc, p) -#define IS_NMSTRT_CHAR(enc, p, n) (0) -#define IS_NMSTRT_CHAR_MINBPC(enc, p) BIG2_IS_NMSTRT_CHAR_MINBPC(enc, p) - -#include "xmltok_impl.c" - -#undef MINBPC -#undef BYTE_TYPE -#undef BYTE_TO_ASCII -#undef CHAR_MATCHES -#undef IS_NAME_CHAR -#undef IS_NAME_CHAR_MINBPC -#undef IS_NMSTRT_CHAR -#undef IS_NMSTRT_CHAR_MINBPC -#undef IS_INVALID_CHAR +# define BYTE_TYPE(enc, p) BIG2_BYTE_TYPE(enc, p) +# define BYTE_TO_ASCII(enc, p) BIG2_BYTE_TO_ASCII(p) +# define CHAR_MATCHES(enc, p, c) BIG2_CHAR_MATCHES(p, c) +# define IS_NAME_CHAR(enc, p, n) 0 +# define IS_NAME_CHAR_MINBPC(enc, p) BIG2_IS_NAME_CHAR_MINBPC(p) +# define IS_NMSTRT_CHAR(enc, p, n) (0) +# define IS_NMSTRT_CHAR_MINBPC(enc, p) BIG2_IS_NMSTRT_CHAR_MINBPC(p) + +# define XML_TOK_IMPL_C +# include "xmltok_impl.c" +# undef XML_TOK_IMPL_C + +# undef MINBPC +# undef BYTE_TYPE +# undef BYTE_TO_ASCII +# undef CHAR_MATCHES +# undef IS_NAME_CHAR +# undef IS_NAME_CHAR_MINBPC +# undef IS_NMSTRT_CHAR +# undef IS_NMSTRT_CHAR_MINBPC +# undef IS_INVALID_CHAR #endif /* not XML_MIN_SIZE */ #ifdef XML_NS -static const struct normal_encoding big2_encoding_ns = { - { VTABLE, 2, 0, -#if BYTEORDER == 4321 - 1 -#else - 0 -#endif - }, - { -#include "asciitab.h" -#include "latin1tab.h" - }, - STANDARD_VTABLE(big2_) -}; +static const struct normal_encoding big2_encoding_ns + = {{VTABLE, 2, 0, +# if BYTEORDER == 4321 + 1 +# else + 0 +# endif + }, + { +# include "asciitab.h" +# include "latin1tab.h" + }, + STANDARD_VTABLE(big2_) NULL_VTABLE}; #endif -static const struct normal_encoding big2_encoding = { - { VTABLE, 2, 0, +static const struct normal_encoding big2_encoding + = {{VTABLE, 2, 0, #if BYTEORDER == 4321 - 1 + 1 #else - 0 + 0 #endif - }, - { + }, + { #define BT_COLON BT_NMSTRT #include "asciitab.h" #undef BT_COLON #include "latin1tab.h" - }, - STANDARD_VTABLE(big2_) -}; + }, + STANDARD_VTABLE(big2_) NULL_VTABLE}; #if BYTEORDER != 1234 -#ifdef XML_NS +# ifdef XML_NS -static const struct normal_encoding internal_big2_encoding_ns = { - { VTABLE, 2, 0, 1 }, - { -#include "iasciitab.h" -#include "latin1tab.h" - }, - STANDARD_VTABLE(big2_) -}; +static const struct normal_encoding internal_big2_encoding_ns + = {{VTABLE, 2, 0, 1}, + { +# include "iasciitab.h" +# include "latin1tab.h" + }, + STANDARD_VTABLE(big2_) NULL_VTABLE}; -#endif +# endif -static const struct normal_encoding internal_big2_encoding = { - { VTABLE, 2, 0, 1 }, - { -#define BT_COLON BT_NMSTRT -#include "iasciitab.h" -#undef BT_COLON -#include "latin1tab.h" - }, - STANDARD_VTABLE(big2_) -}; +static const struct normal_encoding internal_big2_encoding + = {{VTABLE, 2, 0, 1}, + { +# define BT_COLON BT_NMSTRT +# include "iasciitab.h" +# undef BT_COLON +# include "latin1tab.h" + }, + STANDARD_VTABLE(big2_) NULL_VTABLE}; #endif #undef PREFIX static int FASTCALL -streqci(const char *s1, const char *s2) -{ +streqci(const char *s1, const char *s2) { for (;;) { char c1 = *s1++; char c2 = *s2++; if (ASCII_a <= c1 && c1 <= ASCII_z) c1 += ASCII_A - ASCII_a; if (ASCII_a <= c2 && c2 <= ASCII_z) - c2 += ASCII_A - ASCII_a; + /* The following line will never get executed. streqci() is + * only called from two places, both of which guarantee to put + * upper-case strings into s2. + */ + c2 += ASCII_A - ASCII_a; /* LCOV_EXCL_LINE */ if (c1 != c2) return 0; - if (!c1) + if (! c1) break; } return 1; } static void PTRCALL -initUpdatePosition(const ENCODING *enc, const char *ptr, - const char *end, POSITION *pos) -{ +initUpdatePosition(const ENCODING *enc, const char *ptr, const char *end, + POSITION *pos) { + UNUSED_P(enc); normal_updatePosition(&utf8_encoding.enc, ptr, end, pos); } static int -toAscii(const ENCODING *enc, const char *ptr, const char *end) -{ +toAscii(const ENCODING *enc, const char *ptr, const char *end) { char buf[1]; char *p = buf; XmlUtf8Convert(enc, &ptr, end, &p, p + 1); @@ -947,8 +1047,7 @@ toAscii(const ENCODING *enc, const char *ptr, const char *end) } static int FASTCALL -isSpace(int c) -{ +isSpace(int c) { switch (c) { case 0x20: case 0xD: @@ -963,21 +1062,16 @@ isSpace(int c) followed by name=val. */ static int -parsePseudoAttribute(const ENCODING *enc, - const char *ptr, - const char *end, - const char **namePtr, - const char **nameEndPtr, - const char **valPtr, - const char **nextTokPtr) -{ +parsePseudoAttribute(const ENCODING *enc, const char *ptr, const char *end, + const char **namePtr, const char **nameEndPtr, + const char **valPtr, const char **nextTokPtr) { int c; char open; if (ptr == end) { *namePtr = NULL; return 1; } - if (!isSpace(toAscii(enc, ptr, end))) { + if (! isSpace(toAscii(enc, ptr, end))) { *nextTokPtr = ptr; return 0; } @@ -1033,12 +1127,9 @@ parsePseudoAttribute(const ENCODING *enc, c = toAscii(enc, ptr, end); if (c == open) break; - if (!(ASCII_a <= c && c <= ASCII_z) - && !(ASCII_A <= c && c <= ASCII_Z) - && !(ASCII_0 <= c && c <= ASCII_9) - && c != ASCII_PERIOD - && c != ASCII_MINUS - && c != ASCII_UNDERSCORE) { + if (! (ASCII_a <= c && c <= ASCII_z) && ! (ASCII_A <= c && c <= ASCII_Z) + && ! (ASCII_0 <= c && c <= ASCII_9) && c != ASCII_PERIOD + && c != ASCII_MINUS && c != ASCII_UNDERSCORE) { *nextTokPtr = ptr; return 0; } @@ -1047,68 +1138,52 @@ parsePseudoAttribute(const ENCODING *enc, return 1; } -static const char KW_version[] = { - ASCII_v, ASCII_e, ASCII_r, ASCII_s, ASCII_i, ASCII_o, ASCII_n, '\0' -}; +static const char KW_version[] + = {ASCII_v, ASCII_e, ASCII_r, ASCII_s, ASCII_i, ASCII_o, ASCII_n, '\0'}; -static const char KW_encoding[] = { - ASCII_e, ASCII_n, ASCII_c, ASCII_o, ASCII_d, ASCII_i, ASCII_n, ASCII_g, '\0' -}; +static const char KW_encoding[] = {ASCII_e, ASCII_n, ASCII_c, ASCII_o, ASCII_d, + ASCII_i, ASCII_n, ASCII_g, '\0'}; -static const char KW_standalone[] = { - ASCII_s, ASCII_t, ASCII_a, ASCII_n, ASCII_d, ASCII_a, ASCII_l, ASCII_o, - ASCII_n, ASCII_e, '\0' -}; +static const char KW_standalone[] + = {ASCII_s, ASCII_t, ASCII_a, ASCII_n, ASCII_d, ASCII_a, + ASCII_l, ASCII_o, ASCII_n, ASCII_e, '\0'}; -static const char KW_yes[] = { - ASCII_y, ASCII_e, ASCII_s, '\0' -}; +static const char KW_yes[] = {ASCII_y, ASCII_e, ASCII_s, '\0'}; -static const char KW_no[] = { - ASCII_n, ASCII_o, '\0' -}; +static const char KW_no[] = {ASCII_n, ASCII_o, '\0'}; static int -doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *, - const char *, +doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *, const char *, const char *), - int isGeneralTextEntity, - const ENCODING *enc, - const char *ptr, - const char *end, - const char **badPtr, - const char **versionPtr, - const char **versionEndPtr, - const char **encodingName, - const ENCODING **encoding, - int *standalone) -{ + int isGeneralTextEntity, const ENCODING *enc, const char *ptr, + const char *end, const char **badPtr, const char **versionPtr, + const char **versionEndPtr, const char **encodingName, + const ENCODING **encoding, int *standalone) { const char *val = NULL; const char *name = NULL; const char *nameEnd = NULL; ptr += 5 * enc->minBytesPerChar; end -= 2 * enc->minBytesPerChar; - if (!parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr) - || !name) { + if (! parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr) + || ! name) { *badPtr = ptr; return 0; } - if (!XmlNameMatchesAscii(enc, name, nameEnd, KW_version)) { - if (!isGeneralTextEntity) { + if (! XmlNameMatchesAscii(enc, name, nameEnd, KW_version)) { + if (! isGeneralTextEntity) { *badPtr = name; return 0; } - } - else { + } else { if (versionPtr) *versionPtr = val; if (versionEndPtr) *versionEndPtr = ptr; - if (!parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr)) { + if (! parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr)) { *badPtr = ptr; return 0; } - if (!name) { + if (! name) { if (isGeneralTextEntity) { /* a TextDecl must have an EncodingDecl */ *badPtr = ptr; @@ -1119,7 +1194,7 @@ doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *, } if (XmlNameMatchesAscii(enc, name, nameEnd, KW_encoding)) { int c = toAscii(enc, val, end); - if (!(ASCII_a <= c && c <= ASCII_z) && !(ASCII_A <= c && c <= ASCII_Z)) { + if (! (ASCII_a <= c && c <= ASCII_z) && ! (ASCII_A <= c && c <= ASCII_Z)) { *badPtr = val; return 0; } @@ -1127,14 +1202,14 @@ doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *, *encodingName = val; if (encoding) *encoding = encodingFinder(enc, val, ptr - enc->minBytesPerChar); - if (!parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr)) { + if (! parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr)) { *badPtr = ptr; return 0; } - if (!name) + if (! name) return 1; } - if (!XmlNameMatchesAscii(enc, name, nameEnd, KW_standalone) + if (! XmlNameMatchesAscii(enc, name, nameEnd, KW_standalone) || isGeneralTextEntity) { *badPtr = name; return 0; @@ -1142,12 +1217,10 @@ doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *, if (XmlNameMatchesAscii(enc, val, ptr - enc->minBytesPerChar, KW_yes)) { if (standalone) *standalone = 1; - } - else if (XmlNameMatchesAscii(enc, val, ptr - enc->minBytesPerChar, KW_no)) { + } else if (XmlNameMatchesAscii(enc, val, ptr - enc->minBytesPerChar, KW_no)) { if (standalone) *standalone = 0; - } - else { + } else { *badPtr = val; return 0; } @@ -1161,11 +1234,16 @@ doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *, } static int FASTCALL -checkCharRefNumber(int result) -{ +checkCharRefNumber(int result) { switch (result >> 8) { - case 0xD8: case 0xD9: case 0xDA: case 0xDB: - case 0xDC: case 0xDD: case 0xDE: case 0xDF: + case 0xD8: + case 0xD9: + case 0xDA: + case 0xDB: + case 0xDC: + case 0xDD: + case 0xDE: + case 0xDF: return -1; case 0: if (latin1_encoding.type[result] == BT_NONXML) @@ -1180,8 +1258,7 @@ checkCharRefNumber(int result) } int FASTCALL -XmlUtf8Encode(int c, char *buf) -{ +XmlUtf8Encode(int c, char *buf) { enum { /* minN is minimum legal resulting value for N byte sequence */ min2 = 0x80, @@ -1190,7 +1267,7 @@ XmlUtf8Encode(int c, char *buf) }; if (c < 0) - return 0; + return 0; /* LCOV_EXCL_LINE: this case is always eliminated beforehand */ if (c < min2) { buf[0] = (char)(c | UTF8_cval1); return 1; @@ -1213,12 +1290,11 @@ XmlUtf8Encode(int c, char *buf) buf[3] = (char)((c & 0x3f) | 0x80); return 4; } - return 0; + return 0; /* LCOV_EXCL_LINE: this case too is eliminated before calling */ } int FASTCALL -XmlUtf16Encode(int charNum, unsigned short *buf) -{ +XmlUtf16Encode(int charNum, unsigned short *buf) { if (charNum < 0) return 0; if (charNum < 0x10000) { @@ -1242,17 +1318,15 @@ struct unknown_encoding { char utf8[256][4]; }; -#define AS_UNKNOWN_ENCODING(enc) ((const struct unknown_encoding *) (enc)) +#define AS_UNKNOWN_ENCODING(enc) ((const struct unknown_encoding *)(enc)) int -XmlSizeOfUnknownEncoding(void) -{ +XmlSizeOfUnknownEncoding(void) { return sizeof(struct unknown_encoding); } static int PTRFASTCALL -unknown_isName(const ENCODING *enc, const char *p) -{ +unknown_isName(const ENCODING *enc, const char *p) { const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc); int c = uenc->convert(uenc->userData, p); if (c & ~0xFFFF) @@ -1261,8 +1335,7 @@ unknown_isName(const ENCODING *enc, const char *p) } static int PTRFASTCALL -unknown_isNmstrt(const ENCODING *enc, const char *p) -{ +unknown_isNmstrt(const ENCODING *enc, const char *p) { const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc); int c = uenc->convert(uenc->userData, p); if (c & ~0xFFFF) @@ -1271,81 +1344,72 @@ unknown_isNmstrt(const ENCODING *enc, const char *p) } static int PTRFASTCALL -unknown_isInvalid(const ENCODING *enc, const char *p) -{ +unknown_isInvalid(const ENCODING *enc, const char *p) { const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc); int c = uenc->convert(uenc->userData, p); return (c & ~0xFFFF) || checkCharRefNumber(c) < 0; } -static void PTRCALL -unknown_toUtf8(const ENCODING *enc, - const char **fromP, const char *fromLim, - char **toP, const char *toLim) -{ +static enum XML_Convert_Result PTRCALL +unknown_toUtf8(const ENCODING *enc, const char **fromP, const char *fromLim, + char **toP, const char *toLim) { const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc); char buf[XML_UTF8_ENCODE_MAX]; for (;;) { const char *utf8; int n; if (*fromP == fromLim) - break; + return XML_CONVERT_COMPLETED; utf8 = uenc->utf8[(unsigned char)**fromP]; n = *utf8++; if (n == 0) { int c = uenc->convert(uenc->userData, *fromP); n = XmlUtf8Encode(c, buf); if (n > toLim - *toP) - break; + return XML_CONVERT_OUTPUT_EXHAUSTED; utf8 = buf; *fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP] - (BT_LEAD2 - 2)); - } - else { + } else { if (n > toLim - *toP) - break; + return XML_CONVERT_OUTPUT_EXHAUSTED; (*fromP)++; } - do { - *(*toP)++ = *utf8++; - } while (--n != 0); + memcpy(*toP, utf8, n); + *toP += n; } } -static void PTRCALL -unknown_toUtf16(const ENCODING *enc, - const char **fromP, const char *fromLim, - unsigned short **toP, const unsigned short *toLim) -{ +static enum XML_Convert_Result PTRCALL +unknown_toUtf16(const ENCODING *enc, const char **fromP, const char *fromLim, + unsigned short **toP, const unsigned short *toLim) { const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc); - while (*fromP != fromLim && *toP != toLim) { + while (*fromP < fromLim && *toP < toLim) { unsigned short c = uenc->utf16[(unsigned char)**fromP]; if (c == 0) { - c = (unsigned short) - uenc->convert(uenc->userData, *fromP); + c = (unsigned short)uenc->convert(uenc->userData, *fromP); *fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP] - (BT_LEAD2 - 2)); - } - else + } else (*fromP)++; *(*toP)++ = c; } + + if ((*toP == toLim) && (*fromP < fromLim)) + return XML_CONVERT_OUTPUT_EXHAUSTED; + else + return XML_CONVERT_COMPLETED; } ENCODING * -XmlInitUnknownEncoding(void *mem, - int *table, - CONVERTER convert, - void *userData) -{ +XmlInitUnknownEncoding(void *mem, int *table, CONVERTER convert, + void *userData) { int i; struct unknown_encoding *e = (struct unknown_encoding *)mem; - for (i = 0; i < (int)sizeof(struct normal_encoding); i++) - ((char *)mem)[i] = ((char *)&latin1_encoding)[i]; + memcpy(mem, &latin1_encoding, sizeof(struct normal_encoding)); for (i = 0; i < 128; i++) if (latin1_encoding.type[i] != BT_OTHER - && latin1_encoding.type[i] != BT_NONXML - && table[i] != i) + && latin1_encoding.type[i] != BT_NONXML && table[i] != i) return 0; for (i = 0; i < 256; i++) { int c = table[i]; @@ -1355,32 +1419,30 @@ XmlInitUnknownEncoding(void *mem, e->utf16[i] = 0xFFFF; e->utf8[i][0] = 1; e->utf8[i][1] = 0; - } - else if (c < 0) { + } else if (c < 0) { if (c < -4) return 0; + /* Multi-byte sequences need a converter function */ + if (! convert) + return 0; e->normal.type[i] = (unsigned char)(BT_LEAD2 - (c + 2)); e->utf8[i][0] = 0; e->utf16[i] = 0; - } - else if (c < 0x80) { + } else if (c < 0x80) { if (latin1_encoding.type[c] != BT_OTHER - && latin1_encoding.type[c] != BT_NONXML - && c != i) + && latin1_encoding.type[c] != BT_NONXML && c != i) return 0; e->normal.type[i] = latin1_encoding.type[c]; e->utf8[i][0] = 1; e->utf8[i][1] = (char)c; e->utf16[i] = (unsigned short)(c == 0 ? 0xFFFF : c); - } - else if (checkCharRefNumber(c) < 0) { + } else if (checkCharRefNumber(c) < 0) { e->normal.type[i] = BT_NONXML; /* This shouldn't really get used. */ e->utf16[i] = 0xFFFF; e->utf8[i][0] = 1; e->utf8[i][1] = 0; - } - else { + } else { if (c > 0xFFFF) return 0; if (UCS2_GET_NAMING(nmstrtPages, c >> 8, c & 0xff)) @@ -1425,44 +1487,32 @@ enum { NO_ENC }; -static const char KW_ISO_8859_1[] = { - ASCII_I, ASCII_S, ASCII_O, ASCII_MINUS, ASCII_8, ASCII_8, ASCII_5, ASCII_9, - ASCII_MINUS, ASCII_1, '\0' -}; -static const char KW_US_ASCII[] = { - ASCII_U, ASCII_S, ASCII_MINUS, ASCII_A, ASCII_S, ASCII_C, ASCII_I, ASCII_I, - '\0' -}; -static const char KW_UTF_8[] = { - ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_8, '\0' -}; -static const char KW_UTF_16[] = { - ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, ASCII_6, '\0' -}; -static const char KW_UTF_16BE[] = { - ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, ASCII_6, ASCII_B, ASCII_E, - '\0' -}; -static const char KW_UTF_16LE[] = { - ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, ASCII_6, ASCII_L, ASCII_E, - '\0' -}; +static const char KW_ISO_8859_1[] + = {ASCII_I, ASCII_S, ASCII_O, ASCII_MINUS, ASCII_8, ASCII_8, + ASCII_5, ASCII_9, ASCII_MINUS, ASCII_1, '\0'}; +static const char KW_US_ASCII[] + = {ASCII_U, ASCII_S, ASCII_MINUS, ASCII_A, ASCII_S, + ASCII_C, ASCII_I, ASCII_I, '\0'}; +static const char KW_UTF_8[] + = {ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_8, '\0'}; +static const char KW_UTF_16[] + = {ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, ASCII_6, '\0'}; +static const char KW_UTF_16BE[] + = {ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, + ASCII_6, ASCII_B, ASCII_E, '\0'}; +static const char KW_UTF_16LE[] + = {ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, + ASCII_6, ASCII_L, ASCII_E, '\0'}; static int FASTCALL -getEncodingIndex(const char *name) -{ - static const char *encodingNames[] = { - KW_ISO_8859_1, - KW_US_ASCII, - KW_UTF_8, - KW_UTF_16, - KW_UTF_16BE, - KW_UTF_16LE, +getEncodingIndex(const char *name) { + static const char *const encodingNames[] = { + KW_ISO_8859_1, KW_US_ASCII, KW_UTF_8, KW_UTF_16, KW_UTF_16BE, KW_UTF_16LE, }; int i; if (name == NULL) return NO_ENC; - for (i = 0; i < (int)(sizeof(encodingNames)/sizeof(encodingNames[0])); i++) + for (i = 0; i < (int)(sizeof(encodingNames) / sizeof(encodingNames[0])); i++) if (streqci(name, encodingNames[i])) return i; return UNKNOWN_ENC; @@ -1482,18 +1532,12 @@ getEncodingIndex(const char *name) XML_PROLOG_STATE otherwise. */ - static int -initScan(const ENCODING **encodingTable, - const INIT_ENCODING *enc, - int state, - const char *ptr, - const char *end, - const char **nextTokPtr) -{ +initScan(const ENCODING *const *encodingTable, const INIT_ENCODING *enc, + int state, const char *ptr, const char *end, const char **nextTokPtr) { const ENCODING **encPtr; - if (ptr == end) + if (ptr >= end) return XML_TOK_NONE; encPtr = enc->encPtr; if (ptr + 1 == end) { @@ -1515,20 +1559,17 @@ initScan(const ENCODING **encodingTable, case 0xFE: case 0xFF: case 0xEF: /* possibly first byte of UTF-8 BOM */ - if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC - && state == XML_CONTENT_STATE) + if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC && state == XML_CONTENT_STATE) break; /* fall through */ case 0x00: case 0x3C: return XML_TOK_PARTIAL; } - } - else { + } else { switch (((unsigned char)ptr[0] << 8) | (unsigned char)ptr[1]) { case 0xFEFF: - if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC - && state == XML_CONTENT_STATE) + if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC && state == XML_CONTENT_STATE) break; *nextTokPtr = ptr + 2; *encPtr = encodingTable[UTF_16BE_ENC]; @@ -1542,8 +1583,7 @@ initScan(const ENCODING **encodingTable, *encPtr = encodingTable[UTF_16LE_ENC]; return XmlTok(*encPtr, state, ptr, end, nextTokPtr); case 0xFFFE: - if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC - && state == XML_CONTENT_STATE) + if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC && state == XML_CONTENT_STATE) break; *nextTokPtr = ptr + 2; *encPtr = encodingTable[UTF_16LE_ENC]; @@ -1558,8 +1598,8 @@ initScan(const ENCODING **encodingTable, */ if (state == XML_CONTENT_STATE) { int e = INIT_ENC_INDEX(enc); - if (e == ISO_8859_1_ENC || e == UTF_16BE_ENC - || e == UTF_16LE_ENC || e == UTF_16_ENC) + if (e == ISO_8859_1_ENC || e == UTF_16BE_ENC || e == UTF_16LE_ENC + || e == UTF_16_ENC) break; } if (ptr + 2 == end) @@ -1582,8 +1622,7 @@ initScan(const ENCODING **encodingTable, break; *encPtr = encodingTable[UTF_16BE_ENC]; return XmlTok(*encPtr, state, ptr, end, nextTokPtr); - } - else if (ptr[1] == '\0') { + } else if (ptr[1] == '\0') { /* We could recover here in the case: - parsing an external entity - second byte is 0 @@ -1605,29 +1644,29 @@ initScan(const ENCODING **encodingTable, return XmlTok(*encPtr, state, ptr, end, nextTokPtr); } - #define NS(x) x #define ns(x) x +#define XML_TOK_NS_C #include "xmltok_ns.c" +#undef XML_TOK_NS_C #undef NS #undef ns #ifdef XML_NS -#define NS(x) x ## NS -#define ns(x) x ## _ns +# define NS(x) x##NS +# define ns(x) x##_ns -#include "xmltok_ns.c" +# define XML_TOK_NS_C +# include "xmltok_ns.c" +# undef XML_TOK_NS_C -#undef NS -#undef ns +# undef NS +# undef ns ENCODING * -XmlInitUnknownEncodingNS(void *mem, - int *table, - CONVERTER convert, - void *userData) -{ +XmlInitUnknownEncodingNS(void *mem, int *table, CONVERTER convert, + void *userData) { ENCODING *enc = XmlInitUnknownEncoding(mem, table, convert, userData); if (enc) ((struct normal_encoding *)enc)->type[ASCII_COLON] = BT_COLON; diff --git a/src/simgear/xml/xmltok.h b/src/simgear/xml/xmltok.h index 1ecd05f88..6f630c2f9 100644 --- a/src/simgear/xml/xmltok.h +++ b/src/simgear/xml/xmltok.h @@ -1,5 +1,37 @@ -/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd - See the file COPYING for copying permission. +/* + __ __ _ + ___\ \/ /_ __ __ _| |_ + / _ \\ /| '_ \ / _` | __| + | __// \| |_) | (_| | |_ + \___/_/\_\ .__/ \__,_|\__| + |_| XML parser + + Copyright (c) 1997-2000 Thai Open Source Software Center Ltd + Copyright (c) 2000 Clark Cooper + Copyright (c) 2002 Fred L. Drake, Jr. + Copyright (c) 2002-2005 Karl Waclawek + Copyright (c) 2016-2017 Sebastian Pipping + Copyright (c) 2017 Rhodri James + Licensed under the MIT license: + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to permit + persons to whom the Software is furnished to do so, subject to the + following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN + NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, + DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + USE OR OTHER DEALINGS IN THE SOFTWARE. */ #ifndef XmlTok_INCLUDED @@ -10,16 +42,18 @@ extern "C" { #endif /* The following token may be returned by XmlContentTok */ -#define XML_TOK_TRAILING_RSQB -5 /* ] or ]] at the end of the scan; might be - start of illegal ]]> sequence */ +#define XML_TOK_TRAILING_RSQB \ + -5 /* ] or ]] at the end of the scan; might be \ + start of illegal ]]> sequence */ /* The following tokens may be returned by both XmlPrologTok and XmlContentTok. */ -#define XML_TOK_NONE -4 /* The string to be scanned is empty */ -#define XML_TOK_TRAILING_CR -3 /* A CR at the end of the scan; - might be part of CRLF sequence */ -#define XML_TOK_PARTIAL_CHAR -2 /* only part of a multibyte sequence */ -#define XML_TOK_PARTIAL -1 /* only part of a token */ +#define XML_TOK_NONE -4 /* The string to be scanned is empty */ +#define XML_TOK_TRAILING_CR \ + -3 /* A CR at the end of the scan; \ + might be part of CRLF sequence */ +#define XML_TOK_PARTIAL_CHAR -2 /* only part of a multibyte sequence */ +#define XML_TOK_PARTIAL -1 /* only part of a token */ #define XML_TOK_INVALID 0 /* The following tokens are returned by XmlContentTok; some are also @@ -34,24 +68,24 @@ extern "C" { #define XML_TOK_DATA_NEWLINE 7 #define XML_TOK_CDATA_SECT_OPEN 8 #define XML_TOK_ENTITY_REF 9 -#define XML_TOK_CHAR_REF 10 /* numeric character reference */ +#define XML_TOK_CHAR_REF 10 /* numeric character reference */ /* The following tokens may be returned by both XmlPrologTok and XmlContentTok. */ -#define XML_TOK_PI 11 /* processing instruction */ -#define XML_TOK_XML_DECL 12 /* XML decl or text decl */ +#define XML_TOK_PI 11 /* processing instruction */ +#define XML_TOK_XML_DECL 12 /* XML decl or text decl */ #define XML_TOK_COMMENT 13 -#define XML_TOK_BOM 14 /* Byte order mark */ +#define XML_TOK_BOM 14 /* Byte order mark */ /* The following tokens are returned only by XmlPrologTok */ #define XML_TOK_PROLOG_S 15 -#define XML_TOK_DECL_OPEN 16 /* */ +#define XML_TOK_DECL_OPEN 16 /* */ #define XML_TOK_NAME 18 #define XML_TOK_NMTOKEN 19 -#define XML_TOK_POUND_NAME 20 /* #name */ -#define XML_TOK_OR 21 /* | */ +#define XML_TOK_POUND_NAME 20 /* #name */ +#define XML_TOK_OR 21 /* | */ #define XML_TOK_PERCENT 22 #define XML_TOK_OPEN_PAREN 23 #define XML_TOK_CLOSE_PAREN 24 @@ -62,14 +96,14 @@ extern "C" { #define XML_TOK_INSTANCE_START 29 /* The following occur only in element type declarations */ -#define XML_TOK_NAME_QUESTION 30 /* name? */ -#define XML_TOK_NAME_ASTERISK 31 /* name* */ -#define XML_TOK_NAME_PLUS 32 /* name+ */ -#define XML_TOK_COND_SECT_OPEN 33 /* */ -#define XML_TOK_CLOSE_PAREN_QUESTION 35 /* )? */ -#define XML_TOK_CLOSE_PAREN_ASTERISK 36 /* )* */ -#define XML_TOK_CLOSE_PAREN_PLUS 37 /* )+ */ +#define XML_TOK_NAME_QUESTION 30 /* name? */ +#define XML_TOK_NAME_ASTERISK 31 /* name* */ +#define XML_TOK_NAME_PLUS 32 /* name+ */ +#define XML_TOK_COND_SECT_OPEN 33 /* */ +#define XML_TOK_CLOSE_PAREN_QUESTION 35 /* )? */ +#define XML_TOK_CLOSE_PAREN_ASTERISK 36 /* )* */ +#define XML_TOK_CLOSE_PAREN_PLUS 37 /* )+ */ #define XML_TOK_COMMA 38 /* The following token is returned only by XmlAttributeValueTok */ @@ -84,20 +118,20 @@ extern "C" { #define XML_TOK_PREFIXED_NAME 41 #ifdef XML_DTD -#define XML_TOK_IGNORE_SECT 42 +# define XML_TOK_IGNORE_SECT 42 #endif /* XML_DTD */ #ifdef XML_DTD -#define XML_N_STATES 4 +# define XML_N_STATES 4 #else /* not XML_DTD */ -#define XML_N_STATES 3 +# define XML_N_STATES 3 #endif /* not XML_DTD */ #define XML_PROLOG_STATE 0 #define XML_CONTENT_STATE 1 #define XML_CDATA_SECTION_STATE 2 #ifdef XML_DTD -#define XML_IGNORE_SECTION_STATE 3 +# define XML_IGNORE_SECTION_STATE 3 #endif /* XML_DTD */ #define XML_N_LITERAL_TYPES 2 @@ -111,8 +145,8 @@ extern "C" { typedef struct position { /* first line and first column are 0 not 1 */ - unsigned long lineNumber; - unsigned long columnNumber; + XML_Size lineNumber; + XML_Size columnNumber; } POSITION; typedef struct { @@ -125,49 +159,41 @@ typedef struct { struct encoding; typedef struct encoding ENCODING; -typedef int (PTRCALL *SCANNER)(const ENCODING *, - const char *, - const char *, - const char **); +typedef int(PTRCALL *SCANNER)(const ENCODING *, const char *, const char *, + const char **); + +enum XML_Convert_Result { + XML_CONVERT_COMPLETED = 0, + XML_CONVERT_INPUT_INCOMPLETE = 1, + XML_CONVERT_OUTPUT_EXHAUSTED + = 2 /* and therefore potentially input remaining as well */ +}; struct encoding { SCANNER scanners[XML_N_STATES]; SCANNER literalScanners[XML_N_LITERAL_TYPES]; - int (PTRCALL *sameName)(const ENCODING *, - const char *, - const char *); - int (PTRCALL *nameMatchesAscii)(const ENCODING *, - const char *, - const char *, - const char *); - int (PTRFASTCALL *nameLength)(const ENCODING *, const char *); + int(PTRCALL *nameMatchesAscii)(const ENCODING *, const char *, const char *, + const char *); + int(PTRFASTCALL *nameLength)(const ENCODING *, const char *); const char *(PTRFASTCALL *skipS)(const ENCODING *, const char *); - int (PTRCALL *getAtts)(const ENCODING *enc, - const char *ptr, - int attsMax, - ATTRIBUTE *atts); - int (PTRFASTCALL *charRefNumber)(const ENCODING *enc, const char *ptr); - int (PTRCALL *predefinedEntityName)(const ENCODING *, - const char *, - const char *); - void (PTRCALL *updatePosition)(const ENCODING *, - const char *ptr, - const char *end, - POSITION *); - int (PTRCALL *isPublicId)(const ENCODING *enc, - const char *ptr, - const char *end, - const char **badPtr); - void (PTRCALL *utf8Convert)(const ENCODING *enc, - const char **fromP, - const char *fromLim, - char **toP, - const char *toLim); - void (PTRCALL *utf16Convert)(const ENCODING *enc, - const char **fromP, - const char *fromLim, - unsigned short **toP, - const unsigned short *toLim); + int(PTRCALL *getAtts)(const ENCODING *enc, const char *ptr, int attsMax, + ATTRIBUTE *atts); + int(PTRFASTCALL *charRefNumber)(const ENCODING *enc, const char *ptr); + int(PTRCALL *predefinedEntityName)(const ENCODING *, const char *, + const char *); + void(PTRCALL *updatePosition)(const ENCODING *, const char *ptr, + const char *end, POSITION *); + int(PTRCALL *isPublicId)(const ENCODING *enc, const char *ptr, + const char *end, const char **badPtr); + enum XML_Convert_Result(PTRCALL *utf8Convert)(const ENCODING *enc, + const char **fromP, + const char *fromLim, char **toP, + const char *toLim); + enum XML_Convert_Result(PTRCALL *utf16Convert)(const ENCODING *enc, + const char **fromP, + const char *fromLim, + unsigned short **toP, + const unsigned short *toLim); int minBytesPerChar; char isUtf8; char isUtf16; @@ -194,68 +220,62 @@ struct encoding { the prolog outside literals, comments and processing instructions. */ - -#define XmlTok(enc, state, ptr, end, nextTokPtr) \ +#define XmlTok(enc, state, ptr, end, nextTokPtr) \ (((enc)->scanners[state])(enc, ptr, end, nextTokPtr)) -#define XmlPrologTok(enc, ptr, end, nextTokPtr) \ - XmlTok(enc, XML_PROLOG_STATE, ptr, end, nextTokPtr) +#define XmlPrologTok(enc, ptr, end, nextTokPtr) \ + XmlTok(enc, XML_PROLOG_STATE, ptr, end, nextTokPtr) -#define XmlContentTok(enc, ptr, end, nextTokPtr) \ - XmlTok(enc, XML_CONTENT_STATE, ptr, end, nextTokPtr) +#define XmlContentTok(enc, ptr, end, nextTokPtr) \ + XmlTok(enc, XML_CONTENT_STATE, ptr, end, nextTokPtr) -#define XmlCdataSectionTok(enc, ptr, end, nextTokPtr) \ - XmlTok(enc, XML_CDATA_SECTION_STATE, ptr, end, nextTokPtr) +#define XmlCdataSectionTok(enc, ptr, end, nextTokPtr) \ + XmlTok(enc, XML_CDATA_SECTION_STATE, ptr, end, nextTokPtr) #ifdef XML_DTD -#define XmlIgnoreSectionTok(enc, ptr, end, nextTokPtr) \ - XmlTok(enc, XML_IGNORE_SECTION_STATE, ptr, end, nextTokPtr) +# define XmlIgnoreSectionTok(enc, ptr, end, nextTokPtr) \ + XmlTok(enc, XML_IGNORE_SECTION_STATE, ptr, end, nextTokPtr) #endif /* XML_DTD */ /* This is used for performing a 2nd-level tokenization on the content of a literal that has already been returned by XmlTok. */ -#define XmlLiteralTok(enc, literalType, ptr, end, nextTokPtr) \ +#define XmlLiteralTok(enc, literalType, ptr, end, nextTokPtr) \ (((enc)->literalScanners[literalType])(enc, ptr, end, nextTokPtr)) -#define XmlAttributeValueTok(enc, ptr, end, nextTokPtr) \ - XmlLiteralTok(enc, XML_ATTRIBUTE_VALUE_LITERAL, ptr, end, nextTokPtr) +#define XmlAttributeValueTok(enc, ptr, end, nextTokPtr) \ + XmlLiteralTok(enc, XML_ATTRIBUTE_VALUE_LITERAL, ptr, end, nextTokPtr) -#define XmlEntityValueTok(enc, ptr, end, nextTokPtr) \ - XmlLiteralTok(enc, XML_ENTITY_VALUE_LITERAL, ptr, end, nextTokPtr) +#define XmlEntityValueTok(enc, ptr, end, nextTokPtr) \ + XmlLiteralTok(enc, XML_ENTITY_VALUE_LITERAL, ptr, end, nextTokPtr) -#define XmlSameName(enc, ptr1, ptr2) (((enc)->sameName)(enc, ptr1, ptr2)) - -#define XmlNameMatchesAscii(enc, ptr1, end1, ptr2) \ +#define XmlNameMatchesAscii(enc, ptr1, end1, ptr2) \ (((enc)->nameMatchesAscii)(enc, ptr1, end1, ptr2)) -#define XmlNameLength(enc, ptr) \ - (((enc)->nameLength)(enc, ptr)) +#define XmlNameLength(enc, ptr) (((enc)->nameLength)(enc, ptr)) -#define XmlSkipS(enc, ptr) \ - (((enc)->skipS)(enc, ptr)) +#define XmlSkipS(enc, ptr) (((enc)->skipS)(enc, ptr)) -#define XmlGetAttributes(enc, ptr, attsMax, atts) \ +#define XmlGetAttributes(enc, ptr, attsMax, atts) \ (((enc)->getAtts)(enc, ptr, attsMax, atts)) -#define XmlCharRefNumber(enc, ptr) \ - (((enc)->charRefNumber)(enc, ptr)) +#define XmlCharRefNumber(enc, ptr) (((enc)->charRefNumber)(enc, ptr)) -#define XmlPredefinedEntityName(enc, ptr, end) \ +#define XmlPredefinedEntityName(enc, ptr, end) \ (((enc)->predefinedEntityName)(enc, ptr, end)) -#define XmlUpdatePosition(enc, ptr, end, pos) \ +#define XmlUpdatePosition(enc, ptr, end, pos) \ (((enc)->updatePosition)(enc, ptr, end, pos)) -#define XmlIsPublicId(enc, ptr, end, badPtr) \ +#define XmlIsPublicId(enc, ptr, end, badPtr) \ (((enc)->isPublicId)(enc, ptr, end, badPtr)) -#define XmlUtf8Convert(enc, fromP, fromLim, toP, toLim) \ +#define XmlUtf8Convert(enc, fromP, fromLim, toP, toLim) \ (((enc)->utf8Convert)(enc, fromP, fromLim, toP, toLim)) -#define XmlUtf16Convert(enc, fromP, fromLim, toP, toLim) \ +#define XmlUtf16Convert(enc, fromP, fromLim, toP, toLim) \ (((enc)->utf16Convert)(enc, fromP, fromLim, toP, toLim)) typedef struct { @@ -263,16 +283,11 @@ typedef struct { const ENCODING **encPtr; } INIT_ENCODING; -int XmlParseXmlDecl(int isGeneralTextEntity, - const ENCODING *enc, - const char *ptr, - const char *end, - const char **badPtr, - const char **versionPtr, - const char **versionEndPtr, +int XmlParseXmlDecl(int isGeneralTextEntity, const ENCODING *enc, + const char *ptr, const char *end, const char **badPtr, + const char **versionPtr, const char **versionEndPtr, const char **encodingNamePtr, - const ENCODING **namedEncodingPtr, - int *standalonePtr); + const ENCODING **namedEncodingPtr, int *standalonePtr); int XmlInitEncoding(INIT_ENCODING *, const ENCODING **, const char *name); const ENCODING *XmlGetUtf8InternalEncoding(void); @@ -281,34 +296,22 @@ int FASTCALL XmlUtf8Encode(int charNumber, char *buf); int FASTCALL XmlUtf16Encode(int charNumber, unsigned short *buf); int XmlSizeOfUnknownEncoding(void); +typedef int(XMLCALL *CONVERTER)(void *userData, const char *p); -typedef int (XMLCALL *CONVERTER) (void *userData, const char *p); - -ENCODING * -XmlInitUnknownEncoding(void *mem, - int *table, - CONVERTER convert, - void *userData); +ENCODING *XmlInitUnknownEncoding(void *mem, int *table, CONVERTER convert, + void *userData); -int XmlParseXmlDeclNS(int isGeneralTextEntity, - const ENCODING *enc, - const char *ptr, - const char *end, - const char **badPtr, - const char **versionPtr, - const char **versionEndPtr, +int XmlParseXmlDeclNS(int isGeneralTextEntity, const ENCODING *enc, + const char *ptr, const char *end, const char **badPtr, + const char **versionPtr, const char **versionEndPtr, const char **encodingNamePtr, - const ENCODING **namedEncodingPtr, - int *standalonePtr); + const ENCODING **namedEncodingPtr, int *standalonePtr); int XmlInitEncodingNS(INIT_ENCODING *, const ENCODING **, const char *name); const ENCODING *XmlGetUtf8InternalEncodingNS(void); const ENCODING *XmlGetUtf16InternalEncodingNS(void); -ENCODING * -XmlInitUnknownEncodingNS(void *mem, - int *table, - CONVERTER convert, - void *userData); +ENCODING *XmlInitUnknownEncodingNS(void *mem, int *table, CONVERTER convert, + void *userData); #ifdef __cplusplus } #endif diff --git a/src/simgear/xml/xmltok_impl.c b/src/simgear/xml/xmltok_impl.c index 46569fe38..4072b0649 100644 --- a/src/simgear/xml/xmltok_impl.c +++ b/src/simgear/xml/xmltok_impl.c @@ -1,111 +1,163 @@ -/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd - See the file COPYING for copying permission. +/* This file is included (from xmltok.c, 1-3 times depending on XML_MIN_SIZE)! + __ __ _ + ___\ \/ /_ __ __ _| |_ + / _ \\ /| '_ \ / _` | __| + | __// \| |_) | (_| | |_ + \___/_/\_\ .__/ \__,_|\__| + |_| XML parser + + Copyright (c) 1997-2000 Thai Open Source Software Center Ltd + Copyright (c) 2000 Clark Cooper + Copyright (c) 2002 Fred L. Drake, Jr. + Copyright (c) 2002-2016 Karl Waclawek + Copyright (c) 2016-2022 Sebastian Pipping + Copyright (c) 2017 Rhodri James + Copyright (c) 2018 Benjamin Peterson + Copyright (c) 2018 Anton Maklakov + Copyright (c) 2019 David Loffredo + Copyright (c) 2020 Boris Kolpackov + Licensed under the MIT license: + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to permit + persons to whom the Software is furnished to do so, subject to the + following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN + NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, + DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#ifndef IS_INVALID_CHAR -#define IS_INVALID_CHAR(enc, ptr, n) (0) -#endif +#ifdef XML_TOK_IMPL_C -#define INVALID_LEAD_CASE(n, ptr, nextTokPtr) \ - case BT_LEAD ## n: \ - if (end - ptr < n) \ - return XML_TOK_PARTIAL_CHAR; \ - if (IS_INVALID_CHAR(enc, ptr, n)) { \ - *(nextTokPtr) = (ptr); \ - return XML_TOK_INVALID; \ - } \ - ptr += n; \ - break; +# ifndef IS_INVALID_CHAR // i.e. for UTF-16 and XML_MIN_SIZE not defined +# define IS_INVALID_CHAR(enc, ptr, n) (0) +# endif -#define INVALID_CASES(ptr, nextTokPtr) \ - INVALID_LEAD_CASE(2, ptr, nextTokPtr) \ - INVALID_LEAD_CASE(3, ptr, nextTokPtr) \ - INVALID_LEAD_CASE(4, ptr, nextTokPtr) \ - case BT_NONXML: \ - case BT_MALFORM: \ - case BT_TRAIL: \ - *(nextTokPtr) = (ptr); \ +# define INVALID_LEAD_CASE(n, ptr, nextTokPtr) \ + case BT_LEAD##n: \ + if (end - ptr < n) \ + return XML_TOK_PARTIAL_CHAR; \ + if (IS_INVALID_CHAR(enc, ptr, n)) { \ + *(nextTokPtr) = (ptr); \ + return XML_TOK_INVALID; \ + } \ + ptr += n; \ + break; + +# define INVALID_CASES(ptr, nextTokPtr) \ + INVALID_LEAD_CASE(2, ptr, nextTokPtr) \ + INVALID_LEAD_CASE(3, ptr, nextTokPtr) \ + INVALID_LEAD_CASE(4, ptr, nextTokPtr) \ + case BT_NONXML: \ + case BT_MALFORM: \ + case BT_TRAIL: \ + *(nextTokPtr) = (ptr); \ return XML_TOK_INVALID; -#define CHECK_NAME_CASE(n, enc, ptr, end, nextTokPtr) \ - case BT_LEAD ## n: \ - if (end - ptr < n) \ - return XML_TOK_PARTIAL_CHAR; \ - if (!IS_NAME_CHAR(enc, ptr, n)) { \ - *nextTokPtr = ptr; \ - return XML_TOK_INVALID; \ - } \ - ptr += n; \ - break; +# define CHECK_NAME_CASE(n, enc, ptr, end, nextTokPtr) \ + case BT_LEAD##n: \ + if (end - ptr < n) \ + return XML_TOK_PARTIAL_CHAR; \ + if (IS_INVALID_CHAR(enc, ptr, n) || ! IS_NAME_CHAR(enc, ptr, n)) { \ + *nextTokPtr = ptr; \ + return XML_TOK_INVALID; \ + } \ + ptr += n; \ + break; -#define CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) \ - case BT_NONASCII: \ - if (!IS_NAME_CHAR_MINBPC(enc, ptr)) { \ - *nextTokPtr = ptr; \ - return XML_TOK_INVALID; \ - } \ - case BT_NMSTRT: \ - case BT_HEX: \ - case BT_DIGIT: \ - case BT_NAME: \ - case BT_MINUS: \ - ptr += MINBPC(enc); \ - break; \ - CHECK_NAME_CASE(2, enc, ptr, end, nextTokPtr) \ - CHECK_NAME_CASE(3, enc, ptr, end, nextTokPtr) \ - CHECK_NAME_CASE(4, enc, ptr, end, nextTokPtr) +# define CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) \ + case BT_NONASCII: \ + if (! IS_NAME_CHAR_MINBPC(enc, ptr)) { \ + *nextTokPtr = ptr; \ + return XML_TOK_INVALID; \ + } \ + /* fall through */ \ + case BT_NMSTRT: \ + case BT_HEX: \ + case BT_DIGIT: \ + case BT_NAME: \ + case BT_MINUS: \ + ptr += MINBPC(enc); \ + break; \ + CHECK_NAME_CASE(2, enc, ptr, end, nextTokPtr) \ + CHECK_NAME_CASE(3, enc, ptr, end, nextTokPtr) \ + CHECK_NAME_CASE(4, enc, ptr, end, nextTokPtr) -#define CHECK_NMSTRT_CASE(n, enc, ptr, end, nextTokPtr) \ - case BT_LEAD ## n: \ - if (end - ptr < n) \ - return XML_TOK_PARTIAL_CHAR; \ - if (!IS_NMSTRT_CHAR(enc, ptr, n)) { \ - *nextTokPtr = ptr; \ - return XML_TOK_INVALID; \ - } \ - ptr += n; \ - break; +# define CHECK_NMSTRT_CASE(n, enc, ptr, end, nextTokPtr) \ + case BT_LEAD##n: \ + if (end - ptr < n) \ + return XML_TOK_PARTIAL_CHAR; \ + if (IS_INVALID_CHAR(enc, ptr, n) || ! IS_NMSTRT_CHAR(enc, ptr, n)) { \ + *nextTokPtr = ptr; \ + return XML_TOK_INVALID; \ + } \ + ptr += n; \ + break; -#define CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) \ - case BT_NONASCII: \ - if (!IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { \ - *nextTokPtr = ptr; \ - return XML_TOK_INVALID; \ - } \ - case BT_NMSTRT: \ - case BT_HEX: \ - ptr += MINBPC(enc); \ - break; \ - CHECK_NMSTRT_CASE(2, enc, ptr, end, nextTokPtr) \ - CHECK_NMSTRT_CASE(3, enc, ptr, end, nextTokPtr) \ - CHECK_NMSTRT_CASE(4, enc, ptr, end, nextTokPtr) +# define CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) \ + case BT_NONASCII: \ + if (! IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { \ + *nextTokPtr = ptr; \ + return XML_TOK_INVALID; \ + } \ + /* fall through */ \ + case BT_NMSTRT: \ + case BT_HEX: \ + ptr += MINBPC(enc); \ + break; \ + CHECK_NMSTRT_CASE(2, enc, ptr, end, nextTokPtr) \ + CHECK_NMSTRT_CASE(3, enc, ptr, end, nextTokPtr) \ + CHECK_NMSTRT_CASE(4, enc, ptr, end, nextTokPtr) -#ifndef PREFIX -#define PREFIX(ident) ident -#endif +# ifndef PREFIX +# define PREFIX(ident) ident +# endif + +# define HAS_CHARS(enc, ptr, end, count) (end - ptr >= count * MINBPC(enc)) + +# define HAS_CHAR(enc, ptr, end) HAS_CHARS(enc, ptr, end, 1) + +# define REQUIRE_CHARS(enc, ptr, end, count) \ + { \ + if (! HAS_CHARS(enc, ptr, end, count)) { \ + return XML_TOK_PARTIAL; \ + } \ + } + +# define REQUIRE_CHAR(enc, ptr, end) REQUIRE_CHARS(enc, ptr, end, 1) /* ptr points to character following " */ switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) { - case BT_S: case BT_CR: case BT_LF: case BT_PERCNT: + case BT_S: + case BT_CR: + case BT_LF: + case BT_PERCNT: *nextTokPtr = ptr; return XML_TOK_INVALID; } /* fall through */ - case BT_S: case BT_CR: case BT_LF: + case BT_S: + case BT_CR: + case BT_LF: *nextTokPtr = ptr; return XML_TOK_DECL_OPEN; case BT_NMSTRT: @@ -172,12 +226,12 @@ PREFIX(scanDecl)(const ENCODING *enc, const char *ptr, } static int PTRCALL -PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr, - const char *end, int *tokPtr) -{ +PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr, const char *end, + int *tokPtr) { int upper = 0; + UNUSED_P(enc); *tokPtr = XML_TOK_PI; - if (end - ptr != MINBPC(enc)*3) + if (end - ptr != MINBPC(enc) * 3) return 1; switch (BYTE_TO_ASCII(enc, ptr)) { case ASCII_x: @@ -217,35 +271,34 @@ PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr, /* ptr points to character following "= end) return XML_TOK_NONE; if (MINBPC(enc) > 1) { size_t n = end - ptr; @@ -316,14 +366,12 @@ PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, switch (BYTE_TYPE(enc, ptr)) { case BT_RSQB: ptr += MINBPC(enc); - if (ptr == end) - return XML_TOK_PARTIAL; - if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB)) + REQUIRE_CHAR(enc, ptr, end); + if (! CHAR_MATCHES(enc, ptr, ASCII_RSQB)) break; ptr += MINBPC(enc); - if (ptr == end) - return XML_TOK_PARTIAL; - if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { + REQUIRE_CHAR(enc, ptr, end); + if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) { ptr -= MINBPC(enc); break; } @@ -331,8 +379,7 @@ PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, return XML_TOK_CDATA_SECT_CLOSE; case BT_CR: ptr += MINBPC(enc); - if (ptr == end) - return XML_TOK_PARTIAL; + REQUIRE_CHAR(enc, ptr, end); if (BYTE_TYPE(enc, ptr) == BT_LF) ptr += MINBPC(enc); *nextTokPtr = ptr; @@ -340,23 +387,25 @@ PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, case BT_LF: *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_DATA_NEWLINE; - INVALID_CASES(ptr, nextTokPtr) + INVALID_CASES(ptr, nextTokPtr) default: ptr += MINBPC(enc); break; } - while (ptr != end) { + while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { -#define LEAD_CASE(n) \ - case BT_LEAD ## n: \ - if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \ - *nextTokPtr = ptr; \ - return XML_TOK_DATA_CHARS; \ - } \ - ptr += n; \ - break; - LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) -#undef LEAD_CASE +# define LEAD_CASE(n) \ + case BT_LEAD##n: \ + if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \ + *nextTokPtr = ptr; \ + return XML_TOK_DATA_CHARS; \ + } \ + ptr += n; \ + break; + LEAD_CASE(2) + LEAD_CASE(3) + LEAD_CASE(4) +# undef LEAD_CASE case BT_NONXML: case BT_MALFORM: case BT_TRAIL: @@ -377,24 +426,26 @@ PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, /* ptr points to character following "= end) return XML_TOK_NONE; if (MINBPC(enc) > 1) { size_t n = end - ptr; @@ -800,7 +838,7 @@ PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end, return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); case BT_CR: ptr += MINBPC(enc); - if (ptr == end) + if (! HAS_CHAR(enc, ptr, end)) return XML_TOK_TRAILING_CR; if (BYTE_TYPE(enc, ptr) == BT_LF) ptr += MINBPC(enc); @@ -811,50 +849,52 @@ PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end, return XML_TOK_DATA_NEWLINE; case BT_RSQB: ptr += MINBPC(enc); - if (ptr == end) + if (! HAS_CHAR(enc, ptr, end)) return XML_TOK_TRAILING_RSQB; - if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB)) + if (! CHAR_MATCHES(enc, ptr, ASCII_RSQB)) break; ptr += MINBPC(enc); - if (ptr == end) + if (! HAS_CHAR(enc, ptr, end)) return XML_TOK_TRAILING_RSQB; - if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { + if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) { ptr -= MINBPC(enc); break; } *nextTokPtr = ptr; return XML_TOK_INVALID; - INVALID_CASES(ptr, nextTokPtr) + INVALID_CASES(ptr, nextTokPtr) default: ptr += MINBPC(enc); break; } - while (ptr != end) { + while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { -#define LEAD_CASE(n) \ - case BT_LEAD ## n: \ - if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \ - *nextTokPtr = ptr; \ - return XML_TOK_DATA_CHARS; \ - } \ - ptr += n; \ - break; - LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) -#undef LEAD_CASE +# define LEAD_CASE(n) \ + case BT_LEAD##n: \ + if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \ + *nextTokPtr = ptr; \ + return XML_TOK_DATA_CHARS; \ + } \ + ptr += n; \ + break; + LEAD_CASE(2) + LEAD_CASE(3) + LEAD_CASE(4) +# undef LEAD_CASE case BT_RSQB: - if (ptr + MINBPC(enc) != end) { - if (!CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) { - ptr += MINBPC(enc); - break; - } - if (ptr + 2*MINBPC(enc) != end) { - if (!CHAR_MATCHES(enc, ptr + 2*MINBPC(enc), ASCII_GT)) { - ptr += MINBPC(enc); - break; - } - *nextTokPtr = ptr + 2*MINBPC(enc); - return XML_TOK_INVALID; - } + if (HAS_CHARS(enc, ptr, end, 2)) { + if (! CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) { + ptr += MINBPC(enc); + break; + } + if (HAS_CHARS(enc, ptr, end, 3)) { + if (! CHAR_MATCHES(enc, ptr + 2 * MINBPC(enc), ASCII_GT)) { + ptr += MINBPC(enc); + break; + } + *nextTokPtr = ptr + 2 * MINBPC(enc); + return XML_TOK_INVALID; + } } /* fall through */ case BT_AMP: @@ -879,22 +919,23 @@ PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end, static int PTRCALL PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end, - const char **nextTokPtr) -{ - if (ptr == end) - return -XML_TOK_PERCENT; + const char **nextTokPtr) { + REQUIRE_CHAR(enc, ptr, end); switch (BYTE_TYPE(enc, ptr)) { - CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) - case BT_S: case BT_LF: case BT_CR: case BT_PERCNT: + CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) + case BT_S: + case BT_LF: + case BT_CR: + case BT_PERCNT: *nextTokPtr = ptr; return XML_TOK_PERCENT; default: *nextTokPtr = ptr; return XML_TOK_INVALID; } - while (ptr != end) { + while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { - CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) + CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) case BT_SEMI: *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_PARAM_ENTITY_REF; @@ -908,21 +949,24 @@ PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end, static int PTRCALL PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end, - const char **nextTokPtr) -{ - if (ptr == end) - return XML_TOK_PARTIAL; + const char **nextTokPtr) { + REQUIRE_CHAR(enc, ptr, end); switch (BYTE_TYPE(enc, ptr)) { - CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) + CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) default: *nextTokPtr = ptr; return XML_TOK_INVALID; } - while (ptr != end) { + while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { - CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) - case BT_CR: case BT_LF: case BT_S: - case BT_RPAR: case BT_GT: case BT_PERCNT: case BT_VERBAR: + CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) + case BT_CR: + case BT_LF: + case BT_S: + case BT_RPAR: + case BT_GT: + case BT_PERCNT: + case BT_VERBAR: *nextTokPtr = ptr; return XML_TOK_POUND_NAME; default: @@ -934,25 +978,27 @@ PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end, } static int PTRCALL -PREFIX(scanLit)(int open, const ENCODING *enc, - const char *ptr, const char *end, - const char **nextTokPtr) -{ - while (ptr != end) { +PREFIX(scanLit)(int open, const ENCODING *enc, const char *ptr, const char *end, + const char **nextTokPtr) { + while (HAS_CHAR(enc, ptr, end)) { int t = BYTE_TYPE(enc, ptr); switch (t) { - INVALID_CASES(ptr, nextTokPtr) + INVALID_CASES(ptr, nextTokPtr) case BT_QUOT: case BT_APOS: ptr += MINBPC(enc); if (t != open) break; - if (ptr == end) + if (! HAS_CHAR(enc, ptr, end)) return -XML_TOK_LITERAL; *nextTokPtr = ptr; switch (BYTE_TYPE(enc, ptr)) { - case BT_S: case BT_CR: case BT_LF: - case BT_GT: case BT_PERCNT: case BT_LSQB: + case BT_S: + case BT_CR: + case BT_LF: + case BT_GT: + case BT_PERCNT: + case BT_LSQB: return XML_TOK_LITERAL; default: return XML_TOK_INVALID; @@ -967,10 +1013,9 @@ PREFIX(scanLit)(int open, const ENCODING *enc, static int PTRCALL PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, - const char **nextTokPtr) -{ + const char **nextTokPtr) { int tok; - if (ptr == end) + if (ptr >= end) return XML_TOK_NONE; if (MINBPC(enc) > 1) { size_t n = end - ptr; @@ -986,28 +1031,26 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, return PREFIX(scanLit)(BT_QUOT, enc, ptr + MINBPC(enc), end, nextTokPtr); case BT_APOS: return PREFIX(scanLit)(BT_APOS, enc, ptr + MINBPC(enc), end, nextTokPtr); - case BT_LT: - { - ptr += MINBPC(enc); - if (ptr == end) - return XML_TOK_PARTIAL; - switch (BYTE_TYPE(enc, ptr)) { - case BT_EXCL: - return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr); - case BT_QUEST: - return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr); - case BT_NMSTRT: - case BT_HEX: - case BT_NONASCII: - case BT_LEAD2: - case BT_LEAD3: - case BT_LEAD4: - *nextTokPtr = ptr - MINBPC(enc); - return XML_TOK_INSTANCE_START; - } - *nextTokPtr = ptr; - return XML_TOK_INVALID; + case BT_LT: { + ptr += MINBPC(enc); + REQUIRE_CHAR(enc, ptr, end); + switch (BYTE_TYPE(enc, ptr)) { + case BT_EXCL: + return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr); + case BT_QUEST: + return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr); + case BT_NMSTRT: + case BT_HEX: + case BT_NONASCII: + case BT_LEAD2: + case BT_LEAD3: + case BT_LEAD4: + *nextTokPtr = ptr - MINBPC(enc); + return XML_TOK_INSTANCE_START; } + *nextTokPtr = ptr; + return XML_TOK_INVALID; + } case BT_CR: if (ptr + MINBPC(enc) == end) { *nextTokPtr = end; @@ -1015,13 +1058,15 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, return -XML_TOK_PROLOG_S; } /* fall through */ - case BT_S: case BT_LF: + case BT_S: + case BT_LF: for (;;) { ptr += MINBPC(enc); - if (ptr == end) + if (! HAS_CHAR(enc, ptr, end)) break; switch (BYTE_TYPE(enc, ptr)) { - case BT_S: case BT_LF: + case BT_S: + case BT_LF: break; case BT_CR: /* don't split CR/LF pair */ @@ -1045,13 +1090,12 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, return XML_TOK_OPEN_BRACKET; case BT_RSQB: ptr += MINBPC(enc); - if (ptr == end) + if (! HAS_CHAR(enc, ptr, end)) return -XML_TOK_CLOSE_BRACKET; if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) { - if (ptr + MINBPC(enc) == end) - return XML_TOK_PARTIAL; + REQUIRE_CHARS(enc, ptr, end, 2); if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) { - *nextTokPtr = ptr + 2*MINBPC(enc); + *nextTokPtr = ptr + 2 * MINBPC(enc); return XML_TOK_COND_SECT_CLOSE; } } @@ -1062,7 +1106,7 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, return XML_TOK_OPEN_PAREN; case BT_RPAR: ptr += MINBPC(enc); - if (ptr == end) + if (! HAS_CHAR(enc, ptr, end)) return -XML_TOK_CLOSE_PAREN; switch (BYTE_TYPE(enc, ptr)) { case BT_AST: @@ -1074,8 +1118,12 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, case BT_PLUS: *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_CLOSE_PAREN_PLUS; - case BT_CR: case BT_LF: case BT_S: - case BT_GT: case BT_COMMA: case BT_VERBAR: + case BT_CR: + case BT_LF: + case BT_S: + case BT_GT: + case BT_COMMA: + case BT_VERBAR: case BT_RPAR: *nextTokPtr = ptr; return XML_TOK_CLOSE_PAREN; @@ -1090,24 +1138,30 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, return XML_TOK_DECL_CLOSE; case BT_NUM: return PREFIX(scanPoundName)(enc, ptr + MINBPC(enc), end, nextTokPtr); -#define LEAD_CASE(n) \ - case BT_LEAD ## n: \ - if (end - ptr < n) \ - return XML_TOK_PARTIAL_CHAR; \ - if (IS_NMSTRT_CHAR(enc, ptr, n)) { \ - ptr += n; \ - tok = XML_TOK_NAME; \ - break; \ - } \ - if (IS_NAME_CHAR(enc, ptr, n)) { \ - ptr += n; \ - tok = XML_TOK_NMTOKEN; \ - break; \ - } \ - *nextTokPtr = ptr; \ +# define LEAD_CASE(n) \ + case BT_LEAD##n: \ + if (end - ptr < n) \ + return XML_TOK_PARTIAL_CHAR; \ + if (IS_INVALID_CHAR(enc, ptr, n)) { \ + *nextTokPtr = ptr; \ + return XML_TOK_INVALID; \ + } \ + if (IS_NMSTRT_CHAR(enc, ptr, n)) { \ + ptr += n; \ + tok = XML_TOK_NAME; \ + break; \ + } \ + if (IS_NAME_CHAR(enc, ptr, n)) { \ + ptr += n; \ + tok = XML_TOK_NMTOKEN; \ + break; \ + } \ + *nextTokPtr = ptr; \ return XML_TOK_INVALID; - LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) -#undef LEAD_CASE + LEAD_CASE(2) + LEAD_CASE(3) + LEAD_CASE(4) +# undef LEAD_CASE case BT_NMSTRT: case BT_HEX: tok = XML_TOK_NAME; @@ -1116,9 +1170,9 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, case BT_DIGIT: case BT_NAME: case BT_MINUS: -#ifdef XML_NS +# ifdef XML_NS case BT_COLON: -#endif +# endif tok = XML_TOK_NMTOKEN; ptr += MINBPC(enc); break; @@ -1138,24 +1192,29 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, *nextTokPtr = ptr; return XML_TOK_INVALID; } - while (ptr != end) { + while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { - CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) - case BT_GT: case BT_RPAR: case BT_COMMA: - case BT_VERBAR: case BT_LSQB: case BT_PERCNT: - case BT_S: case BT_CR: case BT_LF: + CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) + case BT_GT: + case BT_RPAR: + case BT_COMMA: + case BT_VERBAR: + case BT_LSQB: + case BT_PERCNT: + case BT_S: + case BT_CR: + case BT_LF: *nextTokPtr = ptr; return tok; -#ifdef XML_NS +# ifdef XML_NS case BT_COLON: ptr += MINBPC(enc); switch (tok) { case XML_TOK_NAME: - if (ptr == end) - return XML_TOK_PARTIAL; + REQUIRE_CHAR(enc, ptr, end); tok = XML_TOK_PREFIXED_NAME; switch (BYTE_TYPE(enc, ptr)) { - CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) + CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) default: tok = XML_TOK_NMTOKEN; break; @@ -1166,23 +1225,23 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, break; } break; -#endif +# endif case BT_PLUS: - if (tok == XML_TOK_NMTOKEN) { + if (tok == XML_TOK_NMTOKEN) { *nextTokPtr = ptr; return XML_TOK_INVALID; } *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_NAME_PLUS; case BT_AST: - if (tok == XML_TOK_NMTOKEN) { + if (tok == XML_TOK_NMTOKEN) { *nextTokPtr = ptr; return XML_TOK_INVALID; } *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_NAME_ASTERISK; case BT_QUEST: - if (tok == XML_TOK_NMTOKEN) { + if (tok == XML_TOK_NMTOKEN) { *nextTokPtr = ptr; return XML_TOK_INVALID; } @@ -1197,19 +1256,30 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, } static int PTRCALL -PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, - const char *end, const char **nextTokPtr) -{ +PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, const char *end, + const char **nextTokPtr) { const char *start; - if (ptr == end) + if (ptr >= end) return XML_TOK_NONE; + else if (! HAS_CHAR(enc, ptr, end)) { + /* This line cannot be executed. The incoming data has already + * been tokenized once, so incomplete characters like this have + * already been eliminated from the input. Retaining the paranoia + * check is still valuable, however. + */ + return XML_TOK_PARTIAL; /* LCOV_EXCL_LINE */ + } start = ptr; - while (ptr != end) { + while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { -#define LEAD_CASE(n) \ - case BT_LEAD ## n: ptr += n; break; - LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) -#undef LEAD_CASE +# define LEAD_CASE(n) \ + case BT_LEAD##n: \ + ptr += n; /* NOTE: The encoding has already been validated. */ \ + break; + LEAD_CASE(2) + LEAD_CASE(3) + LEAD_CASE(4) +# undef LEAD_CASE case BT_AMP: if (ptr == start) return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); @@ -1229,7 +1299,7 @@ PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, case BT_CR: if (ptr == start) { ptr += MINBPC(enc); - if (ptr == end) + if (! HAS_CHAR(enc, ptr, end)) return XML_TOK_TRAILING_CR; if (BYTE_TYPE(enc, ptr) == BT_LF) ptr += MINBPC(enc); @@ -1255,19 +1325,30 @@ PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, } static int PTRCALL -PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, - const char *end, const char **nextTokPtr) -{ +PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, const char *end, + const char **nextTokPtr) { const char *start; - if (ptr == end) + if (ptr >= end) return XML_TOK_NONE; + else if (! HAS_CHAR(enc, ptr, end)) { + /* This line cannot be executed. The incoming data has already + * been tokenized once, so incomplete characters like this have + * already been eliminated from the input. Retaining the paranoia + * check is still valuable, however. + */ + return XML_TOK_PARTIAL; /* LCOV_EXCL_LINE */ + } start = ptr; - while (ptr != end) { + while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { -#define LEAD_CASE(n) \ - case BT_LEAD ## n: ptr += n; break; - LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) -#undef LEAD_CASE +# define LEAD_CASE(n) \ + case BT_LEAD##n: \ + ptr += n; /* NOTE: The encoding has already been validated. */ \ + break; + LEAD_CASE(2) + LEAD_CASE(3) + LEAD_CASE(4) +# undef LEAD_CASE case BT_AMP: if (ptr == start) return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); @@ -1275,8 +1356,7 @@ PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, return XML_TOK_DATA_CHARS; case BT_PERCNT: if (ptr == start) { - int tok = PREFIX(scanPercent)(enc, ptr + MINBPC(enc), - end, nextTokPtr); + int tok = PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr); return (tok == XML_TOK_PERCENT) ? XML_TOK_INVALID : tok; } *nextTokPtr = ptr; @@ -1291,7 +1371,7 @@ PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, case BT_CR: if (ptr == start) { ptr += MINBPC(enc); - if (ptr == end) + if (! HAS_CHAR(enc, ptr, end)) return XML_TOK_TRAILING_CR; if (BYTE_TYPE(enc, ptr) == BT_LF) ptr += MINBPC(enc); @@ -1309,12 +1389,11 @@ PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, return XML_TOK_DATA_CHARS; } -#ifdef XML_DTD +# ifdef XML_DTD static int PTRCALL -PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr, - const char *end, const char **nextTokPtr) -{ +PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr, const char *end, + const char **nextTokPtr) { int level = 0; if (MINBPC(enc) > 1) { size_t n = end - ptr; @@ -1323,15 +1402,15 @@ PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr, end = ptr + n; } } - while (ptr != end) { + while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { - INVALID_CASES(ptr, nextTokPtr) + INVALID_CASES(ptr, nextTokPtr) case BT_LT: - if ((ptr += MINBPC(enc)) == end) - return XML_TOK_PARTIAL; + ptr += MINBPC(enc); + REQUIRE_CHAR(enc, ptr, end); if (CHAR_MATCHES(enc, ptr, ASCII_EXCL)) { - if ((ptr += MINBPC(enc)) == end) - return XML_TOK_PARTIAL; + ptr += MINBPC(enc); + REQUIRE_CHAR(enc, ptr, end); if (CHAR_MATCHES(enc, ptr, ASCII_LSQB)) { ++level; ptr += MINBPC(enc); @@ -1339,11 +1418,11 @@ PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr, } break; case BT_RSQB: - if ((ptr += MINBPC(enc)) == end) - return XML_TOK_PARTIAL; + ptr += MINBPC(enc); + REQUIRE_CHAR(enc, ptr, end); if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) { - if ((ptr += MINBPC(enc)) == end) - return XML_TOK_PARTIAL; + ptr += MINBPC(enc); + REQUIRE_CHAR(enc, ptr, end); if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { ptr += MINBPC(enc); if (level == 0) { @@ -1362,15 +1441,14 @@ PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr, return XML_TOK_PARTIAL; } -#endif /* XML_DTD */ +# endif /* XML_DTD */ static int PTRCALL PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end, - const char **badPtr) -{ + const char **badPtr) { ptr += MINBPC(enc); end -= MINBPC(enc); - for (; ptr != end; ptr += MINBPC(enc)) { + for (; HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) { switch (BYTE_TYPE(enc, ptr)) { case BT_DIGIT: case BT_HEX: @@ -1390,9 +1468,9 @@ PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end, case BT_AST: case BT_PERCNT: case BT_NUM: -#ifdef XML_NS +# ifdef XML_NS case BT_COLON: -#endif +# endif break; case BT_S: if (CHAR_MATCHES(enc, ptr, ASCII_TAB)) { @@ -1402,8 +1480,9 @@ PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end, break; case BT_NAME: case BT_NMSTRT: - if (!(BYTE_TO_ASCII(enc, ptr) & ~0x7f)) + if (! (BYTE_TO_ASCII(enc, ptr) & ~0x7f)) break; + /* fall through */ default: switch (BYTE_TO_ASCII(enc, ptr)) { case 0x24: /* $ */ @@ -1425,9 +1504,8 @@ PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end, */ static int PTRCALL -PREFIX(getAtts)(const ENCODING *enc, const char *ptr, - int attsMax, ATTRIBUTE *atts) -{ +PREFIX(getAtts)(const ENCODING *enc, const char *ptr, int attsMax, + ATTRIBUTE *atts) { enum { other, inName, inValue } state = inName; int nAtts = 0; int open = 0; /* defined when state == inValue; @@ -1435,32 +1513,35 @@ PREFIX(getAtts)(const ENCODING *enc, const char *ptr, for (ptr += MINBPC(enc);; ptr += MINBPC(enc)) { switch (BYTE_TYPE(enc, ptr)) { -#define START_NAME \ - if (state == other) { \ - if (nAtts < attsMax) { \ - atts[nAtts].name = ptr; \ - atts[nAtts].normalized = 1; \ - } \ - state = inName; \ - } -#define LEAD_CASE(n) \ - case BT_LEAD ## n: START_NAME ptr += (n - MINBPC(enc)); break; - LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) -#undef LEAD_CASE +# define START_NAME \ + if (state == other) { \ + if (nAtts < attsMax) { \ + atts[nAtts].name = ptr; \ + atts[nAtts].normalized = 1; \ + } \ + state = inName; \ + } +# define LEAD_CASE(n) \ + case BT_LEAD##n: /* NOTE: The encoding has already been validated. */ \ + START_NAME ptr += (n - MINBPC(enc)); \ + break; + LEAD_CASE(2) + LEAD_CASE(3) + LEAD_CASE(4) +# undef LEAD_CASE case BT_NONASCII: case BT_NMSTRT: case BT_HEX: START_NAME break; -#undef START_NAME +# undef START_NAME case BT_QUOT: if (state != inValue) { if (nAtts < attsMax) atts[nAtts].valuePtr = ptr + MINBPC(enc); state = inValue; open = BT_QUOT; - } - else if (open == BT_QUOT) { + } else if (open == BT_QUOT) { state = other; if (nAtts < attsMax) atts[nAtts].valueEnd = ptr; @@ -1473,8 +1554,7 @@ PREFIX(getAtts)(const ENCODING *enc, const char *ptr, atts[nAtts].valuePtr = ptr + MINBPC(enc); state = inValue; open = BT_APOS; - } - else if (open == BT_APOS) { + } else if (open == BT_APOS) { state = other; if (nAtts < attsMax) atts[nAtts].valueEnd = ptr; @@ -1488,16 +1568,15 @@ PREFIX(getAtts)(const ENCODING *enc, const char *ptr, case BT_S: if (state == inName) state = other; - else if (state == inValue - && nAtts < attsMax - && atts[nAtts].normalized + else if (state == inValue && nAtts < attsMax && atts[nAtts].normalized && (ptr == atts[nAtts].valuePtr || BYTE_TO_ASCII(enc, ptr) != ASCII_SPACE || BYTE_TO_ASCII(enc, ptr + MINBPC(enc)) == ASCII_SPACE || BYTE_TYPE(enc, ptr + MINBPC(enc)) == open)) atts[nAtts].normalized = 0; break; - case BT_CR: case BT_LF: + case BT_CR: + case BT_LF: /* This case ensures that the first attribute name is counted Apart from that we could just change state on the quote. */ if (state == inName) @@ -1518,29 +1597,44 @@ PREFIX(getAtts)(const ENCODING *enc, const char *ptr, } static int PTRFASTCALL -PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr) -{ +PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr) { int result = 0; /* skip &# */ - ptr += 2*MINBPC(enc); + UNUSED_P(enc); + ptr += 2 * MINBPC(enc); if (CHAR_MATCHES(enc, ptr, ASCII_x)) { - for (ptr += MINBPC(enc); - !CHAR_MATCHES(enc, ptr, ASCII_SEMI); + for (ptr += MINBPC(enc); ! CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) { int c = BYTE_TO_ASCII(enc, ptr); switch (c) { - case ASCII_0: case ASCII_1: case ASCII_2: case ASCII_3: case ASCII_4: - case ASCII_5: case ASCII_6: case ASCII_7: case ASCII_8: case ASCII_9: + case ASCII_0: + case ASCII_1: + case ASCII_2: + case ASCII_3: + case ASCII_4: + case ASCII_5: + case ASCII_6: + case ASCII_7: + case ASCII_8: + case ASCII_9: result <<= 4; result |= (c - ASCII_0); break; - case ASCII_A: case ASCII_B: case ASCII_C: - case ASCII_D: case ASCII_E: case ASCII_F: + case ASCII_A: + case ASCII_B: + case ASCII_C: + case ASCII_D: + case ASCII_E: + case ASCII_F: result <<= 4; result += 10 + (c - ASCII_A); break; - case ASCII_a: case ASCII_b: case ASCII_c: - case ASCII_d: case ASCII_e: case ASCII_f: + case ASCII_a: + case ASCII_b: + case ASCII_c: + case ASCII_d: + case ASCII_e: + case ASCII_f: result <<= 4; result += 10 + (c - ASCII_a); break; @@ -1548,9 +1642,8 @@ PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr) if (result >= 0x110000) return -1; } - } - else { - for (; !CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) { + } else { + for (; ! CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) { int c = BYTE_TO_ASCII(enc, ptr); result *= 10; result += (c - ASCII_0); @@ -1563,9 +1656,9 @@ PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr) static int PTRCALL PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr, - const char *end) -{ - switch ((end - ptr)/MINBPC(enc)) { + const char *end) { + UNUSED_P(enc); + switch ((end - ptr) / MINBPC(enc)) { case 2: if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_t)) { switch (BYTE_TO_ASCII(enc, ptr)) { @@ -1615,98 +1708,43 @@ PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr, return 0; } -static int PTRCALL -PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2) -{ - for (;;) { - switch (BYTE_TYPE(enc, ptr1)) { -#define LEAD_CASE(n) \ - case BT_LEAD ## n: \ - if (*ptr1++ != *ptr2++) \ - return 0; - LEAD_CASE(4) LEAD_CASE(3) LEAD_CASE(2) -#undef LEAD_CASE - /* fall through */ - if (*ptr1++ != *ptr2++) - return 0; - break; - case BT_NONASCII: - case BT_NMSTRT: -#ifdef XML_NS - case BT_COLON: -#endif - case BT_HEX: - case BT_DIGIT: - case BT_NAME: - case BT_MINUS: - if (*ptr2++ != *ptr1++) - return 0; - if (MINBPC(enc) > 1) { - if (*ptr2++ != *ptr1++) - return 0; - if (MINBPC(enc) > 2) { - if (*ptr2++ != *ptr1++) - return 0; - if (MINBPC(enc) > 3) { - if (*ptr2++ != *ptr1++) - return 0; - } - } - } - break; - default: - if (MINBPC(enc) == 1 && *ptr1 == *ptr2) - return 1; - switch (BYTE_TYPE(enc, ptr2)) { - case BT_LEAD2: - case BT_LEAD3: - case BT_LEAD4: - case BT_NONASCII: - case BT_NMSTRT: -#ifdef XML_NS - case BT_COLON: -#endif - case BT_HEX: - case BT_DIGIT: - case BT_NAME: - case BT_MINUS: - return 0; - default: - return 1; - } - } - } - /* not reached */ -} - static int PTRCALL PREFIX(nameMatchesAscii)(const ENCODING *enc, const char *ptr1, - const char *end1, const char *ptr2) -{ + const char *end1, const char *ptr2) { + UNUSED_P(enc); for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) { - if (ptr1 == end1) - return 0; - if (!CHAR_MATCHES(enc, ptr1, *ptr2)) + if (end1 - ptr1 < MINBPC(enc)) { + /* This line cannot be executed. The incoming data has already + * been tokenized once, so incomplete characters like this have + * already been eliminated from the input. Retaining the + * paranoia check is still valuable, however. + */ + return 0; /* LCOV_EXCL_LINE */ + } + if (! CHAR_MATCHES(enc, ptr1, *ptr2)) return 0; } return ptr1 == end1; } static int PTRFASTCALL -PREFIX(nameLength)(const ENCODING *enc, const char *ptr) -{ +PREFIX(nameLength)(const ENCODING *enc, const char *ptr) { const char *start = ptr; for (;;) { switch (BYTE_TYPE(enc, ptr)) { -#define LEAD_CASE(n) \ - case BT_LEAD ## n: ptr += n; break; - LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) -#undef LEAD_CASE +# define LEAD_CASE(n) \ + case BT_LEAD##n: \ + ptr += n; /* NOTE: The encoding has already been validated. */ \ + break; + LEAD_CASE(2) + LEAD_CASE(3) + LEAD_CASE(4) +# undef LEAD_CASE case BT_NONASCII: case BT_NMSTRT: -#ifdef XML_NS +# ifdef XML_NS case BT_COLON: -#endif +# endif case BT_HEX: case BT_DIGIT: case BT_NAME: @@ -1714,14 +1752,13 @@ PREFIX(nameLength)(const ENCODING *enc, const char *ptr) ptr += MINBPC(enc); break; default: - return ptr - start; + return (int)(ptr - start); } } } -static const char * PTRFASTCALL -PREFIX(skipS)(const ENCODING *enc, const char *ptr) -{ +static const char *PTRFASTCALL +PREFIX(skipS)(const ENCODING *enc, const char *ptr) { for (;;) { switch (BYTE_TYPE(enc, ptr)) { case BT_LF: @@ -1736,44 +1773,45 @@ PREFIX(skipS)(const ENCODING *enc, const char *ptr) } static void PTRCALL -PREFIX(updatePosition)(const ENCODING *enc, - const char *ptr, - const char *end, - POSITION *pos) -{ - while (ptr != end) { +PREFIX(updatePosition)(const ENCODING *enc, const char *ptr, const char *end, + POSITION *pos) { + while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { -#define LEAD_CASE(n) \ - case BT_LEAD ## n: \ - ptr += n; \ - break; - LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) -#undef LEAD_CASE +# define LEAD_CASE(n) \ + case BT_LEAD##n: \ + ptr += n; /* NOTE: The encoding has already been validated. */ \ + pos->columnNumber++; \ + break; + LEAD_CASE(2) + LEAD_CASE(3) + LEAD_CASE(4) +# undef LEAD_CASE case BT_LF: - pos->columnNumber = (unsigned)-1; + pos->columnNumber = 0; pos->lineNumber++; ptr += MINBPC(enc); break; case BT_CR: pos->lineNumber++; ptr += MINBPC(enc); - if (ptr != end && BYTE_TYPE(enc, ptr) == BT_LF) + if (HAS_CHAR(enc, ptr, end) && BYTE_TYPE(enc, ptr) == BT_LF) ptr += MINBPC(enc); - pos->columnNumber = (unsigned)-1; + pos->columnNumber = 0; break; default: ptr += MINBPC(enc); + pos->columnNumber++; break; } - pos->columnNumber++; } } -#undef DO_LEAD_CASE -#undef MULTIBYTE_CASES -#undef INVALID_CASES -#undef CHECK_NAME_CASE -#undef CHECK_NAME_CASES -#undef CHECK_NMSTRT_CASE -#undef CHECK_NMSTRT_CASES +# undef DO_LEAD_CASE +# undef MULTIBYTE_CASES +# undef INVALID_CASES +# undef CHECK_NAME_CASE +# undef CHECK_NAME_CASES +# undef CHECK_NMSTRT_CASE +# undef CHECK_NMSTRT_CASES +#endif /* XML_TOK_IMPL_C */ diff --git a/src/simgear/xml/xmltok_impl.h b/src/simgear/xml/xmltok_impl.h index da0ea60a6..c518aada0 100644 --- a/src/simgear/xml/xmltok_impl.h +++ b/src/simgear/xml/xmltok_impl.h @@ -1,46 +1,74 @@ /* -Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd -See the file COPYING for copying permission. + __ __ _ + ___\ \/ /_ __ __ _| |_ + / _ \\ /| '_ \ / _` | __| + | __// \| |_) | (_| | |_ + \___/_/\_\ .__/ \__,_|\__| + |_| XML parser + + Copyright (c) 1997-2000 Thai Open Source Software Center Ltd + Copyright (c) 2000 Clark Cooper + Copyright (c) 2017-2019 Sebastian Pipping + Licensed under the MIT license: + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to permit + persons to whom the Software is furnished to do so, subject to the + following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN + NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, + DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + USE OR OTHER DEALINGS IN THE SOFTWARE. */ enum { - BT_NONXML, - BT_MALFORM, - BT_LT, - BT_AMP, - BT_RSQB, - BT_LEAD2, - BT_LEAD3, - BT_LEAD4, - BT_TRAIL, - BT_CR, - BT_LF, - BT_GT, - BT_QUOT, - BT_APOS, - BT_EQUALS, - BT_QUEST, - BT_EXCL, - BT_SOL, - BT_SEMI, - BT_NUM, - BT_LSQB, - BT_S, - BT_NMSTRT, - BT_COLON, - BT_HEX, - BT_DIGIT, - BT_NAME, - BT_MINUS, - BT_OTHER, /* known not to be a name or name start character */ + BT_NONXML, /* e.g. noncharacter-FFFF */ + BT_MALFORM, /* illegal, with regard to encoding */ + BT_LT, /* less than = "<" */ + BT_AMP, /* ampersand = "&" */ + BT_RSQB, /* right square bracket = "[" */ + BT_LEAD2, /* lead byte of a 2-byte UTF-8 character */ + BT_LEAD3, /* lead byte of a 3-byte UTF-8 character */ + BT_LEAD4, /* lead byte of a 4-byte UTF-8 character */ + BT_TRAIL, /* trailing unit, e.g. second 16-bit unit of a 4-byte char. */ + BT_CR, /* carriage return = "\r" */ + BT_LF, /* line feed = "\n" */ + BT_GT, /* greater than = ">" */ + BT_QUOT, /* quotation character = "\"" */ + BT_APOS, /* aposthrophe = "'" */ + BT_EQUALS, /* equal sign = "=" */ + BT_QUEST, /* question mark = "?" */ + BT_EXCL, /* exclamation mark = "!" */ + BT_SOL, /* solidus, slash = "/" */ + BT_SEMI, /* semicolon = ";" */ + BT_NUM, /* number sign = "#" */ + BT_LSQB, /* left square bracket = "[" */ + BT_S, /* white space, e.g. "\t", " "[, "\r"] */ + BT_NMSTRT, /* non-hex name start letter = "G".."Z" + "g".."z" + "_" */ + BT_COLON, /* colon = ":" */ + BT_HEX, /* hex letter = "A".."F" + "a".."f" */ + BT_DIGIT, /* digit = "0".."9" */ + BT_NAME, /* dot and middle dot = "." + chr(0xb7) */ + BT_MINUS, /* minus = "-" */ + BT_OTHER, /* known not to be a name or name start character */ BT_NONASCII, /* might be a name or name start character */ - BT_PERCNT, - BT_LPAR, - BT_RPAR, - BT_AST, - BT_PLUS, - BT_COMMA, - BT_VERBAR + BT_PERCNT, /* percent sign = "%" */ + BT_LPAR, /* left parenthesis = "(" */ + BT_RPAR, /* right parenthesis = "(" */ + BT_AST, /* asterisk = "*" */ + BT_PLUS, /* plus sign = "+" */ + BT_COMMA, /* comma = "," */ + BT_VERBAR /* vertical bar = "|" */ }; #include diff --git a/src/simgear/xml/xmltok_ns.c b/src/simgear/xml/xmltok_ns.c index 5610eb95b..fbdd3e3c7 100644 --- a/src/simgear/xml/xmltok_ns.c +++ b/src/simgear/xml/xmltok_ns.c @@ -1,54 +1,83 @@ +/* This file is included! + __ __ _ + ___\ \/ /_ __ __ _| |_ + / _ \\ /| '_ \ / _` | __| + | __// \| |_) | (_| | |_ + \___/_/\_\ .__/ \__,_|\__| + |_| XML parser + + Copyright (c) 1997-2000 Thai Open Source Software Center Ltd + Copyright (c) 2000 Clark Cooper + Copyright (c) 2002 Greg Stein + Copyright (c) 2002 Fred L. Drake, Jr. + Copyright (c) 2002-2006 Karl Waclawek + Copyright (c) 2017-2021 Sebastian Pipping + Licensed under the MIT license: + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to permit + persons to whom the Software is furnished to do so, subject to the + following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN + NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, + DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +#ifdef XML_TOK_NS_C + const ENCODING * -NS(XmlGetUtf8InternalEncoding)(void) -{ +NS(XmlGetUtf8InternalEncoding)(void) { return &ns(internal_utf8_encoding).enc; } const ENCODING * -NS(XmlGetUtf16InternalEncoding)(void) -{ -#if BYTEORDER == 1234 +NS(XmlGetUtf16InternalEncoding)(void) { +# if BYTEORDER == 1234 return &ns(internal_little2_encoding).enc; -#elif BYTEORDER == 4321 +# elif BYTEORDER == 4321 return &ns(internal_big2_encoding).enc; -#else +# else const short n = 1; - return (*(const char *)&n - ? &ns(internal_little2_encoding).enc - : &ns(internal_big2_encoding).enc); -#endif + return (*(const char *)&n ? &ns(internal_little2_encoding).enc + : &ns(internal_big2_encoding).enc); +# endif } -static const ENCODING *NS(encodings)[] = { - &ns(latin1_encoding).enc, - &ns(ascii_encoding).enc, - &ns(utf8_encoding).enc, - &ns(big2_encoding).enc, - &ns(big2_encoding).enc, - &ns(little2_encoding).enc, - &ns(utf8_encoding).enc /* NO_ENC */ +static const ENCODING *const NS(encodings)[] = { + &ns(latin1_encoding).enc, &ns(ascii_encoding).enc, + &ns(utf8_encoding).enc, &ns(big2_encoding).enc, + &ns(big2_encoding).enc, &ns(little2_encoding).enc, + &ns(utf8_encoding).enc /* NO_ENC */ }; static int PTRCALL NS(initScanProlog)(const ENCODING *enc, const char *ptr, const char *end, - const char **nextTokPtr) -{ - return initScan(NS(encodings), (const INIT_ENCODING *)enc, - XML_PROLOG_STATE, ptr, end, nextTokPtr); + const char **nextTokPtr) { + return initScan(NS(encodings), (const INIT_ENCODING *)enc, XML_PROLOG_STATE, + ptr, end, nextTokPtr); } static int PTRCALL NS(initScanContent)(const ENCODING *enc, const char *ptr, const char *end, - const char **nextTokPtr) -{ - return initScan(NS(encodings), (const INIT_ENCODING *)enc, - XML_CONTENT_STATE, ptr, end, nextTokPtr); + const char **nextTokPtr) { + return initScan(NS(encodings), (const INIT_ENCODING *)enc, XML_CONTENT_STATE, + ptr, end, nextTokPtr); } int NS(XmlInitEncoding)(INIT_ENCODING *p, const ENCODING **encPtr, - const char *name) -{ + const char *name) { int i = getEncodingIndex(name); if (i == UNKNOWN_ENC) return 0; @@ -62,10 +91,9 @@ NS(XmlInitEncoding)(INIT_ENCODING *p, const ENCODING **encPtr, } static const ENCODING * -NS(findEncoding)(const ENCODING *enc, const char *ptr, const char *end) -{ -#define ENCODING_MAX 128 - char buf[ENCODING_MAX]; +NS(findEncoding)(const ENCODING *enc, const char *ptr, const char *end) { +# define ENCODING_MAX 128 + char buf[ENCODING_MAX] = ""; char *p = buf; int i; XmlUtf8Convert(enc, &ptr, end, &p, p + ENCODING_MAX - 1); @@ -81,26 +109,14 @@ NS(findEncoding)(const ENCODING *enc, const char *ptr, const char *end) } int -NS(XmlParseXmlDecl)(int isGeneralTextEntity, - const ENCODING *enc, - const char *ptr, - const char *end, - const char **badPtr, - const char **versionPtr, - const char **versionEndPtr, - const char **encodingName, - const ENCODING **encoding, - int *standalone) -{ - return doParseXmlDecl(NS(findEncoding), - isGeneralTextEntity, - enc, - ptr, - end, - badPtr, - versionPtr, - versionEndPtr, - encodingName, - encoding, - standalone); +NS(XmlParseXmlDecl)(int isGeneralTextEntity, const ENCODING *enc, + const char *ptr, const char *end, const char **badPtr, + const char **versionPtr, const char **versionEndPtr, + const char **encodingName, const ENCODING **encoding, + int *standalone) { + return doParseXmlDecl(NS(findEncoding), isGeneralTextEntity, enc, ptr, end, + badPtr, versionPtr, versionEndPtr, encodingName, + encoding, standalone); } + +#endif /* XML_TOK_NS_C */