[LIBXML2] Update to version 2.10.0. CORE-17766

2025-08-05 15:33:07 +00:00 · 2022-11-20 10:17:45 -05:00 · 2022-11-20 10:17:45 -05:00 · 911153da10
commit 911153da10
parent 608bbe1136
80 changed files with 2351 additions and 20735 deletions
--- a/sdk/lib/3rdparty/libxml2/parserInternals.c
+++ b/sdk/lib/3rdparty/libxml2/parserInternals.c
@ -10,31 +10,15 @@
 #define IN_LIBXML
 #include "libxml.h"

-#if defined(_WIN32) && !defined (__CYGWIN__)
+#if defined(_WIN32)
 #define XML_DIR_SEP '\\'
 #else
 #define XML_DIR_SEP '/'
 #endif

 #include <string.h>
-#ifdef HAVE_CTYPE_H
 #include <ctype.h>
-#endif
-#ifdef HAVE_STDLIB_H
 #include <stdlib.h>
-#endif
-#ifdef HAVE_SYS_STAT_H
-#include <sys/stat.h>
-#endif
-#ifdef HAVE_FCNTL_H
-#include <fcntl.h>
-#endif
-#ifdef HAVE_UNISTD_H
-#include <unistd.h>
-#endif
-#ifdef LIBXML_ZLIB_ENABLED
-#include <zlib.h>
-#endif

 #include <libxml/xmlmemory.h>
 #include <libxml/tree.h>
@ -300,7 +284,6 @@ int
 xmlParserInputGrow(xmlParserInputPtr in, int len) {
    int ret;
    size_t indx;
-    const xmlChar *content;

    if ((in == NULL) || (len < 0)) return(-1);
 #ifdef DEBUG_INPUT
@ -325,22 +308,8 @@ xmlParserInputGrow(xmlParserInputPtr in, int len) {
    } else
        return(0);

-    /*
-     * NOTE : in->base may be a "dangling" i.e. freed pointer in this
-     *        block, but we use it really as an integer to do some
-     *        pointer arithmetic. Insure will raise it as a bug but in
-     *        that specific case, that's not !
-     */
-
-    content = xmlBufContent(in->buf->buffer);
-    if (in->base != content) {
-        /*
-	 * the buffer has been reallocated
-	 */
-	indx = in->cur - in->base;
-	in->base = content;
-	in->cur = &content[indx];
-    }
+    in->base = xmlBufContent(in->buf->buffer);
+    in->cur = in->base + indx;
    in->end = xmlBufEnd(in->buf->buffer);

    CHECK_BUFFER(in);
@ -358,8 +327,6 @@ void
 xmlParserInputShrink(xmlParserInputPtr in) {
    size_t used;
    size_t ret;
-    size_t indx;
-    const xmlChar *content;

 #ifdef DEBUG_INPUT
    xmlGenericError(xmlGenericErrorContext, "Shrink\n");
@ -372,7 +339,7 @@ xmlParserInputShrink(xmlParserInputPtr in) {

    CHECK_BUFFER(in);

-    used = in->cur - xmlBufContent(in->buf->buffer);
+    used = in->cur - in->base;
    /*
     * Do not shrink on large buffers whose only a tiny fraction
     * was consumed
@ -380,27 +347,17 @@ xmlParserInputShrink(xmlParserInputPtr in) {
    if (used > INPUT_CHUNK) {
 	ret = xmlBufShrink(in->buf->buffer, used - LINE_LEN);
 	if (ret > 0) {
-	    in->cur -= ret;
+            used -= ret;
 	    in->consumed += ret;
 	}
-	in->end = xmlBufEnd(in->buf->buffer);
    }

-    CHECK_BUFFER(in);
+    if (xmlBufUse(in->buf->buffer) <= INPUT_CHUNK) {
+        xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
+    }

-    if (xmlBufUse(in->buf->buffer) > INPUT_CHUNK) {
-        return;
-    }
-    xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
-    content = xmlBufContent(in->buf->buffer);
-    if (in->base != content) {
-        /*
-	 * the buffer has been reallocated
-	 */
-	indx = in->cur - in->base;
-	in->base = content;
-	in->cur = &content[indx];
-    }
+    in->base = xmlBufContent(in->buf->buffer);
+    in->cur = in->base + used;
    in->end = xmlBufEnd(in->buf->buffer);

    CHECK_BUFFER(in);
@ -909,9 +866,6 @@ xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {
 ************************************************************************/

 static int
-xmlSwitchToEncodingInt(xmlParserCtxtPtr ctxt,
-                       xmlCharEncodingHandlerPtr handler, int len);
-static int
 xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
                          xmlCharEncodingHandlerPtr handler, int len);
 /**
@ -1011,55 +965,7 @@ xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
 		/* default encoding, no conversion should be needed */
 		ctxt->charset = XML_CHAR_ENCODING_UTF8;
 		return(0);
-	    case XML_CHAR_ENCODING_UTF16LE:
-		break;
-	    case XML_CHAR_ENCODING_UTF16BE:
-		break;
-	    case XML_CHAR_ENCODING_UCS4LE:
-		__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
-			       "encoding not supported %s\n",
-			       BAD_CAST "USC4 little endian", NULL);
-		break;
-	    case XML_CHAR_ENCODING_UCS4BE:
-		__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
-			       "encoding not supported %s\n",
-			       BAD_CAST "USC4 big endian", NULL);
-		break;
-	    case XML_CHAR_ENCODING_EBCDIC:
-		__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
-			       "encoding not supported %s\n",
-			       BAD_CAST "EBCDIC", NULL);
-		break;
-	    case XML_CHAR_ENCODING_UCS4_2143:
-		__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
-			       "encoding not supported %s\n",
-			       BAD_CAST "UCS4 2143", NULL);
-		break;
-	    case XML_CHAR_ENCODING_UCS4_3412:
-		__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
-			       "encoding not supported %s\n",
-			       BAD_CAST "UCS4 3412", NULL);
-		break;
-	    case XML_CHAR_ENCODING_UCS2:
-		__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
-			       "encoding not supported %s\n",
-			       BAD_CAST "UCS2", NULL);
-		break;
 	    case XML_CHAR_ENCODING_8859_1:
-	    case XML_CHAR_ENCODING_8859_2:
-	    case XML_CHAR_ENCODING_8859_3:
-	    case XML_CHAR_ENCODING_8859_4:
-	    case XML_CHAR_ENCODING_8859_5:
-	    case XML_CHAR_ENCODING_8859_6:
-	    case XML_CHAR_ENCODING_8859_7:
-	    case XML_CHAR_ENCODING_8859_8:
-	    case XML_CHAR_ENCODING_8859_9:
-		/*
-		 * We used to keep the internal content in the
-		 * document encoding however this turns being unmaintainable
-		 * So xmlGetCharEncodingHandler() will return non-null
-		 * values for this now.
-		 */
 		if ((ctxt->inputNr == 1) &&
 		    (ctxt->encoding == NULL) &&
 		    (ctxt->input != NULL) &&
@ -1068,36 +974,20 @@ xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
 		}
 		ctxt->charset = enc;
 		return(0);
-	    case XML_CHAR_ENCODING_2022_JP:
-		__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
-			       "encoding not supported %s\n",
-			       BAD_CAST "ISO-2022-JP", NULL);
-		break;
-	    case XML_CHAR_ENCODING_SHIFT_JIS:
-		__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
-			       "encoding not supported %s\n",
-			       BAD_CAST "Shift_JIS", NULL);
-		break;
-	    case XML_CHAR_ENCODING_EUC_JP:
-		__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
-			       "encoding not supported %s\n",
-			       BAD_CAST "EUC-JP", NULL);
-		break;
 	    default:
-	        break;
-	}
+		__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
+                        "encoding not supported: %s\n",
+			BAD_CAST xmlGetCharEncodingName(enc), NULL);
+                /*
+                 * TODO: We could recover from errors in external entities
+                 * if we didn't stop the parser. But most callers of this
+                 * function don't check the return value.
+                 */
+                xmlStopParser(ctxt);
+                return(-1);
+        }
    }
-    /*
-     * TODO: We could recover from errors in external entities if we
-     * didn't stop the parser. But most callers of this function don't
-     * check the return value.
-     */
-    if (handler == NULL) {
-        xmlStopParser(ctxt);
-	return(-1);
-    }
-    ctxt->charset = XML_CHAR_ENCODING_UTF8;
-    ret = xmlSwitchToEncodingInt(ctxt, handler, len);
+    ret = xmlSwitchInputEncodingInt(ctxt, ctxt->input, handler, len);
    if ((ret < 0) || (ctxt->errNo == XML_I18N_CONV_FAILED)) {
        /*
 	 * on encoding conversion errors, stop the parser
@ -1109,7 +999,7 @@ xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
 }

 /**
- * xmlSwitchInputEncoding:
+ * xmlSwitchInputEncodingInt:
 * @ctxt:  the parser context
 * @input:  the input stream
 * @handler:  the encoding handler
@ -1131,6 +1021,8 @@ xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
    if (input == NULL)
        return (-1);
    if (input->buf != NULL) {
+	ctxt->charset = XML_CHAR_ENCODING_UTF8;
+
        if (input->buf->encoder != NULL) {
            /*
             * Check in case the auto encoding detection triggered
@ -1234,12 +1126,9 @@ xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
 	    input->buf->rawconsumed += use - xmlBufUse(input->buf->raw);
        }
        return (0);
-    } else if (input->length == 0) {
-	/*
-	 * When parsing a static memory array one must know the
-	 * size to be able to convert the buffer.
-	 */
-	xmlErrInternal(ctxt, "switching encoding : no input\n", NULL);
+    } else {
+	xmlErrInternal(ctxt,
+                "static memory buffer doesn't support encoding\n", NULL);
        /*
         * Callers assume that the input buffer takes ownership of the
         * encoding handler. xmlCharEncCloseFunc frees unregistered
@ -1248,11 +1137,6 @@ xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
        xmlCharEncCloseFunc(handler);
 	return (-1);
    }
-    /*
-     * We should actually raise an error here, see issue #34.
-     */
-    xmlCharEncCloseFunc(handler);
-    return (0);
 }

 /**
@ -1261,6 +1145,8 @@ xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
 * @input:  the input stream
 * @handler:  the encoding handler
 *
+ * DEPRECATED: Use xmlSwitchToEncoding
+ *
 * change the input functions when discovering the character encoding
 * of a given entity.
 *
@ -1272,41 +1158,6 @@ xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
    return(xmlSwitchInputEncodingInt(ctxt, input, handler, -1));
 }

-/**
- * xmlSwitchToEncodingInt:
- * @ctxt:  the parser context
- * @handler:  the encoding handler
- * @len: the length to convert or -1
- *
- * change the input functions when discovering the character encoding
- * of a given entity, and convert only @len bytes of the output, this
- * is needed on auto detect to allows any declared encoding later to
- * convert the actual content after the xmlDecl
- *
- * Returns 0 in case of success, -1 otherwise
- */
-static int
-xmlSwitchToEncodingInt(xmlParserCtxtPtr ctxt,
-                       xmlCharEncodingHandlerPtr handler, int len) {
-    int ret = 0;
-
-    if (handler != NULL) {
-        if (ctxt->input != NULL) {
-	    ret = xmlSwitchInputEncodingInt(ctxt, ctxt->input, handler, len);
-	} else {
-	    xmlErrInternal(ctxt, "xmlSwitchToEncoding : no input\n",
-	                   NULL);
-	    return(-1);
-	}
-	/*
-	 * The parsing is now done in UTF8 natively
-	 */
-	ctxt->charset = XML_CHAR_ENCODING_UTF8;
-    } else
-	return(-1);
-    return(ret);
-}
-
 /**
 * xmlSwitchToEncoding:
 * @ctxt:  the parser context
@ -1320,7 +1171,9 @@ xmlSwitchToEncodingInt(xmlParserCtxtPtr ctxt,
 int
 xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
 {
-    return (xmlSwitchToEncodingInt(ctxt, handler, -1));
+    if (ctxt == NULL)
+        return(-1);
+    return(xmlSwitchInputEncodingInt(ctxt, ctxt->input, handler, -1));
 }

 /************************************************************************
@ -1597,7 +1450,7 @@ xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
        return(-1);
    }

-    xmlDefaultSAXHandlerInit();
+    xmlInitParser();

    if (ctxt->dict == NULL)
 	ctxt->dict = xmlDictCreate();
@ -1733,7 +1586,7 @@ xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
 	ctxt->options |= XML_PARSE_NOBLANKS;
    }

-    ctxt->vctxt.finishDtd = XML_CTXT_FINISH_DTD_0;
+    ctxt->vctxt.flags = XML_VCTXT_USE_PCTXT;
    ctxt->vctxt.userData = ctxt;
    ctxt->vctxt.error = xmlParserValidityError;
    ctxt->vctxt.warning = xmlParserValidityWarning;
@ -2162,5 +2015,3 @@ xmlKeepBlanksDefault(int val) {
    return(old);
 }

-#define bottom_parserInternals
-#include "elfgcchack.h"