[LIBXML2] Update to version 2.10.0. CORE-17766

This commit is contained in:
Thomas Faber 2022-11-20 10:17:45 -05:00
parent 608bbe1136
commit 911153da10
No known key found for this signature in database
GPG key ID: 076E7C3D44720826
80 changed files with 2351 additions and 20735 deletions

View file

@ -10,31 +10,15 @@
#define IN_LIBXML
#include "libxml.h"
#if defined(_WIN32) && !defined (__CYGWIN__)
#if defined(_WIN32)
#define XML_DIR_SEP '\\'
#else
#define XML_DIR_SEP '/'
#endif
#include <string.h>
#ifdef HAVE_CTYPE_H
#include <ctype.h>
#endif
#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif
#ifdef HAVE_SYS_STAT_H
#include <sys/stat.h>
#endif
#ifdef HAVE_FCNTL_H
#include <fcntl.h>
#endif
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#ifdef LIBXML_ZLIB_ENABLED
#include <zlib.h>
#endif
#include <libxml/xmlmemory.h>
#include <libxml/tree.h>
@ -300,7 +284,6 @@ int
xmlParserInputGrow(xmlParserInputPtr in, int len) {
int ret;
size_t indx;
const xmlChar *content;
if ((in == NULL) || (len < 0)) return(-1);
#ifdef DEBUG_INPUT
@ -325,22 +308,8 @@ xmlParserInputGrow(xmlParserInputPtr in, int len) {
} else
return(0);
/*
* NOTE : in->base may be a "dangling" i.e. freed pointer in this
* block, but we use it really as an integer to do some
* pointer arithmetic. Insure will raise it as a bug but in
* that specific case, that's not !
*/
content = xmlBufContent(in->buf->buffer);
if (in->base != content) {
/*
* the buffer has been reallocated
*/
indx = in->cur - in->base;
in->base = content;
in->cur = &content[indx];
}
in->base = xmlBufContent(in->buf->buffer);
in->cur = in->base + indx;
in->end = xmlBufEnd(in->buf->buffer);
CHECK_BUFFER(in);
@ -358,8 +327,6 @@ void
xmlParserInputShrink(xmlParserInputPtr in) {
size_t used;
size_t ret;
size_t indx;
const xmlChar *content;
#ifdef DEBUG_INPUT
xmlGenericError(xmlGenericErrorContext, "Shrink\n");
@ -372,7 +339,7 @@ xmlParserInputShrink(xmlParserInputPtr in) {
CHECK_BUFFER(in);
used = in->cur - xmlBufContent(in->buf->buffer);
used = in->cur - in->base;
/*
* Do not shrink on large buffers whose only a tiny fraction
* was consumed
@ -380,27 +347,17 @@ xmlParserInputShrink(xmlParserInputPtr in) {
if (used > INPUT_CHUNK) {
ret = xmlBufShrink(in->buf->buffer, used - LINE_LEN);
if (ret > 0) {
in->cur -= ret;
used -= ret;
in->consumed += ret;
}
in->end = xmlBufEnd(in->buf->buffer);
}
CHECK_BUFFER(in);
if (xmlBufUse(in->buf->buffer) <= INPUT_CHUNK) {
xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
}
if (xmlBufUse(in->buf->buffer) > INPUT_CHUNK) {
return;
}
xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
content = xmlBufContent(in->buf->buffer);
if (in->base != content) {
/*
* the buffer has been reallocated
*/
indx = in->cur - in->base;
in->base = content;
in->cur = &content[indx];
}
in->base = xmlBufContent(in->buf->buffer);
in->cur = in->base + used;
in->end = xmlBufEnd(in->buf->buffer);
CHECK_BUFFER(in);
@ -909,9 +866,6 @@ xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {
************************************************************************/
static int
xmlSwitchToEncodingInt(xmlParserCtxtPtr ctxt,
xmlCharEncodingHandlerPtr handler, int len);
static int
xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
xmlCharEncodingHandlerPtr handler, int len);
/**
@ -1011,55 +965,7 @@ xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
/* default encoding, no conversion should be needed */
ctxt->charset = XML_CHAR_ENCODING_UTF8;
return(0);
case XML_CHAR_ENCODING_UTF16LE:
break;
case XML_CHAR_ENCODING_UTF16BE:
break;
case XML_CHAR_ENCODING_UCS4LE:
__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
"encoding not supported %s\n",
BAD_CAST "USC4 little endian", NULL);
break;
case XML_CHAR_ENCODING_UCS4BE:
__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
"encoding not supported %s\n",
BAD_CAST "USC4 big endian", NULL);
break;
case XML_CHAR_ENCODING_EBCDIC:
__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
"encoding not supported %s\n",
BAD_CAST "EBCDIC", NULL);
break;
case XML_CHAR_ENCODING_UCS4_2143:
__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
"encoding not supported %s\n",
BAD_CAST "UCS4 2143", NULL);
break;
case XML_CHAR_ENCODING_UCS4_3412:
__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
"encoding not supported %s\n",
BAD_CAST "UCS4 3412", NULL);
break;
case XML_CHAR_ENCODING_UCS2:
__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
"encoding not supported %s\n",
BAD_CAST "UCS2", NULL);
break;
case XML_CHAR_ENCODING_8859_1:
case XML_CHAR_ENCODING_8859_2:
case XML_CHAR_ENCODING_8859_3:
case XML_CHAR_ENCODING_8859_4:
case XML_CHAR_ENCODING_8859_5:
case XML_CHAR_ENCODING_8859_6:
case XML_CHAR_ENCODING_8859_7:
case XML_CHAR_ENCODING_8859_8:
case XML_CHAR_ENCODING_8859_9:
/*
* We used to keep the internal content in the
* document encoding however this turns being unmaintainable
* So xmlGetCharEncodingHandler() will return non-null
* values for this now.
*/
if ((ctxt->inputNr == 1) &&
(ctxt->encoding == NULL) &&
(ctxt->input != NULL) &&
@ -1068,36 +974,20 @@ xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
}
ctxt->charset = enc;
return(0);
case XML_CHAR_ENCODING_2022_JP:
__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
"encoding not supported %s\n",
BAD_CAST "ISO-2022-JP", NULL);
break;
case XML_CHAR_ENCODING_SHIFT_JIS:
__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
"encoding not supported %s\n",
BAD_CAST "Shift_JIS", NULL);
break;
case XML_CHAR_ENCODING_EUC_JP:
__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
"encoding not supported %s\n",
BAD_CAST "EUC-JP", NULL);
break;
default:
break;
}
__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
"encoding not supported: %s\n",
BAD_CAST xmlGetCharEncodingName(enc), NULL);
/*
* TODO: We could recover from errors in external entities
* if we didn't stop the parser. But most callers of this
* function don't check the return value.
*/
xmlStopParser(ctxt);
return(-1);
}
}
/*
* TODO: We could recover from errors in external entities if we
* didn't stop the parser. But most callers of this function don't
* check the return value.
*/
if (handler == NULL) {
xmlStopParser(ctxt);
return(-1);
}
ctxt->charset = XML_CHAR_ENCODING_UTF8;
ret = xmlSwitchToEncodingInt(ctxt, handler, len);
ret = xmlSwitchInputEncodingInt(ctxt, ctxt->input, handler, len);
if ((ret < 0) || (ctxt->errNo == XML_I18N_CONV_FAILED)) {
/*
* on encoding conversion errors, stop the parser
@ -1109,7 +999,7 @@ xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
}
/**
* xmlSwitchInputEncoding:
* xmlSwitchInputEncodingInt:
* @ctxt: the parser context
* @input: the input stream
* @handler: the encoding handler
@ -1131,6 +1021,8 @@ xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
if (input == NULL)
return (-1);
if (input->buf != NULL) {
ctxt->charset = XML_CHAR_ENCODING_UTF8;
if (input->buf->encoder != NULL) {
/*
* Check in case the auto encoding detection triggered
@ -1234,12 +1126,9 @@ xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
input->buf->rawconsumed += use - xmlBufUse(input->buf->raw);
}
return (0);
} else if (input->length == 0) {
/*
* When parsing a static memory array one must know the
* size to be able to convert the buffer.
*/
xmlErrInternal(ctxt, "switching encoding : no input\n", NULL);
} else {
xmlErrInternal(ctxt,
"static memory buffer doesn't support encoding\n", NULL);
/*
* Callers assume that the input buffer takes ownership of the
* encoding handler. xmlCharEncCloseFunc frees unregistered
@ -1248,11 +1137,6 @@ xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
xmlCharEncCloseFunc(handler);
return (-1);
}
/*
* We should actually raise an error here, see issue #34.
*/
xmlCharEncCloseFunc(handler);
return (0);
}
/**
@ -1261,6 +1145,8 @@ xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
* @input: the input stream
* @handler: the encoding handler
*
* DEPRECATED: Use xmlSwitchToEncoding
*
* change the input functions when discovering the character encoding
* of a given entity.
*
@ -1272,41 +1158,6 @@ xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
return(xmlSwitchInputEncodingInt(ctxt, input, handler, -1));
}
/**
* xmlSwitchToEncodingInt:
* @ctxt: the parser context
* @handler: the encoding handler
* @len: the length to convert or -1
*
* change the input functions when discovering the character encoding
* of a given entity, and convert only @len bytes of the output, this
* is needed on auto detect to allows any declared encoding later to
* convert the actual content after the xmlDecl
*
* Returns 0 in case of success, -1 otherwise
*/
static int
xmlSwitchToEncodingInt(xmlParserCtxtPtr ctxt,
xmlCharEncodingHandlerPtr handler, int len) {
int ret = 0;
if (handler != NULL) {
if (ctxt->input != NULL) {
ret = xmlSwitchInputEncodingInt(ctxt, ctxt->input, handler, len);
} else {
xmlErrInternal(ctxt, "xmlSwitchToEncoding : no input\n",
NULL);
return(-1);
}
/*
* The parsing is now done in UTF8 natively
*/
ctxt->charset = XML_CHAR_ENCODING_UTF8;
} else
return(-1);
return(ret);
}
/**
* xmlSwitchToEncoding:
* @ctxt: the parser context
@ -1320,7 +1171,9 @@ xmlSwitchToEncodingInt(xmlParserCtxtPtr ctxt,
int
xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
{
return (xmlSwitchToEncodingInt(ctxt, handler, -1));
if (ctxt == NULL)
return(-1);
return(xmlSwitchInputEncodingInt(ctxt, ctxt->input, handler, -1));
}
/************************************************************************
@ -1597,7 +1450,7 @@ xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
return(-1);
}
xmlDefaultSAXHandlerInit();
xmlInitParser();
if (ctxt->dict == NULL)
ctxt->dict = xmlDictCreate();
@ -1733,7 +1586,7 @@ xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
ctxt->options |= XML_PARSE_NOBLANKS;
}
ctxt->vctxt.finishDtd = XML_CTXT_FINISH_DTD_0;
ctxt->vctxt.flags = XML_VCTXT_USE_PCTXT;
ctxt->vctxt.userData = ctxt;
ctxt->vctxt.error = xmlParserValidityError;
ctxt->vctxt.warning = xmlParserValidityWarning;
@ -2162,5 +2015,3 @@ xmlKeepBlanksDefault(int val) {
return(old);
}
#define bottom_parserInternals
#include "elfgcchack.h"