mirror of
https://github.com/reactos/reactos.git
synced 2024-12-25 16:50:57 +00:00
4f8cc9596c
svn path=/trunk/; revision=17143
1081 lines
42 KiB
C++
1081 lines
42 KiB
C++
/* Copyright (c) 1998, 1999, 2000 Thai Open Source Software Center Ltd
|
|
See the file COPYING for copying permission.
|
|
*/
|
|
|
|
#ifndef XmlParse_INCLUDED
|
|
#define XmlParse_INCLUDED 1
|
|
|
|
#ifdef __VMS
/* Shorter aliases for four long API names.  The ruler marks 31 columns
   (presumably the external-identifier length limit on VMS -- TODO confirm).

        0        1         2         3
        1234567890123456789012345678901 */
#define XML_SetProcessingInstructionHandler XML_SetProcessingInstrHandler
#define XML_SetUnparsedEntityDeclHandler    XML_SetUnparsedEntDeclHandler
#define XML_SetStartNamespaceDeclHandler    XML_SetStartNamespcDeclHandler
#define XML_SetExternalEntityRefHandlerArg  XML_SetExternalEntRefHandlerArg
#endif
|
|
|
|
#include <stdlib.h>
|
|
|
|
#if defined(_MSC_EXTENSIONS) && !defined(__BEOS__) && !defined(__CYGWIN__)
#define XML_USE_MSC_EXTENSIONS 1
#endif

/* Expat tries very hard to make the API boundary very specifically
   defined.  There are two macros defined to control this boundary;
   each of these can be defined before including this header to
   achieve some different behavior, but doing so is not recommended or
   tested frequently.

   XMLCALL    - The calling convention to use for all calls across the
                "library boundary."  This will default to cdecl, and
                try really hard to tell the compiler that's what we
                want.

   XMLIMPORT  - Whatever magic is needed to note that a function is
                to be imported from a dynamically loaded library
                (.dll, .so, or .sl, depending on your platform).

   The XMLCALL macro was added in Expat 1.95.7.  The only one which is
   expected to be directly useful in client code is XMLCALL.

   Note that on at least some Unix versions, the Expat library must be
   compiled with the cdecl calling convention as the default since
   system headers may assume the cdecl convention.
*/
#ifndef XMLCALL
#if defined(XML_USE_MSC_EXTENSIONS)
#define XMLCALL __cdecl
#elif defined(__GNUC__) && defined(__i386)
/* MF: the explicit `__attribute__((cdecl))` definition is disabled in this
   copy, so XMLCALL expands to nothing for GCC on i386. */
#define XMLCALL
#else
/* For any platform which uses this definition and supports more than
   one calling convention, we need to extend this definition to
   declare the convention used on that platform, if it's possible to
   do so.

   If this is the case for your platform, please file a bug report
   with information on how to identify your platform via the C
   pre-processor and how to specify the same calling convention as the
   platform's malloc() implementation.
*/
#define XMLCALL
#endif
#endif  /* not defined XMLCALL */
|
|
|
|
|
|
/* XMLIMPORT is set to __declspec(dllimport) only when all of the following
   hold: XML_STATIC is not defined, the includer has not already defined
   XMLIMPORT, XML_BUILDING_EXPAT is not defined, and XML_USE_MSC_EXTENSIONS
   is defined.  In every other case it ends up defined as empty. */
#if !defined(XML_STATIC) && !defined(XMLIMPORT)
#ifndef XML_BUILDING_EXPAT
/* using Expat from an application */

#ifdef XML_USE_MSC_EXTENSIONS
#define XMLIMPORT __declspec(dllimport)
#endif

#endif
#endif  /* not defined XML_STATIC */

/* If we didn't define it above, define it away: */
#ifndef XMLIMPORT
#define XMLIMPORT
#endif
|
|
|
|
|
|
/* Decorates the return type of an API declaration: the import marker
   (XMLIMPORT) goes before the type, the calling convention (XMLCALL) after. */
#define XMLPARSEAPI(ret_type) XMLIMPORT ret_type XMLCALL
|
|
|
|
/* When compiled as C++, give the declarations that follow C linkage. */
#ifdef __cplusplus
extern "C" {
#endif
|
|
|
|
/* Requesting wchar_t characters (XML_UNICODE_WCHAR_T) implies XML_UNICODE. */
#ifdef XML_UNICODE_WCHAR_T
#define XML_UNICODE
#endif
|
|
|
|
/* Parser handle.  Only the structure tag is declared here; its members are
   not declared in the visible part of this header. */
struct XML_ParserStruct;
typedef struct XML_ParserStruct *XML_Parser;
|
|
|
|
/* Character types used across the API.
   XML_Char  - element type of the strings passed to and from the parser.
   XML_LChar - companion character type; it is wchar_t only in the
               XML_UNICODE_WCHAR_T configuration and char otherwise. */
#ifdef XML_UNICODE     /* Information is UTF-16 encoded. */
#ifdef XML_UNICODE_WCHAR_T
typedef wchar_t XML_Char;
typedef wchar_t XML_LChar;
#else
typedef unsigned short XML_Char;
typedef char XML_LChar;
#endif /* XML_UNICODE_WCHAR_T */
#else                  /* Information is UTF-8 encoded. */
typedef char XML_Char;
typedef char XML_LChar;
#endif /* XML_UNICODE */
|
|
|
|
/* Boolean type of the API, with its two values.
   Should this be defined using stdbool.h when C99 is available? */
typedef unsigned char XML_Bool;
#define XML_TRUE   ((XML_Bool) 1)
#define XML_FALSE  ((XML_Bool) 0)
|
|
|
|
/* The XML_Status enum gives the possible return values for several
   API functions.  The preprocessor #defines are included so this
   stanza can be added to code that still needs to support older
   versions of Expat 1.95.x:

   #ifndef XML_STATUS_OK
   #define XML_STATUS_OK    1
   #define XML_STATUS_ERROR 0
   #endif

   Otherwise, the #define hackery is quite ugly and would have been
   dropped.
*/
enum XML_Status {
  XML_STATUS_ERROR = 0,
#define XML_STATUS_ERROR XML_STATUS_ERROR
  XML_STATUS_OK = 1,
#define XML_STATUS_OK XML_STATUS_OK
  XML_STATUS_SUSPENDED = 2,
#define XML_STATUS_SUSPENDED XML_STATUS_SUSPENDED
};
|
|
|
|
/* Error codes.  The enumerators carry no explicit values, so they number
   consecutively from XML_ERROR_NONE == 0; keep the order unchanged. */
enum XML_Error {
  XML_ERROR_NONE,
  XML_ERROR_NO_MEMORY,
  XML_ERROR_SYNTAX,
  XML_ERROR_NO_ELEMENTS,
  XML_ERROR_INVALID_TOKEN,
  XML_ERROR_UNCLOSED_TOKEN,
  XML_ERROR_PARTIAL_CHAR,
  XML_ERROR_TAG_MISMATCH,
  XML_ERROR_DUPLICATE_ATTRIBUTE,
  XML_ERROR_JUNK_AFTER_DOC_ELEMENT,
  XML_ERROR_PARAM_ENTITY_REF,
  XML_ERROR_UNDEFINED_ENTITY,
  XML_ERROR_RECURSIVE_ENTITY_REF,
  XML_ERROR_ASYNC_ENTITY,
  XML_ERROR_BAD_CHAR_REF,
  XML_ERROR_BINARY_ENTITY_REF,
  XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF,
  XML_ERROR_MISPLACED_XML_PI,
  XML_ERROR_UNKNOWN_ENCODING,
  XML_ERROR_INCORRECT_ENCODING,
  XML_ERROR_UNCLOSED_CDATA_SECTION,
  XML_ERROR_EXTERNAL_ENTITY_HANDLING,
  XML_ERROR_NOT_STANDALONE,
  XML_ERROR_UNEXPECTED_STATE,
  XML_ERROR_ENTITY_DECLARED_IN_PE,
  XML_ERROR_FEATURE_REQUIRES_XML_DTD,
  XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING,
  XML_ERROR_UNBOUND_PREFIX,
  XML_ERROR_SUSPENDED,
  XML_ERROR_NOT_SUSPENDED,
  XML_ERROR_ABORTED,
  XML_ERROR_FINISHED,
  XML_ERROR_SUSPEND_PE
};
|
|
|
|
/* Kind of a content-model node (the `type` member of XML_Content).
   Numbering starts at 1. */
enum XML_Content_Type {
  XML_CTYPE_EMPTY = 1,
  XML_CTYPE_ANY,
  XML_CTYPE_MIXED,
  XML_CTYPE_NAME,
  XML_CTYPE_CHOICE,
  XML_CTYPE_SEQ
};
|
|
|
|
/* Quantifier of a content-model node (the `quant` member of XML_Content). */
enum XML_Content_Quant {
  XML_CQUANT_NONE,
  XML_CQUANT_OPT,
  XML_CQUANT_REP,
  XML_CQUANT_PLUS
};
|
|
|
|
/* If type == XML_CTYPE_EMPTY or XML_CTYPE_ANY, then quant will be
|
|
XML_CQUANT_NONE, and the other fields will be zero or NULL.
|
|
If type == XML_CTYPE_MIXED, then quant will be NONE or REP and
|
|
numchildren will contain number of elements that may be mixed in
|
|
and children point to an array of XML_Content cells that will be
|
|
all of XML_CTYPE_NAME type with no quantification.
|
|
|
|
If type == XML_CTYPE_NAME, then the name points to the name, and
|
|
the numchildren field will be zero and children will be NULL. The
|
|
quant fields indicates any quantifiers placed on the name.
|
|
|
|
CHOICE and SEQ will have name NULL, the number of children in
|
|
numchildren and children will point, recursively, to an array
|
|
of XML_Content cells.
|
|
|
|
The EMPTY, ANY, and MIXED types will only occur at top level.
|
|
*/
|
|
|
|
typedef struct XML_cp XML_Content;
|
|
|
|
struct XML_cp {
|
|
enum XML_Content_Type type;
|
|
enum XML_Content_Quant quant;
|
|
XML_Char * name;
|
|
unsigned int numchildren;
|
|
XML_Content * children;
|
|
};
|
|
|
|
|
|
/* This is called for an element declaration. See above for
|
|
description of the model argument. It's the caller's responsibility
|
|
to free model when finished with it.
|
|
*/
|
|
typedef void (XMLCALL *XML_ElementDeclHandler) (void *userData,
|
|
const XML_Char *name,
|
|
XML_Content *model);
|
|
|
|
XMLPARSEAPI(void)
|
|
XML_SetElementDeclHandler(XML_Parser parser,
|
|
XML_ElementDeclHandler eldecl);
|
|
|
|
/* The Attlist declaration handler is called for *each* attribute. So
|
|
a single Attlist declaration with multiple attributes declared will
|
|
generate multiple calls to this handler. The "default" parameter
|
|
may be NULL in the case of the "#IMPLIED" or "#REQUIRED"
|
|
keyword. The "isrequired" parameter will be true and the default
|
|
value will be NULL in the case of "#REQUIRED". If "isrequired" is
|
|
true and default is non-NULL, then this is a "#FIXED" default.
|
|
*/
|
|
typedef void (XMLCALL *XML_AttlistDeclHandler) (
|
|
void *userData,
|
|
const XML_Char *elname,
|
|
const XML_Char *attname,
|
|
const XML_Char *att_type,
|
|
const XML_Char *dflt,
|
|
int isrequired);
|
|
|
|
XMLPARSEAPI(void)
|
|
XML_SetAttlistDeclHandler(XML_Parser parser,
|
|
XML_AttlistDeclHandler attdecl);
|
|
|
|
/* The XML declaration handler is called for *both* XML declarations
|
|
and text declarations. The way to distinguish is that the version
|
|
parameter will be NULL for text declarations. The encoding
|
|
parameter may be NULL for XML declarations. The standalone
|
|
parameter will be -1, 0, or 1 indicating respectively that there
|
|
was no standalone parameter in the declaration, that it was given
|
|
as no, or that it was given as yes.
|
|
*/
|
|
typedef void (XMLCALL *XML_XmlDeclHandler) (void *userData,
|
|
const XML_Char *version,
|
|
const XML_Char *encoding,
|
|
int standalone);
|
|
|
|
XMLPARSEAPI(void)
|
|
XML_SetXmlDeclHandler(XML_Parser parser,
|
|
XML_XmlDeclHandler xmldecl);
|
|
|
|
|
|
typedef struct {
|
|
void *(XMLCALL *malloc_fcn)(size_t size);
|
|
void *(XMLCALL *realloc_fcn)(void *ptr, size_t size);
|
|
void (XMLCALL *free_fcn)(void *ptr);
|
|
} XML_Memory_Handling_Suite;
|
|
|
|
/* Constructs a new parser; encoding is the encoding specified by the
|
|
external protocol or NULL if there is none specified.
|
|
*/
|
|
XMLPARSEAPI(XML_Parser)
|
|
XML_ParserCreate(const XML_Char *encoding);
|
|
|
|
/* Constructs a new parser and namespace processor. Element type
|
|
names and attribute names that belong to a namespace will be
|
|
expanded; unprefixed attribute names are never expanded; unprefixed
|
|
element type names are expanded only if there is a default
|
|
namespace. The expanded name is the concatenation of the namespace
|
|
URI, the namespace separator character, and the local part of the
|
|
name. If the namespace separator is '\0' then the namespace URI
|
|
and the local part will be concatenated without any separator.
|
|
When a namespace is not declared, the name and prefix will be
|
|
passed through without expansion.
|
|
*/
|
|
XMLPARSEAPI(XML_Parser)
|
|
XML_ParserCreateNS(const XML_Char *encoding, XML_Char namespaceSeparator);
|
|
|
|
|
|
/* Constructs a new parser using the memory management suite referred to
|
|
by memsuite. If memsuite is NULL, then use the standard library memory
|
|
suite. If namespaceSeparator is non-NULL it creates a parser with
|
|
namespace processing as described above. The character pointed at
|
|
will serve as the namespace separator.
|
|
|
|
All further memory operations used for the created parser will come from
|
|
the given suite.
|
|
*/
|
|
XMLPARSEAPI(XML_Parser)
|
|
XML_ParserCreate_MM(const XML_Char *encoding,
|
|
const XML_Memory_Handling_Suite *memsuite,
|
|
const XML_Char *namespaceSeparator);
|
|
|
|
/* Prepare a parser object to be re-used. This is particularly
|
|
valuable when memory allocation overhead is disproportionatly high,
|
|
such as when a large number of small documnents need to be parsed.
|
|
All handlers are cleared from the parser, except for the
|
|
unknownEncodingHandler. The parser's external state is re-initialized
|
|
except for the values of ns and ns_triplets.
|
|
|
|
Added in Expat 1.95.3.
|
|
*/
|
|
XMLPARSEAPI(XML_Bool)
|
|
XML_ParserReset(XML_Parser parser, const XML_Char *encoding);
|
|
|
|
/* atts is array of name/value pairs, terminated by 0;
|
|
names and values are 0 terminated.
|
|
*/
|
|
typedef void (XMLCALL *XML_StartElementHandler) (void *userData,
|
|
const XML_Char *name,
|
|
const XML_Char **atts);
|
|
|
|
typedef void (XMLCALL *XML_EndElementHandler) (void *userData,
|
|
const XML_Char *name);
|
|
|
|
|
|
/* s is not 0 terminated. */
|
|
typedef void (XMLCALL *XML_CharacterDataHandler) (void *userData,
|
|
const XML_Char *s,
|
|
int len);
|
|
|
|
/* target and data are 0 terminated */
|
|
typedef void (XMLCALL *XML_ProcessingInstructionHandler) (
|
|
void *userData,
|
|
const XML_Char *target,
|
|
const XML_Char *data);
|
|
|
|
/* data is 0 terminated */
|
|
typedef void (XMLCALL *XML_CommentHandler) (void *userData,
|
|
const XML_Char *data);
|
|
|
|
typedef void (XMLCALL *XML_StartCdataSectionHandler) (void *userData);
|
|
typedef void (XMLCALL *XML_EndCdataSectionHandler) (void *userData);
|
|
|
|
/* This is called for any characters in the XML document for which
|
|
there is no applicable handler. This includes both characters that
|
|
are part of markup which is of a kind that is not reported
|
|
(comments, markup declarations), or characters that are part of a
|
|
construct which could be reported but for which no handler has been
|
|
supplied. The characters are passed exactly as they were in the XML
|
|
document except that they will be encoded in UTF-8 or UTF-16.
|
|
Line boundaries are not normalized. Note that a byte order mark
|
|
character is not passed to the default handler. There are no
|
|
guarantees about how characters are divided between calls to the
|
|
default handler: for example, a comment might be split between
|
|
multiple calls.
|
|
*/
|
|
typedef void (XMLCALL *XML_DefaultHandler) (void *userData,
|
|
const XML_Char *s,
|
|
int len);
|
|
|
|
/* This is called for the start of the DOCTYPE declaration, before
|
|
any DTD or internal subset is parsed.
|
|
*/
|
|
typedef void (XMLCALL *XML_StartDoctypeDeclHandler) (
|
|
void *userData,
|
|
const XML_Char *doctypeName,
|
|
const XML_Char *sysid,
|
|
const XML_Char *pubid,
|
|
int has_internal_subset);
|
|
|
|
/* This is called for the start of the DOCTYPE declaration when the
|
|
closing > is encountered, but after processing any external
|
|
subset.
|
|
*/
|
|
typedef void (XMLCALL *XML_EndDoctypeDeclHandler)(void *userData);
|
|
|
|
/* This is called for entity declarations. The is_parameter_entity
|
|
argument will be non-zero if the entity is a parameter entity, zero
|
|
otherwise.
|
|
|
|
For internal entities (<!ENTITY foo "bar">), value will
|
|
be non-NULL and systemId, publicID, and notationName will be NULL.
|
|
The value string is NOT nul-terminated; the length is provided in
|
|
the value_length argument. Since it is legal to have zero-length
|
|
values, do not use this argument to test for internal entities.
|
|
|
|
For external entities, value will be NULL and systemId will be
|
|
non-NULL. The publicId argument will be NULL unless a public
|
|
identifier was provided. The notationName argument will have a
|
|
non-NULL value only for unparsed entity declarations.
|
|
|
|
Note that is_parameter_entity can't be changed to XML_Bool, since
|
|
that would break binary compatibility.
|
|
*/
|
|
typedef void (XMLCALL *XML_EntityDeclHandler) (
|
|
void *userData,
|
|
const XML_Char *entityName,
|
|
int is_parameter_entity,
|
|
const XML_Char *value,
|
|
int value_length,
|
|
const XML_Char *base,
|
|
const XML_Char *systemId,
|
|
const XML_Char *publicId,
|
|
const XML_Char *notationName);
|
|
|
|
XMLPARSEAPI(void)
|
|
XML_SetEntityDeclHandler(XML_Parser parser,
|
|
XML_EntityDeclHandler handler);
|
|
|
|
/* OBSOLETE -- OBSOLETE -- OBSOLETE
|
|
This handler has been superceded by the EntityDeclHandler above.
|
|
It is provided here for backward compatibility.
|
|
|
|
This is called for a declaration of an unparsed (NDATA) entity.
|
|
The base argument is whatever was set by XML_SetBase. The
|
|
entityName, systemId and notationName arguments will never be
|
|
NULL. The other arguments may be.
|
|
*/
|
|
typedef void (XMLCALL *XML_UnparsedEntityDeclHandler) (
|
|
void *userData,
|
|
const XML_Char *entityName,
|
|
const XML_Char *base,
|
|
const XML_Char *systemId,
|
|
const XML_Char *publicId,
|
|
const XML_Char *notationName);
|
|
|
|
/* This is called for a declaration of notation. The base argument is
|
|
whatever was set by XML_SetBase. The notationName will never be
|
|
NULL. The other arguments can be.
|
|
*/
|
|
typedef void (XMLCALL *XML_NotationDeclHandler) (
|
|
void *userData,
|
|
const XML_Char *notationName,
|
|
const XML_Char *base,
|
|
const XML_Char *systemId,
|
|
const XML_Char *publicId);
|
|
|
|
/* When namespace processing is enabled, these are called once for
|
|
each namespace declaration. The call to the start and end element
|
|
handlers occur between the calls to the start and end namespace
|
|
declaration handlers. For an xmlns attribute, prefix will be
|
|
NULL. For an xmlns="" attribute, uri will be NULL.
|
|
*/
|
|
typedef void (XMLCALL *XML_StartNamespaceDeclHandler) (
|
|
void *userData,
|
|
const XML_Char *prefix,
|
|
const XML_Char *uri);
|
|
|
|
typedef void (XMLCALL *XML_EndNamespaceDeclHandler) (
|
|
void *userData,
|
|
const XML_Char *prefix);
|
|
|
|
/* This is called if the document is not standalone, that is, it has an
|
|
external subset or a reference to a parameter entity, but does not
|
|
have standalone="yes". If this handler returns XML_STATUS_ERROR,
|
|
then processing will not continue, and the parser will return a
|
|
XML_ERROR_NOT_STANDALONE error.
|
|
If parameter entity parsing is enabled, then in addition to the
|
|
conditions above this handler will only be called if the referenced
|
|
entity was actually read.
|
|
*/
|
|
typedef int (XMLCALL *XML_NotStandaloneHandler) (void *userData);
|
|
|
|
/* This is called for a reference to an external parsed general
|
|
entity. The referenced entity is not automatically parsed. The
|
|
application can parse it immediately or later using
|
|
XML_ExternalEntityParserCreate.
|
|
|
|
The parser argument is the parser parsing the entity containing the
|
|
reference; it can be passed as the parser argument to
|
|
XML_ExternalEntityParserCreate. The systemId argument is the
|
|
system identifier as specified in the entity declaration; it will
|
|
not be NULL.
|
|
|
|
The base argument is the system identifier that should be used as
|
|
the base for resolving systemId if systemId was relative; this is
|
|
set by XML_SetBase; it may be NULL.
|
|
|
|
The publicId argument is the public identifier as specified in the
|
|
entity declaration, or NULL if none was specified; the whitespace
|
|
in the public identifier will have been normalized as required by
|
|
the XML spec.
|
|
|
|
The context argument specifies the parsing context in the format
|
|
expected by the context argument to XML_ExternalEntityParserCreate;
|
|
context is valid only until the handler returns, so if the
|
|
referenced entity is to be parsed later, it must be copied.
|
|
context is NULL only when the entity is a parameter entity.
|
|
|
|
The handler should return XML_STATUS_ERROR if processing should not
|
|
continue because of a fatal error in the handling of the external
|
|
entity. In this case the calling parser will return an
|
|
XML_ERROR_EXTERNAL_ENTITY_HANDLING error.
|
|
|
|
Note that unlike other handlers the first argument is the parser,
|
|
not userData.
|
|
*/
|
|
typedef int (XMLCALL *XML_ExternalEntityRefHandler) (
|
|
XML_Parser parser,
|
|
const XML_Char *context,
|
|
const XML_Char *base,
|
|
const XML_Char *systemId,
|
|
const XML_Char *publicId);
|
|
|
|
/* This is called in two situations:
|
|
1) An entity reference is encountered for which no declaration
|
|
has been read *and* this is not an error.
|
|
2) An internal entity reference is read, but not expanded, because
|
|
XML_SetDefaultHandler has been called.
|
|
Note: skipped parameter entities in declarations and skipped general
|
|
entities in attribute values cannot be reported, because
|
|
the event would be out of sync with the reporting of the
|
|
declarations or attribute values
|
|
*/
|
|
typedef void (XMLCALL *XML_SkippedEntityHandler) (
|
|
void *userData,
|
|
const XML_Char *entityName,
|
|
int is_parameter_entity);
|
|
|
|
/* This structure is filled in by the XML_UnknownEncodingHandler to
|
|
provide information to the parser about encodings that are unknown
|
|
to the parser.
|
|
|
|
The map[b] member gives information about byte sequences whose
|
|
first byte is b.
|
|
|
|
If map[b] is c where c is >= 0, then b by itself encodes the
|
|
Unicode scalar value c.
|
|
|
|
If map[b] is -1, then the byte sequence is malformed.
|
|
|
|
If map[b] is -n, where n >= 2, then b is the first byte of an
|
|
n-byte sequence that encodes a single Unicode scalar value.
|
|
|
|
The data member will be passed as the first argument to the convert
|
|
function.
|
|
|
|
The convert function is used to convert multibyte sequences; s will
|
|
point to a n-byte sequence where map[(unsigned char)*s] == -n. The
|
|
convert function must return the Unicode scalar value represented
|
|
by this byte sequence or -1 if the byte sequence is malformed.
|
|
|
|
The convert function may be NULL if the encoding is a single-byte
|
|
encoding, that is if map[b] >= -1 for all bytes b.
|
|
|
|
When the parser is finished with the encoding, then if release is
|
|
not NULL, it will call release passing it the data member; once
|
|
release has been called, the convert function will not be called
|
|
again.
|
|
|
|
Expat places certain restrictions on the encodings that are supported
|
|
using this mechanism.
|
|
|
|
1. Every ASCII character that can appear in a well-formed XML document,
|
|
other than the characters
|
|
|
|
$@\^`{}~
|
|
|
|
must be represented by a single byte, and that byte must be the
|
|
same byte that represents that character in ASCII.
|
|
|
|
2. No character may require more than 4 bytes to encode.
|
|
|
|
3. All characters encoded must have Unicode scalar values <=
|
|
0xFFFF, (i.e., characters that would be encoded by surrogates in
|
|
UTF-16 are not allowed). Note that this restriction doesn't
|
|
apply to the built-in support for UTF-8 and UTF-16.
|
|
|
|
4. No Unicode character may be encoded by more than one distinct
|
|
sequence of bytes.
|
|
*/
|
|
typedef struct {
|
|
int map[256];
|
|
void *data;
|
|
int (XMLCALL *convert)(void *data, const char *s);
|
|
void (XMLCALL *release)(void *data);
|
|
} XML_Encoding;
|
|
|
|
/* This is called for an encoding that is unknown to the parser.
|
|
|
|
The encodingHandlerData argument is that which was passed as the
|
|
second argument to XML_SetUnknownEncodingHandler.
|
|
|
|
The name argument gives the name of the encoding as specified in
|
|
the encoding declaration.
|
|
|
|
If the callback can provide information about the encoding, it must
|
|
fill in the XML_Encoding structure, and return XML_STATUS_OK.
|
|
Otherwise it must return XML_STATUS_ERROR.
|
|
|
|
If info does not describe a suitable encoding, then the parser will
|
|
return an XML_UNKNOWN_ENCODING error.
|
|
*/
|
|
typedef int (XMLCALL *XML_UnknownEncodingHandler) (
|
|
void *encodingHandlerData,
|
|
const XML_Char *name,
|
|
XML_Encoding *info);
|
|
|
|
XMLPARSEAPI(void)
|
|
XML_SetElementHandler(XML_Parser parser,
|
|
XML_StartElementHandler start,
|
|
XML_EndElementHandler end);
|
|
|
|
XMLPARSEAPI(void)
|
|
XML_SetStartElementHandler(XML_Parser parser,
|
|
XML_StartElementHandler handler);
|
|
|
|
XMLPARSEAPI(void)
|
|
XML_SetEndElementHandler(XML_Parser parser,
|
|
XML_EndElementHandler handler);
|
|
|
|
XMLPARSEAPI(void)
|
|
XML_SetCharacterDataHandler(XML_Parser parser,
|
|
XML_CharacterDataHandler handler);
|
|
|
|
XMLPARSEAPI(void)
|
|
XML_SetProcessingInstructionHandler(XML_Parser parser,
|
|
XML_ProcessingInstructionHandler handler);
|
|
XMLPARSEAPI(void)
|
|
XML_SetCommentHandler(XML_Parser parser,
|
|
XML_CommentHandler handler);
|
|
|
|
XMLPARSEAPI(void)
|
|
XML_SetCdataSectionHandler(XML_Parser parser,
|
|
XML_StartCdataSectionHandler start,
|
|
XML_EndCdataSectionHandler end);
|
|
|
|
XMLPARSEAPI(void)
|
|
XML_SetStartCdataSectionHandler(XML_Parser parser,
|
|
XML_StartCdataSectionHandler start);
|
|
|
|
XMLPARSEAPI(void)
|
|
XML_SetEndCdataSectionHandler(XML_Parser parser,
|
|
XML_EndCdataSectionHandler end);
|
|
|
|
/* This sets the default handler and also inhibits expansion of
|
|
internal entities. These entity references will be passed to the
|
|
default handler, or to the skipped entity handler, if one is set.
|
|
*/
|
|
XMLPARSEAPI(void)
|
|
XML_SetDefaultHandler(XML_Parser parser,
|
|
XML_DefaultHandler handler);
|
|
|
|
/* This sets the default handler but does not inhibit expansion of
|
|
internal entities. The entity reference will not be passed to the
|
|
default handler.
|
|
*/
|
|
XMLPARSEAPI(void)
|
|
XML_SetDefaultHandlerExpand(XML_Parser parser,
|
|
XML_DefaultHandler handler);
|
|
|
|
XMLPARSEAPI(void)
|
|
XML_SetDoctypeDeclHandler(XML_Parser parser,
|
|
XML_StartDoctypeDeclHandler start,
|
|
XML_EndDoctypeDeclHandler end);
|
|
|
|
XMLPARSEAPI(void)
|
|
XML_SetStartDoctypeDeclHandler(XML_Parser parser,
|
|
XML_StartDoctypeDeclHandler start);
|
|
|
|
XMLPARSEAPI(void)
|
|
XML_SetEndDoctypeDeclHandler(XML_Parser parser,
|
|
XML_EndDoctypeDeclHandler end);
|
|
|
|
XMLPARSEAPI(void)
|
|
XML_SetUnparsedEntityDeclHandler(XML_Parser parser,
|
|
XML_UnparsedEntityDeclHandler handler);
|
|
|
|
XMLPARSEAPI(void)
|
|
XML_SetNotationDeclHandler(XML_Parser parser,
|
|
XML_NotationDeclHandler handler);
|
|
|
|
XMLPARSEAPI(void)
|
|
XML_SetNamespaceDeclHandler(XML_Parser parser,
|
|
XML_StartNamespaceDeclHandler start,
|
|
XML_EndNamespaceDeclHandler end);
|
|
|
|
XMLPARSEAPI(void)
|
|
XML_SetStartNamespaceDeclHandler(XML_Parser parser,
|
|
XML_StartNamespaceDeclHandler start);
|
|
|
|
XMLPARSEAPI(void)
|
|
XML_SetEndNamespaceDeclHandler(XML_Parser parser,
|
|
XML_EndNamespaceDeclHandler end);
|
|
|
|
XMLPARSEAPI(void)
|
|
XML_SetNotStandaloneHandler(XML_Parser parser,
|
|
XML_NotStandaloneHandler handler);
|
|
|
|
XMLPARSEAPI(void)
|
|
XML_SetExternalEntityRefHandler(XML_Parser parser,
|
|
XML_ExternalEntityRefHandler handler);
|
|
|
|
/* If a non-NULL value for arg is specified here, then it will be
|
|
passed as the first argument to the external entity ref handler
|
|
instead of the parser object.
|
|
*/
|
|
XMLPARSEAPI(void)
|
|
XML_SetExternalEntityRefHandlerArg(XML_Parser parser,
|
|
void *arg);
|
|
|
|
XMLPARSEAPI(void)
|
|
XML_SetSkippedEntityHandler(XML_Parser parser,
|
|
XML_SkippedEntityHandler handler);
|
|
|
|
XMLPARSEAPI(void)
|
|
XML_SetUnknownEncodingHandler(XML_Parser parser,
|
|
XML_UnknownEncodingHandler handler,
|
|
void *encodingHandlerData);
|
|
|
|
/* This can be called within a handler for a start element, end
|
|
element, processing instruction or character data. It causes the
|
|
corresponding markup to be passed to the default handler.
|
|
*/
|
|
XMLPARSEAPI(void)
|
|
XML_DefaultCurrent(XML_Parser parser);
|
|
|
|
/* If do_nst is non-zero, and namespace processing is in effect, and
|
|
a name has a prefix (i.e. an explicit namespace qualifier) then
|
|
that name is returned as a triplet in a single string separated by
|
|
the separator character specified when the parser was created: URI
|
|
+ sep + local_name + sep + prefix.
|
|
|
|
If do_nst is zero, then namespace information is returned in the
|
|
default manner (URI + sep + local_name) whether or not the name
|
|
has a prefix.
|
|
|
|
Note: Calling XML_SetReturnNSTriplet after XML_Parse or
|
|
XML_ParseBuffer has no effect.
|
|
*/
|
|
|
|
XMLPARSEAPI(void)
|
|
XML_SetReturnNSTriplet(XML_Parser parser, int do_nst);
|
|
|
|
/* This value is passed as the userData argument to callbacks. */
|
|
XMLPARSEAPI(void)
|
|
XML_SetUserData(XML_Parser parser, void *userData);
|
|
|
|
/* Returns the last value set by XML_SetUserData or NULL.
   The macro reads the void * stored at the very start of the object
   that parser points to. */
#define XML_GetUserData(parser) (*(void **)(parser))
|
|
|
|
/* This is equivalent to supplying an encoding argument to
|
|
XML_ParserCreate. On success XML_SetEncoding returns non-zero,
|
|
zero otherwise.
|
|
Note: Calling XML_SetEncoding after XML_Parse or XML_ParseBuffer
|
|
has no effect and returns XML_STATUS_ERROR.
|
|
*/
|
|
XMLPARSEAPI(enum XML_Status)
|
|
XML_SetEncoding(XML_Parser parser, const XML_Char *encoding);
|
|
|
|
/* If this function is called, then the parser will be passed as the
|
|
first argument to callbacks instead of userData. The userData will
|
|
still be accessible using XML_GetUserData.
|
|
*/
|
|
XMLPARSEAPI(void)
|
|
XML_UseParserAsHandlerArg(XML_Parser parser);
|
|
|
|
/* If useDTD == XML_TRUE is passed to this function, then the parser
|
|
will assume that there is an external subset, even if none is
|
|
specified in the document. In such a case the parser will call the
|
|
externalEntityRefHandler with a value of NULL for the systemId
|
|
argument (the publicId and context arguments will be NULL as well).
|
|
Note: If this function is called, then this must be done before
|
|
the first call to XML_Parse or XML_ParseBuffer, since it will
|
|
have no effect after that. Returns
|
|
XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING.
|
|
Note: If the document does not have a DOCTYPE declaration at all,
|
|
then startDoctypeDeclHandler and endDoctypeDeclHandler will not
|
|
be called, despite an external subset being parsed.
|
|
Note: If XML_DTD is not defined when Expat is compiled, returns
|
|
XML_ERROR_FEATURE_REQUIRES_XML_DTD.
|
|
*/
|
|
XMLPARSEAPI(enum XML_Error)
|
|
XML_UseForeignDTD(XML_Parser parser, XML_Bool useDTD);
|
|
|
|
|
|
/* Sets the base to be used for resolving relative URIs in system
|
|
identifiers in declarations. Resolving relative identifiers is
|
|
left to the application: this value will be passed through as the
|
|
base argument to the XML_ExternalEntityRefHandler,
|
|
XML_NotationDeclHandler and XML_UnparsedEntityDeclHandler. The base
|
|
argument will be copied. Returns XML_STATUS_ERROR if out of memory,
|
|
XML_STATUS_OK otherwise.
|
|
*/
|
|
XMLPARSEAPI(enum XML_Status)
|
|
XML_SetBase(XML_Parser parser, const XML_Char *base);
|
|
|
|
XMLPARSEAPI(const XML_Char *)
|
|
XML_GetBase(XML_Parser parser);
|
|
|
|
/* Returns the number of the attribute/value pairs passed in last call
|
|
to the XML_StartElementHandler that were specified in the start-tag
|
|
rather than defaulted. Each attribute/value pair counts as 2; thus
|
|
this correspondds to an index into the atts array passed to the
|
|
XML_StartElementHandler.
|
|
*/
|
|
XMLPARSEAPI(int)
|
|
XML_GetSpecifiedAttributeCount(XML_Parser parser);
|
|
|
|
/* Returns the index of the ID attribute passed in the last call to
|
|
XML_StartElementHandler, or -1 if there is no ID attribute. Each
|
|
attribute/value pair counts as 2; thus this correspondds to an
|
|
index into the atts array passed to the XML_StartElementHandler.
|
|
*/
|
|
XMLPARSEAPI(int)
|
|
XML_GetIdAttributeIndex(XML_Parser parser);
|
|
|
|
/* Parses some input. Returns XML_STATUS_ERROR if a fatal error is
|
|
detected. The last call to XML_Parse must have isFinal true; len
|
|
may be zero for this call (or any other).
|
|
|
|
Though the return values for these functions has always been
|
|
described as a Boolean value, the implementation, at least for the
|
|
1.95.x series, has always returned exactly one of the XML_Status
|
|
values.
|
|
*/
|
|
XMLPARSEAPI(enum XML_Status)
|
|
XML_Parse(XML_Parser parser, const char *s, int len, int isFinal);
|
|
|
|
XMLPARSEAPI(void *)
|
|
XML_GetBuffer(XML_Parser parser, int len);
|
|
|
|
XMLPARSEAPI(enum XML_Status)
|
|
XML_ParseBuffer(XML_Parser parser, int len, int isFinal);
|
|
|
|
/* Stops parsing, causing XML_Parse() or XML_ParseBuffer() to return.
|
|
Must be called from within a call-back handler, except when aborting
|
|
(resumable = 0) an already suspended parser. Some call-backs may
|
|
still follow because they would otherwise get lost. Examples:
|
|
- endElementHandler() for empty elements when stopped in
|
|
startElementHandler(),
|
|
- endNameSpaceDeclHandler() when stopped in endElementHandler(),
|
|
and possibly others.
|
|
|
|
Can be called from most handlers, including DTD related call-backs,
|
|
except when parsing an external parameter entity and resumable != 0.
|
|
Returns XML_STATUS_OK when successful, XML_STATUS_ERROR otherwise.
|
|
Possible error codes:
|
|
- XML_ERROR_SUSPENDED: when suspending an already suspended parser.
|
|
- XML_ERROR_FINISHED: when the parser has already finished.
|
|
- XML_ERROR_SUSPEND_PE: when suspending while parsing an external PE.
|
|
|
|
When resumable != 0 (true) then parsing is suspended, that is,
|
|
XML_Parse() and XML_ParseBuffer() return XML_STATUS_SUSPENDED.
|
|
Otherwise, parsing is aborted, that is, XML_Parse() and XML_ParseBuffer()
|
|
return XML_STATUS_ERROR with error code XML_ERROR_ABORTED.
|
|
|
|
*Note*:
|
|
This will be applied to the current parser instance only, that is, if
|
|
there is a parent parser then it will continue parsing when the
|
|
externalEntityRefHandler() returns. It is up to the implementation of
|
|
the externalEntityRefHandler() to call XML_StopParser() on the parent
|
|
parser (recursively), if one wants to stop parsing altogether.
|
|
|
|
When suspended, parsing can be resumed by calling XML_ResumeParser().
|
|
*/
|
|
XMLPARSEAPI(enum XML_Status)
|
|
XML_StopParser(XML_Parser parser, XML_Bool resumable);
|
|
|
|
/* Resumes parsing after it has been suspended with XML_StopParser().
|
|
Must not be called from within a handler call-back. Returns same
|
|
status codes as XML_Parse() or XML_ParseBuffer().
|
|
Additional error code XML_ERROR_NOT_SUSPENDED possible.
|
|
|
|
*Note*:
|
|
This must be called on the most deeply nested child parser instance
|
|
first, and on its parent parser only after the child parser has finished,
|
|
to be applied recursively until the document entity's parser is restarted.
|
|
That is, the parent parser will not resume by itself and it is up to the
|
|
application to call XML_ResumeParser() on it at the appropriate moment.
|
|
*/
|
|
XMLPARSEAPI(enum XML_Status)
|
|
XML_ResumeParser(XML_Parser parser);
|
|
|
|
/* Parser states stored in the 'parsing' member of XML_ParsingStatus.
   The enumerators carry no explicit values, so they number 0..3 in
   declaration order.
*/
enum XML_Parsing {
  XML_INITIALIZED,
  XML_PARSING,
  XML_FINISHED,
  XML_SUSPENDED
};
|
|
|
|
typedef struct {
|
|
enum XML_Parsing parsing;
|
|
XML_Bool finalBuffer;
|
|
} XML_ParsingStatus;
|
|
|
|
/* Returns status of parser with respect to being initialized, parsing,
|
|
finished, or suspended and processing the final buffer.
|
|
XXX XML_Parse() and XML_ParseBuffer() should return XML_ParsingStatus,
|
|
XXX with XML_FINISHED_OK or XML_FINISHED_ERROR replacing XML_FINISHED
|
|
*/
|
|
XMLPARSEAPI(void)
|
|
XML_GetParsingStatus(XML_Parser parser, XML_ParsingStatus *status);
|
|
|
|
/* Creates an XML_Parser object that can parse an external general
|
|
entity; context is a '\0'-terminated string specifying the parse
|
|
context; encoding is a '\0'-terminated string giving the name of
|
|
the externally specified encoding, or NULL if there is no
|
|
externally specified encoding. The context string consists of a
|
|
sequence of tokens separated by formfeeds (\f); a token consisting
|
|
of a name specifies that the general entity of the name is open; a
|
|
token of the form prefix=uri specifies the namespace for a
|
|
particular prefix; a token of the form =uri specifies the default
|
|
namespace. This can be called at any point after the first call to
|
|
an ExternalEntityRefHandler so longer as the parser has not yet
|
|
been freed. The new parser is completely independent and may
|
|
safely be used in a separate thread. The handlers and userData are
|
|
initialized from the parser argument. Returns NULL if out of memory.
|
|
Otherwise returns a new XML_Parser object.
|
|
*/
|
|
XMLPARSEAPI(XML_Parser)
|
|
XML_ExternalEntityParserCreate(XML_Parser parser,
|
|
const XML_Char *context,
|
|
const XML_Char *encoding);
|
|
|
|
/* Modes accepted by the 'parsing' argument of
   XML_SetParamEntityParsing().  The enumerators carry no explicit
   values, so they number 0..2 in declaration order.
*/
enum XML_ParamEntityParsing {
  XML_PARAM_ENTITY_PARSING_NEVER,
  XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE,
  XML_PARAM_ENTITY_PARSING_ALWAYS
};
|
|
|
|
/* Controls parsing of parameter entities (including the external DTD
|
|
subset). If parsing of parameter entities is enabled, then
|
|
references to external parameter entities (including the external
|
|
DTD subset) will be passed to the handler set with
|
|
XML_SetExternalEntityRefHandler. The context passed will be 0.
|
|
|
|
Unlike external general entities, external parameter entities can
|
|
only be parsed synchronously. If the external parameter entity is
|
|
to be parsed, it must be parsed during the call to the external
|
|
entity ref handler: the complete sequence of
|
|
XML_ExternalEntityParserCreate, XML_Parse/XML_ParseBuffer and
|
|
XML_ParserFree calls must be made during this call. After
|
|
XML_ExternalEntityParserCreate has been called to create the parser
|
|
for the external parameter entity (context must be 0 for this
|
|
call), it is illegal to make any calls on the old parser until
|
|
XML_ParserFree has been called on the newly created parser.
|
|
If the library has been compiled without support for parameter
|
|
entity parsing (ie without XML_DTD being defined), then
|
|
XML_SetParamEntityParsing will return 0 if parsing of parameter
|
|
entities is requested; otherwise it will return non-zero.
|
|
Note: If XML_SetParamEntityParsing is called after XML_Parse or
|
|
XML_ParseBuffer, then it has no effect and will always return 0.
|
|
*/
|
|
XMLPARSEAPI(int)
|
|
XML_SetParamEntityParsing(XML_Parser parser,
|
|
enum XML_ParamEntityParsing parsing);
|
|
|
|
/* If XML_Parse or XML_ParseBuffer have returned XML_STATUS_ERROR, then
|
|
XML_GetErrorCode returns information about the error.
|
|
*/
|
|
XMLPARSEAPI(enum XML_Error)
|
|
XML_GetErrorCode(XML_Parser parser);
|
|
|
|
/* These functions return information about the current parse
|
|
location. They may be called from any callback called to report
|
|
some parse event; in this case the location is the location of the
|
|
first of the sequence of characters that generated the event. When
|
|
called from callbacks generated by declarations in the document
|
|
prologue, the location identified isn't as neatly defined, but will
|
|
be within the relevant markup. When called outside of the callback
|
|
functions, the position indicated will be just past the last parse
|
|
event (regardless of whether there was an associated callback).
|
|
|
|
They may also be called after returning from a call to XML_Parse
|
|
or XML_ParseBuffer. If the return value is XML_STATUS_ERROR then
|
|
the location is the location of the character at which the error
|
|
was detected; otherwise the location is the location of the last
|
|
parse event, as described above.
|
|
*/
|
|
XMLPARSEAPI(int) XML_GetCurrentLineNumber(XML_Parser parser);
|
|
XMLPARSEAPI(int) XML_GetCurrentColumnNumber(XML_Parser parser);
|
|
XMLPARSEAPI(long) XML_GetCurrentByteIndex(XML_Parser parser);
|
|
|
|
/* Return the number of bytes in the current event.
|
|
Returns 0 if the event is in an internal entity.
|
|
*/
|
|
XMLPARSEAPI(int)
|
|
XML_GetCurrentByteCount(XML_Parser parser);
|
|
|
|
/* If XML_CONTEXT_BYTES is defined, returns the input buffer, sets
|
|
the integer pointed to by offset to the offset within this buffer
|
|
of the current parse position, and sets the integer pointed to by size
|
|
to the size of this buffer (the number of input bytes). Otherwise
|
|
returns a NULL pointer. Also returns a NULL pointer if a parse isn't
|
|
active.
|
|
|
|
NOTE: The character pointer returned should not be used outside
|
|
the handler that makes the call.
|
|
*/
|
|
XMLPARSEAPI(const char *)
|
|
XML_GetInputContext(XML_Parser parser,
|
|
int *offset,
|
|
int *size);
|
|
|
|
/* For backwards compatibility with previous versions: the old
   XML_GetError* names expand to the XML_GetCurrent* functions.
*/
#define XML_GetErrorLineNumber   XML_GetCurrentLineNumber
#define XML_GetErrorColumnNumber XML_GetCurrentColumnNumber
#define XML_GetErrorByteIndex    XML_GetCurrentByteIndex
|
|
|
|
/* Frees the content model passed to the element declaration handler */
|
|
XMLPARSEAPI(void)
|
|
XML_FreeContentModel(XML_Parser parser, XML_Content *model);
|
|
|
|
/* Exposing the memory handling functions used in Expat */
|
|
XMLPARSEAPI(void *)
|
|
XML_MemMalloc(XML_Parser parser, size_t size);
|
|
|
|
XMLPARSEAPI(void *)
|
|
XML_MemRealloc(XML_Parser parser, void *ptr, size_t size);
|
|
|
|
XMLPARSEAPI(void)
|
|
XML_MemFree(XML_Parser parser, void *ptr);
|
|
|
|
/* Frees memory used by the parser. */
|
|
XMLPARSEAPI(void)
|
|
XML_ParserFree(XML_Parser parser);
|
|
|
|
/* Returns a string describing the error. */
|
|
XMLPARSEAPI(const XML_LChar *)
|
|
XML_ErrorString(enum XML_Error code);
|
|
|
|
/* Return a string containing the version number of this expat */
|
|
XMLPARSEAPI(const XML_LChar *)
|
|
XML_ExpatVersion(void);
|
|
|
|
/* Numeric version triple; this is the type returned by
   XML_ExpatVersionInfo().
*/
typedef struct {
  int major;
  int minor;
  int micro;
} XML_Expat_Version;
|
|
|
|
/* Return an XML_Expat_Version structure containing numeric version
|
|
number information for this version of expat.
|
|
*/
|
|
XMLPARSEAPI(XML_Expat_Version)
|
|
XML_ExpatVersionInfo(void);
|
|
|
|
/* Added in Expat 1.95.5.
   Identifiers stored in the 'feature' member of XML_Feature.  Only
   XML_FEATURE_END has an explicit value (0); the rest follow in
   declaration order.
*/
enum XML_FeatureEnum {
  XML_FEATURE_END = 0,
  XML_FEATURE_UNICODE,
  XML_FEATURE_UNICODE_WCHAR_T,
  XML_FEATURE_DTD,
  XML_FEATURE_CONTEXT_BYTES,
  XML_FEATURE_MIN_SIZE,
  XML_FEATURE_SIZEOF_XML_CHAR,
  XML_FEATURE_SIZEOF_XML_LCHAR
  /* Additional features must be added to the end of this enum. */
};
|
|
|
|
typedef struct {
|
|
enum XML_FeatureEnum feature;
|
|
const XML_LChar *name;
|
|
long int value;
|
|
} XML_Feature;
|
|
|
|
XMLPARSEAPI(const XML_Feature *)
|
|
XML_GetFeatureList(void);
|
|
|
|
|
|
/* Expat follows the GNU/Linux convention of odd number minor version for
   beta/development releases and even number minor version for stable
   releases. Micro is bumped with each release, and set to 0 with each
   change to major or minor version.
*/
#define XML_MAJOR_VERSION 1
#define XML_MINOR_VERSION 95
#define XML_MICRO_VERSION 8
|
|
|
|
#ifdef __cplusplus
|
|
}
|
|
#endif
|
|
|
|
#endif /* not XmlParse_INCLUDED */
|