blob: 3f4a9f8c5d836f47f33c1e97036576c443bc6b65 [file] [log] [blame]
Index: encoding.c
===================================================================
--- encoding.c (revision 3733)
+++ encoding.c (working copy)
@@ -58,7 +58,7 @@
static int xmlCharEncodingAliasesNb = 0;
static int xmlCharEncodingAliasesMax = 0;
-#ifdef LIBXML_ICONV_ENABLED
+#if defined(LIBXML_ICONV_ENABLED) || defined(LIBXML_ICU_ENABLED)
#if 0
#define DEBUG_ENCODING /* Define this to get encoding traces */
#endif
@@ -97,6 +97,54 @@
NULL, 0, val, NULL, NULL, 0, 0, msg, val);
}
+#ifdef LIBXML_ICU_ENABLED
+static uconv_t*
+openIcuConverter(const char* name, int toUnicode)
+{
+ UErrorCode status = U_ZERO_ERROR;
+ uconv_t *conv = (uconv_t *) xmlMalloc(sizeof(uconv_t));
+ if (conv == NULL)
+ return NULL;
+
+ conv->uconv = ucnv_open(name, &status);
+ if (U_FAILURE(status))
+ goto error;
+
+ status = U_ZERO_ERROR;
+ if (toUnicode) {
+ ucnv_setToUCallBack(conv->uconv, UCNV_TO_U_CALLBACK_STOP,
+ NULL, NULL, NULL, &status);
+ }
+ else {
+ ucnv_setFromUCallBack(conv->uconv, UCNV_FROM_U_CALLBACK_STOP,
+ NULL, NULL, NULL, &status);
+ }
+ if (U_FAILURE(status))
+ goto error;
+
+ status = U_ZERO_ERROR;
+ conv->utf8 = ucnv_open("UTF-8", &status);
+ if (U_SUCCESS(status))
+ return conv;
+
+error:
+ if (conv->uconv)
+ ucnv_close(conv->uconv);
+ xmlFree(conv);
+ return NULL;
+}
+
+static void
+closeIcuConverter(uconv_t *conv)
+{
+ if (conv != NULL) {
+ ucnv_close(conv->uconv);
+ ucnv_close(conv->utf8);
+ xmlFree(conv);
+ }
+}
+#endif /* LIBXML_ICU_ENABLED */
+
/************************************************************************
* *
* Conversions To/From UTF8 encoding *
@@ -1306,7 +1354,11 @@
#ifdef LIBXML_ICONV_ENABLED
handler->iconv_in = NULL;
handler->iconv_out = NULL;
-#endif /* LIBXML_ICONV_ENABLED */
+#endif
+#ifdef LIBXML_ICU_ENABLED
+ handler->uconv_in = NULL;
+ handler->uconv_out = NULL;
+#endif
/*
* registers and returns the handler.
@@ -1371,7 +1423,7 @@
xmlNewCharEncodingHandler("ASCII", asciiToUTF8, NULL);
xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, NULL);
#endif /* LIBXML_OUTPUT_ENABLED */
-#ifndef LIBXML_ICONV_ENABLED
+#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
#ifdef LIBXML_ISO8859X_ENABLED
xmlRegisterCharEncodingHandlersISO8859x ();
#endif
@@ -1576,6 +1628,10 @@
xmlCharEncodingHandlerPtr enc;
iconv_t icv_in, icv_out;
#endif /* LIBXML_ICONV_ENABLED */
+#ifdef LIBXML_ICU_ENABLED
+ xmlCharEncodingHandlerPtr enc;
+ uconv_t *ucv_in, *ucv_out;
+#endif /* LIBXML_ICU_ENABLED */
char upper[100];
int i;
@@ -1642,6 +1698,35 @@
"iconv : problems with filters for '%s'\n", name);
}
#endif /* LIBXML_ICONV_ENABLED */
+#ifdef LIBXML_ICU_ENABLED
+ /* check whether icu can handle this */
+ ucv_in = openIcuConverter(name, 1);
+ ucv_out = openIcuConverter(name, 0);
+ if (ucv_in != NULL && ucv_out != NULL) {
+ enc = (xmlCharEncodingHandlerPtr)
+ xmlMalloc(sizeof(xmlCharEncodingHandler));
+ if (enc == NULL) {
+ closeIcuConverter(ucv_in);
+ closeIcuConverter(ucv_out);
+ return(NULL);
+ }
+ enc->name = xmlMemStrdup(name);
+ enc->input = NULL;
+ enc->output = NULL;
+ enc->uconv_in = ucv_in;
+ enc->uconv_out = ucv_out;
+#ifdef DEBUG_ENCODING
+ xmlGenericError(xmlGenericErrorContext,
+ "Found ICU converter handler for encoding %s\n", name);
+#endif
+ return enc;
+ } else if (ucv_in != NULL || ucv_out != NULL) {
+ closeIcuConverter(ucv_in);
+ closeIcuConverter(ucv_out);
+ xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
+ "ICU converter : problems with filters for '%s'\n", name);
+ }
+#endif /* LIBXML_ICU_ENABLED */
#ifdef DEBUG_ENCODING
xmlGenericError(xmlGenericErrorContext,
@@ -1732,6 +1817,75 @@
/************************************************************************
* *
+ * ICU based generic conversion functions *
+ * *
+ ************************************************************************/
+
+#ifdef LIBXML_ICU_ENABLED
+/**
+ * xmlUconvWrapper:
+ * @cd: ICU uconverter data structure
+ * @toUnicode : non-zero if toUnicode. 0 otherwise.
+ * @out: a pointer to an array of bytes to store the result
+ * @outlen: the length of @out
+ * @in: a pointer to an array of ISO Latin 1 chars
+ * @inlen: the length of @in
+ *
+ * Returns 0 if success, or
+ * -1 by lack of space, or
+ * -2 if the transcoding fails (for *in is not valid utf8 string or
+ * the result of transformation can't fit into the encoding we want), or
+ * -3 if there the last byte can't form a single output char.
+ *
+ * The value of @inlen after return is the number of octets consumed
+ * as the return value is positive, else unpredictable.
+ * The value of @outlen after return is the number of ocetes consumed.
+ */
+static int
+xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen,
+ const unsigned char *in, int *inlen) {
+ const char *ucv_in = (const char *) in;
+ char *ucv_out = (char *) out;
+ UErrorCode err = U_ZERO_ERROR;
+
+ if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
+ if (outlen != NULL) *outlen = 0;
+ return(-1);
+ }
+
+ /*
+ * TODO(jungshik)
+ * 1. is ucnv_convert(To|From)Algorithmic better?
+ * 2. had we better use an explicit pivot buffer?
+ * 3. error returned comes from 'fromUnicode' only even
+ * when toUnicode is true !
+ */
+ if (toUnicode) {
+ /* encoding => UTF-16 => UTF-8 */
+ ucnv_convertEx(cd->utf8, cd->uconv, &ucv_out, ucv_out + *outlen,
+ &ucv_in, ucv_in + *inlen, NULL, NULL, NULL, NULL,
+ 0, TRUE, &err);
+ } else {
+ /* UTF-8 => UTF-16 => encoding */
+ ucnv_convertEx(cd->uconv, cd->utf8, &ucv_out, ucv_out + *outlen,
+ &ucv_in, ucv_in + *inlen, NULL, NULL, NULL, NULL,
+ 0, TRUE, &err);
+ }
+ *inlen = ucv_in - (const char*) in;
+ *outlen = ucv_out - (char *) out;
+ if (U_SUCCESS(err))
+ return 0;
+ if (err == U_BUFFER_OVERFLOW_ERROR)
+ return -1;
+ if (err == U_INVALID_CHAR_FOUND || err == U_ILLEGAL_CHAR_FOUND)
+ return -2;
+ /* if (err == U_TRUNCATED_CHAR_FOUND) */
+ return -3;
+}
+#endif /* LIBXML_ICU_ENABLED */
+
+/************************************************************************
+ * *
* The real API used by libxml for on-the-fly conversion *
* *
************************************************************************/
@@ -1794,6 +1948,16 @@
if (ret == -1) ret = -3;
}
#endif /* LIBXML_ICONV_ENABLED */
+#ifdef LIBXML_ICU_ENABLED
+ else if (handler->uconv_in != NULL) {
+ ret = xmlUconvWrapper(handler->uconv_in, 1, &out->content[out->use],
+ &written, in->content, &toconv);
+ xmlBufferShrink(in, toconv);
+ out->use += written;
+ out->content[out->use] = 0;
+ if (ret == -1) ret = -3;
+ }
+#endif /* LIBXML_ICU_ENABLED */
#ifdef DEBUG_ENCODING
switch (ret) {
case 0:
@@ -1879,6 +2043,17 @@
ret = -3;
}
#endif /* LIBXML_ICONV_ENABLED */
+#ifdef LIBXML_ICU_ENABLED
+ else if (handler->uconv_in != NULL) {
+ ret = xmlUconvWrapper(handler->uconv_in, 1, &out->content[out->use],
+ &written, in->content, &toconv);
+ xmlBufferShrink(in, toconv);
+ out->use += written;
+ out->content[out->use] = 0;
+ if (ret == -1)
+ ret = -3;
+ }
+#endif /* LIBXML_ICU_ENABLED */
switch (ret) {
case 0:
#ifdef DEBUG_ENCODING
@@ -1979,6 +2154,15 @@
out->content[out->use] = 0;
}
#endif /* LIBXML_ICONV_ENABLED */
+#ifdef LIBXML_ICU_ENABLED
+ else if (handler->uconv_out != NULL) {
+ ret = xmlUconvWrapper(handler->uconv_out, 0,
+ &out->content[out->use],
+ &written, NULL, &toconv);
+ out->use += written;
+ out->content[out->use] = 0;
+ }
+#endif /* LIBXML_ICU_ENABLED */
#ifdef DEBUG_ENCODING
xmlGenericError(xmlGenericErrorContext,
"initialized encoder\n");
@@ -2023,6 +2207,26 @@
}
}
#endif /* LIBXML_ICONV_ENABLED */
+#ifdef LIBXML_ICU_ENABLED
+ else if (handler->uconv_out != NULL) {
+ ret = xmlUconvWrapper(handler->uconv_out, 0,
+ &out->content[out->use],
+ &written, in->content, &toconv);
+ xmlBufferShrink(in, toconv);
+ out->use += written;
+ writtentot += written;
+ out->content[out->use] = 0;
+ if (ret == -1) {
+ if (written > 0) {
+ /*
+ * Can be a limitation of iconv
+ */
+ goto retry;
+ }
+ ret = -3;
+ }
+ }
+#endif /* LIBXML_ICU_ENABLED */
else {
xmlEncodingErr(XML_I18N_NO_OUTPUT,
"xmlCharEncOutFunc: no output function !\n", NULL);
@@ -2135,6 +2339,22 @@
xmlFree(handler);
}
#endif /* LIBXML_ICONV_ENABLED */
+#ifdef LIBXML_ICU_ENABLED
+ if ((handler->uconv_out != NULL) || (handler->uconv_in != NULL)) {
+ if (handler->name != NULL)
+ xmlFree(handler->name);
+ handler->name = NULL;
+ if (handler->uconv_out != NULL) {
+ closeIcuConverter(handler->uconv_out);
+ handler->uconv_out = NULL;
+ }
+ if (handler->uconv_in != NULL) {
+ closeIcuConverter(handler->uconv_in);
+ handler->uconv_in = NULL;
+ }
+ xmlFree(handler);
+ }
+#endif
#ifdef DEBUG_ENCODING
if (ret)
xmlGenericError(xmlGenericErrorContext,
@@ -2210,6 +2430,22 @@
cur += toconv;
} while (ret == -2);
#endif
+#ifdef LIBXML_ICU_ENABLED
+ } else if (handler->uconv_out != NULL) {
+ do {
+ toconv = in->end - cur;
+ written = 32000;
+ ret = xmlUconvWrapper(handler->uconv_out, 0, &convbuf[0],
+ &written, cur, &toconv);
+ if (ret < 0) {
+ if (written > 0)
+ ret = -2;
+ else
+ return(-1);
+ }
+ unused += written;
+ cur += toconv;
+ } while (ret == -2);
} else {
/* could not find a converter */
return(-1);
@@ -2221,8 +2457,9 @@
}
return(in->consumed + (in->cur - in->base));
}
+#endif
-#ifndef LIBXML_ICONV_ENABLED
+#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
#ifdef LIBXML_ISO8859X_ENABLED
/**
@@ -3294,4 +3531,3 @@
#define bottom_encoding
#include "elfgcchack.h"
-
Index: include/libxml/encoding.h
===================================================================
--- include/libxml/encoding.h (revision 3733)
+++ include/libxml/encoding.h (working copy)
@@ -26,7 +26,25 @@
#ifdef LIBXML_ICONV_ENABLED
#include <iconv.h>
+#else
+#ifdef LIBXML_ICU_ENABLED
+#include <unicode/ucnv.h>
+#if 0
+/* Forward-declare UConverter here rather than pulling in <unicode/ucnv.h>
+ * to prevent unwanted ICU symbols being exposed to users of libxml2.
+ * One particular case is Qt4 conflicting on UChar32.
+ */
+#include <stdint.h>
+struct UConverter;
+typedef struct UConverter UConverter;
+#ifdef _MSC_VER
+typedef wchar_t UChar;
+#else
+typedef uint16_t UChar;
#endif
+#endif
+#endif
+#endif
#ifdef __cplusplus
extern "C" {
#endif
@@ -125,6 +143,13 @@
* Block defining the handlers for non UTF-8 encodings.
* If iconv is supported, there are two extra fields.
*/
+#ifdef LIBXML_ICU_ENABLED
+struct _uconv_t {
+ UConverter *uconv; /* for conversion between an encoding and UTF-16 */
+ UConverter *utf8; /* for conversion between UTF-8 and UTF-16 */
+};
+typedef struct _uconv_t uconv_t;
+#endif
typedef struct _xmlCharEncodingHandler xmlCharEncodingHandler;
typedef xmlCharEncodingHandler *xmlCharEncodingHandlerPtr;
@@ -136,6 +161,10 @@
iconv_t iconv_in;
iconv_t iconv_out;
#endif /* LIBXML_ICONV_ENABLED */
+#ifdef LIBXML_ICU_ENABLED
+ uconv_t *uconv_in;
+ uconv_t *uconv_out;
+#endif /* LIBXML_ICU_ENABLED */
};
#ifdef __cplusplus
Index: include/libxml/parser.h
===================================================================
--- include/libxml/parser.h (revision 3733)
+++ include/libxml/parser.h (working copy)
@@ -276,6 +276,7 @@
int nsNr; /* the number of inherited namespaces */
int nsMax; /* the size of the arrays */
const xmlChar * *nsTab; /* the array of prefix/namespace name */
+ struct _xmlParserCtxt *nsParent; /* parent context to inherit namespaces from */
int *attallocs; /* which attribute were allocated */
void * *pushTab; /* array of data for push */
xmlHashTablePtr attsDefault; /* defaulted attributes if any */
@@ -1207,6 +1208,7 @@
XML_WITH_DEBUG_MEM = 29,
XML_WITH_DEBUG_RUN = 30,
XML_WITH_ZLIB = 31,
+ XML_WITH_ICU = 32,
XML_WITH_NONE = 99999 /* just to be sure of allocation size */
} xmlFeature;
@@ -1217,4 +1219,3 @@
}
#endif
#endif /* __XML_PARSER_H__ */
-
Index: include/libxml/xmlversion.h.in
===================================================================
--- include/libxml/xmlversion.h.in (revision 3733)
+++ include/libxml/xmlversion.h.in (working copy)
@@ -269,6 +269,15 @@
#endif
/**
+ * LIBXML_ICU_ENABLED:
+ *
+ * Whether icu support is available
+ */
+#if @WITH_ICU@
+#define LIBXML_ICU_ENABLED
+#endif
+
+/**
* LIBXML_ISO8859X_ENABLED:
*
* Whether ISO-8859-* support is made available in case iconv is not
Index: parser.c
===================================================================
--- parser.c (revision 3733)
+++ parser.c (working copy)
@@ -816,6 +816,12 @@
#else
return(0);
#endif
+ case XML_WITH_ICU:
+#ifdef LIBXML_ICU_ENABLED
+ return(1);
+#else
+ return(0);
+#endif
default:
break;
}
@@ -7705,6 +7711,7 @@
return(NULL);
return(ctxt->nsTab[i + 1]);
}
+ if (ctxt->nsParent) return xmlGetNamespace(ctxt->nsParent, prefix);
return(NULL);
}
@@ -11948,6 +11955,8 @@
ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
+ ctxt->nsParent = oldctxt;
+
oldsax = ctxt->sax;
ctxt->sax = oldctxt->sax;
xmlDetectSAX2(ctxt);
Index: win32/Makefile.msvc
===================================================================
--- win32/Makefile.msvc (revision 3733)
+++ win32/Makefile.msvc (working copy)
@@ -71,6 +71,9 @@
!if "$(WITH_ICONV)" == "1"
LIBS = $(LIBS) iconv.lib
!endif
++!if "$(WITH_ICU)" == "1"
++LIBS = $(LIBS) icu.lib
++!endif
!if "$(WITH_ZLIB)" == "1"
LIBS = $(LIBS) zdll.lib
!endif
Index: win32/configure.js
===================================================================
--- win32/configure.js (revision 3733)
+++ win32/configure.js (working copy)
@@ -40,6 +40,7 @@
var withXptr = true;
var withXinclude = true;
var withIconv = true;
+var withIcu = false;
var withIso8859x = false;
var withZlib = false;
var withDebug = true;
@@ -124,6 +125,7 @@
txt += " xptr: Enable XPointer support (" + (withXptr? "yes" : "no") + ")\n";
txt += " xinclude: Enable XInclude support (" + (withXinclude? "yes" : "no") + ")\n";
txt += " iconv: Enable iconv support (" + (withIconv? "yes" : "no") + ")\n";
+ txt += " icu: Enable icu support (" + (withIcu? "yes" : "no") + ")\n";
txt += " iso8859x: Enable ISO8859X support (" + (withIso8859x? "yes" : "no") + ")\n";
txt += " zlib: Enable zlib support (" + (withZlib? "yes" : "no") + ")\n";
txt += " xml_debug: Enable XML debbugging module (" + (withDebug? "yes" : "no") + ")\n";
@@ -233,6 +235,7 @@
vf.WriteLine("WITH_XPTR=" + (withXptr? "1" : "0"));
vf.WriteLine("WITH_XINCLUDE=" + (withXinclude? "1" : "0"));
vf.WriteLine("WITH_ICONV=" + (withIconv? "1" : "0"));
+ vf.WriteLine("WITH_ICU=" + (withIcu? "1" : "0"));
vf.WriteLine("WITH_ISO8859X=" + (withIso8859x? "1" : "0"));
vf.WriteLine("WITH_ZLIB=" + (withZlib? "1" : "0"));
vf.WriteLine("WITH_DEBUG=" + (withDebug? "1" : "0"));
@@ -319,6 +322,8 @@
of.WriteLine(s.replace(/\@WITH_XINCLUDE\@/, withXinclude? "1" : "0"));
} else if (s.search(/\@WITH_ICONV\@/) != -1) {
of.WriteLine(s.replace(/\@WITH_ICONV\@/, withIconv? "1" : "0"));
+ } else if (s.search(/\@WITH_ICU\@/) != -1) {
+ of.WriteLine(s.replace(/\@WITH_ICU\@/, withIcu? "1" : "0"));
} else if (s.search(/\@WITH_ISO8859X\@/) != -1) {
of.WriteLine(s.replace(/\@WITH_ISO8859X\@/, withIso8859x? "1" : "0"));
} else if (s.search(/\@WITH_ZLIB\@/) != -1) {
@@ -462,6 +467,8 @@
withXinclude = strToBool(arg.substring(opt.length + 1, arg.length));
else if (opt == "iconv")
withIconv = strToBool(arg.substring(opt.length + 1, arg.length));
+ else if (opt == "icu")
+ withIcu = strToBool(arg.substring(opt.length + 1, arg.length));
else if (opt == "iso8859x")
withIso8859x = strToBool(arg.substring(opt.length + 1, arg.length));
else if (opt == "zlib")
@@ -611,7 +618,13 @@
makefile = ".\\Makefile.mingw";
else if (compiler == "bcb")
makefile = ".\\Makefile.bcb";
-fso.CopyFile(makefile, ".\\Makefile", true);
+var new_makefile = ".\\Makefile";
+var f = fso.FileExists(new_makefile);
+if (f) {
+ var t = fso.GetFile(new_makefile);
+ t.Attributes =0;
+}
+fso.CopyFile(makefile, new_makefile, true);
WScript.Echo("Created Makefile.");
// Create the config.h.
var confighsrc = "..\\include\\win32config.h";
@@ -640,6 +653,7 @@
txtOut += " XPointer support: " + boolToStr(withXptr) + "\n";
txtOut += " XInclude support: " + boolToStr(withXinclude) + "\n";
txtOut += " iconv support: " + boolToStr(withIconv) + "\n";
+txtOut += " icu support: " + boolToStr(withIcu) + "\n";
txtOut += " iso8859x support: " + boolToStr(withIso8859x) + "\n";
txtOut += " zlib support: " + boolToStr(withZlib) + "\n";
txtOut += " Debugging module: " + boolToStr(withDebug) + "\n";
Index: xmlmodule.c
===================================================================
--- xmlmodule.c (revision 3733)
+++ xmlmodule.c (working copy)
@@ -300,7 +300,7 @@
static void *
xmlModulePlatformOpen(const char *name)
{
- return LoadLibrary(name);
+ return LoadLibraryA(name);
}
/*
Index: xmlregexp.c
===================================================================
--- xmlregexp.c (revision 3733)
+++ xmlregexp.c (working copy)
@@ -6464,7 +6464,7 @@
if (name != NULL) {
value += 30 * (*name);
while ((ch = *name++) != 0) {
- value = value ^ ((value << 5) + (value >> 3) + (unsigned long)ch);
+ value = value ^ ((value << 5) + (value >> 3) + (unsigned short)ch);
}
}
return (value);