Hello.

There is a long-standing problem in xinclude.c,
described in the libxml2 Bugzilla:
https://bugzilla.gnome.org/show_bug.cgi?id=633166

When you XInclude a text file, the buffer is read in portions (of 4000
bytes), and the code incorrectly handles the case where the end of a
portion falls in the middle of a multibyte character.

I enclose a patch (also attached to the Bugzilla report).

Please review the patch.

-- 
Regards, Vyt
diff --git a/xinclude.c b/xinclude.c
index 2916ffa..b58dc43 100644
--- a/xinclude.c
+++ b/xinclude.c
@@ -1880,6 +1880,8 @@ xmlXIncludeLoadTxt(xmlXIncludeCtxtPtr ctxt, const xmlChar *url, int nr) {
     /*
      * Scan all chars from the resource and add the to the node
      */
+    int xinclude_multibyte_fallback_used = 0;
+xinclude_multibyte_fallback:
     while (xmlParserInputBufferRead(buf, 128) > 0) {
 	int len;
 	const xmlChar *content;
@@ -1892,13 +1894,21 @@ xmlXIncludeLoadTxt(xmlXIncludeCtxtPtr ctxt, const xmlChar *url, int nr) {
 
 	    cur = xmlStringCurrentChar(NULL, &content[i], &l);
 	    if (!IS_CHAR(cur)) {
+			// Handle splitted multibyte char at buffer boundary
+			if ((len - i) < 4 && !xinclude_multibyte_fallback_used) {
+			xinclude_multibyte_fallback_used = 1;
+			xmlBufferShrink(buf->buffer, i);
+			goto xinclude_multibyte_fallback;
+			} else {
 		xmlXIncludeErr(ctxt, ctxt->incTab[nr]->ref,
 		               XML_XINCLUDE_INVALID_CHAR,
 			       "%s contains invalid char\n", URL);
 		xmlFreeParserInputBuffer(buf);
 		xmlFree(URL);
 		return(-1);
+			}
 	    } else {
+		xinclude_multibyte_fallback_used = 0;
 		xmlNodeAddContentLen(node, &content[i], l);
 	    }
 	    i += l;
_______________________________________________
xml mailing list, project page  http://xmlsoft.org/
[email protected]
https://mail.gnome.org/mailman/listinfo/xml

Reply via email to