I wrote:
> I've not looked at the details of the proposed patches, but will
> do so now that the direction to go in is apparent.

Erik's v2 is slightly wrong as to the save-and-restore logic for
the KeepBlanks setting: we need to restore in the error path too,
and we'd better mark the save variable volatile since it's modified
inside the PG_TRY.  I made some other cosmetic changes, mainly to
avoid calculating "options" when it won't be used.  I tested the
attached v3 against RHEL8's libxml2-2.9.7, as well as against today's
libxml2 git master, and it accepts the problematic input on both.

                        regards, tom lane

diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c
index f7b731825fc..3379d392260 100644
--- a/src/backend/utils/adt/xml.c
+++ b/src/backend/utils/adt/xml.c
@@ -1769,7 +1769,7 @@ xml_doctype_in_content(const xmlChar *str)
  * xmloption_arg, but a DOCTYPE node in the input can force DOCUMENT mode).
  *
  * If parsed_nodes isn't NULL and we parse in CONTENT mode, the list
- * of parsed nodes from the xmlParseInNodeContext call will be returned
+ * of parsed nodes from the xmlParseBalancedChunkMemory call will be returned
  * to *parsed_nodes.  (It is caller's responsibility to free that.)
  *
  * Errors normally result in ereport(ERROR), but if escontext is an
@@ -1795,6 +1795,7 @@ xml_parse(text *data, XmlOptionType xmloption_arg,
 	PgXmlErrorContext *xmlerrcxt;
 	volatile xmlParserCtxtPtr ctxt = NULL;
 	volatile xmlDocPtr doc = NULL;
+	volatile int save_keep_blanks = -1;
 
 	/*
 	 * This step looks annoyingly redundant, but we must do it to have a
@@ -1822,7 +1823,6 @@ xml_parse(text *data, XmlOptionType xmloption_arg,
 	PG_TRY();
 	{
 		bool		parse_as_document = false;
-		int			options;
 		int			res_code;
 		size_t		count = 0;
 		xmlChar    *version = NULL;
@@ -1853,18 +1853,6 @@ xml_parse(text *data, XmlOptionType xmloption_arg,
 				parse_as_document = true;
 		}
 
-		/*
-		 * Select parse options.
-		 *
-		 * Note that here we try to apply DTD defaults (XML_PARSE_DTDATTR)
-		 * according to SQL/XML:2008 GR 10.16.7.d: 'Default values defined by
-		 * internal DTD are applied'.  As for external DTDs, we try to support
-		 * them too (see SQL/XML:2008 GR 10.16.7.e), but that doesn't really
-		 * happen because xmlPgEntityLoader prevents it.
-		 */
-		options = XML_PARSE_NOENT | XML_PARSE_DTDATTR
-			| (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS);
-
 		/* initialize output parameters */
 		if (parsed_xmloptiontype != NULL)
 			*parsed_xmloptiontype = parse_as_document ? XMLOPTION_DOCUMENT :
@@ -1874,11 +1862,26 @@ xml_parse(text *data, XmlOptionType xmloption_arg,
 
 		if (parse_as_document)
 		{
+			int			options;
+
+			/* set up parser context used by xmlCtxtReadDoc */
 			ctxt = xmlNewParserCtxt();
 			if (ctxt == NULL || xmlerrcxt->err_occurred)
 				xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
 							"could not allocate parser context");
 
+			/*
+			 * Select parse options.
+			 *
+			 * Note that here we try to apply DTD defaults (XML_PARSE_DTDATTR)
+			 * according to SQL/XML:2008 GR 10.16.7.d: 'Default values defined
+			 * by internal DTD are applied'.  As for external DTDs, we try to
+			 * support them too (see SQL/XML:2008 GR 10.16.7.e), but that
+			 * doesn't really happen because xmlPgEntityLoader prevents it.
+			 */
+			options = XML_PARSE_NOENT | XML_PARSE_DTDATTR
+				| (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS);
+
 			doc = xmlCtxtReadDoc(ctxt, utf8string,
 								 NULL,	/* no URL */
 								 "UTF-8",
@@ -1900,10 +1903,7 @@ xml_parse(text *data, XmlOptionType xmloption_arg,
 		}
 		else
 		{
-			xmlNodePtr	root;
-			xmlNodePtr	oldroot PG_USED_FOR_ASSERTS_ONLY;
-
-			/* set up document with empty root node to be the context node */
+			/* set up document that xmlParseBalancedChunkMemory will add to */
 			doc = xmlNewDoc(version);
 			if (doc == NULL || xmlerrcxt->err_occurred)
 				xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
@@ -1916,36 +1916,23 @@ xml_parse(text *data, XmlOptionType xmloption_arg,
 							"could not allocate XML document");
 			doc->standalone = standalone;
 
-			root = xmlNewNode(NULL, (const xmlChar *) "content-root");
-			if (root == NULL || xmlerrcxt->err_occurred)
-				xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
-							"could not allocate xml node");
-
-			/*
-			 * This attaches root to doc, so we need not free it separately;
-			 * and there can't yet be any old root to free.
-			 */
-			oldroot = xmlDocSetRootElement(doc, root);
-			Assert(oldroot == NULL);
+			/* set parse options --- have to do this the ugly way */
+			save_keep_blanks = xmlKeepBlanksDefault(preserve_whitespace ? 1 : 0);
 
 			/* allow empty content */
 			if (*(utf8string + count))
 			{
 				xmlNodePtr	node_list = NULL;
-				xmlParserErrors res;
-
-				res = xmlParseInNodeContext(root,
-											(char *) utf8string + count,
-											strlen((char *) utf8string + count),
-											options,
-											&node_list);
 
-				if (res != XML_ERR_OK || xmlerrcxt->err_occurred)
+				res_code = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0,
+													   utf8string + count,
+													   &node_list);
+				if (res_code != 0 || xmlerrcxt->err_occurred)
 				{
-					xmlFreeNodeList(node_list);
 					xml_errsave(escontext, xmlerrcxt,
 								ERRCODE_INVALID_XML_CONTENT,
 								"invalid XML content");
+					xmlFreeNodeList(node_list);
 					goto fail;
 				}
 
@@ -1961,6 +1948,8 @@ fail:
 	}
 	PG_CATCH();
 	{
+		if (save_keep_blanks != -1)
+			xmlKeepBlanksDefault(save_keep_blanks);
 		if (doc != NULL)
 			xmlFreeDoc(doc);
 		if (ctxt != NULL)
@@ -1972,6 +1961,9 @@ fail:
 	}
 	PG_END_TRY();
 
+	if (save_keep_blanks != -1)
+		xmlKeepBlanksDefault(save_keep_blanks);
+
 	if (ctxt != NULL)
 		xmlFreeParserCtxt(ctxt);
 

Reply via email to