On 11/08/10 21:27, Tom Lane wrote:
Robert Haas<robertmh...@gmail.com>  writes:
On Mon, Aug 9, 2010 at 10:41 AM, Robert Haas<robertmh...@gmail.com>  wrote:
There's also the fact that it would probably end up parsing the data
twice.  Given xmloption, I'm inclined to think Tom has it right:
provided xml_is_well_formed() that follows xmloption, plus a specific
version for each of content and document.

Another reasonable option here would be to forget about having
xml_is_well_formed() per se and ONLY offer
xml_is_well_formed_content() and xml_is_well_formed_document().

We already have xml_is_well_formed(); just dropping it doesn't seem like
a helpful choice.

As a project management note, this CommitFest is over in 4 days, so
unless we have a new version of this patch real soon now we need to
defer it to the September 15th CommitFest

Yes.  Mike, are you expecting to submit a new version before the end of
the week?


Yes and here it is, apologies for the delay. I have re-implemented xml_is_well_formed such that it is sensitive to the XMLOPTION. The additional _document and _content methods are now present. Tests and documentation adjusted to suit.

Regards,

--
Mike Fowler
Registered Linux user: 379787
*** a/contrib/xml2/pgxml.sql.in
--- b/contrib/xml2/pgxml.sql.in
***************
*** 5,18 **** SET search_path = public;
  
  --SQL for XML parser
  
- CREATE OR REPLACE FUNCTION xml_is_well_formed(text) RETURNS bool
- AS 'MODULE_PATHNAME'
- LANGUAGE C STRICT IMMUTABLE;
- 
  -- deprecated old name for xml_is_well_formed
  CREATE OR REPLACE FUNCTION xml_valid(text) RETURNS bool
! AS 'MODULE_PATHNAME', 'xml_is_well_formed'
! LANGUAGE C STRICT IMMUTABLE;
  
  CREATE OR REPLACE FUNCTION xml_encode_special_chars(text) RETURNS text
  AS 'MODULE_PATHNAME'
--- 5,14 ----
  
  --SQL for XML parser
  
  -- deprecated old name for xml_is_well_formed
  CREATE OR REPLACE FUNCTION xml_valid(text) RETURNS bool
! AS 'xml_is_well_formed'
! LANGUAGE INTERNAL STRICT IMMUTABLE;
  
  CREATE OR REPLACE FUNCTION xml_encode_special_chars(text) RETURNS text
  AS 'MODULE_PATHNAME'
*** a/contrib/xml2/uninstall_pgxml.sql
--- b/contrib/xml2/uninstall_pgxml.sql
***************
*** 29,33 **** DROP FUNCTION xml_encode_special_chars(text);
  
  -- deprecated old name for xml_is_well_formed
  DROP FUNCTION xml_valid(text);
- 
- DROP FUNCTION xml_is_well_formed(text);
--- 29,31 ----
*** a/contrib/xml2/xpath.c
--- b/contrib/xml2/xpath.c
***************
*** 27,33 **** PG_MODULE_MAGIC;
  
  /* externally accessible functions */
  
- Datum		xml_is_well_formed(PG_FUNCTION_ARGS);
  Datum		xml_encode_special_chars(PG_FUNCTION_ARGS);
  Datum		xpath_nodeset(PG_FUNCTION_ARGS);
  Datum		xpath_string(PG_FUNCTION_ARGS);
--- 27,32 ----
***************
*** 71,97 **** pgxml_parser_init(void)
  }
  
  
- /* Returns true if document is well-formed */
- 
- PG_FUNCTION_INFO_V1(xml_is_well_formed);
- 
- Datum
- xml_is_well_formed(PG_FUNCTION_ARGS)
- {
- 	text	   *t = PG_GETARG_TEXT_P(0);		/* document buffer */
- 	int32		docsize = VARSIZE(t) - VARHDRSZ;
- 	xmlDocPtr	doctree;
- 
- 	pgxml_parser_init();
- 
- 	doctree = xmlParseMemory((char *) VARDATA(t), docsize);
- 	if (doctree == NULL)
- 		PG_RETURN_BOOL(false);	/* i.e. not well-formed */
- 	xmlFreeDoc(doctree);
- 	PG_RETURN_BOOL(true);
- }
- 
- 
  /* Encodes special characters (<, >, &, " and \r) as XML entities */
  
  PG_FUNCTION_INFO_V1(xml_encode_special_chars);
--- 70,75 ----
*** a/doc/src/sgml/func.sgml
--- b/doc/src/sgml/func.sgml
***************
*** 8625,8630 **** SELECT xmlexists('//town[text() = ''Toronto'']' PASSING BY REF '<towns><town>Tor
--- 8625,8736 ----
       supports XPath, which is a subset of XQuery.
      </para>
     </sect3>
+ 
+    <sect3>
+     <title>xml_is_well_formed</title>
+ 
+     <indexterm>
+      <primary>xml_is_well_formed</primary>
+      <secondary>well formed</secondary>
+     </indexterm>
+ 
+ <synopsis>
+ <function>xml_is_well_formed</function>(<replaceable>text</replaceable>)
+ </synopsis>
+ 
+     <para>
+      The function <function>xml_is_well_formed</function> evaluates whether
+      the <replaceable>text</replaceable> is well formed XML content, returning
+      a boolean. It is useful for predetermining whether a cast to the XML type
+      will succeed, and therefore honours the current value of the
+      <literal>XMLOPTION</literal> session configuration parameter.
+     </para>
+     <para>
+     Example:
+ <screen><![CDATA[
+ SET xmloption TO DOCUMENT;
+ SELECT xml_is_well_formed('<>');
+  xml_is_well_formed 
+ --------------------
+  f
+ (1 row)
+ 
+ SELECT xml_is_well_formed('<abc/>');
+  xml_is_well_formed 
+ --------------------
+  t
+ (1 row)
+ 
+ SET xmloption TO CONTENT;
+ SELECT xml_is_well_formed('abc');
+  xml_is_well_formed 
+ --------------------
+  t
+ (1 row)
+ ]]></screen>
+     </para>
+     <para>
+     In addition to the structure checks, the function ensures that namespaces are correctly matched.
+ <screen><![CDATA[
+ SET xmloption TO DOCUMENT;
+ SELECT xml_is_well_formed('<pg:foo xmlns:pg="http://postgresql.org/stuff">bar</my:foo>');
+  xml_is_well_formed
+ --------------------
+  f
+ (1 row)
+ 
+ SELECT xml_is_well_formed('<pg:foo xmlns:pg="http://postgresql.org/stuff">bar</pg:foo>');
+  xml_is_well_formed
+ --------------------
+  t
+ (1 row)
+ ]]></screen>
+     </para>
+    </sect3>
+ 
+    <sect3>
+     <title>xml_is_well_formed_document</title>
+ 
+     <indexterm>
+      <primary>xml_is_well_formed_document</primary>
+      <secondary>well formed</secondary>
+     </indexterm>
+ 
+ <synopsis>
+ <function>xml_is_well_formed_document</function>(<replaceable>text</replaceable>)
+ </synopsis>
+ 
+     <para>
+     This function is similar to <function>xml_is_well_formed</function>,
+     differing only in the handling of the <literal>XMLOPTION</literal>. The
+     <function>xml_is_well_formed_document</function> ignores <literal>XMLOPTION</literal>
+     and assumes that it is currently set to <literal>DOCUMENT</literal>. Therefore
+     it assists in predetermining whether a cast to the XML document type
+     will succeed.
+     </para>
+    </sect3>
+ 
+    <sect3>
+     <title>xml_is_well_formed_content</title>
+ 
+     <indexterm>
+      <primary>xml_is_well_formed_content</primary>
+      <secondary>well formed</secondary>
+     </indexterm>
+ 
+ <synopsis>
+ <function>xml_is_well_formed_content</function>(<replaceable>text</replaceable>)
+ </synopsis>
+ 
+     <para>
+     This function is similar to <function>xml_is_well_formed</function>,
+     differing only in the handling of the <literal>XMLOPTION</literal>. The
+     <function>xml_is_well_formed_content</function> ignores <literal>XMLOPTION</literal>
+     and assumes that it is currently set to <literal>CONTENT</literal>. Therefore
+     it assists in predetermining whether a cast to the XML content type
+     will succeed.
+     </para>
+    </sect3>
    </sect2>
  
    <sect2 id="functions-xml-processing">
*** a/src/backend/utils/adt/xml.c
--- b/src/backend/utils/adt/xml.c
***************
*** 3565,3567 **** xpath_exists(PG_FUNCTION_ARGS)
--- 3565,3636 ----
  	return 0;
  #endif
  }
+ 
+ #ifdef USE_LIBXML
+ bool	/* true if data parses under xmloption_arg; false if parsing raised an error */
+ wellformed_xml(text *data, XmlOptionType xmloption_arg)
+ {
+ 	bool		result;
+ 	xmlDocPtr	doc = NULL;	/* stays NULL unless xml_parse() returns a document */
+ 
+ 	/* Catch any error raised while parsing and report it as "false". NOTE(review): this traps every error, not only XML parse failures -- confirm that is intended */
+ 	PG_TRY();
+ 	{
+ 		doc = xml_parse(data, xmloption_arg, true, GetDatabaseEncoding());	/* parse using the database encoding */
+ 		result = true;
+ 	}
+ 	PG_CATCH();
+ 	{
+ 		FlushErrorState();	/* the error is dropped here; nothing is re-thrown */
+ 		result = false;
+ 	}
+ 	PG_END_TRY();
+ 
+ 	if (doc)	/* only the verdict is returned, so release any parsed document */
+ 		xmlFreeDoc(doc);
+ 
+ 	return result;
+ }
+ #endif
+ 
+ Datum
+ xml_is_well_formed(PG_FUNCTION_ARGS)
+ {
+ #ifdef USE_LIBXML
+ 	text *data = PG_GETARG_TEXT_P(0);	/* argument 0: the text to check */
+ 	
+ 	PG_RETURN_BOOL(wellformed_xml(data, xmloption));	/* mode comes from xmloption, declared outside this hunk (presumably the XMLOPTION setting) */
+ 	
+ #else
+ 	NO_XML_SUPPORT();
+ 	return false;	/* NOTE(review): presumably unreachable if NO_XML_SUPPORT() raises an error -- confirm */
+ #endif   /* not USE_LIBXML */
+ }
+ 
+ Datum
+ xml_is_well_formed_document(PG_FUNCTION_ARGS)
+ {
+ #ifdef USE_LIBXML
+ 	text *data = PG_GETARG_TEXT_P(0);	/* argument 0: the text to check */
+ 	
+ 	PG_RETURN_BOOL(wellformed_xml(data, XMLOPTION_DOCUMENT));	/* always checks in DOCUMENT mode; the xmloption variable is not consulted */
+ 	
+ #else
+ 	NO_XML_SUPPORT();
+ 	return false;	/* NOTE(review): presumably unreachable if NO_XML_SUPPORT() raises an error -- confirm */
+ #endif   /* not USE_LIBXML */
+ }
+ 
+ Datum
+ xml_is_well_formed_content(PG_FUNCTION_ARGS)
+ {
+ #ifdef USE_LIBXML
+ 	text *data = PG_GETARG_TEXT_P(0);	/* argument 0: the text to check */
+ 	
+ 	PG_RETURN_BOOL(wellformed_xml(data, XMLOPTION_CONTENT));	/* always checks in CONTENT mode; the xmloption variable is not consulted */
+ 	
+ #else
+ 	NO_XML_SUPPORT();
+ 	return false;	/* NOTE(review): presumably unreachable if NO_XML_SUPPORT() raises an error -- confirm */
+ #endif   /* not USE_LIBXML */
+ }
*** a/src/include/catalog/pg_proc.h
--- b/src/include/catalog/pg_proc.h
***************
*** 4424,4429 **** DESCR("test XML value against XPath expression, with namespace support");
--- 4424,4436 ----
  DATA(insert OID = 3050 (  xpath_exists	 PGNSP PGUID 14 1 0 0 f f f t f i 2 0 16 "25 142" _null_ _null_ _null_ _null_ "select pg_catalog.xpath_exists($1, $2, ''{}''::pg_catalog.text[])" _null_ _null_ _null_ ));
  DESCR("test XML value against XPath expression");
  
+ DATA(insert OID = 3051 (  xml_is_well_formed			 PGNSP PGUID 12 1 0 0 f f f t f i 1 0 16 "25" _null_ _null_ _null_ _null_ xml_is_well_formed _null_ _null_ _null_ ));
+ DESCR("determine if a text fragment is well formed XML");
+ DATA(insert OID = 3052 (  xml_is_well_formed_document	 PGNSP PGUID 12 1 0 0 f f f t f i 1 0 16 "25" _null_ _null_ _null_ _null_ xml_is_well_formed_document _null_ _null_ _null_ ));
+ DESCR("determine if a text fragment is well formed XML document");
+ DATA(insert OID = 3053 (  xml_is_well_formed_content	 PGNSP PGUID 12 1 0 0 f f f t f i 1 0 16 "25" _null_ _null_ _null_ _null_ xml_is_well_formed_content _null_ _null_ _null_ ));
+ DESCR("determine if a text fragment is well formed XML content");
+ 
  /* uuid */
  DATA(insert OID = 2952 (  uuid_in		   PGNSP PGUID 12 1 0 0 f f f t f i 1 0 2950 "2275" _null_ _null_ _null_ _null_ uuid_in _null_ _null_ _null_ ));
  DESCR("I/O");
*** a/src/include/utils/xml.h
--- b/src/include/utils/xml.h
***************
*** 40,45 **** extern Datum xpath(PG_FUNCTION_ARGS);
--- 40,49 ----
  extern Datum xpath_exists(PG_FUNCTION_ARGS);
  extern Datum xmlexists(PG_FUNCTION_ARGS);
  
+ extern Datum xml_is_well_formed(PG_FUNCTION_ARGS);
+ extern Datum xml_is_well_formed_document(PG_FUNCTION_ARGS);
+ extern Datum xml_is_well_formed_content(PG_FUNCTION_ARGS);
+ 
  extern Datum table_to_xml(PG_FUNCTION_ARGS);
  extern Datum query_to_xml(PG_FUNCTION_ARGS);
  extern Datum cursor_to_xml(PG_FUNCTION_ARGS);
*** a/src/test/regress/expected/xml.out
--- b/src/test/regress/expected/xml.out
***************
*** 599,601 **** SELECT COUNT(id) FROM xmltest, query WHERE xmlexists(expr PASSING BY REF data);
--- 599,682 ----
       2
  (1 row)
  
+ -- Test xml_is_well_formed and variants
+ SELECT xml_is_well_formed_document('<foo>bar</foo>');
+  xml_is_well_formed_document 
+ -----------------------------
+  t
+ (1 row)
+ 
+ SELECT xml_is_well_formed_document('abc');
+  xml_is_well_formed_document 
+ -----------------------------
+  f
+ (1 row)
+ 
+ SELECT xml_is_well_formed_content('<foo>bar</foo>');
+  xml_is_well_formed_content 
+ ----------------------------
+  t
+ (1 row)
+ 
+ SELECT xml_is_well_formed_content('abc');
+  xml_is_well_formed_content 
+ ----------------------------
+  t
+ (1 row)
+ 
+ SET xmloption TO DOCUMENT;
+ SELECT xml_is_well_formed('<>');
+  xml_is_well_formed 
+ --------------------
+  f
+ (1 row)
+ 
+ SELECT xml_is_well_formed('<abc/>');
+  xml_is_well_formed 
+ --------------------
+  t
+ (1 row)
+ 
+ SELECT xml_is_well_formed('<foo>bar</foo>');
+  xml_is_well_formed 
+ --------------------
+  t
+ (1 row)
+ 
+ SELECT xml_is_well_formed('<foo>bar</foo');
+  xml_is_well_formed 
+ --------------------
+  f
+ (1 row)
+ 
+ SELECT xml_is_well_formed('<foo><bar>baz</foo>');
+  xml_is_well_formed 
+ --------------------
+  f
+ (1 row)
+ 
+ SELECT xml_is_well_formed('<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number one</local:piece><local:piece id="2" /></local:data>');
+  xml_is_well_formed 
+ --------------------
+  t
+ (1 row)
+ 
+ SELECT xml_is_well_formed('<pg:foo xmlns:pg="http://postgresql.org/stuff">bar</my:foo>');
+  xml_is_well_formed 
+ --------------------
+  f
+ (1 row)
+ 
+ SELECT xml_is_well_formed('<pg:foo xmlns:pg="http://postgresql.org/stuff">bar</pg:foo>');
+  xml_is_well_formed 
+ --------------------
+  t
+ (1 row)
+ 
+ SET xmloption TO CONTENT;
+ SELECT xml_is_well_formed('abc');
+  xml_is_well_formed 
+ --------------------
+  t
+ (1 row)
+ 
*** a/src/test/regress/sql/xml.sql
--- b/src/test/regress/sql/xml.sql
***************
*** 190,192 **** SELECT COUNT(id) FROM xmltest WHERE xpath_exists('/myns:menu/myns:beers/myns:nam
--- 190,212 ----
  CREATE TABLE query ( expr TEXT );
  INSERT INTO query VALUES ('/menu/beers/cost[text() = ''lots'']');
  SELECT COUNT(id) FROM xmltest, query WHERE xmlexists(expr PASSING BY REF data);
+ 
+ -- Test xml_is_well_formed and variants
+ 
+ SELECT xml_is_well_formed_document('<foo>bar</foo>');
+ SELECT xml_is_well_formed_document('abc');
+ SELECT xml_is_well_formed_content('<foo>bar</foo>');
+ SELECT xml_is_well_formed_content('abc');
+ 
+ SET xmloption TO DOCUMENT;
+ SELECT xml_is_well_formed('<>');
+ SELECT xml_is_well_formed('<abc/>');
+ SELECT xml_is_well_formed('<foo>bar</foo>');
+ SELECT xml_is_well_formed('<foo>bar</foo');
+ SELECT xml_is_well_formed('<foo><bar>baz</foo>');
+ SELECT xml_is_well_formed('<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number one</local:piece><local:piece id="2" /></local:data>');
+ SELECT xml_is_well_formed('<pg:foo xmlns:pg="http://postgresql.org/stuff">bar</my:foo>');
+ SELECT xml_is_well_formed('<pg:foo xmlns:pg="http://postgresql.org/stuff">bar</pg:foo>');
+ 
+ SET xmloption TO CONTENT;
+ SELECT xml_is_well_formed('abc');
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to