On 11/08/10 21:27, Tom Lane wrote:
Robert Haas<robertmh...@gmail.com> writes:
On Mon, Aug 9, 2010 at 10:41 AM, Robert Haas<robertmh...@gmail.com> wrote:
There's also the fact that it would probably end up parsing the data
twice. Given xmloption, I'm inclined to think Tom has it right:
provided xml_is_well_formed() that follows xmloption, plus a specific
version for each of content and document.
Another reasonable option here would be to forget about having
xml_is_well_formed() per se and ONLY offer
xml_is_well_formed_content() and xml_is_well_formed_document().
We already have xml_is_well_formed(); just dropping it doesn't seem like
a helpful choice.
As a project management note, this CommitFest is over in 4 days, so
unless we have a new version of this patch real soon now we need to
defer it to the September 15th CommitFest
Yes. Mike, are you expecting to submit a new version before the end of
the week?
Yes and here it is, apologies for the delay. I have re-implemented
xml_is_well_formed such that it is sensitive to the XMLOPTION. The
additional _document and _content methods are now present. Tests and
documentation adjusted to suit.
Regards,
--
Mike Fowler
Registered Linux user: 379787
*** a/contrib/xml2/pgxml.sql.in
--- b/contrib/xml2/pgxml.sql.in
***************
*** 5,18 **** SET search_path = public;
--SQL for XML parser
- CREATE OR REPLACE FUNCTION xml_is_well_formed(text) RETURNS bool
- AS 'MODULE_PATHNAME'
- LANGUAGE C STRICT IMMUTABLE;
-
-- deprecated old name for xml_is_well_formed
CREATE OR REPLACE FUNCTION xml_valid(text) RETURNS bool
! AS 'MODULE_PATHNAME', 'xml_is_well_formed'
! LANGUAGE C STRICT IMMUTABLE;
CREATE OR REPLACE FUNCTION xml_encode_special_chars(text) RETURNS text
AS 'MODULE_PATHNAME'
--- 5,14 ----
--SQL for XML parser
-- deprecated old name for xml_is_well_formed
CREATE OR REPLACE FUNCTION xml_valid(text) RETURNS bool
! AS 'xml_is_well_formed'
! LANGUAGE INTERNAL STRICT IMMUTABLE;
CREATE OR REPLACE FUNCTION xml_encode_special_chars(text) RETURNS text
AS 'MODULE_PATHNAME'
*** a/contrib/xml2/uninstall_pgxml.sql
--- b/contrib/xml2/uninstall_pgxml.sql
***************
*** 29,33 **** DROP FUNCTION xml_encode_special_chars(text);
-- deprecated old name for xml_is_well_formed
DROP FUNCTION xml_valid(text);
-
- DROP FUNCTION xml_is_well_formed(text);
--- 29,31 ----
*** a/contrib/xml2/xpath.c
--- b/contrib/xml2/xpath.c
***************
*** 27,33 **** PG_MODULE_MAGIC;
/* externally accessible functions */
- Datum xml_is_well_formed(PG_FUNCTION_ARGS);
Datum xml_encode_special_chars(PG_FUNCTION_ARGS);
Datum xpath_nodeset(PG_FUNCTION_ARGS);
Datum xpath_string(PG_FUNCTION_ARGS);
--- 27,32 ----
***************
*** 71,97 **** pgxml_parser_init(void)
}
- /* Returns true if document is well-formed */
-
- PG_FUNCTION_INFO_V1(xml_is_well_formed);
-
- Datum
- xml_is_well_formed(PG_FUNCTION_ARGS)
- {
- text *t = PG_GETARG_TEXT_P(0); /* document buffer */
- int32 docsize = VARSIZE(t) - VARHDRSZ;
- xmlDocPtr doctree;
-
- pgxml_parser_init();
-
- doctree = xmlParseMemory((char *) VARDATA(t), docsize);
- if (doctree == NULL)
- PG_RETURN_BOOL(false); /* i.e. not well-formed */
- xmlFreeDoc(doctree);
- PG_RETURN_BOOL(true);
- }
-
-
/* Encodes special characters (<, >, &, " and \r) as XML entities */
PG_FUNCTION_INFO_V1(xml_encode_special_chars);
--- 70,75 ----
*** a/doc/src/sgml/func.sgml
--- b/doc/src/sgml/func.sgml
***************
*** 8625,8630 **** SELECT xmlexists('//town[text() = ''Toronto'']' PASSING BY REF '<towns><town>Tor
--- 8625,8736 ----
supports XPath, which is a subset of XQuery.
</para>
</sect3>
+
+ <sect3>
+ <title>xml_is_well_formed</title>
+
+ <indexterm>
+ <primary>xml_is_well_formed</primary>
+ <secondary>well formed</secondary>
+ </indexterm>
+
+ <synopsis>
+ <function>xml_is_well_formed</function>(<replaceable>text</replaceable>)
+ </synopsis>
+
+ <para>
+ The function <function>xml_is_well_formed</function> evaluates whether
+ the <replaceable>text</replaceable> is well formed XML content, returning
+ a boolean. It is useful for predetermining whether a cast to the XML type
+ will succeed, and therefore honours the current value of the
+ <literal>XMLOPTION</literal> session configuration parameter.
+ </para>
+ <para>
+ Example:
+ <screen><![CDATA[
+ SET xmloption TO DOCUMENT;
+ SELECT xml_is_well_formed('<>');
+ xml_is_well_formed
+ --------------------
+ f
+ (1 row)
+
+ SELECT xml_is_well_formed('<abc/>');
+ xml_is_well_formed
+ --------------------
+ t
+ (1 row)
+
+ SET xmloption TO CONTENT;
+ SELECT xml_is_well_formed('abc');
+ xml_is_well_formed
+ --------------------
+ t
+ (1 row)
+ ]]></screen>
+ </para>
+ <para>
+ In addition to the structure checks, the function ensures that namespaces are correctly matched.
+ <screen><![CDATA[
+ SET xmloption TO DOCUMENT;
+ SELECT xml_is_well_formed('<pg:foo xmlns:pg="http://postgresql.org/stuff">bar</my:foo>');
+ xml_is_well_formed
+ --------------------
+ f
+ (1 row)
+
+ SELECT xml_is_well_formed('<pg:foo xmlns:pg="http://postgresql.org/stuff">bar</pg:foo>');
+ xml_is_well_formed
+ --------------------
+ t
+ (1 row)
+ ]]></screen>
+ </para>
+ </sect3>
+
+ <sect3>
+ <title>xml_is_well_formed_document</title>
+
+ <indexterm>
+ <primary>xml_is_well_formed_document</primary>
+ <secondary>well formed</secondary>
+ </indexterm>
+
+ <synopsis>
+ <function>xml_is_well_formed_document</function>(<replaceable>text</replaceable>)
+ </synopsis>
+
+ <para>
+ This function is similar to <function>xml_is_well_formed</function>,
+ differing only in the handling of the <literal>XMLOPTION</literal>. The
+ <function>xml_is_well_formed_document</function> ignores <literal>XMLOPTION</literal>
+ and assumes that it is currently set to <literal>DOCUMENT</literal>. Therefore
+ it assists in predetermining whether a cast to the XML document type
+ will succeed.
+ </para>
+ </sect3>
+
+ <sect3>
+ <title>xml_is_well_formed_content</title>
+
+ <indexterm>
+ <primary>xml_is_well_formed_content</primary>
+ <secondary>well formed</secondary>
+ </indexterm>
+
+ <synopsis>
+ <function>xml_is_well_formed_content</function>(<replaceable>text</replaceable>)
+ </synopsis>
+
+ <para>
+ This function is similar to <function>xml_is_well_formed</function>,
+ differing only in the handling of the <literal>XMLOPTION</literal>. The
+ <function>xml_is_well_formed_content</function> ignores <literal>XMLOPTION</literal>
+ and assumes that it is currently set to <literal>CONTENT</literal>. Therefore
+ it assists in predetermining whether a cast to the XML content type
+ will succeed.
+ </para>
+ </sect3>
</sect2>
<sect2 id="functions-xml-processing">
*** a/src/backend/utils/adt/xml.c
--- b/src/backend/utils/adt/xml.c
***************
*** 3565,3567 **** xpath_exists(PG_FUNCTION_ARGS)
--- 3565,3636 ----
return 0;
#endif
}
+
+ #ifdef USE_LIBXML
+ bool
+ wellformed_xml(text *data, XmlOptionType xmloption_arg)
+ {
+ bool result;
+ xmlDocPtr doc = NULL;
+
+ /* We want to catch any exceptions and return false */
+ PG_TRY();
+ {
+ doc = xml_parse(data, xmloption_arg, true, GetDatabaseEncoding());
+ result = true;
+ }
+ PG_CATCH();
+ {
+ FlushErrorState();
+ result = false;
+ }
+ PG_END_TRY();
+
+ if (doc)
+ xmlFreeDoc(doc);
+
+ return result;
+ }
+ #endif
+
+ Datum
+ xml_is_well_formed(PG_FUNCTION_ARGS)
+ {
+ #ifdef USE_LIBXML
+ text *data = PG_GETARG_TEXT_P(0);
+
+ PG_RETURN_BOOL(wellformed_xml(data, xmloption));
+
+ #else
+ NO_XML_SUPPORT();
+ return false;
+ #endif /* not USE_LIBXML */
+ }
+
+ Datum
+ xml_is_well_formed_document(PG_FUNCTION_ARGS)
+ {
+ #ifdef USE_LIBXML
+ text *data = PG_GETARG_TEXT_P(0);
+
+ PG_RETURN_BOOL(wellformed_xml(data, XMLOPTION_DOCUMENT));
+
+ #else
+ NO_XML_SUPPORT();
+ return false;
+ #endif /* not USE_LIBXML */
+ }
+
+ Datum
+ xml_is_well_formed_content(PG_FUNCTION_ARGS)
+ {
+ #ifdef USE_LIBXML
+ text *data = PG_GETARG_TEXT_P(0);
+
+ PG_RETURN_BOOL(wellformed_xml(data, XMLOPTION_CONTENT));
+
+ #else
+ NO_XML_SUPPORT();
+ return false;
+ #endif /* not USE_LIBXML */
+ }
*** a/src/include/catalog/pg_proc.h
--- b/src/include/catalog/pg_proc.h
***************
*** 4424,4429 **** DESCR("test XML value against XPath expression, with namespace support");
--- 4424,4436 ----
DATA(insert OID = 3050 ( xpath_exists PGNSP PGUID 14 1 0 0 f f f t f i 2 0 16 "25 142" _null_ _null_ _null_ _null_ "select pg_catalog.xpath_exists($1, $2, ''{}''::pg_catalog.text[])" _null_ _null_ _null_ ));
DESCR("test XML value against XPath expression");
+ DATA(insert OID = 3051 ( xml_is_well_formed PGNSP PGUID 12 1 0 0 f f f t f i 1 0 16 "25" _null_ _null_ _null_ _null_ xml_is_well_formed _null_ _null_ _null_ ));
+ DESCR("determine if a text fragment is well formed XML");
+ DATA(insert OID = 3052 ( xml_is_well_formed_document PGNSP PGUID 12 1 0 0 f f f t f i 1 0 16 "25" _null_ _null_ _null_ _null_ xml_is_well_formed_document _null_ _null_ _null_ ));
+ DESCR("determine if a text fragment is well formed XML document");
+ DATA(insert OID = 3053 ( xml_is_well_formed_content PGNSP PGUID 12 1 0 0 f f f t f i 1 0 16 "25" _null_ _null_ _null_ _null_ xml_is_well_formed_content _null_ _null_ _null_ ));
+ DESCR("determine if a text fragment is well formed XML content");
+
/* uuid */
DATA(insert OID = 2952 ( uuid_in PGNSP PGUID 12 1 0 0 f f f t f i 1 0 2950 "2275" _null_ _null_ _null_ _null_ uuid_in _null_ _null_ _null_ ));
DESCR("I/O");
*** a/src/include/utils/xml.h
--- b/src/include/utils/xml.h
***************
*** 40,45 **** extern Datum xpath(PG_FUNCTION_ARGS);
--- 40,49 ----
extern Datum xpath_exists(PG_FUNCTION_ARGS);
extern Datum xmlexists(PG_FUNCTION_ARGS);
+ extern Datum xml_is_well_formed(PG_FUNCTION_ARGS);
+ extern Datum xml_is_well_formed_document(PG_FUNCTION_ARGS);
+ extern Datum xml_is_well_formed_content(PG_FUNCTION_ARGS);
+
extern Datum table_to_xml(PG_FUNCTION_ARGS);
extern Datum query_to_xml(PG_FUNCTION_ARGS);
extern Datum cursor_to_xml(PG_FUNCTION_ARGS);
*** a/src/test/regress/expected/xml.out
--- b/src/test/regress/expected/xml.out
***************
*** 599,601 **** SELECT COUNT(id) FROM xmltest, query WHERE xmlexists(expr PASSING BY REF data);
--- 599,682 ----
2
(1 row)
+ -- Test xml_is_well_formed and variants
+ SELECT xml_is_well_formed_document('<foo>bar</foo>');
+ xml_is_well_formed_document
+ -----------------------------
+ t
+ (1 row)
+
+ SELECT xml_is_well_formed_document('abc');
+ xml_is_well_formed_document
+ -----------------------------
+ f
+ (1 row)
+
+ SELECT xml_is_well_formed_content('<foo>bar</foo>');
+ xml_is_well_formed_content
+ ----------------------------
+ t
+ (1 row)
+
+ SELECT xml_is_well_formed_content('abc');
+ xml_is_well_formed_content
+ ----------------------------
+ t
+ (1 row)
+
+ SET xmloption TO DOCUMENT;
+ SELECT xml_is_well_formed('<>');
+ xml_is_well_formed
+ --------------------
+ f
+ (1 row)
+
+ SELECT xml_is_well_formed('<abc/>');
+ xml_is_well_formed
+ --------------------
+ t
+ (1 row)
+
+ SELECT xml_is_well_formed('<foo>bar</foo>');
+ xml_is_well_formed
+ --------------------
+ t
+ (1 row)
+
+ SELECT xml_is_well_formed('<foo>bar</foo');
+ xml_is_well_formed
+ --------------------
+ f
+ (1 row)
+
+ SELECT xml_is_well_formed('<foo><bar>baz</foo>');
+ xml_is_well_formed
+ --------------------
+ f
+ (1 row)
+
+ SELECT xml_is_well_formed('<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number one</local:piece><local:piece id="2" /></local:data>');
+ xml_is_well_formed
+ --------------------
+ t
+ (1 row)
+
+ SELECT xml_is_well_formed('<pg:foo xmlns:pg="http://postgresql.org/stuff">bar</my:foo>');
+ xml_is_well_formed
+ --------------------
+ f
+ (1 row)
+
+ SELECT xml_is_well_formed('<pg:foo xmlns:pg="http://postgresql.org/stuff">bar</pg:foo>');
+ xml_is_well_formed
+ --------------------
+ t
+ (1 row)
+
+ SET xmloption TO CONTENT;
+ SELECT xml_is_well_formed('abc');
+ xml_is_well_formed
+ --------------------
+ t
+ (1 row)
+
*** a/src/test/regress/sql/xml.sql
--- b/src/test/regress/sql/xml.sql
***************
*** 190,192 **** SELECT COUNT(id) FROM xmltest WHERE xpath_exists('/myns:menu/myns:beers/myns:nam
--- 190,212 ----
CREATE TABLE query ( expr TEXT );
INSERT INTO query VALUES ('/menu/beers/cost[text() = ''lots'']');
SELECT COUNT(id) FROM xmltest, query WHERE xmlexists(expr PASSING BY REF data);
+
+ -- Test xml_is_well_formed and variants
+
+ SELECT xml_is_well_formed_document('<foo>bar</foo>');
+ SELECT xml_is_well_formed_document('abc');
+ SELECT xml_is_well_formed_content('<foo>bar</foo>');
+ SELECT xml_is_well_formed_content('abc');
+
+ SET xmloption TO DOCUMENT;
+ SELECT xml_is_well_formed('<>');
+ SELECT xml_is_well_formed('<abc/>');
+ SELECT xml_is_well_formed('<foo>bar</foo>');
+ SELECT xml_is_well_formed('<foo>bar</foo');
+ SELECT xml_is_well_formed('<foo><bar>baz</foo>');
+ SELECT xml_is_well_formed('<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number one</local:piece><local:piece id="2" /></local:data>');
+ SELECT xml_is_well_formed('<pg:foo xmlns:pg="http://postgresql.org/stuff">bar</my:foo>');
+ SELECT xml_is_well_formed('<pg:foo xmlns:pg="http://postgresql.org/stuff">bar</pg:foo>');
+
+ SET xmloption TO CONTENT;
+ SELECT xml_is_well_formed('abc');
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers