Re: [HACKERS] regexp_match() returning text

Emre Hasegeli Sat, 04 Jun 2016 05:00:07 -0700

> The main problem being solved is the use of a SETOF result.  I'm inclined to
> prefer that the final, single, result is still an array.


I have changed it like that.  New patch attached.

> I've got a style issue with the information_schema - I like to call it
> useless-use-of-E'' -  but that was there long before this patch...

We'd better not touch it.

> /* user mustn't specify 'g' for regexp_split */ - do we add "or
> regexp_match" or just remove the extraneous detail?

I don't think it would be a nice error message.

> There seems to be scope creep regarding "regexp_split_to_table" that I'm
> surprised to find.  Related to that is the unexpected removal of the
> "force_glob" parameter to setup_regexp_matches.  You took what was a single
> block of code and now duplicated it without any explanation in the commit
> message (a code comment wouldn't work for this kind of change).  The change
> to flags from passing a pointer to text to passing in a pointer to a
> previously derived pg_re_flags makes more sense on its face, and it is
> apparently a non-public API, but again constitutes a refactoring that at
> least would ideally be a separate commit from the one that introduces the new
> behavior.

That check doesn't belong in setup_regexp_matches() in the first place.
The arguments of the function are organised to be caller agnostic,
and then it gives an error on behalf of regexp_split().  The check
fits better in the regexp_split() functions even with duplication.

I can split it to another patch, but I think these kinds of changes most
often go together.

Would you mind adding yourself to the reviewers on the Commitfest app?
I think you have already read through it.

Thanks for all the feedback.

From a6f24b34fdb39eaaca1d3819f7f528b1689725a4 Mon Sep 17 00:00:00 2001
From: Emre Hasegeli <e...@hasegeli.com>
Date: Sun, 29 May 2016 18:53:37 +0200
Subject: [PATCH] Add regexp_match()

---
 doc/src/sgml/citext.sgml              |   5 ++
 doc/src/sgml/func.sgml                |  62 +++++++++++++-----
 src/backend/utils/adt/regexp.c        | 119 +++++++++++++++++++++++++---------
 src/include/catalog/pg_proc.h         |   4 ++
 src/include/utils/builtins.h          |   2 +
 src/test/regress/expected/regex.out   |  28 ++++++++
 src/test/regress/expected/strings.out |   4 +-
 src/test/regress/sql/regex.sql        |   7 ++
 8 files changed, 183 insertions(+), 48 deletions(-)

diff --git a/doc/src/sgml/citext.sgml b/doc/src/sgml/citext.sgml
index 7fdf302..9b4c68f 100644
--- a/doc/src/sgml/citext.sgml
+++ b/doc/src/sgml/citext.sgml
@@ -119,20 +119,25 @@ SELECT * FROM users WHERE nick = 'Larry';
   </para>
 
   <para>
    Similarly, all of the following functions perform matching
    case-insensitively if their arguments are <type>citext</>:
   </para>
 
   <itemizedlist>
    <listitem>
     <para>
+      <function>regexp_match()</>
+    </para>
+   </listitem>
+   <listitem>
+    <para>
       <function>regexp_matches()</>
     </para>
    </listitem>
    <listitem>
     <para>
       <function>regexp_replace()</>
     </para>
    </listitem>
    <listitem>
     <para>
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index ff7545d..64e975e 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -1935,32 +1935,49 @@
         or, if the argument is null, return <literal>NULL</>.
         Embedded single-quotes and backslashes are properly doubled.
        </entry>
        <entry><literal>quote_nullable(42.5)</literal></entry>
        <entry><literal>'42.5'</literal></entry>
       </row>
 
       <row>
        <entry>
         <indexterm>
+         <primary>regexp_match</primary>
+        </indexterm>
+        <literal><function>regexp_match(<parameter>string</parameter> <type>text</type>, <parameter>pattern</parameter> <type>text</type> [, <parameter>flags</parameter> <type>text</type>])</function></literal>
+       </entry>
+       <entry><type>text[]</type></entry>
+       <entry>
+        Return a single captured substring resulting from matching a POSIX
+        regular expression against the <parameter>string</parameter>. See
+        <xref linkend="functions-posix-regexp"> for more information.
+       </entry>
+       <entry><literal>regexp_match('foobarbequebaz', '(bar)(beque)')</literal></entry>
+       <entry><literal>{bar,beque}</literal></entry>
+      </row>
+
+      <row>
+       <entry>
+        <indexterm>
          <primary>regexp_matches</primary>
         </indexterm>
         <literal><function>regexp_matches(<parameter>string</parameter> <type>text</type>, <parameter>pattern</parameter> <type>text</type> [, <parameter>flags</parameter> <type>text</type>])</function></literal>
        </entry>
        <entry><type>setof text[]</type></entry>
        <entry>
         Return all captured substrings resulting from matching a POSIX regular
         expression against the <parameter>string</parameter>. See
         <xref linkend="functions-posix-regexp"> for more information.
        </entry>
-       <entry><literal>regexp_matches('foobarbequebaz', '(bar)(beque)')</literal></entry>
-       <entry><literal>{bar,beque}</literal></entry>
+       <entry><literal>regexp_matches('foobarbequebaz', 'ba.', 'g')</literal></entry>
+       <entry><literal>{bar}</literal><para><literal>{baz}</literal></para> (2 rows)</entry>
       </row>
 
       <row>
        <entry>
         <indexterm>
          <primary>regexp_replace</primary>
         </indexterm>
         <literal><function>regexp_replace(<parameter>string</parameter> <type>text</type>, <parameter>pattern</parameter> <type>text</type>, <parameter>replacement</parameter> <type>text</type> [, <parameter>flags</parameter> <type>text</type>])</function></literal>
        </entry>
        <entry><type>text</type></entry>
@@ -4009,20 +4026,23 @@ substring('foobar' from '#"o_b#"%' for '#')    <lineannotation>NULL</lineannotat
     <primary>regular expression</primary>
     <seealso>pattern matching</seealso>
    </indexterm>
    <indexterm>
     <primary>substring</primary>
    </indexterm>
    <indexterm>
     <primary>regexp_replace</primary>
    </indexterm>
    <indexterm>
+    <primary>regexp_match</primary>
+   </indexterm>
+   <indexterm>
     <primary>regexp_matches</primary>
    </indexterm>
    <indexterm>
     <primary>regexp_split_to_table</primary>
    </indexterm>
    <indexterm>
     <primary>regexp_split_to_array</primary>
    </indexterm>
 
    <para>
@@ -4168,66 +4188,78 @@ substring('foobar' from 'o(.)b')   <lineannotation>o</lineannotation>
 regexp_replace('foobarbaz', 'b..', 'X')
                                    <lineannotation>fooXbaz</lineannotation>
 regexp_replace('foobarbaz', 'b..', 'X', 'g')
                                    <lineannotation>fooXX</lineannotation>
 regexp_replace('foobarbaz', 'b(..)', E'X\\1Y', 'g')
                                    <lineannotation>fooXarYXazY</lineannotation>
 </programlisting>
    </para>
 
     <para>
+     The <function>regexp_match</> function returns a text array of
+     the first captured substrings resulting from matching a POSIX
+     regular expression pattern.  It has the syntax
+     <function>regexp_match</function>(<replaceable>string</>,
+     <replaceable>pattern</> <optional>, <replaceable>flags</> </optional>).
+     If the <replaceable>pattern</> does not match, the function returns
+     <literal>NULL</>.  The <replaceable>flags</> parameter is
+     an optional text string containing zero or more single-letter flags
+     that change the function's behavior.  Supported flags
+     are described in <xref linkend="posix-embedded-options-table">.
+    </para>
+
+    <para>
      The <function>regexp_matches</> function returns a text array of
      all of the captured substrings resulting from matching a POSIX
-     regular expression pattern.  It has the syntax
-     <function>regexp_matches</function>(<replaceable>string</>, <replaceable>pattern</>
-     <optional>, <replaceable>flags</> </optional>).
+     regular expression pattern.  It has the same syntax as
+     <function>regexp_match</function>.
      The function can return no rows, one row, or multiple rows (see
      the <literal>g</> flag below).  If the <replaceable>pattern</>
      does not match, the function returns no rows.  If the pattern
      contains no parenthesized subexpressions, then each row
      returned is a single-element text array containing the substring
      matching the whole pattern.  If the pattern contains parenthesized
      subexpressions, the function returns a text array whose
      <replaceable>n</>'th element is the substring matching the
      <replaceable>n</>'th parenthesized subexpression of the pattern
      (not counting <quote>non-capturing</> parentheses; see below for
      details).
-     The <replaceable>flags</> parameter is an optional text
-     string containing zero or more single-letter flags that change the
-     function's behavior.  Flag <literal>g</> causes the function to find
+     It accepts the same <replaceable>flags</> parameter that
+     change the function's behavior with additional flag <literal>g</>. 
+     This flag causes the function to find
      each match in the string, not only the first one, and return a row for
      each such match.  Supported flags (though
      not <literal>g</>)
      are described in <xref linkend="posix-embedded-options-table">.
     </para>
 
    <para>
     Some examples:
 <programlisting>
-SELECT regexp_matches('foobarbequebaz', '(bar)(beque)');
+SELECT regexp_match('foobarbequebaz', 'barbeque');
+ regexp_match 
+--------------
+ {barbeque}
+(1 row)
+
+SELECT regexp_match('foobarbequebaz', '(bar)(beque)');
  regexp_matches 
 ----------------
  {bar,beque}
 (1 row)
 
 SELECT regexp_matches('foobarbequebazilbarfbonk', '(b[^b]+)(b[^b]+)', 'g');
  regexp_matches 
 ----------------
  {bar,beque}
  {bazil,barf}
 (2 rows)
-
-SELECT regexp_matches('foobarbequebaz', 'barbeque');
- regexp_matches 
-----------------
- {barbeque}
-(1 row)
 </programlisting>
    </para>
 
    <para>
     It is possible to force <function>regexp_matches()</> to always
     return one row by using a sub-select;  this is particularly useful
     in a <literal>SELECT</> target list when you want all rows
     returned, even non-matching ones:
 <programlisting>
 SELECT col1, (SELECT regexp_matches(col2, '(bar)(beque)')) FROM tab;
diff --git a/src/backend/utils/adt/regexp.c b/src/backend/utils/adt/regexp.c
index 5b216e0..9b47b5e 100644
--- a/src/backend/utils/adt/regexp.c
+++ b/src/backend/utils/adt/regexp.c
@@ -40,21 +40,21 @@
 	(PG_NARGS() > (_n) ? PG_GETARG_TEXT_PP(_n) : NULL)
 
 
 /* all the options of interest for regex functions */
 typedef struct pg_re_flags
 {
 	int			cflags;			/* compile flags for Spencer's regex code */
 	bool		glob;			/* do it globally (for each occurrence) */
 } pg_re_flags;
 
-/* cross-call state for regexp_matches(), also regexp_split() */
+/* cross-call state for regexp_match and regexp_split functions */
 typedef struct regexp_matches_ctx
 {
 	text	   *orig_str;		/* data string in original TEXT form */
 	int			nmatches;		/* number of places where pattern matched */
 	int			npatterns;		/* number of capturing subpatterns */
 	/* We store start char index and end+1 char index for each match */
 	/* so the number of entries in match_locs is nmatches * npatterns * 2 */
 	int		   *match_locs;		/* 0-based character indexes */
 	int			next_match;		/* 0-based index of next match to process */
 	/* workspace for build_regexp_matches_result() */
@@ -100,23 +100,22 @@ typedef struct cached_re_str
 	Oid			cre_collation;	/* collation to use */
 	regex_t		cre_re;			/* the compiled regular expression */
 } cached_re_str;
 
 static int	num_res = 0;		/* # of cached re's */
 static cached_re_str re_array[MAX_CACHED_RES];	/* cached re's */
 
 
 /* Local functions */
 static regexp_matches_ctx *setup_regexp_matches(text *orig_str, text *pattern,
-					 text *flags,
+					 pg_re_flags *flags,
 					 Oid collation,
-					 bool force_glob,
 					 bool use_subpatterns,
 					 bool ignore_degenerate);
 static void cleanup_regexp_matches(regexp_matches_ctx *matchctx);
 static ArrayType *build_regexp_matches_result(regexp_matches_ctx *matchctx);
 static Datum build_regexp_split_result(regexp_matches_ctx *splitctx);
 
 
 /*
  * RE_compile_and_cache - compile a RE, caching if possible
  *
@@ -834,43 +833,93 @@ similar_escape(PG_FUNCTION_ARGS)
 
 	*r++ = ')';
 	*r++ = '$';
 
 	SET_VARSIZE(result, r - ((char *) result));
 
 	PG_RETURN_TEXT_P(result);
 }
 
 /*
+ * regexp_match()
+ *		Return the match of a pattern within a string.
+ */
+Datum
+regexp_match(PG_FUNCTION_ARGS)
+{
+	text	   *orig_str = PG_GETARG_TEXT_PP(0);
+	text	   *pattern = PG_GETARG_TEXT_PP(1);
+	text	   *flags = PG_GETARG_TEXT_PP_IF_EXISTS(2);
+	pg_re_flags re_flags;
+	regexp_matches_ctx *matchctx;
+
+	/* Determine options */
+	parse_re_flags(&re_flags, flags);
+	/* User mustn't specify 'g'. */
+	if (re_flags.glob)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("regexp_match does not support the global option"),
+				 errhint("Use regexp_matches function instead.")));
+
+	matchctx = setup_regexp_matches(orig_str, pattern, &re_flags,
+									PG_GET_COLLATION(), true, false);
+
+	Assert(matchctx->nmatches == 0 || matchctx->nmatches == 1);
+
+	if (matchctx->nmatches == 1)
+	{
+		/* Pre-create workspace that build_regexp_matches_result needs */
+		matchctx->elems = (Datum *) palloc(sizeof(Datum) * matchctx->npatterns);
+		matchctx->nulls = (bool *) palloc(sizeof(bool) * matchctx->npatterns);
+
+		PG_RETURN_DATUM(PointerGetDatum(build_regexp_matches_result(matchctx)));
+	}
+
+	PG_RETURN_NULL();
+}
+
+/* This is separate to keep the opr_sanity regression test from complaining */
+Datum
+regexp_match_no_flags(PG_FUNCTION_ARGS)
+{
+	return regexp_match(fcinfo);
+}
+
+/*
  * regexp_matches()
  *		Return a table of matches of a pattern within a string.
  */
 Datum
 regexp_matches(PG_FUNCTION_ARGS)
 {
 	FuncCallContext *funcctx;
 	regexp_matches_ctx *matchctx;
 
 	if (SRF_IS_FIRSTCALL())
 	{
 		text	   *pattern = PG_GETARG_TEXT_PP(1);
 		text	   *flags = PG_GETARG_TEXT_PP_IF_EXISTS(2);
 		MemoryContext oldcontext;
+		pg_re_flags re_flags;
 
 		funcctx = SRF_FIRSTCALL_INIT();
 		oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
 
+		/* Determine options */
+		parse_re_flags(&re_flags, flags);
+
 		/* be sure to copy the input string into the multi-call ctx */
 		matchctx = setup_regexp_matches(PG_GETARG_TEXT_P_COPY(0), pattern,
-										flags,
+										&re_flags,
 										PG_GET_COLLATION(),
-										false, true, false);
+										true, false);
 
 		/* Pre-create workspace that build_regexp_matches_result needs */
 		matchctx->elems = (Datum *) palloc(sizeof(Datum) * matchctx->npatterns);
 		matchctx->nulls = (bool *) palloc(sizeof(bool) * matchctx->npatterns);
 
 		MemoryContextSwitchTo(oldcontext);
 		funcctx->user_fctx = (void *) matchctx;
 	}
 
 	funcctx = SRF_PERCALL_SETUP();
@@ -892,92 +941,78 @@ regexp_matches(PG_FUNCTION_ARGS)
 }
 
 /* This is separate to keep the opr_sanity regression test from complaining */
 Datum
 regexp_matches_no_flags(PG_FUNCTION_ARGS)
 {
 	return regexp_matches(fcinfo);
 }
 
 /*
- * setup_regexp_matches --- do the initial matching for regexp_matches()
- *		or regexp_split()
+ * setup_regexp_matches --- do the initial matching for regexp_match
+ *		and regexp_split functions
  *
  * To avoid having to re-find the compiled pattern on each call, we do
  * all the matching in one swoop.  The returned regexp_matches_ctx contains
  * the locations of all the substrings matching the pattern.
  *
- * The three bool parameters have only two patterns (one for each caller)
+ * The two bool parameters have only two patterns (one for each caller)
  * but it seems clearer to distinguish the functionality this way than to
  * key it all off one "is_split" flag.
  */
 static regexp_matches_ctx *
-setup_regexp_matches(text *orig_str, text *pattern, text *flags,
+setup_regexp_matches(text *orig_str, text *pattern, pg_re_flags *re_flags,
 					 Oid collation,
-					 bool force_glob, bool use_subpatterns,
+					 bool use_subpatterns,
 					 bool ignore_degenerate)
 {
 	regexp_matches_ctx *matchctx = palloc0(sizeof(regexp_matches_ctx));
 	int			orig_len;
 	pg_wchar   *wide_str;
 	int			wide_len;
-	pg_re_flags re_flags;
 	regex_t    *cpattern;
 	regmatch_t *pmatch;
 	int			pmatch_len;
 	int			array_len;
 	int			array_idx;
 	int			prev_match_end;
 	int			start_search;
 
 	/* save original string --- we'll extract result substrings from it */
 	matchctx->orig_str = orig_str;
 
 	/* convert string to pg_wchar form for matching */
 	orig_len = VARSIZE_ANY_EXHDR(orig_str);
 	wide_str = (pg_wchar *) palloc(sizeof(pg_wchar) * (orig_len + 1));
 	wide_len = pg_mb2wchar_with_len(VARDATA_ANY(orig_str), wide_str, orig_len);
 
-	/* determine options */
-	parse_re_flags(&re_flags, flags);
-	if (force_glob)
-	{
-		/* user mustn't specify 'g' for regexp_split */
-		if (re_flags.glob)
-			ereport(ERROR,
-					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-				 errmsg("regexp_split does not support the global option")));
-		/* but we find all the matches anyway */
-		re_flags.glob = true;
-	}
-
 	/* set up the compiled pattern */
-	cpattern = RE_compile_and_cache(pattern, re_flags.cflags, collation);
+	cpattern = RE_compile_and_cache(pattern, re_flags->cflags, collation);
 
 	/* do we want to remember subpatterns? */
 	if (use_subpatterns && cpattern->re_nsub > 0)
 	{
 		matchctx->npatterns = cpattern->re_nsub;
 		pmatch_len = cpattern->re_nsub + 1;
 	}
 	else
 	{
 		use_subpatterns = false;
 		matchctx->npatterns = 1;
 		pmatch_len = 1;
 	}
 
 	/* temporary output space for RE package */
 	pmatch = palloc(sizeof(regmatch_t) * pmatch_len);
 
 	/* the real output space (grown dynamically if needed) */
-	array_len = re_flags.glob ? 256 : 32;
+	array_len = re_flags->glob ? 256 : 32;
 	matchctx->match_locs = (int *) palloc(sizeof(int) * array_len);
 	array_idx = 0;
 
 	/* search for the pattern, perhaps repeatedly */
 	prev_match_end = 0;
 	start_search = 0;
 	while (RE_wchar_execute(cpattern, wide_str, wide_len, start_search,
 							pmatch_len, pmatch))
 	{
 		/*
@@ -1011,21 +1046,21 @@ setup_regexp_matches(text *orig_str, text *pattern, text *flags,
 			else
 			{
 				matchctx->match_locs[array_idx++] = pmatch[0].rm_so;
 				matchctx->match_locs[array_idx++] = pmatch[0].rm_eo;
 			}
 			matchctx->nmatches++;
 		}
 		prev_match_end = pmatch[0].rm_eo;
 
 		/* if not glob, stop after one match */
-		if (!re_flags.glob)
+		if (!re_flags->glob)
 			break;
 
 		/*
 		 * Advance search position.  Normally we start the next search at the
 		 * end of the previous match; but if the match was of zero length, we
 		 * have to advance by one character, or we'd just find the same match
 		 * again.
 		 */
 		start_search = prev_match_end;
 		if (pmatch[0].rm_so == pmatch[0].rm_eo)
@@ -1108,29 +1143,40 @@ Datum
 regexp_split_to_table(PG_FUNCTION_ARGS)
 {
 	FuncCallContext *funcctx;
 	regexp_matches_ctx *splitctx;
 
 	if (SRF_IS_FIRSTCALL())
 	{
 		text	   *pattern = PG_GETARG_TEXT_PP(1);
 		text	   *flags = PG_GETARG_TEXT_PP_IF_EXISTS(2);
 		MemoryContext oldcontext;
+		pg_re_flags re_flags;
 
 		funcctx = SRF_FIRSTCALL_INIT();
 		oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
 
+		/* Determine options */
+		parse_re_flags(&re_flags, flags);
+		/* User mustn't specify 'g' for regexp_split. */
+		if (re_flags.glob)
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+			 errmsg("regexp_split_to_table does not support the global option")));
+		/* We find all the matches anyway. */
+		re_flags.glob = true;
+
 		/* be sure to copy the input string into the multi-call ctx */
 		splitctx = setup_regexp_matches(PG_GETARG_TEXT_P_COPY(0), pattern,
-										flags,
+										&re_flags,
 										PG_GET_COLLATION(),
-										true, false, true);
+										false, true);
 
 		MemoryContextSwitchTo(oldcontext);
 		funcctx->user_fctx = (void *) splitctx;
 	}
 
 	funcctx = SRF_PERCALL_SETUP();
 	splitctx = (regexp_matches_ctx *) funcctx->user_fctx;
 
 	if (splitctx->next_match <= splitctx->nmatches)
 	{
@@ -1156,26 +1202,37 @@ regexp_split_to_table_no_flags(PG_FUNCTION_ARGS)
 /*
  * regexp_split_to_array()
  *		Split the string at matches of the pattern, returning the
  *		split-out substrings as an array.
  */
 Datum
 regexp_split_to_array(PG_FUNCTION_ARGS)
 {
 	ArrayBuildState *astate = NULL;
 	regexp_matches_ctx *splitctx;
+	pg_re_flags re_flags;
+
+	/* Determine options */
+	parse_re_flags(&re_flags, PG_GETARG_TEXT_PP_IF_EXISTS(2));
+	/* User mustn't specify 'g'. */
+	if (re_flags.glob)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+		 errmsg("regexp_split_to_array does not support the global option")));
+	/* We find all the matches anyway. */
+	re_flags.glob = true;
 
 	splitctx = setup_regexp_matches(PG_GETARG_TEXT_PP(0),
 									PG_GETARG_TEXT_PP(1),
-									PG_GETARG_TEXT_PP_IF_EXISTS(2),
+									&re_flags,
 									PG_GET_COLLATION(),
-									true, false, true);
+									false, true);
 
 	while (splitctx->next_match <= splitctx->nmatches)
 	{
 		astate = accumArrayResult(astate,
 								  build_regexp_split_result(splitctx),
 								  false,
 								  TEXTOID,
 								  CurrentMemoryContext);
 		splitctx->next_match++;
 	}
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index d94943b..6c8b93b 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -1898,20 +1898,24 @@ DESCR("trim spaces from both ends of string");
 DATA(insert OID =  936 (  substring    PGNSP PGUID 12 1 0 0 0 f f f f t f i s 3 0 25 "25 23 23" _null_ _null_ _null_ _null_ _null_	text_substr _null_ _null_ _null_ ));
 DESCR("extract portion of string");
 DATA(insert OID =  937 (  substring    PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 25 "25 23" _null_ _null_ _null_ _null_ _null_ text_substr_no_len _null_ _null_ _null_ ));
 DESCR("extract portion of string");
 DATA(insert OID =  2087 ( replace	   PGNSP PGUID 12 1 0 0 0 f f f f t f i s 3 0 25 "25 25 25" _null_ _null_ _null_ _null_ _null_	replace_text _null_ _null_ _null_ ));
 DESCR("replace all occurrences in string of old_substr with new_substr");
 DATA(insert OID =  2284 ( regexp_replace	   PGNSP PGUID 12 1 0 0 0 f f f f t f i s 3 0 25 "25 25 25" _null_ _null_ _null_ _null_ _null_	textregexreplace_noopt _null_ _null_ _null_ ));
 DESCR("replace text using regexp");
 DATA(insert OID =  2285 ( regexp_replace	   PGNSP PGUID 12 1 0 0 0 f f f f t f i s 4 0 25 "25 25 25 25" _null_ _null_ _null_ _null_ _null_ textregexreplace _null_ _null_ _null_ ));
 DESCR("replace text using regexp");
+DATA(insert OID =  3380 ( regexp_match     PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 1009 "25 25" _null_ _null_ _null_ _null_ _null_ regexp_match_no_flags _null_ _null_ _null_ ));
+DESCR("find one match for regexp");
+DATA(insert OID =  3381 ( regexp_match     PGNSP PGUID 12 1 0 0 0 f f f f t f i s 3 0 1009 "25 25 25" _null_ _null_ _null_ _null_ _null_ regexp_match _null_ _null_ _null_ ));
+DESCR("find one match for regexp");
 DATA(insert OID =  2763 ( regexp_matches   PGNSP PGUID 12 1 1 0 0 f f f f t t i s 2 0 1009 "25 25" _null_ _null_ _null_ _null_ _null_ regexp_matches_no_flags _null_ _null_ _null_ ));
 DESCR("find all match groups for regexp");
 DATA(insert OID =  2764 ( regexp_matches   PGNSP PGUID 12 1 10 0 0 f f f f t t i s 3 0 1009 "25 25 25" _null_ _null_ _null_ _null_ _null_ regexp_matches _null_ _null_ _null_ ));
 DESCR("find all match groups for regexp");
 DATA(insert OID =  2088 ( split_part   PGNSP PGUID 12 1 0 0 0 f f f f t f i s 3 0 25 "25 25 23" _null_ _null_ _null_ _null_ _null_	split_text _null_ _null_ _null_ ));
 DESCR("split string by field_sep and return field_num");
 DATA(insert OID =  2765 ( regexp_split_to_table PGNSP PGUID 12 1 1000 0 0 f f f f t t i s 2 0 25 "25 25" _null_ _null_ _null_ _null_ _null_	regexp_split_to_table_no_flags _null_ _null_ _null_ ));
 DESCR("split string by pattern");
 DATA(insert OID =  2766 ( regexp_split_to_table PGNSP PGUID 12 1 1000 0 0 f f f f t t i s 3 0 25 "25 25 25" _null_ _null_ _null_ _null_ _null_	regexp_split_to_table _null_ _null_ _null_ ));
 DESCR("split string by pattern");
diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h
index 01976a1..9e295df 100644
--- a/src/include/utils/builtins.h
+++ b/src/include/utils/builtins.h
@@ -614,20 +614,22 @@ extern Datum nameregexne(PG_FUNCTION_ARGS);
 extern Datum textregexeq(PG_FUNCTION_ARGS);
 extern Datum textregexne(PG_FUNCTION_ARGS);
 extern Datum nameicregexeq(PG_FUNCTION_ARGS);
 extern Datum nameicregexne(PG_FUNCTION_ARGS);
 extern Datum texticregexeq(PG_FUNCTION_ARGS);
 extern Datum texticregexne(PG_FUNCTION_ARGS);
 extern Datum textregexsubstr(PG_FUNCTION_ARGS);
 extern Datum textregexreplace_noopt(PG_FUNCTION_ARGS);
 extern Datum textregexreplace(PG_FUNCTION_ARGS);
 extern Datum similar_escape(PG_FUNCTION_ARGS);
+extern Datum regexp_match(PG_FUNCTION_ARGS);
+extern Datum regexp_match_no_flags(PG_FUNCTION_ARGS);
 extern Datum regexp_matches(PG_FUNCTION_ARGS);
 extern Datum regexp_matches_no_flags(PG_FUNCTION_ARGS);
 extern Datum regexp_split_to_table(PG_FUNCTION_ARGS);
 extern Datum regexp_split_to_table_no_flags(PG_FUNCTION_ARGS);
 extern Datum regexp_split_to_array(PG_FUNCTION_ARGS);
 extern Datum regexp_split_to_array_no_flags(PG_FUNCTION_ARGS);
 extern char *regexp_fixed_prefix(text *text_re, bool case_insensitive,
 					Oid collation, bool *exact);
 
 /* regproc.c */
diff --git a/src/test/regress/expected/regex.out b/src/test/regress/expected/regex.out
index af09719..47c0f49 100644
--- a/src/test/regress/expected/regex.out
+++ b/src/test/regress/expected/regex.out
@@ -83,20 +83,48 @@ select substring('a' from '((a))+');
 -----------
  a
 (1 row)
 
 select substring('a' from '((a)+)');
  substring 
 -----------
  a
 (1 row)
 
+-- Test regexp_match()
+select regexp_match('abc', '');
+ regexp_match 
+--------------
+ {""}
+(1 row)
+
+select regexp_match('abc', 'bc');
+ regexp_match 
+--------------
+ {bc}
+(1 row)
+
+select regexp_match('abc', 'd') is null;
+ ?column? 
+----------
+ t
+(1 row)
+
+select regexp_match('abc', '(B)(c)', 'i');
+ regexp_match 
+--------------
+ {b,c}
+(1 row)
+
+select regexp_match('abc', 'Bd', 'ig'); -- error
+ERROR:  regexp_match does not support the global option
+HINT:  Use regexp_matches function instead.
 -- Test lookahead constraints
 select regexp_matches('ab', 'a(?=b)b*');
  regexp_matches 
 ----------------
  {ab}
 (1 row)
 
 select regexp_matches('a', 'a(?=b)b*');
  regexp_matches 
 ----------------
diff --git a/src/test/regress/expected/strings.out b/src/test/regress/expected/strings.out
index 19708c3..35cadb2 100644
--- a/src/test/regress/expected/strings.out
+++ b/src/test/regress/expected/strings.out
@@ -674,23 +674,23 @@ SELECT regexp_split_to_array('123456','.');
  {"","","","","","",""}
 (1 row)
 
 -- errors
 SELECT foo, length(foo) FROM regexp_split_to_table('thE QUick bROWn FOx jUMPs ovEr The lazy dOG', 'e', 'zippy') AS foo;
 ERROR:  invalid regexp option: "z"
 SELECT regexp_split_to_array('thE QUick bROWn FOx jUMPs ovEr The lazy dOG', 'e', 'iz');
 ERROR:  invalid regexp option: "z"
 -- global option meaningless for regexp_split
 SELECT foo, length(foo) FROM regexp_split_to_table('thE QUick bROWn FOx jUMPs ovEr The lazy dOG', 'e', 'g') AS foo;
-ERROR:  regexp_split does not support the global option
+ERROR:  regexp_split_to_table does not support the global option
 SELECT regexp_split_to_array('thE QUick bROWn FOx jUMPs ovEr The lazy dOG', 'e', 'g');
-ERROR:  regexp_split does not support the global option
+ERROR:  regexp_split_to_array does not support the global option
 -- change NULL-display back
 \pset null ''
 -- E021-11 position expression
 SELECT POSITION('4' IN '1234567890') = '4' AS "4";
  4 
 ---
  t
 (1 row)
 
 SELECT POSITION('5' IN '1234567890') = '5' AS "5";
diff --git a/src/test/regress/sql/regex.sql b/src/test/regress/sql/regex.sql
index 1028ca6..1361b62 100644
--- a/src/test/regress/sql/regex.sql
+++ b/src/test/regress/sql/regex.sql
@@ -18,20 +18,27 @@ select 'abc abd abc' ~ '^(\w+)( \1)+$' as f;
 select 'abc abc abd' ~ '^(\w+)( \1)+$' as f;
 select 'abc abc abc' ~ '^(.+)( \1)+$' as t;
 select 'abc abd abc' ~ '^(.+)( \1)+$' as f;
 select 'abc abc abd' ~ '^(.+)( \1)+$' as f;
 
 -- Test some cases that crashed in 9.2beta1 due to pmatch[] array overrun
 select substring('asd TO foo' from ' TO (([a-z0-9._]+|"([^"]+|"")+")+)');
 select substring('a' from '((a))+');
 select substring('a' from '((a)+)');
 
+-- Test regexp_match()
+select regexp_match('abc', '');
+select regexp_match('abc', 'bc');
+select regexp_match('abc', 'd') is null;
+select regexp_match('abc', '(B)(c)', 'i');
+select regexp_match('abc', 'Bd', 'ig'); -- error
+
 -- Test lookahead constraints
 select regexp_matches('ab', 'a(?=b)b*');
 select regexp_matches('a', 'a(?=b)b*');
 select regexp_matches('abc', 'a(?=b)b*(?=c)c*');
 select regexp_matches('ab', 'a(?=b)b*(?=c)c*');
 select regexp_matches('ab', 'a(?!b)b*');
 select regexp_matches('a', 'a(?!b)b*');
 select regexp_matches('b', '(?=b)b');
 select regexp_matches('a', '(?=b)b');
 
-- 
2.7.4 (Apple Git-66)

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Re: [HACKERS] regexp_match() returning text

Reply via email to