Hi!

Patch looks resonable, but I see some place to improvement:
spg_text_leaf_consistent() only needs to check with text_startswith() if reconstucted value came to leaf consistent is shorter than given prefix. For example, if level >= length of prefix then we guarantee that fully reconstracted is matched too. But do not miss that you may need to return value for index only scan, consult returnData field

In attachment rebased and minorly edited version of your patch.


--
Teodor Sigaev                                   E-mail: teo...@sigaev.ru
                                                   WWW: http://www.sigaev.ru/
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 2f59af25a6..4dc11d8df2 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -2274,6 +2274,21 @@
        <entry><literal>ph</literal></entry>
       </row>
 
+      <row>
+       <entry>
+        <indexterm>
+         <primary>text_startswith</primary>
+        </indexterm>
+        <literal><function>text_startswith(<parameter>string</parameter>, <parameter>prefix</parameter>)</function></literal>
+       </entry>
+       <entry><type>bool</type></entry>
+       <entry>
+        Returns true if <parameter>string</parameter> starts with <parameter>prefix</parameter>.
+       </entry>
+       <entry><literal>text_startswith('alphabet', 'alph')</literal></entry>
+       <entry><literal>t</literal></entry>
+      </row>
+
       <row>
        <entry>
         <indexterm>
@@ -4033,6 +4048,12 @@ cast(-44 as bit(12))           <lineannotation>111111010100</lineannotation>
     ILIKE</function>, respectively.  All of these operators are
     <productname>PostgreSQL</productname>-specific.
    </para>
+
+   <para>
+    There is also the prefix operator <literal>^@</literal> and corresponding
+    <function>text_startswith</function> function which covers cases when only
+    searching by beginning of the string is needed.
+   </para>
   </sect2>
 
 
diff --git a/doc/src/sgml/spgist.sgml b/doc/src/sgml/spgist.sgml
index e47f70be89..06b7519052 100644
--- a/doc/src/sgml/spgist.sgml
+++ b/doc/src/sgml/spgist.sgml
@@ -161,6 +161,7 @@
        <literal>~&lt;~</literal>
        <literal>~&gt;=~</literal>
        <literal>~&gt;~</literal>
+       <literal>^@</literal>
       </entry>
      </row>
      <row>
diff --git a/src/backend/access/spgist/spgtextproc.c b/src/backend/access/spgist/spgtextproc.c
index f156b2166e..f92ff68a5f 100644
--- a/src/backend/access/spgist/spgtextproc.c
+++ b/src/backend/access/spgist/spgtextproc.c
@@ -496,7 +496,7 @@ spg_text_inner_consistent(PG_FUNCTION_ARGS)
 			 * well end with a partial multibyte character, so that applying
 			 * any encoding-sensitive test to it would be risky anyhow.)
 			 */
-			if (strategy > 10)
+			if (strategy > 10 && strategy != RTPrefixStrategyNumber)
 			{
 				if (collate_is_c)
 					strategy -= 10;
@@ -526,6 +526,10 @@ spg_text_inner_consistent(PG_FUNCTION_ARGS)
 					if (r < 0)
 						res = false;
 					break;
+				case RTPrefixStrategyNumber:
+					if (r != 0)
+						res = false;
+					break;
 				default:
 					elog(ERROR, "unrecognized strategy number: %d",
 						 in->scankeys[j].sk_strategy);
@@ -601,10 +605,25 @@ spg_text_leaf_consistent(PG_FUNCTION_ARGS)
 	for (j = 0; j < in->nkeys; j++)
 	{
 		StrategyNumber strategy = in->scankeys[j].sk_strategy;
-		text	   *query = DatumGetTextPP(in->scankeys[j].sk_argument);
-		int			queryLen = VARSIZE_ANY_EXHDR(query);
+		text	   *query;
+		int			queryLen;
 		int			r;
 
+		/* for this strategy collation is not important */
+		if (strategy == RTPrefixStrategyNumber)
+		{
+			res = DatumGetBool(DirectFunctionCall2(text_startswith,
+				out->leafValue, in->scankeys[j].sk_argument));
+
+			if (res) /* no need to consider remaining conditions */
+				break;
+
+			continue;
+		}
+
+		query = DatumGetTextPP(in->scankeys[j].sk_argument);
+		queryLen = VARSIZE_ANY_EXHDR(query);
+
 		if (strategy > 10)
 		{
 			/* Collation-aware comparison */
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index bf240aa9c5..de3ace442a 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -1204,7 +1204,6 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
 	Oid			vartype;
 	Oid			opfamily;
 	Pattern_Prefix_Status pstatus;
-	Const	   *patt;
 	Const	   *prefix = NULL;
 	Selectivity rest_selec = 0;
 	double		nullfrac = 0.0;
@@ -1316,9 +1315,21 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
 	 * but because we want to be sure we cache compiled regexps under the
 	 * right cache key, so that they can be re-used at runtime.
 	 */
-	patt = (Const *) other;
-	pstatus = pattern_fixed_prefix(patt, ptype, collation,
-								   &prefix, &rest_selec);
+	if (ptype == Pattern_Type_Prefix)
+	{
+		char	*s = TextDatumGetCString(constval);
+		prefix = string_to_const(s, vartype);
+		pstatus = Pattern_Prefix_Partial;
+		rest_selec = 1.0;	/* all */
+		pfree(s);
+	}
+	else
+	{
+		Const *patt = (Const *) other;
+
+		pstatus = pattern_fixed_prefix(patt, ptype, collation,
+									   &prefix, &rest_selec);
+	}
 
 	/*
 	 * If necessary, coerce the prefix constant to the right type.
@@ -1488,6 +1499,16 @@ likesel(PG_FUNCTION_ARGS)
 }
 
 /*
+ *		prefixsel			- selectivity of prefix operator
+ */
+Datum
+prefixsel(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Prefix, false));
+}
+
+/*
+ *
  *		iclikesel			- Selectivity of ILIKE pattern match.
  */
 Datum
@@ -2906,6 +2927,15 @@ likejoinsel(PG_FUNCTION_ARGS)
 	PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like, false));
 }
 
+/*
+ *		prefixjoinsel			- Join selectivity of prefix operator
+ */
+Datum
+prefixjoinsel(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Prefix, false));
+}
+
 /*
  *		iclikejoinsel			- Join selectivity of ILIKE pattern match.
  */
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index 4346410d5a..cb721e9a57 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -1761,6 +1761,34 @@ text_ge(PG_FUNCTION_ARGS)
 	PG_RETURN_BOOL(result);
 }
 
+Datum
+text_startswith(PG_FUNCTION_ARGS)
+{
+	Datum		arg1 = PG_GETARG_DATUM(0);
+	Datum		arg2 = PG_GETARG_DATUM(1);
+	bool		result;
+	Size		len1,
+				len2;
+
+	len1 = toast_raw_datum_size(arg1);
+	len2 = toast_raw_datum_size(arg2);
+	if (len2 > len1)
+		result = false;
+	else
+	{
+		text	   *targ1 = DatumGetTextPP(arg1);
+		text	   *targ2 = DatumGetTextPP(arg2);
+
+		result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
+						 len2 - VARHDRSZ) == 0);
+
+		PG_FREE_IF_COPY(targ1, 0);
+		PG_FREE_IF_COPY(targ2, 1);
+	}
+
+	PG_RETURN_BOOL(result);
+}
+
 Datum
 bttextcmp(PG_FUNCTION_ARGS)
 {
diff --git a/src/include/access/stratnum.h b/src/include/access/stratnum.h
index bddfac4c10..0db11a1117 100644
--- a/src/include/access/stratnum.h
+++ b/src/include/access/stratnum.h
@@ -68,8 +68,9 @@ typedef uint16 StrategyNumber;
 #define RTSubEqualStrategyNumber		25	/* for inet <<= */
 #define RTSuperStrategyNumber			26	/* for inet << */
 #define RTSuperEqualStrategyNumber		27	/* for inet >>= */
+#define RTPrefixStrategyNumber			28	/* for text ^@ */
 
-#define RTMaxStrategyNumber				27
+#define RTMaxStrategyNumber				28
 
 
 #endif							/* STRATNUM_H */
diff --git a/src/include/catalog/pg_amop.h b/src/include/catalog/pg_amop.h
index 03af581df4..a01033b405 100644
--- a/src/include/catalog/pg_amop.h
+++ b/src/include/catalog/pg_amop.h
@@ -795,6 +795,7 @@ DATA(insert (	4017   25 25 2 s	2315 4000 0 ));
 DATA(insert (	4017   25 25 3 s	98	4000 0 ));
 DATA(insert (	4017   25 25 4 s	2317 4000 0 ));
 DATA(insert (	4017   25 25 5 s	2318 4000 0 ));
+DATA(insert (	4017   25 25 28 s	3449 4000 0 ));
 DATA(insert (	4017   25 25 11 s	664 4000 0 ));
 DATA(insert (	4017   25 25 12 s	665 4000 0 ));
 DATA(insert (	4017   25 25 14 s	667 4000 0 ));
diff --git a/src/include/catalog/pg_operator.h b/src/include/catalog/pg_operator.h
index e74f963eb5..4f7f4812fe 100644
--- a/src/include/catalog/pg_operator.h
+++ b/src/include/catalog/pg_operator.h
@@ -134,6 +134,8 @@ DESCR("less than");
 DATA(insert OID =  98 ( "="		   PGNSP PGUID b t t	25	25	16	98 531 texteq eqsel eqjoinsel ));
 DESCR("equal");
 #define TextEqualOperator	98
+DATA(insert OID =  3449 ( "^@"	   PGNSP PGUID b f f	25	25	16	0 0 text_startswith prefixsel prefixjoinsel ));
+DESCR("starts with");
 
 DATA(insert OID = 349 (  "||"	   PGNSP PGUID b f f 2277 2283 2277 0 0 array_append   -	   -	 ));
 DESCR("append element onto end of array");
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index bfc90098f8..a99a628be9 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -209,6 +209,7 @@ DATA(insert OID =  64 (  int2lt			   PGNSP PGUID 12 1 0 0 0 f f t t f i s 2 0 16
 DATA(insert OID =  65 (  int4eq			   PGNSP PGUID 12 1 0 0 0 f f t t f i s 2 0 16 "23 23" _null_ _null_ _null_ _null_ _null_ int4eq _null_ _null_ _null_ ));
 DATA(insert OID =  66 (  int4lt			   PGNSP PGUID 12 1 0 0 0 f f t t f i s 2 0 16 "23 23" _null_ _null_ _null_ _null_ _null_ int4lt _null_ _null_ _null_ ));
 DATA(insert OID =  67 (  texteq			   PGNSP PGUID 12 1 0 0 0 f f t t f i s 2 0 16 "25 25" _null_ _null_ _null_ _null_ _null_ texteq _null_ _null_ _null_ ));
+DATA(insert OID =  3450 ( text_startswith  PGNSP PGUID 12 1 0 0 0 f f t t f i s 2 0 16 "25 25" _null_ _null_ _null_ _null_ _null_ text_startswith _null_ _null_ _null_ ));
 DATA(insert OID =  68 (  xideq			   PGNSP PGUID 12 1 0 0 0 f f t t f i s 2 0 16 "28 28" _null_ _null_ _null_ _null_ _null_ xideq _null_ _null_ _null_ ));
 DATA(insert OID = 3308 (  xidneq		   PGNSP PGUID 12 1 0 0 0 f f t t f i s 2 0 16 "28 28" _null_ _null_ _null_ _null_ _null_ xidneq _null_ _null_ _null_ ));
 DATA(insert OID =  69 (  cideq			   PGNSP PGUID 12 1 0 0 0 f f t t f i s 2 0 16 "29 29" _null_ _null_ _null_ _null_ _null_ cideq _null_ _null_ _null_ ));
@@ -2568,6 +2569,10 @@ DATA(insert OID = 1828 ( nlikejoinsel		PGNSP PGUID 12 1 0 0 0 f f f t f s s 5 0
 DESCR("join selectivity of NOT LIKE");
 DATA(insert OID = 1829 ( icregexnejoinsel	PGNSP PGUID 12 1 0 0 0 f f f t f s s 5 0 701 "2281 26 2281 21 2281" _null_ _null_ _null_ _null_  _null_ icregexnejoinsel _null_ _null_ _null_ ));
 DESCR("join selectivity of case-insensitive regex non-match");
+DATA(insert OID = 2579 ( prefixsel			PGNSP PGUID 12 1 0 0 0 f f f t f s s 4 0 701 "2281 26 2281 23" _null_ _null_ _null_ _null_ _null_ prefixsel _null_ _null_ _null_ ));
+DESCR("restriction selectivity of exact prefix");
+DATA(insert OID = 2580 ( prefixjoinsel		PGNSP PGUID 12 1 0 0 0 f f f t f s s 5 0 701 "2281 26 2281 21 2281" _null_ _null_ _null_ _null_  _null_ prefixjoinsel _null_ _null_ _null_ ));
+DESCR("join selectivity of exact prefix");
 
 /* Aggregate-related functions */
 DATA(insert OID = 1830 (  float8_avg	   PGNSP PGUID 12 1 0 0 0 f f f t f i s 1 0 701 "1022" _null_ _null_ _null_ _null_ _null_ float8_avg _null_ _null_ _null_ ));
diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h
index 299c9f846a..95e44280c4 100644
--- a/src/include/utils/selfuncs.h
+++ b/src/include/utils/selfuncs.h
@@ -87,8 +87,11 @@ typedef struct VariableStatData
 
 typedef enum
 {
-	Pattern_Type_Like, Pattern_Type_Like_IC,
-	Pattern_Type_Regex, Pattern_Type_Regex_IC
+	Pattern_Type_Like,
+	Pattern_Type_Like_IC,
+	Pattern_Type_Regex,
+	Pattern_Type_Regex_IC,
+	Pattern_Type_Prefix
 } Pattern_Type;
 
 typedef enum
diff --git a/src/test/regress/expected/create_index.out b/src/test/regress/expected/create_index.out
index 057faff2e5..6d32d4cb5b 100644
--- a/src/test/regress/expected/create_index.out
+++ b/src/test/regress/expected/create_index.out
@@ -372,6 +372,12 @@ SELECT count(*) FROM radix_text_tbl WHERE t ~>~  'Worth
     48
 (1 row)
 
+SELECT count(*) FROM radix_text_tbl WHERE t ^@  'Worth';
+ count 
+-------
+     2
+(1 row)
+
 SELECT * FROM gpolygon_tbl ORDER BY f1 <-> '(0,0)'::point LIMIT 10;
                        f1                        
 -------------------------------------------------
@@ -1182,6 +1188,21 @@ SELECT count(*) FROM radix_text_tbl WHERE t ~>~  'Worth
     48
 (1 row)
 
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM radix_text_tbl WHERE t ^@	 'Worth';
+                         QUERY PLAN                         
+------------------------------------------------------------
+ Aggregate
+   ->  Index Only Scan using sp_radix_ind on radix_text_tbl
+         Index Cond: (t ^@ 'Worth'::text)
+(3 rows)
+
+SELECT count(*) FROM radix_text_tbl WHERE t ^@	 'Worth';
+ count 
+-------
+     2
+(1 row)
+
 EXPLAIN (COSTS OFF)
 SELECT * FROM gpolygon_tbl ORDER BY f1 <-> '(0,0)'::point LIMIT 10;
                      QUERY PLAN                      
diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out
index 01608d2c04..919248cdd2 100644
--- a/src/test/regress/expected/opr_sanity.out
+++ b/src/test/regress/expected/opr_sanity.out
@@ -718,6 +718,7 @@ sha224(bytea)
 sha256(bytea)
 sha384(bytea)
 sha512(bytea)
+text_startswith(text,text)
 macaddr8_eq(macaddr8,macaddr8)
 macaddr8_lt(macaddr8,macaddr8)
 macaddr8_le(macaddr8,macaddr8)
@@ -1887,7 +1888,8 @@ ORDER BY 1, 2, 3;
        4000 |           25 | <<=
        4000 |           26 | >>
        4000 |           27 | >>=
-(121 rows)
+       4000 |           28 | ^@
+(122 rows)
 
 -- Check that all opclass search operators have selectivity estimators.
 -- This is not absolutely required, but it seems a reasonable thing
diff --git a/src/test/regress/expected/spgist.out b/src/test/regress/expected/spgist.out
index 2d75bbf8dc..0011634b76 100644
--- a/src/test/regress/expected/spgist.out
+++ b/src/test/regress/expected/spgist.out
@@ -32,6 +32,15 @@ insert into spgist_text_tbl (id, t)
 select g, 'f' || repeat('o', 100) || g from generate_series(1, 10000) g
 union all
 select g, 'baaaaaaaaaaaaaar' || g from generate_series(1, 1000) g;
+explain (costs off) select * from spgist_text_tbl where t ^@ 'baaaaaaaaaaaaaar11';
+                      QUERY PLAN                       
+-------------------------------------------------------
+ Bitmap Heap Scan on spgist_text_tbl
+   Recheck Cond: (t ^@ 'baaaaaaaaaaaaaar11'::text)
+   ->  Bitmap Index Scan on spgist_text_idx
+         Index Cond: (t ^@ 'baaaaaaaaaaaaaar11'::text)
+(4 rows)
+
 -- Do a lot of insertions that have to split an existing node. Hopefully
 -- one of these will cause the page to run out of space, causing the inner
 -- tuple to be moved to another page.
diff --git a/src/test/regress/sql/create_index.sql b/src/test/regress/sql/create_index.sql
index 7f17588b0d..28c71abd74 100644
--- a/src/test/regress/sql/create_index.sql
+++ b/src/test/regress/sql/create_index.sql
@@ -224,6 +224,8 @@ SELECT count(*) FROM radix_text_tbl WHERE t >    'Worth
 
 SELECT count(*) FROM radix_text_tbl WHERE t ~>~  'Worth                         St  ';
 
+SELECT count(*) FROM radix_text_tbl WHERE t ^@  'Worth';
+
 SELECT * FROM gpolygon_tbl ORDER BY f1 <-> '(0,0)'::point LIMIT 10;
 
 SELECT circle_center(f1), round(radius(f1)) as radius FROM gcircle_tbl ORDER BY f1 <-> '(200,300)'::point LIMIT 10;
@@ -441,6 +443,10 @@ EXPLAIN (COSTS OFF)
 SELECT count(*) FROM radix_text_tbl WHERE t ~>~  'Worth                         St  ';
 SELECT count(*) FROM radix_text_tbl WHERE t ~>~  'Worth                         St  ';
 
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM radix_text_tbl WHERE t ^@	 'Worth';
+SELECT count(*) FROM radix_text_tbl WHERE t ^@	 'Worth';
+
 EXPLAIN (COSTS OFF)
 SELECT * FROM gpolygon_tbl ORDER BY f1 <-> '(0,0)'::point LIMIT 10;
 SELECT * FROM gpolygon_tbl ORDER BY f1 <-> '(0,0)'::point LIMIT 10;
diff --git a/src/test/regress/sql/spgist.sql b/src/test/regress/sql/spgist.sql
index 77b43a2d3e..2b57fc836c 100644
--- a/src/test/regress/sql/spgist.sql
+++ b/src/test/regress/sql/spgist.sql
@@ -43,6 +43,8 @@ select g, 'f' || repeat('o', 100) || g from generate_series(1, 10000) g
 union all
 select g, 'baaaaaaaaaaaaaar' || g from generate_series(1, 1000) g;
 
+explain (costs off) select * from spgist_text_tbl where t ^@ 'baaaaaaaaaaaaaar11';
+
 -- Do a lot of insertions that have to split an existing node. Hopefully
 -- one of these will cause the page to run out of space, causing the inner
 -- tuple to be moved to another page.

Reply via email to