Hi!
Patch looks resonable, but I see some place to improvement:
spg_text_leaf_consistent() only needs to check with text_startswith() if
reconstucted value came to leaf consistent is shorter than given prefix. For
example, if level >= length of prefix then we guarantee that fully reconstracted
is matched too. But do not miss that you may need to return value for index only
scan, consult returnData field
In attachment rebased and minorly edited version of your patch.
--
Teodor Sigaev E-mail: teo...@sigaev.ru
WWW: http://www.sigaev.ru/
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 2f59af25a6..4dc11d8df2 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -2274,6 +2274,21 @@
<entry><literal>ph</literal></entry>
</row>
+ <row>
+ <entry>
+ <indexterm>
+ <primary>text_startswith</primary>
+ </indexterm>
+ <literal><function>text_startswith(<parameter>string</parameter>, <parameter>prefix</parameter>)</function></literal>
+ </entry>
+ <entry><type>bool</type></entry>
+ <entry>
+ Returns true if <parameter>string</parameter> starts with <parameter>prefix</parameter>.
+ </entry>
+ <entry><literal>text_startswith('alphabet', 'alph')</literal></entry>
+ <entry><literal>t</literal></entry>
+ </row>
+
<row>
<entry>
<indexterm>
@@ -4033,6 +4048,12 @@ cast(-44 as bit(12)) <lineannotation>111111010100</lineannotation>
ILIKE</function>, respectively. All of these operators are
<productname>PostgreSQL</productname>-specific.
</para>
+
+ <para>
+ There is also the prefix operator <literal>^@</literal> and corresponding
+ <function>text_startswith</function> function which covers cases when only
+ searching by beginning of the string is needed.
+ </para>
</sect2>
diff --git a/doc/src/sgml/spgist.sgml b/doc/src/sgml/spgist.sgml
index e47f70be89..06b7519052 100644
--- a/doc/src/sgml/spgist.sgml
+++ b/doc/src/sgml/spgist.sgml
@@ -161,6 +161,7 @@
<literal>~<~</literal>
<literal>~>=~</literal>
<literal>~>~</literal>
+ <literal>^@</literal>
</entry>
</row>
<row>
diff --git a/src/backend/access/spgist/spgtextproc.c b/src/backend/access/spgist/spgtextproc.c
index f156b2166e..f92ff68a5f 100644
--- a/src/backend/access/spgist/spgtextproc.c
+++ b/src/backend/access/spgist/spgtextproc.c
@@ -496,7 +496,7 @@ spg_text_inner_consistent(PG_FUNCTION_ARGS)
* well end with a partial multibyte character, so that applying
* any encoding-sensitive test to it would be risky anyhow.)
*/
- if (strategy > 10)
+ if (strategy > 10 && strategy != RTPrefixStrategyNumber)
{
if (collate_is_c)
strategy -= 10;
@@ -526,6 +526,10 @@ spg_text_inner_consistent(PG_FUNCTION_ARGS)
if (r < 0)
res = false;
break;
+ case RTPrefixStrategyNumber:
+ if (r != 0)
+ res = false;
+ break;
default:
elog(ERROR, "unrecognized strategy number: %d",
in->scankeys[j].sk_strategy);
@@ -601,10 +605,25 @@ spg_text_leaf_consistent(PG_FUNCTION_ARGS)
for (j = 0; j < in->nkeys; j++)
{
StrategyNumber strategy = in->scankeys[j].sk_strategy;
- text *query = DatumGetTextPP(in->scankeys[j].sk_argument);
- int queryLen = VARSIZE_ANY_EXHDR(query);
+ text *query;
+ int queryLen;
int r;
+ /* for this strategy collation is not important */
+ if (strategy == RTPrefixStrategyNumber)
+ {
+ res = DatumGetBool(DirectFunctionCall2(text_startswith,
+ out->leafValue, in->scankeys[j].sk_argument));
+
+ if (res) /* no need to consider remaining conditions */
+ break;
+
+ continue;
+ }
+
+ query = DatumGetTextPP(in->scankeys[j].sk_argument);
+ queryLen = VARSIZE_ANY_EXHDR(query);
+
if (strategy > 10)
{
/* Collation-aware comparison */
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index bf240aa9c5..de3ace442a 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -1204,7 +1204,6 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
Oid vartype;
Oid opfamily;
Pattern_Prefix_Status pstatus;
- Const *patt;
Const *prefix = NULL;
Selectivity rest_selec = 0;
double nullfrac = 0.0;
@@ -1316,9 +1315,21 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
* but because we want to be sure we cache compiled regexps under the
* right cache key, so that they can be re-used at runtime.
*/
- patt = (Const *) other;
- pstatus = pattern_fixed_prefix(patt, ptype, collation,
- &prefix, &rest_selec);
+ if (ptype == Pattern_Type_Prefix)
+ {
+ char *s = TextDatumGetCString(constval);
+ prefix = string_to_const(s, vartype);
+ pstatus = Pattern_Prefix_Partial;
+ rest_selec = 1.0; /* all */
+ pfree(s);
+ }
+ else
+ {
+ Const *patt = (Const *) other;
+
+ pstatus = pattern_fixed_prefix(patt, ptype, collation,
+ &prefix, &rest_selec);
+ }
/*
* If necessary, coerce the prefix constant to the right type.
@@ -1488,6 +1499,16 @@ likesel(PG_FUNCTION_ARGS)
}
/*
+ * prefixsel - selectivity of prefix operator
+ */
+Datum
+prefixsel(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Prefix, false));
+}
+
+/*
+ *
* iclikesel - Selectivity of ILIKE pattern match.
*/
Datum
@@ -2906,6 +2927,15 @@ likejoinsel(PG_FUNCTION_ARGS)
PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like, false));
}
+/*
+ * prefixjoinsel - Join selectivity of prefix operator
+ */
+Datum
+prefixjoinsel(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Prefix, false));
+}
+
/*
* iclikejoinsel - Join selectivity of ILIKE pattern match.
*/
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index 4346410d5a..cb721e9a57 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -1761,6 +1761,34 @@ text_ge(PG_FUNCTION_ARGS)
PG_RETURN_BOOL(result);
}
+Datum
+text_startswith(PG_FUNCTION_ARGS)
+{
+ Datum arg1 = PG_GETARG_DATUM(0);
+ Datum arg2 = PG_GETARG_DATUM(1);
+ bool result;
+ Size len1,
+ len2;
+
+ len1 = toast_raw_datum_size(arg1);
+ len2 = toast_raw_datum_size(arg2);
+ if (len2 > len1)
+ result = false;
+ else
+ {
+ text *targ1 = DatumGetTextPP(arg1);
+ text *targ2 = DatumGetTextPP(arg2);
+
+ result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
+ len2 - VARHDRSZ) == 0);
+
+ PG_FREE_IF_COPY(targ1, 0);
+ PG_FREE_IF_COPY(targ2, 1);
+ }
+
+ PG_RETURN_BOOL(result);
+}
+
Datum
bttextcmp(PG_FUNCTION_ARGS)
{
diff --git a/src/include/access/stratnum.h b/src/include/access/stratnum.h
index bddfac4c10..0db11a1117 100644
--- a/src/include/access/stratnum.h
+++ b/src/include/access/stratnum.h
@@ -68,8 +68,9 @@ typedef uint16 StrategyNumber;
#define RTSubEqualStrategyNumber 25 /* for inet <<= */
#define RTSuperStrategyNumber 26 /* for inet << */
#define RTSuperEqualStrategyNumber 27 /* for inet >>= */
+#define RTPrefixStrategyNumber 28 /* for text ^@ */
-#define RTMaxStrategyNumber 27
+#define RTMaxStrategyNumber 28
#endif /* STRATNUM_H */
diff --git a/src/include/catalog/pg_amop.h b/src/include/catalog/pg_amop.h
index 03af581df4..a01033b405 100644
--- a/src/include/catalog/pg_amop.h
+++ b/src/include/catalog/pg_amop.h
@@ -795,6 +795,7 @@ DATA(insert ( 4017 25 25 2 s 2315 4000 0 ));
DATA(insert ( 4017 25 25 3 s 98 4000 0 ));
DATA(insert ( 4017 25 25 4 s 2317 4000 0 ));
DATA(insert ( 4017 25 25 5 s 2318 4000 0 ));
+DATA(insert ( 4017 25 25 28 s 3449 4000 0 ));
DATA(insert ( 4017 25 25 11 s 664 4000 0 ));
DATA(insert ( 4017 25 25 12 s 665 4000 0 ));
DATA(insert ( 4017 25 25 14 s 667 4000 0 ));
diff --git a/src/include/catalog/pg_operator.h b/src/include/catalog/pg_operator.h
index e74f963eb5..4f7f4812fe 100644
--- a/src/include/catalog/pg_operator.h
+++ b/src/include/catalog/pg_operator.h
@@ -134,6 +134,8 @@ DESCR("less than");
DATA(insert OID = 98 ( "=" PGNSP PGUID b t t 25 25 16 98 531 texteq eqsel eqjoinsel ));
DESCR("equal");
#define TextEqualOperator 98
+DATA(insert OID = 3449 ( "^@" PGNSP PGUID b f f 25 25 16 0 0 text_startswith prefixsel prefixjoinsel ));
+DESCR("starts with");
DATA(insert OID = 349 ( "||" PGNSP PGUID b f f 2277 2283 2277 0 0 array_append - - ));
DESCR("append element onto end of array");
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index bfc90098f8..a99a628be9 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -209,6 +209,7 @@ DATA(insert OID = 64 ( int2lt PGNSP PGUID 12 1 0 0 0 f f t t f i s 2 0 16
DATA(insert OID = 65 ( int4eq PGNSP PGUID 12 1 0 0 0 f f t t f i s 2 0 16 "23 23" _null_ _null_ _null_ _null_ _null_ int4eq _null_ _null_ _null_ ));
DATA(insert OID = 66 ( int4lt PGNSP PGUID 12 1 0 0 0 f f t t f i s 2 0 16 "23 23" _null_ _null_ _null_ _null_ _null_ int4lt _null_ _null_ _null_ ));
DATA(insert OID = 67 ( texteq PGNSP PGUID 12 1 0 0 0 f f t t f i s 2 0 16 "25 25" _null_ _null_ _null_ _null_ _null_ texteq _null_ _null_ _null_ ));
+DATA(insert OID = 3450 ( text_startswith PGNSP PGUID 12 1 0 0 0 f f t t f i s 2 0 16 "25 25" _null_ _null_ _null_ _null_ _null_ text_startswith _null_ _null_ _null_ ));
DATA(insert OID = 68 ( xideq PGNSP PGUID 12 1 0 0 0 f f t t f i s 2 0 16 "28 28" _null_ _null_ _null_ _null_ _null_ xideq _null_ _null_ _null_ ));
DATA(insert OID = 3308 ( xidneq PGNSP PGUID 12 1 0 0 0 f f t t f i s 2 0 16 "28 28" _null_ _null_ _null_ _null_ _null_ xidneq _null_ _null_ _null_ ));
DATA(insert OID = 69 ( cideq PGNSP PGUID 12 1 0 0 0 f f t t f i s 2 0 16 "29 29" _null_ _null_ _null_ _null_ _null_ cideq _null_ _null_ _null_ ));
@@ -2568,6 +2569,10 @@ DATA(insert OID = 1828 ( nlikejoinsel PGNSP PGUID 12 1 0 0 0 f f f t f s s 5 0
DESCR("join selectivity of NOT LIKE");
DATA(insert OID = 1829 ( icregexnejoinsel PGNSP PGUID 12 1 0 0 0 f f f t f s s 5 0 701 "2281 26 2281 21 2281" _null_ _null_ _null_ _null_ _null_ icregexnejoinsel _null_ _null_ _null_ ));
DESCR("join selectivity of case-insensitive regex non-match");
+DATA(insert OID = 2579 ( prefixsel PGNSP PGUID 12 1 0 0 0 f f f t f s s 4 0 701 "2281 26 2281 23" _null_ _null_ _null_ _null_ _null_ prefixsel _null_ _null_ _null_ ));
+DESCR("restriction selectivity of exact prefix");
+DATA(insert OID = 2580 ( prefixjoinsel PGNSP PGUID 12 1 0 0 0 f f f t f s s 5 0 701 "2281 26 2281 21 2281" _null_ _null_ _null_ _null_ _null_ prefixjoinsel _null_ _null_ _null_ ));
+DESCR("join selectivity of exact prefix");
/* Aggregate-related functions */
DATA(insert OID = 1830 ( float8_avg PGNSP PGUID 12 1 0 0 0 f f f t f i s 1 0 701 "1022" _null_ _null_ _null_ _null_ _null_ float8_avg _null_ _null_ _null_ ));
diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h
index 299c9f846a..95e44280c4 100644
--- a/src/include/utils/selfuncs.h
+++ b/src/include/utils/selfuncs.h
@@ -87,8 +87,11 @@ typedef struct VariableStatData
typedef enum
{
- Pattern_Type_Like, Pattern_Type_Like_IC,
- Pattern_Type_Regex, Pattern_Type_Regex_IC
+ Pattern_Type_Like,
+ Pattern_Type_Like_IC,
+ Pattern_Type_Regex,
+ Pattern_Type_Regex_IC,
+ Pattern_Type_Prefix
} Pattern_Type;
typedef enum
diff --git a/src/test/regress/expected/create_index.out b/src/test/regress/expected/create_index.out
index 057faff2e5..6d32d4cb5b 100644
--- a/src/test/regress/expected/create_index.out
+++ b/src/test/regress/expected/create_index.out
@@ -372,6 +372,12 @@ SELECT count(*) FROM radix_text_tbl WHERE t ~>~ 'Worth
48
(1 row)
+SELECT count(*) FROM radix_text_tbl WHERE t ^@ 'Worth';
+ count
+-------
+ 2
+(1 row)
+
SELECT * FROM gpolygon_tbl ORDER BY f1 <-> '(0,0)'::point LIMIT 10;
f1
-------------------------------------------------
@@ -1182,6 +1188,21 @@ SELECT count(*) FROM radix_text_tbl WHERE t ~>~ 'Worth
48
(1 row)
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM radix_text_tbl WHERE t ^@ 'Worth';
+ QUERY PLAN
+------------------------------------------------------------
+ Aggregate
+ -> Index Only Scan using sp_radix_ind on radix_text_tbl
+ Index Cond: (t ^@ 'Worth'::text)
+(3 rows)
+
+SELECT count(*) FROM radix_text_tbl WHERE t ^@ 'Worth';
+ count
+-------
+ 2
+(1 row)
+
EXPLAIN (COSTS OFF)
SELECT * FROM gpolygon_tbl ORDER BY f1 <-> '(0,0)'::point LIMIT 10;
QUERY PLAN
diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out
index 01608d2c04..919248cdd2 100644
--- a/src/test/regress/expected/opr_sanity.out
+++ b/src/test/regress/expected/opr_sanity.out
@@ -718,6 +718,7 @@ sha224(bytea)
sha256(bytea)
sha384(bytea)
sha512(bytea)
+text_startswith(text,text)
macaddr8_eq(macaddr8,macaddr8)
macaddr8_lt(macaddr8,macaddr8)
macaddr8_le(macaddr8,macaddr8)
@@ -1887,7 +1888,8 @@ ORDER BY 1, 2, 3;
4000 | 25 | <<=
4000 | 26 | >>
4000 | 27 | >>=
-(121 rows)
+ 4000 | 28 | ^@
+(122 rows)
-- Check that all opclass search operators have selectivity estimators.
-- This is not absolutely required, but it seems a reasonable thing
diff --git a/src/test/regress/expected/spgist.out b/src/test/regress/expected/spgist.out
index 2d75bbf8dc..0011634b76 100644
--- a/src/test/regress/expected/spgist.out
+++ b/src/test/regress/expected/spgist.out
@@ -32,6 +32,15 @@ insert into spgist_text_tbl (id, t)
select g, 'f' || repeat('o', 100) || g from generate_series(1, 10000) g
union all
select g, 'baaaaaaaaaaaaaar' || g from generate_series(1, 1000) g;
+explain (costs off) select * from spgist_text_tbl where t ^@ 'baaaaaaaaaaaaaar11';
+ QUERY PLAN
+-------------------------------------------------------
+ Bitmap Heap Scan on spgist_text_tbl
+ Recheck Cond: (t ^@ 'baaaaaaaaaaaaaar11'::text)
+ -> Bitmap Index Scan on spgist_text_idx
+ Index Cond: (t ^@ 'baaaaaaaaaaaaaar11'::text)
+(4 rows)
+
-- Do a lot of insertions that have to split an existing node. Hopefully
-- one of these will cause the page to run out of space, causing the inner
-- tuple to be moved to another page.
diff --git a/src/test/regress/sql/create_index.sql b/src/test/regress/sql/create_index.sql
index 7f17588b0d..28c71abd74 100644
--- a/src/test/regress/sql/create_index.sql
+++ b/src/test/regress/sql/create_index.sql
@@ -224,6 +224,8 @@ SELECT count(*) FROM radix_text_tbl WHERE t > 'Worth
SELECT count(*) FROM radix_text_tbl WHERE t ~>~ 'Worth St ';
+SELECT count(*) FROM radix_text_tbl WHERE t ^@ 'Worth';
+
SELECT * FROM gpolygon_tbl ORDER BY f1 <-> '(0,0)'::point LIMIT 10;
SELECT circle_center(f1), round(radius(f1)) as radius FROM gcircle_tbl ORDER BY f1 <-> '(200,300)'::point LIMIT 10;
@@ -441,6 +443,10 @@ EXPLAIN (COSTS OFF)
SELECT count(*) FROM radix_text_tbl WHERE t ~>~ 'Worth St ';
SELECT count(*) FROM radix_text_tbl WHERE t ~>~ 'Worth St ';
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM radix_text_tbl WHERE t ^@ 'Worth';
+SELECT count(*) FROM radix_text_tbl WHERE t ^@ 'Worth';
+
EXPLAIN (COSTS OFF)
SELECT * FROM gpolygon_tbl ORDER BY f1 <-> '(0,0)'::point LIMIT 10;
SELECT * FROM gpolygon_tbl ORDER BY f1 <-> '(0,0)'::point LIMIT 10;
diff --git a/src/test/regress/sql/spgist.sql b/src/test/regress/sql/spgist.sql
index 77b43a2d3e..2b57fc836c 100644
--- a/src/test/regress/sql/spgist.sql
+++ b/src/test/regress/sql/spgist.sql
@@ -43,6 +43,8 @@ select g, 'f' || repeat('o', 100) || g from generate_series(1, 10000) g
union all
select g, 'baaaaaaaaaaaaaar' || g from generate_series(1, 1000) g;
+explain (costs off) select * from spgist_text_tbl where t ^@ 'baaaaaaaaaaaaaar11';
+
-- Do a lot of insertions that have to split an existing node. Hopefully
-- one of these will cause the page to run out of space, causing the inner
-- tuple to be moved to another page.