On Sun, Mar 24, 2019 at 11:52 AM Julien Rouhaud <[email protected]> wrote:
>
> Marc (in Cc) reported me a problematic query using a GIN index hit in
> production. The issue is that even if an GIN opclass says that the
> index can be used for an operator, it's still possible that some
> values aren't really compatible and requires a full index scan.
>
> One simple example is with a GIN pg_trgm index (but other opclasses
> have similar restrictions) , doing a LIKE with wildcard on both side,
> where the pattern is shorter than a trigram, e.g. col LIKE '%a%'. So,
> a where clause of the form:
>
> WHERE col LIKE '%verylongpattern%' AND col LIKE '%a%'
>
> is much more expensive than
>
> WHERE col LKE '%verylongpattern%'
>
> While there's nothing to do if the unhandled const is the only
> predicate, if there are multiple AND-ed predicates and at least one of
> them doesn't require a full index scan, we can avoid it.
>
> Attached patch tries to fix the issue by detecting such cases and
> dropping the unhandled quals in the BitmapIndexScan, letting the
> recheck in BitmapHeapScan do the proper filtering. I'm not happy to
> call the extractQuery support functions an additional time, but i
> didn't find a cleaner way. This is of course intended for pg13.
Patch doesn't apply anymore (thanks cfbot). Rebased patch attached.
diff --git a/contrib/pg_trgm/expected/pg_trgm.out b/contrib/pg_trgm/expected/pg_trgm.out
index b3e709f496..6d96fca65f 100644
--- a/contrib/pg_trgm/expected/pg_trgm.out
+++ b/contrib/pg_trgm/expected/pg_trgm.out
@@ -3498,6 +3498,37 @@ select count(*) from test_trgm where t ~ '[qwerty]{2}-?[qwerty]{2}';
1000
(1 row)
+explain (costs off)
+ select * from test_trgm where t like '%0%' and t like '%azerty%';
+ QUERY PLAN
+---------------------------------------------
+ Bitmap Heap Scan on test_trgm
+ Recheck Cond: (t ~~ '%azerty%'::text)
+ Filter: (t ~~ '%0%'::text)
+ -> Bitmap Index Scan on trgm_idx
+ Index Cond: (t ~~ '%azerty%'::text)
+(5 rows)
+
+select * from test_trgm where t like '%0%' and t like '%azerty%';
+ t
+---
+(0 rows)
+
+explain (costs off)
+ select * from test_trgm where t like '%0%' and t like '%az%';
+ QUERY PLAN
+------------------------------------------------------------------
+ Bitmap Heap Scan on test_trgm
+ Recheck Cond: ((t ~~ '%0%'::text) AND (t ~~ '%az%'::text))
+ -> Bitmap Index Scan on trgm_idx
+ Index Cond: ((t ~~ '%0%'::text) AND (t ~~ '%az%'::text))
+(4 rows)
+
+select * from test_trgm where t like '%0%' and t like '%az%';
+ t
+---
+(0 rows)
+
create table test2(t text COLLATE "C");
insert into test2 values ('abcdef');
insert into test2 values ('quark');
diff --git a/contrib/pg_trgm/sql/pg_trgm.sql b/contrib/pg_trgm/sql/pg_trgm.sql
index 08459e64c3..9cdb6fda14 100644
--- a/contrib/pg_trgm/sql/pg_trgm.sql
+++ b/contrib/pg_trgm/sql/pg_trgm.sql
@@ -55,6 +55,13 @@ select t,similarity(t,'gwertyu0988') as sml from test_trgm where t % 'gwertyu098
select t,similarity(t,'gwertyu1988') as sml from test_trgm where t % 'gwertyu1988' order by sml desc, t;
select count(*) from test_trgm where t ~ '[qwerty]{2}-?[qwerty]{2}';
+explain (costs off)
+ select * from test_trgm where t like '%0%' and t like '%azerty%';
+select * from test_trgm where t like '%0%' and t like '%azerty%';
+explain (costs off)
+ select * from test_trgm where t like '%0%' and t like '%az%';
+select * from test_trgm where t like '%0%' and t like '%az%';
+
create table test2(t text COLLATE "C");
insert into test2 values ('abcdef');
insert into test2 values ('quark');
diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c
index c208e9bfb0..82fdf44905 100644
--- a/src/backend/optimizer/path/indxpath.c
+++ b/src/backend/optimizer/path/indxpath.c
@@ -33,6 +33,7 @@
#include "optimizer/prep.h"
#include "optimizer/restrictinfo.h"
#include "utils/lsyscache.h"
+#include "utils/index_selfuncs.h"
#include "utils/selfuncs.h"
@@ -973,6 +974,24 @@ build_index_paths(PlannerInfo *root, RelOptInfo *rel,
return NIL;
}
+ /*
+ * GIN access method can require a full index scan. However, if there are
+ * multiple AND-ed quals and at least one of them doesn't require a full
+ * index scan, we can avoid the full scan by dropping all the quals
+ * requiring it and let the recheck do the proper filtering.
+ */
+ if (index_clauses != NIL && list_length(index_clauses) > 1 &&
+ index->relam == GIN_AM_OID)
+ {
+ Relids old_outer_relids = bms_copy(outer_relids);
+
+ bms_free(outer_relids);
+ outer_relids = bms_copy(rel->lateral_relids);
+
+ index_clauses = gin_get_optimizable_quals(root, index, index_clauses,
+ &outer_relids, old_outer_relids);
+ }
+
/* We do not want the index's rel itself listed in outer_relids */
outer_relids = bms_del_member(outer_relids, rel->relid);
/* Enforce convention that outer_relids is exactly NULL if empty */
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index d7e3f09f1a..0080b1de4b 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -6767,6 +6767,97 @@ gincostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
*indexPages = dataPagesFetched;
}
+/*
+ * Given a list of implicitly AND-ed quals, return the cured list of quals that
+ * can be used for a BitmapIndexScan that would not require a full index scan
+ * if any, otherwise the original list of quals.
+ */
+List *gin_get_optimizable_quals(PlannerInfo *root, IndexOptInfo *index,
+ List *index_clauses, Relids *outer_relids, Relids old_outer_relids)
+{
+ List *result = NIL;
+ GinQualCounts counts;
+ bool matchPossible,
+ haveOneFullScan = false;
+ ListCell *lc;
+
+ Assert(index->relam == GIN_AM_OID);
+
+ foreach(lc, index_clauses)
+ {
+ IndexClause *iclause = lfirst_node(IndexClause, lc);
+ IndexClause *newiclause = makeNode(IndexClause);
+ ListCell *lc2;
+
+ memset(&counts, 0, sizeof(counts));
+
+ newiclause->rinfo = iclause->rinfo;
+ newiclause->indexquals = NIL;
+ newiclause->lossy = iclause->lossy;
+ newiclause->indexcol = iclause->indexcol;
+ newiclause->indexcols = iclause->indexcols;
+
+ foreach(lc2, iclause->indexquals)
+ {
+ RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc2);
+ Expr *clause = rinfo->clause;
+
+ if (IsA(clause, OpExpr))
+ {
+ matchPossible = gincost_opexpr(root,
+ index,
+ iclause->indexcol,
+ (OpExpr *) clause,
+ &counts);
+ if (!matchPossible)
+ break;
+ }
+ else if (IsA(clause, ScalarArrayOpExpr))
+ {
+ double numEntries = 1;
+
+ matchPossible = gincost_scalararrayopexpr(root,
+ index,
+ iclause->indexcol,
+ (ScalarArrayOpExpr *) clause,
+ numEntries,
+ &counts);
+ if (!matchPossible)
+ break;
+ }
+ else
+ {
+ /* shouldn't be anything else for a GIN index */
+ elog(ERROR, "unsupported GIN indexqual type: %d",
+ (int) nodeTag(clause));
+ }
+
+ if (counts.haveFullScan)
+ haveOneFullScan = true;
+ else
+ {
+ newiclause->indexquals = lappend(newiclause->indexquals, rinfo);
+ *outer_relids = bms_add_members(*outer_relids,
+ rinfo->clause_relids);
+ }
+ }
+
+ if (list_length(newiclause->indexquals))
+ result = lappend(result, newiclause);
+ }
+
+ if (!matchPossible || !haveOneFullScan || result == NIL)
+ {
+ bms_free(*outer_relids);
+ *outer_relids = old_outer_relids;
+ list_free(result);
+
+ return index_clauses;
+ }
+
+ return result;
+}
+
/*
* BRIN has search behavior completely different from other index types
*/
diff --git a/src/include/utils/index_selfuncs.h b/src/include/utils/index_selfuncs.h
index b81556d7a1..085ebd942a 100644
--- a/src/include/utils/index_selfuncs.h
+++ b/src/include/utils/index_selfuncs.h
@@ -20,6 +20,7 @@
#define INDEX_SELFUNCS_H
#include "access/amapi.h"
+#include "nodes/pathnodes.h"
/* Functions in selfuncs.c */
extern void brincostestimate(struct PlannerInfo *root,
@@ -70,5 +71,10 @@ extern void gincostestimate(struct PlannerInfo *root,
Selectivity *indexSelectivity,
double *indexCorrelation,
double *indexPages);
+extern List *gin_get_optimizable_quals(struct PlannerInfo *root,
+ IndexOptInfo *index,
+ List *index_clauses,
+ Relids *outer_relids,
+ Relids old_outer_relids);
#endif /* INDEX_SELFUNCS_H */