On Sun, Dec 22, 2019 at 06:16:48PM -0600, Justin Pryzby wrote: > On Tue, Nov 19, 2019 at 01:34:21PM -0600, Justin Pryzby wrote: > > Tom implemented "Planner support functions": > > https://git.postgresql.org/gitweb/?p=postgresql.git;a=commitdiff;h=a391ff3c3d418e404a2c6e4ff0865a107752827b > > https://www.postgresql.org/docs/12/xfunc-optimization.html > > > > I wondered whether there was any consideration to extend that to allow > > providing improved estimates of "group by". That currently requires > > manually > > by creating an expression index, if the function is IMMUTABLE (which is not > > true for eg. date_trunc of timestamptz). > > I didn't hear back so tried implementing this for date_trunc(). Currently, > the
> I currently assume that the input data has 1 second granularity: ... > If the input timestamps have (say) hourly granularity, rowcount will be > *underestimated* by 3600x, which is worse than the behavior in master of > overestimating by (for "day") 24x. > > I'm trying to think of ways to address that: In the attached patches, I handled that by using the histogram and the variable's initial ndistinct estimate, which gives good estimates even for intermediate granularities of input timestamps. For example, with input timestamps at 15-minute granularity: |postgres=# DROP TABLE IF EXISTS t; CREATE TABLE t(i) AS SELECT a FROM generate_series(now(), now()+'11 day'::interval, '15 minutes')a,generate_series(1,9)b; ANALYZE t; | |postgres=# explain analyze SELECT date_trunc('hour',i) i FROM t GROUP BY 1; | HashAggregate (cost=185.69..188.99 rows=264 width=8) (actual time=42.110..42.317 rows=265 loops=1) | |postgres=# explain analyze SELECT date_trunc('minute',i) i FROM t GROUP BY 1; | HashAggregate (cost=185.69..198.91 rows=1057 width=8) (actual time=41.685..42.264 rows=1057 loops=1) | |postgres=# explain analyze SELECT date_trunc('day',i) i FROM t GROUP BY 1; | HashAggregate (cost=185.69..185.83 rows=11 width=8) (actual time=46.672..46.681 rows=12 loops=1) | |postgres=# explain analyze SELECT date_trunc('second',i) i FROM t GROUP BY 1; | HashAggregate (cost=185.69..198.91 rows=1057 width=8) (actual time=41.816..42.435 rows=1057 loops=1)
>From 772876dbd64ea0b1d2bb28f9ab67f577c4050468 Mon Sep 17 00:00:00 2001 From: Justin Pryzby <pryz...@telsasoft.com> Date: Sun, 15 Dec 2019 20:27:24 -0600 Subject: [PATCH v2 1/2] Planner support functions for GROUP BY f().. ..implemented for date_trunc() See also a391ff3c3d418e404a2c6e4ff0865a107752827b --- src/backend/optimizer/util/plancat.c | 47 +++++++++++++++++ src/backend/utils/adt/selfuncs.c | 28 +++++++++++ src/backend/utils/adt/timestamp.c | 97 ++++++++++++++++++++++++++++++++++++ src/include/catalog/catversion.h | 2 +- src/include/catalog/pg_proc.dat | 15 ++++-- src/include/nodes/nodes.h | 3 +- src/include/nodes/supportnodes.h | 17 +++++++ src/include/optimizer/plancat.h | 2 + 8 files changed, 206 insertions(+), 5 deletions(-) diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index c15654e..2469ca6 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -2009,6 +2009,53 @@ get_function_rows(PlannerInfo *root, Oid funcid, Node *node) } /* + * Return a multiplier [0..1] to help estimate effect on rowcount of GROUP BY + * f(x), relative to input x. 
+ */ +double +get_function_groupby(PlannerInfo *root, Oid funcid, Node *node, Node *var) +{ + HeapTuple proctup; + Form_pg_proc procform; + + proctup = SearchSysCache1(PROCOID, ObjectIdGetDatum(funcid)); + if (!HeapTupleIsValid(proctup)) + elog(ERROR, "cache lookup failed for function %u", funcid); + procform = (Form_pg_proc) GETSTRUCT(proctup); + + if (OidIsValid(procform->prosupport)) + { + SupportRequestGroupBy *sresult; + SupportRequestGroupBy req; + + req.type = T_SupportRequestGroupBy; + req.root = root; + req.funcid = funcid; + req.node = node; + req.var = var; + req.factor = 1; /* just for sanity */ + + sresult = (SupportRequestGroupBy *) + DatumGetPointer(OidFunctionCall1(procform->prosupport, + PointerGetDatum(&req))); + + if (sresult == &req) + { + /* Success */ + ReleaseSysCache(proctup); + return req.factor; + } + } + + /* XXX No support function, or it failed */ + + ReleaseSysCache(proctup); + + return 1; +} + + +/* * has_unique_index * * Detect whether there is a unique index on the specified attribute diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index ff02b5a..eb0b86f 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -3154,10 +3154,38 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows, */ foreach(l2, varshere) { + double ret; Node *var = (Node *) lfirst(l2); examine_variable(root, var, 0, &vardata); varinfos = add_unique_group_var(root, varinfos, var, &vardata); + + /* If we group by a function of a simple var, try to call its support function to help estimate GROUP BY */ + if (HeapTupleIsValid(vardata.statsTuple) && + IsA(groupexpr, FuncExpr) && IsA(var, Var)) + // && (varRelid == 0 || varRelid == ((Var *) basenode)->varno)) + { + Form_pg_statistic stats = (Form_pg_statistic)GETSTRUCT(vardata.statsTuple); + FuncExpr *expr = (FuncExpr *) groupexpr; + + Var *v = (Var*) var; + RangeTblEntry *rte = root->simple_rte_array[v->varno]; + char *reln = 
get_rel_name(rte->relid); + char *coln = get_attname(rte->relid, v->varattno, true); + ret = get_function_groupby(root, expr->funcid, groupexpr, var); + + fprintf(stderr, "HERE %s %d %s.%s ndistinct=%f ret=%f\n", __FUNCTION__, __LINE__, + reln?reln:"null", + coln?coln:"null", + stats->stadistinct, ret); + + Assert(ret>=0); + Assert(ret<=1); + numdistinct *= ret; + + /* Can we do anything special with stats->stadistinct that's not already done in general? */ + } + ReleaseVariableStats(vardata); } } diff --git a/src/backend/utils/adt/timestamp.c b/src/backend/utils/adt/timestamp.c index 945b8f8..359ad32 100644 --- a/src/backend/utils/adt/timestamp.c +++ b/src/backend/utils/adt/timestamp.c @@ -5554,3 +5554,100 @@ generate_series_timestamptz(PG_FUNCTION_ARGS) SRF_RETURN_DONE(funcctx); } } + + +/* + * Planner support function for date_trunc + * Try to estimate the factor by which to correct the estimate of ngroups for GROUP BY. + */ +Datum +date_trunc_support(PG_FUNCTION_ARGS) +{ + Node *rawreq; + SupportRequestGroupBy *req; + List *args; + Node *arg1, *arg2; + Node *var; + char *start; + int typmod; + + rawreq = (Node *) PG_GETARG_POINTER(0); + if (!IsA(rawreq, SupportRequestGroupBy)) + PG_RETURN_POINTER(NULL); + + req = (SupportRequestGroupBy *) rawreq; + if (!is_funcclause(req->node)) /* be paranoid */ + PG_RETURN_POINTER(NULL); + + args = ((FuncExpr *) req->node)->args; + arg1 = linitial(args); + arg2 = lsecond(args); + /* arg3 may be the timezone */ + + var = req->var; + + /* XXX Assumes the input has 1-second granularity */ + + // XXX: only work on const? + start = TextDatumGetCString(((Const*)arg1)->constvalue); + + // XXX: not working due to promotion ? + // exprType(var) is still not right, since date_trunc(a, b::date) uses b's type and not date.. 
+ // ...but date_trunc('', x::date) is weird + // exprType(arg2)==TIMESTAMPOID || exprType(arg2)==TIMESTAMPTZOID) + // if (req->funcid==1217 || req->funcid==1284 || req->funcid==2020) + /* Reset if it's not a timestamp */ + if (exprType(var)==DATEOID) { + req->factor = 60*60*24; + } else if (exprType(var)==TIMESTAMPOID || exprType(var)==TIMESTAMPTZOID) { + int typmod = exprTypmod(arg2); // XXX: vartypmod ? + /* If the input has decimal digits, the grouping effect is stronger */ + if (typmod != -1) { + req->factor /= 2<<typmod; + if (strcmp(start, "microseconds")==0) { + /* do nothing? */ + } else if (strcmp(start, "milliseconds")==0) { + /* do nothing? */ + } + } + + if (strcmp(start, "second")==0) { + /* do nothing */ + } else if (strcmp(start, "minute")==0) { + req->factor /= 60; + } else if (strcmp(start, "hour")==0) { + req->factor /= 60*60; + } + } + + // else { elog(ERROR, "unknown type %u", exprType(var)); } + + if (strcmp(start, "day")==0) { + req->factor /= 60*60*24; + } else if (strcmp(start, "week")==0) { + req->factor /= 60*60*24*7; + } else if (strcmp(start, "month")==0) { + /* 30 days */ + req->factor /= 60*60*24*30; + } else if (strcmp(start, "quarter")==0) { + req->factor /= 60*60*24*30*3; + } else if (strcmp(start, "year")==0) { + req->factor /= 60*60*24*365.24; + } else if (strcmp(start, "decade")==0) { + req->factor /= 60*60*24*365.25*10; + } else if (strcmp(start, "century")==0) { + req->factor /= 60*60*24*365.25*100; + } else if (strcmp(start, "millennium")==0) { + req->factor /= 60*60*24*365.25*1000; + } else if (req->factor > 1) { + /* Maybe a DATE with finer graularity trunc */ + req->factor = 1; + } + // else { elog(ERROR, "", ); } + + /* Fudge Factor, since the input may be already "grouped", say at multiples of 15min, */ + /* or otherwise have course granularity to begin with */ + // factor/=4; + + PG_RETURN_POINTER(req); +} diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index eca67a1..e2c05be 100644 --- 
a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -53,6 +53,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 201911242 +#define CATALOG_VERSION_NO 201911243 #endif diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index aae50d6..7a8c3d1 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -2350,11 +2350,14 @@ { oid => '1217', descr => 'truncate timestamp with time zone to specified units', proname => 'date_trunc', provolatile => 's', prorettype => 'timestamptz', - proargtypes => 'text timestamptz', prosrc => 'timestamptz_trunc' }, + proargtypes => 'text timestamptz', prosrc => 'timestamptz_trunc', + prosupport => 'date_trunc_support', }, { oid => '1284', descr => 'truncate timestamp with time zone to specified units in specified time zone', proname => 'date_trunc', provolatile => 's', prorettype => 'timestamptz', - proargtypes => 'text timestamptz text', prosrc => 'timestamptz_trunc_zone' }, + proargtypes => 'text timestamptz text', prosrc => 'timestamptz_trunc_zone', + prosupport => 'date_trunc_support', }, +# XXX: { oid => '1218', descr => 'truncate interval to specified units', proname => 'date_trunc', prorettype => 'interval', proargtypes => 'text interval', prosrc => 'interval_trunc' }, @@ -5632,7 +5635,13 @@ proargtypes => 'timestamptz', prosrc => 'timestamptz_time' }, { oid => '2020', descr => 'truncate timestamp to specified units', proname => 'date_trunc', prorettype => 'timestamp', - proargtypes => 'text timestamp', prosrc => 'timestamp_trunc' }, + proargtypes => 'text timestamp', prosrc => 'timestamp_trunc', + prosupport => 'date_trunc_support', }, + +{ oid => '5449', descr => 'planner support for date_trunc', + proname => 'date_trunc_support', prorettype => 'internal', + proargtypes => 'internal', prosrc => 'date_trunc_support', }, + { oid => '2021', descr => 'extract field from timestamp', proname => 'date_part', prorettype => 'float8', proargtypes => 'text 
timestamp', prosrc => 'timestamp_part' }, diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index 8692a32..97e7796 100644 --- a/src/include/nodes/nodes.h +++ b/src/include/nodes/nodes.h @@ -513,7 +513,8 @@ typedef enum NodeTag T_SupportRequestSelectivity, /* in nodes/supportnodes.h */ T_SupportRequestCost, /* in nodes/supportnodes.h */ T_SupportRequestRows, /* in nodes/supportnodes.h */ - T_SupportRequestIndexCondition /* in nodes/supportnodes.h */ + T_SupportRequestIndexCondition, /* in nodes/supportnodes.h */ + T_SupportRequestGroupBy, /* in nodes/supportnodes.h */ } NodeTag; /* diff --git a/src/include/nodes/supportnodes.h b/src/include/nodes/supportnodes.h index 460d75b..cb4ea44 100644 --- a/src/include/nodes/supportnodes.h +++ b/src/include/nodes/supportnodes.h @@ -168,6 +168,23 @@ typedef struct SupportRequestRows double rows; /* number of rows expected to be returned */ } SupportRequestRows; +/* How many fewer rows are output after GROUPing BY a function */ +typedef struct SupportRequestGroupBy +{ + NodeTag type; + + /* Input fields: */ + struct PlannerInfo *root; /* Planner's infrastructure (could be NULL) */ + Oid funcid; /* function we are inquiring about */ + Node *var; /* original (2nd) argument */ + Node *node; /* parse node invoking function */ + + /* Output fields: */ + double factor; /* [0..1] fraction of rows in GROUP BY f(x) + relative to GROUP BY x */ + +} SupportRequestGroupBy; + /* * The IndexCondition request allows the support function to generate * a directly-indexable condition based on a target function call that is diff --git a/src/include/optimizer/plancat.h b/src/include/optimizer/plancat.h index bbb27f8..cbe5386 100644 --- a/src/include/optimizer/plancat.h +++ b/src/include/optimizer/plancat.h @@ -70,6 +70,8 @@ extern void add_function_cost(PlannerInfo *root, Oid funcid, Node *node, extern double get_function_rows(PlannerInfo *root, Oid funcid, Node *node); +extern double get_function_groupby(PlannerInfo *root, Oid 
funcid, Node *node, Node *var); + extern bool has_row_triggers(PlannerInfo *root, Index rti, CmdType event); extern bool has_stored_generated_columns(PlannerInfo *root, Index rti); -- 2.7.4
>From e287fa474fea487ace0ee7d476f84b6f787cc2a7 Mon Sep 17 00:00:00 2001 From: Justin Pryzby <pryz...@telsasoft.com> Date: Tue, 24 Dec 2019 22:02:22 -0600 Subject: [PATCH v2 2/2] Pass ndistinct and minmax to allow good estimates even with timestamps of granularity other than 1sec --- src/backend/optimizer/util/plancat.c | 8 ++- src/backend/utils/adt/selfuncs.c | 36 ++++++++--- src/backend/utils/adt/timestamp.c | 113 ++++++++++++++++++++++------------- src/include/nodes/supportnodes.h | 2 + src/include/optimizer/plancat.h | 2 +- 5 files changed, 110 insertions(+), 51 deletions(-) diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index 2469ca6..aab794c 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -2011,9 +2011,13 @@ get_function_rows(PlannerInfo *root, Oid funcid, Node *node) /* * Return a multiplier [0..1] to help estimate effect on rowcount of GROUP BY * f(x), relative to input x. + * + * minmax is an array of (min,max) values for the variable, which might be + * useful to determine its granularity (like timestamps per second, minute or + * hour). 
*/ double -get_function_groupby(PlannerInfo *root, Oid funcid, Node *node, Node *var) +get_function_groupby(PlannerInfo *root, Oid funcid, Node *node, Node *var, double ndistinct, Datum *minmax) { HeapTuple proctup; Form_pg_proc procform; @@ -2033,6 +2037,8 @@ get_function_groupby(PlannerInfo *root, Oid funcid, Node *node, Node *var) req.funcid = funcid; req.node = node; req.var = var; + req.ndistinct = ndistinct; + req.minmax = minmax; req.factor = 1; /* just for sanity */ sresult = (SupportRequestGroupBy *) diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index eb0b86f..a7f396d 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -116,6 +116,7 @@ #include "miscadmin.h" #include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" +#include "nodes/supportnodes.h" #include "optimizer/clauses.h" #include "optimizer/cost.h" #include "optimizer/optimizer.h" @@ -3168,16 +3169,37 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows, Form_pg_statistic stats = (Form_pg_statistic)GETSTRUCT(vardata.statsTuple); FuncExpr *expr = (FuncExpr *) groupexpr; - Var *v = (Var*) var; + Datum minmax[2]; + TypeCacheEntry *tce; + Var *v = (Var*) var; RangeTblEntry *rte = root->simple_rte_array[v->varno]; - char *reln = get_rel_name(rte->relid); - char *coln = get_attname(rte->relid, v->varattno, true); - ret = get_function_groupby(root, expr->funcid, groupexpr, var); - - fprintf(stderr, "HERE %s %d %s.%s ndistinct=%f ret=%f\n", __FUNCTION__, __LINE__, + char *reln = get_rel_name(rte->relid); + char *coln = get_attname(rte->relid, v->varattno, true); + + /* Seems like maybe this should be defined here and pass a single argument to groupby helper? */ + SupportRequestGroupBy req = { + .type = T_SupportRequestGroupBy, + .root = root, + .funcid = expr->funcid, + .node = groupexpr, + .var = var, + .ndistinct = stats->stadistinct >= 0 ? 
stats->stadistinct : + -stats->stadistinct * vardata.rel->tuples, + .minmax = minmax, + .factor = 1, /* just for sanity */ + }; + + fprintf(stderr, "HERE %s %d %s.%s tuples=%f, stadistinct=%f ndistinct=%f ret=%f\n", __FUNCTION__, __LINE__, reln?reln:"null", coln?coln:"null", - stats->stadistinct, ret); + vardata.rel->tuples, + stats->stadistinct, req.ndistinct, ret); + + tce = lookup_type_cache(v->vartype, TYPECACHE_LT_OPR); + if (get_variable_range(root, &vardata, tce->lt_opr, minmax, minmax+1)) + ret = get_function_groupby(root, expr->funcid, groupexpr, var, req.ndistinct, minmax); + else + ret = get_function_groupby(root, expr->funcid, groupexpr, var, req.ndistinct, NULL); Assert(ret>=0); Assert(ret<=1); diff --git a/src/backend/utils/adt/timestamp.c b/src/backend/utils/adt/timestamp.c index 359ad32..0314ef9 100644 --- a/src/backend/utils/adt/timestamp.c +++ b/src/backend/utils/adt/timestamp.c @@ -5563,13 +5563,44 @@ generate_series_timestamptz(PG_FUNCTION_ARGS) Datum date_trunc_support(PG_FUNCTION_ARGS) { - Node *rawreq; + Node *rawreq; SupportRequestGroupBy *req; - List *args; - Node *arg1, *arg2; - Node *var; + List *args; + Node *arg1, *arg2; + Node *var; char *start; - int typmod; + int typmod, i; + long unsigned int diff; + + const struct { + const char *name; /* Granularity name */ + const int factor; /* Multiplier */ + const double minsecsec; /* Minimum number of distinct second values per second in range of the histogram */ + } grans[] = { + /* XXX: these factors not applied unless...typmod>1 ? */ + { "microseconds", 1 }, + { "milliseconds", 1 }, + + /* + * These factors not applied unless minmax indicates the incoming + * timestamp is at fine enough granularity that truncating to courser + * granularity would affect result. 
+ */ + { "second", 1, 1.0 }, + { "minute", 60, 1.0/60 }, + { "hour", 60, 1.0/60/60 }, + { "day", 24, 1.0/60/60/24 }, // XXX: should not handle for DATEOID + + /* These factors applied up to the specified granularity */ + { "week", 7 }, + { "month", 30 }, + { "quarter", 3 }, + { "year", 4 }, + { "decade", 10 }, + { "century", 10 }, + { "millennium", 10 }, + + }; rawreq = (Node *) PG_GETARG_POINTER(0); if (!IsA(rawreq, SupportRequestGroupBy)) @@ -5584,13 +5615,45 @@ date_trunc_support(PG_FUNCTION_ARGS) arg2 = lsecond(args); /* arg3 may be the timezone */ - var = req->var; + // XXX: handle if these are null ? + diff = req->minmax ? timestamptz_to_time_t(DatumGetTimestamp(req->minmax[1])) - + timestamptz_to_time_t(DatumGetTimestamp(req->minmax[0])) + : 0; - /* XXX Assumes the input has 1-second granularity */ + fprintf(stderr, "got distinct %f diff=%ld\n", req->ndistinct, diff); // XXX: only work on const? start = TextDatumGetCString(((Const*)arg1)->constvalue); + for (i=0; ; ++i) { + if (i >= sizeof(grans)/sizeof(*grans)) + /* Unhandled truncation granularity */ + PG_RETURN_POINTER(NULL); + + fprintf(stderr, "applying factor %s %d: cur=%f %f -gt %f\n", + grans[i].name, grans[i].factor, req->factor, + req->ndistinct/diff, grans[i].minsecsec ); + + if (req->ndistinct / diff >= grans[i].minsecsec) { + if (req->ndistinct / diff > grans[i-1].minsecsec) + /* Apply the factor in full strength */ + req->factor /= grans[i].factor; + else { + /* interpolate: if at (say) 15 minute granularity, then apply a 4x hourly correction, not 60x */ + // req->factor /= grans[i].factor / (req->ndistinct / diff / grans[i].minsecsec); + req->factor /= req->ndistinct / diff / grans[i].minsecsec; // XXX: is this right + fprintf(stderr, "applying partial factor %f\n", req->ndistinct / diff / grans[i].minsecsec); + } + } + + if (strcmp(start, grans[i].name) == 0) + break; + } + + PG_RETURN_POINTER(req); + +#if 0 + // var = req->var; // XXX: not working due to promotion ? 
// exprType(var) is still not right, since date_trunc(a, b::date) uses b's type and not date.. // ...but date_trunc('', x::date) is weird @@ -5604,50 +5667,16 @@ date_trunc_support(PG_FUNCTION_ARGS) /* If the input has decimal digits, the grouping effect is stronger */ if (typmod != -1) { req->factor /= 2<<typmod; - if (strcmp(start, "microseconds")==0) { - /* do nothing? */ - } else if (strcmp(start, "milliseconds")==0) { - /* do nothing? */ - } } - if (strcmp(start, "second")==0) { - /* do nothing */ - } else if (strcmp(start, "minute")==0) { - req->factor /= 60; - } else if (strcmp(start, "hour")==0) { - req->factor /= 60*60; - } } // else { elog(ERROR, "unknown type %u", exprType(var)); } - if (strcmp(start, "day")==0) { - req->factor /= 60*60*24; - } else if (strcmp(start, "week")==0) { - req->factor /= 60*60*24*7; - } else if (strcmp(start, "month")==0) { - /* 30 days */ - req->factor /= 60*60*24*30; - } else if (strcmp(start, "quarter")==0) { - req->factor /= 60*60*24*30*3; - } else if (strcmp(start, "year")==0) { - req->factor /= 60*60*24*365.24; - } else if (strcmp(start, "decade")==0) { - req->factor /= 60*60*24*365.25*10; - } else if (strcmp(start, "century")==0) { - req->factor /= 60*60*24*365.25*100; - } else if (strcmp(start, "millennium")==0) { - req->factor /= 60*60*24*365.25*1000; - } else if (req->factor > 1) { /* Maybe a DATE with finer graularity trunc */ req->factor = 1; } - // else { elog(ERROR, "", ); } - /* Fudge Factor, since the input may be already "grouped", say at multiples of 15min, */ - /* or otherwise have course granularity to begin with */ - // factor/=4; +#endif - PG_RETURN_POINTER(req); } diff --git a/src/include/nodes/supportnodes.h b/src/include/nodes/supportnodes.h index cb4ea44..f5f33bf 100644 --- a/src/include/nodes/supportnodes.h +++ b/src/include/nodes/supportnodes.h @@ -178,6 +178,8 @@ typedef struct SupportRequestGroupBy Oid funcid; /* function we are inquiring about */ Node *var; /* original (2nd) argument */ Node 
*node; /* parse node invoking function */ + double ndistinct; /* Initial estimate of ndistinct of the variable */ + Datum *minmax; /* Array of (min,max) values for variable */ /* Output fields: */ double factor; /* [0..1] fraction of rows in GROUP BY f(x) diff --git a/src/include/optimizer/plancat.h b/src/include/optimizer/plancat.h index cbe5386..c13d40d 100644 --- a/src/include/optimizer/plancat.h +++ b/src/include/optimizer/plancat.h @@ -70,7 +70,7 @@ extern void add_function_cost(PlannerInfo *root, Oid funcid, Node *node, extern double get_function_rows(PlannerInfo *root, Oid funcid, Node *node); -extern double get_function_groupby(PlannerInfo *root, Oid funcid, Node *node, Node *var); +extern double get_function_groupby(PlannerInfo *root, Oid funcid, Node *node, Node *var, double ndistinct, Datum *minmax); extern bool has_row_triggers(PlannerInfo *root, Index rti, CmdType event); -- 2.7.4