Cleaned-up patch attached and applied. Catalog version updated in
separate patch.
---------------------------------------------------------------------------
Matteo Beccati wrote:
> Hi,
>
> >>Moving it in contrib/ltree would be more difficult to me because it
> >>depends on other functions declared in selfuncs.c
> >>(get_restriction_variable, etc).
> >
> > I'd be willing to consider exporting those functions from selfuncs.c.
>
> In the meanwhile here is the latest patch which uses both mcv and
> histogram values.
>
>
> BTW, when restoring my test database I've found out that there were many
> errors on ALTER INDEX "something" OWNER TO ... :
>
> ERROR: "something" is not a table, view, or sequence
>
> This using 8.1devel pg_restore and a 8.0.3 compressed dump. I could be
> wrong, but I didn't get those errors a few days ago (some cvs updates ago).
>
>
> Best regards
> --
> Matteo Beccati
> http://phpadsnew.com/
> http://phppgads.com/
--
Bruce Momjian http://candle.pha.pa.us
EnterpriseDB http://www.enterprisedb.com
+ If your life is a hard drive, Christ can be your backup. +
Index: contrib/ltree/ltree.sql.in
===================================================================
RCS file: /cvsroot/pgsql/contrib/ltree/ltree.sql.in,v
retrieving revision 1.10
diff -c -c -r1.10 ltree.sql.in
*** contrib/ltree/ltree.sql.in 27 Feb 2006 16:09:48 -0000 1.10
--- contrib/ltree/ltree.sql.in 26 Apr 2006 18:25:16 -0000
***************
*** 230,236 ****
RIGHTARG = ltree,
PROCEDURE = ltree_isparent,
COMMUTATOR = '<@',
! RESTRICT = contsel,
JOIN = contjoinsel
);
--- 230,236 ----
RIGHTARG = ltree,
PROCEDURE = ltree_isparent,
COMMUTATOR = '<@',
! RESTRICT = parentsel,
JOIN = contjoinsel
);
***************
*** 248,254 ****
RIGHTARG = ltree,
PROCEDURE = ltree_risparent,
COMMUTATOR = '@>',
! RESTRICT = contsel,
JOIN = contjoinsel
);
--- 248,254 ----
RIGHTARG = ltree,
PROCEDURE = ltree_risparent,
COMMUTATOR = '@>',
! RESTRICT = parentsel,
JOIN = contjoinsel
);
Index: src/backend/utils/adt/geo_selfuncs.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/utils/adt/geo_selfuncs.c,v
retrieving revision 1.27
diff -c -c -r1.27 geo_selfuncs.c
*** src/backend/utils/adt/geo_selfuncs.c 5 Mar 2006 15:58:42 -0000
1.27
--- src/backend/utils/adt/geo_selfuncs.c 26 Apr 2006 18:25:21 -0000
***************
*** 20,26 ****
#include "utils/geo_decls.h"
-
/*
* Selectivity functions for geometric operators. These are bogus --
unless
* we know the actual key distribution in the index, we can't make a good
--- 20,25 ----
***************
*** 93,95 ****
--- 92,95 ----
{
PG_RETURN_FLOAT8(0.001);
}
+
Index: src/backend/utils/adt/selfuncs.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v
retrieving revision 1.199
diff -c -c -r1.199 selfuncs.c
*** src/backend/utils/adt/selfuncs.c 20 Apr 2006 17:50:18 -0000 1.199
--- src/backend/utils/adt/selfuncs.c 26 Apr 2006 18:25:28 -0000
***************
*** 4852,4854 ****
--- 4852,5033 ----
PG_RETURN_VOID();
}
+
+
+ #define DEFAULT_PARENT_SEL 0.001
+
+ /*
+ * parentsel - Selectivity of parent relationship for ltree data types.
+ */
+ Datum
+ parentsel(PG_FUNCTION_ARGS)
+ {
+ PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
+ Oid operator = PG_GETARG_OID(1);
+ List *args = (List *) PG_GETARG_POINTER(2);
+ int varRelid = PG_GETARG_INT32(3);
+ VariableStatData vardata;
+ Node *other;
+ bool varonleft;
+ Datum *values;
+ int nvalues;
+ float4 *numbers;
+ int nnumbers;
+ double selec = 0.0;
+
+ /*
+ * If expression is not variable <@ something or something <@ variable,
+ * then punt and return a default estimate.
+ */
+ if (!get_restriction_variable(root, args, varRelid,
+ &vardata,
&other, &varonleft))
+ PG_RETURN_FLOAT8(DEFAULT_PARENT_SEL);
+
+ /*
+ * If the something is a NULL constant, assume operator is strict and
+ * return zero, ie, operator will never return TRUE.
+ */
+ if (IsA(other, Const) &&
+ ((Const *) other)->constisnull)
+ {
+ ReleaseVariableStats(vardata);
+ PG_RETURN_FLOAT8(0.0);
+ }
+
+ if (HeapTupleIsValid(vardata.statsTuple))
+ {
+ Form_pg_statistic stats;
+ double mcvsum = 0.0;
+ double mcvsel = 0.0;
+ double hissel = 0.0;
+
+ stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
+
+ if (IsA(other, Const))
+ {
+ /* Variable is being compared to a known non-null
constant */
+ Datum constval = ((Const *)
other)->constvalue;
+ bool match = false;
+ int i;
+
+ /*
+ * Is the constant "<@" to any of the column's most
common values?
+ */
+ if (get_attstatsslot(vardata.statsTuple,
+
vardata.atttype, vardata.atttypmod,
+
STATISTIC_KIND_MCV, InvalidOid,
+ &values,
&nvalues,
+ &numbers,
&nnumbers))
+ {
+ FmgrInfo contproc;
+
+ fmgr_info(get_opcode(operator), &contproc);
+
+ for (i = 0; i < nvalues; i++)
+ {
+ /* be careful to apply operator right
way 'round */
+ if (varonleft)
+ match =
DatumGetBool(FunctionCall2(&contproc,
+
values[i],
+
constval));
+ else
+ match =
DatumGetBool(FunctionCall2(&contproc,
+
constval,
+
values[i]));
+
+ /* calculate total selectivity of all
most-common-values */
+ mcvsum += numbers[i];
+
+ /* calculate selectivity of matching
most-common-values */
+ if (match)
+ mcvsel += numbers[i];
+ }
+ }
+ else
+ {
+ /* no most-common-values info available */
+ values = NULL;
+ numbers = NULL;
+ i = nvalues = nnumbers = 0;
+ }
+
+ free_attstatsslot(vardata.atttype, values, nvalues,
NULL, 0);
+
+ /*
+ * Is the constant "<@" to any of the column's
histogram values?
+ */
+ if (get_attstatsslot(vardata.statsTuple,
+
vardata.atttype, vardata.atttypmod,
+
STATISTIC_KIND_HISTOGRAM, InvalidOid,
+ &values,
&nvalues,
+ NULL, NULL))
+ {
+ FmgrInfo contproc;
+
+ fmgr_info(get_opcode(operator), &contproc);
+
+ for (i = 0; i < nvalues; i++)
+ {
+ /* be careful to apply operator right
way 'round */
+ if (varonleft)
+ match =
DatumGetBool(FunctionCall2(&contproc,
+
values[i],
+
constval));
+ else
+ match =
DatumGetBool(FunctionCall2(&contproc,
+
constval,
+
values[i]));
+ /* count matching histogram values */
+ if (match)
+ hissel++;
+ }
+
+ if (hissel > 0.0)
+ {
+ /*
+ * some matching values found inside
histogram, divide
+ * matching entries number by total
histogram entries to
+ * get the histogram related selectivity
+ */
+ hissel /= nvalues;
+ }
+ }
+ else
+ {
+ /* no histogram info available */
+ values = NULL;
+ i = nvalues = 0;
+ }
+
+ free_attstatsslot(vardata.atttype, values, nvalues,
+ NULL, 0);
+
+
+ /*
+ * calculate selectivity based on MCV and histogram
result
+ * histogram selectivity needs to be scaled down if
there are any
+ * most-common-values
+ */
+ selec = mcvsel + hissel * (1.0 - mcvsum);
+
+ /*
+ * don't return 0.0 selectivity unless all table values
are inside
+ * mcv
+ */
+ if (selec == 0.0 && mcvsum != 1.0)
+ selec = DEFAULT_PARENT_SEL;
+ }
+ else
+ selec = DEFAULT_PARENT_SEL;
+ }
+ else
+ selec = DEFAULT_PARENT_SEL;
+
+ ReleaseVariableStats(vardata);
+
+ /* result should be in range, but make sure... */
+ CLAMP_PROBABILITY(selec);
+
+ PG_RETURN_FLOAT8((float8) selec);
+ }
+
Index: src/include/catalog/pg_proc.h
===================================================================
RCS file: /cvsroot/pgsql/src/include/catalog/pg_proc.h,v
retrieving revision 1.406
diff -c -c -r1.406 pg_proc.h
*** src/include/catalog/pg_proc.h 25 Apr 2006 00:25:20 -0000 1.406
--- src/include/catalog/pg_proc.h 26 Apr 2006 18:25:35 -0000
***************
*** 3812,3817 ****
--- 3812,3819 ----
DESCR("GiST support");
DATA(insert OID = 2592 ( gist_circle_compress PGNSP PGUID 12 f f t f
i 1 2281 "2281" _null_ _null_ _null_ gist_circle_compress - _null_ ));
DESCR("GiST support");
+ DATA(insert OID = 2599 ( parentsel PGNSP PGUID 12 f f t f s 4
701 "2281 26 2281 23" _null_ _null_ _null_ parentsel - _null_ ));
+ DESCR("enhanced restriction selectivity for ltree isparent comparison
operators");
/*
Index: src/include/utils/selfuncs.h
===================================================================
RCS file: /cvsroot/pgsql/src/include/utils/selfuncs.h,v
retrieving revision 1.28
diff -c -c -r1.28 selfuncs.h
*** src/include/utils/selfuncs.h 5 Mar 2006 15:59:07 -0000 1.28
--- src/include/utils/selfuncs.h 26 Apr 2006 18:25:35 -0000
***************
*** 134,137 ****
--- 134,139 ----
extern Datum hashcostestimate(PG_FUNCTION_ARGS);
extern Datum gistcostestimate(PG_FUNCTION_ARGS);
+ extern Datum parentsel(PG_FUNCTION_ARGS);
+
#endif /* SELFUNCS_H */
---------------------------(end of broadcast)---------------------------
TIP 9: In versions below 8.0, the planner will ignore your desire to
choose an index scan if your joining column's datatypes do not
match