On 27.06.2011 13:45, Alexander Korotkov wrote:
I've added information about testing on some real-life dataset to wiki page.
This dataset have a speciality: data is ordered inside it. In this case
tradeoff was inverse in comparison with expectations about "fast build"
algrorithm. Index built is longer but index quality is significantly better.
I think high speed of regular index built is because sequential inserts are
into near tree parts. That's why number of actual page reads and writes is
low. The difference in tree quality I can't *convincingly explain now.*
I've also maked tests with shuffled data of this dataset. In this case
results was similar to random generated data.
Hmm, I assume the CPU overhead is coming from the penalty calls in this
case too. There's some low-hanging optimization fruit in
gist_box_penalty(), see attached patch. I tested this with:
CREATE TABLE points (a point);
CREATE INDEX i_points ON points using gist (a);
INSERT INTO points SELECT point(random(), random()) FROM
generate_series(1,1000000);
and running "checkpoint; reindex index i_points;" a few times with and
without the patch. The patch reduced the runtime from about 17.5 s to
15.5 s. oprofile confirms that the time spent in gist_box_penalty() and
rt_box_union() is reduced significantly.
This is all without the fast GiST index build patch, so this is
worthwhile on its own. If penalty function is called more, then this
becomes even more significant.
--
Heikki Linnakangas
EnterpriseDB http://www.enterprisedb.com
*** a/src/backend/access/gist/gistproc.c
--- b/src/backend/access/gist/gistproc.c
***************
*** 23,29 ****
static bool gist_box_leaf_consistent(BOX *key, BOX *query,
StrategyNumber strategy);
! static double size_box(Datum dbox);
static bool rtree_internal_consistent(BOX *key, BOX *query,
StrategyNumber strategy);
--- 23,29 ----
static bool gist_box_leaf_consistent(BOX *key, BOX *query,
StrategyNumber strategy);
! static double size_box(BOX *box);
static bool rtree_internal_consistent(BOX *key, BOX *query,
StrategyNumber strategy);
***************
*** 32,63 **** static bool rtree_internal_consistent(BOX *key, BOX *query,
* Box ops
**************************************************/
! static Datum
! rt_box_union(PG_FUNCTION_ARGS)
{
- BOX *a = PG_GETARG_BOX_P(0);
- BOX *b = PG_GETARG_BOX_P(1);
- BOX *n;
-
- n = (BOX *) palloc(sizeof(BOX));
-
n->high.x = Max(a->high.x, b->high.x);
n->high.y = Max(a->high.y, b->high.y);
n->low.x = Min(a->low.x, b->low.x);
n->low.y = Min(a->low.y, b->low.y);
-
- PG_RETURN_BOX_P(n);
}
! static Datum
! rt_box_inter(PG_FUNCTION_ARGS)
{
- BOX *a = PG_GETARG_BOX_P(0);
- BOX *b = PG_GETARG_BOX_P(1);
- BOX *n;
-
- n = (BOX *) palloc(sizeof(BOX));
-
n->high.x = Min(a->high.x, b->high.x);
n->high.y = Min(a->high.y, b->high.y);
n->low.x = Max(a->low.x, b->low.x);
--- 32,56 ----
* Box ops
**************************************************/
! /*
! * Calculates union of two boxes, a and b. The result is stored in *n.
! */
! static void
! rt_box_union(BOX *n, BOX *a, BOX *b)
{
n->high.x = Max(a->high.x, b->high.x);
n->high.y = Max(a->high.y, b->high.y);
n->low.x = Min(a->low.x, b->low.x);
n->low.y = Min(a->low.y, b->low.y);
}
! /*
! * Calculates intersection of two boxes, a and b. The result is stored in *n.
! * Returns false if the boxes don't intersect;
! */
! static bool
! rt_box_inter(BOX *n, BOX *a, BOX *b)
{
n->high.x = Min(a->high.x, b->high.x);
n->high.y = Min(a->high.y, b->high.y);
n->low.x = Max(a->low.x, b->low.x);
***************
*** 65,76 **** rt_box_inter(PG_FUNCTION_ARGS)
if (n->high.x < n->low.x || n->high.y < n->low.y)
{
! pfree(n);
! /* Indicate "no intersection" by returning NULL pointer */
! n = NULL;
}
!
! PG_RETURN_BOX_P(n);
}
/*
--- 58,67 ----
if (n->high.x < n->low.x || n->high.y < n->low.y)
{
! /* Indicate "no intersection" by returning false */
! return false;
}
! return true;
}
/*
***************
*** 187,196 **** gist_box_penalty(PG_FUNCTION_ARGS)
GISTENTRY *origentry = (GISTENTRY *) PG_GETARG_POINTER(0);
GISTENTRY *newentry = (GISTENTRY *) PG_GETARG_POINTER(1);
float *result = (float *) PG_GETARG_POINTER(2);
! Datum ud;
! ud = DirectFunctionCall2(rt_box_union, origentry->key, newentry->key);
! *result = (float) (size_box(ud) - size_box(origentry->key));
PG_RETURN_POINTER(result);
}
--- 178,189 ----
GISTENTRY *origentry = (GISTENTRY *) PG_GETARG_POINTER(0);
GISTENTRY *newentry = (GISTENTRY *) PG_GETARG_POINTER(1);
float *result = (float *) PG_GETARG_POINTER(2);
! BOX *origbox = DatumGetBoxP(origentry->key);
! BOX *newbox = DatumGetBoxP(newentry->key);
! BOX unionbox;
! rt_box_union(&unionbox, origbox, newbox);
! *result = (float) (size_box(&unionbox) - size_box(origbox));
PG_RETURN_POINTER(result);
}
***************
*** 209,214 **** chooseLR(GIST_SPLITVEC *v,
--- 202,209 ----
LRr = *union2;
BOX RLl = *union2,
RLr = *union1;
+ BOX LRintersection,
+ RLintersection;
double sizeLR,
sizeRL;
***************
*** 217,224 **** chooseLR(GIST_SPLITVEC *v,
adjustBox(&RLl, DatumGetBoxP(v->spl_ldatum));
adjustBox(&RLr, DatumGetBoxP(v->spl_rdatum));
! sizeLR = size_box(DirectFunctionCall2(rt_box_inter, BoxPGetDatum(&LRl), BoxPGetDatum(&LRr)));
! sizeRL = size_box(DirectFunctionCall2(rt_box_inter, BoxPGetDatum(&RLl), BoxPGetDatum(&RLr)));
if (sizeLR > sizeRL)
firstToLeft = false;
--- 212,225 ----
adjustBox(&RLl, DatumGetBoxP(v->spl_ldatum));
adjustBox(&RLr, DatumGetBoxP(v->spl_rdatum));
! if (rt_box_inter(&LRintersection, &LRl, &LRr))
! sizeLR = size_box(&LRintersection);
! else
! sizeLR = 0.0;
! if (rt_box_inter(&RLintersection, &RLl, &RLr))
! sizeRL = size_box(&RLintersection);
! else
! sizeRL = 0.0;
if (sizeLR > sizeRL)
firstToLeft = false;
***************
*** 504,520 **** gist_box_picksplit(PG_FUNCTION_ARGS)
direction = 'y';
else
{
! Datum interLR = DirectFunctionCall2(rt_box_inter,
! BoxPGetDatum(unionL),
! BoxPGetDatum(unionR));
! Datum interBT = DirectFunctionCall2(rt_box_inter,
! BoxPGetDatum(unionB),
! BoxPGetDatum(unionT));
double sizeLR,
sizeBT;
! sizeLR = size_box(interLR);
! sizeBT = size_box(interBT);
if (sizeLR < sizeBT)
direction = 'x';
--- 505,523 ----
direction = 'y';
else
{
! BOX interLR;
! BOX interBT;
double sizeLR,
sizeBT;
! if (rt_box_inter(&interLR, unionL, unionR))
! sizeLR = size_box(&interLR);
! else
! sizeLR = 0.0;
! if (rt_box_inter(&interBT, unionB, unionT))
! sizeBT = size_box(&interBT);
! else
! sizeBT = 0.0;
if (sizeLR < sizeBT)
direction = 'x';
***************
*** 634,644 **** gist_box_leaf_consistent(BOX *key, BOX *query, StrategyNumber strategy)
}
static double
! size_box(Datum dbox)
{
! BOX *box = DatumGetBoxP(dbox);
!
! if (box == NULL || box->high.x <= box->low.x || box->high.y <= box->low.y)
return 0.0;
return (box->high.x - box->low.x) * (box->high.y - box->low.y);
}
--- 637,645 ----
}
static double
! size_box(BOX *box)
{
! if (box->high.x <= box->low.x || box->high.y <= box->low.y)
return 0.0;
return (box->high.x - box->low.x) * (box->high.y - box->low.y);
}
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers