On Thu, 22 Jun 2023 at 20:59, Yuya Watari <watari.y...@gmail.com> wrote: > Table 1: Planning time and its speedup of Join Order Benchmark > (n: the number of partitions of each table) > (Speedup: higher is better)
> 64 | 115.7% > 128 | 142.9% > 256 | 187.7% Thanks for benchmarking. It certainly looks like a win for larger sets. Would you be able to profile the 256 partition case to see where exactly master is so slow? (I'm surprised this patch improves performance that much.) I think it's also important to check we don't slow anything down for more normal-sized sets. The vast majority of sets will contain just a single word, so we should probably focus on making sure we're not slowing anything down for those. To get the ball rolling on that I used the attached plan_times.patch so that the planner writes the number of elapsed nanosecond from calling standard_planner(). Patching with this then running make installcheck kicks out about 35k log lines with times on it. I ran this on a Linux AMD 3990x machine and also an Apple M2 pro machine. Taking the sum of the nanoseconds and converting into seconds, I see: AMD 3990x master: 1.384267931 seconds patched 1.339178764 seconds (3.37% faster) M2 pro: master: 0.58293 seconds patched: 0.581483 seconds (0.25% faster) So it certainly does not look any slower. Perhaps a little faster with the zen2 machine. (The m2 only seems to have microsecond resolution on the timer code whereas the zen2 has nanosecond. I don't think this matters much as the planner takes enough microseconds to plan even for simple queries) I've also attached the v4 patch again as I'll add this patch to the commitfest and if I don't do that then the CFbot will pick up Ranier's patch instead of mine. David
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 1e4dd27dba..3c713782f1 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -17,6 +17,7 @@ #include <limits.h> #include <math.h> +#include <time.h> #include "access/genam.h" #include "access/htup_details.h" @@ -274,11 +275,22 @@ planner(Query *parse, const char *query_string, int cursorOptions, ParamListInfo boundParams) { PlannedStmt *result; + struct timespec start, end; + + clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &start); if (planner_hook) result = (*planner_hook) (parse, query_string, cursorOptions, boundParams); else result = standard_planner(parse, query_string, cursorOptions, boundParams); + + clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &end); + + elog(LOG, + "planning time in %f nanoseconds", + ((double) (end.tv_sec * 1000000000 + end.tv_nsec) - + (double) (start.tv_sec * 1000000000 + start.tv_nsec))); + return result; }
diff --git a/src/backend/nodes/bitmapset.c b/src/backend/nodes/bitmapset.c index 7ba3cf635b..9cda3b1cc1 100644 --- a/src/backend/nodes/bitmapset.c +++ b/src/backend/nodes/bitmapset.c @@ -5,8 +5,16 @@ * * A bitmap set can represent any set of nonnegative integers, although * it is mainly intended for sets where the maximum value is not large, - * say at most a few hundred. By convention, we always represent the - * empty set by a NULL pointer. + * say at most a few hundred. By convention, we always represent a set with + * the minimum possible number of words, i.e, there are never any trailing + * zero words. Enforcing this requires that an empty set is represented as + * NULL. Because an empty Bitmapset is represented as NULL, a non-NULL + * Bitmapset always has at least 1 Bitmapword. We can exploit this fact to + * speedup various loops over the Bitmapset's words array by using "do while" + * loops instead of "for" loops. This means the code does not waste time + * checking the loop condition before the first iteration. For Bitmapsets + * containing only a single word (likely the majority of them) this reduces + * the loop condition tests by half. * * * Copyright (c) 2003-2023, PostgreSQL Global Development Group @@ -64,8 +72,6 @@ #error "invalid BITS_PER_BITMAPWORD" #endif -static bool bms_is_empty_internal(const Bitmapset *a); - /* * bms_copy - make a palloc'd copy of a bitmapset @@ -85,18 +91,11 @@ bms_copy(const Bitmapset *a) } /* - * bms_equal - are two bitmapsets equal? - * - * This is logical not physical equality; in particular, a NULL pointer will - * be reported as equal to a palloc'd value containing no members. + * bms_equal - are two bitmapsets equal? or both NULL? */ bool bms_equal(const Bitmapset *a, const Bitmapset *b) { - const Bitmapset *shorter; - const Bitmapset *longer; - int shortlen; - int longlen; int i; /* Handle cases where either input is NULL */ @@ -108,30 +107,19 @@ bms_equal(const Bitmapset *a, const Bitmapset *b) } else if (b == NULL) return false; - /* Identify shorter and longer input */ - if (a->nwords <= b->nwords) - { - shorter = a; - longer = b; - } - else - { - shorter = b; - longer = a; - } - /* And process */ - shortlen = shorter->nwords; - for (i = 0; i < shortlen; i++) - { - if (shorter->words[i] != longer->words[i]) - return false; - } - longlen = longer->nwords; - for (; i < longlen; i++) + + /* can't be equal if the word counts don't match */ + if (a->nwords != b->nwords) + return false; + + /* check each word matches */ + i = 0; + do { - if (longer->words[i] != 0) + if (a->words[i] != b->words[i]) return false; - } + } while (++i < a->nwords); + return true; } @@ -146,7 +134,6 @@ bms_equal(const Bitmapset *a, const Bitmapset *b) int bms_compare(const Bitmapset *a, const Bitmapset *b) { - int shortlen; int i; /* Handle cases where either input is NULL */ @@ -154,28 +141,20 @@ bms_compare(const Bitmapset *a, const Bitmapset *b) return (b == NULL) ? 0 : -1; else if (b == NULL) return +1; - /* Handle cases where one input is longer than the other */ - shortlen = Min(a->nwords, b->nwords); - for (i = shortlen; i < a->nwords; i++) - { - if (a->words[i] != 0) - return +1; - } - for (i = shortlen; i < b->nwords; i++) - { - if (b->words[i] != 0) - return -1; - } - /* Process words in common */ - i = shortlen; - while (--i >= 0) + + /* the set with the most words must be greater */ + if (a->nwords != b->nwords) + return (a->nwords > b->nwords) ? +1 : -1; + + i = a->nwords - 1; + do { bitmapword aw = a->words[i]; bitmapword bw = b->words[i]; if (aw != bw) return (aw > bw) ? +1 : -1; - } + } while (--i >= 0); return 0; } @@ -248,8 +227,11 @@ bms_union(const Bitmapset *a, const Bitmapset *b) } /* And union the shorter input into the result */ otherlen = other->nwords; - for (i = 0; i < otherlen; i++) + i = 0; + do + { result->words[i] |= other->words[i]; + } while (++i < otherlen); return result; } @@ -261,6 +243,7 @@ bms_intersect(const Bitmapset *a, const Bitmapset *b) { Bitmapset *result; const Bitmapset *other; + int lastnonzero; int resultlen; int i; @@ -280,14 +263,24 @@ bms_intersect(const Bitmapset *a, const Bitmapset *b) } /* And intersect the longer input with the result */ resultlen = result->nwords; - for (i = 0; i < resultlen; i++) + lastnonzero = -1; + i = 0; + do + { result->words[i] &= other->words[i]; + + if (result->words[i] != 0) + lastnonzero = i; + } while (++i < resultlen); /* If we computed an empty result, we must return NULL */ - if (bms_is_empty_internal(result)) + if (lastnonzero == -1) { pfree(result); return NULL; } + + /* get rid of trailing zero words */ + result->nwords = lastnonzero + 1; return result; } @@ -298,7 +291,6 @@ Bitmapset * bms_difference(const Bitmapset *a, const Bitmapset *b) { Bitmapset *result; - int shortlen; int i; /* Handle cases where either input is NULL */ @@ -317,10 +309,40 @@ bms_difference(const Bitmapset *a, const Bitmapset *b) /* Copy the left input */ result = bms_copy(a); + /* And remove b's bits from result */ - shortlen = Min(a->nwords, b->nwords); - for (i = 0; i < shortlen; i++) - result->words[i] &= ~b->words[i]; + if (result->nwords > b->nwords) + { + /* + * We'll never need to remove trailing zero words when 'a' has more + * words than 'b' as the additional words must be non-zero. + */ + i = 0; + do + { + result->words[i] &= ~b->words[i]; + } while (++i < b->nwords); + } + else + { + int lastnonzero = -1; + + /* we may need to remove trailing zero words from the result. */ + i = 0; + do + { + result->words[i] &= ~b->words[i]; + + /* remember the last non-zero word */ + if (result->words[i] != 0) + lastnonzero = i; + } while (++i < result->nwords); + + /* trim off trailing zero words */ + result->nwords = lastnonzero + 1; + } + Assert(result->nwords != 0); + /* Need not check for empty result, since we handled that case above */ return result; } @@ -331,8 +353,6 @@ bms_difference(const Bitmapset *a, const Bitmapset *b) bool bms_is_subset(const Bitmapset *a, const Bitmapset *b) { - int shortlen; - int longlen; int i; /* Handle cases where either input is NULL */ @@ -340,23 +360,18 @@ bms_is_subset(const Bitmapset *a, const Bitmapset *b) return true; /* empty set is a subset of anything */ if (b == NULL) return false; - /* Check common words */ - shortlen = Min(a->nwords, b->nwords); - for (i = 0; i < shortlen; i++) + + /* 'a' can't be a subset of 'b' if it contains more words */ + if (a->nwords > b->nwords) + return false; + + /* Check all 'a' members are set in 'b' */ + i = 0; + do { if ((a->words[i] & ~b->words[i]) != 0) return false; - } - /* Check extra words */ - if (a->nwords > b->nwords) - { - longlen = a->nwords; - for (; i < longlen; i++) - { - if (a->words[i] != 0) - return false; - } - } + } while (++i < a->nwords); return true; } @@ -370,7 +385,6 @@ bms_subset_compare(const Bitmapset *a, const Bitmapset *b) { BMS_Comparison result; int shortlen; - int longlen; int i; /* Handle cases where either input is NULL */ @@ -385,7 +399,8 @@ bms_subset_compare(const Bitmapset *a, const Bitmapset *b) /* Check common words */ result = BMS_EQUAL; /* status so far */ shortlen = Min(a->nwords, b->nwords); - for (i = 0; i < shortlen; i++) + i = 0; + do { bitmapword aword = a->words[i]; bitmapword bword = b->words[i]; @@ -404,35 +419,21 @@ bms_subset_compare(const Bitmapset *a, const Bitmapset *b) return BMS_DIFFERENT; result = BMS_SUBSET1; } - } + } while (++i < shortlen); /* Check extra words */ if (a->nwords > b->nwords) { - longlen = a->nwords; - for (; i < longlen; i++) - { - if (a->words[i] != 0) - { - /* a is not a subset of b */ - if (result == BMS_SUBSET1) - return BMS_DIFFERENT; - result = BMS_SUBSET2; - } - } + /* if a has more words then a is not a subset of b */ + if (result == BMS_SUBSET1) + return BMS_DIFFERENT; + return BMS_SUBSET2; } else if (a->nwords < b->nwords) { - longlen = b->nwords; - for (; i < longlen; i++) - { - if (b->words[i] != 0) - { - /* b is not a subset of a */ - if (result == BMS_SUBSET2) - return BMS_DIFFERENT; - result = BMS_SUBSET1; - } - } + /* if b has more words then b is not a subset of a */ + if (result == BMS_SUBSET2) + return BMS_DIFFERENT; + return BMS_SUBSET1; } return result; } @@ -518,11 +519,12 @@ bms_overlap(const Bitmapset *a, const Bitmapset *b) return false; /* Check words in common */ shortlen = Min(a->nwords, b->nwords); - for (i = 0; i < shortlen; i++) + i = 0; + do { if ((a->words[i] & b->words[i]) != 0) return true; - } + } while (++i < shortlen); return false; } @@ -563,7 +565,6 @@ bms_overlap_list(const Bitmapset *a, const List *b) bool bms_nonempty_difference(const Bitmapset *a, const Bitmapset *b) { - int shortlen; int i; /* Handle cases where either input is NULL */ @@ -571,19 +572,16 @@ bms_nonempty_difference(const Bitmapset *a, const Bitmapset *b) return false; if (b == NULL) return true; - /* Check words in common */ - shortlen = Min(a->nwords, b->nwords); - for (i = 0; i < shortlen; i++) + /* if 'a' has more words then it must contain additional members */ + if (a->nwords > b->nwords) + return true; + /* Check all 'a' members are set in 'b' */ + i = 0; + do { if ((a->words[i] & ~b->words[i]) != 0) return true; - } - /* Check extra words in a */ - for (; i < a->nwords; i++) - { - if (a->words[i] != 0) - return true; - } + } while (++i < a->nwords); return false; } @@ -602,7 +600,8 @@ bms_singleton_member(const Bitmapset *a) if (a == NULL) elog(ERROR, "bitmapset is empty"); nwords = a->nwords; - for (wordnum = 0; wordnum < nwords; wordnum++) + wordnum = 0; + do { bitmapword w = a->words[wordnum]; @@ -613,9 +612,10 @@ bms_singleton_member(const Bitmapset *a) result = wordnum * BITS_PER_BITMAPWORD; result += bmw_rightmost_one_pos(w); } - } - if (result < 0) - elog(ERROR, "bitmapset is empty"); + } while (++wordnum < nwords); + + /* we don't expect non-NULL sets to be empty */ + Assert(result >= 0); return result; } @@ -640,7 +640,8 @@ bms_get_singleton_member(const Bitmapset *a, int *member) if (a == NULL) return false; nwords = a->nwords; - for (wordnum = 0; wordnum < nwords; wordnum++) + wordnum = 0; + do { bitmapword w = a->words[wordnum]; @@ -651,9 +652,10 @@ bms_get_singleton_member(const Bitmapset *a, int *member) result = wordnum * BITS_PER_BITMAPWORD; result += bmw_rightmost_one_pos(w); } - } - if (result < 0) - return false; + } while (++wordnum < nwords); + + /* we don't expect non-NULL sets to be empty */ + Assert(result >= 0); *member = result; return true; } @@ -671,14 +673,15 @@ bms_num_members(const Bitmapset *a) if (a == NULL) return 0; nwords = a->nwords; - for (wordnum = 0; wordnum < nwords; wordnum++) + wordnum = 0; + do { bitmapword w = a->words[wordnum]; /* No need to count the bits in a zero word */ if (w != 0) result += bmw_popcount(w); - } + } while (++wordnum < nwords); return result; } @@ -697,7 +700,8 @@ bms_membership(const Bitmapset *a) if (a == NULL) return BMS_EMPTY_SET; nwords = a->nwords; - for (wordnum = 0; wordnum < nwords; wordnum++) + wordnum = 0; + do { bitmapword w = a->words[wordnum]; @@ -707,34 +711,10 @@ bms_membership(const Bitmapset *a) return BMS_MULTIPLE; result = BMS_SINGLETON; } - } + } while (++wordnum < nwords); return result; } -/* - * bms_is_empty_internal - is a set empty? - * - * This is now used only locally, to detect cases where a function has - * computed an empty set that we must now get rid of. Hence, we can - * assume the input isn't NULL. - */ -static bool -bms_is_empty_internal(const Bitmapset *a) -{ - int nwords; - int wordnum; - - nwords = a->nwords; - for (wordnum = 0; wordnum < nwords; wordnum++) - { - bitmapword w = a->words[wordnum]; - - if (w != 0) - return false; - } - return true; -} - /* * These operations all "recycle" their non-const inputs, ie, either @@ -773,8 +753,11 @@ bms_add_member(Bitmapset *a, int x) a = (Bitmapset *) repalloc(a, BITMAPSET_SIZE(wordnum + 1)); a->nwords = wordnum + 1; /* zero out the enlarged portion */ - for (i = oldnwords; i < a->nwords; i++) + i = oldnwords; + do + { a->words[i] = 0; + } while (++i < a->nwords); } a->words[wordnum] |= ((bitmapword) 1 << bitnum); @@ -800,14 +783,31 @@ bms_del_member(Bitmapset *a, int x) return NULL; wordnum = WORDNUM(x); bitnum = BITNUM(x); - if (wordnum < a->nwords) - a->words[wordnum] &= ~((bitmapword) 1 << bitnum); - /* If we computed an empty result, we must return NULL */ - if (bms_is_empty_internal(a)) + + /* member can't exist. Return 'a' unmodified */ + if (unlikely(wordnum >= a->nwords)) + return a; + + a->words[wordnum] &= ~((bitmapword) 1 << bitnum); + + /* when last word becomes empty, trim off all trailing empty words */ + if (a->words[wordnum] == 0 && wordnum == a->nwords - 1) { + /* find the last non-empty word and make that the new final word */ + for (int i = wordnum - 1; i >= 0; i--) + { + if (a->words[i] != 0) + { + a->nwords = i + 1; + return a; + } + } + + /* the set is now empty */ pfree(a); return NULL; } + return a; } @@ -840,8 +840,11 @@ bms_add_members(Bitmapset *a, const Bitmapset *b) } /* And union the shorter input into the result */ otherlen = other->nwords; - for (i = 0; i < otherlen; i++) + i = 0; + do + { result->words[i] |= other->words[i]; + } while (++i < otherlen); if (result != a) pfree(a); return result; @@ -887,8 +890,11 @@ bms_add_range(Bitmapset *a, int lower, int upper) a = (Bitmapset *) repalloc(a, BITMAPSET_SIZE(uwordnum + 1)); a->nwords = uwordnum + 1; /* zero out the enlarged portion */ - for (i = oldnwords; i < a->nwords; i++) + i = oldnwords; + do + { a->words[i] = 0; + } while (++i < a->nwords); } wordnum = lwordnum = WORDNUM(lower); @@ -927,6 +933,7 @@ bms_add_range(Bitmapset *a, int lower, int upper) Bitmapset * bms_int_members(Bitmapset *a, const Bitmapset *b) { + int lastnonzero; int shortlen; int i; @@ -940,16 +947,25 @@ bms_int_members(Bitmapset *a, const Bitmapset *b) } /* Intersect b into a; we need never copy */ shortlen = Min(a->nwords, b->nwords); - for (i = 0; i < shortlen; i++) + lastnonzero = -1; + i = 0; + do + { a->words[i] &= b->words[i]; - for (; i < a->nwords; i++) - a->words[i] = 0; + + if (a->words[i] != 0) + lastnonzero = i; + } while (++i < shortlen); + /* If we computed an empty result, we must return NULL */ - if (bms_is_empty_internal(a)) + if (lastnonzero == -1) { pfree(a); return NULL; } + + /* get rid of trailing zero words */ + a->nwords = lastnonzero + 1; return a; } @@ -959,7 +975,6 @@ bms_int_members(Bitmapset *a, const Bitmapset *b) Bitmapset * bms_del_members(Bitmapset *a, const Bitmapset *b) { - int shortlen; int i; /* Handle cases where either input is NULL */ @@ -968,15 +983,44 @@ bms_del_members(Bitmapset *a, const Bitmapset *b) if (b == NULL) return a; /* Remove b's bits from a; we need never copy */ - shortlen = Min(a->nwords, b->nwords); - for (i = 0; i < shortlen; i++) - a->words[i] &= ~b->words[i]; - /* If we computed an empty result, we must return NULL */ - if (bms_is_empty_internal(a)) + if (a->nwords > b->nwords) { - pfree(a); - return NULL; + /* + * We'll never need to remove trailing zero words when 'a' has more + * words than 'b'. + */ + i = 0; + do + { + a->words[i] &= ~b->words[i]; + } while (++i < b->nwords); } + else + { + int lastnonzero = -1; + + /* we may need to remove trailing zero words from the result. */ + i = 0; + do + { + a->words[i] &= ~b->words[i]; + + /* remember the last non-zero word */ + if (a->words[i] != 0) + lastnonzero = i; + } while (++i < a->nwords); + + /* check if 'a' has become empty */ + if (lastnonzero == -1) + { + pfree(a); + return NULL; + } + + /* trim off any trailing zero words */ + a->nwords = lastnonzero + 1; + } + return a; } @@ -1009,8 +1053,11 @@ bms_join(Bitmapset *a, Bitmapset *b) } /* And union the shorter input into the result */ otherlen = other->nwords; - for (i = 0; i < otherlen; i++) + i = 0; + do + { result->words[i] |= other->words[i]; + } while (++i < otherlen); if (other != result) /* pure paranoia */ pfree(other); return result; @@ -1140,28 +1187,14 @@ bms_prev_member(const Bitmapset *a, int prevbit) /* * bms_hash_value - compute a hash key for a Bitmapset - * - * Note: we must ensure that any two bitmapsets that are bms_equal() will - * hash to the same value; in practice this means that trailing all-zero - * words must not affect the result. Hence we strip those before applying - * hash_any(). */ uint32 bms_hash_value(const Bitmapset *a) { - int lastword; - if (a == NULL) return 0; /* All empty sets hash to 0 */ - for (lastword = a->nwords; --lastword >= 0;) - { - if (a->words[lastword] != 0) - break; - } - if (lastword < 0) - return 0; /* All empty sets hash to 0 */ return DatumGetUInt32(hash_any((const unsigned char *) a->words, - (lastword + 1) * sizeof(bitmapword))); + a->nwords * sizeof(bitmapword))); } /*