On Fri, Oct 7, 2022 at 2:29 PM John Naylor <john.nay...@enterprisedb.com> wrote: > > On Fri, Sep 16, 2022 at 1:01 PM Masahiko Sawada <sawada.m...@gmail.com> wrote: > > In addition to two patches, I've attached the third patch. It's not > > part of radix tree implementation but introduces a contrib module > > bench_radix_tree, a tool for radix tree performance benchmarking. It > > measures loading and lookup performance of both the radix tree and a > > flat array. > > Hi Masahiko, I've been using these benchmarks, along with my own variations, > to try various things that I've mentioned. I'm long overdue for an update, > but the picture is not yet complete.
Thanks! > For now, I have two questions that I can't figure out on my own: > > 1. There seems to be some non-obvious limit on the number of keys that are > loaded (or at least what the numbers report). This is independent of the > number of tids per block. Example below: > > john=# select * from bench_shuffle_search(0, 8*1000*1000); > NOTICE: num_keys = 8000000, height = 3, n4 = 0, n16 = 1, n32 = 0, n128 = > 250000, n256 = 981 > nkeys | rt_mem_allocated | array_mem_allocated | rt_load_ms | > array_load_ms | rt_search_ms | array_serach_ms > ---------+------------------+---------------------+------------+---------------+--------------+----------------- > 8000000 | 268435456 | 48000000 | 661 | > 29 | 276 | 389 > > john=# select * from bench_shuffle_search(0, 9*1000*1000); > NOTICE: num_keys = 8388608, height = 3, n4 = 0, n16 = 1, n32 = 0, n128 = > 262144, n256 = 1028 > nkeys | rt_mem_allocated | array_mem_allocated | rt_load_ms | > array_load_ms | rt_search_ms | array_serach_ms > ---------+------------------+---------------------+------------+---------------+--------------+----------------- > 8388608 | 276824064 | 54000000 | 718 | > 33 | 311 | 446 > > The array is the right size, but nkeys hasn't kept pace. Can you reproduce > this? Attached is the patch I'm using to show the stats when running the > test. (Side note: The numbers look unfavorable for radix tree because I'm > using 1 tid per block here.) Yes, I can reproduce this. In tid_to_key_off() we need to cast to uint64 when packing offset number and block number: tid_i = ItemPointerGetOffsetNumber(tid); tid_i |= ItemPointerGetBlockNumber(tid) << shift; > > 2. I found that bench_shuffle_search() is much *faster* for traditional > binary search on an array than bench_seq_search(). I've found this to be true > in every case. This seems counterintuitive to me -- any idea why this is? > Example: > > john=# select * from bench_seq_search(0, 1000000); > NOTICE: num_keys = 1000000, height = 2, n4 = 0, n16 = 0, n32 = 31251, n128 = > 1, n256 = 122 > nkeys | rt_mem_allocated | array_mem_allocated | rt_load_ms | > array_load_ms | rt_search_ms | array_serach_ms > ---------+------------------+---------------------+------------+---------------+--------------+----------------- > 1000000 | 10199040 | 180000000 | 168 | > 106 | 827 | 3348 > > john=# select * from bench_shuffle_search(0, 1000000); > NOTICE: num_keys = 1000000, height = 2, n4 = 0, n16 = 0, n32 = 31251, n128 = > 1, n256 = 122 > nkeys | rt_mem_allocated | array_mem_allocated | rt_load_ms | > array_load_ms | rt_search_ms | array_serach_ms > ---------+------------------+---------------------+------------+---------------+--------------+----------------- > 1000000 | 10199040 | 180000000 | 171 | > 107 | 827 | 1400 > Ugh, in shuffle_itemptrs(), we shuffled itemptrs instead of itemptr: for (int i = 0; i < nitems - 1; i++) { int j = shuffle_randrange(&state, i, nitems - 1); ItemPointerData t = itemptrs[j]; itemptrs[j] = itemptrs[i]; itemptrs[i] = t; With the fix, the results on my environment were: postgres(1:4093192)=# select * from bench_seq_search(0, 10000000); 2022-10-07 16:57:03.124 JST [4093192] LOG: num_keys = 10000000, height = 3, n4 = 0, n16 = 1, n32 = 312500, n128 = 0, n256 = 1226 nkeys | rt_mem_allocated | array_mem_allocated | rt_load_ms | array_load_ms | rt_search_ms | array_serach_ms ----------+------------------+---------------------+------------+---------------+--------------+----------------- 10000000 | 101826560 | 1800000000 | 846 | 486 | 6096 | 21128 (1 row) Time: 28975.566 ms (00:28.976) postgres(1:4093192)=# select * from bench_shuffle_search(0, 10000000); 2022-10-07 16:57:37.476 JST [4093192] LOG: num_keys = 10000000, height = 3, n4 = 0, n16 = 1, n32 = 312500, n128 = 0, n256 = 1226 nkeys | rt_mem_allocated | array_mem_allocated | rt_load_ms | array_load_ms | rt_search_ms | array_serach_ms ----------+------------------+---------------------+------------+---------------+--------------+----------------- 10000000 | 101826560 | 1800000000 | 845 | 484 | 32700 | 152583 (1 row) I've attached a patch to fix them. Also, I realized that bsearch() could be optimized out so I added code to prevent it: Regards, -- Masahiko Sawada PostgreSQL Contributors Team RDS Open Source Databases Amazon Web Services: https://aws.amazon.com
diff --git a/contrib/bench_radix_tree/bench_radix_tree.c b/contrib/bench_radix_tree/bench_radix_tree.c index 0778da2d7b..d4c8040357 100644 --- a/contrib/bench_radix_tree/bench_radix_tree.c +++ b/contrib/bench_radix_tree/bench_radix_tree.c @@ -27,20 +27,17 @@ PG_FUNCTION_INFO_V1(bench_shuffle_search); PG_FUNCTION_INFO_V1(bench_load_random_int); PG_FUNCTION_INFO_V1(bench_fixed_height_search); -static radix_tree *rt = NULL; -static ItemPointer itemptrs = NULL; - static uint64 tid_to_key_off(ItemPointer tid, uint32 *off) { - uint32 upper; + uint64 upper; uint32 shift = pg_ceil_log2_32(MaxHeapTuplesPerPage); int64 tid_i; Assert(ItemPointerGetOffsetNumber(tid) < MaxHeapTuplesPerPage); tid_i = ItemPointerGetOffsetNumber(tid); - tid_i |= ItemPointerGetBlockNumber(tid) << shift; + tid_i |= (uint64) ItemPointerGetBlockNumber(tid) << shift; /* log(sizeof(uint64) * BITS_PER_BYTE, 2) = log(64, 2) = 6 */ *off = tid_i & ((1 << 6) - 1); @@ -70,10 +67,10 @@ shuffle_itemptrs(ItemPointer itemptr, uint64 nitems) for (int i = 0; i < nitems - 1; i++) { int j = shuffle_randrange(&state, i, nitems - 1); - ItemPointerData t = itemptrs[j]; + ItemPointerData t = itemptr[j]; - itemptrs[j] = itemptrs[i]; - itemptrs[i] = t; + itemptr[j] = itemptr[i]; + itemptr[i] = t; } } @@ -138,6 +135,8 @@ bench_search(FunctionCallInfo fcinfo, bool shuffle) { BlockNumber minblk = PG_GETARG_INT32(0); BlockNumber maxblk = PG_GETARG_INT32(1); + ItemPointer itemptrs = NULL; + radix_tree *rt = NULL; uint64 ntids; uint64 key; uint64 last_key = PG_UINT64_MAX;; @@ -185,6 +184,8 @@ bench_search(FunctionCallInfo fcinfo, bool shuffle) TimestampDifference(start_time, end_time, &secs, &usecs); rt_load_ms = secs * 1000 + usecs / 1000; + rt_stats(rt); + /* measure the load time of the array */ itemptrs = MemoryContextAllocHuge(CurrentMemoryContext, sizeof(ItemPointerData) * ntids); @@ -210,12 +211,14 @@ bench_search(FunctionCallInfo fcinfo, bool shuffle) ItemPointer tid = &(tids[i]); uint64 key, val; uint32 off; + volatile bool ret; /* prevent calling rt_search from being optimized out */ CHECK_FOR_INTERRUPTS(); key = tid_to_key_off(tid, &off); - rt_search(rt, key, &val); + ret = rt_search(rt, key, &val); + (void) ret; } end_time = GetCurrentTimestamp(); TimestampDifference(start_time, end_time, &secs, &usecs); @@ -226,12 +229,16 @@ bench_search(FunctionCallInfo fcinfo, bool shuffle) for (int i = 0; i < ntids; i++) { ItemPointer tid = &(tids[i]); + volatile bool ret; /* prevent calling bsearch from being optimized out */ - bsearch((void *) tid, - (void *) itemptrs, - ntids, - sizeof(ItemPointerData), - vac_cmp_itemptr); + CHECK_FOR_INTERRUPTS(); + + ret = bsearch((void *) tid, + (void *) itemptrs, + ntids, + sizeof(ItemPointerData), + vac_cmp_itemptr); + (void) ret; } end_time = GetCurrentTimestamp(); TimestampDifference(start_time, end_time, &secs, &usecs); @@ -294,6 +301,8 @@ bench_load_random_int(PG_FUNCTION_ARGS) TimestampDifference(start_time, end_time, &secs, &usecs); load_time_ms = secs * 1000 + usecs / 1000; + rt_stats(rt); + MemSet(nulls, false, sizeof(nulls)); values[0] = Int64GetDatum(rt_memory_usage(rt)); values[1] = Int64GetDatum(load_time_ms);