On Wed, Nov 16, 2022 at 12:33 PM Masahiko Sawada <sawada.m...@gmail.com>
wrote:
>
> On Wed, Nov 16, 2022 at 1:46 PM John Naylor
> <john.nay...@enterprisedb.com> wrote:
> >
> >
> > On Tue, Nov 15, 2022 at 11:59 AM Masahiko Sawada <sawada.m...@gmail.com>
wrote:
> > > Thanks! Please let me know if there is something I can help with.
> >
> > I didn't get very far because the tests fail on 0004 in rt_verify_node:
> >
> > TRAP: failed Assert("n4->chunks[i - 1] < n4->chunks[i]"), File:
"../src/backend/lib/radixtree.c", Line: 2186, PID: 18242
>
> Which tests do you use to get this assertion failure? I've confirmed
> there is a bug in 0005 patch but without it, "make check-world"
> passed.

Hmm, I started over and rebuilt and it didn't reproduce. Not sure what
happened, sorry for the noise.

I'm attaching a test I wrote to stress test branch prediction in search,
and while trying it out I found two possible issues.

It's based on the random int load test, but tests search speed. Run like
this:

select * from bench_search_random_nodes(10 * 1000 * 1000)

It also takes some care to include all the different node kinds,
restricting the possible keys by AND-ing with a filter. Here's a simple
demo:

filter = ((uint64)1<<40)-1;
LOG:  num_keys = 9999967, height = 4, n4 = 17513814, n32 = 6320, n128 =
62663, n256 = 3130

Just using random integers leads to >99% using the smallest node. I wanted
to get close to having the same number of each, but that's difficult while
still using random inputs. I ended up using

filter = (((uint64) 0x7F<<32) | (0x07<<24) | (0xFF<<16) | 0xFF)

which gives

LOG:  num_keys = 9291812, height = 4, n4 = 262144, n32 = 79603, n128 =
182670, n256 = 1024

Which seems okay for the task. One puzzling thing I found while trying
various filters is that sometimes the reported tree height would change.
For example:

filter = (((uint64) 1<<32) | (0xFF<<24));
LOG:  num_keys = 9999944, height = 7, n4 = 47515559, n32 = 6209, n128 =
62632, n256 = 3161

1) Any idea why the tree height would be reported as 7 here? I didn't
expect that.

2) It seems that 0004 actually causes a significant slowdown in this test
(as in the attached, using the second filter above and with turboboost
disabled):

v9 0003: 2062 2051 2050
v9 0004: 2346 2316 2321

That means my idea for the pointer struct might have some problems, at
least as currently implemented. Maybe in the course of separating out and
polishing that piece, an inefficiency will fall out. Or, it might be
another reason to template local and shared separately. Not sure yet. I
also haven't tried to adjust this test for the shared memory case.

--
John Naylor
EDB: http://www.enterprisedb.com
diff --git a/contrib/bench_radix_tree/bench_radix_tree--1.0.sql 
b/contrib/bench_radix_tree/bench_radix_tree--1.0.sql
index 0874201d7e..e0205b364e 100644
--- a/contrib/bench_radix_tree/bench_radix_tree--1.0.sql
+++ b/contrib/bench_radix_tree/bench_radix_tree--1.0.sql
@@ -43,6 +43,14 @@ returns record
 as 'MODULE_PATHNAME'
 LANGUAGE C STRICT VOLATILE PARALLEL UNSAFE;
 
+create function bench_search_random_nodes(
+cnt int8,
+OUT mem_allocated int8,
+OUT search_ms int8)
+returns record
+as 'MODULE_PATHNAME'
+LANGUAGE C STRICT VOLATILE PARALLEL UNSAFE;
+
 create function bench_fixed_height_search(
 fanout int4,
 OUT fanout int4,
diff --git a/contrib/bench_radix_tree/bench_radix_tree.c 
b/contrib/bench_radix_tree/bench_radix_tree.c
index 7abb237e96..a43fc61c2d 100644
--- a/contrib/bench_radix_tree/bench_radix_tree.c
+++ b/contrib/bench_radix_tree/bench_radix_tree.c
@@ -29,6 +29,7 @@ PG_FUNCTION_INFO_V1(bench_seq_search);
 PG_FUNCTION_INFO_V1(bench_shuffle_search);
 PG_FUNCTION_INFO_V1(bench_load_random_int);
 PG_FUNCTION_INFO_V1(bench_fixed_height_search);
+PG_FUNCTION_INFO_V1(bench_search_random_nodes);
 
 static uint64
 tid_to_key_off(ItemPointer tid, uint32 *off)
@@ -347,6 +348,77 @@ bench_load_random_int(PG_FUNCTION_ARGS)
        PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, 
nulls)));
 }
 
+/* copy of splitmix64() */
+static uint64
+hash64(uint64 x)
+{
+       x ^= x >> 30;
+       x *= UINT64CONST(0xbf58476d1ce4e5b9);
+       x ^= x >> 27;
+       x *= UINT64CONST(0x94d049bb133111eb);
+       x ^= x >> 31;
+       return x;
+}
+
+/* attempts to have a relatively even population of node kinds */
+Datum
+bench_search_random_nodes(PG_FUNCTION_ARGS)
+{
+       uint64          cnt = (uint64) PG_GETARG_INT64(0);
+       radix_tree *rt;
+       TupleDesc       tupdesc;
+       TimestampTz start_time,
+                               end_time;
+       long            secs;
+       int                     usecs;
+       int64           search_time_ms;
+       Datum           values[2] = {0};
+       bool            nulls[2] = {0};
+       /* from trial and error */
+       const uint64 filter = (((uint64) 0x7F<<32) | (0x07<<24) | (0xFF<<16) | 
0xFF);
+
+       /* Build a tuple descriptor for our result type */
+       if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
+               elog(ERROR, "return type must be a row type");
+
+       rt = rt_create(CurrentMemoryContext);
+
+       for (uint64 i = 0; i < cnt; i++)
+       {
+               const uint64 hash = hash64(i);
+               const uint64 key = hash & filter;
+
+               rt_set(rt, key, key);
+       }
+
+       start_time = GetCurrentTimestamp();
+       for (uint64 i = 0; i < cnt; i++)
+       {
+               const uint64 hash = hash64(i);
+               const uint64 key = hash & filter;
+               uint64          val;
+               volatile bool ret;              /* prevent calling rt_search 
from being
+                                                                * optimized 
out */
+
+               CHECK_FOR_INTERRUPTS();
+
+               ret = rt_search(rt, key, &val);
+               (void) ret;
+       }
+       end_time = GetCurrentTimestamp();
+
+       TimestampDifference(start_time, end_time, &secs, &usecs);
+       search_time_ms = secs * 1000 + usecs / 1000;
+
+       rt_stats(rt);
+
+       values[0] = Int64GetDatum(rt_memory_usage(rt));
+       values[1] = Int64GetDatum(search_time_ms);
+
+       rt_free(rt);
+       PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, 
nulls)));
+}
+
 Datum
 bench_fixed_height_search(PG_FUNCTION_ARGS)
 {

Reply via email to