This is an automated email from the ASF dual-hosted git repository. nickva pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/couchdb-jiffy.git
commit f84f7845d33fe77d5d69ae351d7c2fe75ff5cb8b Author: Nick Vatamaniuc <[email protected]> AuthorDate: Fri Apr 3 23:44:49 2026 -0400 Faster map encoding with enif_make_map_from_arrays Optimize map encoding: instead of updating the map element by element, use `enif_make_map_from_arrays` to create the map in a single API call. The documentation for the `enif_make_map_from_arrays` indicates `If successful, this function sets *map_out to the new map and returns true. Returns false there are any duplicate keys.` However, it turns out this API has a bug, at least as of the time of this commit (Apr, 2026), where it does not properly detect duplicates and instead can return an invalid map object [1]. To work around the issue, if we see a large map, we first remove the duplicates as we do for props, and only then call `enif_make_map_from_arrays` Since `enif_make_map_from_arrays` is newer we bump the minimum Erlang version by 1 version number: from 20 to 21. That still gives as an 8 years worth of Erlang versions support. And lastly, since the "seen" deduplication set was the only thing keeping us from dropping C++, implement a simple open addressing hash table to replace and it drop C++ as a dependency. The hash table is very basic since we know its size ahead of time we don't have to resize it. We don't remove objects from it and we get to use enif_* functions for hashing and comparison and such. So it's a tiny bit more code but it's dropping a large dependency. Overall, this yields a decent 1.5x-2x speed-ups for object-heavy benchmarks, at the expense of a slightly messier code and losing OTP 20 support: ``` Formatting results... ===== With input Blockchain ===== Name ips average deviation median 99th % jiffy 8.72 K 114.73 us +/-15.02% 106.39 us 186.28 us jiffy (master) 4.85 K 206.03 us +/-65.05% 164.38 us 869.99 us Comparison: jiffy 8.72 K jiffy (master) 4.85 K - 1.80x slower +91.30 us ===== With input Giphy ===== Name ips average deviation median 99th % jiffy 791.56 1.26 ms +/-48.35% 1.27 ms 2.26 ms jiffy (master) 510.21 1.96 ms +/-62.14% 1.84 ms 3.31 ms Comparison: jiffy 791.56 jiffy (master) 510.21 - 1.55x slower +0.70 ms ===== With input GitHub ===== Name ips average deviation median 99th % jiffy 2.99 K 334.18 us +/-31.88% 292.58 us 672.58 us jiffy (master) 1.58 K 634.12 us +/-107.87% 636.50 us 1112.16 us Comparison: jiffy 2.99 K jiffy (master) 1.58 K - 1.90x slower +299.94 us ===== With input GovTrack ===== Name ips average deviation median 99th % jiffy 23.28 42.96 ms +/-8.28% 41.99 ms 57.78 ms jiffy (master) 9.98 100.21 ms +/-8.27% 98.87 ms 133.05 ms Comparison: jiffy 23.28 jiffy (master) 9.98 - 2.33x slower +57.25 ms ===== With input Issue 90 ===== Name ips average deviation median 99th % jiffy 51.78 19.31 ms +/-11.90% 18.46 ms 27.40 ms jiffy (master) 45.29 22.08 ms +/-6.48% 21.50 ms 28.60 ms Comparison: jiffy 51.78 jiffy (master) 45.29 - 1.14x slower +2.77 ms ===== With input JSON Generator ===== Name ips average deviation median 99th % jiffy 774.15 1.29 ms +/-34.64% 1.26 ms 2.23 ms jiffy (master) 326.05 3.07 ms +/-9.14% 3.01 ms 4.22 ms Comparison: jiffy 774.15 jiffy (master) 326.05 - 2.37x slower +1.78 ms ===== With input JSON Generator (Pretty) ===== Name ips average deviation median 99th % jiffy 843.42 1.19 ms +/-17.96% 1.21 ms 1.89 ms jiffy (master) 429.22 2.33 ms +/-12.48% 2.23 ms 3.38 ms Comparison: jiffy 843.42 jiffy (master) 429.22 - 1.96x slower +1.14 ms ===== With input Large Numbers ===== Name ips average deviation median 99th % jiffy (master) 268.80 3.72 ms +/-9.06% 3.69 ms 4.86 ms jiffy 255.66 3.91 ms +/-52.92% 3.63 ms 10.46 ms Comparison: jiffy (master) 268.80 jiffy 255.66 - 1.05x slower +0.191 ms ===== With input Pokedex ===== Name ips average deviation median 99th % jiffy 881.07 1.13 ms +/-98.50% 0.99 ms 2.13 ms jiffy (master) 417.85 2.39 ms +/-19.26% 2.43 ms 3.94 ms Comparison: jiffy 881.07 jiffy (master) 417.85 - 2.11x slower +1.26 ms ===== With input UTF-8 escaped ===== Name ips average deviation median 99th % jiffy (master) 8.80 K 113.69 us +/-26.59% 109.55 us 186.69 us jiffy 8.12 K 123.12 us +/-63.53% 115.93 us 234.39 us Comparison: jiffy (master) 8.80 K jiffy 8.12 K - 1.08x slower +9.42 us ===== With input UTF-8 unescaped ===== Name ips average deviation median 99th % jiffy 13.05 K 76.63 us +/-35.80% 72.95 us 140.51 us jiffy (master) 12.56 K 79.64 us +/-12.14% 77.81 us 123.34 us Comparison: jiffy 13.05 K jiffy (master) 12.56 K - 1.04x slower +3.01 us ``` [1] https://github.com/erlang/otp/issues/10975 --- .github/workflows/ci.yml | 2 +- c_src/objects.c | 230 ++++++++++++++++++++++++++++++++++++ c_src/objects.cc | 77 ------------ rebar.config | 9 +- test/jiffy_06_object_tests.erl | 54 +++++++++ test/jiffy_16_dedupe_keys_tests.erl | 37 ++++++ 6 files changed, 324 insertions(+), 85 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 46a1090..f8c89cb 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -17,7 +17,7 @@ jobs: strategy: fail-fast: false matrix: - otp_version: [20,23,26,27,28] + otp_version: [21,23,26,27,28] os: [ubuntu-latest] rebar: [rebar, rebar3] diff --git a/c_src/objects.c b/c_src/objects.c new file mode 100644 index 0000000..184606d --- /dev/null +++ b/c_src/objects.c @@ -0,0 +1,230 @@ +// This file is part of Jiffy released under the MIT license. +// See the LICENSE file for more information. + +#include <assert.h> +#include <string.h> + +#include "erl_nif.h" + +// Must match MAP_SMALL_MAP_LIMIT in OTP's erts/emulator/beam/erl_map.h. +// enif_make_map_from_arrays correctly rejects duplicate keys for +// flatmaps but not for hashmaps (see OTP issue #10975) we rely on +// detecting when we can skip deduplicating based on the small map +// size limit. +#define JIFFY_MAP_SMALL_MAP_LIMIT 32 + +// Limit up to when we'll allocate the hash table on the stack. +// Must be a power of 2, since we're using a bitmask. Also, we want +// to stay well below 4KB on the stack. That can be checked wit +// something like: +// clang -O3 -S -fverbose-asm c_src/objects.c -I$(erlnif_include) -Ic_src && grep rsp objects.s +// subq $2680, %rsp +// +#define HT_STACK_SLOTS 128 + +typedef struct { + ERL_NIF_TERM key; + int used; +} ht_slot; + +// We're using masks, so get the next power of 2 starting with 16. +static unsigned int +ht_size_power_of_2(unsigned int count) +{ + unsigned int p = 16; + unsigned int target = count * 2; + while(p < target) { + p <<= 1; + } + return p; +} + +// Insert key into table. Returns 1 if inserted, 0 if duplicate. For the salt +// pass the monotonic nanosecond time or something like that. +static inline int +ht_insert(ErlNifEnv* env, ht_slot* table, unsigned int mask, ERL_NIF_TERM key, ErlNifUInt64 salt) +{ + ErlNifUInt64 h = enif_hash(ERL_NIF_INTERNAL_HASH, key, salt); + unsigned int idx = (unsigned int)(h & mask); + while(table[idx].used) { + if(enif_compare(table[idx].key, key) == 0) { + return 0; + } + idx = (idx + 1) & mask; + } + table[idx].key = key; + table[idx].used = 1; + return 1; +} + + +static int +make_map(ErlNifEnv* env, ERL_NIF_TERM pairs, ERL_NIF_TERM* out) +{ + ERL_NIF_TERM ret; + ERL_NIF_TERM key; + ERL_NIF_TERM val; + + // Get the item count up front so we can size the arrays and + // (if needed) the dedup hash table. + unsigned int list_len = 0; + int rv = enif_get_list_length(env, pairs, &list_len); + assert(rv && "pairs must be a list"); + assert((list_len % 2) == 0 && "Unbalanced object pairs."); + unsigned int count = list_len / 2; + + if(count == 0) { + *out = enif_make_new_map(env); + return 1; + } + + // Stack allocation is a pointer bump. If we don't initialize and use + // it, it should not be a performance hit. Using alloca is another + // option, but that one is non-standard so avoid it for now. + ERL_NIF_TERM small_keys[JIFFY_MAP_SMALL_MAP_LIMIT]; + ERL_NIF_TERM small_vals[JIFFY_MAP_SMALL_MAP_LIMIT]; + ERL_NIF_TERM* keys = (count <= JIFFY_MAP_SMALL_MAP_LIMIT) + ? small_keys + : (ERL_NIF_TERM*) enif_alloc(count * sizeof(ERL_NIF_TERM)); + ERL_NIF_TERM* vals = (count <= JIFFY_MAP_SMALL_MAP_LIMIT) + ? small_vals + : (ERL_NIF_TERM*) enif_alloc(count * sizeof(ERL_NIF_TERM)); + + // Go in reverse order so that last write wins. It's just the behavior + // we had before and we're preserving it here for backward compatibility. + unsigned int i = count; + while(enif_get_list_cell(env, pairs, &val, &pairs)) { + if(!enif_get_list_cell(env, pairs, &key, &pairs)) { + assert(0 && "Unbalanced object pairs."); + } + --i; + keys[i] = key; + vals[i] = val; + } + + // With OTP issue #10975 we know enif_make_map_from_arrays works for + // small maps (<=32 keys), so we can safely use it and not dedup first. + if(count <= JIFFY_MAP_SMALL_MAP_LIMIT) { + rv = enif_make_map_from_arrays(env, keys, vals, count, &ret); + if(rv) { + *out = ret; + if(keys != small_keys) { + enif_free(keys); + } + if(vals != small_vals) { + enif_free(vals); + } + return 1; + } + // Found dups: fall through, dedup, and try again. + } + + // Build a hash table for dedup. Walk arrays from the back so the + // last JSON value for each key is seen first (last-write-wins). + unsigned int ht_size = ht_size_power_of_2(count); + ht_slot stack_table[HT_STACK_SLOTS]; + ht_slot* table = (ht_size <= HT_STACK_SLOTS) + ? stack_table + : (ht_slot*) enif_alloc(ht_size * sizeof(ht_slot)); + memset(table, 0, ht_size * sizeof(ht_slot)); + unsigned int mask = ht_size - 1; + ErlNifUInt64 salt = (ErlNifUInt64) enif_monotonic_time(ERL_NIF_NSEC); + + unsigned int unique = 0; + for(int j = (int)count - 1; j >= 0; j--) { + if(ht_insert(env, table, mask, keys[j], salt)) { + unique++; + keys[count - unique] = keys[j]; + vals[count - unique] = vals[j]; + } + } + + if(table != stack_table) { + enif_free(table); + } + + rv = enif_make_map_from_arrays(env, + keys + count - unique, + vals + count - unique, + unique, &ret); + assert(rv && "enif_make_map_from_arrays failed after dedup"); + + if(keys != small_keys) { + enif_free(keys); + } + if(vals != small_vals) { + enif_free(vals); + } + + *out = ret; + return 1; +} + + +// Build an EJSON proplist {[{key, val}, ...]} from the interleaved pairs list. +// +// When dedupe_keys is set, only the last occurrence of each key is kept +// (last-wins). +// +static int +make_proplist(ErlNifEnv* env, ERL_NIF_TERM pairs, ERL_NIF_TERM* out, + int dedupe_keys) +{ + ERL_NIF_TERM ret; + ERL_NIF_TERM key; + ERL_NIF_TERM val; + + ht_slot stack_table[HT_STACK_SLOTS]; + ht_slot* table = NULL; + unsigned int mask = 0; + ErlNifUInt64 salt = 0; + + if(dedupe_keys) { + unsigned int list_len = 0; + int rv = enif_get_list_length(env, pairs, &list_len); + assert(rv && "pairs must be a list"); + unsigned int count = list_len / 2; + unsigned int ht_size = ht_size_power_of_2(count > 0 ? count : 1); + table = (ht_size <= HT_STACK_SLOTS) + ? stack_table + : (ht_slot*) enif_alloc(ht_size * sizeof(ht_slot)); + memset(table, 0, ht_size * sizeof(ht_slot)); + mask = ht_size - 1; + salt = (ErlNifUInt64) enif_monotonic_time(ERL_NIF_NSEC); + } + + ret = enif_make_list(env, 0); + while(enif_get_list_cell(env, pairs, &val, &pairs)) { + if(!enif_get_list_cell(env, pairs, &key, &pairs)) { + assert(0 && "Unbalanced object pairs."); + } + if(dedupe_keys) { + if(ht_insert(env, table, mask, key, salt)) { + val = enif_make_tuple2(env, key, val); + ret = enif_make_list_cell(env, val, ret); + } + } else { + val = enif_make_tuple2(env, key, val); + ret = enif_make_list_cell(env, val, ret); + } + } + + if(dedupe_keys && table != stack_table) { + enif_free(table); + } + + *out = enif_make_tuple1(env, ret); + return 1; +} + + +int +make_object(ErlNifEnv* env, ERL_NIF_TERM pairs, ERL_NIF_TERM* out, + int ret_map, int dedupe_keys) +{ + if(ret_map) { + return make_map(env, pairs, out); + } else { + return make_proplist(env, pairs, out, dedupe_keys); + } +} diff --git a/c_src/objects.cc b/c_src/objects.cc deleted file mode 100644 index 121e062..0000000 --- a/c_src/objects.cc +++ /dev/null @@ -1,77 +0,0 @@ -// This file is part of Jiffy released under the MIT license. -// See the LICENSE file for more information. - -#include <string> - -#if defined(__cplusplus) && (__cplusplus >= 201103L) - #include <unordered_set> - typedef std::unordered_set<std::string> string_set; -#else - #include <set> - typedef std::set<std::string> string_set; -#endif - -#include <assert.h> - -#include "erl_nif.h" - -#define BEGIN_C extern "C" { -#define END_C } - -BEGIN_C - -int -make_object(ErlNifEnv* env, ERL_NIF_TERM pairs, ERL_NIF_TERM* out, - int ret_map, int dedupe_keys) -{ - ERL_NIF_TERM ret; - ERL_NIF_TERM key; - ERL_NIF_TERM val; - - string_set seen; - - ERL_NIF_TERM old_val; - - if(ret_map) { - ret = enif_make_new_map(env); - while(enif_get_list_cell(env, pairs, &val, &pairs)) { - if(!enif_get_list_cell(env, pairs, &key, &pairs)) { - assert(0 == 1 && "Unbalanced object pairs."); - } - if(!enif_get_map_value(env, ret, key, &old_val)) { - if(!enif_make_map_put(env, ret, key, val, &ret)) { - return 0; - } - } - } - *out = ret; - return 1; - } - - ret = enif_make_list(env, 0); - while(enif_get_list_cell(env, pairs, &val, &pairs)) { - if(!enif_get_list_cell(env, pairs, &key, &pairs)) { - assert(0 == 1 && "Unbalanced object pairs."); - } - if(dedupe_keys) { - ErlNifBinary bin; - if(!enif_inspect_binary(env, key, &bin)) { - return 0; - } - std::string skey((char*) bin.data, bin.size); - if(seen.count(skey) == 0) { - seen.insert(skey); - val = enif_make_tuple2(env, key, val); - ret = enif_make_list_cell(env, val, ret); - } - } else { - val = enif_make_tuple2(env, key, val); - ret = enif_make_list_cell(env, val, ret); - } - } - *out = enif_make_tuple1(env, ret); - - return 1; -} - -END_C diff --git a/rebar.config b/rebar.config index e86c145..960b5ba 100644 --- a/rebar.config +++ b/rebar.config @@ -1,9 +1,8 @@ -{require_min_otp_vsn, "20"}. +{require_min_otp_vsn, "21"}. {port_specs, [ {"priv/jiffy.so", [ "c_src/*.c", - "c_src/*.cc", "c_src/ryu/*.c" ]} ]}. @@ -17,13 +16,9 @@ {"(linux|solaris|freebsd|netbsd|openbsd|dragonfly|darwin|gnu)", "CFLAGS", "$CFLAGS -Ic_src/ -g -Wall $FLTO_FLAG -Werror -O3 -fvisibility=hidden"}, - {"(linux|solaris|freebsd|netbsd|openbsd|dragonfly|darwin|gnu)", - "CXXFLAGS", "$CXXFLAGS -Ic_src/ -g -Wall $FLTO_FLAG -Werror -O3 -fvisibility=hidden"}, {"(linux|solaris|freebsd|netbsd|openbsd|dragonfly|darwin|gnu)", - "LDFLAGS", "$LDFLAGS $FLTO_FLAG -lstdc++"}, - - {"win32", "CXXFLAGS", "$CXXFLAGS /O2 /DNDEBUG"} + "LDFLAGS", "$LDFLAGS $FLTO_FLAG"} ]}. {eunit_opts, [ diff --git a/test/jiffy_06_object_tests.erl b/test/jiffy_06_object_tests.erl index 67e2283..db43c92 100644 --- a/test/jiffy_06_object_tests.erl +++ b/test/jiffy_06_object_tests.erl @@ -67,3 +67,57 @@ cases(error) -> <<"{\"key\": 123 true">>, <<"{\"key\": 123,}">> ]. + + +% We put terms for large objects (> 32 keys) on the heap vs on the stack. So we +% have to ensure we test large object as well not just smaller ones to exercise +% that path. +% +large_object_maps_test_() -> + Opts = [return_maps], + [ + {"20 duplicate keys (10 as 0, 10 as 1)", + fun() -> + KVs = [{I rem 2, I} || I <- lists:seq(1, 20)], + {Json, Expected} = large_obj_kvs(KVs), + round_trip(Json, Expected, Opts) + end}, + {"25 keys", + fun() -> + {Json, Expected} = large_obj(25), + round_trip(Json, Expected, Opts) + end}, + {"100 keys", + fun() -> + {Json, Expected} = large_obj(100), + round_trip(Json, Expected, Opts) + end}, + {"1000 keys", + fun() -> + {Json, Expected} = large_obj(1000), + round_trip(Json, Expected, Opts) + end}, + {"100 duplicate keys (50 as 0, 50 as 1)", + fun() -> + KVs = [{I rem 2, I} || I <- lists:seq(1, 100)], + {Json, Expected} = large_obj_kvs(KVs), + round_trip(Json, Expected, Opts) + end} + ]. + +round_trip(Json, Expected, Opts) -> + Decoded = dec(Json, Opts), + ?assertEqual(Expected, Decoded), + Json1 = enc(Decoded), + Decoded1 = dec(Json1, Opts), + ?assertEqual(Expected, Decoded1). + +large_obj(N) when is_integer(N) -> + large_obj_kvs([{I, I} || I <- lists:seq(0, N - 1)]). + +large_obj_kvs(KVs) when is_list(KVs) -> + KVs1 = [{"k" ++ integer_to_list(K), V} || {K, V} <- KVs], + Chunks = [io_lib:format("\"~s\":~B", [K, V]) || {K, V} <- KVs1], + Json = iolist_to_binary(["{", lists:join(",", Chunks), "}"]), + Expected = maps:from_list([{list_to_binary(K), V} || {K, V} <- KVs1]), + {Json, Expected}. diff --git a/test/jiffy_16_dedupe_keys_tests.erl b/test/jiffy_16_dedupe_keys_tests.erl index 9719f49..90c2264 100644 --- a/test/jiffy_16_dedupe_keys_tests.erl +++ b/test/jiffy_16_dedupe_keys_tests.erl @@ -5,6 +5,43 @@ -include_lib("eunit/include/eunit.hrl"). +% Duplicate keys with `return_maps`. We settled on +% last value wins semantics in such cases so test that +% we preserve that +% +duplicate_keys_maps_test_() -> + Opts = [return_maps], + Cases = [ + % Simple duplicate + { + <<"{\"a\":1,\"a\":2}">>, + #{<<"a">> => 2} + }, + % Duplicate with other keys + { + <<"{\"a\":1,\"b\":2,\"a\":3}">>, + #{<<"a">> => 3, <<"b">> => 2} + }, + % Triple duplicate + { + <<"{\"x\":1,\"x\":2,\"x\":3}">>, + #{<<"x">> => 3} + }, + % Duplicate in nested object + { + <<"{\"outer\":{\"k\":1,\"k\":2}}">>, + #{<<"outer">> => #{<<"k">> => 2}} + }, + % No duplicates + { + <<"{\"a\":1,\"b\":2,\"c\":3}">>, + #{<<"a">> => 1, <<"b">> => 2, <<"c">> => 3} + } + ], + {"Test duplicate keys with maps", lists:map(fun({Json, Result}) -> + ?_assertEqual(Result, jiffy:decode(Json, Opts)) + end, Cases)}. + dedupe_keys_test_() -> Opts = [dedupe_keys], Cases = [
