On 9 Feb 2026, at 14:29, Eli Britstein wrote:
> From: Gaetan Rivet <[email protected]>
>
> Add a reference-counted key-value map.
Thanks Gaetan for the patch. I did a detailed review and found some issues
and comments below. The test cases could benefit from incremental verification
after each operation (like test-cmap.c), testing duplicate key behavior, and
iterator modification safety.
Cheers,
Eelco
> Duplicates take a reference on the original entry within the map,
> leaving it in place. To be able to execute an entry creation after
> determining whether it is already present or not in the map,
> store relevant initialization and de-initialization functions.
Nit: "initialisation" is the British spelling; please use the US spelling "initialization" consistently :)
> Signed-off-by: Gaetan Rivet <[email protected]>
> Co-authored-by: Eli Britstein <[email protected]>
> Signed-off-by: Eli Britstein <[email protected]>
> ---
> lib/automake.mk | 2 +
> lib/refmap.c | 470 +++++++++++++++++++++++
> lib/refmap.h | 103 ++++++
> tests/automake.mk | 1 +
> tests/library.at | 5 +
> tests/test-refmap.c | 676 ++++++++++++++++++++++++++++++++++
> utilities/checkpatch_dict.txt | 2 +
> 7 files changed, 1259 insertions(+)
> create mode 100644 lib/refmap.c
> create mode 100644 lib/refmap.h
> create mode 100644 tests/test-refmap.c
>
> diff --git a/lib/automake.mk b/lib/automake.mk
> index 9634bed7b..cf7b3d18c 100644
> --- a/lib/automake.mk
> +++ b/lib/automake.mk
> @@ -323,6 +323,8 @@ lib_libopenvswitch_la_SOURCES = \
> lib/rculist.h \
> lib/reconnect.c \
> lib/reconnect.h \
> + lib/refmap.c \
> + lib/refmap.h \
> lib/rstp.c \
> lib/rstp.h \
> lib/rstp-common.h \
> diff --git a/lib/refmap.c b/lib/refmap.c
> new file mode 100644
> index 000000000..e06a55102
> --- /dev/null
> +++ b/lib/refmap.c
> @@ -0,0 +1,470 @@
> +/*
> + * Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
> + *
> + * Licensed under the Apache License, Version 2.0 (the "License");
> + * you may not use this file except in compliance with the License.
> + * You may obtain a copy of the License at:
> + *
> + * http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing, software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> + * See the License for the specific language governing permissions and
> + * limitations under the License.
> + */
> +
> +
Remove one newline.
> +#include <config.h>
> +
> +#include "cmap.h"
> +#include "hash.h"
> +#include "fatal-signal.h"
> +#include "openvswitch/vlog.h"
> +#include "openvswitch/list.h"
> +#include "ovs-atomic.h"
> +#include "ovs-thread.h"
> +#include "refmap.h"
> +#include "timeval.h"
> +
> +VLOG_DEFINE_THIS_MODULE(refmap);
> +static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(600, 600);
> +
> +static struct ovs_mutex refmap_destroy_lock = OVS_MUTEX_INITIALIZER;
> +static struct ovs_list refmap_destroy_list
> + OVS_GUARDED_BY(refmap_destroy_lock) =
> + OVS_LIST_INITIALIZER(&refmap_destroy_list);
> +
> +struct refmap {
> + struct cmap map;
> + struct ovs_mutex map_lock;
> + size_t key_size;
> + size_t value_size;
> + refmap_value_init value_init;
> + refmap_value_uninit value_uninit;
> + refmap_value_format value_format;
> + char *name;
> + struct ovs_list in_destroy_list;
> +};
> +
> +struct refmap_node {
> + struct ovsrcu_inline_node rcu_node;
> + /* CMAP related: */
> + struct cmap_node map_node;
> + uint32_t hash;
> + /* Content: */
> + struct ovs_refcount refcount;
> + /* Key, then Value follows. */
> +};
> +
> +static void
> +refmap_destroy__(struct refmap *rfm, bool global_destroy);
> +
> +static void
> +refmap_destroy_unregister_protected(struct refmap *rfm)
> + OVS_REQUIRES(refmap_destroy_lock)
> +{
> + ovs_list_remove(&rfm->in_destroy_list);
> +}
> +
> +static void
> +refmap_destroy_unregister(struct refmap *rfm)
> + OVS_EXCLUDED(refmap_destroy_lock)
> +{
> + ovs_mutex_lock(&refmap_destroy_lock);
> + refmap_destroy_unregister_protected(rfm);
> + ovs_mutex_unlock(&refmap_destroy_lock);
> +}
> +
> +static void
> +refmap_destroy_register(struct refmap *rfm)
> + OVS_EXCLUDED(refmap_destroy_lock)
> +{
> + ovs_mutex_lock(&refmap_destroy_lock);
> + ovs_list_push_back(&refmap_destroy_list, &rfm->in_destroy_list);
> + ovs_mutex_unlock(&refmap_destroy_lock);
> +}
> +
> +static void
> +refmap_destroy_all(void *aux OVS_UNUSED)
> +{
> + struct refmap *rfm;
> +
> + ovs_mutex_lock(&refmap_destroy_lock);
> + LIST_FOR_EACH_SAFE (rfm, in_destroy_list, &refmap_destroy_list) {
> + refmap_destroy_unregister_protected(rfm);
> + refmap_destroy__(rfm, true);
> + }
> + ovs_mutex_unlock(&refmap_destroy_lock);
> + ovs_mutex_destroy(&refmap_destroy_lock);
> +}
> +
> +static void
> +refmap_fatal_signal_hook(void *aux OVS_UNUSED)
> +{
> + /* This argument is only for the type check in 'ovsrcu_postpone',
> + * it is not otherwise used. */
> + int dummy_arg;
Make this a static int so it will not be flagged by static analysers.
> +
> + /* Do not run all destroys right in the signal handler.
> + * Let other modules execute their own cleanup, and then
> + * iterate over any remaining to warn about leaks. */
> + ovsrcu_postpone(refmap_destroy_all, &dummy_arg);
> +}
> +
> +struct refmap *
> +refmap_create(const char *name,
refmap_create() allocates the struct, but other maps (hmap/cmap/smap) use
xxx_init() where users allocate. Should this follow the standard pattern?
> + size_t key_size,
> + size_t value_size,
> + refmap_value_init value_init,
> + refmap_value_uninit value_uninit,
> + refmap_value_format value_format)
> +{
> + static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
> + struct refmap *rfm;
> +
> + if (ovsthread_once_start(&once)) {
> + fatal_signal_add_hook(refmap_fatal_signal_hook, NULL, NULL, true);
> + ovsthread_once_done(&once);
> + }
> +
> + rfm = xzalloc(sizeof *rfm);
> + rfm->name = xstrdup(name);
> + rfm->key_size = key_size;
> + rfm->value_size = value_size;
> + rfm->value_init = value_init;
> + rfm->value_uninit = value_uninit;
> + rfm->value_format = value_format;
> +
> + ovs_mutex_init(&rfm->map_lock);
> + cmap_init(&rfm->map);
> +
> + refmap_destroy_register(rfm);
> +
> + return rfm;
> +}
> +
> +static void
> +refmap_destroy__(struct refmap *rfm, bool global_destroy)
> +{
> + bool leaks_detected = false;
> +
> + if (rfm == NULL) {
> + return;
> + }
> +
> + VLOG_DBG("%s: destroying the map", rfm->name);
Should we RL all debug messages? Guess it should be ok as destroying refmaps
should not happen at a high rate.
> +
> + ovs_mutex_lock(&rfm->map_lock);
> + if (!cmap_is_empty(&rfm->map)) {
> + struct refmap_node *node;
> +
> + VLOG_WARN("%s: %s called with elements remaining in the map",
> + rfm->name, __func__);
> + leaks_detected = true;
> + CMAP_FOR_EACH (node, map_node, &rfm->map) {
> + /* No need to remove the node from the CMAP, it will
> + * be destroyed immediately. */
Do we need to call value_uninit?
> + ovsrcu_postpone_inline(free, node, rcu_node);
> + }
> + }
> + cmap_destroy(&rfm->map);
> + ovs_mutex_unlock(&rfm->map_lock);
> +
> + ovs_mutex_destroy(&rfm->map_lock);
> + free(rfm->name);
> + free(rfm);
> +
> + /* During the very last stage of execution of RCU callbacks,
> + * the VLOG subsystem has been disabled. All logs are thus muted.
> + * If leaks are detected, abort the process, even though we were
> + * exiting due to a fatal signal. The SIGABRT generated will still
> + * be visible. */
Double space between new sentences.
> + if (global_destroy && leaks_detected) {
> + ovs_abort(-1, "Refmap values leak detected.");
> + }
> +}
> +
> +void
> +refmap_destroy(struct refmap *rfm)
> +{
> + if (rfm == NULL) {
OVS code does not use comparison to NULL, just !rfm.
> + return;
> + }
> +
Should the below not happen at the next RCU grace period?
> + refmap_destroy_unregister(rfm);
> + refmap_destroy__(rfm, false);
> +}
> +
> +static size_t
> +refmap_key_size(struct refmap *rfm)
Maybe this function should be called refmap_rounded_key_size() or else it's not
clear why this is used rather than directly use rfm->key_size.
> +{
> + return ROUND_UP(rfm->key_size, 8);
> +}
> +
> +static void *
> +refmap_node_key(struct refmap_node *node)
> +{
> + if (!node) {
> + return NULL;
> + }
> +
> + return node + 1;
> +}
> +
> +static void *
> +refmap_node_value(struct refmap *rfm, struct refmap_node *node)
> +{
> + if (!node) {
> + return NULL;
> + }
> +
> + return ((char *) refmap_node_key(node)) + refmap_key_size(rfm);
> +}
> +
> +static size_t
> +refmap_node_total_size(struct refmap *rfm)
> +{
> + return sizeof(struct refmap_node) +
> + refmap_key_size(rfm) + rfm->value_size;
> +}
> +
> +static struct refmap_node *
> +refmap_node_from_value(struct refmap *rfm, void *value)
> +{
> + size_t offset = sizeof(struct refmap_node) + refmap_key_size(rfm);
> +
> + if ((uintptr_t) value < offset) {
> + return NULL;
> + }
> + return (void *) (((char *) value) - offset);
> +}
> +
> +void
> +refmap_for_each(struct refmap *rfm,
> + void (*cb)(void *value, void *key, void *arg), void *arg)
A macro for this would be more in line with existing implementations and
would avoid the requirement for a callback function for simple operations,
i.e., something like REFMAP_FOR_EACH().
> +{
> + struct refmap_node *node;
> +
> + CMAP_FOR_EACH (node, map_node, &rfm->map) {
> + void *key, *value;
> +
> + if (!ovs_refcount_try_ref_rcu(&node->refcount)) {
> + continue;
> + }
> + value = refmap_node_value(rfm, node);
> + key = refmap_node_key(node);
> + refmap_ref(rfm, key, NULL);
The double ref/unref looks wrong. refmap_ref() does an unnecessary
CMAP_FOR_EACH
lookup for a node we already have, and could even find a different node if the
original was deleted/recreated. Should this just be:
if (!ovs_refcount_try_ref_rcu(&node->refcount)) {
continue;
}
value = refmap_node_value(rfm, node);
key = refmap_node_key(node);
cb(value, key, arg);
refmap_unref(rfm, value);
> + cb(value, key, arg);
> + ovs_refcount_unref(&node->refcount);
> + refmap_unref(rfm, value);
> + }
> +}
> +
> +static uint32_t
> +refmap_key_hash(const struct refmap *rfm, const void *key)
> +{
> + return hash_bytes(key, rfm->key_size, 0);
> +}
> +
> +static void *
> +refmap_lookup_protected(struct refmap *rfm, void *key, uint32_t hash)
The refmap_lookup() functions returning void *value create unnecessary
conversions, as most callers immediately convert back to node (node ->
value -> node). Maybe return struct refmap_node * directly. Same
for the unprotected variant.
> +{
> + struct refmap_node *node;
> +
> + CMAP_FOR_EACH_WITH_HASH_PROTECTED (node, map_node, hash, &rfm->map) {
> + if (!memcmp(key, refmap_node_key(node), rfm->key_size) &&
> + ovs_refcount_read(&node->refcount) != 0) {
> + return refmap_node_value(rfm, node);
> + }
> + }
> +
> + return NULL;
> +}
> +
> +static void *
> +refmap_lookup(struct refmap *rfm, void *key, uint32_t hash)
> +{
> + struct refmap_node *node;
> +
> + CMAP_FOR_EACH_WITH_HASH (node, map_node, hash, &rfm->map) {
> + if (!memcmp(key, refmap_node_key(node), rfm->key_size) &&
> + ovs_refcount_read(&node->refcount) != 0) {
> + return refmap_node_value(rfm, node);
> + }
> + }
> +
> + return NULL;
> +}
> +
> +static bool
> +refmap_value_tryref(struct refmap *rfm, void *value)
> +{
> + struct refmap_node *node;
> +
> + if (!value) {
> + return false;
> + }
> +
> + node = refmap_node_from_value(rfm, value);
> + ovs_assert(NULL != node);
OVS code does not use comparison to NULL, so ovs_assert(node);
> + return ovs_refcount_try_ref_rcu(&node->refcount);
> +}
> +
> +void *
> +refmap_try_ref(struct refmap *rfm, void *key)
> +{
> + struct refmap_node *node;
> + void *value;
> +
> + value = refmap_lookup(rfm, key, refmap_key_hash(rfm, key));
> + if (!refmap_value_tryref(rfm, value)) {
> + return NULL;
> + }
> + node = refmap_node_from_value(rfm, value);
> + refmap_ref(rfm, key, NULL);
> + ovs_refcount_unref(&node->refcount);
Why this combination? No need to do another lookup in refmap_ref?
> + return value;
> +}
> +
> +void *
> +refmap_ref(struct refmap *rfm, void *key, void *arg)
> +{
> + struct refmap_node *node = NULL;
> + uint32_t hash;
> + void *value;
> + bool error;
'bool error = false;' (and move it up).
> +
> + error = false;
> +
> + hash = refmap_key_hash(rfm, key);
> +
> + value = refmap_lookup(rfm, key, hash);
> + if (refmap_value_tryref(rfm, value)) {
> + goto log_value;
> + }
> +
> + ovs_mutex_lock(&rfm->map_lock);
> +
> + value = refmap_lookup_protected(rfm, key, hash);
> + if (refmap_value_tryref(rfm, value)) {
> + ovs_mutex_unlock(&rfm->map_lock);
> + goto log_value;
> + }
> +
> + node = xzalloc(refmap_node_total_size(rfm));
> + node->hash = hash;
> + ovs_refcount_init(&node->refcount);
> + memcpy(refmap_node_key(node), key, rfm->key_size);
> + value = refmap_node_value(rfm, node);
> + if (rfm->value_init(value, arg) == 0) {
> + cmap_insert(&rfm->map, &node->map_node, node->hash);
> + } else {
> + value = NULL;
> + error = true;
> + }
> + ovs_mutex_unlock(&rfm->map_lock);
> +
> +log_value:
> + if (OVS_UNLIKELY(!VLOG_DROP_DBG((&rl)))) {
> + struct ds s = DS_EMPTY_INITIALIZER;
> +
> + if (rfm->value_format) {
> + ds_put_cstr(&s, ", '");
> + if (value) {
> + rfm->value_format(&s, key, value, arg);
> + }
> + ds_put_cstr(&s, "'");
> + }
> + if (value) {
> + node = refmap_node_from_value(rfm, value);
> + }
> + VLOG_DBG("%s: %p ref, refcnt=%d%s", rfm->name,
> + value, ovs_refcount_read(&node->refcount),
> + ds_cstr(&s)); + ds_destroy(&s);
> + }
> +
> + if (error) {
> + free(node);
> + return NULL;
> + }
> +
> + return value;
> +}
> +
> +bool
> +refmap_ref_value(struct refmap *rfm, void *value, bool safe)
> +{
The safe bool sounds dangerous to me, why not always use
ovs_refcount_try_ref_rcu()?
> + struct refmap_node *node;
> +
> + node = refmap_node_from_value(rfm, value);
> + ovs_assert(NULL != node);
OVS code does not use comparison to NULL, so ovs_assert(node);
> + if (safe) {
> + ovs_refcount_ref(&node->refcount);
> + } else if (!ovs_refcount_try_ref_rcu(&node->refcount)) {
> + return false;
> + }
> +
> + if (OVS_UNLIKELY(!VLOG_DROP_DBG((&rl)))) {
> + VLOG_DBG("%s: %p ref, refcnt=%d", rfm->name,
> + value, ovs_refcount_read(&node->refcount));
Should we also do value_format here, as the initial reference debug
has? As that message might have been lost and the value pointer does
not tell much.
> + }
> +
> + return true;
> +}
> +
> +bool
> +refmap_unref(struct refmap *rfm, void *value)
> +{
> + struct refmap_node *node;
> +
> + if (value == NULL) {
> + return false;
> + }
> +
> + node = refmap_node_from_value(rfm, value);
> +
Remove empty line, as you have above.
> + ovs_assert(NULL != node);
OVS code does not use comparison to NULL, so ovs_assert(node);
> +
> + if (OVS_UNLIKELY(!VLOG_DROP_DBG((&rl)))) {
> + struct ds s = DS_EMPTY_INITIALIZER;
> +
> + VLOG_DBG("%s: %p deref, refcnt=%d", rfm->name,
> + value, ovs_refcount_read(&node->refcount));
Should we also do value_format here? You have the ds string, so I guess you
removed the code (or forgot to add it).
> + ds_destroy(&s);
> + }
> +
> + if (ovs_refcount_unref(&node->refcount) == 1) {
> + ovs_mutex_lock(&rfm->map_lock);
> + rfm->value_uninit(refmap_node_value(rfm, node));
> + cmap_remove(&rfm->map, &node->map_node, node->hash);
> + ovs_mutex_unlock(&rfm->map_lock);
> + ovsrcu_postpone_inline(free, node, rcu_node);
> + return true;
> + }
> + return false;
> +}
> +
> +void *
> +refmap_key_from_value(struct refmap *rfm, void *value)
> +{
> + return refmap_node_key(refmap_node_from_value(rfm, value));
> +}
> +
> +unsigned int
> +refmap_value_refcount_read(struct refmap *rfm, void *value)
The refmap_value_refcount_read() API requires the caller to hold a reference,
so a returned value of 1 only indicates you were the sole owner at the moment
of the read, but may no longer be by the time you receive the value. This
makes it unsuitable for logic decisions and only useful for debug logging.
We should consider removing it or documenting it as debug-only with a
comment explaining the race condition.
> +{
> + struct refmap_node *node;
> +
> + if (!value) {
> + return 0;
> + }
> +
> + node = refmap_node_from_value(rfm, value);
> + if (node) {
> + return ovs_refcount_read(&node->refcount);
> + }
> +
> + return 0;
> +}
> diff --git a/lib/refmap.h b/lib/refmap.h
> new file mode 100644
> index 000000000..83c5c0203
> --- /dev/null
> +++ b/lib/refmap.h
> @@ -0,0 +1,103 @@
> +/*
> + * Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
> + *
Here it says 2024, is it because this is old code, or should it be updated?
Same for other places.
> + * Licensed under the Apache License, Version 2.0 (the "License");
> + * you may not use this file except in compliance with the License.
> + * You may obtain a copy of the License at:
> + *
> + * http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing, software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> + * See the License for the specific language governing permissions and
> + * limitations under the License.
> + */
> +
> +#ifndef REFMAP_H
> +#define REFMAP_H
> +
> +#include <config.h>
> +
> +#include <stddef.h>
> +#include <stdint.h>
> +
> +#include "openvswitch/dynamic-string.h"
> +
> +/*
> + * Reference map
> + * =============
> + *
> + * This key-value store acts like a regular concurrent hashmap,
> + * except that insertion takes a reference on the value if already
> + * present.
The documentation should clarify that keys must be fully initialized with
no padding bytes, as both hash_bytes() and memcmp() operate on the entire
key_size.
> + * As the value creation is dependent on it being already present
> + * within the structure and the user cannot predict that, this structure
> + * requires definitions for value_init and value_uninit functions,
> + * that will be called only at creation (first reference taken) and
> + * destruction (last reference released).
> + *
Maybe add an example? Also, it's important to document that when
refmap_unref() releases the last reference, value_uninit() is called
immediately (not RCU-delayed), making the value unavailable right away.
This differs from cmap where the memory remains valid until the RCU grace
period.
> + * Thread safety
> + * =============
> + *
> + * MT-unsafe:
> + * * refmap_create
> + * * refmap_destroy
> + *
> + * MT-safe:
> + * * refmap_for_each
> + * * refmap_ref
> + * * refmap_ref_value
> + * * refmap_try_ref
> + * * refmap_unref
> + *
> + */
> +
> +struct refmap;
> +
> +/* Called once on a newly created 'value', i.e. when the first
> + * reference is taken. */
> +typedef int (*refmap_value_init)(void *value, void *arg);
> +
> +/* Called once on the last dereference to value. */
> +typedef void (*refmap_value_uninit)(void *value);
> +
Note that value_uninit() for an old entry can execute before value_init()
completes for a new entry with the same key, potentially causing resource
conflicts if the value manages resources tied to the key. Should this be
fixed or documented?
> +/* Format a (key, value, arg) tuple in 's'. */
The refmap_value_format comment should clarify it's for debug logging
and that the callback is optional (can be NULL).
> +typedef struct ds *(*refmap_value_format)(struct ds *s, void *key,
> + void *value, void *arg);
> +
> +/* Allocate and return a map handle.
> + *
> + * The user must ensure that the 'key' type (of which 'key_size' is the size)
> + * does not contain padding. The macros 'OVS_PACKED' or 'OVS_ASSERT_PACKED'
> + * (if one does not want a packed struct) can be used to enforce this
> property.
> + */
Double space between new sentences.
> +struct refmap *refmap_create(const char *name,
> + size_t key_size,
> + size_t value_size,
> + refmap_value_init value_init,
> + refmap_value_uninit value_uninit,
> + refmap_value_format value_format);
> +
> +/* Frees the map memory. */
> +void refmap_destroy(struct refmap *rfm);
> +
> +/* refmap_try_ref takes a reference for the found value upon success. It's
> the
> + * user's responsibility to unref it. */
Double space between new sentences.
> +void *refmap_try_ref(struct refmap *rfm, void *key);
> +void *refmap_ref(struct refmap *rfm, void *key, void *arg);
> +bool refmap_ref_value(struct refmap *rfm, void *value, bool safe);
> +void refmap_for_each(struct refmap *rfm,
> + void (*cb)(void *value, void *key, void *arg),
> + void *arg);
> +void *refmap_key_from_value(struct refmap *rfm, void *value);
> +
> +/* Return 'true' if it was the last 'value' dereference and
> + * 'value_uninit' has been called. */
> +bool refmap_unref(struct refmap *rfm, void *value);
> +
> +unsigned int
> +refmap_value_refcount_read(struct refmap *rfm, void *value);
> +
> +#endif /* REFMAP_H */
> diff --git a/tests/automake.mk b/tests/automake.mk
> index da4d2e0b8..f61e66866 100644
> --- a/tests/automake.mk
> +++ b/tests/automake.mk
> @@ -502,6 +502,7 @@ tests_ovstest_SOURCES = \
> tests/test-rculist.c \
> tests/test-rcu-inline.c \
> tests/test-reconnect.c \
> + tests/test-refmap.c \
> tests/test-rstp.c \
> tests/test-sflow.c \
> tests/test-sha1.c \
> diff --git a/tests/library.at b/tests/library.at
> index 16820ff49..2715ccd04 100644
> --- a/tests/library.at
> +++ b/tests/library.at
> @@ -44,6 +44,11 @@ AT_CHECK([ovstest test-ccmap check 1], [0], [...
> ])
> AT_CLEANUP
>
> +AT_SETUP([refmap])
> +AT_KEYWORDS([refmap])
> +AT_CHECK([ovstest test-refmap check], [0], [])
> +AT_CLEANUP
> +
> AT_SETUP([atomic operations])
> AT_CHECK([ovstest test-atomic])
> AT_CLEANUP
> diff --git a/tests/test-refmap.c b/tests/test-refmap.c
> new file mode 100644
> index 000000000..21390b97e
> --- /dev/null
> +++ b/tests/test-refmap.c
> @@ -0,0 +1,676 @@
> +/*
> + * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION &
> AFFILIATES.
> + * All rights reserved.
> + * SPDX-License-Identifier: Apache-2.0
> + *
> + * Licensed under the Apache License, Version 2.0 (the "License");
> + * you may not use this file except in compliance with the License.
> + * You may obtain a copy of the License at
> + *
> + * http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing, software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> + * See the License for the specific language governing permissions and
> + * limitations under the License.
> + */
> +
> +#include <config.h>
> +
> +#undef NDEBUG
> +#include <assert.h>
> +#include <errno.h>
> +#include <getopt.h>
> +#include <string.h>
> +
> +#include "ovs-atomic.h"
> +#include "ovs-numa.h"
> +#include "ovs-rcu.h"
> +#include "ovs-thread.h"
> +#include "ovstest.h"
> +#include "random.h"
> +#include "refmap.h"
> +#include "timeval.h"
> +#include "util.h"
> +
> +#include "openvswitch/util.h"
> +#include "openvswitch/vlog.h"
> +
> +#define N 100
> +
> +static struct refmap_test_params {
> + unsigned int n_threads;
> + unsigned int n_ids;
> + int step_idx;
> + bool debug;
> + bool csv_format;
> +} params = {
> + .n_threads = 1,
> + .n_ids = N,
> + .debug = false,
> + .csv_format = false,
> +};
> +
> +DEFINE_STATIC_PER_THREAD_DATA(unsigned int, thread_id, OVSTHREAD_ID_UNSET);
> +
> +static unsigned int
> +thread_id(void)
> +{
> + static atomic_count next_id = ATOMIC_COUNT_INIT(0);
> + unsigned int id = *thread_id_get();
> +
> + if (OVS_UNLIKELY(id == OVSTHREAD_ID_UNSET)) {
> + id = atomic_count_inc(&next_id);
> + *thread_id_get() = id;
> + }
> +
> + return id;
> +}
> +
> +OVS_ASSERT_PACKED(struct key,
> + size_t idx;
> + bool b;
> + uint8_t pad[7];
> +);
> +
> +struct value {
> + void *hdl;
> +};
> +
> +struct arg {
> + void *ptr;
> +};
> +
> +static int
> +value_init(void *value_, void *arg_)
> +{
> + struct value *value = value_;
> + struct arg *arg = arg_;
> +
> + /* Verify that we don't double-init value. */
> + ovs_assert(value->hdl == NULL);
OVS code does not use comparison to NULL, so ovs_assert(!value->hdl);
> +
> + value->hdl = arg->ptr;
> + return 0;
> +}
> +
> +static void
> +value_uninit(void *value_)
> +{
> + struct value *value = value_;
> +
> + /* Verify that we don't double-uninit value. */
> + ovs_assert(value->hdl != NULL);
OVS code does not use comparison to NULL, so ovs_assert(value->hdl);
> +
> + value->hdl = NULL;
> +}
> +
> +static struct ds *
> +value_format(struct ds *s,
> + void *key_, void *value_, void *arg_)
> +{
> + struct key *key = key_;
> + struct value *value = value_;
> + struct arg *arg = arg_;
> +
> + ds_put_format(s, "idx=%"PRIuSIZE", b=%s, hdl=%p, ptr=%p",
> + key->idx, key->b ? "1" : "0",
> + value->hdl, arg->ptr);
> + return s;
> +}
> +
> +static void
> +run_check(struct ovs_cmdl_context *ctx OVS_UNUSED)
> +{
> + struct value *values[N];
> + struct key keys[N];
> + struct refmap *rfm;
> +
> + rfm = refmap_create("check-rfm", sizeof(struct key), sizeof(struct
> value),
> + value_init, value_uninit, value_format);
> +
> + memset(keys, 0, sizeof keys);
> + for (int i = 0; i < N; i++) {
> + struct arg arg = {
> + .ptr = &keys[i],
> + };
> + struct value *value;
> +
> + keys[i].idx = i;
> + ovs_assert(NULL == refmap_try_ref(rfm, &keys[i]));
OVS code does not use comparison to NULL, so ovs_assert(!...)
> + value = refmap_ref(rfm, &keys[i], &arg);
> + ovs_assert(value != NULL);
> + ovs_assert(value == refmap_ref(rfm, &keys[i], &arg));
> + refmap_unref(rfm, value);
> + ovs_assert(value == refmap_try_ref(rfm, &keys[i]));
> + refmap_unref(rfm, value);
> + values[i] = value;
> + }
> +
> + for (int i = 0; i < N; i++) {
> + /* Verify that value_init is properly called. */
> + ovs_assert(values[i]->hdl != NULL);
> + }
> +
> + for (int i = 0; i < N; i++) {
> + refmap_unref(rfm, values[i]);
> + }
> +
> + for (int i = 0; i < N; i++) {
> + ovs_assert(NULL == refmap_try_ref(rfm, &keys[i]));
> + }
> +
> + for (int i = 0; i < N; i++) {
> + /* Verify that value_uninit is executed. */
> + ovs_assert(values[i]->hdl == NULL);
> + }
> +
> + refmap_destroy(rfm);
> +}
> +
> +static uint32_t *ids;
> +static void **values;
> +static atomic_uint *thread_working_ms; /* Measured work time. */
> +
> +static struct ovs_barrier barrier_outer;
> +static struct ovs_barrier barrier_inner;
> +
> +static atomic_uint running_time_ms;
> +static atomic_bool stop;
> +
> +static unsigned int
> +elapsed(unsigned int start)
> +{
> + unsigned int running_time_ms_;
> +
> + atomic_read(&running_time_ms, &running_time_ms_);
> +
> + return running_time_ms_ - start;
> +}
> +
> +static void *
> +clock_main(void *arg OVS_UNUSED)
> +{
> + struct timeval start;
> + struct timeval end;
> +
> + xgettimeofday(&start);
> + for (;;) {
> + bool stop_;
> +
> + atomic_read(&stop, &stop_);
> + if (stop_) {
> + break;
> + }
> + xgettimeofday(&end);
> + atomic_store(&running_time_ms,
> + timeval_to_msec(&end) - timeval_to_msec(&start));
> + xnanosleep(1000);
> + }
> +
> + return NULL;
> +}
> +
> +enum step_id {
> + STEP_NONE,
> + STEP_ALLOC,
> + STEP_REF,
> + STEP_UNREF,
> + STEP_FREE,
> + STEP_MIXED,
> + STEP_POS_QUERY,
> + STEP_NEG_QUERY,
> +};
> +
> +static const char *step_names[] = {
> + [STEP_NONE] = "<bug>",
> + [STEP_ALLOC] = "alloc",
> + [STEP_REF] = "ref",
> + [STEP_UNREF] = "unref",
> + [STEP_FREE] = "free",
> + [STEP_MIXED] = "mixed",
> + [STEP_POS_QUERY] = "pos-query",
> + [STEP_NEG_QUERY] = "neg-query",
> +};
> +
> +#define MAX_N_STEP 10
> +
> +#define FOREACH_STEP(STEP_VAR, SCHEDULE) \
> + for (int __idx = 0, STEP_VAR = (SCHEDULE)[__idx]; \
> + (STEP_VAR = (SCHEDULE)[__idx]) != STEP_NONE; \
> + __idx++)
> +
> +struct test_case {
> + int idx;
> + enum step_id schedule[MAX_N_STEP];
> +};
> +
> +static void
> +print_header(void)
> +{
> + if (params.csv_format) {
> + return;
> + }
> +
> + printf("Benchmarking n=%u on %u thread%s.\n",
> + params.n_ids, params.n_threads,
> + params.n_threads > 1 ? "s" : "");
> +
> + printf(" step\\thread: ");
> + printf(" Avg");
> + for (size_t i = 0; i < params.n_threads; i++) {
> + printf(" %3" PRIuSIZE, i + 1);
> + }
> + printf("\n");
> +}
> +
> +static void
> +print_test_header(struct test_case *test)
> +{
> + if (params.csv_format) {
> + return;
> + }
> +
> + printf("[%d]---------------------------", test->idx);
> + for (size_t i = 0; i < params.n_threads; i++) {
> + printf("-------");
> + }
> + printf("\n");
> +}
> +
> +static void
> +print_test_result(struct test_case *test, enum step_id step, int step_idx)
> +{
> + char test_name[50];
> + uint64_t *twm;
> + uint64_t avg;
> + size_t i;
> +
> + twm = xcalloc(params.n_threads, sizeof *twm);
> + for (i = 0; i < params.n_threads; i++) {
> + atomic_read(&thread_working_ms[i], &twm[i]);
> + }
> +
> + avg = 0;
> + for (i = 0; i < params.n_threads; i++) {
> + avg += twm[i];
> + }
> + ovs_assert(params.n_threads);
> + avg /= params.n_threads;
> +
> + snprintf(test_name, sizeof test_name, "%d.%d-%s",
> + test->idx, step_idx,
> + step_names[step]);
> + if (params.csv_format) {
> + printf("%s,%" PRIu64, test_name, avg);
> + } else {
> + printf("%*s: ", 18, test_name);
> + printf(" %6" PRIu64, avg);
> + for (i = 0; i < params.n_threads; i++) {
> + printf(" %6" PRIu64, twm[i]);
> + }
> + printf(" ms");
> + }
> + printf("\n");
> +
> + free(twm);
> +}
> +
> +static struct test_case test_cases[] = {
> + {
> + .schedule = {
> + STEP_ALLOC,
> + STEP_FREE,
> + },
> + },
> + {
> + .schedule = {
> + STEP_ALLOC,
> + STEP_REF,
> + STEP_UNREF,
> + STEP_FREE,
> + },
> + },
> + {
> + .schedule = {
> + STEP_MIXED,
> + STEP_FREE,
> + },
> + },
> + {
> + .schedule = {
> + STEP_ALLOC,
> + STEP_POS_QUERY,
> + /* Test negative query with map full. */
> + STEP_NEG_QUERY,
> + STEP_FREE,
> + /* Test negative query with map empty. */
> + STEP_NEG_QUERY,
> + },
> + },
> +};
> +
> +static void
> +swap_ptr(void **a, void **b)
> +{
> + void *t;
> + t = *a;
> + *a = *b;
> + *b = t;
> +}
> +
> +struct aux {
> + struct test_case test;
> + struct refmap *rfm;
> +};
> +
> +static void *
> +benchmark_thread_worker(void *aux_)
> +{
> +    /* Benchmark worker body.  Each worker owns a contiguous slice of the
> +     * global 'ids'/'values' arrays and executes the steps of the current
> +     * test schedule in lockstep with the main thread:
> +     *
> +     *   - 'barrier_outer' gates the start of each test case (or exit).
> +     *   - 'barrier_inner' brackets map setup and each individual step, so
> +     *     the main thread can initialize the refmap and print results.
> +     */
> +    unsigned int tid = thread_id();
> +    unsigned int n_ids_per_thread;
> +    unsigned int start_idx;
> +    struct aux *aux = aux_;
> +    struct refmap *rfm;
> +    unsigned int start;
> +    uint32_t *th_ids;
> +    void **th_privs;
> +    void *value;
> +    size_t i;
> +
> +    /* NOTE(review): if n_ids is not a multiple of n_threads, the trailing
> +     * 'n_ids % n_threads' entries are never exercised -- confirm this is
> +     * intended. */
> +    n_ids_per_thread = params.n_ids / params.n_threads;
> +    start_idx = tid * n_ids_per_thread;
> +    th_privs = &values[start_idx];
> +    th_ids = &ids[start_idx];
> +
> +    /* One iteration per test case; the 'stop' flag ends the thread. */
> +    for (;;) {
> +        bool stop_;
> +
> +        ovs_barrier_block(&barrier_outer);
> +        atomic_read(&stop, &stop_);
> +        if (stop_) {
> +            break;
> +        }
> +        /* Wait for main thread to finish initializing
> +         * rfm and step schedule. */
> +        ovs_barrier_block(&barrier_inner);
> +        rfm = aux->rfm;
> +
> +        FOREACH_STEP(step, aux->test.schedule) {
> +            /* Paired with benchmark_thread_main(): the first inner barrier
> +             * starts the timed step, the second signals completion. */
> +            ovs_barrier_block(&barrier_inner);
> +            atomic_read(&running_time_ms, &start);
> +            switch (step) {
> +            case STEP_ALLOC:
> +            case STEP_REF:
> +                /* Take a (possibly creating) reference on every key of
> +                 * this thread's slice, remembering the returned private
> +                 * pointers for the later unref step. */
> +                for (i = 0; i < n_ids_per_thread; i++) {
> +                    struct key key = {
> +                        .idx = start_idx + i,
> +                    };
> +                    struct arg arg = {
> +                        .ptr = &th_ids[i],
> +                    };
> +
> +                    th_privs[i] = refmap_ref(rfm, &key, &arg);
> +                }
> +                break;
> +            case STEP_POS_QUERY:
> +                /* Lookup of keys known to be present: ref then
> +                 * immediately unref. */
> +                for (i = 0; i < n_ids_per_thread; i++) {
> +                    struct key key = {
> +                        .idx = start_idx + i,
> +                    };
> +                    value = refmap_try_ref(rfm, &key);
> +                    refmap_unref(rfm, value);
> +                }
> +                break;
> +            case STEP_NEG_QUERY:
> +                /* Lookup of a key that is never inserted.  Assumes
> +                 * refmap_unref() tolerates the failed-lookup result
> +                 * (presumably NULL) -- TODO confirm. */
> +                for (i = 0; i < n_ids_per_thread; i++) {
> +                    struct key key = {
> +                        .idx = params.n_ids + 1,
> +                    };
> +                    value = refmap_try_ref(rfm, &key);
> +                    refmap_unref(rfm, value);
> +                }
> +                break;
> +            case STEP_UNREF:
> +            case STEP_FREE:
> +                /* Drop the references taken by STEP_ALLOC / STEP_REF /
> +                 * STEP_MIXED. */
> +                for (i = 0; i < n_ids_per_thread; i++) {
> +                    refmap_unref(rfm, th_privs[i]);
> +                }
> +                break;
> +            case STEP_MIXED:
> +                for (i = 0; i < n_ids_per_thread; i++) {
> +                    struct arg arg;
> +                    struct key key;
> +                    int shuffled;
> +
> +                    /* Mixed mode is doing:
> +                     * 1. Alloc.
> +                     * 2. Shuffle two elements.
> +                     * 3. Delete shuffled element.
> +                     * 4. Alloc again.
> +                     * The loop ends with all elements allocated.
> +                     */
> +
> +                    memset(&key, 0, sizeof key);
> +                    key.idx = start_idx + i;
> +                    shuffled = random_range(i + 1);
> +
> +                    arg.ptr = &th_ids[i];
> +                    th_privs[i] = refmap_ref(rfm, &key, &arg);
> +                    swap_ptr(&th_privs[i], &th_privs[shuffled]);
> +                    refmap_unref(rfm, th_privs[i]);
> +                    arg.ptr = &th_ids[i];
> +                    th_privs[i] = refmap_ref(rfm, &key, &arg);
> +                }
> +                break;
> +            default:
> +                /* Unknown step value: schedule corruption, abort. */
> +                fprintf(stderr, "[%u]: Reached step %d\n",
> +                        tid, step);
> +                OVS_NOT_REACHED();
> +                break;
> +            }
> +            /* Publish this thread's elapsed time for the step; read by
> +             * the main thread for reporting. */
> +            atomic_store(&thread_working_ms[tid], elapsed(start));
> +            ovs_barrier_block(&barrier_inner);
> +            /* Main thread prints result now. */
> +        }
> +    }
> +
> +    return NULL;
> +}
> +
> +static void
> +benchmark_thread_main(struct aux *aux)
> +{
> +    /* Main-thread side of one benchmark test case: creates the refmap,
> +     * then walks the schedule, using 'barrier_inner' to bracket each
> +     * worker step (first block releases the workers, second waits for
> +     * them to finish) and printing the result of every step.  The barrier
> +     * sequence here must mirror benchmark_thread_worker() exactly. */
> +    int step_idx;
> +
> +    /* Reset the shared arrays so each test case starts clean. */
> +    memset(ids, 0, params.n_ids * sizeof *ids);
> +    memset(values, 0, params.n_ids * sizeof *values);
> +
> +    aux->rfm = refmap_create("benchmark-rfm", sizeof(struct key),
> +                             sizeof(struct value), value_init, value_uninit,
> +                             value_format);
> +
> +    print_test_header(&aux->test);
> +    ovs_barrier_block(&barrier_inner);
> +    /* Init is done, worker can start preparing to work. */
> +    step_idx = 0;
> +    FOREACH_STEP(step, aux->test.schedule) {
> +        ovs_barrier_block(&barrier_inner);
> +        /* Workers do the scheduled work now. */
> +        ovs_barrier_block(&barrier_inner);
> +        print_test_result(&aux->test, step, step_idx++);
> +    }
> +
> +    /* NOTE(review): assumes the schedule leaves the map empty (e.g. ends
> +     * with an unref/free step) before destruction -- confirm. */
> +    refmap_destroy(aux->rfm);
> +}
> +
> +/* Parses the benchmark sub-command arguments:
> + *
> + *     benchmark <nb elem> <nb threads> [step=<uint>] [csv]
> + *
> + * The first bare integer is the number of elements, the second the number
> + * of worker threads.  'step=<uint>' restricts the run to a single test
> + * case index, 'csv' switches the output format.  On success fills in
> + * 'params' and returns true; otherwise prints a diagnostic on stderr and
> + * returns false. */
> +static bool
> +parse_benchmark_params(int argc, char *argv[])
> +{
> +    long int l_threads = 0;
> +    long int l_ids = 0;
> +    bool valid = true;
> +    long int l;
> +    int i;
> +
> +    /* -1 means "run every test case". */
> +    params.step_idx = -1;
> +    for (i = 0; i < argc; i++) {
> +        if (!strcmp(argv[i], "benchmark") ||
> +            !strcmp(argv[i], "debug")) {
> +            continue;
> +        } else if (!strcmp(argv[i], "csv")) {
> +            params.csv_format = true;
> +        } else if (!strncmp(argv[i], "step=", 5)) {
> +            /* Reject negative indices: only -1 (the default) is treated
> +             * as special by run_benchmark(). */
> +            if (!str_to_long(&argv[i][5], 10, &l) || l < 0) {
> +                fprintf(stderr,
> +                        "Invalid parameter '%s', expected positive "
> +                        "integer.\n", argv[i]);
> +                valid = false;
> +                goto out;
> +            }
> +            params.step_idx = l;
> +        } else {
> +            /* Reject zero or negative counts, as the error message
> +             * promises: a negative value would later corrupt the
> +             * xcalloc() sizes in run_benchmark(). */
> +            if (!str_to_long(argv[i], 10, &l) || l <= 0) {
> +                fprintf(stderr,
> +                        "Invalid parameter '%s', expected positive "
> +                        "integer.\n", argv[i]);
> +                valid = false;
> +                goto out;
> +            }
> +            if (l_ids == 0) {
> +                l_ids = l;
> +            } else if (l_threads == 0) {
> +                l_threads = l;
> +            } else {
> +                fprintf(stderr,
> +                        "Invalid parameter '%s', too many integer values.\n",
> +                        argv[i]);
> +                valid = false;
> +                goto out;
> +            }
> +        }
> +    }
> +
> +    if (l_ids != 0) {
> +        params.n_ids = l_ids;
> +    } else {
> +        fprintf(stderr, "Invalid parameters: no number of elements given.\n");
> +        valid = false;
> +    }
> +
> +    if (l_threads != 0) {
> +        params.n_threads = l_threads;
> +    } else {
> +        fprintf(stderr, "Invalid parameters: no number of threads given.\n");
> +        valid = false;
> +    }
> +
> +out:
> +    return valid;
> +}
> +
> +static void
> +run_benchmark(struct ovs_cmdl_context *ctx)
> +{
> +    /* Entry point for the "benchmark" sub-command: allocates the shared
> +     * arrays, spawns the clock and worker threads, then runs each selected
> +     * test case via benchmark_thread_main().  The outer barrier is sized
> +     * for n_threads workers plus this main thread. */
> +    pthread_t *threads;
> +    pthread_t clock;
> +    struct aux aux;
> +    size_t i;
> +
> +    if (!parse_benchmark_params(ctx->argc, ctx->argv)) {
> +        return;
> +    }
> +
> +    ids = xcalloc(params.n_ids, sizeof *ids);
> +    values = xcalloc(params.n_ids, sizeof *values);
> +    thread_working_ms = xcalloc(params.n_threads,
> +                                sizeof *thread_working_ms);
> +    atomic_init(&stop, false);
> +
> +    /* Background thread maintaining 'running_time_ms' used for the
> +     * per-step timings. */
> +    clock = ovs_thread_create("clock", clock_main, NULL);
> +
> +    ovsrcu_quiesce_start();
> +    ovs_barrier_init(&barrier_outer, params.n_threads + 1);
> +    ovs_barrier_init(&barrier_inner, params.n_threads + 1);
> +    threads = xmalloc(params.n_threads * sizeof *threads);
> +    /* 'aux' is still uninitialized here; workers only read it after the
> +     * barriers below, once aux.test / aux.rfm have been set. */
> +    for (i = 0; i < params.n_threads; i++) {
> +        threads[i] = ovs_thread_create("worker",
> +                                       benchmark_thread_worker, &aux);
> +    }
> +
> +    print_header();
> +    for (i = 0; i < ARRAY_SIZE(test_cases); i++) {
> +        test_cases[i].idx = i;
> +        /* 'step=<uint>' limits the run to a single test case. */
> +        if (params.step_idx != -1 &&
> +            params.step_idx != i) {
> +            continue;
> +        }
> +        /* If we don't block workers from progressing now,
> +         * there would be a race for access to aux.test,
> +         * leading to some workers not respecting the schedule.
> +         */
> +        ovs_barrier_block(&barrier_outer);
> +        memcpy(&aux.test, &test_cases[i], sizeof aux.test);
> +        benchmark_thread_main(&aux);
> +    }
> +    /* Release the workers one last time so they observe 'stop' and
> +     * exit their loops. */
> +    atomic_store(&stop, true);
> +    ovs_barrier_block(&barrier_outer);
> +
> +    for (i = 0; i < params.n_threads; i++) {
> +        xpthread_join(threads[i], NULL);
> +    }
> +    free(threads);
> +
> +    ovs_barrier_destroy(&barrier_outer);
> +    ovs_barrier_destroy(&barrier_inner);
> +    free(ids);
> +    free(values);
> +    free(thread_working_ms);
> +    /* NOTE(review): assumes clock_main() also watches 'stop' and
> +     * terminates -- otherwise this join would hang; confirm. */
> +    xpthread_join(clock, NULL);
> +}
> +
> +/* ovs_cmdl dispatch table: "check" runs the functional tests, "benchmark"
> + * the performance measurements.  Terminated by an all-NULL sentinel. */
> +static const struct ovs_cmdl_command commands[] = {
> +    {"check", "[debug]", 0, 1, run_check, OVS_RO},
> +    {"benchmark", "<nb elem> <nb threads> [step=<uint>] [csv]", 0, 4,
> +     run_benchmark, OVS_RO},
> +    {NULL, NULL, 0, 0, NULL, OVS_RO},
> +};
> +
> +/* Scans the sub-command arguments for global test options.  The only
> + * recognized option is "debug", which enables verbose logging. */
> +static void
> +parse_test_params(int argc, char *argv[])
> +{
> +    int arg_idx = 0;
> +
> +    while (arg_idx < argc) {
> +        if (strcmp(argv[arg_idx], "debug") == 0) {
> +            params.debug = true;
> +        }
> +        arg_idx++;
> +    }
> +}
> +
> +static void
> +refmap_test_main(int argc, char *argv[])
> +{
> +    /* Test program entry point: parses global options, configures
> +     * logging (silent by default, refmap debug logs with "debug"), and
> +     * dispatches to the "check"/"benchmark" sub-commands.  'optind' has
> +     * been advanced by the ovstest wrapper's option parsing. */
> +    struct ovs_cmdl_context ctx = {
> +        .argc = argc - optind,
> +        .argv = argv + optind,
> +    };
> +
> +    parse_test_params(argc - optind, argv + optind);
> +
> +    /* Silence all logging unless debug mode was requested. */
> +    vlog_set_levels(NULL, VLF_ANY_DESTINATION, VLL_OFF);
> +    if (params.debug) {
> +        vlog_set_levels_from_string_assert("refmap:console:dbg");
> +    }
> +
> +    /* Quiesce to start the RCU. */
> +    ovsrcu_quiesce();
> +
> +    set_program_name(argv[0]);
> +    ovs_cmdl_run_command(&ctx, commands);
> +
> +    /* Flush outstanding RCU callbacks before exiting. */
> +    ovsrcu_exit();
> +}
> +
> +/* Register under "test-refmap" so the ovstest driver can dispatch here. */
> +OVSTEST_REGISTER("test-refmap", refmap_test_main);
> diff --git a/utilities/checkpatch_dict.txt b/utilities/checkpatch_dict.txt
> index 13f107246..ccc9249a2 100644
> --- a/utilities/checkpatch_dict.txt
> +++ b/utilities/checkpatch_dict.txt
> @@ -105,6 +105,7 @@ icmp
> icmp4
> icmpv6
> idl
> +ie
> ifdef
> ifindex
> initializer
> @@ -224,6 +225,7 @@ rebased
> recirc
> recirculation
> recirculations
> +refmap
> revalidate
> revalidation
> revalidator
> --
> 2.34.1
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev