Hi all,

I found that bihash might return zero'ed value (0xff's) if deletion and 
searching were performed in parallel on different threads. This is reproducible 
by only ~100 lines of code, from 21.06 all the way up to git master, and on 
multiple machines from Coffee Lake to Tiger Lake.

The code (a plugin) and test command is shown below. Given how easy it is to 
reproduce the problem, I'm not sure whether this is a bug or something missing 
on my end. Any advice is welcomed. Thanks!

======== bihash_race.c ========

#include <vnet/plugin/plugin.h>
#include <vpp/app/version.h>
#include <vppinfra/bihash_8_8.h>

static volatile int br_run;
static clib_bihash_8_8_t br_bihash_8_8;
static int test_run_cnt[2];

static void
bihash_race_set (u64 key)
{
  clib_bihash_kv_8_8_t kv = { .key = key, .value = 1 };

  clib_bihash_add_del_8_8 (&br_bihash_8_8, &kv, BIHASH_ADD);
}

static void
bihash_race_del (u64 key)
{
  clib_bihash_kv_8_8_t kv = { .key = key, .value = 1 };

  clib_bihash_add_del_8_8 (&br_bihash_8_8, &kv, BIHASH_DEL);
}

static void
bihash_race_get (u64 key)
{
  clib_bihash_kv_8_8_t kv = { .key = key, .value = 1 };
  clib_bihash_kv_8_8_t value;

  if (!clib_bihash_search_8_8 (&br_bihash_8_8, &kv, &value))
    {
      if (value.value != 1)
        {
          clib_warning ("suspicious value: 0x%lx\n", value.value);
        }
    }
}

static clib_error_t *
bihash_race_init (vlib_main_t *vm)
{
  clib_bihash_init_8_8 (&br_bihash_8_8, "bihash_race hash", 4096, 4096 * 256);

  return 0;
}

VLIB_NODE_FN (br_test_node)
(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
{
  u64 k = 0x01010101UL;

  if (!br_run || vm->thread_index == 0)
    {
      return 0;
    }

  // perform bihash set and delete for 50000 times each in thread 1
  if (vm->thread_index == 1 && test_run_cnt[0] < 100000)
    {
      if (test_run_cnt[1] & 1)
        bihash_race_del (k);
      else
        bihash_race_set (k);
      test_run_cnt[0]++;
      return 0;
    }

  // perform bihash lookup for 100000 times in thread 2
  if (vm->thread_index == 2 && test_run_cnt[1] < 100000)
    {
      bihash_race_get (k);
      test_run_cnt[1]++;
      return 0;
    }
  return 0;
}

void
bihash_race_test_reset ()
{
  memset (test_run_cnt, 0, sizeof (test_run_cnt));
  clib_warning ("========== test reset ==========");
}

static clib_error_t *
bihash_race_test_command_fn (vlib_main_t *vm, unformat_input_t *input,
                             vlib_cli_command_t *cmd)
{
  u8 state = ~0;

  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
    {
      if (unformat (input, "on"))
        state = 1;
      else if (unformat (input, "off"))
        state = 0;
      else
        return clib_error_return (0, "invalid input");
    }

  if (state)
    {
      br_run = 1;
    }
  else
    {
      br_run = 0;
      bihash_race_test_reset ();
      vlib_cli_output (vm, "========== test reset ==========");
    }

  return 0;
}

VLIB_CLI_COMMAND (bihash_race_test_command, static) = {
  .path = "set bihash-race-test",
  .short_help = "set bihash-race-test <on>|<off>",
  .function = bihash_race_test_command_fn,
};

VLIB_REGISTER_NODE (br_test_node) = {
  .name = "bihash-race-test",
  .type = VLIB_NODE_TYPE_INPUT,
  .state = VLIB_NODE_STATE_POLLING,
};

VLIB_INIT_FUNCTION (bihash_race_init);

VLIB_PLUGIN_REGISTER () = {
  .version = VPP_BUILD_VER,
  .description = "bihash race test",
};

========= CMakeLists.txt =========

add_vpp_plugin(bihash_race
  SOURCES
  bihash_race.c
)

========= startup.conf =========

unix {
  nodaemon
  full-coredump
  cli-listen /run/vpp/cli.sock
  gid 1000
  interactive
}

cpu {
  workers 3
  main-core 1
}

dpdk {
  no-pci
}

========= test command =========

terminal1 # vpp -c startup.conf
terminal2 # while true; do vppctl set bihash-race-test on; sleep 1; vppctl set 
bihash-race-test off; done

VPP will spit out a lot of "suspicious value: 0xffffffffffffffff" in terminal1, 
while the code above never saves such value into bihash - the value comes from 
the !is_add branch in clib_bihash_add_del_inline_with_hash. Changing the memset 
value (and clib_bihash_is_free_* obviously) in this branch will lead to this 
new value being returned.

Regards,
Hao Tian
-=-=-=-=-=-=-=-=-=-=-=-
Links: You receive all messages sent to this group.
View/Reply Online (#22701): https://lists.fd.io/g/vpp-dev/message/22701
Mute This Topic: https://lists.fd.io/mt/97599770/21656
Group Owner: vpp-dev+ow...@lists.fd.io
Unsubscribe: https://lists.fd.io/g/vpp-dev/leave/1480452/21656/631435203/xyzzy 
[arch...@mail-archive.com]
-=-=-=-=-=-=-=-=-=-=-=-

Reply via email to