Read at face value, the vpp binary API memory segment (VA range 0x30000000 and 
above) is out of memory. The failing allocation is a request for 60 bytes, 
which is not an unreasonable ask.

Before doing anything else, try increasing the size of the API segment and see 
if the problem recurs.

We do not support 18.01 at this point, but I’ll sketch the work necessary to 
track down a binary API segment memory leak in that code version.

It will take a certain amount of scaffolding work to track down binary API 
segment memory leaks. In src/vppinfra/mheap_bootstrap.h, set 
MHEAP_HAVE_SMALL_OBJECT_CACHE to 0 in all cases.

Patch the current “memory-trace” debug CLI command, and the current “show 
memory” command – see below. Boot vpp and enable memory tracing on the api 
segment.

Do whatever you normally do to cause the out-of-memory crash, but not for quite 
as long (😉)...

Then, “show memory api-segment” to look for API segment memory leaks.

Good luck.

Dave

/*
 * Debug CLI handler for "memory-trace".
 *
 * Parses one line of input and passes the requested enable/disable state to
 * clib_mem_trace ().  When "api-segment" is present, the call is made between
 * vl_msg_push_heap () and vl_msg_pop_heap (), so that tracing is switched on
 * the heap that vl_msg_push_heap () makes current (the binary API segment
 * heap, per the command's help text) rather than on the caller's heap.
 *
 * vm    - vlib main; not used by this handler
 * input - CLI input; a single line is consumed from it
 * cmd   - CLI command registration; not used by this handler
 *
 * Returns 0 on success (and when no line could be read from input), or a
 * clib error when the line contains an unrecognized token or does not
 * contain an enable/disable keyword.
 */
static clib_error_t *
enable_disable_memory_trace (vlib_main_t * vm,
                             unformat_input_t * input,
                             vlib_cli_command_t * cmd)
{
  unformat_input_t _line_input, *line_input = &_line_input;
  int enable = 0;
  int enable_seen = 0;
  int api_segment = 0;
  void *oldheap = 0;

  if (!unformat_user (input, unformat_line_input, line_input))
    return 0;

  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
    {
      if (unformat (line_input, "%U", unformat_vlib_enable_disable, &enable))
        enable_seen = 1;
      else if (unformat (line_input, "api-segment"))
        api_segment = 1;
      else
        {
          unformat_free (line_input);
          return clib_error_return (0, "invalid input");
        }
    }
  unformat_free (line_input);

  /*
   * Without an explicit keyword "enable" was never written by the parser;
   * refuse to toggle tracing rather than act on a value the user did not give.
   */
  if (!enable_seen)
    return clib_error_return (0, "expecting on|off");

  if (api_segment)
    oldheap = vl_msg_push_heap ();
  clib_mem_trace (enable);
  if (api_segment)
    vl_msg_pop_heap (oldheap);

  return 0;
}

/*
 * Register the "memory-trace" debug CLI command.  The handler is
 * enable_disable_memory_trace (); short_help is the usage text for the
 * command.
 */
/* *INDENT-OFF* */
VLIB_CLI_COMMAND (enable_disable_memory_trace_command, static) = {
  .path = "memory-trace",
  .short_help = "on|off [api-segment] Enable/disable memory allocation trace",
  .function = enable_disable_memory_trace,
};
/* *INDENT-ON* */


/*
 * Debug CLI handler for "show memory".
 *
 * Accepts the keywords "verbose" and "api-segment", in any order and any
 * combination.  With "api-segment", a verbose format_mheap dump of the heap
 * made current by vl_msg_push_heap () is printed first, bracketed by
 * "API segment start:" / "API segment end:".  After that, one report per
 * entry visited by foreach_vlib_main is printed: thread index and name, a
 * page map of the heap, and a format_mheap summary whose detail level follows
 * "verbose".
 *
 * vm    - vlib main used for CLI output
 * input - CLI input to parse
 * cmd   - CLI command registration; not used by this handler
 *
 * Returns 0 on success, or a clib error naming the first unknown token.
 */
static clib_error_t *
show_memory_usage (vlib_main_t * vm,
                   unformat_input_t * input, vlib_cli_command_t * cmd)
{
  int verbose __attribute__ ((unused)) = 0, api_segment = 0;
  u32 index = 0;

  /* Consume keywords until the input runs out; anything else is an error. */
  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
    {
      if (unformat (input, "verbose"))
        {
          verbose = 1;
          continue;
        }
      if (unformat (input, "api-segment"))
        {
          api_segment = 1;
          continue;
        }
      return clib_error_return (0, "unknown input `%U'",
                                format_unformat_error, input);
    }

  if (api_segment)
    {
      void *saved_heap;
      u8 *segment_text;
      u8 *local_text;

      /* Build the report while the pushed heap is the current heap. */
      saved_heap = vl_msg_push_heap ();
      segment_text =
        format (0, "%U\n", format_mheap, clib_mem_get_heap (), 1);
      vl_msg_pop_heap (saved_heap);

      /* Duplicate it after the pop, i.e. with the previous heap restored. */
      local_text = vec_dup (segment_text);

      /* Release the first copy under the same heap it was built on. */
      saved_heap = vl_msg_push_heap ();
      vec_free (segment_text);
      vl_msg_pop_heap (saved_heap);

      vlib_cli_output (vm, "API segment start:");
      vlib_cli_output (vm, "%v", local_text);
      vlib_cli_output (vm, "API segment end:");
      vec_free (local_text);
    }

#if USE_DLMALLOC == 0
  /* *INDENT-OFF* */
  foreach_vlib_main (
  ({
      mheap_t *heap_hdr = mheap_header (clib_per_cpu_mheaps[index]);

      vlib_cli_output (vm, "%sThread %d %s\n", index ? "\n":"", index,
                       vlib_worker_threads[index].name);
      vlib_cli_output (vm, "  %U\n", format_page_map,
                       pointer_to_uword (heap_hdr) -
                       heap_hdr->vm_alloc_offset_from_header,
                       heap_hdr->vm_alloc_size);
      vlib_cli_output (vm, "  %U\n", format_mheap,
                       clib_per_cpu_mheaps[index], verbose);
      index++;
  }));
  /* *INDENT-ON* */
#else
  {
    uword clib_mem_trace_enable_disable (uword enable);
    uword trace_was_on;

    /*
     * The foreach_vlib_main loop itself generates allocator traffic,
     * so switch tracing off for its duration.
     */
    trace_was_on = clib_mem_trace_enable_disable (0);

    /* *INDENT-OFF* */
    foreach_vlib_main (
    ({
      void *thread_heap = clib_per_cpu_mheaps[index];
      struct dlmallinfo heap_info = mspace_mallinfo (thread_heap);

      vlib_cli_output (vm, "%sThread %d %s\n", index ? "\n":"", index,
                       vlib_worker_threads[index].name);
      vlib_cli_output (vm, "  %U\n", format_page_map,
                       pointer_to_uword (mspace_least_addr (thread_heap)),
                       heap_info.arena);
      vlib_cli_output (vm, "  %U\n", format_mheap,
                       clib_per_cpu_mheaps[index], verbose);
      index++;
    }));
    /* *INDENT-ON* */

    /* Put the trace flag back to whatever it was on entry. */
    clib_mem_trace_enable_disable (trace_was_on);
  }
#endif /* USE_DLMALLOC */
  return 0;
}

/*
 * Register the "show memory" debug CLI command.  The handler is
 * show_memory_usage (); short_help is the usage text for the command.
 */
/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_memory_usage_command, static) = {
  .path = "show memory",
  .short_help = "[verbose | api-segment] Show current memory usage",
  .function = show_memory_usage,
};
/* *INDENT-ON* */


From: vpp-dev@lists.fd.io <vpp-dev@lists.fd.io> On Behalf Of siddarth rai
Sent: Tuesday, October 23, 2018 6:38 AM
To: vpp-dev@lists.fd.io
Subject: [vpp-dev] VPP crashing out of dead_client_scan()

Hi all,
I am facing an occasional VPP crash from dead_client_scan() when I restart a
client.
I am using VPP version v18.01.1-100~g3a6948c.  Upgrading to a newer version is 
not an option for me currently.

Here is the backtrace :

Program terminated with signal 6, Aborted.
#0  0x00002ad10a37f207 in raise () from /lib64/libc.so.6
Missing separate debuginfos, use: debuginfo-install OPWVmepCR-99.9-el7.x86_64
(gdb) bt
#0  0x00002ad10a37f207 in raise () from /lib64/libc.so.6
#1  0x00002ad10a3808f8 in abort () from /lib64/libc.so.6
#2  0x0000000000405ef3 in os_panic () at 
/bfs-build/build-area.44/builds/LinuxNBngp_mainline_RH7/2018-10-18-1026/third-party/vpp/vpp_1801/build-data/../src/vpp/vnet/main.c:268
#3  0x00002ad1097fb6d2 in clib_mem_alloc_aligned_at_offset 
(os_out_of_memory_on_failure=1, align_offset=<optimized out>, align=4, size=60)
    at 
/bfs-build/build-area.44/builds/LinuxNBngp_mainline_RH7/2018-10-18-1026/third-party/vpp/vpp_1801/build-data/../src/vppinfra/mem.h:105
#4  vec_resize_allocate_memory (v=<optimized out>, 
length_increment=length_increment@entry=4, data_bytes=<optimized out>, 
header_bytes=<optimized out>, header_bytes@entry=0, 
data_align=data_align@entry=4)
    at 
/bfs-build/build-area.44/builds/LinuxNBngp_mainline_RH7/2018-10-18-1026/third-party/vpp/vpp_1801/build-data/../src/vppinfra/vec.c:84
#5  0x00002ad1097bd5bc in _vec_resize (data_align=0, header_bytes=0, 
data_bytes=<optimized out>, length_increment=4, v=<optimized out>)
    at 
/bfs-build/build-area.44/builds/LinuxNBngp_mainline_RH7/2018-10-18-1026/third-party/vpp/vpp_1801/build-data/../src/vppinfra/vec.h:142
#6  format_integer (s=<optimized out>, s@entry=0x308d6a84 
"svm_client_scan_this_region_nolock:", number=<optimized out>, 
options=options@entry=0x2ad10d3cabf0)
    at 
/bfs-build/build-area.44/builds/LinuxNBngp_mainline_RH7/2018-10-18-1026/third-party/vpp/vpp_1801/build-data/../src/vppinfra/format.c:535
#7  0x00002ad1097be32e in do_percent (va=0x2ad10d3cac78, fmt=<optimized out>, 
_s=<synthetic pointer>)
    at 
/bfs-build/build-area.44/builds/LinuxNBngp_mainline_RH7/2018-10-18-1026/third-party/vpp/vpp_1801/build-data/../src/vppinfra/format.c:314
#8  va_format (s=0x308d6a84 "svm_client_scan_this_region_nolock:", 
fmt=<optimized out>, va=va@entry=0x2ad10d3cac78)
    at 
/bfs-build/build-area.44/builds/LinuxNBngp_mainline_RH7/2018-10-18-1026/third-party/vpp/vpp_1801/build-data/../src/vppinfra/format.c:404
#9  0x00002ad1097bd707 in format (s=<optimized out>, 
fmt=fmt@entry=0x2ad10980b5a3 "%wd:")
    at 
/bfs-build/build-area.44/builds/LinuxNBngp_mainline_RH7/2018-10-18-1026/third-party/vpp/vpp_1801/build-data/../src/vppinfra/format.c:423
#10 0x00002ad1097b9234 in _clib_error (how_to_die=how_to_die@entry=4, 
function_name=function_name@entry=0x2ad10959a0a0 <__FUNCTION__.11526> 
"svm_client_scan_this_region_nolock", line_number=line_number@entry=1205,
    fmt=fmt@entry=0x2ad10959a00f "%s: cleanup ghost pid %d") at 
/bfs-build/build-area.44/builds/LinuxNBngp_mainline_RH7/2018-10-18-1026/third-party/vpp/vpp_1801/build-data/../src/vppinfra/error.c:122
#11 0x00002ad109586b11 in svm_client_scan_this_region_nolock (rp=0x30021000) at 
/bfs-build/build-area.44/builds/LinuxNBngp_mainline_RH7/2018-10-18-1026/third-party/vpp/vpp_1801/build-data/../src/svm/svm.c:1204
#12 0x00002ad10820b896 in dead_client_scan (am=<optimized out>, now=<optimized 
out>, shm=0x3004300c)
    at 
/bfs-build/build-area.44/builds/LinuxNBngp_mainline_RH7/2018-10-18-1026/third-party/vpp/vpp_1801/build-data/../src/vlibmemory/memory_vlib.c:803
#13 memclnt_process (vm=0x2ad1086af260 <vlib_global_main>, node=0x2ad10d3c2000, 
f=<optimized out>)
    at 
/bfs-build/build-area.44/builds/LinuxNBngp_mainline_RH7/2018-10-18-1026/third-party/vpp/vpp_1801/build-data/../src/vlibmemory/memory_vlib.c:1091
#14 0x00002ad10845b656 in vlib_process_bootstrap (_a=<optimized out>) at 
/bfs-build/build-area.44/builds/LinuxNBngp_mainline_RH7/2018-10-18-1026/third-party/vpp/vpp_1801/build-data/../src/vlib/main.c:1231
#15 0x00002ad1097c6838 in clib_calljmp () at 
/bfs-build/build-area.44/builds/LinuxNBngp_mainline_RH7/2018-10-18-1026/third-party/vpp/vpp_1801/build-data/../src/vppinfra/longjmp.S:110
#16 0x00002ad10d249e30 in ?? ()
#17 0x00002ad10845c999 in vlib_process_startup (f=0x0, p=0x2ad10d3c2000, 
vm=0x2ad1086af260 <vlib_global_main>)
    at 
/bfs-build/build-area.44/builds/LinuxNBngp_mainline_RH7/2018-10-18-1026/third-party/vpp/vpp_1801/build-data/../src/vlib/main.c:1253
#18 dispatch_process (vm=0x2ad1086af260 <vlib_global_main>, p=0x2ad10d3c2000, 
last_time_stamp=59319358378453656, f=0x0)
    at 
/bfs-build/build-area.44/builds/LinuxNBngp_mainline_RH7/2018-10-18-1026/third-party/vpp/vpp_1801/build-data/../src/vlib/main.c:1296

Any help will be greatly appreciated.

Regards,
Siddarth
-=-=-=-=-=-=-=-=-=-=-=-
Links: You receive all messages sent to this group.

View/Reply Online (#10920): https://lists.fd.io/g/vpp-dev/message/10920
Mute This Topic: https://lists.fd.io/mt/27567087/21656
Group Owner: vpp-dev+ow...@lists.fd.io
Unsubscribe: https://lists.fd.io/g/vpp-dev/unsub  [arch...@mail-archive.com]
-=-=-=-=-=-=-=-=-=-=-=-

Reply via email to