We have a small Pacific 16.2.7 test cluster that has been ticking over for a couple of years with no problems whatever. The last "event" was 14 days ago when I was testing some OSD replacement procedures - nothing remarkable.

At 01:46 this morning, though, mon03 signalled an abort in the RocksDB code. The monitor crashed, and systemd successfully restarted it 10 seconds later. Although it's difficult to tell whether any RBD VMs were doing much at the time, it is unlikely. Deep-scrubs are likely to have been running. There is nothing of interest in the OS logs.

Config summary:

 * Built manually (not using cephadm)
 * Debian 10 (buster)
 * Host mon01 - mon, mgr, mds, rgw
 * Host mon02 - mon, mgr, mds, rgw
 * Host mon03 - mon
 * Hosts osd01-03 - each has 2 x Optane NVMe for HDD DB/WAL, 24 x HDD,
   and 2 x NVMe OSD
 * Running at very low load and capacity utilisation

It's working fine now, but I'm wondering whether anyone knows what might have happened and whether there is any lurking problem that should be looked at.

Crash info (slightly sanitised) below...

Thanks, Chris


ceph@xxxxxxmon03:~$ ceph crash info 
2022-02-24T01:46:41.241025Z_7bcaa4fa-d202-4e48-91ac-f3070493bc73
{
    "backtrace": [
        "/lib/x86_64-linux-gnu/libpthread.so.0(+0x12730) [0x7fb338e7d730]",
        "gsignal()",
        "abort()",
        "/lib/x86_64-linux-gnu/libc.so.6(+0x2240f) [0x7fb33894b40f]",
        "/lib/x86_64-linux-gnu/libc.so.6(+0x30102) [0x7fb338959102]",
        "(rocksdb::BlockBasedTableBuilder::Add(rocksdb::Slice const&, rocksdb::Slice 
const&)+0x119) [0x5633d27a792f]",
        
"(rocksdb::CompactionJob::ProcessKeyValueCompaction(rocksdb::CompactionJob::SubcompactionState*)+0xaf8)
 [0x5633d275cf04]",
        "(rocksdb::CompactionJob::Run()+0x235) [0x5633d275adfb]",
        "(rocksdb::DBImpl::BackgroundCompaction(bool*, rocksdb::JobContext*, 
rocksdb::LogBuffer*, rocksdb::DBImpl::PrepickedCompaction*, 
rocksdb::Env::Priority)+0x248a) [0x5633d248a74a]",
        
"(rocksdb::DBImpl::BackgroundCallCompaction(rocksdb::DBImpl::PrepickedCompaction*, 
rocksdb::Env::Priority)+0x20d) [0x5633d2487a93]",
        "(rocksdb::DBImpl::BGWorkCompaction(void*)+0xc5) [0x5633d248637d]",
        "(void std::__invoke_impl<void, void (*&)(void*), void*&>(std::__invoke_other, void 
(*&)(void*), void*&)+0x34) [0x5633d26e7f6e]",
        "(std::__invoke_result<void (*&)(void*), void*&>::type std::__invoke<void (*&)(void*), 
void*&>(void (*&)(void*), void*&)+0x37) [0x5633d26e7ad3]",
        "(void std::_Bind<void (*(void*))(void*)>::__call<void, , 0ul>(std::tuple<>&&, 
std::_Index_tuple<0ul>)+0x48) [0x5633d26e71c2]",
        "(void std::_Bind<void (*(void*))(void*)>::operator()<, void>()+0x24) 
[0x5633d26e6318]",
        "(std::_Function_handler<void (), std::_Bind<void (*(void*))(void*)> 
>::_M_invoke(std::_Any_data const&)+0x20) [0x5633d26e5404]",
        "(std::function<void ()>::operator()() const+0x32) [0x5633d242c58c]",
        "(rocksdb::ThreadPoolImpl::Impl::BGThread(unsigned long)+0x26b) 
[0x5633d26e1941]",
        "(rocksdb::ThreadPoolImpl::Impl::BGThreadWrapper(void*)+0x108) 
[0x5633d26e1aa4]",
        "(void std::__invoke_impl<void, void (*)(void*), 
rocksdb::BGThreadMetadata*>(std::__invoke_other, void (*&&)(void*), 
rocksdb::BGThreadMetadata*&&)+0x34) [0x5633d26e4bdf]",
        "(std::__invoke_result<void (*)(void*), rocksdb::BGThreadMetadata*>::type std::__invoke<void 
(*)(void*), rocksdb::BGThreadMetadata*>(void (*&&)(void*), rocksdb::BGThreadMetadata*&&)+0x37) 
[0x5633d26e3dbf]",
        "(decltype (__invoke((_S_declval<0ul>)(), (_S_declval<1ul>)())) 
std::thread::_Invoker<std::tuple<void (*)(void*), rocksdb::BGThreadMetadata*> >::_M_invoke<0ul, 
1ul>(std::_Index_tuple<0ul, 1ul>)+0x43) [0x5633d26e8779]",
        "(std::thread::_Invoker<std::tuple<void (*)(void*), rocksdb::BGThreadMetadata*> 
>::operator()()+0x18) [0x5633d26e8734]",
        "(std::thread::_State_impl<std::thread::_Invoker<std::tuple<void (*)(void*), 
rocksdb::BGThreadMetadata*> > >::_M_run()+0x1c) [0x5633d26e8718]",
        "/lib/x86_64-linux-gnu/libstdc++.so.6(+0xbbb2f) [0x7fb338d44b2f]",
        "/lib/x86_64-linux-gnu/libpthread.so.0(+0x7fa3) [0x7fb338e72fa3]",
        "clone()"
    ],
    "ceph_version": "16.2.7",
    "crash_id": 
"2022-02-24T01:46:41.241025Z_7bcaa4fa-d202-4e48-91ac-f3070493bc73",
    "entity_name": "mon.xxxxxxmon03",
    "os_id": "10",
    "os_name": "Debian GNU/Linux 10 (buster)",
    "os_version": "10 (buster)",
    "os_version_id": "10",
    "process_name": "ceph-mon",
    "stack_sig": 
"f5274691c6982e320f630eb9e025f3db660bd3a110bd7ec1400c7ae121feebb7",
    "timestamp": "2022-02-24T01:46:41.241025Z",
    "utsname_hostname": "xxxxxxmon03.x.y.z",
    "utsname_machine": "x86_64",
    "utsname_release": "4.19.0-18-amd64",
    "utsname_sysname": "Linux",
    "utsname_version": "#1 SMP Debian 4.19.208-1 (2021-09-29)"
}
_______________________________________________
ceph-users mailing list -- ceph-users@ceph.io
To unsubscribe send an email to ceph-users-le...@ceph.io

Reply via email to