During start it consumes ~90% CPU, strace shows, that OSD process doing something with LevelDB.
Compact is disabled:
r...@storage07.main01.ceph.apps.prod.int.grcc:~$ cat /etc/ceph/ceph.conf | grep compact
#leveldb_compact_on_mount = true

But with debug_leveldb=20 I see, that compaction is running, but why?

2017-07-17 09:27:37.394008 7f4ed2293700 1 leveldb: Compacting 1@1 + 12@2 files 2017-07-17 09:27:37.593890 7f4ed2293700 1 leveldb: Generated table #76778: 277817 keys, 2125970 bytes 2017-07-17 09:27:37.718954 7f4ed2293700 1 leveldb: Generated table #76779: 221451 keys, 2124338 bytes 2017-07-17 09:27:37.777362 7f4ed2293700 1 leveldb: Generated table #76780: 63755 keys, 809913 bytes 2017-07-17 09:27:37.919094 7f4ed2293700 1 leveldb: Generated table #76781: 231475 keys, 2026376 bytes 2017-07-17 09:27:38.035906 7f4ed2293700 1 leveldb: Generated table #76782: 190956 keys, 1573332 bytes 2017-07-17 09:27:38.127597 7f4ed2293700 1 leveldb: Generated table #76783: 148675 keys, 1260956 bytes 2017-07-17 09:27:38.286183 7f4ed2293700 1 leveldb: Generated table #76784: 294105 keys, 2123438 bytes 2017-07-17 09:27:38.469562 7f4ed2293700 1 leveldb: Generated table #76785: 299617 keys, 2124267 bytes 2017-07-17 09:27:38.619666 7f4ed2293700 1 leveldb: Generated table #76786: 277305 keys, 2124936 bytes 2017-07-17 09:27:38.711423 7f4ed2293700 1 leveldb: Generated table #76787: 110536 keys, 951545 bytes 2017-07-17 09:27:38.869917 7f4ed2293700 1 leveldb: Generated table #76788: 296199 keys, 2123506 bytes 2017-07-17 09:27:39.028395 7f4ed2293700 1 leveldb: Generated table #76789: 248634 keys, 2096715 bytes 2017-07-17 09:27:39.028414 7f4ed2293700 1 leveldb: Compacted 1@1 + 12@2 files => 21465292 bytes 2017-07-17 09:27:39.053288 7f4ed2293700 1 leveldb: compacted to: files[ 0 0 48 549 948 0 0 ]
2017-07-17 09:27:39.054014 7f4ed2293700  1 leveldb: Delete type=2 #76741

Strace:

open("/var/lib/ceph/osd/ceph-195/current/omap/043788.ldb", O_RDONLY) = 18
stat("/var/lib/ceph/osd/ceph-195/current/omap/043788.ldb", {st_mode=S_IFREG|0644, st_size=2154394, ...}) = 0
mmap(NULL, 2154394, PROT_READ, MAP_SHARED, 18, 0) = 0x7f96a67a0000
close(18)                               = 0
brk(0x55d156640000)                     = 0x55d156640000
rt_sigprocmask(SIG_SETMASK, ~[RTMIN RT_1], [PIPE], 8) = 0
rt_sigprocmask(SIG_SETMASK, [PIPE], NULL, 8) = 0
rt_sigprocmask(SIG_SETMASK, ~[RTMIN RT_1], [PIPE], 8) = 0
rt_sigprocmask(SIG_SETMASK, [PIPE], NULL, 8) = 0
rt_sigprocmask(SIG_SETMASK, ~[RTMIN RT_1], [PIPE], 8) = 0
rt_sigprocmask(SIG_SETMASK, [PIPE], NULL, 8) = 0
rt_sigprocmask(SIG_SETMASK, ~[RTMIN RT_1], [PIPE], 8) = 0
rt_sigprocmask(SIG_SETMASK, [PIPE], NULL, 8) = 0
rt_sigprocmask(SIG_SETMASK, ~[RTMIN RT_1], [PIPE], 8) = 0
rt_sigprocmask(SIG_SETMASK, [PIPE], NULL, 8) = 0
rt_sigprocmask(SIG_SETMASK, ~[RTMIN RT_1], [PIPE], 8) = 0
rt_sigprocmask(SIG_SETMASK, [PIPE], NULL, 8) = 0
rt_sigprocmask(SIG_SETMASK, ~[RTMIN RT_1], [PIPE], 8) = 0
rt_sigprocmask(SIG_SETMASK, [PIPE], NULL, 8) = 0
rt_sigprocmask(SIG_SETMASK, ~[RTMIN RT_1], [PIPE], 8) = 0
rt_sigprocmask(SIG_SETMASK, [PIPE], NULL, 8) = 0
rt_sigprocmask(SIG_SETMASK, ~[RTMIN RT_1], [PIPE], 8) = 0
rt_sigprocmask(SIG_SETMASK, [PIPE], NULL, 8) = 0
rt_sigprocmask(SIG_SETMASK, ~[RTMIN RT_1], [PIPE], 8) = 0
rt_sigprocmask(SIG_SETMASK, [PIPE], NULL, 8) = 0
rt_sigprocmask(SIG_SETMASK, ~[RTMIN RT_1], [PIPE], 8) = 0
rt_sigprocmask(SIG_SETMASK, [PIPE], NULL, 8) = 0
rt_sigprocmask(SIG_SETMASK, ~[RTMIN RT_1], [PIPE], 8) = 0
rt_sigprocmask(SIG_SETMASK, [PIPE], NULL, 8) = 0
rt_sigprocmask(SIG_SETMASK, ~[RTMIN RT_1], [PIPE], 8) = 0
rt_sigprocmask(SIG_SETMASK, [PIPE], NULL, 8) = 0
rt_sigprocmask(SIG_SETMASK, ~[RTMIN RT_1], [PIPE], 8) = 0
rt_sigprocmask(SIG_SETMASK, [PIPE], NULL, 8) = 0
rt_sigprocmask(SIG_SETMASK, ~[RTMIN RT_1], [PIPE], 8) = 0
rt_sigprocmask(SIG_SETMASK, [PIPE], NULL, 8) = 0
rt_sigprocmask(SIG_SETMASK, ~[RTMIN RT_1], [PIPE], 8) = 0
rt_sigprocmask(SIG_SETMASK, [PIPE], NULL, 8) = 0
rt_sigprocmask(SIG_SETMASK, ~[RTMIN RT_1], [PIPE], 8) = 0
rt_sigprocmask(SIG_SETMASK, [PIPE], NULL, 8) = 0
rt_sigprocmask(SIG_SETMASK, ~[RTMIN RT_1], [PIPE], 8) = 0
rt_sigprocmask(SIG_SETMASK, [PIPE], NULL, 8) = 0
rt_sigprocmask(SIG_SETMASK, ~[RTMIN RT_1], [PIPE], 8) = 0
rt_sigprocmask(SIG_SETMASK, [PIPE], NULL, 8) = 0
rt_sigprocmask(SIG_SETMASK, ~[RTMIN RT_1], [PIPE], 8) = 0
rt_sigprocmask(SIG_SETMASK, [PIPE], NULL, 8) = 0
rt_sigprocmask(SIG_SETMASK, ~[RTMIN RT_1], [PIPE], 8) = 0
rt_sigprocmask(SIG_SETMASK, [PIPE], NULL, 8) = 0
rt_sigprocmask(SIG_SETMASK, ~[RTMIN RT_1], [PIPE], 8) = 0
rt_sigprocmask(SIG_SETMASK, [PIPE], NULL, 8) = 0
rt_sigprocmask(SIG_SETMASK, ~[RTMIN RT_1], [PIPE], 8) = 0
rt_sigprocmask(SIG_SETMASK, [PIPE], NULL, 8) = 0
rt_sigprocmask(SIG_SETMASK, ~[RTMIN RT_1], [PIPE], 8) = 0
rt_sigprocmask(SIG_SETMASK, [PIPE], NULL, 8) = 0
rt_sigprocmask(SIG_SETMASK, ~[RTMIN RT_1], [PIPE], 8) = 0
rt_sigprocmask(SIG_SETMASK, [PIPE], NULL, 8) = 0
rt_sigprocmask(SIG_SETMASK, ~[RTMIN RT_1], [PIPE], 8) = 0
rt_sigprocmask(SIG_SETMASK, ~[RTMIN RT_1], ~[KILL STOP RTMIN RT_1], 8) = 0
rt_sigprocmask(SIG_SETMASK, ~[KILL STOP RTMIN RT_1], NULL, 8) = 0
rt_sigprocmask(SIG_SETMASK, [PIPE], NULL, 8) = 0
munmap(0x7f96fca08000, 2125056)




On 17.07.2017 09:13, Anton Dmitriev wrote:
Thanks for reply.
I restarted OSD with debug_ms = 1/1 and debug_osd = 20/20.

Look at this:
2017-07-17 08:57:52.077481 7f4db319c840 0 xfsfilestorebackend(/var/lib/ceph/osd/ceph-167) detect_feature: extsize is disabled by conf 2017-07-17 09:04:04.345065 7f4db319c840 0 filestore(/var/lib/ceph/osd/ceph-167) mount: enabling WRITEAHEAD journal mode: checkpoint is not enabled

Nothing is happening for a 6 minutes. Maybe try another debug option? Maybe debug_filestore ?


2017-07-17 08:57:32.072881 7f4ca5aaa840 10 -- :/6773 wait: closing pipes
2017-07-17 08:57:32.072882 7f4ca5aaa840 10 -- :/6773 reaper
2017-07-17 08:57:32.072884 7f4ca5aaa840 10 -- :/6773 reaper done
2017-07-17 08:57:32.072886 7f4ca5aaa840 10 -- :/6773 wait: waiting for pipes to close
2017-07-17 08:57:32.072888 7f4ca5aaa840 10 -- :/6773 wait: done.
2017-07-17 08:57:32.072889 7f4ca5aaa840  1 -- :/6773 shutdown complete.
2017-07-17 08:57:52.015484 7f4db319c840 0 set uid:gid to 4402:4402 (ceph:ceph) 2017-07-17 08:57:52.015493 7f4db319c840 0 ceph version 10.2.9 (2ee413f77150c0f375ff6f10edd6c8f9c7d060d0), process ceph-osd, pid 3966896
2017-07-17 08:57:52.015664 7f4db319c840  5 object store type is filestore
2017-07-17 08:57:52.016680 7f4db319c840 1 -- 10.17.12.130:0/0 learned my addr 10.17.12.130:0/0 2017-07-17 08:57:52.016688 7f4db319c840 1 accepter.accepter.bind my_inst.addr is 10.17.12.130:6867/3966896 need_addr=0 2017-07-17 08:57:52.016721 7f4db319c840 1 -- 10.17.27.14:0/0 learned my addr 10.17.27.14:0/0 2017-07-17 08:57:52.016725 7f4db319c840 1 accepter.accepter.bind my_inst.addr is 10.17.27.14:6847/3966896 need_addr=0 2017-07-17 08:57:52.016750 7f4db319c840 1 -- 10.17.27.14:0/0 learned my addr 10.17.27.14:0/0 2017-07-17 08:57:52.016754 7f4db319c840 1 accepter.accepter.bind my_inst.addr is 10.17.27.14:6848/3966896 need_addr=0 2017-07-17 08:57:52.016783 7f4db319c840 1 -- 10.17.12.130:0/0 learned my addr 10.17.12.130:0/0 2017-07-17 08:57:52.016787 7f4db319c840 1 accepter.accepter.bind my_inst.addr is 10.17.12.130:6868/3966896 need_addr=0 2017-07-17 08:57:52.016789 7f4db319c840 0 pidfile_write: ignore empty --pid-file 2017-07-17 08:57:52.022153 7f4db319c840 10 ErasureCodePluginSelectJerasure: load: jerasure_sse4 2017-07-17 08:57:52.024194 7f4db319c840 10 load: jerasure load: lrc load: isa 2017-07-17 08:57:52.024388 7f4db319c840 1 -- 10.17.12.130:6867/3966896 messenger.start
2017-07-17 08:57:52.024418 7f4db319c840  1 -- :/0 messenger.start
2017-07-17 08:57:52.024439 7f4db319c840 1 -- 10.17.12.130:6868/3966896 messenger.start 2017-07-17 08:57:52.024462 7f4db319c840 1 -- 10.17.27.14:6848/3966896 messenger.start 2017-07-17 08:57:52.024481 7f4db319c840 1 -- 10.17.27.14:6847/3966896 messenger.start
2017-07-17 08:57:52.024501 7f4db319c840  1 -- :/0 messenger.start
2017-07-17 08:57:52.024574 7f4db319c840 2 osd.167 0 mounting /var/lib/ceph/osd/ceph-167 /var/lib/ceph/osd/ceph-167/journal 2017-07-17 08:57:52.025031 7f4db319c840 0 filestore(/var/lib/ceph/osd/ceph-167) backend xfs (magic 0x58465342) 2017-07-17 08:57:52.032406 7f4db319c840 0 genericfilestorebackend(/var/lib/ceph/osd/ceph-167) detect_features: FIEMAP ioctl is disabled via 'filestore fiemap' config option 2017-07-17 08:57:52.032415 7f4db319c840 0 genericfilestorebackend(/var/lib/ceph/osd/ceph-167) detect_features: SEEK_DATA/SEEK_HOLE is disabled via 'filestore seek data hole' config option 2017-07-17 08:57:52.032430 7f4db319c840 0 genericfilestorebackend(/var/lib/ceph/osd/ceph-167) detect_features: splice is supported 2017-07-17 08:57:52.077404 7f4db319c840 0 genericfilestorebackend(/var/lib/ceph/osd/ceph-167) detect_features: syncfs(2) syscall fully supported (by glibc and kernel) 2017-07-17 08:57:52.077481 7f4db319c840 0 xfsfilestorebackend(/var/lib/ceph/osd/ceph-167) detect_feature: extsize is disabled by conf 2017-07-17 09:04:04.345065 7f4db319c840 0 filestore(/var/lib/ceph/osd/ceph-167) mount: enabling WRITEAHEAD journal mode: checkpoint is not enabled
2017-07-17 09:04:05.751209 7f4db319c840  2 osd.167 0 boot
2017-07-17 09:04:05.751385 7f4db319c840 20 osd.167 0 configured osd_max_object_name[space]_len looks ok 2017-07-17 09:04:05.751468 7f4db319c840 10 osd.167 0 read_superblock sb(2dc4a5fc-8acd-480e-a444-f091d02271b8 osd.167 559efd6b-7a47-4400-9645-39e4c8b210e9 e224604 [223964,224604] lci=[224038,224604])
2017-07-17 09:04:05.775781 7f4db319c840 10 open_all_classes
2017-07-17 09:04:05.775833 7f4db319c840 10 open_all_classes found cephfs
2017-07-17 09:04:05.775853 7f4db319c840 10 _get_class adding new class name cephfs 0x55aadeb42548 2017-07-17 09:04:05.775858 7f4db319c840 10 _load_class cephfs from /usr/lib/rados-classes/libcls_cephfs.so 2017-07-17 09:04:05.776094 7f4db319c840 0 <cls> cls/cephfs/cls_cephfs.cc:202: loading cephfs_size_scan
2017-07-17 09:04:05.776101 7f4db319c840 10 register_class cephfs status 3



On 17.07.2017 08:41, Irek Fasikhov wrote:
Hi, Anton.
You need to run the OSD with debug_ms = 1/1 and debug_osd = 20/20 for detailed information.

2017-07-17 8:26 GMT+03:00 Anton Dmitriev <t...@enumnet.ru <mailto:t...@enumnet.ru>>:

    Hi, all!

    After upgrading from 10.2.7 to 10.2.9 I see that restarting osds
    by 'restart ceph-osd id=N' or 'restart ceph-osd-all' takes about
    10 minutes for getting OSD from DOWN to UP. The same situation on
    all 208 OSDs on 7 servers.

    Also very long OSD start after rebooting servers.

    Before upgrade it took no more than 2 minutes.

    Does anyone has the same situation like mine?


    2017-07-17 08:07:26.895600 7fac2d656840  0 set uid:gid to
    4402:4402 (ceph:ceph)
    2017-07-17 08:07:26.895615 7fac2d656840  0 ceph version 10.2.9
    (2ee413f77150c0f375ff6f10edd6c8f9c7d060d0), process ceph-osd, pid
    197542
    2017-07-17 08:07:26.897018 7fac2d656840  0 pidfile_write: ignore
    empty --pid-file
    2017-07-17 08:07:26.906489 7fac2d656840  0
    filestore(/var/lib/ceph/osd/ceph-0) backend xfs (magic 0x58465342)
    2017-07-17 08:07:26.917074 7fac2d656840  0
    genericfilestorebackend(/var/lib/ceph/osd/ceph-0)
    detect_features: FIEMAP ioctl is disabled via 'filestore fiemap'
    config option
    2017-07-17 08:07:26.917092 7fac2d656840  0
    genericfilestorebackend(/var/lib/ceph/osd/ceph-0)
    detect_features: SEEK_DATA/SEEK_HOLE is disabled via 'filestore
    seek data hole' config option
    2017-07-17 08:07:26.917112 7fac2d656840  0
    genericfilestorebackend(/var/lib/ceph/osd/ceph-0)
    detect_features: splice is supported
    2017-07-17 08:07:27.037031 7fac2d656840  0
    genericfilestorebackend(/var/lib/ceph/osd/ceph-0)
    detect_features: syncfs(2) syscall fully supported (by glibc and
    kernel)
    2017-07-17 08:07:27.037154 7fac2d656840  0
    xfsfilestorebackend(/var/lib/ceph/osd/ceph-0) detect_feature:
    extsize is disabled by conf
    2017-07-17 08:15:17.839072 7fac2d656840  0
    filestore(/var/lib/ceph/osd/ceph-0) mount: enabling WRITEAHEAD
    journal mode: checkpoint is not enabled
    2017-07-17 08:15:20.150446 7fac2d656840  0 <cls>
    cls/hello/cls_hello.cc:305: loading cls_hello
    2017-07-17 08:15:20.152483 7fac2d656840  0 <cls>
    cls/cephfs/cls_cephfs.cc:202: loading cephfs_size_scan
    2017-07-17 08:15:20.210428 7fac2d656840  0 osd.0 224167 crush map
    has features 2200130813952, adjusting msgr requires for clients
    2017-07-17 08:15:20.210443 7fac2d656840  0 osd.0 224167 crush map
    has features 2200130813952 was 8705, adjusting msgr requires for mons
    2017-07-17 08:15:20.210448 7fac2d656840  0 osd.0 224167 crush map
    has features 2200130813952, adjusting msgr requires for osds
    2017-07-17 08:15:58.902173 7fac2d656840  0 osd.0 224167 load_pgs
    2017-07-17 08:16:19.083406 7fac2d656840  0 osd.0 224167 load_pgs
    opened 242 pgs
    2017-07-17 08:16:19.083969 7fac2d656840  0 osd.0 224167 using 0
    op queue with priority op cut off at 64.
    2017-07-17 08:16:19.109547 7fac2d656840 -1 osd.0 224167
    log_to_monitors {default=true}
    2017-07-17 08:16:19.522448 7fac2d656840  0 osd.0 224167 done with
    init, starting boot process

-- Dmitriev Anton

    _______________________________________________
    ceph-users mailing list
    ceph-users@lists.ceph.com <mailto:ceph-users@lists.ceph.com>
    http://lists.ceph.com/listinfo.cgi/ceph-users-ceph.com
    <http://lists.ceph.com/listinfo.cgi/ceph-users-ceph.com>



--
Dmitriev Anton


_______________________________________________
ceph-users mailing list
ceph-users@lists.ceph.com
http://lists.ceph.com/listinfo.cgi/ceph-users-ceph.com


--
Dmitriev Anton

_______________________________________________
ceph-users mailing list
ceph-users@lists.ceph.com
http://lists.ceph.com/listinfo.cgi/ceph-users-ceph.com

Reply via email to