On 14.2.5 (Nautilus), and also in Luminous, the MDS buffer_anon memory
use spirals out of control when scanning many thousands of files. The
use case is more or less "look up this file and, if it exists, append
this chunk to it; otherwise create it with this chunk." The memory is
recovered as soon as the workload stops, and at most 20-100 files are
ever open at one time.
The cache gets oversized, but that's more or less expected; it's pretty
much always (and almost immediately) in some warn state. That makes me
wonder whether a much larger cache might help with buffer_anon use, and
I'm looking for advice there. This is on a deeply-hashed directory, but
with very little data overall (<20GB) and lots of tiny files.
As I typed this post, the pool went from ~60GB to ~110GB. I've resorted
to a cronjob that restarts the active MDS when it starts using swap,
just to keep the cluster alive.
~$ ceph daemon mds.mds1 dump_mempools
{
"mempool": {
"by_pool": {
"bloom_filter": {
"items": 4631659,
"bytes": 4631659
},
"bluestore_alloc": {
"items": 0,
"bytes": 0
},
"bluestore_cache_data": {
"items": 0,
"bytes": 0
},
"bluestore_cache_onode": {
"items": 0,
"bytes": 0
},
"bluestore_cache_other": {
"items": 0,
"bytes": 0
},
"bluestore_fsck": {
"items": 0,
"bytes": 0
},
"bluestore_txc": {
"items": 0,
"bytes": 0
},
"bluestore_writing_deferred": {
"items": 0,
"bytes": 0
},
"bluestore_writing": {
"items": 0,
"bytes": 0
},
"bluefs": {
"items": 0,
"bytes": 0
},
"buffer_anon": {
"items": 67791,
"bytes": 85598497506
},
"buffer_meta": {
"items": 57987,
"bytes": 5102856
},
"osd": {
"items": 0,
"bytes": 0
},
"osd_mapbl": {
"items": 0,
"bytes": 0
},
"osd_pglog": {
"items": 0,
"bytes": 0
},
"osdmap": {
"items": 582,
"bytes": 12248
},
"osdmap_mapping": {
"items": 0,
"bytes": 0
},
"pgmap": {
"items": 0,
"bytes": 0
},
"mds_co": {
"items": 284739975,
"bytes": 6883426437
},
"unittest_1": {
"items": 0,
"bytes": 0
},
"unittest_2": {
"items": 0,
"bytes": 0
}
},
"total": {
"items": 289497994,
"bytes": 92491670706
}
}
}
~$ ceph daemon mds.mds0 perf dump
{
"AsyncMessenger::Worker-0": {
"msgr_recv_messages": 1360700,
"msgr_send_messages": 2298283,
"msgr_recv_bytes": 17915475859,
"msgr_send_bytes": 2024853049,
"msgr_created_connections": 2031,
"msgr_active_connections": 18446744073709552000,
"msgr_running_total_time": 96.2125937,
"msgr_running_send_time": 38.268843421,
"msgr_running_recv_time": 44.299468018,
"msgr_running_fast_dispatch_time": 17.303765523
},
"AsyncMessenger::Worker-1": {
"msgr_recv_messages": 971844,
"msgr_send_messages": 1266589,
"msgr_recv_bytes": 14435001275,
"msgr_send_bytes": 1755800874,
"msgr_created_connections": 213,
"msgr_active_connections": 18446744073709552000,
"msgr_running_total_time": 60.745883284,
"msgr_running_send_time": 17.694164502,
"msgr_running_recv_time": 24.300171049,
"msgr_running_fast_dispatch_time": 14.947038849
},
"AsyncMessenger::Worker-2": {
"msgr_recv_messages": 1742305,
"msgr_send_messages": 2163916,
"msgr_recv_bytes": 30829094382,
"msgr_send_bytes": 2915900257,
"msgr_created_connections": 233,
"msgr_active_connections": 18446744073709552000,
"msgr_running_total_time": 137.913631549,
"msgr_running_send_time": 41.234654308,
"msgr_running_recv_time": 40.918463152,
"msgr_running_fast_dispatch_time": 36.512891479
},
"cct": {
"total_workers": 1,
"unhealthy_workers": 0
},
"finisher-PurgeQueue": {
"queue_len": 0,
"complete_latency": {
"avgcount": 47756,
"sum": 217.373554326,
"avgtime": 0.004551753
}
},
"mds": {
"request": 1178430,
"reply": 1178373,
"reply_latency": {
"avgcount": 1178373,
"sum": 60810.239426392,
"avgtime": 0.051605255
},
"forward": 0,
"dir_fetch": 49751,
"dir_commit": 44312,
"dir_split": 0,
"dir_merge": 0,
"inode_max": 100000,
"inodes": 2759030,
"inodes_top": 1919408,
"inodes_bottom": 836395,
"inodes_pin_tail": 3227,
"inodes_pinned": 17019,
"inodes_expired": 42387174,
"inodes_with_caps": 5485,
"caps": 11773,
"subtrees": 2,
"traverse": 1878329,
"traverse_hit": 1675078,
"traverse_forward": 0,
"traverse_discover": 0,
"traverse_dir_fetch": 42538,
"traverse_remote_ino": 0,
"traverse_lock": 25,
"load_cent": 1294614,
"q": 29,
"exported": 0,
"exported_inodes": 0,
"imported": 0,
"imported_inodes": 0,
"openino_dir_fetch": 7277,
"openino_backtrace_fetch": 1,
"openino_peer_discover": 0,
"root_rfiles": 31043731,
"root_rbytes": 5791840170135,
"root_rsnaps": 0
},
"mds_cache": {
"num_strays": 400,
"num_strays_delayed": 8,
"num_strays_enqueuing": 0,
"strays_created": 49534,
"strays_enqueued": 49638,
"strays_reintegrated": 0,
"strays_migrated": 0,
"num_recovering_processing": 0,
"num_recovering_enqueued": 0,
"num_recovering_prioritized": 0,
"recovery_started": 1194,
"recovery_completed": 1194,
"ireq_enqueue_scrub": 0,
"ireq_exportdir": 0,
"ireq_flush": 0,
"ireq_fragmentdir": 0,
"ireq_fragstats": 0,
"ireq_inodestats": 0
},
"mds_log": {
"evadd": 1811605,
"evex": 1809564,
"evtrm": 1809564,
"ev": 106369,
"evexg": 0,
"evexd": 2865,
"segadd": 2244,
"segex": 2244,
"segtrm": 2244,
"seg": 130,
"segexg": 0,
"segexd": 3,
"expos": 4457957952634,
"wrpos": 4458177431234,
"rdpos": 4454374251644,
"jlat": {
"avgcount": 1041651,
"sum": 65486.869073583,
"avgtime": 0.062868339
},
"replayed": 104328
},
"mds_mem": {
"ino": 2759033,
"ino+": 45094333,
"ino-": 42335300,
"dir": 6317,
"dir+": 8300,
"dir-": 1983,
"dn": 2761351,
"dn+": 45205560,
"dn-": 42444209,
"cap": 11773,
"cap+": 1191966,
"cap-": 1180193,
"rss": 67989140,
"heap": 330432
},
"mds_server": {
"dispatch_client_request": 1816885,
"dispatch_server_request": 0,
"handle_client_request": 1178430,
"handle_client_session": 11175,
"handle_slave_request": 0,
"req_create_latency": {
"avgcount": 52013,
"sum": 1098.436606927,
"avgtime": 0.021118501
},
"req_getattr_latency": {
"avgcount": 48725,
"sum": 1867.479634967,
"avgtime": 0.038326929
},
"req_getfilelock_latency": {
"avgcount": 0,
"sum": 0,
"avgtime": 0
},
"req_link_latency": {
"avgcount": 0,
"sum": 0,
"avgtime": 0
},
"req_lookup_latency": {
"avgcount": 397795,
"sum": 8410.812821606,
"avgtime": 0.021143586
},
"req_lookuphash_latency": {
"avgcount": 0,
"sum": 0,
"avgtime": 0
},
"req_lookupino_latency": {
"avgcount": 0,
"sum": 0,
"avgtime": 0
},
"req_lookupname_latency": {
"avgcount": 0,
"sum": 0,
"avgtime": 0
},
"req_lookupparent_latency": {
"avgcount": 0,
"sum": 0,
"avgtime": 0
},
"req_lookupsnap_latency": {
"avgcount": 0,
"sum": 0,
"avgtime": 0
},
"req_lssnap_latency": {
"avgcount": 0,
"sum": 0,
"avgtime": 0
},
"req_mkdir_latency": {
"avgcount": 108,
"sum": 6.164358676,
"avgtime": 0.057077395
},
"req_mknod_latency": {
"avgcount": 0,
"sum": 0,
"avgtime": 0
},
"req_mksnap_latency": {
"avgcount": 0,
"sum": 0,
"avgtime": 0
},
"req_open_latency": {
"avgcount": 287701,
"sum": 8356.203112022,
"avgtime": 0.029044748
},
"req_readdir_latency": {
"avgcount": 7727,
"sum": 158.295126355,
"avgtime": 0.020485974
},
"req_rename_latency": {
"avgcount": 11832,
"sum": 354.415798014,
"avgtime": 0.029954005
},
"req_renamesnap_latency": {
"avgcount": 0,
"sum": 0,
"avgtime": 0
},
"req_rmdir_latency": {
"avgcount": 151,
"sum": 11.192303283,
"avgtime": 0.074121213
},
"req_rmsnap_latency": {
"avgcount": 0,
"sum": 0,
"avgtime": 0
},
"req_rmxattr_latency": {
"avgcount": 0,
"sum": 0,
"avgtime": 0
},
"req_setattr_latency": {
"avgcount": 278323,
"sum": 39304.420644246,
"avgtime": 0.14121873
},
"req_setdirlayout_latency": {
"avgcount": 0,
"sum": 0,
"avgtime": 0
},
"req_setfilelock_latency": {
"avgcount": 44572,
"sum": 292.346143916,
"avgtime": 0.006558964
},
"req_setlayout_latency": {
"avgcount": 0,
"sum": 0,
"avgtime": 0
},
"req_setxattr_latency": {
"avgcount": 7,
"sum": 0.015024808,
"avgtime": 0.002146401
},
"req_symlink_latency": {
"avgcount": 0,
"sum": 0,
"avgtime": 0
},
"req_unlink_latency": {
"avgcount": 49419,
"sum": 950.457851572,
"avgtime": 0.01923264
},
"cap_revoke_eviction": 0
},
"mds_sessions": {
"session_count": 221,
"session_add": 221,
"session_remove": 0,
"sessions_open": 0,
"sessions_stale": 0,
"total_load": 116764,
"average_load": 528,
"avg_session_uptime": 3708284
},
"mempool": {
"bloom_filter_bytes": 4120998,
"bloom_filter_items": 4120998,
"bluestore_alloc_bytes": 0,
"bluestore_alloc_items": 0,
"bluestore_cache_data_bytes": 0,
"bluestore_cache_data_items": 0,
"bluestore_cache_onode_bytes": 0,
"bluestore_cache_onode_items": 0,
"bluestore_cache_other_bytes": 0,
"bluestore_cache_other_items": 0,
"bluestore_fsck_bytes": 0,
"bluestore_fsck_items": 0,
"bluestore_txc_bytes": 0,
"bluestore_txc_items": 0,
"bluestore_writing_deferred_bytes": 0,
"bluestore_writing_deferred_items": 0,
"bluestore_writing_bytes": 0,
"bluestore_writing_items": 0,
"bluefs_bytes": 0,
"bluefs_items": 0,
"buffer_anon_bytes": 61380965872,
"buffer_anon_items": 50480,
"buffer_meta_bytes": 3808640,
"buffer_meta_items": 43280,
"osd_bytes": 0,
"osd_items": 0,
"osd_mapbl_bytes": 0,
"osd_mapbl_items": 0,
"osd_pglog_bytes": 0,
"osd_pglog_items": 0,
"osdmap_bytes": 12248,
"osdmap_items": 582,
"osdmap_mapping_bytes": 0,
"osdmap_mapping_items": 0,
"pgmap_bytes": 0,
"pgmap_items": 0,
"mds_co_bytes": 6996083297,
"mds_co_items": 288521188,
"unittest_1_bytes": 0,
"unittest_1_items": 0,
"unittest_2_bytes": 0,
"unittest_2_items": 0
},
"objecter": {
"op_active": 72,
"op_laggy": 0,
"op_send": 1490006,
"op_send_bytes": 4115392592,
"op_resend": 0,
"op_reply": 1489934,
"op": 1490006,
"op_r": 52212,
"op_w": 1437794,
"op_rmw": 0,
"op_pg": 0,
"osdop_stat": 43160,
"osdop_create": 14114,
"osdop_read": 1127,
"osdop_write": 1042786,
"osdop_writefull": 1366,
"osdop_writesame": 0,
"osdop_append": 0,
"osdop_zero": 2,
"osdop_truncate": 0,
"osdop_delete": 55542,
"osdop_mapext": 0,
"osdop_sparse_read": 0,
"osdop_clonerange": 0,
"osdop_getxattr": 37965,
"osdop_setxattr": 28228,
"osdop_cmpxattr": 0,
"osdop_rmxattr": 0,
"osdop_resetxattrs": 0,
"osdop_call": 0,
"osdop_watch": 0,
"osdop_notify": 0,
"osdop_src_cmpxattr": 0,
"osdop_pgls": 0,
"osdop_pgls_filter": 0,
"osdop_other": 278315,
"linger_active": 0,
"linger_send": 0,
"linger_resend": 0,
"linger_ping": 0,
"poolop_active": 0,
"poolop_send": 0,
"poolop_resend": 0,
"poolstat_active": 0,
"poolstat_send": 0,
"poolstat_resend": 0,
"statfs_active": 0,
"statfs_send": 0,
"statfs_resend": 0,
"command_active": 0,
"command_send": 0,
"command_resend": 0,
"map_epoch": 0,
"map_full": 0,
"map_inc": 0,
"osd_sessions": 33,
"osd_session_open": 33,
"osd_session_close": 0,
"osd_laggy": 0,
"omap_wr": 73583,
"omap_rd": 99506,
"omap_del": 20167
},
"purge_queue": {
"pq_executing_ops": 0,
"pq_executing": 0,
"pq_executed": 49638
},
"throttle-msgr_dispatch_throttler-mds": {
"val": 1054,
"max": 104857600,
"get_started": 0,
"get": 4074846,
"get_sum": 62878031734,
"get_or_fail_fail": 0,
"get_or_fail_success": 4074846,
"take": 0,
"take_sum": 0,
"put": 4074840,
"put_sum": 62878030680,
"wait": {
"avgcount": 0,
"sum": 0,
"avgtime": 0
}
},
"throttle-objecter_bytes": {
"val": 332944,
"max": 104857600,
"get_started": 0,
"get": 0,
"get_sum": 0,
"get_or_fail_fail": 0,
"get_or_fail_success": 0,
"take": 1490006,
"take_sum": 4341973304,
"put": 1142949,
"put_sum": 4341640360,
"wait": {
"avgcount": 0,
"sum": 0,
"avgtime": 0
}
},
"throttle-objecter_ops": {
"val": 72,
"max": 1024,
"get_started": 0,
"get": 0,
"get_sum": 0,
"get_or_fail_fail": 0,
"get_or_fail_success": 0,
"take": 1490006,
"take_sum": 1490006,
"put": 1489934,
"put_sum": 1489934,
"wait": {
"avgcount": 0,
"sum": 0,
"avgtime": 0
}
},
"throttle-write_buf_throttle": {
"val": 0,
"max": 3758096384,
"get_started": 0,
"get": 49638,
"get_sum": 5013438,
"get_or_fail_fail": 0,
"get_or_fail_success": 49638,
"take": 0,
"take_sum": 0,
"put": 1066,
"put_sum": 5013438,
"wait": {
"avgcount": 0,
"sum": 0,
"avgtime": 0
}
},
"throttle-write_buf_throttle-0x2c2df40": {
"val": 0,
"max": 3758096384,
"get_started": 0,
"get": 1811605,
"get_sum": 3803177386,
"get_or_fail_fail": 0,
"get_or_fail_success": 1811605,
"take": 0,
"take_sum": 0,
"put": 1041720,
"put_sum": 3803177386,
"wait": {
"avgcount": 0,
"sum": 0,
"avgtime": 0
}
}
}
_______________________________________________
ceph-users mailing list
[email protected]
http://lists.ceph.com/listinfo.cgi/ceph-users-ceph.com