Hello,
My manager keeps dying; the last crash meta log is below. What is causing this? I do have two roots in the OSD tree with shared hosts (see below), but I can't imagine that would cause the balancer to fail. meta log: { "crash_id": "2019-05-11_19:09:17.999875Z_aa7afa7c-bc7e-43ec-b32a-821bd47bd68b", "timestamp": "2019-05-11 19:09:17.999875Z", "process_name": "ceph-mgr", "entity_name": "mgr.pok1-qz1-sr1-rk023-s08", "ceph_version": "14.2.0", "utsname_hostname": "pok1-qz1-sr1-rk023-s08", "utsname_sysname": "Linux", "utsname_release": "4.15.0-1014-ibm-gt", "utsname_version": "#16-Ubuntu SMP Tue Dec 11 11:19:10 UTC 2018", "utsname_machine": "x86_64", "os_name": "Ubuntu", "os_id": "ubuntu", "os_version_id": "18.04", "os_version": "18.04.1 LTS (Bionic Beaver)", "assert_condition": "osd_weight.count(i.first)", "assert_func": "int OSDMap::calc_pg_upmaps(CephContext*, float, int, const std::set<long int>&, OSDMap::Incremental*)", "assert_file": "/build/ceph-14.2.0/src/osd/OSDMap.cc", "assert_line": 4743, "assert_thread_name": "balancer", "assert_msg": "/build/ceph-14.2.0/src/osd/OSDMap.cc: In function 'int OSDMap::calc_pg_upmaps(CephContext*, float, int, const std::set<long int>&, OSDMap::Incremental*)' thread 7fffd6572700 time 2019-05-11 19:09:17.998114 \n/build/ceph-14.2.0/src/osd/OSDMap.cc: 4743: FAILED ceph_assert (osd_weight.count(i.first))\n", "backtrace": [ "(()+0x12890) [0x7fffee586890]", "(gsignal()+0xc7) [0x7fffed67ee97]", "(abort()+0x141) [0x7fffed680801]", "(ceph::__ceph_assert_fail(char const*, char const*, int, char const*)+0x1a3) [0x7fffef1eb7d3]", "(ceph::__ceph_assertf_fail(char const*, char const*, int, char const*, char const*, ...)+0) [0x7fffef1eb95d]", "(OSDMap::calc_pg_upmaps(CephContext*, float, int, std::set<long, std::less<long>, std::allocator<long> > const&, OSDMap::Incremental*)+0x274b) [0x7fffef61bb3b]", "(()+0x1d52b6) [0x5555557292b6]", "(PyEval_EvalFrameEx()+0x8010) [0x7fffeeab21d0]", "(PyEval_EvalCodeEx()+0x7d8) [0x7fffeebe2278]", "(PyEval_EvalFrameEx()+0x5bf6) 
[0x7fffeeaafdb6]", "(PyEval_EvalFrameEx()+0x8b5b) [0x7fffeeab2d1b]", "(PyEval_EvalFrameEx()+0x8b5b) [0x7fffeeab2d1b]", "(PyEval_EvalCodeEx()+0x7d8) [0x7fffeebe2278]", "(()+0x1645f9) [0x7fffeeb675f9]", "(PyObject_Call()+0x43) [0x7fffeea57333]", "(()+0x1abd1c) [0x7fffeebaed1c]", "(PyObject_Call()+0x43) [0x7fffeea57333]", "(PyObject_CallMethod()+0xc8) [0x7fffeeb7bc78]", "(PyModuleRunner::serve()+0x62) [0x555555725f32]", "(PyModuleRunner::PyModuleRunnerThread::entry()+0x1cf) [0x5555557265df]", "(()+0x76db) [0x7fffee57b6db]", "(clone()+0x3f) [0x7fffed76188f]" ] } OSD TREE: ID CLASS WEIGHT TYPE NAME STATUS REWEIGHT PRI-AFF -29 54.58200 root tzrootthreenodes -25 18.19400 host pok1-qz1-sr1-rk001-s20 0 ssd 1.81898 osd.0 up 1.00000 1.00000 122 ssd 1.81898 osd.122 up 1.00000 1.00000 135 ssd 1.81898 osd.135 up 1.00000 1.00000 149 ssd 1.81898 osd.149 up 1.00000 1.00000 162 ssd 1.81898 osd.162 up 1.00000 1.00000 175 ssd 1.81898 osd.175 up 1.00000 1.00000 188 ssd 1.81898 osd.188 up 1.00000 1.00000 200 ssd 1.81898 osd.200 up 1.00000 1.00000 213 ssd 1.81898 osd.213 up 1.00000 1.00000 225 ssd 1.81898 osd.225 up 1.00000 1.00000 -5 18.19400 host pok1-qz1-sr1-rk002-s05 112 ssd 1.81898 osd.112 up 1.00000 1.00000 120 ssd 1.81898 osd.120 up 1.00000 1.00000 132 ssd 1.81898 osd.132 up 1.00000 1.00000 144 ssd 1.81898 osd.144 up 1.00000 1.00000 156 ssd 1.81898 osd.156 up 1.00000 1.00000 168 ssd 1.81898 osd.168 up 1.00000 1.00000 180 ssd 1.81898 osd.180 up 1.00000 1.00000 192 ssd 1.81898 osd.192 up 1.00000 1.00000 204 ssd 1.81898 osd.204 up 1.00000 1.00000 216 ssd 1.81898 osd.216 up 1.00000 1.00000 -11 18.19400 host pok1-qz1-sr1-rk002-s16 115 ssd 1.81898 osd.115 up 1.00000 1.00000 127 ssd 1.81898 osd.127 up 1.00000 1.00000 139 ssd 1.81898 osd.139 up 1.00000 1.00000 151 ssd 1.81898 osd.151 up 1.00000 1.00000 163 ssd 1.81898 osd.163 up 1.00000 1.00000 174 ssd 1.81898 osd.174 up 1.00000 1.00000 186 ssd 1.81898 osd.186 up 1.00000 1.00000 198 ssd 1.81898 osd.198 up 1.00000 1.00000 210 ssd 1.81898 
osd.210 up 1.00000 1.00000 222 ssd 1.81898 osd.222 up 1.00000 1.00000 -1 145.36555 root default -3 18.00755 host pok1-qz1-sr1-rk001-s12 1 ssd 1.63669 osd.1 up 1.00000 1.00000 117 ssd 1.81898 osd.117 up 1.00000 1.00000 129 ssd 1.81898 osd.129 up 1.00000 1.00000 141 ssd 1.81898 osd.141 up 1.00000 1.00000 153 ssd 1.81898 osd.153 up 1.00000 1.00000 165 ssd 1.81898 osd.165 up 1.00000 1.00000 177 ssd 1.81898 osd.177 down 1.00000 1.00000 189 ssd 1.81898 osd.189 up 1.00000 1.00000 201 ssd 1.81898 osd.201 up 1.00000 1.00000 212 ssd 1.81898 osd.212 up 1.00000 1.00000 -25 18.19400 host pok1-qz1-sr1-rk001-s20 0 ssd 1.81898 osd.0 up 1.00000 1.00000 122 ssd 1.81898 osd.122 up 1.00000 1.00000 135 ssd 1.81898 osd.135 up 1.00000 1.00000 149 ssd 1.81898 osd.149 up 1.00000 1.00000 162 ssd 1.81898 osd.162 up 1.00000 1.00000 175 ssd 1.81898 osd.175 up 1.00000 1.00000 188 ssd 1.81898 osd.188 up 1.00000 1.00000 200 ssd 1.81898 osd.200 up 1.00000 1.00000 213 ssd 1.81898 osd.213 up 1.00000 1.00000 225 ssd 1.81898 osd.225 up 1.00000 1.00000 -5 18.19400 host pok1-qz1-sr1-rk002-s05 112 ssd 1.81898 osd.112 up 1.00000 1.00000 120 ssd 1.81898 osd.120 up 1.00000 1.00000 132 ssd 1.81898 osd.132 up 1.00000 1.00000 144 ssd 1.81898 osd.144 up 1.00000 1.00000 156 ssd 1.81898 osd.156 up 1.00000 1.00000 168 ssd 1.81898 osd.168 up 1.00000 1.00000 180 ssd 1.81898 osd.180 up 1.00000 1.00000 192 ssd 1.81898 osd.192 up 1.00000 1.00000 204 ssd 1.81898 osd.204 up 1.00000 1.00000 216 ssd 1.81898 osd.216 up 1.00000 1.00000 -7 18.19400 host pok1-qz1-sr1-rk002-s11 113 ssd 1.81898 osd.113 up 1.00000 1.00000 124 ssd 1.81898 osd.124 up 1.00000 1.00000 136 ssd 1.81898 osd.136 up 1.00000 1.00000 147 ssd 1.81898 osd.147 up 1.00000 1.00000 159 ssd 1.81898 osd.159 up 1.00000 1.00000 171 ssd 1.81898 osd.171 up 1.00000 1.00000 183 ssd 1.81898 osd.183 up 1.00000 1.00000 195 ssd 1.81898 osd.195 up 1.00000 1.00000 207 ssd 1.81898 osd.207 up 1.00000 1.00000 219 ssd 1.81898 osd.219 up 1.00000 1.00000 -9 18.19400 host 
pok1-qz1-sr1-rk002-s12 114 ssd 1.81898 osd.114 up 1.00000 1.00000 125 ssd 1.81898 osd.125 up 1.00000 1.00000 137 ssd 1.81898 osd.137 up 1.00000 1.00000 148 ssd 1.81898 osd.148 up 1.00000 1.00000 160 ssd 1.81898 osd.160 up 1.00000 1.00000 172 ssd 1.81898 osd.172 up 1.00000 1.00000 184 ssd 1.81898 osd.184 up 1.00000 1.00000 196 ssd 1.81898 osd.196 up 1.00000 1.00000 209 ssd 1.81898 osd.209 up 1.00000 1.00000 221 ssd 1.81898 osd.221 up 1.00000 1.00000 -11 18.19400 host pok1-qz1-sr1-rk002-s16 115 ssd 1.81898 osd.115 up 1.00000 1.00000 127 ssd 1.81898 osd.127 up 1.00000 1.00000 139 ssd 1.81898 osd.139 up 1.00000 1.00000 151 ssd 1.81898 osd.151 up 1.00000 1.00000 163 ssd 1.81898 osd.163 up 1.00000 1.00000 174 ssd 1.81898 osd.174 up 1.00000 1.00000 186 ssd 1.81898 osd.186 up 1.00000 1.00000 198 ssd 1.81898 osd.198 up 1.00000 1.00000 210 ssd 1.81898 osd.210 up 1.00000 1.00000 222 ssd 1.81898 osd.222 up 1.00000 1.00000 -13 18.19400 host pok1-qz1-sr1-rk002-s19 116 ssd 1.81898 osd.116 up 1.00000 1.00000 128 ssd 1.81898 osd.128 up 1.00000 1.00000 140 ssd 1.81898 osd.140 up 1.00000 1.00000 152 ssd 1.81898 osd.152 up 1.00000 1.00000 164 ssd 1.81898 osd.164 up 1.00000 1.00000 176 ssd 1.81898 osd.176 up 1.00000 1.00000 187 ssd 1.81898 osd.187 up 1.00000 1.00000 199 ssd 1.81898 osd.199 up 1.00000 1.00000 211 ssd 1.81898 osd.211 up 1.00000 1.00000 223 ssd 1.81898 osd.223 up 1.00000 1.00000 -17 18.19400 host pok1-qz1-sr1-rk002-s23 119 ssd 1.81898 osd.119 up 1.00000 1.00000 131 ssd 1.81898 osd.131 up 1.00000 1.00000 143 ssd 1.81898 osd.143 up 1.00000 1.00000 155 ssd 1.81898 osd.155 up 1.00000 1.00000 167 ssd 1.81898 osd.167 up 1.00000 1.00000 179 ssd 1.81898 osd.179 up 1.00000 1.00000 191 ssd 1.81898 osd.191 up 1.00000 1.00000 203 ssd 1.81898 osd.203 up 1.00000 1.00000 215 ssd 1.81898 osd.215 up 1.00000 1.00000 226 ssd 1.81898 osd.226 up 1.00000 1.00000
_______________________________________________ ceph-users mailing list ceph-users@lists.ceph.com http://lists.ceph.com/listinfo.cgi/ceph-users-ceph.com