Hello,

My manager (ceph-mgr) keeps dying; the last crash meta log is below. What is
causing this? I do have two roots in the OSD tree with shared hosts (see
below), but I can't imagine that would cause the balancer to fail. Could it?


meta log:
{
    "crash_id":
"2019-05-11_19:09:17.999875Z_aa7afa7c-bc7e-43ec-b32a-821bd47bd68b",
    "timestamp": "2019-05-11 19:09:17.999875Z",
    "process_name": "ceph-mgr",
    "entity_name": "mgr.pok1-qz1-sr1-rk023-s08",
    "ceph_version": "14.2.0",
    "utsname_hostname": "pok1-qz1-sr1-rk023-s08",
    "utsname_sysname": "Linux",
    "utsname_release": "4.15.0-1014-ibm-gt",
    "utsname_version": "#16-Ubuntu SMP Tue Dec 11 11:19:10 UTC 2018",
    "utsname_machine": "x86_64",
    "os_name": "Ubuntu",
    "os_id": "ubuntu",
    "os_version_id": "18.04",
    "os_version": "18.04.1 LTS (Bionic Beaver)",
    "assert_condition": "osd_weight.count(i.first)",
    "assert_func": "int OSDMap::calc_pg_upmaps(CephContext*, float, int,
const std::set<long int>&, OSDMap::Incremental*)",
    "assert_file": "/build/ceph-14.2.0/src/osd/OSDMap.cc",
    "assert_line": 4743,
    "assert_thread_name": "balancer",
    "assert_msg": "/build/ceph-14.2.0/src/osd/OSDMap.cc: In function 'int
OSDMap::calc_pg_upmaps(CephContext*, float, int, const std::set<long int>&,
OSDMap::Incremental*)' thread 7fffd6572700 time 2019-05-11 19:09:17.998114
\n/build/ceph-14.2.0/src/osd/OSDMap.cc: 4743: FAILED ceph_assert
(osd_weight.count(i.first))\n",
    "backtrace": [
        "(()+0x12890) [0x7fffee586890]",
        "(gsignal()+0xc7) [0x7fffed67ee97]",
        "(abort()+0x141) [0x7fffed680801]",
        "(ceph::__ceph_assert_fail(char const*, char const*, int, char
const*)+0x1a3) [0x7fffef1eb7d3]",
        "(ceph::__ceph_assertf_fail(char const*, char const*, int, char
const*, char const*, ...)+0) [0x7fffef1eb95d]",
        "(OSDMap::calc_pg_upmaps(CephContext*, float, int, std::set<long,
std::less<long>, std::allocator<long> > const&,
OSDMap::Incremental*)+0x274b) [0x7fffef61bb3b]",
        "(()+0x1d52b6) [0x5555557292b6]",
        "(PyEval_EvalFrameEx()+0x8010) [0x7fffeeab21d0]",
        "(PyEval_EvalCodeEx()+0x7d8) [0x7fffeebe2278]",
        "(PyEval_EvalFrameEx()+0x5bf6) [0x7fffeeaafdb6]",
        "(PyEval_EvalFrameEx()+0x8b5b) [0x7fffeeab2d1b]",
        "(PyEval_EvalFrameEx()+0x8b5b) [0x7fffeeab2d1b]",
        "(PyEval_EvalCodeEx()+0x7d8) [0x7fffeebe2278]",
        "(()+0x1645f9) [0x7fffeeb675f9]",
        "(PyObject_Call()+0x43) [0x7fffeea57333]",
        "(()+0x1abd1c) [0x7fffeebaed1c]",
        "(PyObject_Call()+0x43) [0x7fffeea57333]",
        "(PyObject_CallMethod()+0xc8) [0x7fffeeb7bc78]",
        "(PyModuleRunner::serve()+0x62) [0x555555725f32]",
        "(PyModuleRunner::PyModuleRunnerThread::entry()+0x1cf)
[0x5555557265df]",
        "(()+0x76db) [0x7fffee57b6db]",
        "(clone()+0x3f) [0x7fffed76188f]"
    ]
}

OSD TREE:
ID  CLASS WEIGHT    TYPE NAME                       STATUS REWEIGHT PRI-AFF
-29        54.58200 root tzrootthreenodes
-25        18.19400     host pok1-qz1-sr1-rk001-s20
  0   ssd   1.81898         osd.0                       up  1.00000 1.00000
122   ssd   1.81898         osd.122                     up  1.00000 1.00000
135   ssd   1.81898         osd.135                     up  1.00000 1.00000
149   ssd   1.81898         osd.149                     up  1.00000 1.00000
162   ssd   1.81898         osd.162                     up  1.00000 1.00000
175   ssd   1.81898         osd.175                     up  1.00000 1.00000
188   ssd   1.81898         osd.188                     up  1.00000 1.00000
200   ssd   1.81898         osd.200                     up  1.00000 1.00000
213   ssd   1.81898         osd.213                     up  1.00000 1.00000
225   ssd   1.81898         osd.225                     up  1.00000 1.00000
 -5        18.19400     host pok1-qz1-sr1-rk002-s05
112   ssd   1.81898         osd.112                     up  1.00000 1.00000
120   ssd   1.81898         osd.120                     up  1.00000 1.00000
132   ssd   1.81898         osd.132                     up  1.00000 1.00000
144   ssd   1.81898         osd.144                     up  1.00000 1.00000
156   ssd   1.81898         osd.156                     up  1.00000 1.00000
168   ssd   1.81898         osd.168                     up  1.00000 1.00000
180   ssd   1.81898         osd.180                     up  1.00000 1.00000
192   ssd   1.81898         osd.192                     up  1.00000 1.00000
204   ssd   1.81898         osd.204                     up  1.00000 1.00000
216   ssd   1.81898         osd.216                     up  1.00000 1.00000
-11        18.19400     host pok1-qz1-sr1-rk002-s16
115   ssd   1.81898         osd.115                     up  1.00000 1.00000
127   ssd   1.81898         osd.127                     up  1.00000 1.00000
139   ssd   1.81898         osd.139                     up  1.00000 1.00000
151   ssd   1.81898         osd.151                     up  1.00000 1.00000
163   ssd   1.81898         osd.163                     up  1.00000 1.00000
174   ssd   1.81898         osd.174                     up  1.00000 1.00000
186   ssd   1.81898         osd.186                     up  1.00000 1.00000
198   ssd   1.81898         osd.198                     up  1.00000 1.00000
210   ssd   1.81898         osd.210                     up  1.00000 1.00000
222   ssd   1.81898         osd.222                     up  1.00000 1.00000
 -1       145.36555 root default
 -3        18.00755     host pok1-qz1-sr1-rk001-s12
  1   ssd   1.63669         osd.1                       up  1.00000 1.00000
117   ssd   1.81898         osd.117                     up  1.00000 1.00000
129   ssd   1.81898         osd.129                     up  1.00000 1.00000
141   ssd   1.81898         osd.141                     up  1.00000 1.00000
153   ssd   1.81898         osd.153                     up  1.00000 1.00000
165   ssd   1.81898         osd.165                     up  1.00000 1.00000
177   ssd   1.81898         osd.177                   down  1.00000 1.00000
189   ssd   1.81898         osd.189                     up  1.00000 1.00000
201   ssd   1.81898         osd.201                     up  1.00000 1.00000
212   ssd   1.81898         osd.212                     up  1.00000 1.00000
-25        18.19400     host pok1-qz1-sr1-rk001-s20
  0   ssd   1.81898         osd.0                       up  1.00000 1.00000
122   ssd   1.81898         osd.122                     up  1.00000 1.00000
135   ssd   1.81898         osd.135                     up  1.00000 1.00000
149   ssd   1.81898         osd.149                     up  1.00000 1.00000
162   ssd   1.81898         osd.162                     up  1.00000 1.00000
175   ssd   1.81898         osd.175                     up  1.00000 1.00000
188   ssd   1.81898         osd.188                     up  1.00000 1.00000
200   ssd   1.81898         osd.200                     up  1.00000 1.00000
213   ssd   1.81898         osd.213                     up  1.00000 1.00000
225   ssd   1.81898         osd.225                     up  1.00000 1.00000
 -5        18.19400     host pok1-qz1-sr1-rk002-s05
112   ssd   1.81898         osd.112                     up  1.00000 1.00000
120   ssd   1.81898         osd.120                     up  1.00000 1.00000
132   ssd   1.81898         osd.132                     up  1.00000 1.00000
144   ssd   1.81898         osd.144                     up  1.00000 1.00000
156   ssd   1.81898         osd.156                     up  1.00000 1.00000
168   ssd   1.81898         osd.168                     up  1.00000 1.00000
180   ssd   1.81898         osd.180                     up  1.00000 1.00000
192   ssd   1.81898         osd.192                     up  1.00000 1.00000
204   ssd   1.81898         osd.204                     up  1.00000 1.00000
216   ssd   1.81898         osd.216                     up  1.00000 1.00000
 -7        18.19400     host pok1-qz1-sr1-rk002-s11
113   ssd   1.81898         osd.113                     up  1.00000 1.00000
124   ssd   1.81898         osd.124                     up  1.00000 1.00000
136   ssd   1.81898         osd.136                     up  1.00000 1.00000
147   ssd   1.81898         osd.147                     up  1.00000 1.00000
159   ssd   1.81898         osd.159                     up  1.00000 1.00000
171   ssd   1.81898         osd.171                     up  1.00000 1.00000
183   ssd   1.81898         osd.183                     up  1.00000 1.00000
195   ssd   1.81898         osd.195                     up  1.00000 1.00000
207   ssd   1.81898         osd.207                     up  1.00000 1.00000
219   ssd   1.81898         osd.219                     up  1.00000 1.00000
 -9        18.19400     host pok1-qz1-sr1-rk002-s12
114   ssd   1.81898         osd.114                     up  1.00000 1.00000
125   ssd   1.81898         osd.125                     up  1.00000 1.00000
137   ssd   1.81898         osd.137                     up  1.00000 1.00000
148   ssd   1.81898         osd.148                     up  1.00000 1.00000
160   ssd   1.81898         osd.160                     up  1.00000 1.00000
172   ssd   1.81898         osd.172                     up  1.00000 1.00000
184   ssd   1.81898         osd.184                     up  1.00000 1.00000
196   ssd   1.81898         osd.196                     up  1.00000 1.00000
209   ssd   1.81898         osd.209                     up  1.00000 1.00000
221   ssd   1.81898         osd.221                     up  1.00000 1.00000
-11        18.19400     host pok1-qz1-sr1-rk002-s16
115   ssd   1.81898         osd.115                     up  1.00000 1.00000
127   ssd   1.81898         osd.127                     up  1.00000 1.00000
139   ssd   1.81898         osd.139                     up  1.00000 1.00000
151   ssd   1.81898         osd.151                     up  1.00000 1.00000
163   ssd   1.81898         osd.163                     up  1.00000 1.00000
174   ssd   1.81898         osd.174                     up  1.00000 1.00000
186   ssd   1.81898         osd.186                     up  1.00000 1.00000
198   ssd   1.81898         osd.198                     up  1.00000 1.00000
210   ssd   1.81898         osd.210                     up  1.00000 1.00000
222   ssd   1.81898         osd.222                     up  1.00000 1.00000
-13        18.19400     host pok1-qz1-sr1-rk002-s19
116   ssd   1.81898         osd.116                     up  1.00000 1.00000
128   ssd   1.81898         osd.128                     up  1.00000 1.00000
140   ssd   1.81898         osd.140                     up  1.00000 1.00000
152   ssd   1.81898         osd.152                     up  1.00000 1.00000
164   ssd   1.81898         osd.164                     up  1.00000 1.00000
176   ssd   1.81898         osd.176                     up  1.00000 1.00000
187   ssd   1.81898         osd.187                     up  1.00000 1.00000
199   ssd   1.81898         osd.199                     up  1.00000 1.00000
211   ssd   1.81898         osd.211                     up  1.00000 1.00000
223   ssd   1.81898         osd.223                     up  1.00000 1.00000
-17        18.19400     host pok1-qz1-sr1-rk002-s23
119   ssd   1.81898         osd.119                     up  1.00000 1.00000
131   ssd   1.81898         osd.131                     up  1.00000 1.00000
143   ssd   1.81898         osd.143                     up  1.00000 1.00000
155   ssd   1.81898         osd.155                     up  1.00000 1.00000
167   ssd   1.81898         osd.167                     up  1.00000 1.00000
179   ssd   1.81898         osd.179                     up  1.00000 1.00000
191   ssd   1.81898         osd.191                     up  1.00000 1.00000
203   ssd   1.81898         osd.203                     up  1.00000 1.00000
215   ssd   1.81898         osd.215                     up  1.00000 1.00000
226   ssd   1.81898         osd.226                     up  1.00000 1.00000



_______________________________________________
ceph-users mailing list
ceph-users@lists.ceph.com
http://lists.ceph.com/listinfo.cgi/ceph-users-ceph.com

Reply via email to