Hi!

I have a problem with the osds getting full on our cluster.

I've read all the topics on this list on how to deal with that, but I have a few questions.

First of all - the cluster itself is nowhere near full (about 55% of the raw space is used), but the osds don't fill up evenly.

I've tried adding osds, but it didn't help - some osds still fill up more than others. I've tried adjusting the ratios, but that's not a long-term solution. I've also tried adjusting weights, but I'm not sure I'm doing it right.
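
For reference, these are roughly the commands I've been using (reconstructed from memory, so treat the exact values as examples):

  # temporarily raise the nearfull threshold (pre-Luminous syntax)
  ceph pg set_nearfull_ratio 0.88

  # lower the weight of the fullest osds, e.g. osd.17
  ceph osd crush reweight osd.17 0.95
  ceph osd reweight 17 0.95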

At this point I had to stop the full osd (this is a production cluster) so that radosgw would keep working.
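
For completeness - I stopped it on its host (cf04) with the init script, something along the lines of:

  service ceph stop osd.14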

Am I right in assuming that, since the cluster is in WARNING state (not ERR) with that one osd down, I can safely delete some pgs from that osd? They must have copies on other osds - otherwise the cluster would be in ERR state, right? I can't start the osd, because that would stop radosgw from working.
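
Before deleting anything I would of course double-check that the remaining copies are healthy, with something like (the pgid below is a placeholder):

  ceph health detail
  ceph pg <pgid> query    # for each pg that has a copy on osd.14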

Can you suggest how to reweight the osds so that the data gets distributed (more or less) evenly?
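
I've seen ceph osd reweight-by-utilization mentioned in other threads - is that the right tool here? Something like (110 is just a guess at a threshold; the default is 120):

  ceph osd reweight-by-utilization 110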

Also - the cluster got stuck with some backfill_toofull pgs - is there a way to deal with that? I've adjusted the ratio, but the pgs are still in backfill_toofull state.
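
To be precise, the ratio I adjusted was osd_backfill_full_ratio (currently 0.9, as shown in the config dump below), roughly like this:

  ceph tell osd.* injectargs '--osd-backfill-full-ratio 0.92'

Does that take effect immediately, or do the toofull pgs need to re-peer before they notice the new value?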

Here's some info about the current cluster state. The norecover flag is set because the recovery process caused blocked requests and radosgw started throwing too many errors; the flag gets unset during the night. BTW - is there a way to slow down the rebalancing so that the cluster stays responsive while repairing/moving pgs?
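
In case it matters, what I had in mind for throttling recovery was something along these lines:

  ceph tell osd.* injectargs '--osd-max-backfills 1 --osd-recovery-max-active 1 --osd-recovery-op-priority 1'

Would that be enough to keep client I/O responsive, or is there a better knob?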

[root@cf01 ceph]# ceph -v
ceph version 0.94.5 (9764da52395923e0b32908d83a9f7304401fee43)

[root@cf01 ceph]# ceph -s
    cluster 3469081f-9852-4b6e-b7ed-900e77c48bb5
     health HEALTH_WARN
            10 pgs backfill_toofull
            48 pgs backfilling
            31 pgs degraded
            1 pgs recovering
            31 pgs stuck degraded
            59 pgs stuck unclean
            30 pgs stuck undersized
            30 pgs undersized
            recovery 6408175/131078852 objects degraded (4.889%)
            recovery 69703039/131078852 objects misplaced (53.176%)
            1 near full osd(s)
            norecover flag(s) set
     monmap e1: 3 mons at {cf01=10.4.10.211:6789/0,cf02=10.4.10.212:6789/0,cf03=10.4.10.213:6789/0}
            election epoch 5826, quorum 0,1,2 cf01,cf02,cf03
     osdmap e5906: 20 osds: 19 up, 19 in; 58 remapped pgs
            flags norecover
      pgmap v12075461: 304 pgs, 17 pools, 23771 GB data, 45051 kobjects
            50218 GB used, 39142 GB / 89360 GB avail
            6408175/131078852 objects degraded (4.889%)
            69703039/131078852 objects misplaced (53.176%)
                 241 active+clean
                  24 active+remapped+backfilling
                  24 active+undersized+degraded+remapped+backfilling
                   6 active+undersized+degraded+remapped+backfill_toofull
                   4 active+clean+scrubbing+deep
                   4 active+remapped+backfill_toofull
                   1 active+recovering+degraded

[root@cf01 ceph]# ceph --admin-daemon /run/ceph/ceph-mon.cf01.asok config show | grep full
    "mon_cache_target_full_warn_ratio": "0.66",
    "mon_osd_full_ratio": "0.95",
    "mon_osd_nearfull_ratio": "0.85",
    "paxos_stash_full_interval": "25",
    "osd_backfill_full_ratio": "0.9",
    "osd_pool_default_cache_target_full_ratio": "0.8",
    "osd_debug_skip_full_check_in_backfill_reservation": "false",
    "osd_failsafe_full_ratio": "0.97",
    "osd_failsafe_nearfull_ratio": "0.9",

[root@cf01 ceph]# ceph df
GLOBAL:
    SIZE       AVAIL      RAW USED     %RAW USED
    89360G     39137G       50223G         56.20
POOLS:
    NAME                   ID     USED       %USED     MAX AVAIL     OBJECTS
    vms                    0       9907G     11.09         3505G     2541252
    .rgw.root              1         848         0         3505G           3
    .rgw.control           2           0         0         3505G           8
    .rgw.gc                3           0         0         3505G          32
    .rgw.buckets_cache     4           0         0         3505G           0
    .rgw.buckets.index     5           0         0         3505G       67102
    .rgw.buckets.extra     6           0         0         3505G           6
    .log                   7        121G      0.14         3505G       91018
    .intent-log            8           0         0         3505G           0
    .usage                 9           0         0         3505G          18
    .users                 10        597         0         3505G          36
    .users.email           11          0         0         3505G           0
    .users.swift           12          0         0         3505G           0
    .users.uid             13      11694         0         3505G          57
    .rgw.buckets           14     13699G     15.33         3505G    43421376
    .rgw                   15       9256         0         3505G          50
    one                    17     43840M      0.05         2337G       11153


[root@cf01 ceph]# ceph osd df
ID WEIGHT  REWEIGHT SIZE   USE    AVAIL  %USE  VAR
 0 1.00000  1.00000  5585G  2653G  2931G 47.51 0.85
 1 1.00000  1.00000  5585G  2960G  2624G 53.02 0.94
 2 1.00000  1.00000  5585G  3193G  2391G 57.18 1.02
10 1.00000  1.00000  3723G  2315G  1408G 62.18 1.11
16 1.00000  1.00000  3723G   763G  2959G 20.50 0.36
 3 1.00000  1.00000  5585G  3559G  2025G 63.73 1.13
 4 1.00000  1.00000  5585G  2354G  3230G 42.16 0.75
11 1.00000  1.00000  3723G  1302G  2420G 34.99 0.62
17 0.95000  0.95000  3723G  3388G   334G 91.01 1.62
12 1.00000  1.00000  3723G  2922G   800G 78.50 1.40
 5 1.00000  1.00000  5585G  3972G  1613G 71.12 1.27
 6 1.00000  1.00000  5585G  2975G  2609G 53.28 0.95
 7 1.00000  1.00000  5585G  2208G  3376G 39.54 0.70
13 1.00000  1.00000  3723G  2092G  1631G 56.19 1.00
18 1.00000  1.00000  3723G  3144G   578G 84.45 1.50
 8 1.00000  1.00000  5585G  2909G  2675G 52.10 0.93
 9 1.00000  1.00000  5585G  3089G  2495G 55.31 0.98
14 0.95000 0 0 0 0 0 0 (this osd is 97% full)
15 1.00000  1.00000  3723G  2629G  1093G 70.63 1.26
19 1.00000  1.00000  3723G  1781G  1941G 47.86 0.85
              TOTAL 89360G 50217G 39143G 56.20
MIN/MAX VAR: 0/1.62  STDDEV: 16.80

[root@cf01 ceph]# ceph osd tree
ID WEIGHT   TYPE NAME      UP/DOWN REWEIGHT PRIMARY-AFFINITY
-1 19.89999 root default
-2  5.00000     host cf01
 0  1.00000         osd.0       up  1.00000          1.00000
 1  1.00000         osd.1       up  1.00000          1.00000
 2  1.00000         osd.2       up  1.00000          1.00000
10  1.00000         osd.10      up  1.00000          1.00000
16  1.00000         osd.16      up  1.00000          1.00000
-3  4.95000     host cf02
 3  1.00000         osd.3       up  1.00000          1.00000
 4  1.00000         osd.4       up  1.00000          1.00000
11  1.00000         osd.11      up  1.00000          1.00000
17  0.95000         osd.17      up  0.95000          1.00000
12  1.00000         osd.12      up  1.00000          1.00000
-4  5.00000     host cf03
 5  1.00000         osd.5       up  1.00000          1.00000
 6  1.00000         osd.6       up  1.00000          1.00000
 7  1.00000         osd.7       up  1.00000          1.00000
13  1.00000         osd.13      up  1.00000          1.00000
18  1.00000         osd.18      up  1.00000          1.00000
-5  4.95000     host cf04
 8  1.00000         osd.8       up  1.00000          1.00000
 9  1.00000         osd.9       up  1.00000          1.00000
14  0.95000         osd.14    down        0          1.00000
15  1.00000         osd.15      up  1.00000          1.00000
19  1.00000         osd.19      up  1.00000          1.00000

# begin crush map
tunable choose_local_tries 0
tunable choose_local_fallback_tries 0
tunable choose_total_tries 50
tunable chooseleaf_descend_once 1
tunable straw_calc_version 1

# devices
device 0 osd.0
device 1 osd.1
device 2 osd.2
device 3 osd.3
device 4 osd.4
device 5 osd.5
device 6 osd.6
device 7 osd.7
device 8 osd.8
device 9 osd.9
device 10 osd.10
device 11 osd.11
device 12 osd.12
device 13 osd.13
device 14 osd.14
device 15 osd.15
device 16 osd.16
device 17 osd.17
device 18 osd.18
device 19 osd.19

# types
type 0 osd
type 1 host
type 2 chassis
type 3 rack
type 4 row
type 5 pdu
type 6 pod
type 7 room
type 8 datacenter
type 9 region
type 10 root

# buckets
host cf01 {
        id -2           # do not change unnecessarily
        # weight 5.000
        alg straw
        hash 0  # rjenkins1
        item osd.0 weight 1.000
        item osd.1 weight 1.000
        item osd.2 weight 1.000
        item osd.10 weight 1.000
        item osd.16 weight 1.000
}
host cf02 {
        id -3           # do not change unnecessarily
        # weight 4.950
        alg straw
        hash 0  # rjenkins1
        item osd.3 weight 1.000
        item osd.4 weight 1.000
        item osd.11 weight 1.000
        item osd.17 weight 0.950
        item osd.12 weight 1.000
}
host cf03 {
        id -4           # do not change unnecessarily
        # weight 5.000
        alg straw
        hash 0  # rjenkins1
        item osd.5 weight 1.000
        item osd.6 weight 1.000
        item osd.7 weight 1.000
        item osd.13 weight 1.000
        item osd.18 weight 1.000
}
host cf04 {
        id -5           # do not change unnecessarily
        # weight 4.950
        alg straw
        hash 0  # rjenkins1
        item osd.8 weight 1.000
        item osd.9 weight 1.000
        item osd.14 weight 0.950
        item osd.15 weight 1.000
        item osd.19 weight 1.000
}
root default {
        id -1           # do not change unnecessarily
        # weight 19.900
        alg straw
        hash 0  # rjenkins1
        item cf01 weight 5.000
        item cf02 weight 4.950
        item cf03 weight 5.000
        item cf04 weight 4.950
}

# rules
rule replicated_ruleset {
        ruleset 0
        type replicated
        min_size 1
        max_size 10
        step take default
        step chooseleaf firstn 0 type host
        step emit
}

# end crush map

--
Jacek Jarosiewicz
Systems Administrator
