# ceph pg 4.ffe query
> { "state": "active+recovering",
>   "epoch": 1642,
>   "up": [
>         7,
>         26],
>   "acting": [
>         7,
>         26],
>   "info": { "pgid": "4.ffe",
>       "last_update": "339'96",
>       "last_complete": "339'89",
>       "log_tail": "0'0",
>       "last_backfill": "MAX",
>       "purged_snaps": "[1~9]",
>       "history": { "epoch_created": 3,
>           "last_epoch_started": 1617,
>           "last_epoch_clean": 339,
>           "last_epoch_split": 0,
>           "same_up_since": 1616,
>           "same_interval_since": 1616,
>           "same_primary_since": 1616,
>           "last_scrub": "337'71",
>           "last_scrub_stamp": "2014-05-31 19:27:24.250179",
>           "last_deep_scrub": "337'41",
>           "last_deep_scrub_stamp": "2014-05-29 19:26:41.314233",
>           "last_clean_scrub_stamp": "2014-05-31 19:27:24.250179"},
>       "stats": { "version": "339'96",
>           "reported_seq": "4126",
>           "reported_epoch": "1642",
>           "state": "active+recovering",
>           "last_fresh": "2014-06-03 19:31:00.239728",
>           "last_change": "2014-06-03 18:27:59.485319",
>           "last_active": "2014-06-03 19:31:00.239728",
>           "last_clean": "2014-05-31 20:07:05.950472",
>           "last_became_active": "0.000000",
>           "last_unstale": "2014-06-03 19:31:00.239728",
>           "mapping_epoch": 1614,
>           "log_start": "0'0",
>           "ondisk_log_start": "0'0",
>           "created": 3,
>           "last_epoch_clean": 339,
>           "parent": "0.0",
>           "parent_split_bits": 0,
>           "last_scrub": "337'71",
>           "last_scrub_stamp": "2014-05-31 19:27:24.250179",
>           "last_deep_scrub": "337'41",
>           "last_deep_scrub_stamp": "2014-05-29 19:26:41.314233",
>           "last_clean_scrub_stamp": "2014-05-31 19:27:24.250179",
>           "log_size": 96,
>           "ondisk_log_size": 96,
>           "stats_invalid": "0",
>           "stat_sum": { "num_bytes": 33554432,
>               "num_objects": 4,
>               "num_object_clones": 0,
>               "num_object_copies": 0,
>               "num_objects_missing_on_primary": 0,
>               "num_objects_degraded": 0,
>               "num_objects_unfound": 0,
>               "num_read": 2403,
>               "num_read_kb": 15994,
>               "num_write": 103,
>               "num_write_kb": 92721,
>               "num_scrub_errors": 0,
>               "num_shallow_scrub_errors": 0,
>               "num_deep_scrub_errors": 0,
>               "num_objects_recovered": 2,
>               "num_bytes_recovered": 16777216,
>               "num_keys_recovered": 0},
>           "stat_cat_sum": {},
>           "up": [
>                 7,
>                 26],
>           "acting": [
>                 7,
>                 26]},
>       "empty": 0,
>       "dne": 0,
>       "incomplete": 0,
>       "last_epoch_started": 1617},
>   "recovery_state": [
>         { "name": "Started\/Primary\/Active",
>           "enter_time": "2014-06-03 18:27:58.473736",
>           "might_have_unfound": [
>                 { "osd": 2,
>                   "status": "already probed"},
>                 { "osd": 3,
>                   "status": "already probed"},
>                 { "osd": 12,
>                   "status": "osd is down"},
>                 { "osd": 14,
>                   "status": "osd is down"},
>                 { "osd": 19,
>                   "status": "osd is down"},
>                 { "osd": 23,
>                   "status": "querying"},
>                 { "osd": 26,
>                   "status": "already probed"}],
>           "recovery_progress": { "backfill_target": -1,
>               "waiting_on_backfill": 0,
>               "backfill_pos": "0\/\/0\/\/-1",
>               "backfill_info": { "begin": "0\/\/0\/\/-1",
>                   "end": "0\/\/0\/\/-1",
>                   "objects": []},
>               "peer_backfill_info": { "begin": "0\/\/0\/\/-1",
>                   "end": "0\/\/0\/\/-1",
>                   "objects": []},
>               "backfills_in_flight": [],
>               "pull_from_peer": [],
>               "pushing": []},
>           "scrub": { "scrubber.epoch_start": "0",
>               "scrubber.active": 0,
>               "scrubber.block_writes": 0,
>               "scrubber.finalizing": 0,
>               "scrubber.waiting_on": 0,
>               "scrubber.waiting_on_whom": []}},
>         { "name": "Started",
>           "enter_time": "2014-06-03 18:27:57.308690"}]}

OSDs 12, 14 and 19 were the ones that got corrupted. I’ve marked them as lost and removed them from the cluster.
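In case it’s useful to anyone following along, by “marked as lost and removed” I mean roughly the usual per-OSD sequence (shown here for osd.12, then repeated for 14 and 19). I’m sketching this from memory, so double-check the flags against your release:

# declare the dead OSD's data unrecoverable
ceph osd lost 12 --yes-i-really-mean-it

# then purge it from CRUSH, the auth database and the OSD map
ceph osd crush remove osd.12
ceph auth del osd.12
ceph osd rm 12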
‘ceph osd tree’ shows the following:

> # id    weight  type name               up/down reweight
> -1      16.38   root default
> -2      5.46            host r-17E813A511
> 0       0.91                    osd.0   up      1
> 1       0.91                    osd.1   up      1
> 2       0.91                    osd.2   up      1
> 3       0.91                    osd.3   up      1
> 4       0.91                    osd.4   up      1
> 5       0.91                    osd.5   up      1
> -3      5.46            host r-3A72F8075A
> 6       0.91                    osd.6   up      1
> 7       0.91                    osd.7   up      1
> 8       0.91                    osd.8   up      1
> 9       0.91                    osd.9   up      1
> 10      0.91                    osd.10  up      1
> 11      0.91                    osd.11  up      1
> -4      5.46            host r-F9CBF5C8C5
> 21      0.91                    osd.21  up      1
> 22      0.91                    osd.22  up      1
> 23      0.91                    osd.23  up      1
> 24      0.91                    osd.24  up      1
> 25      0.91                    osd.25  up      1
> 26      0.91                    osd.26  up      1

./JRH
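P.S. For the archive: the query above still shows osd.23 as "querying" under might_have_unfound, which I suspect is why ‘mark_unfound_lost’ keeps refusing with "haven't probed all sources". Once probing settles (or the remaining source is itself marked lost), the two ways forward look roughly like this, sketched here for pg 4.ffe (Dumpling’s docs only list ‘revert’ for mark_unfound_lost, as far as I can tell):

# give up on the unfound objects, reverting them to their last known version
ceph pg 4.ffe mark_unfound_lost revert

# last resort: recreate the PG empty, abandoning whatever it held
ceph pg force_create_pg 4.ffe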
On Jun 3, 2014, at 4:00 PM, Smart Weblications GmbH - Florian Wiessner <f.wiess...@smart-weblications.de> wrote:

> Hi,
>
> On 03.06.2014 21:46, Jason Harley wrote:
>> Howdy —
>>
>> I’ve had a failure on a small Dumpling (0.67.4) cluster running on Ubuntu 13.10
>> machines. I had three OSD nodes (running 6 OSDs each) and lost two of them in a
>> beautiful failure. One of these nodes even went so far as to scramble the XFS
>> filesystems of my OSD disks (I’m curious whether it has some bad DIMMs).
>>
>> Anyway, the thing is: I’m okay with losing the data. This was a test setup, and
>> I want to take this opportunity to learn from the recovery process. I’m now
>> stuck in ‘HEALTH_ERR’ and want to get back to ‘HEALTH_OK’ without just
>> reinitializing the cluster.
>>
>> My OSD map seems correct, and I’ve done scrubs (deep and normal) at the PG and
>> OSD levels. ‘ceph -s’ shows that I still have 47 unfound objects after I told
>> Ceph to ‘mark_unfound_lost’. The remaining 47 PGs tell me that they "haven't
>> probed all sources, not marking lost". Two days have passed at this point, and
>> I’d just like to get my cluster working again and deal with the object loss
>> (which seems confined to a single pool).
>>
>> How do I move forward from here, if at all? Do I ‘force_create_pg’ the PGs
>> containing my unfound objects?
>>
>>> # ceph health detail | grep "unfound" | grep "^pg"
>>> pg 4.ffe is active+recovering, acting [7,26], 3 unfound
> ...
>>> pg 4.43 is active+recovering, acting [9,23], 1 unfound
>
> What is the output of:
>
> ceph pg 4.ffe query
>
> --
>
> Kind regards,
>
> Florian Wiessner
>
> Smart Weblications GmbH
> Martinsberger Str. 1
> D-95119 Naila
>
> fon.: +49 9282 9638 200
> fax.: +49 9282 9638 205
> 24/7: +49 900 144 000 00 - 0,99 EUR/Min*
> http://www.smart-weblications.de
>
> --
> Registered office: Naila
> Managing director: Florian Wiessner
> HRB-Nr.: HRB 3840, Amtsgericht Hof
> *from a German landline; mobile rates may differ

_______________________________________________
ceph-users mailing list
ceph-users@lists.ceph.com
http://lists.ceph.com/listinfo.cgi/ceph-users-ceph.com