Sure, here it is. ceph -s: cluster: id: 8bc45d9a-ef50-4038-8e1b-1f25ac46c945 health: HEALTH_ERR 100 scrub errors Possible data damage: 56 pgs inconsistent
services: mon: 3 daemons, quorum 0,1,pve3 mgr: pve3(active) osd: 3 osds: 3 up, 3 in data: pools: 1 pools, 256 pgs objects: 269k objects, 1007 GB usage: 2050 GB used, 1386 GB / 3436 GB avail pgs: 200 active+clean 56 active+clean+inconsistent --- ceph health detail : PG_DAMAGED Possible data damage: 56 pgs inconsistent pg 2.6 is active+clean+inconsistent, acting [1,0] pg 2.19 is active+clean+inconsistent, acting [1,2] pg 2.1e is active+clean+inconsistent, acting [1,2] pg 2.1f is active+clean+inconsistent, acting [1,2] pg 2.24 is active+clean+inconsistent, acting [0,2] pg 2.25 is active+clean+inconsistent, acting [2,0] pg 2.36 is active+clean+inconsistent, acting [1,0] pg 2.3d is active+clean+inconsistent, acting [1,2] pg 2.4b is active+clean+inconsistent, acting [1,0] pg 2.4c is active+clean+inconsistent, acting [0,2] pg 2.4d is active+clean+inconsistent, acting [1,2] pg 2.4f is active+clean+inconsistent, acting [1,2] pg 2.50 is active+clean+inconsistent, acting [1,2] pg 2.52 is active+clean+inconsistent, acting [1,2] pg 2.56 is active+clean+inconsistent, acting [1,0] pg 2.5b is active+clean+inconsistent, acting [1,2] pg 2.5c is active+clean+inconsistent, acting [1,2] pg 2.5d is active+clean+inconsistent, acting [1,0] pg 2.5f is active+clean+inconsistent, acting [1,2] pg 2.71 is active+clean+inconsistent, acting [0,2] pg 2.75 is active+clean+inconsistent, acting [1,2] pg 2.77 is active+clean+inconsistent, acting [1,2] pg 2.79 is active+clean+inconsistent, acting [1,2] pg 2.7e is active+clean+inconsistent, acting [1,2] pg 2.83 is active+clean+inconsistent, acting [1,0] pg 2.8a is active+clean+inconsistent, acting [1,0] pg 2.92 is active+clean+inconsistent, acting [1,2] pg 2.98 is active+clean+inconsistent, acting [1,0] pg 2.9a is active+clean+inconsistent, acting [1,0] pg 2.9e is active+clean+inconsistent, acting [1,0] pg 2.9f is active+clean+inconsistent, acting [1,2] pg 2.c6 is active+clean+inconsistent, acting [0,2] pg 2.c7 is active+clean+inconsistent, acting 
[1,0] pg 2.c8 is active+clean+inconsistent, acting [1,2] pg 2.cb is active+clean+inconsistent, acting [1,2] pg 2.cd is active+clean+inconsistent, acting [1,2] pg 2.ce is active+clean+inconsistent, acting [1,2] pg 2.d2 is active+clean+inconsistent, acting [2,1] pg 2.da is active+clean+inconsistent, acting [1,0] pg 2.de is active+clean+inconsistent, acting [1,2] pg 2.e1 is active+clean+inconsistent, acting [1,2] pg 2.e4 is active+clean+inconsistent, acting [1,0] pg 2.e6 is active+clean+inconsistent, acting [0,2] pg 2.e8 is active+clean+inconsistent, acting [1,2] pg 2.ee is active+clean+inconsistent, acting [1,0] pg 2.f9 is active+clean+inconsistent, acting [1,2] pg 2.fa is active+clean+inconsistent, acting [1,0] pg 2.fb is active+clean+inconsistent, acting [1,2] pg 2.fc is active+clean+inconsistent, acting [1,2] pg 2.fe is active+clean+inconsistent, acting [1,0] pg 2.ff is active+clean+inconsistent, acting [1,0] and ceph pg 2.6 query: { "state": "active+clean+inconsistent", "snap_trimq": "[]", "epoch": 1513, "up": [ 1, 0 ], "acting": [ 1, 0 ], "actingbackfill": [ "0", "1" ], "info": { "pgid": "2.6", "last_update": "1513'89145", "last_complete": "1513'89145", "log_tail": "1503'87586", "last_user_version": 330583, "last_backfill": "MAX", "last_backfill_bitwise": 0, "purged_snaps": [ { "start": "1", "length": "178" }, { "start": "17a", "length": "3d" }, { "start": "1b8", "length": "1" }, { "start": "1ba", "length": "1" }, { "start": "1bc", "length": "1" }, { "start": "1be", "length": "44" }, { "start": "205", "length": "12c" }, { "start": "332", "length": "1" }, { "start": "334", "length": "1" }, { "start": "336", "length": "1" }, { "start": "338", "length": "1" }, { "start": "33a", "length": "1" } ], "history": { "epoch_created": 90, "epoch_pool_created": 90, "last_epoch_started": 1339, "last_interval_started": 1338, "last_epoch_clean": 1339, "last_interval_clean": 1338, "last_epoch_split": 0, "last_epoch_marked_full": 0, "same_up_since": 1338, "same_interval_since": 
1338, "same_primary_since": 1338, "last_scrub": "1513'89112", "last_scrub_stamp": "2017-11-01 05:52:21.259654", "last_deep_scrub": "1513'89112", "last_deep_scrub_stamp": "2017-11-01 05:52:21.259654", "last_clean_scrub_stamp": "2017-10-25 04:25:09.830840" }, "stats": { "version": "1513'89145", "reported_seq": "422820", "reported_epoch": "1513", "state": "active+clean+inconsistent", "last_fresh": "2017-11-01 08:11:38.411784", "last_change": "2017-11-01 05:52:21.259789", "last_active": "2017-11-01 08:11:38.411784", "last_peered": "2017-11-01 08:11:38.411784", "last_clean": "2017-11-01 08:11:38.411784", "last_became_active": "2017-10-15 20:36:33.644567", "last_became_peered": "2017-10-15 20:36:33.644567", "last_unstale": "2017-11-01 08:11:38.411784", "last_undegraded": "2017-11-01 08:11:38.411784", "last_fullsized": "2017-11-01 08:11:38.411784", "mapping_epoch": 1338, "log_start": "1503'87586", "ondisk_log_start": "1503'87586", "created": 90, "last_epoch_clean": 1339, "parent": "0.0", "parent_split_bits": 0, "last_scrub": "1513'89112", "last_scrub_stamp": "2017-11-01 05:52:21.259654", "last_deep_scrub": "1513'89112", "last_deep_scrub_stamp": "2017-11-01 05:52:21.259654", "last_clean_scrub_stamp": "2017-10-25 04:25:09.830840", "log_size": 1559, "ondisk_log_size": 1559, "stats_invalid": false, "dirty_stats_invalid": false, "omap_stats_invalid": false, "hitset_stats_invalid": false, "hitset_bytes_stats_invalid": false, "pin_stats_invalid": false, "stat_sum": { "num_bytes": 3747886080, "num_objects": 958, "num_object_clones": 295, "num_object_copies": 1916, "num_objects_missing_on_primary": 0, "num_objects_missing": 0, "num_objects_degraded": 0, "num_objects_misplaced": 0, "num_objects_unfound": 0, "num_objects_dirty": 958, "num_whiteouts": 0, "num_read": 333428, "num_read_kb": 135550185, "num_write": 79221, "num_write_kb": 13441239, "num_scrub_errors": 1, "num_shallow_scrub_errors": 0, "num_deep_scrub_errors": 1, "num_objects_recovered": 245, "num_bytes_recovered": 
1012833792, "num_keys_recovered": 6, "num_objects_omap": 0, "num_objects_hit_set_archive": 0, "num_bytes_hit_set_archive": 0, "num_flush": 0, "num_flush_kb": 0, "num_evict": 0, "num_evict_kb": 0, "num_promote": 0, "num_flush_mode_high": 0, "num_flush_mode_low": 0, "num_evict_mode_some": 0, "num_evict_mode_full": 0, "num_objects_pinned": 0, "num_legacy_snapsets": 0 }, "up": [ 1, 0 ], "acting": [ 1, 0 ], "blocked_by": [], "up_primary": 1, "acting_primary": 1 }, "empty": 0, "dne": 0, "incomplete": 0, "last_epoch_started": 1339, "hit_set_history": { "current_last_update": "0'0", "history": [] } }, "peer_info": [ { "peer": "0", "pgid": "2.6", "last_update": "1513'89145", "last_complete": "1513'89145", "log_tail": "1274'68440", "last_user_version": 315687, "last_backfill": "MAX", "last_backfill_bitwise": 0, "purged_snaps": [ { "start": "1", "length": "178" }, { "start": "17a", "length": "3d" }, { "start": "1b8", "length": "1" }, { "start": "1ba", "length": "1" }, { "start": "1bc", "length": "1" }, { "start": "1be", "length": "44" }, { "start": "205", "length": "82" }, { "start": "288", "length": "1" }, { "start": "28a", "length": "1" }, { "start": "28c", "length": "1" }, { "start": "28e", "length": "1" }, { "start": "290", "length": "1" } ], "history": { "epoch_created": 90, "epoch_pool_created": 90, "last_epoch_started": 1339, "last_interval_started": 1338, "last_epoch_clean": 1339, "last_interval_clean": 1338, "last_epoch_split": 0, "last_epoch_marked_full": 0, "same_up_since": 1338, "same_interval_since": 1338, "same_primary_since": 1338, "last_scrub": "1513'89112", "last_scrub_stamp": "2017-11-01 05:52:21.259654", "last_deep_scrub": "1513'89112", "last_deep_scrub_stamp": "2017-11-01 05:52:21.259654", "last_clean_scrub_stamp": "2017-10-25 04:25:09.830840" }, "stats": { "version": "1337'71465", "reported_seq": "347015", "reported_epoch": "1338", "state": "active+undersized+degraded", "last_fresh": "2017-10-15 20:35:36.930611", "last_change": "2017-10-15 
20:30:35.752042", "last_active": "2017-10-15 20:35:36.930611", "last_peered": "2017-10-15 20:35:36.930611", "last_clean": "2017-10-15 20:30:01.443288", "last_became_active": "2017-10-15 20:30:35.752042", "last_became_peered": "2017-10-15 20:30:35.752042", "last_unstale": "2017-10-15 20:35:36.930611", "last_undegraded": "2017-10-15 20:30:35.749043", "last_fullsized": "2017-10-15 20:30:35.749043", "mapping_epoch": 1338, "log_start": "1274'68440", "ondisk_log_start": "1274'68440", "created": 90, "last_epoch_clean": 1331, "parent": "0.0", "parent_split_bits": 0, "last_scrub": "1294'71370", "last_scrub_stamp": "2017-10-15 09:27:31.756027", "last_deep_scrub": "1284'70813", "last_deep_scrub_stamp": "2017-10-14 06:35:57.556773", "last_clean_scrub_stamp": "2017-10-15 09:27:31.756027", "log_size": 3025, "ondisk_log_size": 3025, "stats_invalid": false, "dirty_stats_invalid": false, "omap_stats_invalid": false, "hitset_stats_invalid": false, "hitset_bytes_stats_invalid": false, "pin_stats_invalid": false, "stat_sum": { "num_bytes": 3555027456, "num_objects": 917, "num_object_clones": 255, "num_object_copies": 1834, "num_objects_missing_on_primary": 0, "num_objects_missing": 0, "num_objects_degraded": 917, "num_objects_misplaced": 0, "num_objects_unfound": 0, "num_objects_dirty": 917, "num_whiteouts": 0, "num_read": 275095, "num_read_kb": 111713846, "num_write": 64324, "num_write_kb": 11365374, "num_scrub_errors": 0, "num_shallow_scrub_errors": 0, "num_deep_scrub_errors": 0, "num_objects_recovered": 243, "num_bytes_recovered": 1008594432, "num_keys_recovered": 6, "num_objects_omap": 0, "num_objects_hit_set_archive": 0, "num_bytes_hit_set_archive": 0, "num_flush": 0, "num_flush_kb": 0, "num_evict": 0, "num_evict_kb": 0, "num_promote": 0, "num_flush_mode_high": 0, "num_flush_mode_low": 0, "num_evict_mode_some": 0, "num_evict_mode_full": 0, "num_objects_pinned": 0, "num_legacy_snapsets": 0 }, "up": [ 1, 0 ], "acting": [ 1, 0 ], "blocked_by": [], "up_primary": 1, "acting_primary": 
1 }, "empty": 0, "dne": 0, "incomplete": 0, "last_epoch_started": 1339, "hit_set_history": { "current_last_update": "0'0", "history": [] } } ], "recovery_state": [ { "name": "Started/Primary/Active", "enter_time": "2017-10-15 20:36:33.574915", "might_have_unfound": [ { "osd": "0", "status": "already probed" } ], "recovery_progress": { "backfill_targets": [], "waiting_on_backfill": [], "last_backfill_started": "MIN", "backfill_info": { "begin": "MIN", "end": "MIN", "objects": [] }, "peer_backfill_info": [], "backfills_in_flight": [], "recovering": [], "pg_backend": { "pull_from_peer": [], "pushing": [] } }, "scrub": { "scrubber.epoch_start": "1338", "scrubber.active": false, "scrubber.state": "INACTIVE", "scrubber.start": "MIN", "scrubber.end": "MIN", "scrubber.subset_last_update": "0'0", "scrubber.deep": false, "scrubber.seed": 0, "scrubber.waiting_on": 0, "scrubber.waiting_on_whom": [] } }, { "name": "Started", "enter_time": "2017-10-15 20:36:32.592892" } ], "agent_state": {} } 2017-10-30 23:30 GMT+01:00 Gregory Farnum <gfar...@redhat.com>: > You'll need to tell us exactly what error messages you're seeing, what the > output of ceph -s is, and the output of pg query for the relevant PGs. > There's not a lot of documentation because much of this tooling is new, > it's changing quickly, and most people don't have the kinds of problems > that turn out to be unrepairable. We should do better about that, though. > -Greg > > On Mon, Oct 30, 2017, 11:40 AM Mario Giammarco <mgiamma...@gmail.com> > wrote: > >> >[Questions to the list] >> >How is it possible that the cluster cannot repair itself with ceph pg >> repair? >> >Are no good copies remaining? >> >Can it not decide which copy is valid or up-to-date? >> >If so, why not, when there is a checksum and an mtime for everything? >> >In this inconsistent state, which object does the cluster serve when it >> doesn't know which one is the valid one? 
>> >> >> I am asking the same questions too; it seems strange to me that for a >> fault-tolerant clustered file storage system like Ceph there is no >> documentation about this. >> >> I know that I am being pedantic, but please note that saying "to be sure, use >> three copies" is not enough, because I am not sure what Ceph really does >> when the three copies do not match. >> >> >> >> >> >> _______________________________________________ >> ceph-users mailing list >> ceph-users@lists.ceph.com >> http://lists.ceph.com/listinfo.cgi/ceph-users-ceph.com >> >
_______________________________________________ ceph-users mailing list ceph-users@lists.ceph.com http://lists.ceph.com/listinfo.cgi/ceph-users-ceph.com