__reserve_metadata_snap() and __release_metadata_snap() modify the
superblock's held_root directly in the block_manager's buffer. If the
subsequent metadata commit fails, the held_root gets flushed to disk
through the abort_transaction path, resulting in inconsistent metadata.
Reproducer 1: __reserve_metadata_snap()
1. Create a 2 MiB metadata device and make the region after the 14th
block inaccessible, to trigger metadata commit failure in the
subsequent reserve_metadata_snap operation. The 14th block will be
the shadow destination for the index block.
dmsetup create tmeta --table "0 112 linear /dev/sdc 0
112 3984 error"
2. Create a 16 MiB thin-pool
dmsetup create tdata --table "0 32768 zero"
dd if=/dev/zero of=/dev/mapper/tmeta bs=4k count=1
dmsetup create tpool --table "0 32768 thin-pool /dev/mapper/tmeta \
/dev/mapper/tdata 128 0 1 skip_block_zeroing"
3. Take a metadata snapshot to trigger metadata commit failure and
transaction abort. However, the held_root is written to disk,
breaking metadata consistency.
dmsetup message tpool 0 "reserve_metadata_snap"
thin_check v1.2.2 result:
Bad reference count for metadata block 6. Expected 2, but space map contains 1.
Bad reference count for metadata block 7. Expected 2, but space map contains 1.
Bad reference count for metadata block 13. Expected 1, but space map contains
0.
Reproducer 2: __release_metadata_snap()
1. Create a 2 MiB metadata device and make the region after the 16th
block inaccessible, to trigger metadata commit failure in the
subsequent release_metadata_snap operation. The 16th block will be
the shadow destination for the index block.
dmsetup create tmeta --table "0 128 linear /dev/sdc 0
128 3968 error"
2. Create a 16 MiB thin-pool
dmsetup create tdata --table "0 32768 zero"
dd if=/dev/zero of=/dev/mapper/tmeta bs=4k count=1
dmsetup create tpool --table "0 32768 thin-pool /dev/mapper/tmeta \
/dev/mapper/tdata 128 0 1 skip_block_zeroing"
3. Reserve then release the metadata snapshot, to trigger metadata
commit failure and transaction abort. The held_root gets removed
from the on-disk superblock, causing inconsistent metadata.
dmsetup message tpool 0 "reserve_metadata_snap"
dmsetup message tpool 0 "release_metadata_snap"
thin_check v1.2.2 result:
Bad reference count for metadata block 6. Expected 1, but space map contains 2.
Bad reference count for metadata block 7. Expected 1, but space map contains 2.
1 metadata blocks have leaked.
Fix by deferring the held_root update to commit time.
Additionally, move the existing-snapshot check before the shadow
operation in __reserve_metadata_snap to avoid unnecessary shadowing, and
move the pmd->held_root clearing to the end of __release_metadata_snap
to preserve the snapshot on teardown failure.
Fixes: 991d9fa02da0 ("dm: add thin provisioning target")
Signed-off-by: Ming-Hung Tsai <[email protected]>
---
drivers/md/dm-thin-metadata.c | 59 ++++++++++-------------------------
1 file changed, 16 insertions(+), 43 deletions(-)
diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c
index b6a2d2081a24..4fe65647f274 100644
--- a/drivers/md/dm-thin-metadata.c
+++ b/drivers/md/dm-thin-metadata.c
@@ -186,6 +186,7 @@ struct dm_pool_metadata {
uint32_t time;
dm_block_t root;
dm_block_t details_root;
+ dm_block_t held_root;
struct list_head thin_devices;
uint64_t trans_id;
unsigned long flags;
@@ -748,6 +749,7 @@ static int __open_metadata(struct dm_pool_metadata *pmd)
*/
pmd->root = le64_to_cpu(disk_super->data_mapping_root);
pmd->details_root = le64_to_cpu(disk_super->device_details_root);
+ pmd->held_root = le64_to_cpu(disk_super->held_root);
__setup_btree_details(pmd);
dm_bm_unlock(sblock);
@@ -838,6 +840,7 @@ static int __begin_transaction(struct dm_pool_metadata *pmd)
pmd->time = le32_to_cpu(disk_super->time);
pmd->root = le64_to_cpu(disk_super->data_mapping_root);
pmd->details_root = le64_to_cpu(disk_super->device_details_root);
+ pmd->held_root = le64_to_cpu(disk_super->held_root);
pmd->trans_id = le64_to_cpu(disk_super->trans_id);
pmd->flags = le32_to_cpu(disk_super->flags);
pmd->data_block_size = le32_to_cpu(disk_super->data_block_size);
@@ -928,6 +931,7 @@ static int __commit_transaction(struct dm_pool_metadata
*pmd)
disk_super->time = cpu_to_le32(pmd->time);
disk_super->data_mapping_root = cpu_to_le64(pmd->root);
disk_super->device_details_root = cpu_to_le64(pmd->details_root);
+ disk_super->held_root = cpu_to_le64(pmd->held_root);
disk_super->trans_id = cpu_to_le64(pmd->trans_id);
disk_super->flags = cpu_to_le32(pmd->flags);
@@ -1333,9 +1337,14 @@ static int __reserve_metadata_snap(struct
dm_pool_metadata *pmd)
{
int r, inc;
struct thin_disk_superblock *disk_super;
- struct dm_block *copy, *sblock;
+ struct dm_block *copy;
dm_block_t held_root;
+ if (pmd->held_root) {
+ DMWARN("Pool metadata snapshot already exists: release this
before taking another.");
+ return -EBUSY;
+ }
+
/*
* We commit to ensure the btree roots which we increment in a
* moment are up to date.
@@ -1361,14 +1370,6 @@ static int __reserve_metadata_snap(struct
dm_pool_metadata *pmd)
held_root = dm_block_location(copy);
disk_super = dm_block_data(copy);
- if (le64_to_cpu(disk_super->held_root)) {
- DMWARN("Pool metadata snapshot already exists: release this
before taking another.");
-
- dm_tm_dec(pmd->tm, held_root);
- dm_tm_unlock(pmd->tm, copy);
- return -EBUSY;
- }
-
/*
* Wipe the spacemap since we're not publishing this.
*/
@@ -1384,18 +1385,8 @@ static int __reserve_metadata_snap(struct
dm_pool_metadata *pmd)
dm_tm_inc(pmd->tm, le64_to_cpu(disk_super->device_details_root));
dm_tm_unlock(pmd->tm, copy);
- /*
- * Write the held root into the superblock.
- */
- r = superblock_lock(pmd, &sblock);
- if (r) {
- dm_tm_dec(pmd->tm, held_root);
- return r;
- }
+ pmd->held_root = held_root;
- disk_super = dm_block_data(sblock);
- disk_super->held_root = cpu_to_le64(held_root);
- dm_bm_unlock(sblock);
return 0;
}
@@ -1415,18 +1406,10 @@ static int __release_metadata_snap(struct
dm_pool_metadata *pmd)
{
int r;
struct thin_disk_superblock *disk_super;
- struct dm_block *sblock, *copy;
+ struct dm_block *copy;
dm_block_t held_root;
- r = superblock_lock(pmd, &sblock);
- if (r)
- return r;
-
- disk_super = dm_block_data(sblock);
- held_root = le64_to_cpu(disk_super->held_root);
- disk_super->held_root = cpu_to_le64(0);
-
- dm_bm_unlock(sblock);
+ held_root = pmd->held_root;
if (!held_root) {
DMWARN("No pool metadata snapshot found: nothing to release.");
@@ -1444,6 +1427,8 @@ static int __release_metadata_snap(struct
dm_pool_metadata *pmd)
dm_tm_unlock(pmd->tm, copy);
+ pmd->held_root = 0;
+
return 0;
}
@@ -1462,19 +1447,7 @@ int dm_pool_release_metadata_snap(struct
dm_pool_metadata *pmd)
static int __get_metadata_snap(struct dm_pool_metadata *pmd,
dm_block_t *result)
{
- int r;
- struct thin_disk_superblock *disk_super;
- struct dm_block *sblock;
-
- r = dm_bm_read_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION,
- &sb_validator, &sblock);
- if (r)
- return r;
-
- disk_super = dm_block_data(sblock);
- *result = le64_to_cpu(disk_super->held_root);
-
- dm_bm_unlock(sblock);
+ *result = pmd->held_root;
return 0;
}
--
2.49.0