Hi,
In the current mainline libblock implementation, if the library fails
to read a block from the device (for example, because a filesystem is
trying to read/write beyond the end of the device*) the code falls
into an infinite loop where it continues to retry the I/O operation
again and again.
This patch will prevent this by limiting the number of retries before giving up.
Can you let me know what do you thing about it?
* the libblock library knows the total size of the device expressed in
physical blocks.
Cheers,
--
--------------------
Maurizio Lombardi
=== modified file 'uspace/lib/block/block.c'
--- uspace/lib/block/block.c 2013-09-15 11:10:56 +0000
+++ uspace/lib/block/block.c 2014-06-16 13:51:02 +0000
@@ -55,6 +55,8 @@
#include <stacktrace.h>
#include "block.h"
+#define MAX_WRITE_RETRIES 10
+
/** Lock protecting the device connection list */
static FIBRIL_MUTEX_INITIALIZE(dcl_lock);
/** Device connection list head. */
@@ -79,6 +81,7 @@
bd_t *bd;
void *bb_buf;
aoff64_t bb_addr;
+ aoff64_t pblocks; /**< Number of physical blocks */
size_t pblock_size; /**< Physical block size. */
cache_t *cache;
} devcon_t;
@@ -86,6 +89,7 @@
static int read_blocks(devcon_t *, aoff64_t, size_t, void *, size_t);
static int write_blocks(devcon_t *, aoff64_t, size_t, void *, size_t);
static aoff64_t ba_ltop(devcon_t *, aoff64_t);
+static int __block_get_nblocks(bd_t *bd, aoff64_t *nblocks);
static devcon_t *devcon_search(service_id_t service_id)
{
@@ -103,7 +107,7 @@
}
static int devcon_add(service_id_t service_id, async_sess_t *sess,
- size_t bsize, bd_t *bd)
+ size_t bsize, aoff64_t dev_size, bd_t *bd)
{
devcon_t *devcon;
@@ -118,6 +122,7 @@
devcon->bb_buf = NULL;
devcon->bb_addr = 0;
devcon->pblock_size = bsize;
+ devcon->pblocks = dev_size;
devcon->cache = NULL;
fibril_mutex_lock(&dcl_lock);
@@ -164,8 +169,16 @@
async_hangup(sess);
return rc;
}
+
+ aoff64_t dev_size;
+ rc = __block_get_nblocks(bd, &dev_size);
+ if (rc != EOK) {
+ bd_close(bd);
+ async_hangup(sess);
+ return rc;
+ }
- rc = devcon_add(service_id, sess, bsize, bd);
+ rc = devcon_add(service_id, sess, bsize, dev_size, bd);
if (rc != EOK) {
bd_close(bd);
async_hangup(sess);
@@ -349,6 +362,7 @@
{
fibril_mutex_initialize(&b->lock);
b->refcnt = 1;
+ b->write_failures = 0;
b->dirty = false;
b->toxic = false;
fibril_rwlock_initialize(&b->contents_lock);
@@ -373,7 +387,7 @@
cache_t *cache;
block_t *b;
link_t *link;
-
+ aoff64_t p_ba;
int rc;
devcon = devcon_search(service_id);
@@ -383,6 +397,17 @@
cache = devcon->cache;
+ /* Check whether the logical block (or part of it) is beyond
+ * the end of the device or not.
+ */
+ p_ba = ba_ltop(devcon, ba);
+ p_ba += cache->blocks_cluster;
+ if (p_ba >= devcon->pblocks) {
+ /* This request cannot be satisfied */
+ return EIO;
+ }
+
+
retry:
rc = EOK;
b = NULL;
@@ -458,9 +483,14 @@
* another try. Hopefully, we will grab
* another block next time.
*/
- fibril_mutex_unlock(&b->lock);
- goto retry;
- }
+ if (b->write_failures < MAX_WRITE_RETRIES) {
+ b->write_failures++;
+ fibril_mutex_unlock(&b->lock);
+ goto retry;
+ }
+ } else
+ b->write_failures = 0;
+
b->dirty = false;
if (!fibril_mutex_trylock(&cache->lock)) {
/*
@@ -577,6 +607,8 @@
(blocks_cached > CACHE_HI_WATERMARK || mode != CACHE_MODE_WB)) {
rc = write_blocks(devcon, block->pba, cache->blocks_cluster,
block->data, block->size);
+ if (rc == EOK)
+ block->write_failures = 0;
block->dirty = false;
}
fibril_mutex_unlock(&block->lock);
@@ -604,8 +636,13 @@
block->refcnt++;
fibril_mutex_unlock(&block->lock);
fibril_mutex_unlock(&cache->lock);
- goto retry;
+ if (block->write_failures++ >= MAX_WRITE_RETRIES)
+ goto give_up;
+ else
+ goto retry;
}
+
+give_up:
/*
* Take the block out of the cache and free it.
*/
@@ -771,7 +808,13 @@
devcon_t *devcon = devcon_search(service_id);
assert(devcon);
- return bd_get_num_blocks(devcon->bd, nblocks);
+ return __block_get_nblocks(devcon->bd, nblocks);
+}
+
+/** Get number of blocks on device **/
+static int __block_get_nblocks(bd_t *bd, aoff64_t *nblocks)
+{
+ return bd_get_num_blocks(bd, nblocks);
}
/** Read bytes directly from the device (bypass cache)
=== modified file 'uspace/lib/block/block.h'
--- uspace/lib/block/block.h 2013-09-15 11:10:56 +0000
+++ uspace/lib/block/block.h 2014-01-24 20:26:34 +0000
@@ -80,6 +80,8 @@
aoff64_t pba;
/** Size of the block. */
size_t size;
+ /** Number of write failures. */
+ int write_failures;
/** Link for placing the block into the free block list. */
link_t free_link;
/** Link for placing the block into the block hash table. */
_______________________________________________
HelenOS-devel mailing list
[email protected]
http://lists.modry.cz/listinfo/helenos-devel