Hi,

In the current mainline libblock implementation, if the library fails
to read a block from the device (for example, because a filesystem is
trying to read/write beyond the end of the device*) the code falls
into an infinite loop where it continues to retry the I/O operation
again and again.

This patch will prevent this by limiting the number of retries before giving up.
Can you let me know what do you thing about it?

* the libblock library knows the total size of the device expressed in
physical blocks.

Cheers,
-- 
--------------------
Maurizio Lombardi
=== modified file 'uspace/lib/block/block.c'
--- uspace/lib/block/block.c	2013-09-15 11:10:56 +0000
+++ uspace/lib/block/block.c	2014-06-16 13:51:02 +0000
@@ -55,6 +55,8 @@
 #include <stacktrace.h>
 #include "block.h"
 
+#define MAX_WRITE_RETRIES 10
+
 /** Lock protecting the device connection list */
 static FIBRIL_MUTEX_INITIALIZE(dcl_lock);
 /** Device connection list head. */
@@ -79,6 +81,7 @@
 	bd_t *bd;
 	void *bb_buf;
 	aoff64_t bb_addr;
+	aoff64_t pblocks;    /**< Number of physical blocks */
 	size_t pblock_size;  /**< Physical block size. */
 	cache_t *cache;
 } devcon_t;
@@ -86,6 +89,7 @@
 static int read_blocks(devcon_t *, aoff64_t, size_t, void *, size_t);
 static int write_blocks(devcon_t *, aoff64_t, size_t, void *, size_t);
 static aoff64_t ba_ltop(devcon_t *, aoff64_t);
+static int __block_get_nblocks(bd_t *bd, aoff64_t *nblocks);
 
 static devcon_t *devcon_search(service_id_t service_id)
 {
@@ -103,7 +107,7 @@
 }
 
 static int devcon_add(service_id_t service_id, async_sess_t *sess,
-    size_t bsize, bd_t *bd)
+    size_t bsize, aoff64_t dev_size, bd_t *bd)
 {
 	devcon_t *devcon;
 	
@@ -118,6 +122,7 @@
 	devcon->bb_buf = NULL;
 	devcon->bb_addr = 0;
 	devcon->pblock_size = bsize;
+	devcon->pblocks = dev_size;
 	devcon->cache = NULL;
 	
 	fibril_mutex_lock(&dcl_lock);
@@ -164,8 +169,16 @@
 		async_hangup(sess);
 		return rc;
 	}
+
+	aoff64_t dev_size;
+	rc = __block_get_nblocks(bd, &dev_size);
+	if (rc != EOK) {
+		bd_close(bd);
+		async_hangup(sess);
+		return rc;
+	}
 	
-	rc = devcon_add(service_id, sess, bsize, bd);
+	rc = devcon_add(service_id, sess, bsize, dev_size, bd);
 	if (rc != EOK) {
 		bd_close(bd);
 		async_hangup(sess);
@@ -349,6 +362,7 @@
 {
 	fibril_mutex_initialize(&b->lock);
 	b->refcnt = 1;
+	b->write_failures = 0;
 	b->dirty = false;
 	b->toxic = false;
 	fibril_rwlock_initialize(&b->contents_lock);
@@ -373,7 +387,7 @@
 	cache_t *cache;
 	block_t *b;
 	link_t *link;
-
+	aoff64_t p_ba;
 	int rc;
 	
 	devcon = devcon_search(service_id);
@@ -383,6 +397,17 @@
 	
 	cache = devcon->cache;
 
+	/* Check whether the logical block (or part of it) is beyond
+	 * the end of the device or not.
+	 */
+	p_ba = ba_ltop(devcon, ba);
+	p_ba += cache->blocks_cluster;
+	if (p_ba >= devcon->pblocks) {
+		/* This request cannot be satisfied */
+		return EIO;
+	}
+
+
 retry:
 	rc = EOK;
 	b = NULL;
@@ -458,9 +483,14 @@
 					 * another try. Hopefully, we will grab
 					 * another block next time.
 					 */
-					fibril_mutex_unlock(&b->lock);
-					goto retry;
-				}
+					if (b->write_failures < MAX_WRITE_RETRIES) {
+						b->write_failures++;
+						fibril_mutex_unlock(&b->lock);
+						goto retry;
+					}
+				} else
+					b->write_failures = 0;
+
 				b->dirty = false;
 				if (!fibril_mutex_trylock(&cache->lock)) {
 					/*
@@ -577,6 +607,8 @@
 	    (blocks_cached > CACHE_HI_WATERMARK || mode != CACHE_MODE_WB)) {
 		rc = write_blocks(devcon, block->pba, cache->blocks_cluster,
 		    block->data, block->size);
+		if (rc == EOK)
+			block->write_failures = 0;
 		block->dirty = false;
 	}
 	fibril_mutex_unlock(&block->lock);
@@ -604,8 +636,13 @@
 				block->refcnt++;
 				fibril_mutex_unlock(&block->lock);
 				fibril_mutex_unlock(&cache->lock);
-				goto retry;
+				if (block->write_failures++ >= MAX_WRITE_RETRIES)
+					goto give_up;
+				else
+					goto retry;
 			}
+
+give_up:
 			/*
 			 * Take the block out of the cache and free it.
 			 */
@@ -771,7 +808,13 @@
 	devcon_t *devcon = devcon_search(service_id);
 	assert(devcon);
 	
-	return bd_get_num_blocks(devcon->bd, nblocks);
+	return __block_get_nblocks(devcon->bd, nblocks);
+}
+
+/** Get number of blocks on device **/
+static int __block_get_nblocks(bd_t *bd, aoff64_t *nblocks)
+{
+	return bd_get_num_blocks(bd, nblocks);
 }
 
 /** Read bytes directly from the device (bypass cache)

=== modified file 'uspace/lib/block/block.h'
--- uspace/lib/block/block.h	2013-09-15 11:10:56 +0000
+++ uspace/lib/block/block.h	2014-01-24 20:26:34 +0000
@@ -80,6 +80,8 @@
 	aoff64_t pba;
 	/** Size of the block. */
 	size_t size;
+	/** Number of write failures. */
+	int write_failures;
 	/** Link for placing the block into the free block list. */
 	link_t free_link;
 	/** Link for placing the block into the block hash table. */ 

_______________________________________________
HelenOS-devel mailing list
[email protected]
http://lists.modry.cz/listinfo/helenos-devel

Reply via email to