Re: [PATCH] Loop device - Tracking page writes made to a loop device through mmap

2007-03-14 Thread Kandan Venkataraman

On Tue, 13 Mar 2007, Christoph Hellwig wrote:


On Wed, Mar 14, 2007 at 07:21:41AM +1100, Kandan Venkataraman wrote:

All comments have been taken care of.

Description:

A  file_operations structure variable called loop_fops is initialised with
the default block device file operations (def_blk_fops).
The mmap operation is overridden with a new function called loop_file_mmap.


NACK.  block device driver should never ever play around with file
operations themselves.  If you want functionality like the one you
have please don't overload the loop driver, but start a new (character)
driver doing specifically what you want.  And even then I'm not sure
we'd want functionality like this in the mainline tree, but at least
we can have an open discussion if it's done properly.



The reason I chose the loop device was the following:

The loop device has backing storage and makes use of the page cache. The 
nopage method for all block device files and most disk files is 
implemented by filemap_nopage. This function uses the page cache. Thus, 
this allows me to mmap a large address space.


If I wrote a character device, I would have to store the contents in 
kernel memory as the function filemap_nopage is not suitable for character 
devices. This would severely restrict the size of my mmap.


I am out of my depth here; any pointers on how to proceed would be very 
helpful.


Thanks
Kandan
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] Loop device - Tracking page writes made to a loop device through mmap

2007-03-13 Thread Kandan Venkataraman
ile  to set as backing storage*/
   if ((dfd = open ("/root/file", O_RDWR, S_IRWXU)) < 0) {
   fprintf(stderr, "can't create device file for writing\n");
   goto out4;
   }

   if (ioctl(fd, LOOP_SET_FD, dfd) < 0) {
   perror("ioctl: LOOP_SET_FD");
   goto out3;
   }

   if ((start = mmap(0, maxPages * pageSize, PROT_READ | PROT_WRITE,
 MAP_SHARED, fd, 0)) == MAP_FAILED) {
   perror("mmap error");
   goto out2;
   }


   if (ioctl(fd, LOOP_SET_TRACK_PGWRITE, 1) < 0) {
   perror("ioctl: LOOP_SET_TRACK_PGWRITE");
   goto out1;
   }

   if (ioctl(fd, LOOP_CLR_PGWRITES, 0) < 0) {
   perror("ioctl: LOOP_CLR_PGWRITES");
   goto out1;
   }

   array = (int *)start;

   array[0] = 5;

   fprintf(stderr, "value = %d\n", array[0]);

   array[1] = 9;

   fprintf(stderr, "value = %d\n", array[1]);

   array[elemsPerPage] = 14;

   fprintf(stderr, "value = %d\n", array[elemsPerPage]);

   array[3*elemsPerPage+60] = 35;

   fprintf(stderr, "value = %d\n", array[3*elemsPerPage+60]);

   if (ioctl(fd, LOOP_GET_PGWRITES, &pgarray) < 0) {
   perror("ioctl: LOOP_GET_PGWRITES");
   goto out1;
   }

   int i;
   for (i= 0; i < pgarray.num; i++)
   fprintf(stderr, "offset %ld\n", pgarray.pgoff[i]);

out1:
   munmap(start, maxPages * pageSize);
out2:
   ioctl(fd, LOOP_CLR_FD, 0);
out3:
   close(dfd);
out4:
   close(fd);
out5:
   return 0;
}



Signed-off-by: Kandan Venkataraman [EMAIL PROTECTED]


diff -uprN linux-2.6.19.2/drivers/block/loop.c 
linux-2.6.19.2-new/drivers/block/loop.c
--- linux-2.6.19.2/drivers/block/loop.c 2007-01-11 06:10:37.0 +1100
+++ linux-2.6.19.2-new/drivers/block/loop.c 2007-03-11 07:31:26.0 
+1100
@@ -74,12 +74,16 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 
 static int max_loop = 8;
 static struct loop_device *loop_dev;
 static struct gendisk **disks;
+static kmem_cache_t *pgoff_elem_cache;
+static char *cache_name = "loop_pgoff_elem_cache";
+static struct file_operations loop_fops;
 
 /*
  * Transfer functions
@@ -646,6 +650,85 @@ static void do_loop_switch(struct loop_d
complete(&p->wait);
 }
 
+static void pgoff_tree_clear(struct rb_root *rb_root)
+{
+   struct rb_node *rb_node  = rb_root->rb_node;
+
+   while (rb_node != NULL) {
+
+   rb_erase(rb_node, rb_root);
+   kmem_cache_free(pgoff_elem_cache, rb_entry(rb_node, 
+   struct pgoff_elem, node));
+   rb_node = rb_root->rb_node;
+   }
+
+   *rb_root = RB_ROOT;
+}
+
+static int loop_set_track_pgwrite(struct loop_device *lo, unsigned long arg)
+{
+   if (lo->lo_state != Lo_bound)
+   return -ENXIO;
+
+   if (arg)
+   lo->lo_track_pgwrite = 1;
+   else {
+   if (lo->lo_track_pgwrite)
+   pgoff_tree_clear(&lo->pgoff_tree);
+   lo->lo_track_pgwrite = 0;
+   }
+
+   return 0;
+}
+
+static int loop_clr_pgwrites(struct loop_device *lo)
+{
+   struct file *filp = lo->lo_backing_file;
+
+   if (lo->lo_state != Lo_bound)
+   return -ENXIO;
+
+   if (filp == NULL || !lo->lo_track_pgwrite)
+   return -EINVAL;
+
+   pgoff_tree_clear(&lo->pgoff_tree);
+
+   return 0;
+}
+
+static int loop_get_pgwrites(struct loop_device *lo, 
+struct loop_pgoff_array __user *arg)
+{
+   struct file *filp = lo->lo_backing_file;
+   struct loop_pgoff_array array;
+   loff_t i = 0;
+   struct rb_node *rb_node  = rb_first(&lo->pgoff_tree);
+
+   if (lo->lo_state != Lo_bound)
+   return -ENXIO;
+
+   if (filp == NULL || !lo->lo_track_pgwrite)
+   return -EINVAL;
+
+   if (copy_from_user(&array, arg, sizeof (struct loop_pgoff_array)))
+   return -EFAULT;
+
+   while (i < array.max && rb_node != NULL) {
+
+   if (put_user(rb_entry(rb_node, struct pgoff_elem, node)->offset,
+ array.pgoff + i))
+   return -EFAULT;
+
+   ++i;
+   rb_node = rb_next(rb_node);
+   }
+   array.num = i;
+
+   if (copy_to_user(arg, &array, sizeof(array)))
+   return -EFAULT;
+
+   return 0;
+}
 
 /*
  * loop_change_fd switched the backing store of a loopback device to
@@ -692,6 +775,8 @@ static int loop_change_fd(struct loop_de
if (get_loop_size(lo, file) != get_loop_size(lo, old_file))
goto out_pu

Re: [PATCH] Loop device - Tracking page writes made to a loop device through mmap

2007-03-08 Thread Kandan Venkataraman


On Thu, 8 Mar 2007, Jari Ruusu wrote:


Kandan Venkataraman wrote:

All comments have been taken care of.


Your patch still does not do conversions of existing user space visible
'struct loop_info64' which is pretty much cast in stone. Blindly overwriting
larger structure over smaller user space buffer of existing userspace
binaries is the wrong way to do this.


Which of the two options below would be the best way 
to get around this problem?


i) A new ioctl call to set/unset lo_track_pgwrite.

ii) Create 'struct loop_info64_v2' with 
lo_track_pgwrite in it and two new ioctls LOOP_SET_STATUS64_V2

and LOOP_GET_STATUS64_V2.

Either way 'struct loop_info64' would be left untouched.

Please comment.







There was a time when folks at least pretended that breaking user space ABI
was not tolerable.

--
Jari Ruusu  1024R/3A220F51 5B 4B F9 BB D3 3F 52 E9  DB 1D EB E3 24 0E A9 DD


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] Loop device - Tracking page writes made to a loop device through mmap

2007-03-07 Thread Kandan Venkataraman
  fprintf(stderr, "can't create device file for writing\n");
goto out4;
}
if (ioctl(fd, LOOP_SET_FD, dfd) < 0) {
perror("ioctl: LOOP_SET_FD");
goto out3;
}
if ((start = mmap(0, maxPages * pageSize, PROT_READ | PROT_WRITE,
  MAP_SHARED, fd, 0)) == MAP_FAILED) {
perror("mmap error");
goto out2;
}
if (ioctl(fd, LOOP_GET_STATUS64, &info) < 0) {
perror("ioctl: LOOP_CLR_PGWRITES");
goto out1;
}
info.lo_track_pgwrite = 1;

if (ioctl(fd, LOOP_SET_STATUS64, &info) < 0) {
perror("ioctl: LOOP_SET_STATUS64");
goto out1;
}
if (ioctl(fd, LOOP_CLR_PGWRITES, 0) < 0) {
perror("ioctl: LOOP_CLR_PGWRITES");
goto out1;
}
array = (int *)start;

array[0] = 5;

fprintf(stderr, "value = %d\n", array[0]);

array[1] = 9;

fprintf(stderr, "value = %d\n", array[1]);

array[elemsPerPage] = 14;

fprintf(stderr, "value = %d\n", array[elemsPerPage]);

array[3*elemsPerPage+60] = 35;

fprintf(stderr, "value = %d\n", array[3*elemsPerPage+60]);

if (ioctl(fd, LOOP_GET_PGWRITES, &pgarray) < 0) {
perror("ioctl: LOOP_GET_PGWRITES");
goto out1;
}
int i;
for (i= 0; i < pgarray.num; i++)
fprintf(stderr, "offset %ld\n", pgarray.pgoff[i]);

out1:
munmap(start, maxPages * pageSize);
out2:
ioctl(fd, LOOP_CLR_FD, 0);
out3:
close(dfd);
out4:
close(fd);
out5:
return 0;
}




Signed-off-by: Kandan Venkataraman [EMAIL PROTECTED]


diff -uprN linux-2.6.19.2/drivers/block/loop.c 
linux-2.6.19.2-new/drivers/block/loop.c
--- linux-2.6.19.2/drivers/block/loop.c 2007-03-03 16:26:03.0 +1100
+++ linux-2.6.19.2-new/drivers/block/loop.c 2007-03-03 16:44:38.0 
+1100
@@ -74,12 +74,16 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 
 static int max_loop = 8;
 static struct loop_device *loop_dev;
 static struct gendisk **disks;
+static kmem_cache_t *pgoff_elem_cache;
+static char *cache_name = "loop_pgoff_elem_cache";
+static struct file_operations loop_fops;
 
 /*
  * Transfer functions
@@ -646,6 +650,70 @@ static void do_loop_switch(struct loop_d
complete(&p->wait);
 }
 
+static void pgoff_tree_clear(struct rb_root *rb_root)
+{
+   struct rb_node *rb_node  = rb_root->rb_node;
+
+   while (rb_node != NULL) {
+
+   rb_erase(rb_node, rb_root);
+   kmem_cache_free(pgoff_elem_cache, rb_entry(rb_node, 
+   struct pgoff_elem, node));
+   rb_node = rb_root->rb_node;
+   }
+
+   *rb_root = RB_ROOT;
+}
+
+
+static int loop_clr_pgwrites(struct loop_device *lo)
+{
+   struct file *filp = lo->lo_backing_file;
+
+   if (lo->lo_state != Lo_bound)
+   return -ENXIO;
+
+   if (filp == NULL || !lo->lo_track_pgwrite)
+   return -EINVAL;
+
+   pgoff_tree_clear(&lo->pgoff_tree);
+
+   return 0;
+}
+
+static int loop_get_pgwrites(struct loop_device *lo, 
+struct loop_pgoff_array __user *arg)
+{
+   struct file *filp = lo->lo_backing_file;
+   struct loop_pgoff_array array;
+   loff_t i = 0;
+   struct rb_node *rb_node  = rb_first(&lo->pgoff_tree);
+
+   if (lo->lo_state != Lo_bound)
+   return -ENXIO;
+
+   if (filp == NULL || !lo->lo_track_pgwrite)
+   return -EINVAL;
+
+   if (copy_from_user(&array, arg, sizeof (struct loop_pgoff_array)))
+   return -EFAULT;
+
+   while (i < array.max && rb_node != NULL) {
+
+   if (put_user(rb_entry(rb_node, struct pgoff_elem, node)->offset,
+ array.pgoff + i))
+   return -EFAULT;
+
+   ++i;
+   rb_node = rb_next(rb_node);
+   }
+   array.num = i;
+
+   if (copy_to_user(arg, &array, sizeof(array)))
+   return -EFAULT;
+
+   return 0;
+}
 
 /*
  * loop_change_fd switched the backing store of a loopback device to
@@ -692,6 +760,8 @@ static int loop_change_fd(struct loop_de
if (get_loop_size(lo, file) != get_loop_size(lo, old_file))
goto out_putf;
 
+   pgoff_tree_clear(&lo->pgoff_tree);
+
/* and ... switch */
error = loop_switch(lo, file);
if (error)
@@ -799,6 +869,8 @@ static int loop_set_fd(struct loop_devic
lo->transfer = transfer_none;
lo->ioctl = NULL;
lo->lo_sizelimit = 0;
+   lo->lo

Re: [PATCH] Loop device - Tracking page writes made to a loop device through mmap

2007-03-02 Thread Kandan Venkataraman

There were a couple more white spaces, instead of tabs, hopefully this is
the last of them:-)

Signed-off-by: Kandan Venkataraman [EMAIL PROTECTED]


diff -uprN linux-2.6.19.2/drivers/block/loop.c 
linux-2.6.19.2-new/drivers/block/loop.c
--- linux-2.6.19.2/drivers/block/loop.c 2007-03-03 16:26:03.0 +1100
+++ linux-2.6.19.2-new/drivers/block/loop.c 2007-03-03 16:44:38.0 
+1100
@@ -74,12 +74,16 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 
 static int max_loop = 8;
 static struct loop_device *loop_dev;
 static struct gendisk **disks;
+static kmem_cache_t *pgoff_elem_cache;
+static char *cache_name = "loop_pgoff_elem_cache";
+static struct file_operations loop_fops;
 
 /*
  * Transfer functions
@@ -646,6 +650,70 @@ static void do_loop_switch(struct loop_d
complete(&p->wait);
 }
 
+static void pgoff_tree_clear(struct rb_root *rb_root)
+{
+   struct rb_node *rb_node  = rb_root->rb_node;
+
+   while (rb_node != NULL) {
+
+   rb_erase(rb_node, rb_root);
+   kmem_cache_free(pgoff_elem_cache, rb_entry(rb_node, 
+   struct pgoff_elem, node));
+   rb_node = rb_root->rb_node;
+   }
+
+   *rb_root = RB_ROOT;
+}
+
+
+static int loop_clr_pgwrites(struct loop_device *lo)
+{
+   struct file *filp = lo->lo_backing_file;
+
+   if (lo->lo_state != Lo_bound)
+   return -ENXIO;
+
+   if (filp == NULL || !lo->lo_track_pgwrite)
+   return -EINVAL;
+
+   pgoff_tree_clear(&lo->pgoff_tree);
+
+   return 0;
+}
+
+static int loop_get_pgwrites(struct loop_device *lo, 
+struct loop_pgoff_array __user *arg)
+{
+   struct file *filp = lo->lo_backing_file;
+   struct loop_pgoff_array array;
+   loff_t i = 0;
+   struct rb_node *rb_node  = rb_first(&lo->pgoff_tree);
+
+   if (lo->lo_state != Lo_bound)
+   return -ENXIO;
+
+   if (filp == NULL || !lo->lo_track_pgwrite)
+   return -EINVAL;
+
+   if (copy_from_user(&array, arg, sizeof (struct loop_pgoff_array)))
+   return -EFAULT;
+
+   while (i < array.max && rb_node != NULL) {
+
+   if (put_user(rb_entry(rb_node, struct pgoff_elem, node)->offset,
+ array.pgoff + i))
+   return -EFAULT;
+
+   ++i;
+   rb_node = rb_next(rb_node);
+   }
+   array.num = i;
+
+   if (copy_to_user(arg, &array, sizeof(array)))
+   return -EFAULT;
+
+   return 0;
+}
 
 /*
  * loop_change_fd switched the backing store of a loopback device to
@@ -692,6 +760,8 @@ static int loop_change_fd(struct loop_de
if (get_loop_size(lo, file) != get_loop_size(lo, old_file))
goto out_putf;
 
+   pgoff_tree_clear(&lo->pgoff_tree);
+
/* and ... switch */
error = loop_switch(lo, file);
if (error)
@@ -799,6 +869,8 @@ static int loop_set_fd(struct loop_devic
lo->transfer = transfer_none;
lo->ioctl = NULL;
lo->lo_sizelimit = 0;
+   lo->lo_track_pgwrite = 0;
+   lo->pgoff_tree = RB_ROOT;
lo->old_gfp_mask = mapping_gfp_mask(mapping);
mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));
 
@@ -913,6 +985,8 @@ static int loop_clr_fd(struct loop_devic
lo->lo_sizelimit = 0;
lo->lo_encrypt_key_size = 0;
lo->lo_flags = 0;
+   lo->lo_track_pgwrite = 0;
+   pgoff_tree_clear(&lo->pgoff_tree);
lo->lo_thread = NULL;
memset(lo->lo_encrypt_key, 0, LO_KEY_SIZE);
memset(lo->lo_crypt_name, 0, LO_NAME_SIZE);
@@ -969,6 +1043,14 @@ loop_set_status(struct loop_device *lo, 
return -EFBIG;
}
 
+   if (info->lo_track_pgwrite)
+   lo->lo_track_pgwrite = 1;
+   else {
+   if (lo->lo_track_pgwrite)
+   pgoff_tree_clear(&lo->pgoff_tree);
+   lo->lo_track_pgwrite = 0;
+   }
+
memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE);
memcpy(lo->lo_crypt_name, info->lo_crypt_name, LO_NAME_SIZE);
lo->lo_file_name[LO_NAME_SIZE-1] = 0;
@@ -1011,6 +1093,7 @@ loop_get_status(struct loop_device *lo, 
info->lo_offset = lo->lo_offset;
info->lo_sizelimit = lo->lo_sizelimit;
info->lo_flags = lo->lo_flags;
+   info->lo_track_pgwrite = lo->lo_track_pgwrite;
memcpy(info->lo_file_name, lo->lo_file_name, LO_NAME_SIZE);
memcpy(info->lo_crypt_name, lo->lo_crypt_name, LO_NAME_SIZE);
info->lo_encrypt_type =
@@ -1036,6 +1119,7 @@ loop_info64_from_old(const struct loop_i
info64->lo_encrypt_type = info->lo_encrypt_type;
info64->lo

Re: [PATCH] Loop device - Tracking page writes made to a loop device through mmap

2007-03-02 Thread Kandan Venkataraman

The patch file seems to start in the same line as the sign off

So I will introduce a new line at the end of my mail, hopefully that will 
fix that problem.


Signed-off-by: Kandan Venkataraman [EMAIL PROTECTED]


diff -uprN linux-2.6.19.2/drivers/block/loop.c 
linux-2.6.19.2-new/drivers/block/loop.c
--- linux-2.6.19.2/drivers/block/loop.c 2007-03-03 07:59:47.0 +1100
+++ linux-2.6.19.2-new/drivers/block/loop.c 2007-03-03 08:02:04.0 
+1100
@@ -74,12 +74,16 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 
 static int max_loop = 8;
 static struct loop_device *loop_dev;
 static struct gendisk **disks;
+static kmem_cache_t *pgoff_elem_cache;
+static char *cache_name = "loop_pgoff_elem_cache";
+static struct file_operations loop_fops;
 
 /*
  * Transfer functions
@@ -646,6 +650,70 @@ static void do_loop_switch(struct loop_d
complete(&p->wait);
 }
 
+static void pgoff_tree_clear(struct rb_root *rb_root)
+{
+   struct rb_node *rb_node  = rb_root->rb_node;
+
+   while (rb_node != NULL) {
+
+   rb_erase(rb_node, rb_root);
+   kmem_cache_free(pgoff_elem_cache, rb_entry(rb_node, 
+   struct pgoff_elem, node));
+   rb_node = rb_root->rb_node;
+   }
+
+   *rb_root = RB_ROOT;
+}
+
+
+static int loop_clr_pgwrites(struct loop_device *lo)
+{
+   struct file *filp = lo->lo_backing_file;
+
+   if (lo->lo_state != Lo_bound)
+   return -ENXIO;
+
+   if (filp == NULL || !lo->lo_track_pgwrite)
+   return -EINVAL;
+
+   pgoff_tree_clear(&lo->pgoff_tree);
+
+   return 0;
+}
+
+static int loop_get_pgwrites(struct loop_device *lo, 
+struct loop_pgoff_array __user *arg)
+{
+   struct file *filp = lo->lo_backing_file;
+   struct loop_pgoff_array array;
+   loff_t i = 0;
+   struct rb_node *rb_node  = rb_first(&lo->pgoff_tree);
+
+   if (lo->lo_state != Lo_bound)
+   return -ENXIO;
+
+   if (filp == NULL || !lo->lo_track_pgwrite)
+   return -EINVAL;
+
+   if (copy_from_user(&array, arg, sizeof (struct loop_pgoff_array)))
+   return -EFAULT;
+
+   while (i < array.max && rb_node != NULL) {
+
+   if (put_user(rb_entry(rb_node, struct pgoff_elem, node)->offset,
+ array.pgoff + i))
+   return -EFAULT;
+
+   ++i;
+   rb_node = rb_next(rb_node);
+   }
+   array.num = i;
+
+   if (copy_to_user(arg, &array, sizeof(array)))
+   return -EFAULT;
+
+   return 0;
+}
 
 /*
  * loop_change_fd switched the backing store of a loopback device to
@@ -692,6 +760,8 @@ static int loop_change_fd(struct loop_de
if (get_loop_size(lo, file) != get_loop_size(lo, old_file))
goto out_putf;
 
+   pgoff_tree_clear(&lo->pgoff_tree);
+
/* and ... switch */
error = loop_switch(lo, file);
if (error)
@@ -799,6 +869,8 @@ static int loop_set_fd(struct loop_devic
lo->transfer = transfer_none;
lo->ioctl = NULL;
lo->lo_sizelimit = 0;
+   lo->lo_track_pgwrite = 0;
+   lo->pgoff_tree = RB_ROOT;
lo->old_gfp_mask = mapping_gfp_mask(mapping);
mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));
 
@@ -913,6 +985,8 @@ static int loop_clr_fd(struct loop_devic
lo->lo_sizelimit = 0;
lo->lo_encrypt_key_size = 0;
lo->lo_flags = 0;
+   lo->lo_track_pgwrite = 0;
+   pgoff_tree_clear(&lo->pgoff_tree);
lo->lo_thread = NULL;
memset(lo->lo_encrypt_key, 0, LO_KEY_SIZE);
memset(lo->lo_crypt_name, 0, LO_NAME_SIZE);
@@ -969,6 +1043,14 @@ loop_set_status(struct loop_device *lo, 
return -EFBIG;
}
 
+   if (info->lo_track_pgwrite)
+   lo->lo_track_pgwrite = 1;
+   else {
+   if (lo->lo_track_pgwrite)
+   pgoff_tree_clear(&lo->pgoff_tree);
+   lo->lo_track_pgwrite = 0;
+   }
+
memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE);
memcpy(lo->lo_crypt_name, info->lo_crypt_name, LO_NAME_SIZE);
lo->lo_file_name[LO_NAME_SIZE-1] = 0;
@@ -1011,6 +1093,7 @@ loop_get_status(struct loop_device *lo, 
info->lo_offset = lo->lo_offset;
info->lo_sizelimit = lo->lo_sizelimit;
info->lo_flags = lo->lo_flags;
+   info->lo_track_pgwrite = lo->lo_track_pgwrite;
memcpy(info->lo_file_name, lo->lo_file_name, LO_NAME_SIZE);
memcpy(info->lo_crypt_name, lo->lo_crypt_name, LO_NAME_SIZE);
info->lo_encrypt_type =
@@ -1036,6 +1119,7 @@ loop_info64_from_old(const struct loop_i
info64->

Re: [PATCH] Loop device - Tracking page writes made to a loop device through mmap

2007-03-02 Thread Kandan Venkataraman
I have included the patch as a text file. This is the only way I could get 
the patch to work from an email.


Thank you for your patience

Signed-off-by: Kandan Venkataraman [EMAIL PROTECTED]diff -uprN linux-2.6.19.2/drivers/block/loop.c 
linux-2.6.19.2-new/drivers/block/loop.c
--- linux-2.6.19.2/drivers/block/loop.c 2007-03-03 07:59:47.0 +1100
+++ linux-2.6.19.2-new/drivers/block/loop.c 2007-03-03 08:02:04.0 
+1100
@@ -74,12 +74,16 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 
 static int max_loop = 8;
 static struct loop_device *loop_dev;
 static struct gendisk **disks;
+static kmem_cache_t *pgoff_elem_cache;
+static char *cache_name = "loop_pgoff_elem_cache";
+static struct file_operations loop_fops;
 
 /*
  * Transfer functions
@@ -646,6 +650,70 @@ static void do_loop_switch(struct loop_d
complete(&p->wait);
 }
 
+static void pgoff_tree_clear(struct rb_root *rb_root)
+{
+   struct rb_node *rb_node  = rb_root->rb_node;
+
+   while (rb_node != NULL) {
+
+   rb_erase(rb_node, rb_root);
+   kmem_cache_free(pgoff_elem_cache, rb_entry(rb_node, 
+   struct pgoff_elem, node));
+   rb_node = rb_root->rb_node;
+   }
+
+   *rb_root = RB_ROOT;
+}
+
+
+static int loop_clr_pgwrites(struct loop_device *lo)
+{
+   struct file *filp = lo->lo_backing_file;
+
+   if (lo->lo_state != Lo_bound)
+   return -ENXIO;
+
+   if (filp == NULL || !lo->lo_track_pgwrite)
+   return -EINVAL;
+
+   pgoff_tree_clear(&lo->pgoff_tree);
+
+   return 0;
+}
+
+static int loop_get_pgwrites(struct loop_device *lo, 
+struct loop_pgoff_array __user *arg)
+{
+   struct file *filp = lo->lo_backing_file;
+   struct loop_pgoff_array array;
+   loff_t i = 0;
+   struct rb_node *rb_node  = rb_first(&lo->pgoff_tree);
+
+   if (lo->lo_state != Lo_bound)
+   return -ENXIO;
+
+   if (filp == NULL || !lo->lo_track_pgwrite)
+   return -EINVAL;
+
+   if (copy_from_user(&array, arg, sizeof (struct loop_pgoff_array)))
+   return -EFAULT;
+
+   while (i < array.max && rb_node != NULL) {
+
+   if (put_user(rb_entry(rb_node, struct pgoff_elem, node)->offset,
+ array.pgoff + i))
+   return -EFAULT;
+
+   ++i;
+   rb_node = rb_next(rb_node);
+   }
+   array.num = i;
+
+   if (copy_to_user(arg, &array, sizeof(array)))
+   return -EFAULT;
+
+   return 0;
+}
 
 /*
  * loop_change_fd switched the backing store of a loopback device to
@@ -692,6 +760,8 @@ static int loop_change_fd(struct loop_de
if (get_loop_size(lo, file) != get_loop_size(lo, old_file))
goto out_putf;
 
+   pgoff_tree_clear(&lo->pgoff_tree);
+
/* and ... switch */
error = loop_switch(lo, file);
if (error)
@@ -799,6 +869,8 @@ static int loop_set_fd(struct loop_devic
lo->transfer = transfer_none;
lo->ioctl = NULL;
lo->lo_sizelimit = 0;
+   lo->lo_track_pgwrite = 0;
+   lo->pgoff_tree = RB_ROOT;
lo->old_gfp_mask = mapping_gfp_mask(mapping);
mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));
 
@@ -913,6 +985,8 @@ static int loop_clr_fd(struct loop_devic
lo->lo_sizelimit = 0;
lo->lo_encrypt_key_size = 0;
lo->lo_flags = 0;
+   lo->lo_track_pgwrite = 0;
+   pgoff_tree_clear(&lo->pgoff_tree);
lo->lo_thread = NULL;
memset(lo->lo_encrypt_key, 0, LO_KEY_SIZE);
memset(lo->lo_crypt_name, 0, LO_NAME_SIZE);
@@ -969,6 +1043,14 @@ loop_set_status(struct loop_device *lo, 
return -EFBIG;
}
 
+   if (info->lo_track_pgwrite)
+   lo->lo_track_pgwrite = 1;
+   else {
+   if (lo->lo_track_pgwrite)
+   pgoff_tree_clear(&lo->pgoff_tree);
+   lo->lo_track_pgwrite = 0;
+   }
+
memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE);
memcpy(lo->lo_crypt_name, info->lo_crypt_name, LO_NAME_SIZE);
lo->lo_file_name[LO_NAME_SIZE-1] = 0;
@@ -1011,6 +1093,7 @@ loop_get_status(struct loop_device *lo, 
info->lo_offset = lo->lo_offset;
info->lo_sizelimit = lo->lo_sizelimit;
info->lo_flags = lo->lo_flags;
+   info->lo_track_pgwrite = lo->lo_track_pgwrite;
memcpy(info->lo_file_name, lo->lo_file_name, LO_NAME_SIZE);
memcpy(info->lo_crypt_name, lo->lo_crypt_name, LO_NAME_SIZE);
info->lo_encrypt_type =
@@ -1036,6 +1119,7 @@ loop_info64_from_old(const struct loop_i
info64->lo_encrypt_type = 

Re: [PATCH] Loop device - Tracking page writes made to a loop device through mmap

2007-03-02 Thread Kandan Venkataraman
esses to handle 
queries sharing the same memory.
During this copy over, the transaction process will hold off processing 
transactions till the update is complete.

This would be very useful for high speed in-memory transaction systems, where 
the query load can be passed
off to other processes. An example of such a system would be a stock trading system, 
where clients buy and sell
stock(equity, options etc). 
At the same time, a lot of clients would be downloading market data, and this can be done independently of the transactions.


This new facility will provide a way of tracking changes made to business data, 
independent of the application domain.

Signed-off-by: Kandan Venkataraman [EMAIL PROTECTED]


diff -uprN linux-2.6.19.2/drivers/block/loop.c 
linux-2.6.19.2-new/drivers/block/loop.c
--- linux-2.6.19.2/drivers/block/loop.c 2007-03-02 22:05:06.0 +1100
+++ linux-2.6.19.2-new/drivers/block/loop.c 2007-03-02 22:03:49.0 
+1100
@@ -74,12 +74,16 @@
 #include 
 #include 
 #include 
+#include 

 #include 

 static int max_loop = 8;
 static struct loop_device *loop_dev;
 static struct gendisk **disks;
+static kmem_cache_t *pgoff_elem_cache;
+static char* cache_name = "loop_pgoff_elem_cache";
+static struct file_operations loop_fops;

 /*
  * Transfer functions
@@ -646,6 +650,67 @@ static void do_loop_switch(struct loop_d
complete(&p->wait);
 }

+static void pgoff_tree_clear(struct rb_root *rb_root)
+{
+   struct rb_node *rb_node  = rb_root->rb_node;
+
+   while (rb_node != NULL) {
+
+   rb_erase(rb_node, rb_root);
+   kmem_cache_free(pgoff_elem_cache, rb_entry(rb_node, struct 
pgoff_elem, node));
+   rb_node = rb_root->rb_node;
+   }
+
+   *rb_root = RB_ROOT;
+}
+
+
+static int loop_clr_pgwrites(struct loop_device *lo)
+{
+   struct file *filp = lo->lo_backing_file;
+
+   if (lo->lo_state != Lo_bound)
+   return -ENXIO;
+
+   if (filp == NULL || !lo->lo_track_pgwrite)
+   return -EINVAL;
+
+   pgoff_tree_clear(&lo->pgoff_tree);
+
+   return 0;
+}
+
+static int loop_get_pgwrites(struct loop_device *lo, struct loop_pgoff_array 
__user *arg)
+{
+   struct file *filp = lo->lo_backing_file;
+   struct loop_pgoff_array array;
+   loff_t i = 0;
+   struct rb_node *rb_node  = rb_first(&lo->pgoff_tree);
+
+   if (lo->lo_state != Lo_bound)
+   return -ENXIO;
+
+   if (filp == NULL || !lo->lo_track_pgwrite)
+   return -EINVAL;
+
+   if (copy_from_user(&array, arg, sizeof (struct loop_pgoff_array)))
+   return -EFAULT;
+
+   while (i < array.max && rb_node != NULL) {
+
+   if (put_user(rb_entry(rb_node, struct pgoff_elem, 
node)->offset, array.pgoff + i))
+   return -EFAULT;
+
+   ++i;
+   rb_node = rb_next(rb_node);
+   }
+   array.num = i;
+
+   if (copy_to_user(arg, &array, sizeof(array)))
+   return -EFAULT;
+
+   return 0;
+}

 /*
  * loop_change_fd switched the backing store of a loopback device to
@@ -692,6 +757,8 @@ static int loop_change_fd(struct loop_de
if (get_loop_size(lo, file) != get_loop_size(lo, old_file))
goto out_putf;

+   pgoff_tree_clear(&lo->pgoff_tree);
+
/* and ... switch */
error = loop_switch(lo, file);
if (error)
@@ -799,6 +866,8 @@ static int loop_set_fd(struct loop_devic
lo->transfer = transfer_none;
lo->ioctl = NULL;
lo->lo_sizelimit = 0;
+   lo->lo_track_pgwrite = 0;
+   lo->pgoff_tree = RB_ROOT;
lo->old_gfp_mask = mapping_gfp_mask(mapping);
mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));

@@ -913,6 +982,8 @@ static int loop_clr_fd(struct loop_devic
lo->lo_sizelimit = 0;
lo->lo_encrypt_key_size = 0;
lo->lo_flags = 0;
+   lo->lo_track_pgwrite = 0;
+   pgoff_tree_clear(&lo->pgoff_tree);
lo->lo_thread = NULL;
memset(lo->lo_encrypt_key, 0, LO_KEY_SIZE);
memset(lo->lo_crypt_name, 0, LO_NAME_SIZE);
@@ -969,6 +1040,14 @@ loop_set_status(struct loop_device *lo,
return -EFBIG;
}

+   if (info->lo_track_pgwrite)
+   lo->lo_track_pgwrite = 1;
+   else {
+   if (lo->lo_track_pgwrite)
+   pgoff_tree_clear(&lo->pgoff_tree);
+   lo->lo_track_pgwrite = 0;
+   }
+
memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE);
memcpy(lo->lo_crypt_name, info->lo_crypt_name, LO_NAME_SIZE);
lo->lo_file_name[LO_NAME_SIZE-1] = 0;
@@ -1011,6 +1090,7 @@ loop_get_status(struct loop_device *lo,
info->lo_offset = lo->lo_offset;
info->lo_sizelimit = lo-&

Re: [PATCH] Loop device - Tracking page writes made to a loop device through mmap

2007-03-02 Thread Kandan Venkataraman
other processes. Example of such systems would be a stock trading system, 
where clients buy and sell
stock(equity, options etc). 
At the same time, a lot of clients would be downloading market data, and this can be done independently of the transactions.


This new facility will provide a way of tracking changes made to business data, 
independent of the application domain.

Signed-off-by: Kandan Venkataraman [EMAIL PROTECTED]


diff -uprN linux-2.6.19.2/drivers/block/loop.c 
linux-2.6.19.2-new/drivers/block/loop.c
--- linux-2.6.19.2/drivers/block/loop.c 2007-01-11 06:10:37.0 +1100
+++ linux-2.6.19.2-new/drivers/block/loop.c 2007-03-02 20:20:55.0 
+1100
@@ -74,12 +74,16 @@
 #include 
 #include 
 #include 
+#include 

 #include 

 static int max_loop = 8;
 static struct loop_device *loop_dev;
 static struct gendisk **disks;
+static kmem_cache_t *pgoff_elem_cache;
+static char* cache_name = "loop_pgoff_elem_cache";
+static struct file_operations loop_fops;

 /*
  * Transfer functions
@@ -646,6 +650,67 @@ static void do_loop_switch(struct loop_d
complete(&p->wait);
 }

+static void pgoff_tree_clear(struct rb_root *rb_root)
+{
+   struct rb_node *rb_node  = rb_root->rb_node;
+
+   while (rb_node != NULL) {
+
+   rb_erase(rb_node, rb_root);
+   kmem_cache_free(pgoff_elem_cache, rb_entry(rb_node, struct 
pgoff_elem, node));
+   rb_node = rb_root->rb_node;
+   }
+
+  *rb_root = RB_ROOT;
+}
+
+
+static int loop_clr_pgwrites(struct loop_device *lo)
+{
+   struct file *filp = lo->lo_backing_file;
+
+   if (lo->lo_state != Lo_bound)
+   return -ENXIO;
+
+   if (filp == NULL || !lo->lo_track_pgwrite)
+   return -EINVAL;
+
+   pgoff_tree_clear(&lo->pgoff_tree);
+
+   return 0;
+}
+
+static int loop_get_pgwrites(struct loop_device *lo, struct loop_pgoff_array 
__user *arg)
+{
+   struct file *filp = lo->lo_backing_file;
+   struct loop_pgoff_array array;
+   loff_t i = 0;
+   struct rb_node *rb_node  = rb_first(&lo->pgoff_tree);
+
+   if (lo->lo_state != Lo_bound)
+   return -ENXIO;
+
+   if (filp == NULL || !lo->lo_track_pgwrite)
+   return -EINVAL;
+
+   if (copy_from_user(&array, arg, sizeof (struct loop_pgoff_array)))
+   return -EFAULT;
+
+   while (i < array.max && rb_node != NULL) {
+
+   if (put_user(rb_entry(rb_node, struct pgoff_elem, 
node)->offset, array.pgoff + i))
+   return -EFAULT;
+
+   ++i;
+   rb_node = rb_next(rb_node);
+   }
+   array.num = i;
+
+   if (copy_to_user(arg, &array, sizeof(array)))
+   return -EFAULT;
+
+   return 0;
+}

 /*
  * loop_change_fd switched the backing store of a loopback device to
@@ -692,6 +757,8 @@ static int loop_change_fd(struct loop_de
if (get_loop_size(lo, file) != get_loop_size(lo, old_file))
goto out_putf;

+   pgoff_tree_clear(&lo->pgoff_tree);
+
/* and ... switch */
error = loop_switch(lo, file);
if (error)
@@ -799,6 +866,8 @@ static int loop_set_fd(struct loop_devic
lo->transfer = transfer_none;
lo->ioctl = NULL;
lo->lo_sizelimit = 0;
+   lo->lo_track_pgwrite = 0;
+   lo->pgoff_tree = RB_ROOT;
lo->old_gfp_mask = mapping_gfp_mask(mapping);
mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));

@@ -913,6 +982,8 @@ static int loop_clr_fd(struct loop_devic
lo->lo_sizelimit = 0;
lo->lo_encrypt_key_size = 0;
lo->lo_flags = 0;
+   lo->lo_track_pgwrite = 0;
+   pgoff_tree_clear(&lo->pgoff_tree);
lo->lo_thread = NULL;
memset(lo->lo_encrypt_key, 0, LO_KEY_SIZE);
memset(lo->lo_crypt_name, 0, LO_NAME_SIZE);
@@ -969,6 +1040,14 @@ loop_set_status(struct loop_device *lo,
return -EFBIG;
}

+   if (info->lo_track_pgwrite)
+   lo->lo_track_pgwrite = 1;
+   else {
+   if (lo->lo_track_pgwrite)
+   pgoff_tree_clear(&lo->pgoff_tree);
+   lo->lo_track_pgwrite = 0;
+   }
+
memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE);
memcpy(lo->lo_crypt_name, info->lo_crypt_name, LO_NAME_SIZE);
lo->lo_file_name[LO_NAME_SIZE-1] = 0;
@@ -1011,6 +1090,7 @@ loop_get_status(struct loop_device *lo,
info->lo_offset = lo->lo_offset;
info->lo_sizelimit = lo->lo_sizelimit;
info->lo_flags = lo->lo_flags;
+   info->lo_track_pgwrite = lo->lo_track_pgwrite;
memcpy(info->lo_file_name, lo->lo_file_name, LO_NAME_SIZE);
memcpy(info->lo_crypt_name, lo->lo_crypt_name, LO_NAME_SIZE);
  

[PATCH] Loop device - Tracking page writes made to a loop device through mmap

2007-02-28 Thread Kandan Venkataraman
This patch tracks writes made to a loop device through mmap. 
 
A  file_operations structure variable called loop_fops is initialised with the 
default block device file operations (def_blk_fops).
The mmap operation is overridden with a new function called loop_file_mmap. 
 
A vm_operations structure variable called loop_file_vm_ops is initialised with 
the default operations for a disk file.
The page_mkwrite operation in this variable is initialised to a new function 
called loop_track_pgwrites.
 
In the function lo_open, the file operations pointer of the device file is 
initialised with the address of loop_fops.
 
The function loop_file_mmap simply calls generic_file_mmap and then initialises 
the vm_ops of the vma with address of loop_file_vm_ops.
 
The function loop_track_pgwrites stores the page offset of the page that is 
being written to,  in a red-black tree within the loop device.
 
A flag lo_track_pgwrite has been added to the structs loop_device and 
loop_info64 to turn on/off tracking of page writes.
 
Two new ioctls have been added.
 
The ioctl cmd LOOP_GET_PGWRITES retrieves the page offsets of pages that have 
been written to.
The ioctl cmd LOOP_CLR_PGWRITES empties the red-black tree
 
This functionality would allow us to have a read only version and a write 
version of memory by doing the following:
Associate a normal file as backing storage for the loop device and mmap the 
loop device. Call this mapped address space area1.
Mmap a normal file of identical size. Call this mapped address space 
area2.
 
Changes made to area1 can be periodically copied to area2 using the ioctl cmds 
(retrieve the dirty page offsets and copy the dirty pages from area1 to area2). 
This facility would provide a quick way of updating the read only version.

Please CC your reply to [EMAIL PROTECTED]
 
The following patch is against vanilla linux-2.6.19.2
 
Signed-off-by: Kandan Venkataraman [EMAIL PROTECTED]


diff -uprN linux-2.6.19.2/drivers/block/loop.c 
linux-2.6.19.2-new/drivers/block/loop.c
--- linux-2.6.19.2/drivers/block/loop.c 2007-01-11 06:10:37.0 +1100
+++ linux-2.6.19.2-new/drivers/block/loop.c 2007-02-27 17:23:18.0 
+1100
@@ -74,12 +74,16 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 
 static int max_loop = 8;
 static struct loop_device *loop_dev;
 static struct gendisk **disks;
+static kmem_cache_t *pgoff_elem_cache;
+static char*  cache_name = "loop_pgoff_elem_cache";
+static struct file_operations loop_fops;
 
 /*
  * Transfer functions
@@ -646,6 +650,73 @@ static void do_loop_switch(struct loop_d
complete(&p->wait);
 }
 
+/*
+ * Empty a page-offset tree: unlink every node from @rb_root and hand the
+ * enclosing struct pgoff_elem back to pgoff_elem_cache.  The root is left
+ * as RB_ROOT on return.
+ */
+static void pgoff_tree_clear(struct rb_root *rb_root)
+{
+   struct rb_node *victim;
+
+   /* Keep removing whichever node is currently the root. */
+   while ((victim = rb_root->rb_node) != NULL) {
+       rb_erase(victim, rb_root);
+       kmem_cache_free(pgoff_elem_cache,
+                       rb_entry(victim, struct pgoff_elem, node));
+   }
+
+   *rb_root = RB_ROOT;
+}
+
+
+/*
+ * Discard every recorded page offset of @lo.
+ *
+ * Returns -ENXIO when the device is not in the Lo_bound state, -EINVAL
+ * when it has no backing file, and 0 otherwise.  With tracking switched
+ * off the tree is left untouched and 0 is still returned.
+ */
+static int loop_clr_pgwrites(struct loop_device *lo)
+{
+   if (lo->lo_state != Lo_bound)
+       return -ENXIO;
+
+   if (!lo->lo_backing_file)
+       return -EINVAL;
+
+   if (lo->lo_track_pgwrite)
+       pgoff_tree_clear(&lo->pgoff_tree);
+
+   return 0;
+}
+
+/*
+ * Report the recorded page offsets of @lo to user space.
+ *
+ * The caller's struct loop_pgoff_array is read from @arg; at most
+ * array.max offsets are stored through array.pgoff, walking the tree
+ * with rb_first()/rb_next().  The number stored is placed in array.num
+ * and the structure is copied back to @arg.
+ *
+ * Returns 0 on success, -ENXIO when the device is not Lo_bound, -EINVAL
+ * when there is no backing file or tracking is disabled, and -EFAULT
+ * when a user-space access fails.
+ */
+static int loop_get_pgwrites(struct loop_device *lo,
+                             struct loop_pgoff_array __user *arg)
+{
+   struct loop_pgoff_array array;
+   struct rb_node *rb_node;
+   loff_t i = 0;
+
+   if (lo->lo_state != Lo_bound)
+       return -ENXIO;
+
+   /*
+    * Fail when tracking is off: returning 0 here would never write
+    * array.num, so the caller would see success with a stale count.
+    */
+   if (lo->lo_backing_file == NULL || !lo->lo_track_pgwrite)
+       return -EINVAL;
+
+   if (copy_from_user(&array, arg, sizeof(array)))
+       return -EFAULT;
+
+   /* Only look at the tree once the device is known to be usable. */
+   rb_node = rb_first(&lo->pgoff_tree);
+
+   while (i < array.max && rb_node != NULL) {
+       if (put_user(rb_entry(rb_node, struct pgoff_elem, node)->offset,
+                    array.pgoff + i))
+           return -EFAULT;
+
+       ++i;
+       rb_node = rb_next(rb_node);
+   }
+   array.num = i;
+
+   if (copy_to_user(arg, &array, sizeof(array)))
+       return -EFAULT;
+
+   return 0;
+}
 
 /*
  * loop_change_fd switched the backing store of a loopback device to
@@ -692,6 +763,8 @@ static int loop_change_fd(struct loop_de
if (get_loop_size(lo, file) != get_loop_size(lo, old_file))
goto out_putf;
 
+   pgoff_tree_clear(&lo->pgoff_tree);
+
/* and ... switch */
error = loop_switch(lo, file);
if (error)
@@ -799,6 +872,8 @@ static int loop_set_fd(struct loop_devic
lo->transfer = transfer_none;
lo->ioctl = NULL;
lo->lo_sizelimit = 0;
+   lo->lo_track_pgwrite = 0;
+   lo->pgoff

[PATCH] Loop device - Tracking page writes made to a loop device through mmap

2007-02-28 Thread Kandan Venkataraman
This patch tracks writes made to a loop device through
mmap. 
 
A  file_operations structure variable called loop_fops is initialised
with the default block device file operations (def_blk_fops).
The mmap operation is overridden with a new function called
loop_file_mmap. 
 
A vm_operations structure variable called loop_file_vm_ops is
initialised with the default operations for a disk file.
The page_mkwrite operation in this variable is initialised to a new
function called loop_track_pgwrites.
 
In the function lo_open, the file operations pointer of the device file
is initialised with the address of loop_fops.
 
The function loop_file_mmap simply calls generic_file_mmap and then
initialises the vm_ops of the vma with address of loop_file_vm_ops.
 
The function loop_track_pgwrites stores the page offset of the page that
is being written to,  in a red-black tree within the loop device.
 
A flag lo_track_pgwrite has been added to the structs loop_device and
loop_info64 to turn on/off tracking of page writes.
 
Two new ioctls have been added.
 
The ioctl cmd LOOP_GET_PGWRITES retrieves the page offsets of pages that
have been written to.
The ioctl cmd LOOP_CLR_PGWRITES empties the red-black tree
 
This functionality would allow us to have a read only version and a
write version of memory by doing the following:
Associate a normal file as backing storage for the loop device and mmap
the loop device. Call this mapped address space area1.
Mmap a normal file of identical size. Call this mapped address space
area2.
 
Changes made to area1 can be periodically copied to area2 using the
ioctl cmds (retrieve the dirty page offsets and copy the dirty pages from
area1 to area2). This facility would provide a quick way of updating the
read only version.
 
The following patch is against vanilla linux-2.6.19.2
 
Signed-off-by: Kandan Venkataraman [EMAIL PROTECTED]
 
 
diff -uprN linux-2.6.19.2/drivers/block/loop.c
linux-2.6.19.2-new/drivers/block/loop.c
--- linux-2.6.19.2/drivers/block/loop.c 2007-01-11 06:10:37.0
+1100
+++ linux-2.6.19.2-new/drivers/block/loop.c 2007-02-27
17:23:18.0 +1100
@@ -74,12 +74,16 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 
 static int max_loop = 8;
 static struct loop_device *loop_dev;
 static struct gendisk **disks;
+static kmem_cache_t *pgoff_elem_cache;
+static char*  cache_name = "loop_pgoff_elem_cache";
+static struct file_operations loop_fops;
 
 /*
  * Transfer functions
@@ -646,6 +650,73 @@ static void do_loop_switch(struct loop_d
  complete(&p->wait);
 }
 
+/*
+ * Empty a page-offset tree: unlink every node from @rb_root and hand the
+ * enclosing struct pgoff_elem back to pgoff_elem_cache.  The root is left
+ * as RB_ROOT on return.
+ */
+static void pgoff_tree_clear(struct rb_root *rb_root)
+{
+ struct rb_node *victim;
+
+ /* Keep removing whichever node is currently the root. */
+ while ((victim = rb_root->rb_node) != NULL) {
+  rb_erase(victim, rb_root);
+  kmem_cache_free(pgoff_elem_cache,
+                  rb_entry(victim, struct pgoff_elem, node));
+ }
+
+ *rb_root = RB_ROOT;
+}
+
+
+/*
+ * Discard every recorded page offset of @lo.
+ *
+ * Returns -ENXIO when the device is not in the Lo_bound state, -EINVAL
+ * when it has no backing file, and 0 otherwise.  With tracking switched
+ * off the tree is left untouched and 0 is still returned.
+ */
+static int loop_clr_pgwrites(struct loop_device *lo)
+{
+ if (lo->lo_state != Lo_bound)
+  return -ENXIO;
+
+ if (!lo->lo_backing_file)
+  return -EINVAL;
+
+ if (lo->lo_track_pgwrite)
+  pgoff_tree_clear(&lo->pgoff_tree);
+
+ return 0;
+}
+
+/*
+ * Report the recorded page offsets of @lo to user space.
+ *
+ * The caller's struct loop_pgoff_array is read from @arg; at most
+ * array.max offsets are stored through array.pgoff, walking the tree
+ * with rb_first()/rb_next().  The number stored is placed in array.num
+ * and the structure is copied back to @arg.
+ *
+ * Returns 0 on success, -ENXIO when the device is not Lo_bound, -EINVAL
+ * when there is no backing file or tracking is disabled, and -EFAULT
+ * when a user-space access fails.
+ */
+static int loop_get_pgwrites(struct loop_device *lo,
+                             struct loop_pgoff_array __user *arg)
+{
+ struct loop_pgoff_array array;
+ struct rb_node *rb_node;
+ loff_t i = 0;
+
+ if (lo->lo_state != Lo_bound)
+  return -ENXIO;
+
+ /*
+  * Fail when tracking is off: returning 0 here would never write
+  * array.num, so the caller would see success with a stale count.
+  */
+ if (lo->lo_backing_file == NULL || !lo->lo_track_pgwrite)
+  return -EINVAL;
+
+ if (copy_from_user(&array, arg, sizeof(array)))
+  return -EFAULT;
+
+ /* Only look at the tree once the device is known to be usable. */
+ rb_node = rb_first(&lo->pgoff_tree);
+
+ while (i < array.max && rb_node != NULL) {
+  if (put_user(rb_entry(rb_node, struct pgoff_elem, node)->offset,
+               array.pgoff + i))
+   return -EFAULT;
+
+  ++i;
+  rb_node = rb_next(rb_node);
+ }
+ array.num = i;
+
+ if (copy_to_user(arg, &array, sizeof(array)))
+  return -EFAULT;
+
+ return 0;
+}
 
 /*
  * loop_change_fd switched the backing store of a loopback device to
@@ -692,6 +763,8 @@ static int loop_change_fd(struct loop_de
  if (get_loop_size(lo, file) != get_loop_size(lo, old_file))
   goto out_putf;
 
+ pgoff_tree_clear(&lo->pgoff_tree);
+
  /* and ... switch */
  error = loop_switch(lo, file);
  if (error)
@@ -799,6 +872,8 @@ static int loop_set_fd(struct loop_devic
  lo->transfer = transfer_none;
  lo->ioctl = NULL;
  lo->lo_sizelimit = 0;
+ lo->lo_track_pgwrite = 0;
+ lo->pgoff_tree = RB_ROOT;
  lo->old_gfp_mask = mapping_gfp_mask(mapping);
  mapping_set_gfp_mask(mapping, lo->old_gfp_mask &
~(__GFP_IO|__GFP_FS));
 
@@ -913,6 +988,8 @@ static int loop_clr_fd(struct loop_devic
  lo->lo_sizelimit = 0;
  lo->lo_encrypt_key_size = 0;
  lo->lo_flags = 0;
+ lo->lo_track_pgwrite = 0;
+ pgoff_tree_clear(&lo->pgoff_tree);
  lo->lo_thread = NULL;
  memset(lo->lo_en