Hi all,
The attached patch takes care of a long-overdue (my bad) cleanup: it consolidates
the I/O paths for read and write, and for readv and writev. The aio read and write
parts also need to be consolidated into a common routine, but that is not done in
this patch. I figured I might as well do this now, since the readx and writex
system calls will require similar consolidation.
Note: This does not (yet) consolidate the readv and writev paths to make use of
the read and write path.
Comments welcome,
Thanks,
Murali
Index: src/apps/admin/pvfs2-ping.c
===================================================================
RCS file: /anoncvs/pvfs2/src/apps/admin/pvfs2-ping.c,v
retrieving revision 1.47
diff -u -r1.47 pvfs2-ping.c
--- src/apps/admin/pvfs2-ping.c 25 May 2006 18:14:14 -0000      1.47
+++ src/apps/admin/pvfs2-ping.c 26 May 2006 02:59:13 -0000
@@ -223,9 +223,12 @@
 
     PVFS_error_details_free(error_details);
 
-    /* if we hit this point, then everything is ok */
-    printf("   Ok; root handle is owned by exactly one server.\n");
-    printf("\n");
+    if (!err)
+    {
+        /* if we hit this point, then everything is ok */
+        printf("   Ok; root handle is owned by exactly one server.\n");
+        printf("\n");
+    }
 
     PVFS_sys_finalize();
 
Index: src/kernel/linux-2.6/file.c
===================================================================
RCS file: /anoncvs/pvfs2/src/kernel/linux-2.6/file.c,v
retrieving revision 1.114
diff -u -r1.114 file.c
--- src/kernel/linux-2.6/file.c 4 Apr 2006 15:04:28 -0000       1.114
+++ src/kernel/linux-2.6/file.c 26 May 2006 02:59:20 -0000
@@ -17,6 +17,13 @@
 #include <linux/fs.h>
 #include <linux/pagemap.h>
 
+enum {
+    IO_READ = 0,
+    IO_WRITE = 1,
+    IO_READV = 0,
+    IO_WRITEV = 1,
+};
+
 extern struct list_head pvfs2_request_list;
 extern spinlock_t pvfs2_request_list_lock;
 extern wait_queue_head_t pvfs2_request_list_waitq;
@@ -101,53 +108,127 @@
     return ret;
 }
 
-/** Read data from a specified offset in a file (referenced by inode).
- *  Data may be placed either in a user or kernel buffer.
- */
-ssize_t pvfs2_inode_read(
-    struct inode *inode,
-    char __user *buf,
-    size_t count,
-    loff_t *offset,
-    int copy_to_user,
-    loff_t readahead_size)
+struct rw_options {
+    int type;
+    /* sigh.. we will never pass sparse type checks.. */
+    char *buf;
+    size_t count;
+    loff_t *offset;
+    union {
+        struct {
+            struct inode *inode;
+            int copy_to_user;
+            loff_t readahead_size;
+        } read;
+        struct {
+            struct file *file;
+        } write;
+    } io;
+};
+
+static ssize_t do_read_write(struct rw_options *rw)
 {
-    int ret = -1;
-    size_t each_count = 0, amt_complete = 0;
-    size_t total_count = 0;
     pvfs2_kernel_op_t *new_op = NULL;
     int buffer_index = -1;
-    char *current_buf = buf;
-    loff_t original_offset = *offset;
-    pvfs2_inode_t *pvfs2_inode = PVFS2_I(inode);
+    struct inode *inode;
+    pvfs2_inode_t *pvfs2_inode = NULL;
+    char *current_buf = NULL;
+    size_t count;
+    loff_t *offset;
+    ssize_t ret;
+    ssize_t total_count;
+    char *fnstr = NULL;
+    size_t readahead_size;
+    int copy_to_user;
+    struct file *file;
 
-    if (copy_to_user && (!access_ok(VERIFY_WRITE, buf, count)))
-        return -EFAULT;
+    total_count = 0;
+    ret = -EINVAL;
+    file = NULL;
+    inode = NULL;
+    if (!rw)
+        goto out;
+    count = rw->count;
+    current_buf = (char *) rw->buf;
+    if (!current_buf)
+        goto out;
+    offset = rw->offset;
+    if (!offset)
+        goto out;
+    if (rw->type == IO_READ)
+    {
+        inode = rw->io.read.inode;
+        if (!inode)
+            goto out;
+        file = NULL;
+        copy_to_user = rw->io.read.copy_to_user;
+        ret = -EFAULT;
+        if (copy_to_user && 
+                !access_ok(VERIFY_WRITE, (char __user *) current_buf, count))
+            goto out;
+        fnstr = "pvfs2_file_read";
+        readahead_size = rw->io.read.readahead_size;
+    }
+    else
+    {
+        file = rw->io.write.file;
+        copy_to_user = 1;
+        readahead_size = 0;
+        if (!file)
+            goto out;
+        inode = file->f_dentry->d_inode;
+        if (!inode)
+            goto out;
+        fnstr = "pvfs2_file_write";
+        ret = -EFAULT;
+        if (!access_ok(VERIFY_READ, (char __user *) current_buf, count))
+            goto out;
+        if(file->f_pos > i_size_read(inode))
+        {
+            i_size_write(inode, file->f_pos);
+        }
+        /* perform generic linux kernel tests for sanity of write arguments */
+        /* NOTE: this is particularly helpful in handling fsize rlimit properly */
+#ifdef PVFS2_LINUX_KERNEL_2_4
+        ret = pvfs2_precheck_file_write(file, inode, &count, offset);
+#else
+        ret = generic_write_checks(file, offset, &count, S_ISBLK(inode->i_mode));
+#endif
+        if (ret != 0 || count == 0)
+        {
+            pvfs2_print("pvfs2_file_write: failed generic argument checks.\n");
+            goto out;
+        }
+        pvfs2_print("%s: proceeding with offset : %ld, size %ld\n",
+                fnstr, (unsigned long) *offset, (unsigned long) count);
+    }
+    pvfs2_inode = PVFS2_I(inode);
 
     while(total_count < count)
     {
+        size_t each_count, amt_complete;
+
         new_op = op_alloc();
         if (!new_op)
         {
-            return -ENOMEM;
+            ret = -ENOMEM;
+            goto out;
         }
 
         new_op->upcall.type = PVFS2_VFS_OP_FILE_IO;
         new_op->upcall.req.io.async_vfs_io = PVFS_VFS_SYNC_IO; /* synchronous I/O */
         new_op->upcall.req.io.readahead_size = readahead_size;
-        new_op->upcall.req.io.io_type = PVFS_IO_READ;
+        new_op->upcall.req.io.io_type = 
+            (rw->type == IO_READ) ? PVFS_IO_READ : PVFS_IO_WRITE;
         new_op->upcall.req.io.refn = pvfs2_inode->refn;
 
         ret = pvfs_bufmap_get(&buffer_index);
         if (ret < 0)
         {
-            pvfs2_error("pvfs2_inode_read: pvfs_bufmap_get() "
-                        "failure (%d)\n", ret);
-            op_release(new_op);
-            *offset = original_offset;
-            return ret;
+            pvfs2_error("do_read_write: pvfs_bufmap_get() "
+                        "failure (%ld)\n", (long) ret);
+            goto out;
         }
-
         /* how much to transfer in this loop iteration */
         each_count = (((count - total_count) > pvfs_bufmap_size_query()) ?
                       pvfs_bufmap_size_query() : (count - total_count));
@@ -155,9 +236,18 @@
         new_op->upcall.req.io.buf_index = buffer_index;
         new_op->upcall.req.io.count = each_count;
         new_op->upcall.req.io.offset = *offset;
-
+        if (rw->type == IO_WRITE)
+        {
+            /* copy data from application */
+            ret = pvfs_bufmap_copy_from_user(buffer_index, current_buf, each_count);
+            if(ret < 0)
+            {
+                pvfs2_print("%s: Failed to copy user buffer.\n", fnstr);
+                goto out;
+            }
+        }
         ret = service_operation(
-            new_op, "pvfs2_inode_read", PVFS2_OP_RETRY_COUNT,
+            new_op, fnstr, PVFS2_OP_RETRY_COUNT,
             get_interruptible_flag(inode));
 
         if (ret < 0)
@@ -170,53 +260,54 @@
               termination unless we've got debugging turned on, as
               this can happen regularly (i.e. ctrl-c)
             */
-            if(ret == -EINTR)
+            if (ret == -EINTR)
             {
-                pvfs2_print("pvfs2_inode_read: returning error %d\n", ret);
+                pvfs2_print("%s: returning error %ld\n", fnstr, (long) ret);
             }
             else
             {
                 pvfs2_error(
-                    "pvfs2_inode_read: error reading from handle %llu, "
-                    "\n  -- returning %d \n",
-                    llu(pvfs2_ino_to_handle(inode->i_ino)), ret);
+                    "%s: error writing to handle %llu, "
+                    "-- returning %ld\n",
+                    fnstr,
+                    llu(pvfs2_ino_to_handle(inode->i_ino)),
+                    (long) ret);
             }
-            return ret;
+            goto out;
         }
-
-        /* copy data out to destination */
-        if (new_op->downcall.resp.io.amt_complete)
+        if (rw->type == IO_READ)
         {
-            if (copy_to_user)
-            {
-                ret = pvfs_bufmap_copy_to_user(
-                    current_buf, buffer_index,
-                    new_op->downcall.resp.io.amt_complete);
-            }
-            else
+            /* copy data out to destination */
+            if (new_op->downcall.resp.io.amt_complete)
             {
-                ret = pvfs_bufmap_copy_to_kernel(
-                    current_buf, buffer_index,
-                    new_op->downcall.resp.io.amt_complete);
-            }
-
-            if (ret)
-            {
-                pvfs2_print("Failed to copy user buffer.\n");
-                /* put error code in downcall so that handle_io_error()
-                 * preserves properly
-                 */
-                new_op->downcall.status = ret;
-                handle_io_error();
-                return(ret);
+                if (copy_to_user)
+                {
+                    ret = pvfs_bufmap_copy_to_user(
+                        current_buf, buffer_index,
+                        new_op->downcall.resp.io.amt_complete);
+                }
+                else
+                {
+                    ret = pvfs_bufmap_copy_to_kernel(
+                        current_buf, buffer_index,
+                        new_op->downcall.resp.io.amt_complete);
+                }
+                if (ret)
+                {
+                    pvfs2_print("Failed to copy user buffer.\n");
+                    /* put error code in downcall so that handle_io_error()
+                     * preserves properly
+                     */
+                    new_op->downcall.status = ret;
+                    handle_io_error();
+                    goto out;
+                }
             }
         }
-
         current_buf += new_op->downcall.resp.io.amt_complete;
         *offset += new_op->downcall.resp.io.amt_complete;
         total_count += new_op->downcall.resp.io.amt_complete;
         amt_complete = new_op->downcall.resp.io.amt_complete;
-
         /*
           tell the device file owner waiting on I/O that this read has
           completed and it can return now.  in this exact case, on
@@ -224,9 +315,10 @@
           after this.
         */
         wake_up_device_for_return(new_op);
+        new_op = NULL;
         pvfs_bufmap_put(buffer_index);
-
-        /* if we got a short read, fall out and return what we
+        buffer_index = -1;
+        /* if we got a short read/write, fall out and return what we
          * got so far
          */
         if (amt_complete < each_count)
@@ -234,12 +326,45 @@
             break;
         }
     }
+    if (total_count > 0) {
+        ret = total_count;
+    }
+out:
+    if (new_op) 
+        op_release(new_op);
+    if (buffer_index >= 0) 
+        pvfs_bufmap_put(buffer_index);
+    if (ret > 0 && file != NULL && inode != NULL)
+    {
+#ifdef HAVE_TOUCH_ATIME
+        touch_atime(file->f_vfsmnt, file->f_dentry);
+#else
+        update_atime(inode);
+#endif
+    }
+    return ret;
+}
 
-    /*
-      NOTE: for this special case, op is freed by devreq_writev and
-      *not* here.
-    */
-    return(total_count); 
+/** Read data from a specified offset in a file (referenced by inode).
+ *  Data may be placed either in a user or kernel buffer.
+ */
+ssize_t pvfs2_inode_read(
+    struct inode *inode,
+    char __user *buf,
+    size_t count,
+    loff_t *offset,
+    int copy_to_user,
+    loff_t readahead_size)
+{
+    struct rw_options rw;
+    rw.type = IO_READ;
+    rw.buf  = buf;
+    rw.count = count;
+    rw.offset = offset;
+    rw.io.read.inode = inode;
+    rw.io.read.copy_to_user = copy_to_user;
+    rw.io.read.readahead_size = readahead_size;
+    return do_read_write(&rw); 
 }
 
 /** Read data from a specified offset in a file into a user buffer.
@@ -268,160 +393,13 @@
     size_t count,
     loff_t *offset)
 {
-    int ret = -1;
-    pvfs2_kernel_op_t *new_op = NULL;
-    char __user *current_buf = (char __user *)buf;
-    loff_t original_offset = *offset;
-    int buffer_index = -1;
-    size_t each_count = 0, total_count = 0;
-    struct inode *inode = file->f_dentry->d_inode;
-    pvfs2_inode_t *pvfs2_inode = PVFS2_I(inode);
-    size_t amt_complete = 0;
-
-    pvfs2_print("pvfs2_file_write: called on %s [f_pos %ld off %ld size %ld]\n",
-                (file && file->f_dentry && file->f_dentry->d_name.name ?
-                 (char *)file->f_dentry->d_name.name : "UNKNOWN"),
-                (unsigned long) file->f_pos,
-                (unsigned long) *offset, (unsigned long) count);
-
-    if (!access_ok(VERIFY_READ, buf, count))
-        return -EFAULT;
-
-    if(file->f_pos > i_size_read(inode))
-    {
-        i_size_write(inode, file->f_pos);
-    }
-    
-    /* perform generic linux kernel tests for sanity of write arguments */
-    /* NOTE: this is particularly helpful in handling fsize rlimit properly */
-#ifdef PVFS2_LINUX_KERNEL_2_4
-    ret = pvfs2_precheck_file_write(file, inode, &count, offset);
-#else
-    ret = generic_write_checks(file, offset, &count, S_ISBLK(inode->i_mode));
-#endif
-    if (ret != 0 || count == 0)
-    {
-        pvfs2_print("pvfs2_file_write: failed generic argument checks.\n");
-        return(ret);
-    }
-
-    pvfs2_print("pvfs2_file_write: proceeding with offset : %ld, size %ld\n",
-                (unsigned long) *offset, (unsigned long) count);
-
-    while(total_count < count)
-    {
-        new_op = op_alloc();
-        if (!new_op)
-        {
-            *offset = original_offset;
-            return -ENOMEM;
-        }
-
-        new_op->upcall.type = PVFS2_VFS_OP_FILE_IO;
-        new_op->upcall.req.io.async_vfs_io = PVFS_VFS_SYNC_IO; /* synchronous I/O */
-        new_op->upcall.req.io.io_type = PVFS_IO_WRITE;
-        new_op->upcall.req.io.refn = pvfs2_inode->refn;
-
-        pvfs2_print("pvfs2_file_write: writing %d bytes at offset %lu (%lu)\n",
-                (int)count, (unsigned long)file->f_pos, (unsigned long)*offset);
-
-        ret = pvfs_bufmap_get(&buffer_index);
-        if (ret < 0)
-        {
-            pvfs2_error("pvfs2_file_write: pvfs_bufmap_get() "
-                        "failure (%d)\n", ret);
-            op_release(new_op);
-            *offset = original_offset;
-            return ret;
-        }
-
-        /* how much to transfer in this loop iteration */
-        each_count = (((count - total_count) > pvfs_bufmap_size_query()) ?
-                      pvfs_bufmap_size_query() : (count - total_count));
-
-        new_op->upcall.req.io.buf_index = buffer_index;
-        new_op->upcall.req.io.count = each_count;
-        new_op->upcall.req.io.offset = *offset;
-
-        /* copy data from application */
-        ret = pvfs_bufmap_copy_from_user(
-            buffer_index, current_buf, each_count);
-        if(ret < 0)
-        {
-            pvfs2_print("Failed to copy user buffer.\n");
-            op_release(new_op);
-            pvfs_bufmap_put(buffer_index);
-            *offset = original_offset;
-            return ret;
-        }
-
-        ret = service_operation(
-            new_op, "pvfs2_file_write", PVFS2_OP_RETRY_COUNT,
-            get_interruptible_flag(inode));
-
-        if (ret < 0)
-        {
-            /* this macro is defined in pvfs2-kernel.h */
-            handle_io_error();
-
-            /*
-              don't write an error to syslog on signaled operation
-              termination unless we've got debugging turned on, as
-              this can happen regularly (i.e. ctrl-c)
-            */
-            if (ret == -EINTR)
-            {
-                pvfs2_print("pvfs2_file_write: returning error %d\n", ret);
-            }
-            else
-            {
-                pvfs2_error(
-                    "pvfs2_file_write: error writing to handle %llu, "
-                    "FILE: %s\n  -- returning %d\n",
-                    llu(pvfs2_ino_to_handle(inode->i_ino)),
-                    (file && file->f_dentry && file->f_dentry->d_name.name ?
-                     (char *)file->f_dentry->d_name.name : "UNKNOWN"),
-                    ret);
-            }
-            *offset = original_offset;
-            return ret;
-        }
-
-        current_buf += new_op->downcall.resp.io.amt_complete;
-        pvfs2_print("amt_complete = %ld\n", (unsigned long) new_op->downcall.resp.io.amt_complete);
-        *offset += new_op->downcall.resp.io.amt_complete;
-        total_count += new_op->downcall.resp.io.amt_complete;
-        amt_complete = new_op->downcall.resp.io.amt_complete;
-
-        /*
-          tell the device file owner waiting on I/O that this read has
-          completed and it can return now.  in this exact case, on
-          wakeup the device will free the op, so we *cannot* touch it
-          after this.
-        */
-        wake_up_device_for_return(new_op);
-        pvfs_bufmap_put(buffer_index);
-
-        /* if we got a short write, fall out and return what we got so
-         * far TODO: define semantics here- kind of depends on pvfs2
-         * semantics that don't really exist yet
-         */
-        if (amt_complete < each_count)
-        {
-            break;
-        }
-    }
-    pvfs2_print("pvfs2_file_write: pos at the end was %lu(%lu)\n", 
-            (unsigned long) *offset, (unsigned long) file->f_pos);
-    if (total_count)
-    {
-#ifdef HAVE_TOUCH_ATIME
-        touch_atime(file->f_vfsmnt, file->f_dentry);
-#else
-        update_atime(inode);
-#endif
-    }
-    return total_count;
+    struct rw_options rw;
+    rw.type = IO_WRITE;
+    rw.buf  = (char *) buf;
+    rw.count = count;
+    rw.offset = offset;
+    rw.io.write.file = file;
+    return do_read_write(&rw);
 }
 
 /*
@@ -563,37 +541,38 @@
 }
 
 
-/** Reads data to several contiguous user buffers (an iovec) from a file at a
- * specified offset.
- */
-static ssize_t pvfs2_file_readv(
-    struct file *file,
-    const struct iovec *iov,
-    unsigned long nr_segs,
-    loff_t *offset)
+static ssize_t do_readv_writev(int type, struct file *file,
+        const struct iovec *iov, unsigned long nr_segs, loff_t *offset)
 {
-    int ret = -1;
-    pvfs2_kernel_op_t *new_op = NULL;
-    struct iovec *iovecptr = NULL, *ptr = NULL;
-    loff_t original_offset = *offset;
-    int buffer_index = -1;
+    ssize_t ret;
+    unsigned int to_free;
+    unsigned long seg;
+    ssize_t total_count, count;
+    size_t  each_count;
     struct inode *inode = file->f_dentry->d_inode;
     pvfs2_inode_t *pvfs2_inode = PVFS2_I(inode);
-    size_t amt_complete = 0;
-    size_t total_count = 0, count = 0, each_count = 0;
-    unsigned int seg, to_free = 0;
     unsigned long new_nr_segs = 0, max_new_nr_segs = 0;
-    unsigned int  seg_count, *seg_array = NULL;
-
+    unsigned int  seg_count = 0, *seg_array = NULL;
+    struct iovec *iovecptr = NULL, *ptr = NULL;
+    pvfs2_kernel_op_t *new_op = NULL;
+    int buffer_index = -1;
+    size_t amt_complete = 0;
+    char *fnstr = (type == IO_READV) ? "pvfs2_file_readv" : "pvfs2_file_writev";
 
-    /* Calculate the total length to read by adding up the length of each io
-     * segment */
+    ret = -EINVAL;
+    total_count = 0;
+    count =  0;
+    to_free = 0;
+    /* 
+     * Calculate the total length to read/write by adding up the 
+     * lengths of each io segment 
+     */
     for (seg = 0; seg < nr_segs; seg++)
     {
        const struct iovec *iv = &iov[seg];
        count += iv->iov_len;
        if (unlikely((ssize_t)(count|iv->iov_len) < 0))
-           return -EINVAL;
+            goto out;
         if (total_count + iv->iov_len < pvfs_bufmap_size_query())
         {
             total_count += iv->iov_len;
@@ -602,7 +581,22 @@
         else {
             total_count = (total_count + iv->iov_len - 
                     pvfs_bufmap_size_query());
-            max_new_nr_segs+=2;
+            max_new_nr_segs += 2;
+        }
+    }
+    if (type == IO_WRITEV)
+    {
+        /* perform generic linux kernel tests for sanity of write arguments */
+        /* NOTE: this is particularly helpful in handling fsize rlimit properly */
+#ifdef PVFS2_LINUX_KERNEL_2_4
+        ret = pvfs2_precheck_file_write(file, inode, &count, offset);
+#else
+        ret = generic_write_checks(file, offset, &count, S_ISBLK(inode->i_mode));
+#endif
+        if (ret != 0 || count == 0)
+        {
+            pvfs2_print("%s: failed generic argument checks.\n", fnstr);
+            goto out;
         }
     }
     total_count = 0;
@@ -625,12 +619,12 @@
                         &new_nr_segs, &iovecptr, /* OUT */
                         &seg_count, &seg_array)  /* OUT */ ) < 0)
         {
-            pvfs2_error("Failed to split iovecs to satisfy larger "
-                    " than blocksize readv request %d\n", ret);
-            return ret;
+            pvfs2_error("%s: Failed to split iovecs to satisfy larger "
+                    " than blocksize readv/writev request %d\n", fnstr, ret);
+            goto out;
         }
-        pvfs2_print("pvfs_file_readv: Splitting iovecs from %lu to %lu [max_new %lu]\n", 
-                nr_segs, new_nr_segs, max_new_nr_segs);
+        pvfs2_print("%s: Splitting iovecs from %lu to %lu [max_new %lu]\n", 
+                fnstr, nr_segs, new_nr_segs, max_new_nr_segs);
         /* We must free seg_array and iovecptr */
         to_free = 1;
     }
@@ -646,53 +640,48 @@
         to_free = 0;
     }
     ptr = iovecptr;
-    pvfs2_print("pvfs2_file_readv reading [EMAIL PROTECTED]", (int) count, lld(*offset));
-    pvfs2_print("pvfs2_file_readv: new_nr_segs: %lu, seg_count: %u\n", 
-            new_nr_segs, seg_count);
+
+    pvfs2_print("%s [EMAIL PROTECTED]", fnstr, (int) count, *offset);
+    pvfs2_print("%s: new_nr_segs: %lu, seg_count: %u\n", 
+            fnstr, new_nr_segs, seg_count);
+#ifdef PVFS2_KERNEL_DEBUG
     for (seg = 0; seg < new_nr_segs; seg++)
     {
-        pvfs2_print("pvfs2_file_readv: %d) %p to %p [%d bytes]\n", 
+        pvfs2_print("%s: %d) %p to %p [%d bytes]\n", 
+                fnstr,
                 seg + 1, iovecptr[seg].iov_base, 
                 iovecptr[seg].iov_base + iovecptr[seg].iov_len, 
                 (int) iovecptr[seg].iov_len);
     }
     for (seg = 0; seg < seg_count; seg++)
     {
-        pvfs2_print("pvfs2_file_readv: %d) %u\n", seg + 1, seg_array[seg]);
+        pvfs2_print("%s: %d) %u\n", fnstr, seg + 1, seg_array[seg]);
     }
+#endif
     seg = 0;
     while (total_count < count)
     {
         new_op = op_alloc();
         if (!new_op)
         {
-            *offset = original_offset;
-            if (to_free) {
-                kfree(iovecptr);
-                kfree(seg_array);
-            }
-            return -ENOMEM;
+            ret = -ENOMEM;
+            goto out;
         }
-
         new_op->upcall.type = PVFS2_VFS_OP_FILE_IO;
         new_op->upcall.req.io.async_vfs_io = PVFS_VFS_SYNC_IO; /* synchronous I/O */
         /* disable read-ahead */
         new_op->upcall.req.io.readahead_size = 0;
-        new_op->upcall.req.io.io_type = PVFS_IO_READ;
+        new_op->upcall.req.io.io_type = 
+            (type == IO_READV) ? PVFS_IO_READ : PVFS_IO_WRITE;
         new_op->upcall.req.io.refn = pvfs2_inode->refn;
 
+        /* get a shared buffer index */
         ret = pvfs_bufmap_get(&buffer_index);
         if (ret < 0)
         {
-            pvfs2_error("pvfs2_file_readv: pvfs_bufmap_get() "
-                        "failure (%d)\n", ret);
-            op_release(new_op);
-            *offset = original_offset;
-            if (to_free) {
-                kfree(iovecptr);
-                kfree(seg_array);
-            }
-            return ret;
+            pvfs2_error("%s: pvfs_bufmap_get() "
+                        "failure (%d)\n", fnstr, ret);
+            goto out;
         }
 
         /* how much to transfer in this loop iteration */
@@ -702,10 +691,28 @@
         new_op->upcall.req.io.buf_index = buffer_index;
         new_op->upcall.req.io.count = each_count;
         new_op->upcall.req.io.offset = *offset;
-
-        ret = service_operation(
-            new_op, "pvfs2_file_readv", PVFS2_OP_RETRY_COUNT,
-            get_interruptible_flag(inode));
+        if (type == IO_WRITEV)
+        {
+            /* 
+             * copy data from application by pulling it out  of the iovec.
+             * Number of segments to copy so that we don't overflow the block-size
+             * is set in seg_array[], and ptr points to the appropriate
+             * beginning of the iovec from where data needs to be copied out,
+             * and each_count indicates the size in bytes that needs to be pulled
+             * out.  */
+            pvfs2_print("%s nr_segs %u, offset: %llu each_count: %d\n",
+                    fnstr, seg_array[seg], *offset, (int) each_count);
+            ret = pvfs_bufmap_copy_iovec_from_user(
+                    buffer_index, ptr, seg_array[seg], each_count);
+            if (ret < 0)
+            {
+                pvfs2_error("%s: Failed to copy user buffer.  Please make sure "
+                            "that the pvfs2-client is running. %d\n", fnstr, ret);
+                goto out;
+            }
+        }
+        ret = service_operation(new_op, fnstr,
+            PVFS2_OP_RETRY_COUNT, get_interruptible_flag(inode));
 
         if (ret < 0)
         {
@@ -719,50 +726,48 @@
               */
               if (ret == -EINTR)
               {
-                  pvfs2_print("pvfs2_file_readv: returning error %d\n", ret);
+                  pvfs2_print("%s: returning error %d\n", fnstr, ret);
               }
               else
               {
                   pvfs2_error(
-                        "pvfs2_file_readv: error writing to handle %llu, "
+                        "%s: error on handle %llu, "
                         "FILE: %s\n  -- returning %d\n",
-                        llu(pvfs2_ino_to_handle(inode->i_ino)),
+                        fnstr, llu(pvfs2_ino_to_handle(inode->i_ino)),
                         (file && file->f_dentry && file->f_dentry->d_name.name ?
                          (char *)file->f_dentry->d_name.name : "UNKNOWN"),
                         ret);
               }
-              *offset = original_offset;
-              if (to_free) {
-                  kfree(seg_array);
-                  kfree(iovecptr);
-              }
-              return ret;
+              goto out;
         }
-        pvfs2_print("pvfs2_file_readv nr_segs %u, offset: %llu each_count:%d\n",
-                (int) seg_array[seg], *offset, (int) each_count);
-        /*
-         * copy data to application by pushing it out to the iovec.
-         * Number of segments to copy so that we don't
-         * overflow the block-size is set in seg_array[], and
-         * ptr points to the appropriate beginning of the
-         * iovec from where data needs to be copied to, and
-         * new_op->downcall.resp.io.amt_complete indicates
-         * the size in bytes that needs to be pushed out
-         */
-        if (new_op->downcall.resp.io.amt_complete)
+
+        if (type == IO_READV)
         {
-            ret = pvfs_bufmap_copy_to_user_iovec(
-                    buffer_index, ptr, seg_array[seg],
-                    new_op->downcall.resp.io.amt_complete);
-            if (ret < 0)
+            pvfs2_print("%s: nr_segs %u, offset: %llu each_count:%d\n",
+                fnstr, (int) seg_array[seg], *offset, (int) each_count);
+            /*
+             * copy data to application by pushing it out to the iovec.
+             * Number of segments to copy so that we don't
+             * overflow the block-size is set in seg_array[], and
+             * ptr points to the appropriate beginning of the
+             * iovec from where data needs to be copied to, and
+             * new_op->downcall.resp.io.amt_complete indicates
+             * the size in bytes that needs to be pushed out
+             */
+            if (new_op->downcall.resp.io.amt_complete)
             {
-                pvfs2_error("Failed to copy user buffer.  Please make sure "
-                            "that the pvfs2-client is running.\n");
-                /* put error codes in downcall so that handle_io_error()
-                 * preserves it properly */
-                new_op->downcall.status = ret;
-                handle_io_error();
-                return(ret);
+                ret = pvfs_bufmap_copy_to_user_iovec(buffer_index, ptr, seg_array[seg],
+                        new_op->downcall.resp.io.amt_complete);
+                if (ret < 0)
+                {
+                    pvfs2_error("Failed to copy user buffer.  Please make sure "
+                                "that the pvfs2-client is running.\n");
+                    /* put error codes in downcall so that handle_io_error()
+                     * preserves it properly */
+                    new_op->downcall.status = ret;
+                    handle_io_error();
+                    goto out;
+                }
             }
         }
         /* advance the iovec pointer */
@@ -779,22 +784,54 @@
           after this.
         */
         wake_up_device_for_return(new_op);
+        new_op = NULL;
         pvfs_bufmap_put(buffer_index);
+        buffer_index = -1;
 
-        /* if we got a short write, fall out and return what we got so
-         * far 
+        /* if we got a short I/O operations,
+         * fall out and return what we got so far 
          */
         if (amt_complete < each_count)
         {
             break;
         }
     }
-
-    if (to_free) {
+    if (total_count > 0)
+    {
+        ret = total_count;
+    }
+out:
+    if (new_op)
+        op_release(new_op);
+    if (buffer_index >= 0)
+        pvfs_bufmap_put(buffer_index);
+    if (to_free) 
+    {
         kfree(iovecptr);
         kfree(seg_array);
     }
-    return total_count;
+    if (ret > 0 && file != NULL && inode != NULL)
+    {
+#ifdef HAVE_TOUCH_ATIME
+        touch_atime(file->f_vfsmnt, file->f_dentry);
+#else
+        update_atime(inode);
+#endif
+    }
+    return ret;
+}
+
+
+/** Reads data to several contiguous user buffers (an iovec) from a file at a
+ * specified offset.
+ */
+static ssize_t pvfs2_file_readv(
+    struct file *file,
+    const struct iovec *iov,
+    unsigned long nr_segs,
+    loff_t *offset)
+{
+    return do_readv_writev(IO_READV, file, iov, nr_segs, offset);
 }
 
 
@@ -807,248 +844,7 @@
     unsigned long nr_segs,
     loff_t *offset)
 {
-    int ret = -1;
-    pvfs2_kernel_op_t *new_op = NULL;
-    struct iovec *iovecptr = NULL, *ptr = NULL;
-    loff_t original_offset = *offset;
-    int buffer_index = -1;
-    struct inode *inode = file->f_dentry->d_inode;
-    pvfs2_inode_t *pvfs2_inode = PVFS2_I(inode);
-    size_t amt_complete = 0;
-    size_t total_count = 0, count = 0, each_count = 0;
-    unsigned int seg, to_free = 0;
-    unsigned long new_nr_segs = 0, max_new_nr_segs = 0;
-    unsigned int  seg_count, *seg_array = NULL;
-
-
-    /* Calculate the total length to write by adding up the length of each io
-     * segment */
-    for (seg = 0; seg < nr_segs; seg++)
-    {
-       const struct iovec *iv = &iov[seg];
-       count += iv->iov_len;
-       if (unlikely((ssize_t)(count|iv->iov_len) < 0))
-           return -EINVAL;
-        if (total_count + iv->iov_len < pvfs_bufmap_size_query())
-        {
-            total_count += iv->iov_len;
-            max_new_nr_segs++;
-        }
-        else {
-            total_count = (total_count + iv->iov_len - pvfs_bufmap_size_query());
-            max_new_nr_segs+=2;
-        }
-
-    }
-    /* perform generic linux kernel tests for sanity of write arguments */
-    /* NOTE: this is particularly helpful in handling fsize rlimit properly */
-#ifdef PVFS2_LINUX_KERNEL_2_4
-    ret = pvfs2_precheck_file_write(file, inode, &count, offset);
-#else
-    ret = generic_write_checks(file, offset, &count, S_ISBLK(inode->i_mode));
-#endif
-    if (ret != 0 || count == 0)
-    {
-        pvfs2_print("pvfs2_file_writev: failed generic argument checks.\n");
-        return(ret);
-    }
-
-    total_count = 0;
-    /*
-     * if the total size of data transfer requested is greater than
-     * the kernel-set blocksize of PVFS2, then we split the iovecs
-     * such that no iovec description straddles this block size
-     * limitation.
-     */
-    if (count > pvfs_bufmap_size_query())
-    {
-        /*
-         * Split up the given iovec description such that
-         * no iovec descriptor straddles over the block-size limitation.
-         * This makes us our job easier to stage the I/O.
-         * In addition, this function will also compute an array with seg_count
-         * entries that will store the number of segments that straddle the
-         * block-size boundaries.
-         */
-        if ((ret = split_iovecs(max_new_nr_segs, nr_segs, iov, /* IN */
-                        &new_nr_segs, &iovecptr, /* OUT */
-                        &seg_count, &seg_array) /* OUT */ ) < 0)
-        {
-            pvfs2_error("Failed to split iovecs to satisfy larger than blocksize writev request %d\n", ret);
-            return ret;
-        }
-        pvfs2_print("pvfs_file_writev: Splitting iovecs from %lu to %lu [max_new %lu]\n", 
-                nr_segs, new_nr_segs, max_new_nr_segs);
-        /* We must free seg_array and iovecptr */
-        to_free = 1;
-    }
-    else {
-        /* Number of segments dont change! */
-        new_nr_segs = nr_segs;
-         /* use the given iovec description */
-        iovecptr = (struct iovec *) iov;
-        /* There is only 1 element in the seg_array */
-        seg_count = 1;
-        /* and its value is the number of segments passed in */
-        seg_array = (unsigned int *) &nr_segs;
-        /* We dont have to free up anything */
-        to_free = 0;
-    }
-    ptr = iovecptr;
-    pvfs2_print("pvfs2_file_writev writing %d@%llu\n", (int) count, *offset);
-    pvfs2_print("pvfs2_file_writev: new_nr_segs: %lu, seg_count: %u\n", 
-            new_nr_segs, seg_count);
-    for (seg = 0; seg < new_nr_segs; seg++)
-    {
-        pvfs2_print("pvfs2_file_writev: %d) %p to %p [%d bytes]\n", 
-                seg + 1, iovecptr[seg].iov_base, 
-                iovecptr[seg].iov_base + iovecptr[seg].iov_len, 
-                (int) iovecptr[seg].iov_len);
-    }
-    for (seg = 0; seg < seg_count; seg++)
-    {
-        pvfs2_print("pvfs2_file_writev: %d) %u\n", seg + 1, seg_array[seg]);
-    }
-    seg = 0;
-    while (total_count < count)
-    {
-        new_op = op_alloc();
-        if (!new_op)
-        {
-            *offset = original_offset;
-            if (to_free) {
-                kfree(iovecptr);
-                kfree(seg_array);
-            }
-            return -ENOMEM;
-        }
-
-        new_op->upcall.type = PVFS2_VFS_OP_FILE_IO;
-        new_op->upcall.req.io.async_vfs_io = PVFS_VFS_SYNC_IO; /* synchronous I/O */
-        new_op->upcall.req.io.io_type = PVFS_IO_WRITE;
-        new_op->upcall.req.io.refn = pvfs2_inode->refn;
-
-        ret = pvfs_bufmap_get(&buffer_index);
-        if (ret < 0)
-        {
-            pvfs2_error("pvfs2_file_writev: pvfs_bufmap_get() "
-                        "failure (%d)\n", ret);
-            op_release(new_op);
-            *offset = original_offset;
-            if (to_free) {
-                kfree(iovecptr);
-                kfree(seg_array);
-            }
-            return ret;
-        }
-
-        /* how much to transfer in this loop iteration */
-        each_count = (((count - total_count) > pvfs_bufmap_size_query()) ?
-                      pvfs_bufmap_size_query() : (count - total_count));
-
-        new_op->upcall.req.io.buf_index = buffer_index;
-        new_op->upcall.req.io.count = each_count;
-        new_op->upcall.req.io.offset = *offset;
-        pvfs2_print("pvfs2_file_writev nr_segs %u, offset: %llu each_count: %d\n",
-                seg_array[seg], *offset, (int) each_count);
-
-        /* 
-         * copy data from application by pulling it out  of the iovec.
-         * Number of segments to copy so that we don't overflow the block-size
-         * is set in seg_array[], and ptr points to the appropriate
-         * beginning of the iovec from where data needs to be copied out,
-         * and each_count indicates the size in bytes that needs to be pulled
-         * out.  */
-        ret = pvfs_bufmap_copy_iovec_from_user(
-                buffer_index, ptr, seg_array[seg], each_count);
-        if (ret < 0)
-        {
-            pvfs2_error("Failed to copy user buffer.  Please make sure "
-                        "that the pvfs2-client is running. %d\n", ret);
-            op_release(new_op);
-            pvfs_bufmap_put(buffer_index);
-            *offset = original_offset;
-            if (to_free) {
-                kfree(seg_array);
-                kfree(iovecptr);
-            }
-            return ret;
-        }
-
-        ret = service_operation(
-            new_op, "pvfs2_file_writev", PVFS2_OP_RETRY_COUNT,
-            get_interruptible_flag(inode));
-
-        if (ret < 0)
-        {
-              /* this macro is defined in pvfs2-kernel.h */
-              handle_io_error();
-
-              /*
-                don't write an error to syslog on signaled operation
-                termination unless we've got debugging turned on, as
-                this can happen regularly (i.e. ctrl-c)
-              */
-              if (ret == -EINTR)
-              {
-                  pvfs2_print("pvfs2_file_writev: returning error %d\n", ret);
-              }
-              else
-              {
-                  pvfs2_error(
-                        "pvfs2_file_writev: error writing to handle %llu, "
-                        "FILE: %s\n  -- returning %d\n",
-                        llu(pvfs2_ino_to_handle(inode->i_ino)),
-                        (file && file->f_dentry && file->f_dentry->d_name.name ?
-                         (char *)file->f_dentry->d_name.name : "UNKNOWN"),
-                        ret);
-              }
-              *offset = original_offset;
-              if (to_free) {
-                  kfree(seg_array);
-                  kfree(iovecptr);
-              }
-              return ret;
-        }
-        /* advance the iovec pointer */
-        ptr += seg_array[seg];
-        seg++;
-        *offset += new_op->downcall.resp.io.amt_complete;
-        total_count += new_op->downcall.resp.io.amt_complete;
-        amt_complete = new_op->downcall.resp.io.amt_complete;
-
-        /*
-          tell the device file owner waiting on I/O that this read has
-          completed and it can return now.  in this exact case, on
-          wakeup the device will free the op, so we *cannot* touch it
-          after this.
-        */
-        wake_up_device_for_return(new_op);
-        pvfs_bufmap_put(buffer_index);
-
-        /* if we got a short write, fall out and return what we got so
-         * far TODO: define semantics here- kind of depends on pvfs2
-         * semantics that don't really exist yet
-         */
-        if (amt_complete < each_count)
-        {
-            break;
-        }
-    }
-
-    if (to_free) {
-        kfree(iovecptr);
-        kfree(seg_array);
-    }
-    if (total_count)
-    {
-#ifdef HAVE_TOUCH_ATIME
-        touch_atime(file->f_vfsmnt, file->f_dentry);
-#else
-        update_atime(inode);
-#endif
-    }
-    return total_count;
+    return do_readv_writev(IO_WRITEV, file, iov, nr_segs, offset);
 }
 
 #ifdef HAVE_AIO_VFS_SUPPORT
Index: src/kernel/linux-2.6/pvfs2-kernel.h
===================================================================
RCS file: /anoncvs/pvfs2/src/kernel/linux-2.6/pvfs2-kernel.h,v
retrieving revision 1.116
diff -u -r1.116 pvfs2-kernel.h
--- src/kernel/linux-2.6/pvfs2-kernel.h 19 May 2006 21:37:11 -0000      1.116
+++ src/kernel/linux-2.6/pvfs2-kernel.h 26 May 2006 02:59:21 -0000
@@ -752,8 +752,9 @@
     {                                                     \
         wake_up_device_for_return(new_op);                \
     }                                                     \
+    new_op = NULL;                                        \
     pvfs_bufmap_put(buffer_index);                        \
-    *offset = original_offset;                            \
+    buffer_index = -1;                                    \
 } while(0)
 
 #ifdef HAVE_AIO_VFS_SUPPORT
_______________________________________________
Pvfs2-developers mailing list
Pvfs2-developers@beowulf-underground.org
http://www.beowulf-underground.org/mailman/listinfo/pvfs2-developers

Reply via email to