This patch plugs the extended fdmap into the kernel. At the moment, this
is done only through sys_dup2() and F_DUPFD.
The base value for the unsequential file descriptor allocation is (at the
moment) set to FD_UNSEQ_BASE (defined in asm-generic/fcntl.h):

#define FD_UNSEQ_BASE   (1U << 28)

Both the sys_dup2() system call and the F_DUPFD fcntl() command understand
values from FD_UNSEQ_BASE up, and use the unsequential fdmap in that case.
In sys_dup2(), if the FD_UNSEQ_ALLOC bit of "newfd" is set, the syscall will
allocate a new file descriptor inside the unsequential fdmap:

#define FD_UNSEQ_ALLOC  (1U << 30)

All the functions that deal with fd<->file* have been changed to make them
unsequential fdmap aware.
There is a new kernel function get_unused_fd_unseq() (and its locked version
__get_unused_fd_unseq()), that can be used to allocate file descriptors
inside the unsequential fdmap. At the moment, no one besides sys_dup2() and
F_DUPFD uses it, but it is possible to integrate it in other paths generating
new file descriptors.
It'd be possible to add a new O_UNSEQFD flag to open(2) and make sys_open()
allocate the new descriptor inside the unsequential map.
So at the moment, to allocate a file descriptor in the unsequential map, you
can do something like:

        ufd = dup2(fd, FD_UNSEQ_ALLOC);
        close(fd);

and use "ufd" instead of "fd".
Verified and tested on a P4 HT and a dual Opteron using this test program:

http://www.xmailserver.org/extfd-test.c



Signed-off-by: Davide Libenzi <[EMAIL PROTECTED]>


- Davide


Index: linux-2.6.mod/include/linux/file.h
===================================================================
--- linux-2.6.mod.orig/include/linux/file.h     2007-06-02 15:34:26.000000000 -0700
+++ linux-2.6.mod/include/linux/file.h  2007-06-02 15:36:07.000000000 -0700
@@ -11,6 +11,12 @@
 #include <linux/spinlock.h>
 #include <linux/rcupdate.h>
 #include <linux/types.h>
+#include <linux/fdmap.h>
+
+/*
+ * Initial size for the non sequential file descriptor arena
+ */
+#define FDMAP_UNSEQ_SIZE       64U
 
 /*
  * The default fd array needs to be at least BITS_PER_LONG,
@@ -50,9 +56,15 @@
    */
        spinlock_t file_lock ____cacheline_aligned_in_smp;
        int next_fd;
+       int fd_count;
        struct embedded_fd_set close_on_exec_init;
        struct embedded_fd_set open_fds_init;
        struct file * fd_array[NR_OPEN_DEFAULT];
+
+       /*
+        * Used for non-contiguous file descriptor allocations.
+        */
+       struct fd_map *fmap;
 };
 
 #define files_fdtable(files) (rcu_dereference((files)->fdt))
@@ -77,22 +89,27 @@
 struct kmem_cache;
 
 extern int expand_files(struct files_struct *, int nr);
+extern struct fd_map *files_fdmap_alloc(struct files_struct *files,
+                                       unsigned int size);
+extern int __get_unused_fd_unseq(struct files_struct *files, int fd,
+                                unsigned long flags);
+extern int get_unused_fd_unseq(struct files_struct *files, int fd,
+                              unsigned long flags);
 extern void free_fdtable_rcu(struct rcu_head *rcu);
 extern void __init files_defer_init(void);
+extern struct file *fcheck_files(struct files_struct *files, unsigned int fd);
 
 static inline void free_fdtable(struct fdtable *fdt)
 {
        call_rcu(&fdt->rcu, free_fdtable_rcu);
 }
 
-static inline struct file * fcheck_files(struct files_struct *files, unsigned int fd)
+/*
+ * Must be called with files->file_lock held.
+ */
+static inline struct fd_map *files_fdmap(struct files_struct *files)
 {
-       struct file * file = NULL;
-       struct fdtable *fdt = files_fdtable(files);
-
-       if (fd < fdt->max_fds)
-               file = rcu_dereference(fdt->fd[fd]);
-       return file;
+       return files->fmap ? files->fmap: files_fdmap_alloc(files, 0);
 }
 
 /*
Index: linux-2.6.mod/fs/fcntl.c
===================================================================
--- linux-2.6.mod.orig/fs/fcntl.c       2007-06-02 15:34:27.000000000 -0700
+++ linux-2.6.mod/fs/fcntl.c    2007-06-02 15:36:07.000000000 -0700
@@ -28,22 +28,34 @@
        struct files_struct *files = current->files;
        struct fdtable *fdt;
        spin_lock(&files->file_lock);
-       fdt = files_fdtable(files);
-       if (flag)
-               FD_SET(fd, fdt->close_on_exec);
-       else
-               FD_CLR(fd, fdt->close_on_exec);
+       if (files->fmap && fdmap_fdof(files->fmap, fd))
+               fdmap_set_fdflags(files->fmap, fd, flag ? 0: FDMAP_F_CLOEXEC,
+                                 flag ? FDMAP_F_CLOEXEC: 0);
+       else {
+               fdt = files_fdtable(files);
+               if (flag)
+                       FD_SET(fd, fdt->close_on_exec);
+               else
+                       FD_CLR(fd, fdt->close_on_exec);
+       }
        spin_unlock(&files->file_lock);
 }
 
 static int get_close_on_exec(unsigned int fd)
 {
        struct files_struct *files = current->files;
+       struct fd_map *fmap;
        struct fdtable *fdt;
        int res;
+
        rcu_read_lock();
-       fdt = files_fdtable(files);
-       res = FD_ISSET(fd, fdt->close_on_exec);
+       fmap = rcu_dereference(files->fmap);
+       if (fmap && fdmap_fdof(fmap, fd))
+               res = (fdmap_get_fdflags(fmap, fd) & FDMAP_F_CLOEXEC) != 0;
+       else {
+               fdt = files_fdtable(files);
+               res = FD_ISSET(fd, fdt->close_on_exec);
+       }
        rcu_read_unlock();
        return res;
 }
@@ -62,11 +74,12 @@
        int error;
        struct fdtable *fdt;
 
-       error = -EINVAL;
        if (orig_start >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
-               goto out;
-
+               return -EINVAL;
 repeat:
+       if (files->fd_count >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
+               return -EMFILE;
+
        fdt = files_fdtable(files);
        /*
         * Someone might have closed fd's in the range
@@ -80,14 +93,13 @@
        if (start < fdt->max_fds)
                newfd = find_next_zero_bit(fdt->open_fds->fds_bits,
                                           fdt->max_fds, start);
-       
-       error = -EMFILE;
+
        if (newfd >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
-               goto out;
+               return -EMFILE;
 
        error = expand_files(files, newfd);
        if (error < 0)
-               goto out;
+               return error;
 
        /*
         * If we needed to expand the fs array we
@@ -103,11 +115,9 @@
         */
        if (start <= files->next_fd)
                files->next_fd = newfd + 1;
+       files->fd_count++;
 
-       error = newfd;
-       
-out:
-       return error;
+       return newfd;
 }
 
 static int dupfd(struct file *file, unsigned int start)
@@ -117,18 +127,22 @@
        int fd;
 
        spin_lock(&files->file_lock);
-       fd = locate_fd(files, file, start);
-       if (fd >= 0) {
-               /* locate_fd() may have expanded fdtable, load the ptr */
-               fdt = files_fdtable(files);
-               FD_SET(fd, fdt->open_fds);
-               FD_CLR(fd, fdt->close_on_exec);
-               spin_unlock(&files->file_lock);
+       if (start >= FD_UNSEQ_BASE)
+               fd = __get_unused_fd_unseq(files, FDMAP_HINT_FDUP(start), 0);
+       else {
+               fd = locate_fd(files, file, start);
+               if (fd >= 0) {
+                       /* locate_fd() may have expanded fdtable, load the ptr */
+                       fdt = files_fdtable(files);
+                       FD_SET(fd, fdt->open_fds);
+                       FD_CLR(fd, fdt->close_on_exec);
+               }
+       }
+       spin_unlock(&files->file_lock);
+       if (likely(fd >= 0))
                fd_install(fd, file);
-       } else {
-               spin_unlock(&files->file_lock);
+       else
                fput(file);
-       }
 
        return fd;
 }
@@ -136,7 +150,7 @@
 asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd)
 {
        int err = -EBADF;
-       struct file * file, *tofree;
+       struct file * file, *tofree = NULL;
        struct files_struct * files = current->files;
        struct fdtable *fdt;
 
@@ -146,32 +160,52 @@
        err = newfd;
        if (newfd == oldfd)
                goto out_unlock;
-       err = -EBADF;
-       if (newfd >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
-               goto out_unlock;
-       get_file(file);                 /* We are now finished with oldfd */
-
-       err = expand_files(files, newfd);
-       if (err < 0)
-               goto out_fput;
-
-       /* To avoid races with open() and dup(), we will mark the fd as
-        * in-use in the open-file bitmap throughout the entire dup2()
-        * process.  This is quite safe: do_close() uses the fd array
-        * entry, not the bitmap, to decide what work needs to be
-        * done.  --sct */
-       /* Doesn't work. open() might be there first. --AV */
+       if (newfd >= FD_UNSEQ_BASE) {
+               if (!(newfd & FD_UNSEQ_ALLOC)) {
+                       if (files->fmap && fdmap_fdof(files->fmap, newfd))
+                               tofree = fdmap_file_get(files->fmap, newfd);
+               }
+               if (!tofree) {
+                       err = __get_unused_fd_unseq(files,
+                               newfd & FD_UNSEQ_ALLOC ? FDMAP_HINT_ANY:
+                                                   FDMAP_HINT_EXACT(newfd), 0);
+                       if (err < 0)
+                               goto out_unlock;
+                       newfd = err;
+               }
+               get_file(file);
+               fdmap_install(files->fmap, newfd, file);
+       } else {
+               err = -EBADF;
+               if (newfd >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
+                       goto out_unlock;
+               get_file(file);         /* We are now finished with oldfd */
+
+               err = expand_files(files, newfd);
+               if (err < 0)
+                       goto out_fput;
+
+               /* To avoid races with open() and dup(), we will mark the fd as
+                * in-use in the open-file bitmap throughout the entire dup2()
+                * process.  This is quite safe: do_close() uses the fd array
+                * entry, not the bitmap, to decide what work needs to be
+                * done.  --sct */
+               /* Doesn't work. open() might be there first. --AV */
 
-       /* Yes. It's a race. In user space. Nothing sane to do */
-       err = -EBUSY;
-       fdt = files_fdtable(files);
-       tofree = fdt->fd[newfd];
-       if (!tofree && FD_ISSET(newfd, fdt->open_fds))
-               goto out_fput;
-
-       rcu_assign_pointer(fdt->fd[newfd], file);
-       FD_SET(newfd, fdt->open_fds);
-       FD_CLR(newfd, fdt->close_on_exec);
+               /* Yes. It's a race. In user space. Nothing sane to do */
+               err = -EBUSY;
+               fdt = files_fdtable(files);
+               tofree = fdt->fd[newfd];
+               if (FD_ISSET(newfd, fdt->open_fds)) {
+                       if (!tofree)
+                               goto out_fput;
+               } else
+                       files->fd_count++;
+
+               rcu_assign_pointer(fdt->fd[newfd], file);
+               FD_SET(newfd, fdt->open_fds);
+               FD_CLR(newfd, fdt->close_on_exec);
+       }
        spin_unlock(&files->file_lock);
 
        if (tofree)
Index: linux-2.6.mod/fs/exec.c
===================================================================
--- linux-2.6.mod.orig/fs/exec.c        2007-06-02 15:34:27.000000000 -0700
+++ linux-2.6.mod/fs/exec.c     2007-06-02 15:36:07.000000000 -0700
@@ -783,6 +783,8 @@
 static void flush_old_files(struct files_struct * files)
 {
        long j = -1;
+       unsigned int start, base;
+       unsigned long fset;
        struct fdtable *fdt;
 
        spin_lock(&files->file_lock);
@@ -807,6 +809,18 @@
                spin_lock(&files->file_lock);
 
        }
+       for (start = 0;;) {
+               if (!files->fmap)
+                       break;
+               if (!fdmap_next_flag_set(files->fmap, FDMAP_BIT_CLOEXEC,
+                                        &start, &base, &fset))
+                       break;
+               spin_unlock(&files->file_lock);
+               for (; fset; base++, fset >>= 1)
+                       if (fset & 1)
+                               sys_close(base);
+               spin_lock(&files->file_lock);
+       }
        spin_unlock(&files->file_lock);
 }
 
Index: linux-2.6.mod/kernel/exit.c
===================================================================
--- linux-2.6.mod.orig/kernel/exit.c    2007-06-02 15:34:27.000000000 -0700
+++ linux-2.6.mod/kernel/exit.c 2007-06-02 15:36:07.000000000 -0700
@@ -417,10 +417,18 @@
 
 EXPORT_SYMBOL(daemonize);
 
+static int files_fdmap_close(void *priv, struct file *file)
+{
+       filp_close(file, (struct files_struct *) priv);
+       cond_resched();
+       return 0;
+}
+
 static void close_files(struct files_struct * files)
 {
        int i, j;
        struct fdtable *fdt;
+       struct file *file;
 
        j = 0;
 
@@ -438,7 +446,7 @@
                set = fdt->open_fds->fds_bits[j++];
                while (set) {
                        if (set & 1) {
-                               struct file * file = xchg(&fdt->fd[i], NULL);
+                               file = xchg(&fdt->fd[i], NULL);
                                if (file) {
                                        filp_close(file, files);
                                        cond_resched();
@@ -448,6 +456,8 @@
                        set >>= 1;
                }
        }
+       if (files->fmap)
+               fdmap_for_each_file(files->fmap, files_fdmap_close, files);
 }
 
 struct files_struct *get_files_struct(struct task_struct *task)
@@ -469,6 +479,8 @@
 
        if (atomic_dec_and_test(&files->count)) {
                close_files(files);
+               if (files->fmap)
+                       fdmap_free(files->fmap);
                /*
                 * Free the fd and fdset arrays if we expanded them.
                 * If the fdtable was embedded, pass files for freeing
Index: linux-2.6.mod/fs/open.c
===================================================================
--- linux-2.6.mod.orig/fs/open.c        2007-06-02 15:34:27.000000000 -0700
+++ linux-2.6.mod/fs/open.c     2007-06-02 15:36:07.000000000 -0700
@@ -861,10 +861,12 @@
        int fd, error;
        struct fdtable *fdt;
 
-       error = -EMFILE;
        spin_lock(&files->file_lock);
-
 repeat:
+       error = -EMFILE;
+       if (files->fd_count >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
+               goto out;
+
        fdt = files_fdtable(files);
        fd = find_next_zero_bit(fdt->open_fds->fds_bits, fdt->max_fds,
                                files->next_fd);
@@ -881,18 +883,17 @@
        if (error < 0)
                goto out;
 
-       if (error) {
-               /*
-                * If we needed to expand the fs array we
-                * might have blocked - try again.
-                */
-               error = -EMFILE;
+       /*
+        * If we needed to expand the fs array we
+        * might have blocked - try again.
+        */
+       if (error)
                goto repeat;
-       }
 
        FD_SET(fd, fdt->open_fds);
        FD_CLR(fd, fdt->close_on_exec);
        files->next_fd = fd + 1;
+       files->fd_count++;
 #if 1
        /* Sanity check */
        if (fdt->fd[fd] != NULL) {
@@ -911,10 +912,17 @@
 
 static void __put_unused_fd(struct files_struct *files, unsigned int fd)
 {
-       struct fdtable *fdt = files_fdtable(files);
-       __FD_CLR(fd, fdt->open_fds);
-       if (fd < files->next_fd)
-               files->next_fd = fd;
+       struct fdtable *fdt;
+
+       if (files->fmap && fdmap_fdof(files->fmap, fd)) {
+               fdmap_putfd(files->fmap, fd);
+       } else {
+               fdt = files_fdtable(files);
+               __FD_CLR(fd, fdt->open_fds);
+               if (fd < files->next_fd)
+                       files->next_fd = fd;
+       }
+       files->fd_count--;
 }
 
 void fastcall put_unused_fd(unsigned int fd)
@@ -927,6 +935,116 @@
 
 EXPORT_SYMBOL(put_unused_fd);
 
+struct fd_map *files_fdmap_alloc(struct files_struct *files, unsigned int size)
+{
+       struct fd_map *fmap, *ofmap, *nfmap;
+
+       size = max(size, FDMAP_UNSEQ_SIZE);
+       ofmap = files->fmap;
+       if (ofmap)
+               size = max(size, 2 * ofmap->size);
+       spin_unlock(&files->file_lock);
+       fmap = fdmap_alloc(FD_UNSEQ_BASE, size, !ofmap);
+       spin_lock(&files->file_lock);
+       if (fmap) {
+               nfmap = files->fmap;
+               if (nfmap) {
+                       if (ofmap == nfmap) {
+                               fdmap_copy(fmap, nfmap, NULL, 0);
+                               rcu_assign_pointer(files->fmap, fmap);
+                               fdmap_free(nfmap);
+                       } else {
+                               fdmap_free(fmap);
+                               fmap = nfmap;
+                       }
+               } else
+                       rcu_assign_pointer(files->fmap, fmap);
+       }
+       return fmap;
+}
+
+/**
+ * __get_unused_fd_unseq - Allocates a file descriptor inside the unsequential
+ *                         file descriptor map (locked)
+ *
+ * @files:  [in] Pointer to the files_struct that hosts the unsequential file
+ *               descriptor map
+ * @fd:     [in] Hint value for the file descriptor allocation. See function
+ *               fdmap_newfd() description for the @fd values documentation
+ * @flags:  [in] Flags to be associated with the file descriptor
+ *
+ * Returns the allocated file descriptor, or a negative value in case of error.
+ * This function must be called while holding @files->file_lock. In case the
+ * file descriptor map needs to be resized, the held lock will be temporarily
+ * released (and re-acquired).
+ */
+int __get_unused_fd_unseq(struct files_struct *files, int fd,
+                         unsigned long flags)
+{
+       int nfd;
+       unsigned long mflags = 0;
+       struct fd_map *fmap;
+
+       /*
+        * Map special open flags parameters to fdmap flags. TODO!!
+        */
+
+repeat:
+       if (unlikely(files->fd_count >=
+                    current->signal->rlim[RLIMIT_NOFILE].rlim_cur))
+               return -EMFILE;
+       fmap = files_fdmap(files);
+       if (!fmap)
+               return -ENOMEM;
+       nfd = fdmap_newfd(fmap, fd, mflags);
+       if (unlikely(nfd == -EMFILE)) {
+               unsigned int size = 0, afd = abs(fd);
+
+               if (afd > FD_UNSEQ_BASE) {
+                       size = afd - FD_UNSEQ_BASE;
+                       size += size / 2;
+               }
+               if (!files_fdmap_alloc(files, size))
+                       return -ENOMEM;
+               goto repeat;
+       }
+       files->fd_count++;
+       return nfd;
+}
+
+/**
+ * get_unused_fd_unseq - Allocates a file descriptor inside the unsequential
+ *                       file descriptor map (unlocked)
+ *
+ * This function is the unlocked counterpart of the __get_unused_fd_unseq()
+ * function.
+ */
+int get_unused_fd_unseq(struct files_struct *files, int fd,
+                        unsigned long flags)
+{
+       spin_lock(&files->file_lock);
+       fd = __get_unused_fd_unseq(files, fd, flags);
+       spin_unlock(&files->file_lock);
+       return fd;
+}
+
+struct file *fcheck_files(struct files_struct *files, unsigned int fd)
+{
+       struct file *file = NULL;
+       struct fd_map *fmap;
+       struct fdtable *fdt;
+
+       fmap = rcu_dereference(files->fmap);
+       if (fmap && fdmap_fdof(fmap, fd))
+               file = fdmap_file_get(fmap, fd);
+       else {
+               fdt = files_fdtable(files);
+               if (fd < fdt->max_fds)
+                       file = rcu_dereference(fdt->fd[fd]);
+       }
+       return file;
+}
+
 /*
  * Install a file pointer in the fd array.
  *
@@ -945,9 +1063,13 @@
        struct files_struct *files = current->files;
        struct fdtable *fdt;
        spin_lock(&files->file_lock);
-       fdt = files_fdtable(files);
-       BUG_ON(fdt->fd[fd] != NULL);
-       rcu_assign_pointer(fdt->fd[fd], file);
+       if (files->fmap && fdmap_fdof(files->fmap, fd)) {
+               fdmap_install(files->fmap, fd, file);
+       } else {
+               fdt = files_fdtable(files);
+               BUG_ON(fdt->fd[fd] != NULL);
+               rcu_assign_pointer(fdt->fd[fd], file);
+       }
        spin_unlock(&files->file_lock);
 }
 
@@ -1053,14 +1175,20 @@
        int retval;
 
        spin_lock(&files->file_lock);
-       fdt = files_fdtable(files);
-       if (fd >= fdt->max_fds)
-               goto out_unlock;
-       filp = fdt->fd[fd];
-       if (!filp)
-               goto out_unlock;
-       rcu_assign_pointer(fdt->fd[fd], NULL);
-       FD_CLR(fd, fdt->close_on_exec);
+       if (files->fmap && fdmap_fdof(files->fmap, fd)) {
+               filp = fdmap_file_get(files->fmap, fd);
+               if (!filp)
+                       goto out_unlock;
+       } else {
+               fdt = files_fdtable(files);
+               if (fd >= fdt->max_fds)
+                       goto out_unlock;
+               filp = fdt->fd[fd];
+               if (!filp)
+                       goto out_unlock;
+               rcu_assign_pointer(fdt->fd[fd], NULL);
+               FD_CLR(fd, fdt->close_on_exec);
+       }
        __put_unused_fd(files, fd);
        spin_unlock(&files->file_lock);
        retval = filp_close(filp, files);
Index: linux-2.6.mod/kernel/fork.c
===================================================================
--- linux-2.6.mod.orig/kernel/fork.c    2007-06-02 15:34:27.000000000 -0700
+++ linux-2.6.mod/kernel/fork.c 2007-06-02 15:36:07.000000000 -0700
@@ -641,6 +641,8 @@
 
        spin_lock_init(&newf->file_lock);
        newf->next_fd = 0;
+       newf->fd_count = 0;
+       newf->fmap = NULL;
        fdt = &newf->fdtab;
        fdt->max_fds = NR_OPEN_DEFAULT;
        fdt->close_on_exec = (fd_set *)&newf->close_on_exec_init;
@@ -671,6 +673,27 @@
                goto out;
 
        spin_lock(&oldf->file_lock);
+repeat:
+       if (oldf->fmap) {
+               struct fd_map *ofmap;
+               unsigned int size, fcount;
+
+               ofmap = oldf->fmap;
+               size = ofmap->size;
+               spin_unlock(&oldf->file_lock);
+               newf->fmap = fdmap_alloc(FD_UNSEQ_BASE, size, 0);
+               if (!newf->fmap)
+                       goto out_release;
+               spin_lock(&oldf->file_lock);
+               if (oldf->fmap != ofmap) {
+                       fdmap_free(newf->fmap);
+                       newf->fmap = NULL;
+                       goto repeat;
+               }
+               fdmap_copy(newf->fmap, ofmap, &fcount, 1);
+               newf->fd_count = fcount;
+       }
+
        old_fdt = files_fdtable(oldf);
        new_fdt = files_fdtable(newf);
        open_files = count_open_files(old_fdt);
@@ -709,6 +732,7 @@
                struct file *f = *old_fds++;
                if (f) {
                        get_file(f);
+                       newf->fd_count++;
                } else {
                        /*
                         * The fd may be claimed in the fd bitmap but not yet
@@ -739,6 +763,8 @@
        return newf;
 
 out_release:
+       if (newf->fmap)
+               fdmap_free(newf->fmap);
        kmem_cache_free(files_cachep, newf);
 out:
        return NULL;
Index: linux-2.6.mod/include/linux/init_task.h
===================================================================
--- linux-2.6.mod.orig/include/linux/init_task.h        2007-06-02 15:34:27.000000000 -0700
+++ linux-2.6.mod/include/linux/init_task.h     2007-06-02 15:36:07.000000000 -0700
@@ -28,7 +28,9 @@
        .next_fd        = 0,                            \
        .close_on_exec_init = { { 0, } },               \
        .open_fds_init  = { { 0, } },                   \
-       .fd_array       = { NULL, }                     \
+       .fd_array       = { NULL, },                    \
+       .fd_count       = 0,                            \
+       .fmap           = NULL                          \
 }
 
 #define INIT_KIOCTX(name, which_mm) \
Index: linux-2.6.mod/kernel/kmod.c
===================================================================
--- linux-2.6.mod.orig/kernel/kmod.c    2007-06-02 15:34:27.000000000 -0700
+++ linux-2.6.mod/kernel/kmod.c 2007-06-02 15:36:07.000000000 -0700
@@ -155,6 +155,7 @@
                fdt = files_fdtable(f);
                FD_SET(0, fdt->open_fds);
                FD_CLR(0, fdt->close_on_exec);
+               f->fd_count++;
                spin_unlock(&f->file_lock);
 
                /* and disallow core files too */
Index: linux-2.6.mod/include/asm-generic/fcntl.h
===================================================================
--- linux-2.6.mod.orig/include/asm-generic/fcntl.h      2007-06-02 15:34:27.000000000 -0700
+++ linux-2.6.mod/include/asm-generic/fcntl.h   2007-06-02 15:36:07.000000000 -0700
@@ -3,6 +3,17 @@
 
 #include <linux/types.h>
 
+/*
+ * Base for non sequential file descriptors
+ */
+#define FD_UNSEQ_BASE  (1U << 28)
+
+/*
+ * Special value for non sequential file descriptors, telling the kernel to
+ * allocate a new non sequential value
+ */
+#define FD_UNSEQ_ALLOC (1U << 30)
+
 /* open/fcntl - O_SYNC is only implemented on blocks devices and on files
    located on an ext2 file system */
 #define O_ACCMODE      00000003
Index: linux-2.6.mod/fs/proc/base.c
===================================================================
--- linux-2.6.mod.orig/fs/proc/base.c   2007-06-02 15:34:27.000000000 -0700
+++ linux-2.6.mod/fs/proc/base.c        2007-06-02 15:36:07.000000000 -0700
@@ -1384,10 +1384,11 @@
        struct dentry *dentry = filp->f_path.dentry;
        struct inode *inode = dentry->d_inode;
        struct task_struct *p = get_proc_task(inode);
-       unsigned int fd, tid, ino;
+       unsigned int fd, tid, ino, topfd;
        int retval;
        struct files_struct * files;
        struct fdtable *fdt;
+       struct fd_map *fmap;
 
        retval = -ENOENT;
        if (!p)
@@ -1412,9 +1413,14 @@
                                goto out;
                        rcu_read_lock();
                        fdt = files_fdtable(files);
-                       for (fd = filp->f_pos-2;
-                            fd < fdt->max_fds;
-                            fd++, filp->f_pos++) {
+                       fmap = rcu_dereference(files->fmap);
+                       fd = filp->f_pos - 2;
+                       if (fd < fdt->max_fds || !fmap)
+                               topfd = fdt->max_fds;
+                       else
+                               topfd = fdmap_topfd(fmap);
+rescan:
+                       for (; fd < topfd; fd++, filp->f_pos++) {
                                char name[PROC_NUMBUF];
                                int len;
 
@@ -1425,13 +1431,19 @@
                                len = snprintf(name, sizeof(name), "%d", fd);
                                if (proc_fill_cache(filp, dirent, filldir,
                                                    name, len, instantiate,
-                                                   p, &fd) < 0) {
-                                       rcu_read_lock();
-                                       break;
-                               }
+                                                   p, &fd) < 0)
+                                       goto out_put_files;
                                rcu_read_lock();
                        }
+                       fmap = rcu_dereference(files->fmap);
+                       if (fmap && fd < fdmap_basefd(fmap)) {
+                               fd = fdmap_basefd(fmap);
+                               filp->f_pos = fd + 2;
+                               topfd = fdmap_topfd(fmap);
+                               goto rescan;
+                       }
                        rcu_read_unlock();
+out_put_files:
                        put_files_struct(files);
        }
 out:

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to