On Tue, 8 Nov 2005, Attila Nagy wrote:

Any chance to investigate it further? It's a really annoying bug, which makes quota support a little bit useless.

Attached is a patch that corrects at least one or two deadlock scenarios in UNIX domain socket garbage collecting (unpgc_task.diff). I've also attached a patch for kern_descriptor.c that fixes a bug there in the passing of file descriptors referencing files over UNIX domain sockets (closef_threadnull.diff). I've committed the latter to 7.x, with the intent to merge to 6.x in a week or so. The larger UNIX domain socket garbage collection patch I need to think about some more, but I'm quite interested in whether it fixes the deadlock you're seeing (or for that matter, makes it worse somehow).

Thanks,

Robert N M Watson
Index: uipc_usrreq.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/uipc_usrreq.c,v
retrieving revision 1.158
diff -u -r1.158 uipc_usrreq.c
--- uipc_usrreq.c       31 Oct 2005 15:41:25 -0000      1.158
+++ uipc_usrreq.c       9 Nov 2005 21:49:53 -0000
@@ -59,6 +59,7 @@
 #include <sys/sx.h>
 #include <sys/sysctl.h>
 #include <sys/systm.h>
+#include <sys/taskqueue.h>
 #include <sys/un.h>
 #include <sys/unpcb.h>
 #include <sys/vnode.h>
@@ -112,6 +113,13 @@
 #define        UNP_LOCK_ASSERT()       mtx_assert(&unp_mtx, MA_OWNED)
 #define        UNP_UNLOCK_ASSERT()     mtx_assert(&unp_mtx, MA_NOTOWNED)
 
+/*
+ * Garbage collection of cyclic file descriptor/socket references occurs
+ * asynchronously in a taskqueue context.  See unp_gc() for a full
+ * description.
+ */
+static struct task     unp_gc_task;
+
 static int     unp_attach(struct socket *);
 static void    unp_detach(struct unpcb *);
 static int     unp_bind(struct unpcb *,struct sockaddr *, struct thread *);
@@ -120,7 +128,7 @@
 static void    unp_disconnect(struct unpcb *);
 static void    unp_shutdown(struct unpcb *);
 static void    unp_drop(struct unpcb *, int);
-static void    unp_gc(void);
+static void    unp_gc(__unused void *, int);
 static void    unp_scan(struct mbuf *, void (*)(struct file *));
 static void    unp_mark(struct file *);
 static void    unp_discard(struct file *);
@@ -795,19 +803,9 @@
        }
        soisdisconnected(unp->unp_socket);
        unp->unp_socket->so_pcb = NULL;
-       if (unp_rights) {
-               /*
-                * Normally the receive buffer is flushed later,
-                * in sofree, but if our receive buffer holds references
-                * to descriptors that are now garbage, we will dispose
-                * of those descriptor references after the garbage collector
-                * gets them (resulting in a "panic: closef: count < 0").
-                */
-               sorflush(unp->unp_socket);
-               unp_gc();       /* Will unlock UNP. */
-       } else
-               UNP_UNLOCK();
-       UNP_UNLOCK_ASSERT();
+       if (unp_rights)
+               taskqueue_enqueue(taskqueue_thread, &unp_gc_task);
+       UNP_UNLOCK();
        if (unp->unp_addr != NULL)
                FREE(unp->unp_addr, M_SONAME);
        uma_zfree(unp_zone, unp);
@@ -1395,7 +1393,7 @@
        uma_zone_set_max(unp_zone, nmbclusters);
        LIST_INIT(&unp_dhead);
        LIST_INIT(&unp_shead);
-
+       TASK_INIT(&unp_gc_task, 0, unp_gc, NULL);
        UNP_LOCK_INIT();
 }
 
@@ -1581,14 +1579,20 @@
 }
 
 /*
- * unp_defer is thread-local during garbage collection, and does not require
- * explicit synchronization.  unp_gcing prevents other threads from entering
- * garbage collection, and perhaps should be an sx lock instead.
+ * unp_defer indicates whether additional work has been defered for a future
+ * pass through unp_gc().  It is thread local and does not require explicit
+ * synchronization.
  */
-static int     unp_defer, unp_gcing;
+static int     unp_defer;
+
+static int unp_taskcount;
+SYSCTL_INT(_net_local, OID_AUTO, taskcount, CTLFLAG_RD, &unp_taskcount, 0, "");
+
+static int unp_recycled;
+SYSCTL_INT(_net_local, OID_AUTO, recycled, CTLFLAG_RD, &unp_recycled, 0, "");
 
 static void
-unp_gc(void)
+unp_gc(__unused void *arg, int pending)
 {
        struct file *fp, *nextfp;
        struct socket *so;
@@ -1597,15 +1601,8 @@
        int nfiles_snap;
        int nfiles_slack = 20;
 
-       UNP_LOCK_ASSERT();
-
-       if (unp_gcing) {
-               UNP_UNLOCK();
-               return;
-       }
-       unp_gcing = 1;
+       unp_taskcount++;
        unp_defer = 0;
-       UNP_UNLOCK();
        /*
         * before going through all this, set all FDs to
         * be NOT defered and NOT externally accessible
@@ -1700,6 +1697,9 @@
        } while (unp_defer);
        sx_sunlock(&filelist_lock);
        /*
+        * XXXRW: The following comments need updating for a post-SMPng and
+        * deferred unp_gc() world, but are still generally accurate.
+        *
         * We grab an extra reference to each of the file table entries
         * that are not otherwise accessible and then free the rights
         * that are stored in messages on them.
@@ -1788,12 +1788,11 @@
                        FILE_UNLOCK(tfp);
                }
        }
-       for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp)
+       for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) {
                closef(*fpp, (struct thread *) NULL);
+               unp_recycled++;
+       }
        free(extra_ref, M_TEMP);
-       unp_gcing = 0;
-
-       UNP_UNLOCK_ASSERT();
 }
 
 void
Index: kern_descrip.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/kern_descrip.c,v
retrieving revision 1.283
retrieving revision 1.284
diff -u -r1.283 -r1.284
--- kern_descrip.c      1 Nov 2005 17:13:05 -0000       1.283
+++ kern_descrip.c      9 Nov 2005 20:54:25 -0000       1.284
@@ -1880,9 +1880,13 @@
         * a flag in the unlock to free ONLY locks obeying POSIX
         * semantics, and not to free BSD-style file locks.
         * If the descriptor was in a message, POSIX-style locks
-        * aren't passed with the descriptor.
+        * aren't passed with the descripto, and the thread pointer
+        * will be NULL.  Callers should be careful only to pass a
+        * NULL thread pointer when there really is no owning
+        * context that might have locks, or the locks will be
+        * leaked.
         */
-       if (fp->f_type == DTYPE_VNODE) {
+       if (fp->f_type == DTYPE_VNODE && td != NULL) {
                int vfslocked;
 
                vp = fp->f_vnode;
_______________________________________________
freebsd-hackers@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/freebsd-hackers
To unsubscribe, send any mail to "[EMAIL PROTECTED]"

Reply via email to