Module Name:    src
Committed By:   hannken
Date:           Sat Aug  6 11:04:25 UTC 2011

Modified Files:
        src/sys/kern: sys_select.c

Log Message:
Fix the races of direct select()/poll():

- When sel_do_scan() restarts do a full initialization with selclear() so
  we start from an empty set without registered events.  Defer the
  evaluation of l_selret after selclear() and add the count of direct events
  to the count of events.

- For selscan()/pollscan() zero the output descriptors before we poll and
  for selscan() take the sc_lock before we change them.

- Change sel_setevents() to not count events already set.

Reviewed by: YAMAMOTO Takashi <y...@netbsd.org>

Should fix PR #44763 (select/poll direct-set optimization seems racy)
       and PR #45187 (select(2) sometimes doesn't wakeup)


To generate a diff of this commit:
cvs rdiff -u -r1.33 -r1.34 src/sys/kern/sys_select.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/kern/sys_select.c
diff -u src/sys/kern/sys_select.c:1.33 src/sys/kern/sys_select.c:1.34
--- src/sys/kern/sys_select.c:1.33	Sat May 28 15:33:41 2011
+++ src/sys/kern/sys_select.c	Sat Aug  6 11:04:25 2011
@@ -1,4 +1,4 @@
-/*	$NetBSD: sys_select.c,v 1.33 2011/05/28 15:33:41 christos Exp $	*/
+/*	$NetBSD: sys_select.c,v 1.34 2011/08/06 11:04:25 hannken Exp $	*/
 
 /*-
  * Copyright (c) 2007, 2008, 2009, 2010 The NetBSD Foundation, Inc.
@@ -84,7 +84,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: sys_select.c,v 1.33 2011/05/28 15:33:41 christos Exp $");
+__KERNEL_RCSID(0, "$NetBSD: sys_select.c,v 1.34 2011/08/06 11:04:25 hannken Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -235,18 +235,19 @@
 	sc = curcpu()->ci_data.cpu_selcluster;
 	lock = sc->sc_lock;
 	l->l_selcluster = sc;
-	SLIST_INIT(&l->l_selwait);
-
-	l->l_selret = 0;
 	if (op == SELOP_SELECT) {
 		l->l_selbits = fds;
 		l->l_selni = ni;
 	} else {
 		l->l_selbits = NULL;
 	}
+
 	for (;;) {
 		int ncoll;
 
+		SLIST_INIT(&l->l_selwait);
+		l->l_selret = 0;
+
 		/*
 		 * No need to lock.  If this is overwritten by another value
 		 * while scanning, we will retry below.  We only need to see
@@ -276,18 +277,18 @@
 		if (__predict_false(sc->sc_ncoll != ncoll)) {
 			/* Collision: perform re-scan. */
 			mutex_spin_exit(lock);
+			selclear();
 			continue;
 		}
 		if (__predict_true(l->l_selflag == SEL_EVENT)) {
 			/* Events occured, they are set directly. */
 			mutex_spin_exit(lock);
-			KASSERT(l->l_selret != 0);
-			*retval = l->l_selret;
 			break;
 		}
 		if (__predict_true(l->l_selflag == SEL_RESET)) {
 			/* Events occured, but re-scan is requested. */
 			mutex_spin_exit(lock);
+			selclear();
 			continue;
 		}
 		/* Nothing happen, therefore - sleep. */
@@ -304,6 +305,12 @@
 	}
 	selclear();
 
+	/* Add direct events if any. */
+	if (l->l_selflag == SEL_EVENT) {
+		KASSERT(l->l_selret != 0);
+		*retval += l->l_selret;
+	}
+
 	if (__predict_false(mask))
 		sigsuspendteardown(l);
 
@@ -371,11 +378,14 @@
 	fd_mask *ibitp, *obitp;
 	int msk, i, j, fd, n;
 	file_t *fp;
+	kmutex_t *lock;
 
+	lock = curlwp->l_selcluster->sc_lock;
 	ibitp = (fd_mask *)(bits + ni * 0);
 	obitp = (fd_mask *)(bits + ni * 3);
 	n = 0;
 
+	memset(obitp, 0, ni * 3);
 	for (msk = 0; msk < 3; msk++) {
 		for (i = 0; i < nfd; i += NFDBITS) {
 			fd_mask ibits, obits;
@@ -397,7 +407,12 @@
 				}
 				fd_putfile(fd);
 			}
-			*obitp++ = obits;
+			if (obits != 0) {
+				mutex_spin_enter(lock);
+				*obitp |= obits;
+				mutex_spin_exit(lock);
+			}
+			obitp++;
 		}
 	}
 	*retval = n;
@@ -505,14 +520,14 @@
 pollscan(struct pollfd *fds, const int nfd, register_t *retval)
 {
 	file_t *fp;
-	int i, n = 0;
+	int i, n = 0, revents;
 
 	for (i = 0; i < nfd; i++, fds++) {
+		fds->revents = 0;
 		if (fds->fd < 0) {
-			fds->revents = 0;
+			revents = 0;
 		} else if ((fp = fd_getfile(fds->fd)) == NULL) {
-			fds->revents = POLLNVAL;
-			n++;
+			revents = POLLNVAL;
 		} else {
 			/*
 			 * Perform poll: registers select request or returns
@@ -520,12 +535,14 @@
 			 * selrecord(), which is a pointer to struct pollfd.
 			 */
 			curlwp->l_selrec = (uintptr_t)fds;
-			fds->revents = (*fp->f_ops->fo_poll)(fp,
+			revents = (*fp->f_ops->fo_poll)(fp,
 			    fds->events | POLLERR | POLLHUP);
-			if (fds->revents != 0)
-				n++;
 			fd_putfile(fds->fd);
 		}
+		if (revents) {
+			fds->revents = revents;
+			n++;
+		}
 	}
 	*retval = n;
 	return (0);
@@ -626,7 +643,9 @@
 		int n;
 
 		for (n = 0; n < 3; n++) {
-			if ((fds[idx] & fbit) != 0 && (sel_flag[n] & events)) {
+			if ((fds[idx] & fbit) != 0 &&
+			    (ofds[idx] & fbit) == 0 &&
+			    (sel_flag[n] & events)) {
 				ofds[idx] |= fbit;
 				ret++;
 			}
@@ -638,8 +657,9 @@
 		int revents = events & (pfd->events | POLLERR | POLLHUP);
 
 		if (revents) {
+			if (pfd->revents == 0)
+				ret = 1;
 			pfd->revents |= revents;
-			ret = 1;
 		}
 	}
 	/* Check whether there are any events to return. */

Reply via email to