Module Name:    src
Committed By:   rmind
Date:           Mon Apr 11 22:31:43 UTC 2011

Modified Files:
        src/sys/kern: uipc_sem.c
        src/sys/sys: file.h

Log Message:
Substantially rewrite the POSIX semaphore implementation.  Use the file
descriptor subsystem, greatly simplify reference counting, and fix various
bugs that existed before, e.g. some incorrect locking and sem_unlink() not
checking permissions.

Also, fixes PR/43452.


To generate a diff of this commit:
cvs rdiff -u -r1.29 -r1.30 src/sys/kern/uipc_sem.c
cvs rdiff -u -r1.71 -r1.72 src/sys/sys/file.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/kern/uipc_sem.c
diff -u src/sys/kern/uipc_sem.c:1.29 src/sys/kern/uipc_sem.c:1.30
--- src/sys/kern/uipc_sem.c:1.29	Fri Nov 14 15:49:21 2008
+++ src/sys/kern/uipc_sem.c	Mon Apr 11 22:31:43 2011
@@ -1,11 +1,11 @@
-/*	$NetBSD: uipc_sem.c,v 1.29 2008/11/14 15:49:21 ad Exp $	*/
+/*	$NetBSD: uipc_sem.c,v 1.30 2011/04/11 22:31:43 rmind Exp $	*/
 
 /*-
- * Copyright (c) 2003, 2007, 2008 The NetBSD Foundation, Inc.
+ * Copyright (c) 2011 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This code is derived from software contributed to The NetBSD Foundation
- * by Jason R. Thorpe of Wasabi Systems, Inc, and by Andrew Doran.
+ * by Mindaugas Rasiukevicius.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -55,8 +55,12 @@
  * SUCH DAMAGE.
  */
 
+/*
+ * Implementation of POSIX semaphore.
+ */
+
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uipc_sem.c,v 1.29 2008/11/14 15:49:21 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uipc_sem.c,v 1.30 2011/04/11 22:31:43 rmind Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -67,6 +71,8 @@
 #include <sys/stat.h>
 #include <sys/kmem.h>
 #include <sys/fcntl.h>
+#include <sys/file.h>
+#include <sys/filedesc.h>
 #include <sys/kauth.h>
 #include <sys/module.h>
 #include <sys/mount.h>
@@ -74,14 +80,48 @@
 #include <sys/syscallargs.h>
 #include <sys/syscallvar.h>
 
-#define SEM_MAX_NAMELEN	14
-#define SEM_VALUE_MAX (~0U)
-#define SEM_HASHTBL_SIZE 13
+MODULE(MODULE_CLASS_MISC, ksem, NULL);
 
-#define SEM_TO_ID(x)	(((x)->ks_id))
-#define SEM_HASH(id)	((id) % SEM_HASHTBL_SIZE)
+#define	SEM_MAX_NAMELEN		14
+#define	SEM_VALUE_MAX		(~0U)
 
-MODULE(MODULE_CLASS_MISC, ksem, NULL);
+#define	KS_UNLINKED		0x01
+
+typedef struct ksem {
+	LIST_ENTRY(ksem)	ks_entry;	/* global list entry */
+	kmutex_t		ks_lock;	/* lock on this ksem */
+	kcondvar_t		ks_cv;		/* condition variable */
+	u_int			ks_ref;		/* number of references */
+	u_int			ks_value;	/* current value */
+	u_int			ks_waiters;	/* number of waiters */
+	char *			ks_name;	/* name, if named */
+	size_t			ks_namelen;	/* length of name */
+	int			ks_flags;	/* for KS_UNLINKED */
+	mode_t			ks_mode;	/* protection bits */
+	uid_t			ks_uid;		/* creator uid */
+	gid_t			ks_gid;		/* creator gid */
+} ksem_t;
+
+static kmutex_t		ksem_lock	__cacheline_aligned;
+static LIST_HEAD(,ksem)	ksem_head	__cacheline_aligned;
+static u_int		nsems		__cacheline_aligned;
+
+static int		ksem_sysinit(void);
+static int		ksem_sysfini(bool);
+static int		ksem_modcmd(modcmd_t, void *);
+static int		ksem_close_fop(file_t *);
+
+static const struct fileops semops = {
+	.fo_read = fbadop_read,
+	.fo_write = fbadop_write,
+	.fo_ioctl = fbadop_ioctl,
+	.fo_fcntl = fnullop_fcntl,
+	.fo_poll = fnullop_poll,
+	.fo_stat = fbadop_stat,
+	.fo_close = ksem_close_fop,
+	.fo_kqfilter = fnullop_kqfilter,
+	.fo_restart = fnullop_restart,
+};
 
 static const struct syscall_package ksem_syscalls[] = {
 	{ SYS__ksem_init, 0, (sy_call_t *)sys__ksem_init },
@@ -96,283 +136,186 @@
 	{ 0, 0, NULL },
 };
 
-/*
- * Note: to read the ks_name member, you need either the ks_interlock
- * or the ksem_mutex.  To write the ks_name member, you need both.  Make
- * sure the order is ksem_mutex -> ks_interlock.
- */
-struct ksem {
-	LIST_ENTRY(ksem) ks_entry;	/* global list entry */
-	LIST_ENTRY(ksem) ks_hash;	/* hash list entry */
-	kmutex_t ks_interlock;		/* lock on this ksem */
-	kcondvar_t ks_cv;		/* condition variable */
-	unsigned int ks_ref;		/* number of references */
-	char *ks_name;			/* if named, this is the name */
-	size_t ks_namelen;		/* length of name */
-	mode_t ks_mode;			/* protection bits */
-	uid_t ks_uid;			/* creator uid */
-	gid_t ks_gid;			/* creator gid */
-	unsigned int ks_value;		/* current value */
-	unsigned int ks_waiters;	/* number of waiters */
-	intptr_t ks_id;			/* unique identifier */
-};
-
-struct ksem_ref {
-	LIST_ENTRY(ksem_ref) ksr_list;
-	struct ksem *ksr_ksem;
-};
-
-struct ksem_proc {
-	krwlock_t kp_lock;
-	LIST_HEAD(, ksem_ref) kp_ksems;
-};
-
-LIST_HEAD(ksem_list, ksem);
-
-/*
- * ksem_mutex protects ksem_head and nsems.  Only named semaphores go
- * onto ksem_head.
- */
-static kmutex_t ksem_mutex;
-static struct ksem_list ksem_head = LIST_HEAD_INITIALIZER(&ksem_head);
-static struct ksem_list ksem_hash[SEM_HASHTBL_SIZE];
-static int nsems = 0;
-
-/*
- * ksem_counter is the last assigned intptr_t.  It needs to be COMPAT_NETBSD32
- * friendly, even though intptr_t itself is defined as uintptr_t.
- */
-static uint32_t ksem_counter = 1;
-
-static specificdata_key_t ksem_specificdata_key;
-static void *ksem_ehook;
-static void *ksem_fhook;
-
-static void
-ksem_free(struct ksem *ks)
+static int
+ksem_sysinit(void)
 {
+	int error;
 
-	KASSERT(mutex_owned(&ks->ks_interlock));
-
-	/*
-	 * If the ksem is anonymous (or has been unlinked), then
-	 * this is the end if its life.
-	 */
-	if (ks->ks_name == NULL) {
-		mutex_exit(&ks->ks_interlock);
-		mutex_destroy(&ks->ks_interlock);
-		cv_destroy(&ks->ks_cv);
-
-		mutex_enter(&ksem_mutex);
-		nsems--;
-		LIST_REMOVE(ks, ks_hash);
-		mutex_exit(&ksem_mutex);
+	nsems = 0;
+	mutex_init(&ksem_lock, MUTEX_DEFAULT, IPL_NONE);
+	LIST_INIT(&ksem_head);
 
-		kmem_free(ks, sizeof(*ks));
-		return;
+	error = syscall_establish(NULL, ksem_syscalls);
+	if (error) {
+		(void)ksem_sysfini(false);
 	}
-	mutex_exit(&ks->ks_interlock);
-}
-
-static inline void
-ksem_addref(struct ksem *ks)
-{
-
-	KASSERT(mutex_owned(&ks->ks_interlock));
-	ks->ks_ref++;
-	KASSERT(ks->ks_ref != 0);
+	return error;
 }
 
-static inline void
-ksem_delref(struct ksem *ks)
+static int
+ksem_sysfini(bool interface)
 {
+	int error;
 
-	KASSERT(mutex_owned(&ks->ks_interlock));
-	KASSERT(ks->ks_ref != 0);
-	if (--ks->ks_ref == 0) {
-		ksem_free(ks);
-		return;
+	if (interface) {
+		error = syscall_disestablish(NULL, ksem_syscalls);
+		if (error != 0) {
+			return error;
+		}
+		if (nsems != 0) {
+			error = syscall_establish(NULL, ksem_syscalls);
+			KASSERT(error == 0);
+			return EBUSY;
+		}
 	}
-	mutex_exit(&ks->ks_interlock);
+	mutex_destroy(&ksem_lock);
+	return 0;
 }
 
-static struct ksem_proc *
-ksem_proc_alloc(void)
+static int
+ksem_modcmd(modcmd_t cmd, void *arg)
 {
-	struct ksem_proc *kp;
-
-	kp = kmem_alloc(sizeof(*kp), KM_SLEEP);
-	rw_init(&kp->kp_lock);
-	LIST_INIT(&kp->kp_ksems);
-
-	return (kp);
-}
 
-static void
-ksem_proc_dtor(void *arg)
-{
-	struct ksem_proc *kp = arg;
-	struct ksem_ref *ksr;
+	switch (cmd) {
+	case MODULE_CMD_INIT:
+		return ksem_sysinit();
 
-	rw_enter(&kp->kp_lock, RW_WRITER);
+	case MODULE_CMD_FINI:
+		return ksem_sysfini(true);
 
-	while ((ksr = LIST_FIRST(&kp->kp_ksems)) != NULL) {
-		LIST_REMOVE(ksr, ksr_list);
-		mutex_enter(&ksr->ksr_ksem->ks_interlock);
-		ksem_delref(ksr->ksr_ksem);
-		kmem_free(ksr, sizeof(*ksr));
+	default:
+		return ENOTTY;
 	}
-
-	rw_exit(&kp->kp_lock);
-	rw_destroy(&kp->kp_lock);
-	kmem_free(kp, sizeof(*kp));
 }
 
-static void
-ksem_add_proc(struct proc *p, struct ksem *ks)
+static ksem_t *
+ksem_lookup(const char *name)
 {
-	struct ksem_proc *kp;
-	struct ksem_ref *ksr;
+	ksem_t *ks;
 
-	kp = proc_getspecific(p, ksem_specificdata_key);
-	if (kp == NULL) {
-		kp = ksem_proc_alloc();
-		proc_setspecific(p, ksem_specificdata_key, kp);
-	}
-
-	ksr = kmem_alloc(sizeof(*ksr), KM_SLEEP);
-	ksr->ksr_ksem = ks;
+	KASSERT(mutex_owned(&ksem_lock));
 
-	rw_enter(&kp->kp_lock, RW_WRITER);
-	LIST_INSERT_HEAD(&kp->kp_ksems, ksr, ksr_list);
-	rw_exit(&kp->kp_lock);
-}
-
-/* We MUST have a write lock on the ksem_proc list! */
-static struct ksem_ref *
-ksem_drop_proc(struct ksem_proc *kp, struct ksem *ks)
-{
-	struct ksem_ref *ksr;
-
-	KASSERT(mutex_owned(&ks->ks_interlock));
-	LIST_FOREACH(ksr, &kp->kp_ksems, ksr_list) {
-		if (ksr->ksr_ksem == ks) {
-			ksem_delref(ks);
-			LIST_REMOVE(ksr, ksr_list);
-			return (ksr);
+	LIST_FOREACH(ks, &ksem_head, ks_entry) {
+		if (strcmp(ks->ks_name, name) == 0) {
+			mutex_enter(&ks->ks_lock);
+			return ks;
 		}
 	}
-#ifdef DIAGNOSTIC
-	panic("ksem_drop_proc: ksem_proc %p ksem %p", kp, ks);
-#endif
-	return (NULL);
+	return NULL;
 }
 
 static int
-ksem_perm(struct lwp *l, struct ksem *ks)
+ksem_perm(lwp_t *l, ksem_t *ks)
 {
-	kauth_cred_t uc;
+	kauth_cred_t uc = l->l_cred;
+	mode_t mode = ks->ks_mode;
 
-	KASSERT(mutex_owned(&ks->ks_interlock));
-	uc = l->l_cred;
-	if ((kauth_cred_geteuid(uc) == ks->ks_uid && (ks->ks_mode & S_IWUSR) != 0) ||
-	    (kauth_cred_getegid(uc) == ks->ks_gid && (ks->ks_mode & S_IWGRP) != 0) ||
-	    (ks->ks_mode & S_IWOTH) != 0 ||
+	KASSERT(mutex_owned(&ks->ks_lock));
+	if ((kauth_cred_geteuid(uc) == ks->ks_uid && (mode & S_IWUSR) != 0) ||
+	    (kauth_cred_getegid(uc) == ks->ks_gid && (mode & S_IWGRP) != 0) ||
+	    (mode & S_IWOTH) != 0 ||
 	    kauth_authorize_generic(uc, KAUTH_GENERIC_ISSUSER, NULL) == 0)
-		return (0);
-	return (EPERM);
-}
-
-static struct ksem *
-ksem_lookup_byid(intptr_t id)
-{
-	struct ksem *ks;
+		return 0;
 
-	KASSERT(mutex_owned(&ksem_mutex));
-	LIST_FOREACH(ks, &ksem_hash[SEM_HASH(id)], ks_hash) {
-		if (ks->ks_id == id)
-			return ks;
-	}
-	return NULL;
+	return EACCES;
 }
 
-static struct ksem *
-ksem_lookup_byname(const char *name)
+/*
+ * ksem_get: get the semaphore from the descriptor.
+ *
+ * => locks the semaphore, if found.
+ * => holds a reference on the file descriptor.
+ */
+static int
+ksem_get(int fd, ksem_t **ksret)
 {
-	struct ksem *ks;
+	ksem_t *ks;
+	file_t *fp;
 
-	KASSERT(mutex_owned(&ksem_mutex));
-	LIST_FOREACH(ks, &ksem_head, ks_entry) {
-		if (strcmp(ks->ks_name, name) == 0) {
-			mutex_enter(&ks->ks_interlock);
-			return (ks);
-		}
+	fp = fd_getfile(fd);
+	if (__predict_false(fp == NULL)) {
+		return EBADF;
+	}
+	if (__predict_false(fp->f_type != DTYPE_SEM)) {
+		fd_putfile(fd);
+		return EBADF;
 	}
-	return (NULL);
+	ks = fp->f_data;
+	mutex_enter(&ks->ks_lock);
+
+	*ksret = ks;
+	return 0;
 }
 
+/*
+ * ksem_create: allocate and setup a new semaphore structure.
+ */
 static int
-ksem_create(struct lwp *l, const char *name, struct ksem **ksret,
-    mode_t mode, unsigned int value)
+ksem_create(lwp_t *l, const char *name, ksem_t **ksret, mode_t mode, u_int val)
 {
-	struct ksem *ret;
+	ksem_t *ks;
 	kauth_cred_t uc;
+	char *kname;
 	size_t len;
 
-	uc = l->l_cred;
-	if (value > SEM_VALUE_MAX)
-		return (EINVAL);
-	ret = kmem_zalloc(sizeof(*ret), KM_SLEEP);
+	/* Pre-check for the limit. */
+	if (nsems >= ksem_max) {
+		return ENFILE;
+	}
+
+	if (val > SEM_VALUE_MAX) {
+		return EINVAL;
+	}
+
 	if (name != NULL) {
 		len = strlen(name);
 		if (len > SEM_MAX_NAMELEN) {
-			kmem_free(ret, sizeof(*ret));
-			return (ENAMETOOLONG);
+			return ENAMETOOLONG;
 		}
-		/* name must start with a '/' but not contain one. */
+		/* Name must start with a '/' but not contain one. */
 		if (*name != '/' || len < 2 || strchr(name + 1, '/') != NULL) {
-			kmem_free(ret, sizeof(*ret));
-			return (EINVAL);
+			return EINVAL;
 		}
-		ret->ks_namelen = len + 1;
-		ret->ks_name = kmem_alloc(ret->ks_namelen, KM_SLEEP);
-		strlcpy(ret->ks_name, name, len + 1);
-	} else
-		ret->ks_name = NULL;
-	ret->ks_mode = mode;
-	ret->ks_value = value;
-	ret->ks_ref = 1;
-	ret->ks_waiters = 0;
-	ret->ks_uid = kauth_cred_geteuid(uc);
-	ret->ks_gid = kauth_cred_getegid(uc);
-	mutex_init(&ret->ks_interlock, MUTEX_DEFAULT, IPL_NONE);
-	cv_init(&ret->ks_cv, "psem");
-
-	mutex_enter(&ksem_mutex);
-	if (nsems >= ksem_max) {
-		mutex_exit(&ksem_mutex);
-		if (ret->ks_name != NULL)
-			kmem_free(ret->ks_name, ret->ks_namelen);
-		kmem_free(ret, sizeof(*ret));
-		return (ENFILE);
-	}
-	nsems++;
-	while (ksem_lookup_byid(ksem_counter) != NULL) {
-		ksem_counter++;
-		/* 0 is a special value for libpthread */
-		if (ksem_counter == 0)
-			ksem_counter++;
+		kname = kmem_alloc(++len, KM_SLEEP);
+		strlcpy(kname, name, len);
+	} else {
+		kname = NULL;
+		len = 0;
 	}
-	ret->ks_id = ksem_counter;
-	LIST_INSERT_HEAD(&ksem_hash[SEM_HASH(ret->ks_id)], ret, ks_hash);
-	mutex_exit(&ksem_mutex);
 
-	*ksret = ret;
-	return (0);
+	ks = kmem_zalloc(sizeof(ksem_t), KM_SLEEP);
+	mutex_init(&ks->ks_lock, MUTEX_DEFAULT, IPL_NONE);
+	cv_init(&ks->ks_cv, "psem");
+	ks->ks_name = kname;
+	ks->ks_namelen = len;
+	ks->ks_mode = mode;
+	ks->ks_value = val;
+	ks->ks_ref = 1;
+
+	uc = l->l_cred;
+	ks->ks_uid = kauth_cred_geteuid(uc);
+	ks->ks_gid = kauth_cred_getegid(uc);
+
+	*ksret = ks;
+	return 0;
+}
+
+static void
+ksem_free(ksem_t *ks)
+{
+
+	if (ks->ks_name) {
+		KASSERT(ks->ks_namelen > 0);
+		kmem_free(ks->ks_name, ks->ks_namelen);
+	}
+	mutex_destroy(&ks->ks_lock);
+	cv_destroy(&ks->ks_cv);
+	kmem_free(ks, sizeof(ksem_t));
 }
 
 int
-sys__ksem_init(struct lwp *l, const struct sys__ksem_init_args *uap, register_t *retval)
+sys__ksem_init(struct lwp *l, const struct sys__ksem_init_args *uap,
+    register_t *retval)
 {
 	/* {
 		unsigned int value;
@@ -383,32 +326,43 @@
 }
 
 int
-do_ksem_init(struct lwp *l, unsigned int value, intptr_t *idp,
-    copyout_t docopyout)
+do_ksem_init(lwp_t *l, u_int val, intptr_t *idp, copyout_t docopyout)
 {
-	struct ksem *ks;
+	proc_t *p = l->l_proc;
+	ksem_t *ks;
+	file_t *fp;
 	intptr_t id;
-	int error;
+	int fd, error;
 
-	/* Note the mode does not matter for anonymous semaphores. */
-	error = ksem_create(l, NULL, &ks, 0, value);
-	if (error)
-		return (error);
-	id = SEM_TO_ID(ks);
-	error = (*docopyout)(&id, idp, sizeof(id));
+	error = fd_allocfile(&fp, &fd);
 	if (error) {
-		mutex_enter(&ks->ks_interlock);
-		ksem_delref(ks);
-		return (error);
+		return error;
 	}
+	fp->f_type = DTYPE_SEM;
+	fp->f_flag = FREAD | FWRITE;
+	fp->f_ops = &semops;
 
-	ksem_add_proc(l->l_proc, ks);
+	id = (intptr_t)fd;
+	error = (*docopyout)(&id, idp, sizeof(*idp));
+	if (error) {
+		fd_abort(p, fp, fd);
+		return error;
+	}
 
-	return (0);
+	/* Note the mode does not matter for anonymous semaphores. */
+	error = ksem_create(l, NULL, &ks, 0, val);
+	if (error) {
+		fd_abort(p, fp, fd);
+		return error;
+	}
+	fp->f_data = ks;
+	fd_affix(p, fp, fd);
+	return error;
 }
 
 int
-sys__ksem_open(struct lwp *l, const struct sys__ksem_open_args *uap, register_t *retval)
+sys__ksem_open(struct lwp *l, const struct sys__ksem_open_args *uap,
+    register_t *retval)
 {
 	/* {
 		const char *name;
@@ -427,467 +381,325 @@
      unsigned int value, intptr_t *idp, copyout_t docopyout)
 {
 	char name[SEM_MAX_NAMELEN + 1];
-	size_t done;
-	int error;
-	struct ksem *ksnew, *ks;
+	proc_t *p = l->l_proc;
+	ksem_t *ksnew = NULL, *ks;
+	file_t *fp;
 	intptr_t id;
+	int fd, error;
 
-	error = copyinstr(semname, name, sizeof(name), &done);
-	if (error)
-		return (error);
+	error = copyinstr(semname, name, sizeof(name), NULL);
+	if (error) {
+		return error;
+	}
+	error = fd_allocfile(&fp, &fd);
+	if (error) {
+		return error;
+	}
+	fp->f_type = DTYPE_SEM;
+	fp->f_flag = FREAD | FWRITE;
+	fp->f_ops = &semops;
+
+	/*
+	 * The ID (file descriptor number) can be stored early.
+	 * Note that zero is a special value for libpthread.
+	 */
+	id = (intptr_t)fd;
+	error = (*docopyout)(&id, idp, sizeof(*idp));
+	if (error) {
+		goto err;
+	}
+
+	if (oflag & O_CREAT) {
+		/* Create a new semaphore. */
+		error = ksem_create(l, name, &ksnew, mode, value);
+		if (error) {
+			goto err;
+		}
+		KASSERT(ksnew != NULL);
+	}
 
-	ksnew = NULL;
-	mutex_enter(&ksem_mutex);
-	ks = ksem_lookup_byname(name);
+	/* Look up a semaphore with this name. */
+	mutex_enter(&ksem_lock);
+	ks = ksem_lookup(name);
+	if (ks) {
+		KASSERT(mutex_owned(&ks->ks_lock));
+		mutex_exit(&ksem_lock);
 
-	/* Found one? */
-	if (ks != NULL) {
 		/* Check for exclusive create. */
 		if (oflag & O_EXCL) {
-			mutex_exit(&ks->ks_interlock);
-			mutex_exit(&ksem_mutex);
-			return (EEXIST);
+			mutex_exit(&ks->ks_lock);
+			error = EEXIST;
+			goto err;
 		}
- found_one:
 		/*
-		 * Verify permissions.  If we can access it, add
-		 * this process's reference.
+		 * Verify permissions.  If we can access it,
+		 * add the reference of this thread.
 		 */
-		KASSERT(mutex_owned(&ks->ks_interlock));
 		error = ksem_perm(l, ks);
-		if (error == 0)
-			ksem_addref(ks);
-		mutex_exit(&ks->ks_interlock);
-		mutex_exit(&ksem_mutex);
-		if (error)
-			return (error);
-
-		id = SEM_TO_ID(ks);
-		error = (*docopyout)(&id, idp, sizeof(id));
+		if (error == 0) {
+			ks->ks_ref++;
+		}
+		mutex_exit(&ks->ks_lock);
 		if (error) {
-			mutex_enter(&ks->ks_interlock);
-			ksem_delref(ks);
-			return (error);
+			goto err;
+		}
+	} else {
+		/* Fail if not found and not creating. */
+		if ((oflag & O_CREAT) == 0) {
+			mutex_exit(&ksem_lock);
+			KASSERT(ksnew == NULL);
+			return ENOENT;
 		}
 
-		ksem_add_proc(l->l_proc, ks);
-
-		return (0);
-	}
+		/* Re-check the limit, now with the lock held. */
+		if (nsems >= ksem_max) {
+			mutex_exit(&ksem_lock);
+			error = ENFILE;
+			goto err;
+		}
 
-	/*
-	 * didn't ask for creation? error.
-	 */
-	if ((oflag & O_CREAT) == 0) {
-		mutex_exit(&ksem_mutex);
-		return (ENOENT);
+		/*
+		 * Finally, insert semaphore into the global list.
+		 * Note: it already has the initial reference.
+		 */
+		ks = ksnew;
+		LIST_INSERT_HEAD(&ksem_head, ks, ks_entry);
+		nsems++;
+		mutex_exit(&ksem_lock);
+
+		ksnew = NULL;
+	}
+	KASSERT(ks != NULL);
+	fp->f_data = ks;
+	fd_affix(p, fp, fd);
+err:
+	if (error) {
+		fd_abort(p, fp, fd);
 	}
-
-	/*
-	 * We may block during creation, so drop the lock.
-	 */
-	mutex_exit(&ksem_mutex);
-	error = ksem_create(l, name, &ksnew, mode, value);
-	if (error != 0)
-		return (error);
-
-	id = SEM_TO_ID(ksnew);
-	error = (*docopyout)(&id, idp, sizeof(id));
-	if (error) {
-		kmem_free(ksnew->ks_name, ksnew->ks_namelen);
-		ksnew->ks_name = NULL;
-
-		mutex_enter(&ksnew->ks_interlock);
-		ksem_delref(ksnew);
-		return (error);
+	if (ksnew) {
+		ksem_free(ksnew);
 	}
+	return error;
+}
 
-	/*
-	 * We need to make sure we haven't lost a race while
-	 * allocating during creation.
-	 */
-	mutex_enter(&ksem_mutex);
-	if ((ks = ksem_lookup_byname(name)) != NULL) {
-		if (oflag & O_EXCL) {
-			mutex_exit(&ks->ks_interlock);
-			mutex_exit(&ksem_mutex);
-
-			kmem_free(ksnew->ks_name, ksnew->ks_namelen);
-			ksnew->ks_name = NULL;
-
-			mutex_enter(&ksnew->ks_interlock);
-			ksem_delref(ksnew);
-			return (EEXIST);
-		}
-		goto found_one;
-	} else {
-		/* ksnew already has its initial reference. */
-		LIST_INSERT_HEAD(&ksem_head, ksnew, ks_entry);
-		mutex_exit(&ksem_mutex);
-
-		ksem_add_proc(l->l_proc, ksnew);
-	}
-	return (error);
+int
+sys__ksem_close(struct lwp *l, const struct sys__ksem_close_args *uap,
+    register_t *retval)
+{
+	/* {
+		intptr_t id;
+	} */
+	struct sys_close_args cuap;
+	SCARG(&cuap, fd) = SCARG(uap, id);
+	return sys_close(l, (const void *)&cuap, retval);
 }
 
-/* We must have a read lock on the ksem_proc list! */
-static struct ksem *
-ksem_lookup_proc(struct ksem_proc *kp, intptr_t id)
+static int
+ksem_close_fop(file_t *fp)
 {
-	struct ksem_ref *ksr;
+	ksem_t *ks = fp->f_data;
+	bool destroy = false;
 
-	LIST_FOREACH(ksr, &kp->kp_ksems, ksr_list) {
-		if (id == SEM_TO_ID(ksr->ksr_ksem)) {
-			mutex_enter(&ksr->ksr_ksem->ks_interlock);
-			return (ksr->ksr_ksem);
-		}
+	mutex_enter(&ks->ks_lock);
+	KASSERT(ks->ks_ref > 0);
+	if (--ks->ks_ref == 0) {
+		/*
+		 * Destroy if the last reference and semaphore is unnamed,
+		 * or unlinked (for named semaphore).
+		 */
+		destroy = (ks->ks_flags & KS_UNLINKED) || (ks->ks_name == NULL);
 	}
+	mutex_exit(&ks->ks_lock);
 
-	return (NULL);
+	if (destroy) {
+		ksem_free(ks);
+	}
+	return 0;
 }
 
 int
-sys__ksem_unlink(struct lwp *l, const struct sys__ksem_unlink_args *uap, register_t *retval)
+sys__ksem_unlink(struct lwp *l, const struct sys__ksem_unlink_args *uap,
+    register_t *retval)
 {
 	/* {
 		const char *name;
 	} */
-	char name[SEM_MAX_NAMELEN + 1], *cp;
-	size_t done, len;
-	struct ksem *ks;
+	char name[SEM_MAX_NAMELEN + 1];
+	ksem_t *ks;
+	u_int refcnt;
 	int error;
 
-	error = copyinstr(SCARG(uap, name), name, sizeof(name), &done);
+	error = copyinstr(SCARG(uap, name), name, sizeof(name), NULL);
 	if (error)
 		return error;
 
-	mutex_enter(&ksem_mutex);
-	ks = ksem_lookup_byname(name);
+	mutex_enter(&ksem_lock);
+	ks = ksem_lookup(name);
 	if (ks == NULL) {
-		mutex_exit(&ksem_mutex);
-		return (ENOENT);
+		mutex_exit(&ksem_lock);
+		return ENOENT;
 	}
+	KASSERT(mutex_owned(&ks->ks_lock));
 
-	KASSERT(mutex_owned(&ks->ks_interlock));
+	/* Verify permissions. */
+	error = ksem_perm(l, ks);
+	if (error) {
+		mutex_exit(&ks->ks_lock);
+		mutex_exit(&ksem_lock);
+		return error;
+	}
 
+	/* Remove and destroy if no references. */
 	LIST_REMOVE(ks, ks_entry);
-	cp = ks->ks_name;
-	len = ks->ks_namelen;
-	ks->ks_name = NULL;
-
-	mutex_exit(&ksem_mutex);
-
-	if (ks->ks_ref == 0)
-		ksem_free(ks);
-	else
-		mutex_exit(&ks->ks_interlock);
-
-	kmem_free(cp, len);
+	nsems--;
 
-	return (0);
-}
-
-int
-sys__ksem_close(struct lwp *l, const struct sys__ksem_close_args *uap, register_t *retval)
-{
-	/* {
-		intptr_t id;
-	} */
-	struct ksem_proc *kp;
-	struct ksem_ref *ksr;
-	struct ksem *ks;
-
-	kp = proc_getspecific(l->l_proc, ksem_specificdata_key);
-	if (kp == NULL)
-		return (EINVAL);
-
-	rw_enter(&kp->kp_lock, RW_WRITER);
-
-	ks = ksem_lookup_proc(kp, SCARG(uap, id));
-	if (ks == NULL) {
-		rw_exit(&kp->kp_lock);
-		return (EINVAL);
+	refcnt = ks->ks_ref;
+	if (refcnt) {
+		/* Mark as unlinked, if there are references. */
+		ks->ks_flags |= KS_UNLINKED;
 	}
+	mutex_exit(&ks->ks_lock);
+	mutex_exit(&ksem_lock);
 
-	KASSERT(mutex_owned(&ks->ks_interlock));
-	if (ks->ks_name == NULL) {
-		mutex_exit(&ks->ks_interlock);
-		rw_exit(&kp->kp_lock);
-		return (EINVAL);
+	if (refcnt == 0) {
+		ksem_free(ks);
 	}
-
-	ksr = ksem_drop_proc(kp, ks);
-	rw_exit(&kp->kp_lock);
-	kmem_free(ksr, sizeof(*ksr));
-
-	return (0);
+	return 0;
 }
 
 int
-sys__ksem_post(struct lwp *l, const struct sys__ksem_post_args *uap, register_t *retval)
+sys__ksem_post(struct lwp *l, const struct sys__ksem_post_args *uap,
+    register_t *retval)
 {
 	/* {
 		intptr_t id;
 	} */
-	struct ksem_proc *kp;
-	struct ksem *ks;
-	int error;
-
-	kp = proc_getspecific(l->l_proc, ksem_specificdata_key);
-	if (kp == NULL)
-		return (EINVAL);
-
-	rw_enter(&kp->kp_lock, RW_READER);
-	ks = ksem_lookup_proc(kp, SCARG(uap, id));
-	rw_exit(&kp->kp_lock);
-	if (ks == NULL)
-		return (EINVAL);
+	int fd = (int)SCARG(uap, id), error;
+	ksem_t *ks;
 
-	KASSERT(mutex_owned(&ks->ks_interlock));
+	error = ksem_get(fd, &ks);
+	if (error) {
+		return error;
+	}
+	KASSERT(mutex_owned(&ks->ks_lock));
 	if (ks->ks_value == SEM_VALUE_MAX) {
 		error = EOVERFLOW;
 		goto out;
 	}
-	++ks->ks_value;
-	if (ks->ks_waiters)
+	ks->ks_value++;
+	if (ks->ks_waiters) {
 		cv_broadcast(&ks->ks_cv);
-	error = 0;
- out:
-	mutex_exit(&ks->ks_interlock);
-	return (error);
+	}
+out:
+	mutex_exit(&ks->ks_lock);
+	fd_putfile(fd);
+	return error;
 }
 
 static int
-ksem_wait(struct lwp *l, intptr_t id, int tryflag)
+ksem_wait(lwp_t *l, intptr_t id, bool try)
 {
-	struct ksem_proc *kp;
-	struct ksem *ks;
-	int error;
+	int fd = (int)id, error;
+	ksem_t *ks;
 
-	kp = proc_getspecific(l->l_proc, ksem_specificdata_key);
-	if (kp == NULL)
-		return (EINVAL);
-
-	rw_enter(&kp->kp_lock, RW_READER);
-	ks = ksem_lookup_proc(kp, id);
-	rw_exit(&kp->kp_lock);
-	if (ks == NULL)
-		return (EINVAL);
-
-	KASSERT(mutex_owned(&ks->ks_interlock));
-	ksem_addref(ks);
+	error = ksem_get(fd, &ks);
+	if (error) {
+		return error;
+	}
+	KASSERT(mutex_owned(&ks->ks_lock));
 	while (ks->ks_value == 0) {
 		ks->ks_waiters++;
-		if (tryflag)
-			error = EAGAIN;
-		else
-			error = cv_wait_sig(&ks->ks_cv, &ks->ks_interlock);
+		error = try ? EAGAIN : cv_wait_sig(&ks->ks_cv, &ks->ks_lock);
 		ks->ks_waiters--;
 		if (error)
 			goto out;
 	}
 	ks->ks_value--;
-	error = 0;
- out:
-	ksem_delref(ks);
-	return (error);
+out:
+	mutex_exit(&ks->ks_lock);
+	fd_putfile(fd);
+	return error;
 }
 
 int
-sys__ksem_wait(struct lwp *l, const struct sys__ksem_wait_args *uap, register_t *retval)
+sys__ksem_wait(struct lwp *l, const struct sys__ksem_wait_args *uap,
+    register_t *retval)
 {
 	/* {
 		intptr_t id;
 	} */
 
-	return ksem_wait(l, SCARG(uap, id), 0);
+	return ksem_wait(l, SCARG(uap, id), false);
 }
 
 int
-sys__ksem_trywait(struct lwp *l, const struct sys__ksem_trywait_args *uap, register_t *retval)
+sys__ksem_trywait(struct lwp *l, const struct sys__ksem_trywait_args *uap,
+    register_t *retval)
 {
 	/* {
 		intptr_t id;
 	} */
 
-	return ksem_wait(l, SCARG(uap, id), 1);
+	return ksem_wait(l, SCARG(uap, id), true);
 }
 
 int
-sys__ksem_getvalue(struct lwp *l, const struct sys__ksem_getvalue_args *uap, register_t *retval)
+sys__ksem_getvalue(struct lwp *l, const struct sys__ksem_getvalue_args *uap,
+    register_t *retval)
 {
 	/* {
 		intptr_t id;
 		unsigned int *value;
 	} */
-	struct ksem_proc *kp;
-	struct ksem *ks;
+	int fd = (int)SCARG(uap, id), error;
+	ksem_t *ks;
 	unsigned int val;
 
-	kp = proc_getspecific(l->l_proc, ksem_specificdata_key);
-	if (kp == NULL)
-		return (EINVAL);
-
-	rw_enter(&kp->kp_lock, RW_READER);
-	ks = ksem_lookup_proc(kp, SCARG(uap, id));
-	rw_exit(&kp->kp_lock);
-	if (ks == NULL)
-		return (EINVAL);
-
-	KASSERT(mutex_owned(&ks->ks_interlock));
+	error = ksem_get(fd, &ks);
+	if (error) {
+		return error;
+	}
+	KASSERT(mutex_owned(&ks->ks_lock));
 	val = ks->ks_value;
-	mutex_exit(&ks->ks_interlock);
+	mutex_exit(&ks->ks_lock);
+	fd_putfile(fd);
 
-	return (copyout(&val, SCARG(uap, value), sizeof(val)));
+	return copyout(&val, SCARG(uap, value), sizeof(val));
 }
 
 int
-sys__ksem_destroy(struct lwp *l, const struct sys__ksem_destroy_args *uap, register_t *retval)
+sys__ksem_destroy(struct lwp *l, const struct sys__ksem_destroy_args *uap,
+    register_t *retval)
 {
 	/* {
 		intptr_t id;
 	} */
-	struct ksem_proc *kp;
-	struct ksem_ref *ksr;
-	struct ksem *ks;
-
-	kp = proc_getspecific(l->l_proc, ksem_specificdata_key);
-	if (kp == NULL)
-		return (EINVAL);
-
-	rw_enter(&kp->kp_lock, RW_WRITER);
+	int fd = (int)SCARG(uap, id), error;
+	struct sys_close_args cuap;
+	ksem_t *ks;
 
-	ks = ksem_lookup_proc(kp, SCARG(uap, id));
-	if (ks == NULL) {
-		rw_exit(&kp->kp_lock);
-		return (EINVAL);
+	error = ksem_get(fd, &ks);
+	if (error) {
+		return error;
 	}
+	KASSERT(mutex_owned(&ks->ks_lock));
 
-	KASSERT(mutex_owned(&ks->ks_interlock));
-
-	/*
-	 * XXX This misses named semaphores which have been unlink'd,
-	 * XXX but since behavior of destroying a named semaphore is
-	 * XXX undefined, this is technically allowed.
-	 */
+	/* Operation is only for unnamed semaphores. */
 	if (ks->ks_name != NULL) {
-		mutex_exit(&ks->ks_interlock);
-		rw_exit(&kp->kp_lock);
-		return (EINVAL);
+		error = EINVAL;
+		goto out;
 	}
-
+	/* Cannot destroy if there are waiters. */
 	if (ks->ks_waiters) {
-		mutex_exit(&ks->ks_interlock);
-		rw_exit(&kp->kp_lock);
-		return (EBUSY);
-	}
-
-	ksr = ksem_drop_proc(kp, ks);
-	rw_exit(&kp->kp_lock);
-	kmem_free(ksr, sizeof(*ksr));
-
-	return (0);
-}
-
-static void
-ksem_forkhook(struct proc *p2, struct proc *p1)
-{
-	struct ksem_proc *kp1, *kp2;
-	struct ksem_ref *ksr, *ksr1;
-
-	kp1 = proc_getspecific(p1, ksem_specificdata_key);
-	if (kp1 == NULL)
-		return;
-
-	kp2 = ksem_proc_alloc();
-
-	rw_enter(&kp1->kp_lock, RW_READER);
-
-	if (!LIST_EMPTY(&kp1->kp_ksems)) {
-		LIST_FOREACH(ksr, &kp1->kp_ksems, ksr_list) {
-			ksr1 = kmem_alloc(sizeof(*ksr), KM_SLEEP);
-			ksr1->ksr_ksem = ksr->ksr_ksem;
-			mutex_enter(&ksr->ksr_ksem->ks_interlock);
-			ksem_addref(ksr->ksr_ksem);
-			mutex_exit(&ksr->ksr_ksem->ks_interlock);
-			LIST_INSERT_HEAD(&kp2->kp_ksems, ksr1, ksr_list);
-		}
-	}
-
-	rw_exit(&kp1->kp_lock);
-	proc_setspecific(p2, ksem_specificdata_key, kp2);
-}
-
-static void
-ksem_exechook(struct proc *p, void *arg)
-{
-	struct ksem_proc *kp;
-
-	kp = proc_getspecific(p, ksem_specificdata_key);
-	if (kp != NULL) {
-		proc_setspecific(p, ksem_specificdata_key, NULL);
-		ksem_proc_dtor(kp);
-	}
-}
-
-static int
-ksem_fini(bool interface)
-{
-	int error;
-
-	if (interface) {
-		error = syscall_disestablish(NULL, ksem_syscalls);
-		if (error != 0) {
-			return error;
-		}
-		if (nsems != 0) {
-			error = syscall_establish(NULL, ksem_syscalls);
-			KASSERT(error == 0);
-			return EBUSY;
-		}
+		error = EBUSY;
+		goto out;
 	}
-	exechook_disestablish(ksem_ehook);
-	forkhook_disestablish(ksem_fhook);
-	proc_specific_key_delete(ksem_specificdata_key);
-	mutex_destroy(&ksem_mutex);
-	return 0;
-}
-
-static int
-ksem_init(void)
-{
-	int error, i;
-
-	mutex_init(&ksem_mutex, MUTEX_DEFAULT, IPL_NONE);
-	for (i = 0; i < SEM_HASHTBL_SIZE; i++)
-		LIST_INIT(&ksem_hash[i]);
-	error = proc_specific_key_create(&ksem_specificdata_key,
-	    ksem_proc_dtor);
-	if (error != 0) {
-		mutex_destroy(&ksem_mutex);
+out:
+	mutex_exit(&ks->ks_lock);
+	fd_putfile(fd);
+	if (error) {
 		return error;
 	}
-	ksem_ehook = exechook_establish(ksem_exechook, NULL);
-	ksem_fhook = forkhook_establish(ksem_forkhook);
-	error = syscall_establish(NULL, ksem_syscalls);
-	if (error != 0) {
-		(void)ksem_fini(false);
-	}
-	return error;
-}
-
-static int
-ksem_modcmd(modcmd_t cmd, void *arg)
-{
-
-	switch (cmd) {
-	case MODULE_CMD_INIT:
-		return ksem_init();
-
-	case MODULE_CMD_FINI:
-		return ksem_fini(true);
-
-	default:
-		return ENOTTY;
-	}
+	SCARG(&cuap, fd) = fd;
+	return sys_close(l, (const void *)&cuap, retval);
 }

Index: src/sys/sys/file.h
diff -u src/sys/sys/file.h:1.71 src/sys/sys/file.h:1.72
--- src/sys/sys/file.h:1.71	Thu Dec 24 19:01:12 2009
+++ src/sys/sys/file.h	Mon Apr 11 22:31:43 2011
@@ -1,4 +1,4 @@
-/*	$NetBSD: file.h,v 1.71 2009/12/24 19:01:12 elad Exp $	*/
+/*	$NetBSD: file.h,v 1.72 2011/04/11 22:31:43 rmind Exp $	*/
 
 /*-
  * Copyright (c) 2009 The NetBSD Foundation, Inc.
@@ -109,15 +109,6 @@
 	kmutex_t	f_lock;		/* lock on structure */
 	int		f_flag;		/* see fcntl.h */
 	u_int		f_marker;	/* traversal marker (sysctl) */
-#define	DTYPE_VNODE	1		/* file */
-#define	DTYPE_SOCKET	2		/* communications endpoint */
-#define	DTYPE_PIPE	3		/* pipe */
-#define	DTYPE_KQUEUE	4		/* event queue */
-#define	DTYPE_MISC	5		/* misc file descriptor type */
-#define	DTYPE_CRYPTO	6		/* crypto */
-#define	DTYPE_MQUEUE	7		/* message queue */
-#define DTYPE_NAMES \
-    "0", "file", "socket", "pipe", "kqueue", "misc", "crypto", "mqueue"
 	u_int		f_type;		/* descriptor type */
 	u_int		f_advice;	/* access pattern hint; UVM_ADV_* */
 	u_int		f_count;	/* reference count */
@@ -127,6 +118,23 @@
 };
 
 /*
+ * Descriptor types.
+ */
+
+#define	DTYPE_VNODE	1		/* file */
+#define	DTYPE_SOCKET	2		/* communications endpoint */
+#define	DTYPE_PIPE	3		/* pipe */
+#define	DTYPE_KQUEUE	4		/* event queue */
+#define	DTYPE_MISC	5		/* misc file descriptor type */
+#define	DTYPE_CRYPTO	6		/* crypto */
+#define	DTYPE_MQUEUE	7		/* message queue */
+#define	DTYPE_SEM	8		/* semaphore */
+
+#define DTYPE_NAMES	\
+    "0", "file", "socket", "pipe", "kqueue", "misc", "crypto", "mqueue", \
+    "semaphore"
+
+/*
  * Flags for fo_read and fo_write and do_fileread/write/v
  */
 #define	FOF_UPDATE_OFFSET	0x0001	/* update the file offset */

Reply via email to