Module Name:    src
Committed By:   pooka
Date:           Sat Jan  8 14:19:27 UTC 2011

Modified Files:
        src/lib/librumphijack: hijack.c

Log Message:
Support fork() and dup2().

This is sufficient to make an unmodified httpd(8) be able to serve
pages via a rump networking stack.


To generate a diff of this commit:
cvs rdiff -u -r1.1 -r1.2 src/lib/librumphijack/hijack.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/lib/librumphijack/hijack.c
diff -u src/lib/librumphijack/hijack.c:1.1 src/lib/librumphijack/hijack.c:1.2
--- src/lib/librumphijack/hijack.c:1.1	Fri Jan  7 19:52:43 2011
+++ src/lib/librumphijack/hijack.c	Sat Jan  8 14:19:27 2011
@@ -1,4 +1,4 @@
-/*      $NetBSD: hijack.c,v 1.1 2011/01/07 19:52:43 pooka Exp $	*/
+/*      $NetBSD: hijack.c,v 1.2 2011/01/08 14:19:27 pooka Exp $	*/
 
 /*-
  * Copyright (c) 2011 Antti Kantee.  All Rights Reserved.
@@ -26,7 +26,7 @@
  */
 
 #include <sys/cdefs.h>
-__RCSID("$NetBSD: hijack.c,v 1.1 2011/01/07 19:52:43 pooka Exp $");
+__RCSID("$NetBSD: hijack.c,v 1.2 2011/01/08 14:19:27 pooka Exp $");
 
 #include <sys/param.h>
 #include <sys/types.h>
@@ -50,9 +50,6 @@
 #include <stdlib.h>
 #include <unistd.h>
 
-/* XXX: need runtime selection.  low for now due to FD_SETSIZE */
-#define HIJACK_FDOFF 128
-
 enum {	RUMPCALL_SOCKET, RUMPCALL_ACCEPT, RUMPCALL_BIND, RUMPCALL_CONNECT,
 	RUMPCALL_GETPEERNAME, RUMPCALL_GETSOCKNAME, RUMPCALL_LISTEN,
 	RUMPCALL_RECVFROM, RUMPCALL_RECVMSG,
@@ -104,13 +101,13 @@
 static int	(*host_select)(int, fd_set *, fd_set *, fd_set *,
 			       struct timeval *);
 static int	(*host_poll)(struct pollfd *, nfds_t, int);
+static pid_t	(*host_fork)(void);
+static int	(*host_dup2)(int, int);
 #if 0
 static int	(*host_pollts)(struct pollfd *, nfds_t,
 			       const struct timespec *, const sigset_t *);
 #endif
 
-#define assertfd(_fd_) assert((_fd_) >= HIJACK_FDOFF)
-
 static void *rumpcalls[RUMPCALL__NUM];
 
 /*
@@ -150,6 +147,8 @@
 	host_close = dlsym(RTLD_NEXT, "close");
 	host_select = dlsym(RTLD_NEXT, "select");
 	host_poll = dlsym(RTLD_NEXT, "poll");
+	host_fork = dlsym(RTLD_NEXT, "fork");
+	host_dup2 = dlsym(RTLD_NEXT, "dup2");
 
 	for (i = 0; i < RUMPCALL__NUM; i++) {
 		char sysname[128];
@@ -166,7 +165,6 @@
 		err(1, "rumpclient init");
 }
 
-#define ADJ(fd) (fd - HIJACK_FDOFF)
 //#define DEBUGJACK
 #ifdef DEBUGJACK
 #define DPRINTF(x) printf x
@@ -174,6 +172,45 @@
 #define DPRINTF(x)
 #endif
 
+static unsigned dup2mask;	/* accumulates newd+1 for each rump dup2() */
+#define ISDUP2D(fd) ((((fd)+1) & dup2mask) == ((fd)+1)) /* arg parenthesised */
+
+/* XXX: need runtime selection.  low for now due to FD_SETSIZE */
+#define HIJACK_FDOFF 128
+#define HIJACK_SELECT 128 /* XXX */
+#define HIJACK_ASSERT 128 /* XXX */
+/*
+ * Translate a rump kernel fd to the value handed to the application:
+ * -1 and dup2'd fds pass through, everything else gains HIJACK_FDOFF.
+ */
+static int
+fd_rump2host(int fd)
+{
+
+	if (fd == -1 || ISDUP2D(fd))
+		return fd;
+	return fd + HIJACK_FDOFF;
+}
+
+/*
+ * Host-visible fd -> rump kernel fd: drop HIJACK_FDOFF unless dup2'd.
+ */
+static int
+fd_host2rump(int fd)
+{
+	return ISDUP2D(fd) ? fd : fd - HIJACK_FDOFF;
+}
+
+/* True if fd is routed to the rump kernel (dup2'd or in offset range). */
+static bool
+fd_isrump(int fd)
+{
+	return fd >= HIJACK_FDOFF || ISDUP2D(fd);
+}
+
+#define assertfd(_fd_) assert(ISDUP2D(_fd_) || (_fd_) >= HIJACK_ASSERT)
+#undef HIJACK_FDOFF
+
 /*
  * Following wrappers always call the rump kernel.
  */
@@ -185,12 +222,12 @@
 	int (*rc_socket)(int, int, int);
 	int fd;
 
-	DPRINTF(("socket\n"));
 	rc_socket = rumpcalls[RUMPCALL_SOCKET];
 	fd = rc_socket(domain, type, protocol);
-	if (fd != -1)
-		fd += HIJACK_FDOFF;
-	return fd;
+
+	DPRINTF(("socket <- %d\n", fd_rump2host(fd)));
+
+	return fd_rump2host(fd);
 }
 
 int
@@ -199,13 +236,13 @@
 	int (*rc_accept)(int, struct sockaddr *, socklen_t *);
 	int fd;
 
-	DPRINTF(("accept %d\n", s));
+	DPRINTF(("accept -> %d", s));
 	assertfd(s);
 	rc_accept = rumpcalls[RUMPCALL_ACCEPT];
-	fd = rc_accept(ADJ(s), addr, addrlen);
-	if (fd != -1)
-		fd += HIJACK_FDOFF;
-	return fd;
+	fd = rc_accept(fd_host2rump(s), addr, addrlen);
+	DPRINTF((" <- %d\n", fd_rump2host(fd)));
+
+	return fd_rump2host(fd);
 }
 
 int
@@ -213,10 +250,11 @@
 {
 	int (*rc_bind)(int, const struct sockaddr *, socklen_t);
 
-	DPRINTF(("bind\n"));
+	DPRINTF(("bind -> %d\n", s));
 	assertfd(s);
 	rc_bind = rumpcalls[RUMPCALL_BIND];
-	return rc_bind(ADJ(s), name, namelen);
+
+	return rc_bind(fd_host2rump(s), name, namelen);
 }
 
 int
@@ -224,10 +262,11 @@
 {
 	int (*rc_connect)(int, const struct sockaddr *, socklen_t);
 
-	DPRINTF(("connect %d\n", s));
+	DPRINTF(("connect -> %d\n", s));
 	assertfd(s);
 	rc_connect = rumpcalls[RUMPCALL_CONNECT];
-	return rc_connect(ADJ(s), name, namelen);
+
+	return rc_connect(fd_host2rump(s), name, namelen);
 }
 
 int
@@ -235,10 +274,10 @@
 {
 	int (*rc_getpeername)(int, struct sockaddr *, socklen_t *);
 
-	DPRINTF(("getpeername\n"));
+	DPRINTF(("getpeername -> %d\n", s));
 	assertfd(s);
 	rc_getpeername = rumpcalls[RUMPCALL_GETPEERNAME];
-	return rc_getpeername(ADJ(s), name, namelen);
+	return rc_getpeername(fd_host2rump(s), name, namelen);
 }
 
 int
@@ -246,10 +285,10 @@
 {
 	int (*rc_getsockname)(int, struct sockaddr *, socklen_t *);
 
-	DPRINTF(("getsockname\n"));
+	DPRINTF(("getsockname -> %d\n", s));
 	assertfd(s);
 	rc_getsockname = rumpcalls[RUMPCALL_GETSOCKNAME];
-	return rc_getsockname(ADJ(s), name, namelen);
+	return rc_getsockname(fd_host2rump(s), name, namelen);
 }
 
 int
@@ -257,10 +296,10 @@
 {
 	int (*rc_listen)(int, int);
 
-	DPRINTF(("listen\n"));
+	DPRINTF(("listen -> %d\n", s));
 	assertfd(s);
 	rc_listen = rumpcalls[RUMPCALL_LISTEN];
-	return rc_listen(ADJ(s), backlog);
+	return rc_listen(fd_host2rump(s), backlog);
 }
 
 ssize_t
@@ -280,7 +319,7 @@
 	DPRINTF(("recvfrom\n"));
 	assertfd(s);
 	rc_recvfrom = rumpcalls[RUMPCALL_RECVFROM];
-	return rc_recvfrom(ADJ(s), buf, len, flags, from, fromlen);
+	return rc_recvfrom(fd_host2rump(s), buf, len, flags, from, fromlen);
 }
 
 ssize_t
@@ -291,7 +330,7 @@
 	DPRINTF(("recvmsg\n"));
 	assertfd(s);
 	rc_recvmsg = rumpcalls[RUMPCALL_RECVMSG];
-	return rc_recvmsg(ADJ(s), msg, flags);
+	return rc_recvmsg(fd_host2rump(s), msg, flags);
 }
 
 ssize_t
@@ -314,7 +353,7 @@
 	DPRINTF(("sendto\n"));
 	assertfd(s);
 	rc_sendto = rumpcalls[RUMPCALL_SENDTO];
-	return rc_sendto(ADJ(s), buf, len, flags, to, tolen);
+	return rc_sendto(fd_host2rump(s), buf, len, flags, to, tolen);
 }
 
 ssize_t
@@ -325,7 +364,7 @@
 	DPRINTF(("sendmsg\n"));
 	assertfd(s);
 	rc_sendmsg = rumpcalls[RUMPCALL_SENDTO];
-	return rc_sendmsg(ADJ(s), msg, flags);
+	return rc_sendmsg(fd_host2rump(s), msg, flags);
 }
 
 int
@@ -336,7 +375,7 @@
 	DPRINTF(("getsockopt\n"));
 	assertfd(s);
 	rc_getsockopt = rumpcalls[RUMPCALL_GETSOCKOPT];
-	return rc_getsockopt(ADJ(s), level, optname, optval, optlen);
+	return rc_getsockopt(fd_host2rump(s), level, optname, optval, optlen);
 }
 
 int
@@ -347,7 +386,7 @@
 	DPRINTF(("setsockopt\n"));
 	assertfd(s);
 	rc_setsockopt = rumpcalls[RUMPCALL_SETSOCKOPT];
-	return rc_setsockopt(ADJ(s), level, optname, optval, optlen);
+	return rc_setsockopt(fd_host2rump(s), level, optname, optval, optlen);
 }
 
 int
@@ -358,7 +397,69 @@
 	DPRINTF(("shutdown\n"));
 	assertfd(s);
 	rc_shutdown = rumpcalls[RUMPCALL_SHUTDOWN];
-	return rc_shutdown(ADJ(s), how);
+	return rc_shutdown(fd_host2rump(s), how);
+}
+
+/*
+ * dup2 is special.  We allow dup2 of a rump kernel fd to 0-2 since
+ * many programs do that; any other newd fails with -1/EBADF.  Host
+ * fds go straight to the host dup2.
+ *
+ * Note: cannot rump2host newd, because it is often hardcoded.
+ * XXX: should disable debug prints after stdout/stderr are dup2'd
+ * XXX: newd+1 is not a single bit, so ISDUP2D() may match extra fds
+ */
+int
+dup2(int oldd, int newd)
+{
+	int rv;
+
+	DPRINTF(("dup2 -> %d (o) -> %d (n)\n", oldd, newd));
+
+	if (!fd_isrump(oldd))
+		return host_dup2(oldd, newd);
+	if (!(newd >= 0 && newd <= 2)) {
+		errno = EBADF;	/* report via errno, not the return value */
+		return -1;
+	}
+
+	rv = rump_sys_dup2(fd_host2rump(oldd), newd);
+	if (rv != -1)
+		dup2mask |= newd+1;
+	return rv;
+}
+
+/*
+ * Wrap fork with the appropriate rump client calls to preserve the
+ * file descriptors of the forking parent in the child, but prevent
+ * double use of the connection fd.  Returns -1 if prefork/init fails.
+ */
+
+pid_t
+fork()
+{
+	struct rumpclient_fork *rf;
+	pid_t rv;
+
+	DPRINTF(("fork\n"));
+
+	if ((rf = rumpclient_prefork()) == NULL)
+		return -1;
+
+	switch ((rv = host_fork())) {
+	case -1:	/* host fork failed */
+		/* XXX: cancel rf -- nothing undoes the prefork here */
+		break;
+	case 0:		/* child: run the rump client fork init with rf */
+		if (rumpclient_fork_init(rf) == -1)
+			rv = -1;
+		break;
+	default:	/* parent: rv is the child's pid */
+		break;
+	}
+
+	DPRINTF(("fork returns %d\n", rv));
+	return rv;
 }
 
 /*
@@ -372,11 +473,11 @@
 	ssize_t n;
 
 	DPRINTF(("read %d\n", fd));
-	if (fd < HIJACK_FDOFF) {
-		op_read = host_read;
-	} else {
-		fd = ADJ(fd);
+	if (fd_isrump(fd)) {
+		fd = fd_host2rump(fd);
 		op_read = rumpcalls[RUMPCALL_READ];
+	} else {
+		op_read = host_read;
 	}
 
 	n = op_read(fd, buf, len);
@@ -388,11 +489,11 @@
 {
 	int (*op_readv)(int, const struct iovec *, int);
 
-	if (fd < HIJACK_FDOFF) {
-		op_readv = host_readv;
-	} else {
-		fd = ADJ(fd);
+	if (fd_isrump(fd)) {
+		fd = fd_host2rump(fd);
 		op_readv = rumpcalls[RUMPCALL_READV];
+	} else {
+		op_readv = host_readv;
 	}
 
 	DPRINTF(("readv\n"));
@@ -404,11 +505,11 @@
 {
 	int (*op_write)(int, const void *, size_t);
 
-	if (fd < HIJACK_FDOFF) {
-		op_write = host_write;
-	} else {
-		fd = ADJ(fd);
+	if (fd_isrump(fd)) {
+		fd = fd_host2rump(fd);
 		op_write = rumpcalls[RUMPCALL_WRITE];
+	} else {
+		op_write = host_write;
 	}
 
 	return op_write(fd, buf, len);
@@ -419,11 +520,11 @@
 {
 	int (*op_writev)(int, const struct iovec *, int);
 
-	if (fd < HIJACK_FDOFF) {
-		op_writev = host_writev;
-	} else {
-		fd = ADJ(fd);
+	if (fd_isrump(fd)) {
+		fd = fd_host2rump(fd);
 		op_writev = rumpcalls[RUMPCALL_WRITEV];
+	} else {
+		op_writev = host_writev;
 	}
 
 	return op_writev(fd, iov, iovcnt);
@@ -437,11 +538,11 @@
 	int rv;
 
 	DPRINTF(("ioctl\n"));
-	if (fd < HIJACK_FDOFF) {
-		op_ioctl = host_ioctl;
-	} else {
-		fd = ADJ(fd);
+	if (fd_isrump(fd)) {
+		fd = fd_host2rump(fd);
 		op_ioctl = rumpcalls[RUMPCALL_IOCTL];
+	} else {
+		op_ioctl = host_ioctl;
 	}
 
 	va_start(ap, cmd);
@@ -458,11 +559,11 @@
 	int rv;
 
 	DPRINTF(("fcntl\n"));
-	if (fd < HIJACK_FDOFF) {
-		op_fcntl = host_fcntl;
-	} else {
-		fd = ADJ(fd);
+	if (fd_isrump(fd)) {
+		fd = fd_host2rump(fd);
 		op_fcntl = rumpcalls[RUMPCALL_FCNTL];
+	} else {
+		op_fcntl = host_fcntl;
 	}
 
 	va_start(ap, cmd);
@@ -477,11 +578,11 @@
 	int (*op_close)(int);
 
 	DPRINTF(("close %d\n", fd));
-	if (fd < HIJACK_FDOFF) {
-		op_close = host_close;
-	} else {
-		fd = ADJ(fd);
+	if (fd_isrump(fd)) {
+		fd = fd_host2rump(fd);
 		op_close = rumpcalls[RUMPCALL_CLOSE];
+	} else {
+		op_close = host_close;
 	}
 
 	return op_close(fd);
@@ -501,16 +602,16 @@
 
 	for (i = 0; i < MIN(nfds, FD_SETSIZE); i++) {
 		if (FD_ISSET(i, setti)) {
-			if (i < HIJACK_FDOFF)
-				*hostcall = 1;
-			else
+			if (fd_isrump(i))
 				*rumpcall = 1;
+			else
+				*hostcall = 1;
 		}
 	}
 }
 
 static void
-adjustset(fd_set *setti, int nfds, int plus)
+adjustset(fd_set *setti, int nfds, int (*fdadj)(int))
 {
 	int fd, i;
 
@@ -518,14 +619,9 @@
 		return;
 
 	for (i = 0; i < MIN(nfds, FD_SETSIZE); i++) {
-		assert(i < HIJACK_FDOFF || !plus);
-		assert(i >= HIJACK_FDOFF || plus);
 		if (FD_ISSET(i, setti)) {
 			FD_CLR(i, setti);
-			if (plus)
-				fd = i + HIJACK_FDOFF;
-			else
-				fd = i - HIJACK_FDOFF;
+			fd = fdadj(fd);
 			FD_SET(fd, setti);
 		}
 	}
@@ -551,19 +647,19 @@
 	if (hostcall) {
 		op_select = host_select;
 	} else {
-		adjustset(readfds, nfds, 1);
-		adjustset(writefds, nfds, 1);
-		adjustset(exceptfds, nfds, 1);
+		adjustset(readfds, nfds, fd_host2rump);
+		adjustset(writefds, nfds, fd_host2rump);
+		adjustset(exceptfds, nfds, fd_host2rump);
 		op_select = rumpcalls[RUMPCALL_SELECT];
 	}
 
 	DPRINTF(("select\n"));
-	rv = op_select(nfds+HIJACK_FDOFF,
+	rv = op_select(nfds+HIJACK_SELECT,
 	    readfds, writefds, exceptfds, timeout);
 	if (rumpcall) {
-		adjustset(readfds, nfds, 0);
-		adjustset(writefds, nfds, 0);
-		adjustset(exceptfds, nfds, 0);
+		adjustset(readfds, nfds, fd_rump2host);
+		adjustset(writefds, nfds, fd_rump2host);
+		adjustset(exceptfds, nfds, fd_rump2host);
 	}
 	return rv;
 }
@@ -574,23 +670,20 @@
 	nfds_t i;
 
 	for (i = 0; i < nfds; i++) {
-		if (fds[i].fd < HIJACK_FDOFF)
-			(*hostcall)++;
-		else
+		if (fd_isrump(fds[i].fd))
 			(*rumpcall)++;
+		else
+			(*hostcall)++;
 	}
 }
 
 static void
-adjustpoll(struct pollfd *fds, nfds_t nfds, int plus)
+adjustpoll(struct pollfd *fds, nfds_t nfds, int (*fdadj)(int))
 {
 	nfds_t i;
 
 	for (i = 0; i < nfds; i++) {
-		if (plus)
-			fds[i].fd += HIJACK_FDOFF;
-		else
-			fds[i].fd -= HIJACK_FDOFF;
+		fds[i].fd = fdadj(fds[i].fd);
 	}
 }
 
@@ -629,6 +722,7 @@
 	nfds_t i;
 	int rv;
 
+	DPRINTF(("poll\n"));
 	checkpoll(fds, nfds, &hostcall, &rumpcall);
 
 	if (hostcall && rumpcall) {
@@ -659,14 +753,14 @@
 
 		/* split vectors */
 		for (i = 0; i < nfds; i++) {
-			if (fds[i].fd < HIJACK_FDOFF) {
-				pfd_host[i].fd = fds[i].fd;
-				pfd_host[i].events = fds[i].events;
-				pfd_rump[i].fd = -1;
-			} else {
+			if (fd_isrump(fds[i].fd)) {
 				pfd_host[i].fd = -1;
-				pfd_rump[i].fd = ADJ(fds[i].fd);
+				pfd_rump[i].fd = fd_host2rump(fds[i].fd);
 				pfd_rump[i].events = fds[i].events;
+			} else {
+				pfd_rump[i].fd = -1;
+				pfd_host[i].fd = fds[i].fd;
+				pfd_host[i].events = fds[i].events;
 			}
 		}
 
@@ -738,12 +832,12 @@
 			op_poll = host_poll;
 		} else {
 			op_poll = rumpcalls[RUMPCALL_POLL];
-			adjustpoll(fds, nfds, 0);
+			adjustpoll(fds, nfds, fd_host2rump);
 		}
 
 		rv = op_poll(fds, nfds, timeout);
 		if (rumpcall)
-			adjustpoll(fds, nfds, 1);
+			adjustpoll(fds, nfds, fd_rump2host);
 	}
 
 	return rv;

Reply via email to