Module Name: src Committed By: pooka Date: Mon Feb 28 19:57:37 UTC 2011
Modified Files: src/lib/librumphijack: hijack.c Log Message: A simple dup2-enforced affine transformation isn't enough when dealing with dup2() from a rump kernel fd to a host kernel fd. Consider: s1 = socket(); s2 = socket(); dup2(s2, 0); Instead, maintain a real mapping table (and get on my knees and pray i don't have to touch this hair-splitting code ever again). Apparently bourne shell scripts from a rump kernel fs work now (sh script.sh; ./script.sh doesn't work for obvious "IT'S THE WRONG FS NAMESPACE" reasons). No test regressions either, so I'm a happy camper. To generate a diff of this commit: cvs rdiff -u -r1.70 -r1.71 src/lib/librumphijack/hijack.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/lib/librumphijack/hijack.c diff -u src/lib/librumphijack/hijack.c:1.70 src/lib/librumphijack/hijack.c:1.71 --- src/lib/librumphijack/hijack.c:1.70 Sun Feb 27 11:32:12 2011 +++ src/lib/librumphijack/hijack.c Mon Feb 28 19:57:36 2011 @@ -1,4 +1,4 @@ -/* $NetBSD: hijack.c,v 1.70 2011/02/27 11:32:12 pooka Exp $ */ +/* $NetBSD: hijack.c,v 1.71 2011/02/28 19:57:36 pooka Exp $ */ /*- * Copyright (c) 2011 Antti Kantee. All Rights Reserved. @@ -26,7 +26,7 @@ */ #include <sys/cdefs.h> -__RCSID("$NetBSD: hijack.c,v 1.70 2011/02/27 11:32:12 pooka Exp $"); +__RCSID("$NetBSD: hijack.c,v 1.71 2011/02/28 19:57:36 pooka Exp $"); #define __ssp_weak_name(fun) _hijack_ ## fun @@ -232,23 +232,89 @@ } syscalls[DUALCALL__NUM]; #define GETSYSCALL(which, name) syscalls[DUALCALL_##name].bs_##which -pid_t (*host_fork)(void); -int (*host_daemon)(int, int); -int (*host_execve)(const char *, char *const[], char *const[]); -void * (*host_mmap)(void *, size_t, int, int, int, off_t); - -/* ok, we need *two* bits per dup2'd fd to track fd+HIJACKOFF aliases */ -static uint32_t dup2mask; -#define ISDUP2D(fd) (((fd) < 16) && (1<<(fd) & dup2mask)) -#define SETDUP2(fd) \ - do { if ((fd) < 16) dup2mask |= (1<<(fd)); } while (/*CONSTCOND*/0) -#define CLRDUP2(fd) \ - do { if ((fd) < 16) dup2mask &= ~(1<<(fd)); } while (/*CONSTCOND*/0) -#define ISDUP2ALIAS(fd) (((fd) < 16) && (1<<((fd)+16) & dup2mask)) -#define SETDUP2ALIAS(fd) \ - do { if ((fd) < 16) dup2mask |= (1<<((fd)+16)); } while (/*CONSTCOND*/0) -#define CLRDUP2ALIAS(fd) \ - do { if ((fd) < 16) dup2mask &= ~(1<<((fd)+16)); } while (/*CONSTCOND*/0) +static pid_t (*host_fork)(void); +static int (*host_daemon)(int, int); +static int (*host_execve)(const char *, char *const[], char *const[]); +static void * (*host_mmap)(void *, size_t, int, int, int, off_t); + +static bool fd_isrump(int); +static bool path_isrump(const char *); + +/* + * Maintain a mapping table for the usual dup2 suspects. + */ +/* note: you cannot change this without editing the env-passing code */ +#define DUP2HIGH 2 +static uint32_t dup2vec[DUP2HIGH+1]; +#define DUP2BIT (1<<31) +#define DUP2ALIAS (1<<30) +#define DUP2FDMASK ((1<<30)-1) + +static bool +isdup2d(int fd) +{ + + return fd <= DUP2HIGH && fd >= 0 && dup2vec[fd] & DUP2BIT; +} + +static int +mapdup2(int hostfd) +{ + + _DIAGASSERT(isdup2d(hostfd)); + return dup2vec[hostfd] & DUP2FDMASK; +} + +static int +unmapdup2(int rumpfd) +{ + int i; + + for (i = 0; i <= DUP2HIGH; i++) { + if (dup2vec[i] & DUP2BIT && (dup2vec[i] & DUP2FDMASK) == rumpfd) + return i; + } + return -1; +} + +static void +setdup2(int hostfd, int rumpfd) +{ + + if (hostfd > DUP2HIGH) { + _DIAGASSERT(0); + return; + } + + dup2vec[hostfd] = DUP2BIT | DUP2ALIAS | rumpfd; +} + +static void +clrdup2(int hostfd) +{ + + if (hostfd > DUP2HIGH) { + _DIAGASSERT(0); + return; + } + + dup2vec[hostfd] = 0; +} + +static bool +killdup2alias(int rumpfd) +{ + int hostfd; + + if ((hostfd = unmapdup2(rumpfd)) == -1) + return false; + + if (dup2vec[hostfd] & DUP2ALIAS) { + dup2vec[hostfd] &= ~DUP2ALIAS; + return true; + } + return false; +} //#define DEBUGJACK #ifdef DEBUGJACK @@ -258,7 +324,7 @@ { va_list ap; - if (ISDUP2D(STDERR_FILENO)) + if (isdup2d(STDERR_FILENO)) return; va_start(ap, fmt); @@ -266,6 +332,28 @@ va_end(ap); } +static const char * +whichfd(int fd) +{ + + if (fd == -1) + return "-1"; + else if (fd_isrump(fd)) + return "rump"; + else + return "host"; +} + +static const char * +whichpath(const char *path) +{ + + if (path_isrump(path)) + return "rump"; + else + return "host"; +} + #else #define DPRINTF(x) #endif @@ -275,7 +363,7 @@ { \ type (*fun) proto; \ \ - DPRINTF(("%s -> %d\n", __STRING(name), fd)); \ + DPRINTF(("%s -> %d (%s)\n", __STRING(name), fd, whichfd(fd))); \ if (fd_isrump(fd)) { \ fun = syscalls[rcname].bs_rump; \ fd = fd_host2rump(fd); \ @@ -291,7 +379,8 @@ { \ type (*fun) proto; \ \ - DPRINTF(("%s -> %s\n", __STRING(name), path)); \ + DPRINTF(("%s -> %s (%s)\n", __STRING(name), path, \ + whichpath(path))); \ if (path_isrump(path)) { \ fun = syscalls[rcname].bs_rump; \ path = path_host2rump(path); \ @@ -500,9 +589,13 @@ } } - if (getenv_r("RUMPHIJACK__DUP2MASK", buf, sizeof(buf)) == 0) { - dup2mask = strtoul(buf, NULL, 10); - unsetenv("RUMPHIJACK__DUP2MASK"); + if (getenv_r("RUMPHIJACK__DUP2INFO", buf, sizeof(buf)) == 0) { + if (sscanf(buf, "%u,%u,%u", + &dup2vec[0], &dup2vec[1], &dup2vec[2]) != 3) { + warnx("invalid dup2mask: %s", buf); + memset(dup2vec, 0, sizeof(dup2vec)); + } + unsetenv("RUMPHIJACK__DUP2INFO"); } if (getenv_r("RUMPHIJACK__PWDINRUMP", buf, sizeof(buf)) == 0) { pwdinrump = true; @@ -510,35 +603,47 @@ } } -/* XXX: need runtime selection. low for now due to FD_SETSIZE */ +/* Need runtime selection. low for now due to FD_SETSIZE */ #define HIJACK_FDOFF 128 + static int fd_rump2host(int fd) { if (fd == -1) return fd; + return fd + HIJACK_FDOFF; +} - if (!ISDUP2D(fd)) - fd += HIJACK_FDOFF; +static int +fd_rump2host_withdup(int fd) +{ + int hfd; - return fd; + _DIAGASSERT(fd != -1); + hfd = unmapdup2(fd); + if (hfd != -1) { + _DIAGASSERT(hfd <= DUP2HIGH); + return hfd; + } + return fd_rump2host(fd); } static int fd_host2rump(int fd) { - if (!ISDUP2D(fd)) - fd -= HIJACK_FDOFF; - return fd; + if (!isdup2d(fd)) + return fd - HIJACK_FDOFF; + else + return mapdup2(fd); } static bool fd_isrump(int fd) { - return ISDUP2D(fd) || fd >= HIJACK_FDOFF; + return isdup2d(fd) || fd >= HIJACK_FDOFF; } #define assertfd(_fd_) assert(ISDUP2D(_fd_) || (_fd_) >= HIJACK_FDOFF) @@ -614,7 +719,7 @@ int (*op_close)(int) = GETSYSCALL(host, CLOSE); int ofd, i; - for (i = 1; ISDUP2D(fd); i++) { + for (i = 1; isdup2d(fd); i++) { ofd = fd; fd = op_fcntl(ofd, F_DUPFD, i); op_close(ofd); @@ -631,6 +736,8 @@ va_list ap; int fd; + DPRINTF(("open -> %s (%s)\n", path, whichpath(path))); + if (path_isrump(path)) { path = path_host2rump(path); op_open = GETSYSCALL(rump, OPEN); @@ -648,6 +755,8 @@ fd = fd_rump2host(fd); else fd = fd_dupgood(fd); + + DPRINTF(("open <- %d (%s)\n", fd, whichfd(fd))); return fd; } @@ -854,13 +963,12 @@ return rv; } -#include <syslog.h> int fcntl(int fd, int cmd, ...) { int (*op_fcntl)(int, int, ...); va_list ap; - int rv, minfd, i; + int rv, minfd, i, maxdup2; DPRINTF(("fcntl -> %d (cmd %d)\n", fd, cmd)); @@ -890,23 +998,21 @@ /* * Additionally, we want to do a rump closem, but only - * for the file descriptors not within the dup2mask. + * for the file descriptors not dup2'd. */ - /* why don't we offer fls()? */ - for (i = 15; i >= 0; i--) { - if (ISDUP2D(i)) - break; + for (i = 0, maxdup2 = 0; i <= DUP2HIGH; i++) { + if (dup2vec[i] & DUP2BIT) + maxdup2 = MAX(dup2vec[i] & DUP2FDMASK, maxdup2); } if (fd >= HIJACK_FDOFF) fd -= HIJACK_FDOFF; else fd = 0; - fd = MAX(i+1, fd); + fd = MAX(maxdup2+1, fd); /* hmm, maybe we should close rump fd's not within dup2mask? */ - return rump_sys_fcntl(fd, F_CLOSEM); case F_MAXFD: @@ -957,21 +1063,23 @@ DPRINTF(("close -> %d\n", fd)); if (fd_isrump(fd)) { - int undup2 = 0; + bool undup2 = false; + int ofd; + + if (isdup2d(ofd = fd)) { + undup2 = true; + } fd = fd_host2rump(fd); - if (ISDUP2ALIAS(fd)) { - _DIAGASSERT(ISDUP2D(fd)); - CLRDUP2ALIAS(fd); + if (!undup2 && killdup2alias(fd)) { return 0; } - if (ISDUP2D(fd)) - undup2 = 1; op_close = GETSYSCALL(rump, CLOSE); rv = op_close(fd); - if (rv == 0 && undup2) - CLRDUP2(fd); + if (rv == 0 && undup2) { + clrdup2(ofd); + } } else { if (rumpclient__closenotify(&fd, RUMPCLIENT_CLOSE_CLOSE) == -1) return -1; @@ -1016,19 +1124,28 @@ DPRINTF(("dup2 -> %d (o) -> %d (n)\n", oldd, newd)); if (fd_isrump(oldd)) { - if (!(newd >= 0 && newd <= 2)) { + int (*op_close)(int) = GETSYSCALL(host, CLOSE); + + /* only allow fd 0-2 for cross-kernel dup */ + if (!(newd >= 0 && newd <= 2 && !fd_isrump(newd))) { errno = EBADF; return -1; } - oldd = fd_host2rump(oldd); - if (oldd == newd) { - SETDUP2(newd); - SETDUP2ALIAS(newd); - return newd; - } - rv = rump_sys_dup2(oldd, newd); - if (rv != -1) - SETDUP2(newd); + + /* regular dup2? */ + if (fd_isrump(newd)) { + newd = fd_host2rump(newd); + rv = rump_sys_dup2(oldd, newd); + return fd_rump2host(rv); + } + + /* + * dup2 rump => host? just establish an + * entry in the mapping table. + */ + op_close(newd); + setdup2(newd, fd_host2rump(oldd)); + rv = 0; } else { host_dup2 = syscalls[DUALCALL_DUP2].bs_host; if (rumpclient__closenotify(&newd, RUMPCLIENT_CLOSE_DUP2) == -1) @@ -1087,19 +1204,14 @@ char **newenv; size_t nelem; int rv, sverrno; - int bonus = 1, i = 0; + int bonus = 2, i = 0; - if (dup2mask) { - snprintf(buf, sizeof(buf), "RUMPHIJACK__DUP2MASK=%u", dup2mask); - dup2str = malloc(strlen(buf)+1); - if (dup2str == NULL) { - errno = ENOMEM; - return -1; - } - strcpy(dup2str, buf); - bonus++; - } else { - dup2str = NULL; + snprintf(buf, sizeof(buf), "RUMPHIJACK__DUP2INFO=%u,%u,%u", + dup2vec[0], dup2vec[1], dup2vec[2]); + dup2str = strdup(buf); + if (dup2str == NULL) { + errno = ENOMEM; + return -1; } if (pwdinrump) { @@ -1111,17 +1223,16 @@ for (nelem = 0; envp && envp[nelem]; nelem++) continue; - newenv = malloc(sizeof(*newenv) * nelem+bonus); + newenv = malloc(sizeof(*newenv) * (nelem+bonus)); if (newenv == NULL) { free(dup2str); errno = ENOMEM; return -1; } memcpy(newenv, envp, nelem*sizeof(*newenv)); - if (dup2str) { - newenv[nelem+i] = dup2str; - i++; - } + newenv[nelem+i] = dup2str; + i++; + if (pwdinrumpstr) { newenv[nelem+i] = __UNCONST(pwdinrumpstr); i++; @@ -1473,7 +1584,7 @@ rv = op_pollts(fds, nfds, ts, sigmask); if (rumpcall) - adjustpoll(fds, nfds, fd_rump2host); + adjustpoll(fds, nfds, fd_rump2host_withdup); } return rv;