Below is the major cleanup that I want to commit to vnd(4), and then I'll be happy with it... for now. :)
Changes: - Recompact the remaining sc_flags bits - Use vn_rdwr() instead of doing the uio/iovec/VOP_{READ,WRITE} dance. - Cleanup vndstrategy() a little more, making it more like rdstrategy() and others. - Enforce some reasonable limits on virtual disk geometry. Some of these are necessary (e.g., secsize >= 512 and power of 2; nsectors and ntracks > 0) but the upper limits are to cope with the rest of the disklabel handling code not being written with extreme geometries in mind. Until we've had a chance to check/fix all of that code, it's safer to just not allow it. (The limits are high enough to allow any disk geometries still listed in any arch's disktab though.) - Misc cleanups to make vnd(4) look more like other disk drivers. - Just return -1 in vndsize() because we don't support swapping to vnd(4) anymore. Anyway, ok's to commit this as is would be great, but I can break it into smaller diffs if anyone prefers. (I just don't want to serialize the review process unnecessarily.) Index: vnd.c =================================================================== RCS file: /cvs/src/sys/dev/vnd.c,v retrieving revision 1.140 diff -u -p -r1.140 vnd.c --- vnd.c 6 Jul 2011 05:12:46 -0000 1.140 +++ vnd.c 6 Jul 2011 06:38:29 -0000 @@ -94,9 +94,9 @@ struct vnd_softc { }; /* sc_flags */ -#define VNF_INITED 0x0002 -#define VNF_HAVELABEL 0x0400 -#define VNF_READONLY 0x2000 +#define VNF_INITED 0x0001 +#define VNF_HAVELABEL 0x0002 +#define VNF_READONLY 0x0004 #define VNDRW(v) ((v)->sc_flags & VNF_READONLY ? FREAD : FREAD|FWRITE) @@ -110,6 +110,7 @@ void vndclear(struct vnd_softc *); int vndsetcred(struct vnd_softc *, struct ucred *); int vndgetdisklabel(dev_t, struct vnd_softc *, struct disklabel *, int); void vndencrypt(struct vnd_softc *, caddr_t, size_t, daddr64_t, int); +void vndencryptbuf(struct vnd_softc *, struct buf *, int); size_t vndbdevsize(struct vnode *, struct proc *); void @@ -135,30 +136,38 @@ vndencrypt(struct vnd_softc *sc, caddr_t } void +vndencryptbuf(struct vnd_softc *sc, struct buf *bp, int encrypt) +{ + vndencrypt(sc, bp->b_data, bp->b_bcount, bp->b_blkno, encrypt); +} + +void vndattach(int num) { - char *mem; - u_long size; + size_t size; int i; if (num <= 0) return; + size = num * sizeof(struct vnd_softc); - mem = malloc(size, M_DEVBUF, M_NOWAIT | M_ZERO); - if (mem == NULL) { + vnd_softc = malloc(size, M_DEVBUF, M_NOWAIT | M_ZERO); + if (vnd_softc == NULL) { printf("WARNING: no memory for vnode disks\n"); return; } - vnd_softc = (struct vnd_softc *)mem; + for (i = 0; i < num; i++) { struct vnd_softc *sc = &vnd_softc[i]; sc->sc_dev.dv_unit = i; snprintf(sc->sc_dev.dv_xname, sizeof(sc->sc_dev.dv_xname), "vnd%d", i); + sc->sc_dk.dk_name = sc->sc_dev.dv_xname; disk_construct(&sc->sc_dk); device_ref(&sc->sc_dev); } + numvnd = num; } @@ -180,7 +189,7 @@ vndopen(dev_t dev, int flags, int mode, if ((flags & FWRITE) && (sc->sc_flags & VNF_READONLY)) { error = EROFS; - goto bad; + goto unlock; } if ((sc->sc_flags & VNF_INITED) && @@ -194,7 +203,7 @@ vndopen(dev_t dev, int flags, int mode, error = disk_openpart(&sc->sc_dk, part, mode, (sc->sc_flags & VNF_HAVELABEL) != 0); -bad: + unlock: disk_unlock(&sc->sc_dk); return (error); } @@ -259,65 +268,54 @@ void vndstrategy(struct buf *bp) { int unit = DISKUNIT(bp->b_dev); - struct vnd_softc *sc = &vnd_softc[unit]; - int s, part; - struct iovec aiov; - struct uio auio; - struct proc *p = curproc; - daddr64_t off; + struct vnd_softc *sc; + struct partition *p; + size_t off; + int s; DNPRINTF(VDB_FOLLOW, "vndstrategy(%p): unit %d\n", bp, unit); + if (unit >= numvnd) { + bp->b_error = ENXIO; + goto bad; + } + sc = &vnd_softc[unit]; + if ((sc->sc_flags & VNF_HAVELABEL) == 0) { bp->b_error = ENXIO; - bp->b_flags |= B_ERROR; - bp->b_resid = bp->b_bcount; - goto done; + goto bad; } if (bounds_check_with_label(bp, sc->sc_dk.dk_label) == -1) goto done; - part = DISKPART(bp->b_dev); - off = DL_SECTOBLK(sc->sc_dk.dk_label, - DL_GETPOFFSET(&sc->sc_dk.dk_label->d_partitions[part])); - aiov.iov_base = bp->b_data; - auio.uio_resid = aiov.iov_len = bp->b_bcount; - auio.uio_iov = &aiov; - auio.uio_iovcnt = 1; - auio.uio_offset = dbtob((off_t)(bp->b_blkno + off)); - auio.uio_segflg = UIO_SYSSPACE; - auio.uio_procp = p; - - vn_lock(sc->sc_vp, LK_EXCLUSIVE | LK_RETRY, p); - if (bp->b_flags & B_READ) { - auio.uio_rw = UIO_READ; - bp->b_error = VOP_READ(sc->sc_vp, &auio, 0, - sc->sc_cred); - if (sc->sc_keyctx) - vndencrypt(sc, bp->b_data, - bp->b_bcount, bp->b_blkno, 0); - } else { - if (sc->sc_keyctx) - vndencrypt(sc, bp->b_data, - bp->b_bcount, bp->b_blkno, 1); - auio.uio_rw = UIO_WRITE; - /* - * Upper layer has already checked I/O for - * limits, so there is no need to do it again. - */ - bp->b_error = VOP_WRITE(sc->sc_vp, &auio, - IO_NOLIMIT, sc->sc_cred); - /* Data in buffer cache needs to be in clear */ - if (sc->sc_keyctx) - vndencrypt(sc, bp->b_data, - bp->b_bcount, bp->b_blkno, 0); - } - VOP_UNLOCK(sc->sc_vp, 0, p); + p = &sc->sc_dk.dk_label->d_partitions[DISKPART(bp->b_dev)]; + off = DL_GETPOFFSET(p) * sc->sc_dk.dk_label->d_secsize + + (u_int64_t)bp->b_blkno * DEV_BSIZE; + + if (sc->sc_keyctx && !(bp->b_flags & B_READ)) + vndencryptbuf(sc, bp, 1); + + /* + * Use IO_NOLIMIT because upper layer has already checked I/O + * for limits, so there is no need to do it again. + */ + bp->b_error = vn_rdwr((bp->b_flags & B_READ) ? UIO_READ : UIO_WRITE, + sc->sc_vp, bp->b_data, bp->b_bcount, off, UIO_SYSSPACE, IO_NOLIMIT, + sc->sc_cred, &bp->b_resid, curproc); if (bp->b_error) bp->b_flags |= B_ERROR; - bp->b_resid = auio.uio_resid; -done: + + /* Data in buffer cache needs to be in clear */ + if (sc->sc_keyctx) + vndencryptbuf(sc, bp, 0); + + goto done; + + bad: + bp->b_flags |= B_ERROR; + bp->b_resid = bp->b_bcount; + done: s = splbio(); biodone(bp); splx(s); @@ -328,18 +326,6 @@ done: int vndread(dev_t dev, struct uio *uio, int flags) { - int unit = DISKUNIT(dev); - struct vnd_softc *sc; - - DNPRINTF(VDB_FOLLOW, "vndread(%x, %p)\n", dev, uio); - - if (unit >= numvnd) - return (ENXIO); - sc = &vnd_softc[unit]; - - if ((sc->sc_flags & VNF_INITED) == 0) - return (ENXIO); - return (physio(vndstrategy, dev, B_READ, minphys, uio)); } @@ -347,18 +333,6 @@ vndread(dev_t dev, struct uio *uio, int int vndwrite(dev_t dev, struct uio *uio, int flags) { - int unit = DISKUNIT(dev); - struct vnd_softc *sc; - - DNPRINTF(VDB_FOLLOW, "vndwrite(%x, %p)\n", dev, uio); - - if (unit >= numvnd) - return (ENXIO); - sc = &vnd_softc[unit]; - - if ((sc->sc_flags & VNF_INITED) == 0) - return (ENXIO); - return (physio(vndstrategy, dev, B_WRITE, minphys, uio)); } @@ -410,6 +384,12 @@ vndioctl(dev_t dev, u_long cmd, caddr_t if (sc->sc_flags & VNF_INITED) return (EBUSY); + if (vio->vnd_secsize < 512 || vio->vnd_secsize > 16384 || + (vio->vnd_secsize & (vio->vnd_secsize - 1)) != 0 || + vio->vnd_ntracks == 0 || vio->vnd_ntracks > 512 || + vio->vnd_nsectors == 0 || vio->vnd_nsectors > 4096) + return (EINVAL); + if ((error = disk_lock(&sc->sc_dk)) != 0) return (error); @@ -489,7 +469,6 @@ vndioctl(dev_t dev, u_long cmd, caddr_t sc->sc_vp, (unsigned long long)sc->sc_size); /* Attach the disk. */ - sc->sc_dk.dk_name = sc->sc_dev.dv_xname; disk_attach(&sc->sc_dev, &sc->sc_dk); disk_unlock(&sc->sc_dk); @@ -625,29 +604,20 @@ vndioctl(dev_t dev, u_long cmd, caddr_t int vndsetcred(struct vnd_softc *sc, struct ucred *cred) { - struct uio auio; - struct iovec aiov; - char *tmpbuf; + void *buf; + size_t len; int error; - struct proc *p = curproc; sc->sc_cred = crdup(cred); - tmpbuf = malloc(DEV_BSIZE, M_TEMP, M_WAITOK); + + len = MIN(DEV_BSIZE, sc->sc_size * sc->sc_secsize); + buf = malloc(len, M_TEMP, M_WAITOK); /* XXX: Horrible kludge to establish credentials for NFS */ - aiov.iov_base = tmpbuf; - aiov.iov_len = MIN(DEV_BSIZE, sc->sc_size * sc->sc_secsize); - auio.uio_iov = &aiov; - auio.uio_iovcnt = 1; - auio.uio_offset = 0; - auio.uio_rw = UIO_READ; - auio.uio_segflg = UIO_SYSSPACE; - auio.uio_resid = aiov.iov_len; - vn_lock(sc->sc_vp, LK_RETRY | LK_EXCLUSIVE, p); - error = VOP_READ(sc->sc_vp, &auio, 0, sc->sc_cred); - VOP_UNLOCK(sc->sc_vp, 0, p); + error = vn_rdwr(UIO_READ, sc->sc_vp, buf, len, 0, + UIO_SYSSPACE, IO_NOLIMIT, sc->sc_cred, NULL, curproc); - free(tmpbuf, M_TEMP); + free(buf, M_TEMP); return (error); } @@ -655,13 +625,12 @@ void vndclear(struct vnd_softc *sc) { struct vnode *vp = sc->sc_vp; - struct proc *p = curproc; /* XXX */ DNPRINTF(VDB_FOLLOW, "vndclear(%p): vp %p\n", sc, vp); if (vp == NULL) panic("vndioctl: null vp"); - (void) vn_close(vp, VNDRW(sc), sc->sc_cred, p); + (void) vn_close(vp, VNDRW(sc), sc->sc_cred, curproc); crfree(sc->sc_cred); sc->sc_flags = 0; sc->sc_vp = NULL; @@ -673,18 +642,11 @@ vndclear(struct vnd_softc *sc) daddr64_t vndsize(dev_t dev) { - int unit = DISKUNIT(dev); - struct vnd_softc *sc = &vnd_softc[unit]; - - if (unit >= numvnd || (sc->sc_flags & VNF_INITED) == 0) - return (-1); - return (sc->sc_size * (sc->sc_secsize / DEV_BSIZE)); + return (-1); } int vnddump(dev_t dev, daddr64_t blkno, caddr_t va, size_t size) { - - /* Not implemented. */ return (ENXIO); }