Below is the major cleanup that I want to commit to vnd(4), and then
I'll be happy with it... for now. :)
Changes:
- Recompact the remaining sc_flags bits
- Use vn_rdwr() instead of doing the uio/iovec/VOP_{READ,WRITE}
dance.
- Cleanup vndstrategy() a little more, making it more like
rdstrategy() and others.
- Enforce some reasonable limits on virtual disk geometry. Some of
these are necessary (e.g., secsize >= 512 and power of 2; nsectors
and ntracks > 0) but the upper limits are to cope with the rest of
the disklabel handling code not being written with extreme
geometries in mind. Until we've had a chance to check/fix all of
that code, it's safer to just not allow it. (The limits are high
enough to allow any disk geometries still listed in any arch's
disktab though.)
- Misc cleanups to make vnd(4) look more like other disk drivers.
- Just return -1 in vndsize() because we don't support swapping to
vnd(4) anymore.
Anyway, ok's to commit this as is would be great, but I can break it
into smaller diffs if anyone prefers. (I just don't want to serialize
the review process unnecessarily.)
Index: vnd.c
===================================================================
RCS file: /cvs/src/sys/dev/vnd.c,v
retrieving revision 1.140
diff -u -p -r1.140 vnd.c
--- vnd.c 6 Jul 2011 05:12:46 -0000 1.140
+++ vnd.c 6 Jul 2011 06:38:29 -0000
@@ -94,9 +94,9 @@ struct vnd_softc {
};
/* sc_flags */
-#define VNF_INITED 0x0002
-#define VNF_HAVELABEL 0x0400
-#define VNF_READONLY 0x2000
+#define VNF_INITED 0x0001
+#define VNF_HAVELABEL 0x0002
+#define VNF_READONLY 0x0004
#define VNDRW(v) ((v)->sc_flags & VNF_READONLY ? FREAD :
FREAD|FWRITE)
@@ -110,6 +110,7 @@ void vndclear(struct vnd_softc *);
int vndsetcred(struct vnd_softc *, struct ucred *);
int vndgetdisklabel(dev_t, struct vnd_softc *, struct disklabel *, int);
void vndencrypt(struct vnd_softc *, caddr_t, size_t, daddr64_t, int);
+void vndencryptbuf(struct vnd_softc *, struct buf *, int);
size_t vndbdevsize(struct vnode *, struct proc *);
void
@@ -135,30 +136,38 @@ vndencrypt(struct vnd_softc *sc, caddr_t
}
void
+vndencryptbuf(struct vnd_softc *sc, struct buf *bp, int encrypt)
+{
+ vndencrypt(sc, bp->b_data, bp->b_bcount, bp->b_blkno, encrypt);
+}
+
+void
vndattach(int num)
{
- char *mem;
- u_long size;
+ size_t size;
int i;
if (num <= 0)
return;
+
size = num * sizeof(struct vnd_softc);
- mem = malloc(size, M_DEVBUF, M_NOWAIT | M_ZERO);
- if (mem == NULL) {
+ vnd_softc = malloc(size, M_DEVBUF, M_NOWAIT | M_ZERO);
+ if (vnd_softc == NULL) {
printf("WARNING: no memory for vnode disks\n");
return;
}
- vnd_softc = (struct vnd_softc *)mem;
+
for (i = 0; i < num; i++) {
struct vnd_softc *sc = &vnd_softc[i];
sc->sc_dev.dv_unit = i;
snprintf(sc->sc_dev.dv_xname, sizeof(sc->sc_dev.dv_xname),
"vnd%d", i);
+ sc->sc_dk.dk_name = sc->sc_dev.dv_xname;
disk_construct(&sc->sc_dk);
device_ref(&sc->sc_dev);
}
+
numvnd = num;
}
@@ -180,7 +189,7 @@ vndopen(dev_t dev, int flags, int mode,
if ((flags & FWRITE) && (sc->sc_flags & VNF_READONLY)) {
error = EROFS;
- goto bad;
+ goto unlock;
}
if ((sc->sc_flags & VNF_INITED) &&
@@ -194,7 +203,7 @@ vndopen(dev_t dev, int flags, int mode,
error = disk_openpart(&sc->sc_dk, part, mode,
(sc->sc_flags & VNF_HAVELABEL) != 0);
-bad:
+ unlock:
disk_unlock(&sc->sc_dk);
return (error);
}
@@ -259,65 +268,54 @@ void
vndstrategy(struct buf *bp)
{
int unit = DISKUNIT(bp->b_dev);
- struct vnd_softc *sc = &vnd_softc[unit];
- int s, part;
- struct iovec aiov;
- struct uio auio;
- struct proc *p = curproc;
- daddr64_t off;
+ struct vnd_softc *sc;
+ struct partition *p;
+ size_t off;
+ int s;
DNPRINTF(VDB_FOLLOW, "vndstrategy(%p): unit %d\n", bp, unit);
+ if (unit >= numvnd) {
+ bp->b_error = ENXIO;
+ goto bad;
+ }
+ sc = &vnd_softc[unit];
+
if ((sc->sc_flags & VNF_HAVELABEL) == 0) {
bp->b_error = ENXIO;
- bp->b_flags |= B_ERROR;
- bp->b_resid = bp->b_bcount;
- goto done;
+ goto bad;
}
if (bounds_check_with_label(bp, sc->sc_dk.dk_label) == -1)
goto done;
- part = DISKPART(bp->b_dev);
- off = DL_SECTOBLK(sc->sc_dk.dk_label,
- DL_GETPOFFSET(&sc->sc_dk.dk_label->d_partitions[part]));
- aiov.iov_base = bp->b_data;
- auio.uio_resid = aiov.iov_len = bp->b_bcount;
- auio.uio_iov = &aiov;
- auio.uio_iovcnt = 1;
- auio.uio_offset = dbtob((off_t)(bp->b_blkno + off));
- auio.uio_segflg = UIO_SYSSPACE;
- auio.uio_procp = p;
-
- vn_lock(sc->sc_vp, LK_EXCLUSIVE | LK_RETRY, p);
- if (bp->b_flags & B_READ) {
- auio.uio_rw = UIO_READ;
- bp->b_error = VOP_READ(sc->sc_vp, &auio, 0,
- sc->sc_cred);
- if (sc->sc_keyctx)
- vndencrypt(sc, bp->b_data,
- bp->b_bcount, bp->b_blkno, 0);
- } else {
- if (sc->sc_keyctx)
- vndencrypt(sc, bp->b_data,
- bp->b_bcount, bp->b_blkno, 1);
- auio.uio_rw = UIO_WRITE;
- /*
- * Upper layer has already checked I/O for
- * limits, so there is no need to do it again.
- */
- bp->b_error = VOP_WRITE(sc->sc_vp, &auio,
- IO_NOLIMIT, sc->sc_cred);
- /* Data in buffer cache needs to be in clear */
- if (sc->sc_keyctx)
- vndencrypt(sc, bp->b_data,
- bp->b_bcount, bp->b_blkno, 0);
- }
- VOP_UNLOCK(sc->sc_vp, 0, p);
+ p = &sc->sc_dk.dk_label->d_partitions[DISKPART(bp->b_dev)];
+ off = DL_GETPOFFSET(p) * sc->sc_dk.dk_label->d_secsize +
+ (u_int64_t)bp->b_blkno * DEV_BSIZE;
+
+ if (sc->sc_keyctx && !(bp->b_flags & B_READ))
+ vndencryptbuf(sc, bp, 1);
+
+ /*
+ * Use IO_NOLIMIT because upper layer has already checked I/O
+ * for limits, so there is no need to do it again.
+ */
+ bp->b_error = vn_rdwr((bp->b_flags & B_READ) ? UIO_READ : UIO_WRITE,
+ sc->sc_vp, bp->b_data, bp->b_bcount, off, UIO_SYSSPACE, IO_NOLIMIT,
+ sc->sc_cred, &bp->b_resid, curproc);
if (bp->b_error)
bp->b_flags |= B_ERROR;
- bp->b_resid = auio.uio_resid;
-done:
+
+ /* Data in buffer cache needs to be in clear */
+ if (sc->sc_keyctx)
+ vndencryptbuf(sc, bp, 0);
+
+ goto done;
+
+ bad:
+ bp->b_flags |= B_ERROR;
+ bp->b_resid = bp->b_bcount;
+ done:
s = splbio();
biodone(bp);
splx(s);
@@ -328,18 +326,6 @@ done:
int
vndread(dev_t dev, struct uio *uio, int flags)
{
- int unit = DISKUNIT(dev);
- struct vnd_softc *sc;
-
- DNPRINTF(VDB_FOLLOW, "vndread(%x, %p)\n", dev, uio);
-
- if (unit >= numvnd)
- return (ENXIO);
- sc = &vnd_softc[unit];
-
- if ((sc->sc_flags & VNF_INITED) == 0)
- return (ENXIO);
-
return (physio(vndstrategy, dev, B_READ, minphys, uio));
}
@@ -347,18 +333,6 @@ vndread(dev_t dev, struct uio *uio, int
int
vndwrite(dev_t dev, struct uio *uio, int flags)
{
- int unit = DISKUNIT(dev);
- struct vnd_softc *sc;
-
- DNPRINTF(VDB_FOLLOW, "vndwrite(%x, %p)\n", dev, uio);
-
- if (unit >= numvnd)
- return (ENXIO);
- sc = &vnd_softc[unit];
-
- if ((sc->sc_flags & VNF_INITED) == 0)
- return (ENXIO);
-
return (physio(vndstrategy, dev, B_WRITE, minphys, uio));
}
@@ -410,6 +384,12 @@ vndioctl(dev_t dev, u_long cmd, caddr_t
if (sc->sc_flags & VNF_INITED)
return (EBUSY);
+ if (vio->vnd_secsize < 512 || vio->vnd_secsize > 16384 ||
+ (vio->vnd_secsize & (vio->vnd_secsize - 1)) != 0 ||
+ vio->vnd_ntracks == 0 || vio->vnd_ntracks > 512 ||
+ vio->vnd_nsectors == 0 || vio->vnd_nsectors > 4096)
+ return (EINVAL);
+
if ((error = disk_lock(&sc->sc_dk)) != 0)
return (error);
@@ -489,7 +469,6 @@ vndioctl(dev_t dev, u_long cmd, caddr_t
sc->sc_vp, (unsigned long long)sc->sc_size);
/* Attach the disk. */
- sc->sc_dk.dk_name = sc->sc_dev.dv_xname;
disk_attach(&sc->sc_dev, &sc->sc_dk);
disk_unlock(&sc->sc_dk);
@@ -625,29 +604,20 @@ vndioctl(dev_t dev, u_long cmd, caddr_t
int
vndsetcred(struct vnd_softc *sc, struct ucred *cred)
{
- struct uio auio;
- struct iovec aiov;
- char *tmpbuf;
+ void *buf;
+ size_t len;
int error;
- struct proc *p = curproc;
sc->sc_cred = crdup(cred);
- tmpbuf = malloc(DEV_BSIZE, M_TEMP, M_WAITOK);
+
+ len = MIN(DEV_BSIZE, sc->sc_size * sc->sc_secsize);
+ buf = malloc(len, M_TEMP, M_WAITOK);
/* XXX: Horrible kludge to establish credentials for NFS */
- aiov.iov_base = tmpbuf;
- aiov.iov_len = MIN(DEV_BSIZE, sc->sc_size * sc->sc_secsize);
- auio.uio_iov = &aiov;
- auio.uio_iovcnt = 1;
- auio.uio_offset = 0;
- auio.uio_rw = UIO_READ;
- auio.uio_segflg = UIO_SYSSPACE;
- auio.uio_resid = aiov.iov_len;
- vn_lock(sc->sc_vp, LK_RETRY | LK_EXCLUSIVE, p);
- error = VOP_READ(sc->sc_vp, &auio, 0, sc->sc_cred);
- VOP_UNLOCK(sc->sc_vp, 0, p);
+ error = vn_rdwr(UIO_READ, sc->sc_vp, buf, len, 0,
+ UIO_SYSSPACE, IO_NOLIMIT, sc->sc_cred, NULL, curproc);
- free(tmpbuf, M_TEMP);
+ free(buf, M_TEMP);
return (error);
}
@@ -655,13 +625,12 @@ void
vndclear(struct vnd_softc *sc)
{
struct vnode *vp = sc->sc_vp;
- struct proc *p = curproc; /* XXX */
DNPRINTF(VDB_FOLLOW, "vndclear(%p): vp %p\n", sc, vp);
if (vp == NULL)
panic("vndioctl: null vp");
- (void) vn_close(vp, VNDRW(sc), sc->sc_cred, p);
+ (void) vn_close(vp, VNDRW(sc), sc->sc_cred, curproc);
crfree(sc->sc_cred);
sc->sc_flags = 0;
sc->sc_vp = NULL;
@@ -673,18 +642,11 @@ vndclear(struct vnd_softc *sc)
daddr64_t
vndsize(dev_t dev)
{
- int unit = DISKUNIT(dev);
- struct vnd_softc *sc = &vnd_softc[unit];
-
- if (unit >= numvnd || (sc->sc_flags & VNF_INITED) == 0)
- return (-1);
- return (sc->sc_size * (sc->sc_secsize / DEV_BSIZE));
+ return (-1);
}
int
vnddump(dev_t dev, daddr64_t blkno, caddr_t va, size_t size)
{
-
- /* Not implemented. */
return (ENXIO);
}