On Sat, Sep 02, 2023 at 08:57:03AM +0930, Brett Lymn wrote:
> On Thu, Aug 31, 2023 at 10:24:07AM +0200, J. Hannken-Illjes wrote:
> > 
> > I'm short on time, some remarks:
> > 
> 
> Thanks, I will have a closer look at these issues.  This could be where the 
> deadlock I was
> seeing is coming from.
> 

OK, I have, finally, updated the diff in line with the comments (thanks
again for the good feedback).

-- 
Brett Lymn
--
Sent from my NetBSD device.

"We are were wolves",
"You mean werewolves?",
"No we were wolves, now we are something else entirely",
"Oh"
Index: kern_veriexec.c
===================================================================
RCS file: /cvsroot/src/sys/kern/kern_veriexec.c,v
retrieving revision 1.27
diff -u -r1.27 kern_veriexec.c
--- kern_veriexec.c     9 Apr 2023 09:18:09 -0000       1.27
+++ kern_veriexec.c     18 Dec 2023 21:12:46 -0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: kern_veriexec.c,v 1.27 2023/04/09 09:18:09 riastradh Exp $     
*/
+/*     $NetBSD$        */
 
 /*-
  * Copyright (c) 2005, 2006 Elad Efrat <e...@netbsd.org>
@@ -29,7 +29,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: kern_veriexec.c,v 1.27 2023/04/09 09:18:09 
riastradh Exp $");
+__KERNEL_RCSID(0, "$NetBSD$");
 
 #include "opt_veriexec.h"
 
@@ -38,6 +38,7 @@
 #include <sys/kmem.h>
 #include <sys/vnode.h>
 #include <sys/namei.h>
+#include <sys/exec.h>
 #include <sys/once.h>
 #include <sys/proc.h>
 #include <sys/rwlock.h>
@@ -49,6 +50,7 @@
 #include <sys/sha2.h>
 #include <sys/rmd160.h>
 #include <sys/md5.h>
+#include <uvm/uvm_extern.h>
 #include <sys/fileassoc.h>
 #include <sys/kauth.h>
 #include <sys/conf.h>
@@ -68,11 +70,6 @@
 #define VERIEXEC_UNLOCKED      0x00    /* Nothing locked, callee does it */
 #define VERIEXEC_LOCKED                0x01    /* Global op lock held */
 
-/* state of file locking for veriexec_file_verify */
-#define VERIEXEC_FILE_UNLOCKED 0x02    /* Nothing locked, callee does it */
-#define VERIEXEC_FILE_LOCKED   0x04    /* File locked */
-
-#define VERIEXEC_RW_UPGRADE(lock)      while((rw_tryupgrade(lock)) == 0){};
 
 struct veriexec_fpops {
        const char *type;
@@ -86,11 +83,17 @@
 
 /* Veriexec per-file entry data. */
 struct veriexec_file_entry {
-       krwlock_t lock;                         /* r/w lock */
+       kmutex_t vfe_lock;                      /* operations lock */
+       int vfe_refcnt;                         /* number of refs */
+       kcondvar_t vfe_cv;                      /* wait for no refs */
        u_char *filename;                       /* File name. */
        u_char type;                            /* Entry type. */
        u_char status;                          /* Evaluation status. */
+       u_char page_fp_status;                  /* Per-page FP status. */
        u_char *fp;                             /* Fingerprint. */
+       void *page_fp;                          /* Per-page fingerprints */
+       size_t npages;                          /* Number of pages. */
+       size_t last_page_size;                  /* To support < PAGE_SIZE */
        struct veriexec_fpops *ops;             /* Fingerprint ops vector*/
        size_t filename_len;                    /* Length of filename. */
 };
@@ -120,15 +123,69 @@
     void *, void *, void *, void *);
 static struct veriexec_fpops *veriexec_fpops_lookup(const char *);
 static void veriexec_file_free(struct veriexec_file_entry *);
+static void veriexec_busy(void);
+static void veriexec_unbusy(void);
+static void vfe_busy(struct veriexec_file_entry *vfe);
+static void vfe_unbusy(struct veriexec_file_entry *vfe);
 
 static unsigned int veriexec_tablecount = 0;
 
 /*
- * Veriexec operations global lock - most ops hold this as a read
- * lock, it is upgraded to a write lock when destroying veriexec file
- * table entries.
+ * Veriexec operations global lock.
+ */
+static kmutex_t veriexec_op_lock;
+static kcondvar_t veriexec_cv;
+static int veriexec_refcnt;
+
+/*
+ * Busy a file table entry.
+ */
+static void
+vfe_busy(struct veriexec_file_entry *vfe)
+{
+       mutex_enter(&vfe->vfe_lock);
+       vfe->vfe_refcnt++;
+       mutex_exit(&vfe->vfe_lock);
+}
+
+/*
+ * Drop a reference to a file table entry, if count zero then wake
+ * up any potential waiters.
+ */
+static void
+vfe_unbusy(struct veriexec_file_entry *vfe)
+{
+       mutex_enter(&vfe->vfe_lock);
+       KASSERT(vfe->vfe_refcnt > 0);
+       if (--(vfe->vfe_refcnt) == 0)
+               cv_broadcast(&vfe->vfe_cv);
+       mutex_exit(&vfe->vfe_lock);
+}
+
+/*
+ * Grab the oplock and bump the reference count.
  */
-static krwlock_t veriexec_op_lock;
+static void
+veriexec_busy(void)
+{
+       mutex_enter(&veriexec_op_lock);
+       veriexec_refcnt++;
+       mutex_exit(&veriexec_op_lock);
+}
+
+/*
+ * Decrement the reference count, if we hit count zero then wake up
+ * any possible waiters.
+ */
+static void
+veriexec_unbusy(void)
+{
+       mutex_enter(&veriexec_op_lock);
+       KASSERT(veriexec_refcnt > 0);
+       if (--veriexec_refcnt == 0)
+               cv_broadcast(&veriexec_cv);
+       mutex_exit(&veriexec_op_lock);
+}
 
 /*
  * Sysctl helper routine for Veriexec.
@@ -221,7 +278,7 @@
 }
 
 /*
- * Add ops to the fingerprint ops vector list.
+ * Add ops to the fignerprint ops vector list.
  */
 int
 veriexec_fpops_add(const char *fp_type, size_t hash_len, size_t ctx_size,
@@ -230,17 +287,16 @@
 {
        struct veriexec_fpops *ops;
 
-       KASSERT(init != NULL);
-       KASSERT(update != NULL);
-       KASSERT(final != NULL);
-       KASSERT(hash_len != 0);
-       KASSERT(ctx_size != 0);
-       KASSERT(fp_type != NULL);
+       /* Sanity check all parameters. */
+       if ((fp_type == NULL) || (hash_len == 0) || (ctx_size == 0) ||
+           (init == NULL) || (update == NULL) || (final == NULL))
+               return (EFAULT);
 
        if (veriexec_fpops_lookup(fp_type) != NULL)
                return (EEXIST);
 
        ops = kmem_alloc(sizeof(*ops), KM_SLEEP);
+
        ops->type = fp_type;
        ops->hash_len = hash_len;
        ops->context_size = ctx_size;
@@ -310,7 +366,7 @@
                return KAUTH_RESULT_DEFER;
 
        result = KAUTH_RESULT_DEFER;
-       req = (enum kauth_system_req)(uintptr_t)arg0;
+       req = (enum kauth_system_req)arg0;
 
        if (req == KAUTH_REQ_SYSTEM_VERIEXEC_MODIFY &&
            veriexec_strict > VERIEXEC_LEARNING) {
@@ -351,14 +407,21 @@
            NULL) == NULL)
                panic("Veriexec: Can't listen on system scope");
 
-       rw_init(&veriexec_op_lock);
+       mutex_init(&veriexec_op_lock, MUTEX_DEFAULT, IPL_NONE);
+       cv_init(&veriexec_cv, "Veriexec");
+       veriexec_refcnt = 0;
 
 #define        FPOPS_ADD(a, b, c, d, e, f)                     \
-       veriexec_fpops_add(a, b, c,                     \
+       veriexec_fpops_add(a, b, c,                     \
            __FPTRCAST(veriexec_fpop_init_t, d),        \
            __FPTRCAST(veriexec_fpop_update_t, e),      \
            __FPTRCAST(veriexec_fpop_final_t, f))
 
+#ifdef VERIFIED_EXEC_FP_RMD160
+       FPOPS_ADD("RMD160", RMD160_DIGEST_LENGTH, sizeof(RMD160_CTX),
+           RMD160Init, RMD160Update, RMD160Final);
+#endif /* VERIFIED_EXEC_FP_RMD160 */
+
 #ifdef VERIFIED_EXEC_FP_SHA256
        FPOPS_ADD("SHA256", SHA256_DIGEST_LENGTH, sizeof(SHA256_CTX),
            SHA256_Init, SHA256_Update, SHA256_Final);
@@ -374,6 +437,16 @@
            SHA512_Init, SHA512_Update, SHA512_Final);
 #endif /* VERIFIED_EXEC_FP_SHA512 */
 
+#ifdef VERIFIED_EXEC_FP_SHA1
+       FPOPS_ADD("SHA1", SHA1_DIGEST_LENGTH, sizeof(SHA1_CTX),
+           SHA1Init, SHA1Update, SHA1Final);
+#endif /* VERIFIED_EXEC_FP_SHA1 */
+
+#ifdef VERIFIED_EXEC_FP_MD5
+       FPOPS_ADD("MD5", MD5_DIGEST_LENGTH, sizeof(MD5_CTX),
+           MD5Init, MD5Update, MD5Final);
+#endif /* VERIFIED_EXEC_FP_MD5 */
+
 #undef FPOPS_ADD
 }
 
@@ -400,57 +473,104 @@
  * NOTE: vfe is assumed to be locked for writing on entry.
  */
 static int
-veriexec_fp_calc(struct lwp *l, struct vnode *vp, int file_lock_state,
+veriexec_fp_calc(struct lwp *l, struct vnode *vp, int lock_state,
     struct veriexec_file_entry *vfe, u_char *fp)
 {
        struct vattr va;
-       void *ctx;
-       u_char *buf;
+       void *ctx, *page_ctx;
+       u_char *buf, *page_fp;
        off_t offset, len;
-       size_t resid;
-       int error;
+       size_t resid, npages;
+       int error, do_perpage, pagen;
 
-       KASSERT(file_lock_state != VERIEXEC_LOCKED);
-       KASSERT(file_lock_state != VERIEXEC_UNLOCKED);
-
-       if (file_lock_state == VERIEXEC_FILE_UNLOCKED)
+       if (lock_state == VERIEXEC_UNLOCKED)
                vn_lock(vp, LK_SHARED | LK_RETRY);
        error = VOP_GETATTR(vp, &va, l->l_cred);
-       if (file_lock_state == VERIEXEC_FILE_UNLOCKED)
+       if (lock_state == VERIEXEC_UNLOCKED)
                VOP_UNLOCK(vp);
        if (error)
                return (error);
 
+#ifdef notyet /* XXX - for now */
+       if ((vfe->type & VERIEXEC_UNTRUSTED) &&
+           (vfe->page_fp_status == PAGE_FP_NONE))
+               do_perpage = 1;
+       else
+#endif  /* notyet */
+               do_perpage = 0;
+
        ctx = kmem_alloc(vfe->ops->context_size, KM_SLEEP);
        buf = kmem_alloc(PAGE_SIZE, KM_SLEEP);
 
+       page_ctx = NULL;
+       page_fp = NULL;
+       npages = 0;
+       if (do_perpage) {
+               npages = (va.va_size >> PAGE_SHIFT) + 1;
+               page_fp = kmem_alloc(vfe->ops->hash_len * npages, KM_SLEEP);
+               vfe->page_fp = page_fp;
+               page_ctx = kmem_alloc(vfe->ops->context_size, KM_SLEEP);
+       }
+
        (vfe->ops->init)(ctx);
 
        len = 0;
        error = 0;
+       pagen = 0;
        for (offset = 0; offset < va.va_size; offset += PAGE_SIZE) {
                len = ((va.va_size - offset) < PAGE_SIZE) ?
                    (va.va_size - offset) : PAGE_SIZE;
 
                error = vn_rdwr(UIO_READ, vp, buf, len, offset,
-                               UIO_SYSSPACE,
-                               ((file_lock_state == VERIEXEC_FILE_LOCKED)?
-                                IO_NODELOCKED : 0),
+                               UIO_SYSSPACE, lock_state,
                                l->l_cred, &resid, NULL);
 
                if (error) {
+                       if (do_perpage) {
+                               kmem_free(vfe->page_fp,
+                                   vfe->ops->hash_len * npages);
+                               vfe->page_fp = NULL;
+                       }
+
                        goto bad;
                }
 
                (vfe->ops->update)(ctx, buf, (unsigned int) len);
 
+               if (do_perpage) {
+                       (vfe->ops->init)(page_ctx);
+                       (vfe->ops->update)(page_ctx, buf, (unsigned int)len);
+                       (vfe->ops->final)(page_fp, page_ctx);
+
+                       if (veriexec_verbose >= 2) {
+                               int i;
+
+                               printf("hash for page %d: ", pagen);
+                               for (i = 0; i < vfe->ops->hash_len; i++)
+                                       printf("%02x", page_fp[i]);
+                               printf("\n");
+                       }
+
+                       page_fp += vfe->ops->hash_len;
+                       pagen++;
+               }
+
                if (len != PAGE_SIZE)
                        break;
        }
 
        (vfe->ops->final)(fp, ctx);
 
+       if (do_perpage) {
+               vfe->last_page_size = len;
+               vfe->page_fp_status = PAGE_FP_READY;
+               vfe->npages = npages;
+       }
+
 bad:
+       if (do_perpage)
+               kmem_free(page_ctx, vfe->ops->context_size);
+
        kmem_free(ctx, vfe->ops->context_size);
        kmem_free(buf, PAGE_SIZE);
 
@@ -479,32 +599,6 @@
        return (memcmp(fp1, fp2, ops->hash_len));
 }
 
-static int
-veriexec_fp_status(struct lwp *l, struct vnode *vp, int file_lock_state,
-    struct veriexec_file_entry *vfe, u_char *status)
-{
-       size_t hash_len = vfe->ops->hash_len;
-       u_char *digest;
-       int error;
-
-       digest = kmem_zalloc(hash_len, KM_SLEEP);
-
-       error = veriexec_fp_calc(l, vp, file_lock_state, vfe, digest);
-       if (error)
-               goto out;
-
-       /* Compare fingerprint with loaded data. */
-       if (veriexec_fp_cmp(vfe->ops, vfe->fp, digest) == 0)
-               *status = FINGERPRINT_VALID;
-       else
-               *status = FINGERPRINT_NOMATCH;
-
-out:
-       kmem_free(digest, hash_len);
-       return error;
-}
-
-
 static struct veriexec_table_entry *
 veriexec_table_lookup(struct mount *mp)
 {
@@ -534,8 +628,12 @@
 veriexec_file_report(struct veriexec_file_entry *vfe, const u_char *msg,
     const u_char *filename, struct lwp *l, int f)
 {
+       if (msg == NULL)
+               return;
+
        if (vfe != NULL && vfe->filename != NULL)
                filename = vfe->filename;
+
        if (filename == NULL)
                return;
 
@@ -561,21 +659,15 @@
  * exec_script(), 'flag' will be VERIEXEC_INDIRECT.  If we are called from
  * vn_open(), 'flag' will be VERIEXEC_FILE.
  *
- * 'veriexec_op_lock' must be locked (and remains locked).
- *
- * NOTE: The veriexec file entry pointer (vfep) will be returned LOCKED
- *       on no error.
+ * NOTE: The veriexec file entry pointer (vfep) will be returned with a
+ *       reference held on no error.
  */
 static int
-veriexec_file_verify(struct lwp *l, struct vnode *vp, const u_char *name,
-    int flag, int file_lock_state, struct veriexec_file_entry **vfep)
+veriexec_file_verify(struct lwp *l, struct vnode *vp, int lock_flag,
+    const u_char *name, int flag, struct veriexec_file_entry **vfep)
 {
        struct veriexec_file_entry *vfe;
-       int error = 0;
-
-       KASSERT(rw_lock_held(&veriexec_op_lock));
-       KASSERT(file_lock_state != VERIEXEC_LOCKED);
-       KASSERT(file_lock_state != VERIEXEC_UNLOCKED);
+       int error;
 
 #define VFE_NEEDS_EVAL(vfe) ((vfe->status == FINGERPRINT_NOTEVAL) || \
                             (vfe->type & VERIEXEC_UNTRUSTED))
@@ -586,54 +678,76 @@
        if (vp->v_type != VREG)
                return (0);
 
+       veriexec_busy();
+
        /* Lookup veriexec table entry, save pointer if requested. */
        vfe = veriexec_get(vp);
        if (vfep != NULL)
                *vfep = vfe;
+       if (vfe == NULL)
+               goto out;
 
-       /* No entry in the veriexec tables. */
-       if (vfe == NULL) {
-               veriexec_file_report(NULL, "No entry.", name,
-                   l, REPORT_VERBOSE);
-
-               /*
-                * Lockdown mode: Deny access to non-monitored files.
-                * IPS mode: Deny execution of non-monitored files.
-                */
-               if ((veriexec_strict >= VERIEXEC_LOCKDOWN) ||
-                   ((veriexec_strict >= VERIEXEC_IPS) &&
-                    (flag != VERIEXEC_FILE)))
-                       return (EPERM);
-
-               return (0);
-       }
+       error = 0;
 
        /*
         * Grab the lock for the entry, if we need to do an evaluation
-        * then the lock is a write lock, after we have the write
-        * lock, check if we really need it - some other thread may
-        * have already done the work for us.
+        * then drop our reference and wait for all other threads to
+        * finish their work.  Once woken we check to see if another
+        * thread has done our work for us.
         */
        if (VFE_NEEDS_EVAL(vfe)) {
-               rw_enter(&vfe->lock, RW_WRITER);
-               if (!VFE_NEEDS_EVAL(vfe))
-                       rw_downgrade(&vfe->lock);
-       } else
-               rw_enter(&vfe->lock, RW_READER);
+               mutex_enter(&veriexec_op_lock);
+               KASSERT(veriexec_refcnt > 0);
+               veriexec_refcnt--;
+               while (veriexec_refcnt > 0) {
+                       cv_wait(&veriexec_cv, &veriexec_op_lock);
+               }
+
+               if (!VFE_NEEDS_EVAL(vfe)) {
+                       veriexec_refcnt++;
+                       mutex_exit(&veriexec_op_lock);
+               }
+       }
+
+       vfe_busy(vfe);
 
        /* Evaluate fingerprint if needed. */
        if (VFE_NEEDS_EVAL(vfe)) {
-               u_char status;
+               u_char *digest;
+
+               veriexec_refcnt++;
+               mutex_exit(&veriexec_op_lock);
+               mutex_enter(&vfe->vfe_lock);
+               if (vfe->vfe_refcnt > 1) {
+                       vfe->vfe_refcnt--;
+                       while (vfe->vfe_refcnt > 0) {
+                               cv_wait(&vfe->vfe_cv, &vfe->vfe_lock);
+                       }
+                       vfe->vfe_refcnt++;
+               }
+
+               /* Calculate fingerprint for on-disk file. */
+               digest = kmem_zalloc(vfe->ops->hash_len, KM_SLEEP);
 
-               error = veriexec_fp_status(l, vp, file_lock_state, vfe, 
&status);
+               error = veriexec_fp_calc(l, vp, lock_flag, vfe, digest);
                if (error) {
                        veriexec_file_report(vfe, "Fingerprint calculation 
error.",
                            name, NULL, REPORT_ALWAYS);
-                       rw_exit(&vfe->lock);
+                       kmem_free(digest, vfe->ops->hash_len);
+                       vfe_unbusy(vfe);
+                       /* to get here we hold the op lock */
+                       mutex_exit(&veriexec_op_lock);
                        return (error);
                }
-               vfe->status = status;
-               rw_downgrade(&vfe->lock);
+
+               /* Compare fingerprint with loaded data. */
+               if (veriexec_fp_cmp(vfe->ops, vfe->fp, digest) == 0)
+                       vfe->status = FINGERPRINT_VALID;
+               else
+                       vfe->status = FINGERPRINT_NOMATCH;
+
+               kmem_free(digest, vfe->ops->hash_len);
+               mutex_exit(&vfe->vfe_lock);
        }
 
        if (!(vfe->type & flag)) {
@@ -642,20 +756,43 @@
 
                /* IPS mode: Enforce access type. */
                if (veriexec_strict >= VERIEXEC_IPS) {
-                       rw_exit(&vfe->lock);
+                       vfe_unbusy(vfe);
+                       veriexec_unbusy();
                        return (EPERM);
                }
        }
 
-       switch (vfe->status) {
+ out:
+       /* No entry in the veriexec tables. */
+       if (vfe == NULL) {
+               veriexec_file_report(NULL, "No entry.", name,
+                   l, REPORT_VERBOSE);
+
+               veriexec_unbusy();
+
+               /*
+                * Lockdown mode: Deny access to non-monitored files.
+                * IPS mode: Deny execution of non-monitored files.
+                */
+               if ((veriexec_strict >= VERIEXEC_LOCKDOWN) ||
+                   ((veriexec_strict >= VERIEXEC_IPS) &&
+                    (flag != VERIEXEC_FILE)))
+                       return (EPERM);
+
+               return (0);
+       }
+
+        switch (vfe->status) {
        case FINGERPRINT_NOTEVAL:
                /* Should not happen. */
-               rw_exit(&vfe->lock);
+               vfe_unbusy(vfe);
+               veriexec_unbusy();
                veriexec_file_report(vfe, "Not-evaluated status "
                    "post evaluation; inconsistency detected.", name,
                    NULL, REPORT_ALWAYS|REPORT_PANIC);
-               __builtin_unreachable();
-               /* NOTREACHED */
+
+               /*NOTREACHED*/
+               break;
 
        case FINGERPRINT_VALID:
                /* Valid fingerprint. */
@@ -671,7 +808,7 @@
 
                /* IDS mode: Deny access on fingerprint mismatch. */
                if (veriexec_strict >= VERIEXEC_IDS) {
-                       rw_exit(&vfe->lock);
+                       vfe_unbusy(vfe);
                        error = EPERM;
                }
 
@@ -679,11 +816,13 @@
 
        default:
                /* Should never happen. */
-               rw_exit(&vfe->lock);
+               vfe_unbusy(vfe);
+               veriexec_unbusy();
                veriexec_file_report(vfe, "Invalid status "
                    "post evaluation.", name, NULL, REPORT_ALWAYS|REPORT_PANIC);
-               /* NOTREACHED */
-       }
+        }
+
+       veriexec_unbusy();
 
        return (error);
 }
@@ -698,13 +837,10 @@
        if (veriexec_bypass && (veriexec_strict == VERIEXEC_LEARNING))
                return 0;
 
-       rw_enter(&veriexec_op_lock, RW_READER);
-       r = veriexec_file_verify(l, vp, name, flag, VERIEXEC_FILE_UNLOCKED,
-           &vfe);
-       rw_exit(&veriexec_op_lock);
+       r = veriexec_file_verify(l, vp, 0, name, flag, &vfe);
 
        if ((r  == 0) && (vfe != NULL))
-               rw_exit(&vfe->lock);
+               vfe_unbusy(vfe);
 
        if (found != NULL)
                *found = (vfe != NULL) ? true : false;
@@ -712,6 +848,81 @@
        return (r);
 }
 
+#ifdef notyet
+/*
+ * Evaluate per-page fingerprints.
+ */
+int
+veriexec_page_verify(struct veriexec_file_entry *vfe, struct vm_page *pg,
+    size_t idx, struct lwp *l)
+{
+       void *ctx;
+       u_char *fp;
+       u_char *page_fp;
+       int error;
+       vaddr_t kva;
+
+       if (vfe->page_fp_status == PAGE_FP_NONE)
+               return (0);
+
+       if (vfe->page_fp_status == PAGE_FP_FAIL)
+               return (EPERM);
+
+       if (idx >= vfe->npages)
+               return (0);
+
+       ctx = kmem_alloc(vfe->ops->context_size, KM_SLEEP);
+       fp = kmem_alloc(vfe->ops->hash_len, KM_SLEEP);
+       kva = uvm_km_alloc(kernel_map, PAGE_SIZE, VM_PGCOLOR_BUCKET(pg),
+           UVM_KMF_COLORMATCH | UVM_KMF_VAONLY | UVM_KMF_WAITVA);
+       pmap_kenter_pa(kva, VM_PAGE_TO_PHYS(pg), VM_PROT_READ, 0);
+       pmap_update(pmap_kernel());
+
+       page_fp = (u_char *) vfe->page_fp + (vfe->ops->hash_len * idx);
+       (vfe->ops->init)(ctx);
+       (vfe->ops->update)(ctx, (void *) kva,
+                          ((vfe->npages - 1) == idx) ? vfe->last_page_size
+                                                     : PAGE_SIZE);
+       (vfe->ops->final)(fp, ctx);
+
+       pmap_kremove(kva, PAGE_SIZE);
+       pmap_update(pmap_kernel());
+       uvm_km_free(kernel_map, kva, PAGE_SIZE, UVM_KMF_VAONLY);
+
+       error = veriexec_fp_cmp(vfe->ops, page_fp, fp);
+       if (error) {
+               const char *msg;
+
+               if (veriexec_strict > VERIEXEC_LEARNING) {
+                       msg = "Pages modified: Killing process.";
+               } else {
+                       msg = "Pages modified.";
+                       error = 0;
+               }
+
+               veriexec_file_report(msg, "[page_in]", l,
+                   REPORT_ALWAYS|REPORT_ALARM);
+
+               if (error) {
+                       ksiginfo_t ksi;
+
+                       KSI_INIT(&ksi);
+                       ksi.ksi_signo = SIGKILL;
+                       ksi.ksi_code = SI_NOINFO;
+                       ksi.ksi_pid = l->l_proc->p_pid;
+                       ksi.ksi_uid = 0;
+
+                       kpsignal(l->l_proc, &ksi, NULL);
+               }
+       }
+
+       kmem_free(ctx, vfe->ops->context_size);
+       kmem_free(fp, vfe->ops->hash_len);
+
+       return (error);
+}
+#endif /* notyet */
+
 /*
  * Veriexec remove policy code.
  */
@@ -724,9 +935,10 @@
        if (veriexec_bypass && (veriexec_strict == VERIEXEC_LEARNING))
                return 0;
 
-       rw_enter(&veriexec_op_lock, RW_READER);
+       veriexec_busy();
+
        vfe = veriexec_get(vp);
-       rw_exit(&veriexec_op_lock);
+       veriexec_unbusy();
 
        if (vfe == NULL) {
                /* Lockdown mode: Deny access to non-monitored files. */
@@ -745,6 +957,7 @@
        else
                error = veriexec_file_delete(l, vp);
 
+
        return error;
 }
 
@@ -759,84 +972,87 @@
 veriexec_renamechk(struct lwp *l, struct vnode *fromvp, const char *fromname,
     struct vnode *tovp, const char *toname)
 {
-       struct veriexec_file_entry *fvfe = NULL, *tvfe = NULL;
+       struct veriexec_file_entry *vfe, *tvfe;
 
        if (veriexec_bypass && (veriexec_strict == VERIEXEC_LEARNING))
                return 0;
 
-       rw_enter(&veriexec_op_lock, RW_READER);
+       veriexec_busy();
 
        if (veriexec_strict >= VERIEXEC_LOCKDOWN) {
                log(LOG_ALERT, "Veriexec: Preventing rename of `%s' to "
                    "`%s', uid=%u, pid=%u: Lockdown mode.\n", fromname, toname,
                    kauth_cred_geteuid(l->l_cred), l->l_proc->p_pid);
-               rw_exit(&veriexec_op_lock);
+
+               veriexec_unbusy();
                return (EPERM);
        }
 
-       fvfe = veriexec_get(fromvp);
+       vfe = veriexec_get(fromvp);
+       tvfe = NULL;
        if (tovp != NULL)
                tvfe = veriexec_get(tovp);
 
-       if ((fvfe == NULL) && (tvfe == NULL)) {
-               /* None of them is monitored */
-               rw_exit(&veriexec_op_lock);
-               return 0;
-       }
+       if ((vfe != NULL) || (tvfe != NULL)) {
+               if (veriexec_strict >= VERIEXEC_IPS) {
+                       log(LOG_ALERT, "Veriexec: Preventing rename of `%s' "
+                           "to `%s', uid=%u, pid=%u: IPS mode, %s "
+                           "monitored.\n", fromname, toname,
+                           kauth_cred_geteuid(l->l_cred),
+                           l->l_proc->p_pid, (vfe != NULL && tvfe != NULL) ?
+                           "files" : "file");
 
-       if (veriexec_strict >= VERIEXEC_IPS) {
-               log(LOG_ALERT, "Veriexec: Preventing rename of `%s' "
-                   "to `%s', uid=%u, pid=%u: IPS mode, %s "
-                   "monitored.\n", fromname, toname,
-                   kauth_cred_geteuid(l->l_cred),
-                   l->l_proc->p_pid, (fvfe != NULL && tvfe != NULL) ?
-                   "files" : "file");
-               rw_exit(&veriexec_op_lock);
-               return (EPERM);
-       }
+                       veriexec_unbusy();
+                       return (EPERM);
+               }
 
-       if (fvfe != NULL) {
                /*
                 * Monitored file is renamed; filename no longer relevant.
-                */
-
-               /*
+                *
                 * XXX: We could keep the buffer, and when (and if) updating the
                 * XXX: filename post-rename, re-allocate it only if it's not
                 * XXX: big enough for the new filename.
                 */
+               if (vfe != NULL) {
+                       /* XXXX get write lock on vfe here? */
 
-               /* XXX: Get write lock on fvfe here? */
-
-               VERIEXEC_RW_UPGRADE(&veriexec_op_lock);
-               /* once we have the op lock in write mode
-                * there should be no locks on any file
-                * entries so we can destroy the object.
-                */
-
-               if (fvfe->filename_len > 0)
-                       kmem_free(fvfe->filename, fvfe->filename_len);
-
-               fvfe->filename = NULL;
-               fvfe->filename_len = 0;
-
-               rw_downgrade(&veriexec_op_lock);
-       }
+                       mutex_enter(&veriexec_op_lock);
+                       veriexec_refcnt--;
+                       while (veriexec_refcnt > 0) {
+                               cv_wait(&veriexec_cv, &veriexec_op_lock);
+                       }
+
+                       /*
+                        * once we get here there should be no locks
+                        * on any file entries so we can destroy the
+                        * object.
+                        */
+
+                       if (vfe->filename_len > 0)
+                               kmem_free(vfe->filename, vfe->filename_len);
+
+                       vfe->filename = NULL;
+                       vfe->filename_len = 0;
+                       veriexec_refcnt++;
+                       mutex_exit(&veriexec_op_lock);
+               }
 
-       log(LOG_NOTICE, "Veriexec: %s file `%s' renamed to "
-           "%s file `%s', uid=%u, pid=%u.\n", (fvfe != NULL) ?
-           "Monitored" : "Non-monitored", fromname, (tvfe != NULL) ?
-           "monitored" : "non-monitored", toname,
-           kauth_cred_geteuid(l->l_cred), l->l_proc->p_pid);
+               log(LOG_NOTICE, "Veriexec: %s file `%s' renamed to "
+                   "%s file `%s', uid=%u, pid=%u.\n", (vfe != NULL) ?
+                   "Monitored" : "Non-monitored", fromname, (tvfe != NULL) ?
+                   "monitored" : "non-monitored", toname,
+                   kauth_cred_geteuid(l->l_cred), l->l_proc->p_pid);
 
-       rw_exit(&veriexec_op_lock);
+               veriexec_unbusy();
 
-       if (tvfe != NULL) {
                /*
                 * Monitored file is overwritten. Remove the entry.
                 */
-               (void)veriexec_file_delete(l, tovp);
-       }
+               if (tvfe != NULL)
+                       (void)veriexec_file_delete(l, tovp);
+
+       } else
+               veriexec_unbusy();
 
        return (0);
 }
@@ -845,37 +1061,38 @@
 veriexec_file_free(struct veriexec_file_entry *vfe)
 {
        if (vfe != NULL) {
+               KASSERT(vfe->vfe_refcnt == 0);
                if (vfe->fp != NULL)
                        kmem_free(vfe->fp, vfe->ops->hash_len);
+               if (vfe->page_fp != NULL)
+                       kmem_free(vfe->page_fp, vfe->ops->hash_len);
                if (vfe->filename != NULL)
                        kmem_free(vfe->filename, vfe->filename_len);
-               rw_destroy(&vfe->lock);
+               mutex_destroy(&vfe->vfe_lock);
+               cv_destroy(&vfe->vfe_cv);
                kmem_free(vfe, sizeof(*vfe));
        }
 }
 
 static void
-veriexec_file_purge(struct veriexec_file_entry *vfe, int have_lock)
+veriexec_file_purge(struct veriexec_file_entry *vfe)
 {
        if (vfe == NULL)
                return;
 
-       if (have_lock == VERIEXEC_UNLOCKED)
-               rw_enter(&vfe->lock, RW_WRITER);
-       else
-               VERIEXEC_RW_UPGRADE(&vfe->lock);
+       mutex_enter(&vfe->vfe_lock);
+       while (vfe->vfe_refcnt > 0) {
+               cv_wait(&vfe->vfe_cv, &vfe->vfe_lock);
+       }
 
        vfe->status = FINGERPRINT_NOTEVAL;
-       if (have_lock == VERIEXEC_UNLOCKED)
-               rw_exit(&vfe->lock);
-       else
-               rw_downgrade(&vfe->lock);
+       mutex_exit(&vfe->vfe_lock);
 }
 
 static void
 veriexec_file_purge_cb(struct veriexec_file_entry *vfe, void *cookie)
 {
-       veriexec_file_purge(vfe, VERIEXEC_UNLOCKED);
+       veriexec_file_purge(vfe);
 }
 
 /*
@@ -885,9 +1102,14 @@
 void
 veriexec_purge(struct vnode *vp)
 {
-       rw_enter(&veriexec_op_lock, RW_READER);
-       veriexec_file_purge(veriexec_get(vp), VERIEXEC_UNLOCKED);
-       rw_exit(&veriexec_op_lock);
+
+       mutex_enter(&veriexec_op_lock);
+       while (veriexec_refcnt > 0) {
+               cv_wait(&veriexec_cv, &veriexec_op_lock);
+       }
+
+       veriexec_file_purge(veriexec_get(vp));
+       mutex_exit(&veriexec_op_lock);
 }
 
 /*
@@ -924,7 +1146,7 @@
        struct veriexec_table_entry *vte;
 
        result = KAUTH_RESULT_DENY;
-       req = (enum kauth_device_req)(uintptr_t)arg0;
+       req = (enum kauth_device_req)arg0;
 
        switch (action) {
        case KAUTH_DEVICE_RAWIO_SPEC: {
@@ -967,15 +1189,16 @@
                case VERIEXEC_IDS:
                        result = KAUTH_RESULT_DEFER;
 
-                       rw_enter(&veriexec_op_lock, RW_WRITER);
+                       veriexec_busy();
                        fileassoc_table_run(bvp->v_mount, veriexec_hook,
                            (fileassoc_cb_t)veriexec_file_purge_cb, NULL);
-                       rw_exit(&veriexec_op_lock);
-
+                       veriexec_unbusy();
                        break;
+
                case VERIEXEC_IPS:
                        result = KAUTH_RESULT_DENY;
                        break;
+
                case VERIEXEC_LOCKDOWN:
                        result = KAUTH_RESULT_DENY;
                        break;
@@ -1034,21 +1257,18 @@
 /*
  * Add a file to be monitored by Veriexec.
  *
- * Expected elements in dict:
- *     file, fp, fp-type, entry-type, keep-filename, eval-on-load.
+ * Expected elements in dict: file, fp, fp-type, entry-type.
  */
 int
 veriexec_file_add(struct lwp *l, prop_dictionary_t dict)
 {
        struct veriexec_table_entry *vte;
-       struct veriexec_file_entry *vfe = NULL;
-       struct veriexec_file_entry *ovfe;
+       struct veriexec_file_entry *vfe = NULL, *hh;
        struct vnode *vp;
        const char *file, *fp_type;
        int error;
-       bool ignore_dup = false;
 
-       if (!prop_dictionary_get_string(dict, "file", &file))
+       if (!prop_dictionary_get_cstring_nocopy(dict, "file", &file))
                return (EINVAL);
 
        error = namei_simple_kernel(file, NSM_FOLLOW_NOEMULROOT, &vp);
@@ -1059,19 +1279,26 @@
        if (vp->v_type != VREG) {
                log(LOG_ERR, "Veriexec: Not adding `%s': Not a regular file.\n",
                    file);
+
                error = EBADF;
+
                goto out;
        }
 
        vfe = kmem_zalloc(sizeof(*vfe), KM_SLEEP);
-       rw_init(&vfe->lock);
+       mutex_init(&vfe->vfe_lock, MUTEX_DEFAULT, IPL_NONE);
+       cv_init(&vfe->vfe_cv, "veriexec file entry");
+       vfe->vfe_refcnt = 0;
 
        /* Lookup fingerprint hashing algorithm. */
-       fp_type = prop_string_value(prop_dictionary_get(dict, "fp-type"));
+       fp_type = prop_string_cstring_nocopy(prop_dictionary_get(dict,
+           "fp-type"));
        if ((vfe->ops = veriexec_fpops_lookup(fp_type)) == NULL) {
                log(LOG_ERR, "Veriexec: Invalid or unknown fingerprint type "
                    "`%s' for file `%s'.\n", fp_type, file);
+
                error = EOPNOTSUPP;
+
                goto out;
        }
 
@@ -1079,15 +1306,48 @@
            vfe->ops->hash_len) {
                log(LOG_ERR, "Veriexec: Bad fingerprint length for `%s'.\n",
                    file);
+
                error = EINVAL;
+
                goto out;
        }
 
        vfe->fp = kmem_alloc(vfe->ops->hash_len, KM_SLEEP);
-       memcpy(vfe->fp, prop_data_value(prop_dictionary_get(dict, "fp")),
+       memcpy(vfe->fp, prop_data_data_nocopy(prop_dictionary_get(dict, "fp")),
            vfe->ops->hash_len);
 
-       rw_enter(&veriexec_op_lock, RW_WRITER);
+       mutex_enter(&veriexec_op_lock);
+       while (veriexec_refcnt > 0) {
+               cv_wait(&veriexec_cv, &veriexec_op_lock);
+       }
+
+       /*
+        * See if we already have an entry for this file. If we do, then
+        * let the user know and silently pretend to succeed.
+        */
+       hh = veriexec_get(vp);
+       if (hh != NULL) {
+               bool fp_mismatch;
+
+               if (strcmp(vfe->ops->type, fp_type) ||
+                   memcmp(hh->fp, vfe->fp, hh->ops->hash_len))
+                       fp_mismatch = true;
+               else
+                       fp_mismatch = false;
+
+               if ((veriexec_verbose >= 1) || fp_mismatch)
+                       log(LOG_NOTICE, "Veriexec: Duplicate entry for `%s' "
+                           "ignored. (%s fingerprint)\n", file,
+                           fp_mismatch ? "different" : "same");
+
+               mutex_exit(&veriexec_op_lock);
+               veriexec_file_free(vfe);
+
+               /* XXX Should this be EEXIST if fp_mismatch is true? */
+               error = 0;
+
+               goto out;
+       }
 
        /* Continue entry initialization. */
        if (prop_dictionary_get_uint8(dict, "entry-type", &vfe->type) == FALSE)
@@ -1100,7 +1360,9 @@
                if (extra_flags) {
                        log(LOG_NOTICE, "Veriexec: Contaminated flags `0x%x' "
                            "for `%s', skipping.\n", extra_flags, file);
+
                        error = EINVAL;
+
                        goto unlock_out;
                }
        }
@@ -1110,36 +1372,36 @@
 
        vfe->status = FINGERPRINT_NOTEVAL;
        if (prop_bool_true(prop_dictionary_get(dict, "keep-filename"))) {
-               vfe->filename = kmem_strdupsize(file, &vfe->filename_len,
-                   KM_SLEEP);
+               vfe->filename_len = strlen(file) + 1;
+               vfe->filename = kmem_alloc(vfe->filename_len, KM_SLEEP);
+               strlcpy(vfe->filename, file, vfe->filename_len);
        } else
                vfe->filename = NULL;
 
+       vfe->page_fp = NULL;
+       vfe->page_fp_status = PAGE_FP_NONE;
+       vfe->npages = 0;
+       vfe->last_page_size = 0;
+
        if (prop_bool_true(prop_dictionary_get(dict, "eval-on-load")) ||
            (vfe->type & VERIEXEC_UNTRUSTED)) {
-               u_char status;
+               u_char *digest;
+
+               digest = kmem_zalloc(vfe->ops->hash_len, KM_SLEEP);
 
-               error = veriexec_fp_status(l, vp, VERIEXEC_FILE_UNLOCKED,
-                   vfe, &status);
-               if (error)
+               error = veriexec_fp_calc(l, vp, VERIEXEC_UNLOCKED,
+                                        vfe, digest);
+               if (error) {
+                       kmem_free(digest, vfe->ops->hash_len);
                        goto unlock_out;
-               vfe->status = status;
-       }
+               }
 
-       /*
-        * If we already have an entry for this file, and it matches
-        * the new entry exactly (except for the filename, which may
-        * hard-linked!), we just ignore the new entry.  If the new
-        * entry differs, report the error.
-        */
-       if ((ovfe = veriexec_get(vp)) != NULL) {
-               error = EEXIST;
-               if (vfe->type == ovfe->type &&
-                   vfe->status == ovfe->status &&
-                   vfe->ops == ovfe->ops &&
-                   memcmp(vfe->fp, ovfe->fp, vfe->ops->hash_len) == 0)
-                       ignore_dup = true;
-               goto unlock_out;
+               if (veriexec_fp_cmp(vfe->ops, vfe->fp, digest) == 0)
+                       vfe->status = FINGERPRINT_VALID;
+               else
+                       vfe->status = FINGERPRINT_NOMATCH;
+
+               kmem_free(digest, vfe->ops->hash_len);
        }
 
        vte = veriexec_table_lookup(vp->v_mount);
@@ -1158,22 +1420,18 @@
        veriexec_bypass = 0;
 
   unlock_out:
-       rw_exit(&veriexec_op_lock);
+       mutex_exit(&veriexec_op_lock);
 
   out:
        vrele(vp);
        if (error)
                veriexec_file_free(vfe);
 
-       if (ignore_dup && error == EEXIST)
-               error = 0;
-
        return (error);
 }
 
 int
-veriexec_table_delete(struct lwp *l, struct mount *mp)
-{
+veriexec_table_delete(struct lwp *l, struct mount *mp) {
        struct veriexec_table_entry *vte;
 
        vte = veriexec_table_lookup(mp);
@@ -1187,8 +1445,7 @@
 }
 
 int
-veriexec_file_delete(struct lwp *l, struct vnode *vp)
-{
+veriexec_file_delete(struct lwp *l, struct vnode *vp) {
        struct veriexec_table_entry *vte;
        int error;
 
@@ -1196,13 +1453,15 @@
        if (vte == NULL)
                return (ENOENT);
 
-       rw_enter(&veriexec_op_lock, RW_WRITER);
+       mutex_enter(&veriexec_op_lock);
+       while (veriexec_refcnt > 0) {
+               cv_wait(&veriexec_cv, &veriexec_op_lock);
+       }
+
        error = fileassoc_clear(vp, veriexec_hook);
-       rw_exit(&veriexec_op_lock);
-       if (!error) {
-               KASSERT(vte->vte_count > 0);
+       if (!error)
                vte->vte_count--;
-       }
+       mutex_exit(&veriexec_op_lock);
 
        return (error);
 }
@@ -1215,34 +1474,36 @@
 {
        if (vfe->filename)
                prop_dictionary_set(rdict, "file",
-                   prop_string_create_copy(vfe->filename));
+                   prop_string_create_cstring(vfe->filename));
        prop_dictionary_set_uint8(rdict, "entry-type", vfe->type);
        prop_dictionary_set_uint8(rdict, "status", vfe->status);
        prop_dictionary_set(rdict, "fp-type",
-           prop_string_create_copy(vfe->ops->type));
+           prop_string_create_cstring(vfe->ops->type));
        prop_dictionary_set(rdict, "fp",
-           prop_data_create_copy(vfe->fp, vfe->ops->hash_len));
+           prop_data_create_data(vfe->fp, vfe->ops->hash_len));
 }
 
 int
 veriexec_convert(struct vnode *vp, prop_dictionary_t rdict)
 {
        struct veriexec_file_entry *vfe;
+       int error;
+
 
-       rw_enter(&veriexec_op_lock, RW_READER);
+       error = 0;
+       veriexec_busy();
 
        vfe = veriexec_get(vp);
        if (vfe == NULL) {
-               rw_exit(&veriexec_op_lock);
-               return (ENOENT);
+               error = ENOENT;
+       } else {
+               vfe_busy(vfe);
+               veriexec_file_convert(vfe, rdict);
+               vfe_unbusy(vfe);
        }
 
-       rw_enter(&vfe->lock, RW_READER);
-       veriexec_file_convert(vfe, rdict);
-       rw_exit(&vfe->lock);
-
-       rw_exit(&veriexec_op_lock);
-       return (0);
+       veriexec_unbusy();
+       return (error);
 }
 
 int
@@ -1254,7 +1515,7 @@
            || doing_shutdown)
                return (0);
 
-       rw_enter(&veriexec_op_lock, RW_READER);
+       veriexec_busy();
 
        switch (veriexec_strict) {
        case VERIEXEC_LEARNING:
@@ -1293,7 +1554,7 @@
                break;
        }
 
-       rw_exit(&veriexec_op_lock);
+       veriexec_unbusy();
        return (error);
 }
 
@@ -1321,14 +1582,12 @@
                goto out;
        }
 
-       rw_enter(&veriexec_op_lock, RW_READER);
-       error = veriexec_file_verify(l, vp, path, VERIEXEC_FILE,
-                                    VERIEXEC_FILE_LOCKED, &vfe);
+       error = veriexec_file_verify(l, vp, IO_NODELOCKED, path,
+                                    VERIEXEC_FILE, &vfe);
 
-       if (error) {
-               rw_exit(&veriexec_op_lock);
+       if (error)
                goto out;
-       }
+
 
        if ((vfe != NULL) && ((fmode & FWRITE) || (fmode & O_TRUNC))) {
                veriexec_file_report(vfe, "Write access request.", path, l,
@@ -1337,15 +1596,16 @@
                /* IPS mode: Deny write access to monitored files. */
                if (veriexec_strict >= VERIEXEC_IPS)
                        error = EPERM;
-               else
-                       veriexec_file_purge(vfe, VERIEXEC_LOCKED);
+               else {
+                       vfe_unbusy(vfe);
+                       veriexec_file_purge(vfe);
+               }
        }
 
        if (vfe != NULL)
-               rw_exit(&vfe->lock);
+               vfe_unbusy(vfe);
 
-       rw_exit(&veriexec_op_lock);
- out:
+out:
        return (error);
 }
 
@@ -1368,15 +1628,23 @@
 int
 veriexec_dump(struct lwp *l, prop_array_t rarray)
 {
-       mount_iterator_t *iter;
        struct mount *mp;
+       mount_iterator_t *mpiter;
+
+       mountlist_iterator_init(&mpiter);
+
+       while ((mp = mountlist_iterator_trynext(mpiter)) != NULL) {
+               /* If it fails, the file-system is [being] unmounted. */
+               if (vfs_busy(mp) != 0)
+                       continue;
 
-       mountlist_iterator_init(&iter);
-       while ((mp = mountlist_iterator_next(iter)) != NULL) {
                fileassoc_table_run(mp, veriexec_hook,
                    (fileassoc_cb_t)veriexec_file_dump, rarray);
+
+               vfs_unbusy(mp);
        }
-       mountlist_iterator_destroy(iter);
+
+       mountlist_iterator_destroy(mpiter);
 
        return (0);
 }
@@ -1384,19 +1652,27 @@
 int
 veriexec_flush(struct lwp *l)
 {
-       mount_iterator_t *iter;
        struct mount *mp;
+       mount_iterator_t *mpiter;
        int error = 0;
 
-       mountlist_iterator_init(&iter);
-       while ((mp = mountlist_iterator_next(iter)) != NULL) {
+       mountlist_iterator_init(&mpiter);
+
+       while ((mp = mountlist_iterator_trynext(mpiter)) != NULL) {
                int lerror;
 
+               /* If it fails, the file-system is [being] unmounted. */
+               if (vfs_busy(mp) != 0)
+                       continue;
+
                lerror = veriexec_table_delete(l, mp);
                if (lerror && lerror != ENOENT)
                        error = lerror;
+
+               vfs_unbusy(mp);
        }
-       mountlist_iterator_destroy(iter);
+
+       mountlist_iterator_destroy(mpiter);
 
        return (error);
 }

Reply via email to