On Sun, May 17, 2020 at 02:30:57PM -0700, Kees Cook wrote:
> On Sun, May 17, 2020 at 09:02:15AM -0600, Tycho Andersen wrote:
> 
> I'm going read this thread more carefully tomorrow, but I just wanted to
> mention that I'd *like* to extend seccomp_data for doing deep argument
> inspection of the new syscalls. I think it's the least bad of many
> designs, and I'll write that up in more detail. (I would *really* like
> to avoid extending seccomp's BPF language, and instead allow probing
> into the struct copied from userspace, etc.)
> 
> Anyway, it's very related to this, so, yeah, probably we need a v2 of the
> notif API, but I'll try to get all the ideas here collected in one place.
I scratched together a proposal of what I think would make a not-terrible
V2 API. I'm sure there's bugs in this code, but I think it's workable --
or at least a place to start. The biggest thing I think we should consider
is unrolling seccomp_data if we don't intend to add new BPF-accessible
fields.

If also uses read(2), so we get to take advantage of read(2)'s ability
to pass a size along with the read, as opposed to doing ioctl tricks.
It also makes programming from against it slightly simpler. I can imagine
that the send API could be similar, in that it could support write, and
thus making it 100% usable from Go (and the like) without requiring
a separate OS-thread be spun up to interact with the listener.

diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h
index c1735455bc53..db6e5335ec6a 100644
--- a/include/uapi/linux/seccomp.h
+++ b/include/uapi/linux/seccomp.h
@@ -113,6 +113,41 @@ struct seccomp_notif_resp {
        __u32 flags;
 };
 
+#define SECCOMP_DATA_SIZE_VER0 64
+
+#define SECCOMP_NOTIF_CONFIG_SIZE_VER0 16
+
+/* Optional seccomp return fields */
+/* __u32 for triggering pid */
+#define SECCOMP_NOTIF_FIELD_PID                (1UL << 1)
+/* __u32 for triggering  thread group leader id */
+#define SECCOMP_NOTIF_FIELD_TGID       (1UL << 1)
+
+struct seccomp_notif_config {
+       __u32 size;
+       /*
+        * The supported SECCOMP_DATA_SIZE to be returned. If the size passed in
+        * is unsupported (seccomp_data is too small or if it is larger than the
+        * currently supported size), EINVAL or E2BIG will be, respectively,
+        * returned, and this will be set to the latest supported size.
+        */
+       __u32 seccomp_data_size;
+       /*
+        * This is an OR'd together list of SECCOMP_NOTIF_FIELD_*. If an
+        * unsupported field is requested, ENOTSUPP will be returned.
+        */
+       __u64 optional_fields;
+};
+
+/* read(2) Notification format example:
+ * struct seccomp_notif_read {
+ *     __u64 id;
+ *     __u32 pid; if SECCOMP_NOTIF_FIELD_PID
+ *     __u32 tgid; if SECCOMP_NOTIF_FIELD_TGID
+ *     struct seccomp_data data;
+ * };
+ */
+
 #define SECCOMP_IOC_MAGIC              '!'
 #define SECCOMP_IO(nr)                 _IO(SECCOMP_IOC_MAGIC, nr)
 #define SECCOMP_IOR(nr, type)          _IOR(SECCOMP_IOC_MAGIC, nr, type)
@@ -124,4 +159,10 @@ struct seccomp_notif_resp {
 #define SECCOMP_IOCTL_NOTIF_SEND       SECCOMP_IOWR(1, \
                                                struct seccomp_notif_resp)
 #define SECCOMP_IOCTL_NOTIF_ID_VALID   SECCOMP_IOR(2, __u64)
+/*
+ * Configure the data structure to be returned by read(2).
+ * Returns the size of the data structure which will be returned.
+ */
+#define SECCOMP_IOCTL_NOTIF_CONFIG     SECCOMP_IOR(3,  \
+                                               struct seccomp_notif_config)
 #endif /* _UAPI_LINUX_SECCOMP_H */
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 55a6184f5990..4670cb03c390 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -95,12 +95,19 @@ struct seccomp_knotif {
  * @next_id: The id of the next request.
  * @notifications: A list of struct seccomp_knotif elements.
  * @wqh: A wait queue for poll.
+ * @read_size: The size of the expected read. If it is set to 0, it means that
+ *             reading notifications via read(2) is not yet configured. Until
+ *             then, EINVAL will be returned via read(2).
+ * @data_size: The supported size of seccomp_data.
+ * @optional_fields: The optional fields to return on read
  */
 struct notification {
        struct semaphore request;
        u64 next_id;
        struct list_head notifications;
        wait_queue_head_t wqh;
+       u32 read_size;
+       u64 optional_fields;
 };
 
 /**
@@ -1021,79 +1028,160 @@ static int seccomp_notify_release(struct inode *inode, 
struct file *file)
        return 0;
 }
 
-static long seccomp_notify_recv(struct seccomp_filter *filter,
-                               void __user *buf)
+static void seccomp_reset_notif(struct seccomp_filter *filter,
+                               u64 id)
 {
-       struct seccomp_knotif *knotif = NULL, *cur;
-       struct seccomp_notif unotif;
-       ssize_t ret;
-
-       /* Verify that we're not given garbage to keep struct extensible. */
-       ret = check_zeroed_user(buf, sizeof(unotif));
-       if (ret < 0)
-               return ret;
-       if (!ret)
-               return -EINVAL;
+       struct seccomp_knotif *cur;
 
-       memset(&unotif, 0, sizeof(unotif));
+       /*
+        * Something went wrong. To make sure that we keep this
+        * notification alive, let's reset it back to INIT. It
+        * may have died when we released the lock, so we need to make
+        * sure it's still around.
+        */
+       mutex_lock(&filter->notify_lock);
+       list_for_each_entry(cur, &filter->notif->notifications, list) {
+               if (cur->id == id) {
+                       cur->state = SECCOMP_NOTIFY_INIT;
+                       up(&filter->notif->request);
+                       break;
+               }
+       }
+       mutex_unlock(&filter->notify_lock);
+}
+/*
+ * Returns the next knotif available. If the return is a non-error, it will
+ * be with notify_lock held. It is up to the caller to unlock it.
+ */
+static struct seccomp_knotif *seccomp_get_notif(struct seccomp_filter *filter)
+{
+       struct seccomp_knotif *cur;
+       int ret;
 
        ret = down_interruptible(&filter->notif->request);
        if (ret < 0)
-               return ret;
+               return ERR_PTR(ret);
 
        mutex_lock(&filter->notify_lock);
        list_for_each_entry(cur, &filter->notif->notifications, list) {
                if (cur->state == SECCOMP_NOTIFY_INIT) {
-                       knotif = cur;
-                       break;
+                       cur->state = SECCOMP_NOTIFY_SENT;
+                       wake_up_poll(&filter->notif->wqh,
+                                    EPOLLOUT | EPOLLWRNORM);
+                       return cur;
                }
        }
-
+       mutex_unlock(&filter->notify_lock);
        /*
         * If we didn't find a notification, it could be that the task was
         * interrupted by a fatal signal between the time we were woken and
         * when we were able to acquire the rw lock.
         */
-       if (!knotif) {
-               ret = -ENOENT;
-               goto out;
-       }
+       return ERR_PTR(-ENOENT);
+}
+
+static long seccomp_notify_recv(struct seccomp_filter *filter,
+                               void __user *buf)
+{
+       struct seccomp_knotif *knotif;
+       struct seccomp_notif unotif;
+       ssize_t ret;
+
+       /* Verify that we're not given garbage to keep struct extensible. */
+       ret = check_zeroed_user(buf, sizeof(unotif));
+       if (ret < 0)
+               return ret;
+       if (!ret)
+               return -EINVAL;
+
+       memset(&unotif, 0, sizeof(unotif));
+
+       knotif = seccomp_get_notif(filter);
+       if (IS_ERR(knotif))
+               return PTR_ERR(knotif);
 
        unotif.id = knotif->id;
        unotif.pid = task_pid_vnr(knotif->task);
        unotif.data = *(knotif->data);
 
-       knotif->state = SECCOMP_NOTIFY_SENT;
-       wake_up_poll(&filter->notif->wqh, EPOLLOUT | EPOLLWRNORM);
-       ret = 0;
-out:
        mutex_unlock(&filter->notify_lock);
 
-       if (ret == 0 && copy_to_user(buf, &unotif, sizeof(unotif))) {
-               ret = -EFAULT;
+       if (!copy_to_user(buf, &unotif, sizeof(unotif)))
+               return 0;
 
-               /*
-                * Userspace screwed up. To make sure that we keep this
-                * notification alive, let's reset it back to INIT. It
-                * may have died when we released the lock, so we need to make
-                * sure it's still around.
-                */
-               knotif = NULL;
-               mutex_lock(&filter->notify_lock);
-               list_for_each_entry(cur, &filter->notif->notifications, list) {
-                       if (cur->id == unotif.id) {
-                               knotif = cur;
-                               break;
-                       }
-               }
+       seccomp_reset_notif(filter, unotif.id);
+       return -EFAULT;
+}
 
-               if (knotif) {
-                       knotif->state = SECCOMP_NOTIFY_INIT;
-                       up(&filter->notif->request);
-               }
-               mutex_unlock(&filter->notify_lock);
+/* Append the src value to the dst, and return the new cursor */
+#define append(dst, src)       ({              \
+       typeof(src) _src = src;                 \
+       memcpy(dst, &_src, sizeof(_src));       \
+       dst + sizeof(_src);                     \
+})
+
+/*
+ * Append the value pointer to by the pointer, src, to the dst
+ * and return the new cursor
+ */
+#define append_ptr(dst, src)   ({              \
+       typeof(src) _src = src;                 \
+       memcpy(dst, _src, sizeof(*_src));       \
+       dst + sizeof(*_src);                    \
+})
+
+static ssize_t seccomp_notify_read(struct file *file, char __user *buf,
+                                  size_t count, loff_t *ppos)
+{
+       struct seccomp_filter *filter = file->private_data;
+       u32 optional_fields, read_size;
+       struct seccomp_knotif *knotif;
+       void *kbuf, *cursor;
+       int ret;
+       u64 id;
+
+       ret = mutex_lock_interruptible(&filter->notify_lock);
+       if (ret)
+               return ret;
+       read_size = filter->notif->read_size;
+       optional_fields = filter->notif->optional_fields;
+       mutex_unlock(&filter->notify_lock);
+
+       if (read_size == 0)
+               return -EINVAL;
+       if (count < read_size)
+               return -ENOSPC;
+
+       knotif = seccomp_get_notif(filter);
+       if (IS_ERR(knotif))
+               return PTR_ERR(knotif);
+
+       id = knotif->id;
+       kbuf = kzalloc(read_size, GFP_KERNEL);
+       if (!kbuf) {
+               ret = -ENOMEM;
+               goto out;
        }
 
+       cursor = kbuf;
+       cursor = append(cursor, knotif->id);
+       if (filter->notif->optional_fields & SECCOMP_NOTIF_FIELD_PID)
+               cursor = append(cursor, task_pid_vnr(knotif->task));
+       if (filter->notif->optional_fields & SECCOMP_NOTIF_FIELD_TGID)
+               cursor = append(cursor, task_tgid_vnr(knotif->task));
+       cursor = append_ptr(cursor, knotif->data);
+       WARN_ON_ONCE(cursor != kbuf + read_size);
+
+       ret = copy_to_user(buf, kbuf, read_size);
+       if (!ret)
+               ret = read_size;
+
+       kfree(kbuf);
+out:
+       mutex_unlock(&filter->notify_lock);
+       if (ret < 0)
+               seccomp_reset_notif(filter, id);
+
        return ret;
 }
 
@@ -1175,6 +1263,70 @@ static long seccomp_notify_id_valid(struct 
seccomp_filter *filter,
        return ret;
 }
 
+static long seccomp_notif_config(struct seccomp_filter *filter,
+                                struct seccomp_notif_config __user *uconfig)
+{
+       struct seccomp_notif_config config;
+       /* ret_size is a minimum of 8, given id */
+       int ret, notification_size = 8;
+       u32 size;
+
+
+       ret = get_user(size, &uconfig->size);
+       if (ret)
+               return ret;
+       ret = copy_struct_from_user(&config, sizeof(config), uconfig, size);
+       if (ret)
+               return ret;
+
+       /*
+        * This needs to be bumped every time we change seccomp_data's size
+        */
+       BUILD_BUG_ON(sizeof(struct seccomp_data) != SECCOMP_DATA_SIZE_VER0);
+       if (config.seccomp_data_size < SECCOMP_DATA_SIZE_VER0) {
+               ret = -EINVAL;
+               goto invalid_seccomp_data_size;
+       }
+       if (config.seccomp_data_size > SECCOMP_DATA_SIZE_VER0) {
+               ret = -E2BIG;
+               goto invalid_seccomp_data_size;
+       }
+       notification_size += config.seccomp_data_size;
+
+       if (config.optional_fields & 
~(SECCOMP_NOTIF_FIELD_PID|SECCOMP_NOTIF_FIELD_TGID))
+               return -ENOTSUPP;
+
+       if (config.optional_fields & SECCOMP_NOTIF_FIELD_PID)
+               notification_size += 4;
+       if (config.optional_fields & SECCOMP_NOTIF_FIELD_TGID)
+               notification_size += 4;
+
+
+       ret = mutex_lock_interruptible(&filter->notify_lock);
+       if (ret < 0)
+               return ret;
+
+       if (filter->notif->read_size) {
+               ret = -EBUSY;
+               goto out;
+       }
+
+       ret = notification_size;
+       filter->notif->read_size = notification_size;
+       filter->notif->optional_fields = config.optional_fields;
+
+out:
+       mutex_unlock(&filter->notify_lock);
+
+       return ret;
+
+invalid_seccomp_data_size:
+       if (put_user(SECCOMP_DATA_SIZE_VER0, &uconfig->seccomp_data_size))
+               return -EFAULT;
+
+       return ret;
+}
+
 static long seccomp_notify_ioctl(struct file *file, unsigned int cmd,
                                 unsigned long arg)
 {
@@ -1188,6 +1340,8 @@ static long seccomp_notify_ioctl(struct file *file, 
unsigned int cmd,
                return seccomp_notify_send(filter, buf);
        case SECCOMP_IOCTL_NOTIF_ID_VALID:
                return seccomp_notify_id_valid(filter, buf);
+       case SECCOMP_IOCTL_NOTIF_CONFIG:
+               return seccomp_notif_config(filter, buf);
        default:
                return -EINVAL;
        }
@@ -1224,6 +1378,7 @@ static const struct file_operations seccomp_notify_ops = {
        .release = seccomp_notify_release,
        .unlocked_ioctl = seccomp_notify_ioctl,
        .compat_ioctl = seccomp_notify_ioctl,
+       .read = seccomp_notify_read,
 };
 
 static struct file *init_listener(struct seccomp_filter *filter)


> 
> -- 
> Kees Cook
> _______________________________________________
> Containers mailing list
> [email protected]
> https://lists.linuxfoundation.org/mailman/listinfo/containers

Reply via email to