From: Yishai Hadas <[email protected]>

Expose DMABUF functionality to userspace through the uverbs interface,
enabling InfiniBand/RDMA devices to export PCI based memory regions
(e.g. device memory) as DMABUF file descriptors. This allows
zero-copy sharing of RDMA memory with other subsystems that support the
dma-buf framework.

Introduce a new UVERBS_OBJECT_DMABUF object type together with its
allocation method, UVERBS_METHOD_DMABUF_ALLOC.

During allocation, uverbs invokes the driver to supply the
rdma_user_mmap_entry associated with the given page offset (pgoff).

Based on the returned rdma_user_mmap_entry, uverbs then asks the driver
(through the new mmap_get_pfns() op) for the corresponding physical
memory range as well as the driver's PCI P2PDMA provider.

Using this information, dma_buf_export() is called; if it succeeds,
uobj->object is set to the underlying file pointer returned by the
dma-buf framework.

The file descriptor number follows the standard uverbs allocation flow,
but the file pointer comes from the dma-buf subsystem, including its own
fops and private data.

When an mmap entry is removed, uverbs iterates over its associated
DMABUFs, marks each one as revoked, calls dma_buf_move_notify() so that
its importers are notified, and waits for the fences on its reservation
object to signal.

The same procedure applies during the disassociate flow; final cleanup
occurs when the application closes the file.

Signed-off-by: Yishai Hadas <[email protected]>
Signed-off-by: Edward Srouji <[email protected]>
---
 drivers/infiniband/core/Makefile                  |   1 +
 drivers/infiniband/core/device.c                  |   2 +
 drivers/infiniband/core/ib_core_uverbs.c          |  22 +++
 drivers/infiniband/core/rdma_core.c               |  28 ++--
 drivers/infiniband/core/rdma_core.h               |   1 +
 drivers/infiniband/core/uverbs.h                  |  10 ++
 drivers/infiniband/core/uverbs_std_types_dmabuf.c | 176 ++++++++++++++++++++++
 drivers/infiniband/core/uverbs_uapi.c             |   1 +
 include/rdma/ib_verbs.h                           |   9 ++
 include/rdma/uverbs_types.h                       |   1 +
 include/uapi/rdma/ib_user_ioctl_cmds.h            |  10 ++
 11 files changed, 249 insertions(+), 12 deletions(-)

diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index f483e0c12444..a2a7a9d2e0d3 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -33,6 +33,7 @@ ib_umad-y :=                  user_mad.o
 ib_uverbs-y :=                 uverbs_main.o uverbs_cmd.o uverbs_marshall.o \
                                rdma_core.o uverbs_std_types.o uverbs_ioctl.o \
                                uverbs_std_types_cq.o \
+                               uverbs_std_types_dmabuf.o \
                                uverbs_std_types_dmah.o \
                                uverbs_std_types_flow_action.o uverbs_std_types_dm.o \
                                uverbs_std_types_mr.o uverbs_std_types_counters.o \
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 4e09f6e0995e..416242b9c158 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -2765,6 +2765,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
        SET_DEVICE_OP(dev_ops, map_mr_sg);
        SET_DEVICE_OP(dev_ops, map_mr_sg_pi);
        SET_DEVICE_OP(dev_ops, mmap);
+       SET_DEVICE_OP(dev_ops, mmap_get_pfns);
        SET_DEVICE_OP(dev_ops, mmap_free);
        SET_DEVICE_OP(dev_ops, modify_ah);
        SET_DEVICE_OP(dev_ops, modify_cq);
@@ -2775,6 +2776,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
        SET_DEVICE_OP(dev_ops, modify_srq);
        SET_DEVICE_OP(dev_ops, modify_wq);
        SET_DEVICE_OP(dev_ops, peek_cq);
+       SET_DEVICE_OP(dev_ops, pgoff_to_mmap_entry);
        SET_DEVICE_OP(dev_ops, pre_destroy_cq);
        SET_DEVICE_OP(dev_ops, poll_cq);
        SET_DEVICE_OP(dev_ops, port_groups);
diff --git a/drivers/infiniband/core/ib_core_uverbs.c b/drivers/infiniband/core/ib_core_uverbs.c
index b51bd7087a88..b02cf9061f09 100644
--- a/drivers/infiniband/core/ib_core_uverbs.c
+++ b/drivers/infiniband/core/ib_core_uverbs.c
@@ -5,9 +5,13 @@
  * Copyright 2019 Marvell. All rights reserved.
  */
 #include <linux/xarray.h>
+#include <linux/dma-buf.h>
+#include <linux/dma-resv.h>
 #include "uverbs.h"
 #include "core_priv.h"
 
+MODULE_IMPORT_NS("DMA_BUF");
+
 /**
  * rdma_umap_priv_init() - Initialize the private data of a vma
  *
@@ -229,12 +233,27 @@ EXPORT_SYMBOL(rdma_user_mmap_entry_put);
  */
 void rdma_user_mmap_entry_remove(struct rdma_user_mmap_entry *entry)
 {
+       struct ib_uverbs_dmabuf_file *uverbs_dmabuf, *tmp;
+
        if (!entry)
                return;
 
+       /*
+        * dmabufs_lock covers both the driver_removed update and the list
+        * walk: the DMABUF alloc handler tests driver_removed under the same
+        * mutex before it links a new DMABUF to this entry.
+        */
+       mutex_lock(&entry->dmabufs_lock);
        xa_lock(&entry->ucontext->mmap_xa);
        entry->driver_removed = true;
        xa_unlock(&entry->ucontext->mmap_xa);
+       /* Revoke every DMABUF that was exported on top of this entry. */
+       list_for_each_entry_safe(uverbs_dmabuf, tmp, &entry->dmabufs, dmabufs_elm) {
+               dma_resv_lock(uverbs_dmabuf->dmabuf->resv, NULL);
+               list_del(&uverbs_dmabuf->dmabufs_elm);
+               /* From here on uverbs_dmabuf_map() fails with -ENODEV. */
+               uverbs_dmabuf->revoked = true;
+               dma_buf_move_notify(uverbs_dmabuf->dmabuf);
+               /* Wait, with no timeout, on the fences of the reservation object. */
+               dma_resv_wait_timeout(uverbs_dmabuf->dmabuf->resv,
+                                     DMA_RESV_USAGE_BOOKKEEP, false,
+                                     MAX_SCHEDULE_TIMEOUT);
+               dma_resv_unlock(uverbs_dmabuf->dmabuf->resv);
+       }
+       mutex_unlock(&entry->dmabufs_lock);
+
        kref_put(&entry->ref, rdma_user_mmap_entry_free);
 }
 EXPORT_SYMBOL(rdma_user_mmap_entry_remove);
@@ -274,6 +293,9 @@ int rdma_user_mmap_entry_insert_range(struct ib_ucontext *ucontext,
                return -EINVAL;
 
        kref_init(&entry->ref);
+       INIT_LIST_HEAD(&entry->dmabufs);
+       mutex_init(&entry->dmabufs_lock);
+
        entry->ucontext = ucontext;
 
        /*
diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c
index b6eda2fb0911..3e0a8b9cd288 100644
--- a/drivers/infiniband/core/rdma_core.c
+++ b/drivers/infiniband/core/rdma_core.c
@@ -809,21 +809,10 @@ const struct uverbs_obj_type_class uverbs_idr_class = {
 };
 EXPORT_SYMBOL(uverbs_idr_class);
 
-/*
- * Users of UVERBS_TYPE_ALLOC_FD should set this function as the struct
- * file_operations release method.
- */
-int uverbs_uobject_fd_release(struct inode *inode, struct file *filp)
+int uverbs_uobject_release(struct ib_uobject *uobj)
 {
        struct ib_uverbs_file *ufile;
-       struct ib_uobject *uobj;
 
-       /*
-        * This can only happen if the fput came from alloc_abort_fd_uobject()
-        */
-       if (!filp->private_data)
-               return 0;
-       uobj = filp->private_data;
        ufile = uobj->ufile;
 
        if (down_read_trylock(&ufile->hw_destroy_rwsem)) {
@@ -850,6 +839,21 @@ int uverbs_uobject_fd_release(struct inode *inode, struct file *filp)
        uverbs_uobject_put(uobj);
        return 0;
 }
+
+/*
+ * Release method for the struct file_operations of UVERBS_TYPE_ALLOC_FD
+ * users. Passes the uobject stored in filp->private_data to
+ * uverbs_uobject_release() and returns its result, or returns 0 right away
+ * when no uobject is attached to the file.
+ */
+int uverbs_uobject_fd_release(struct inode *inode, struct file *filp)
+{
+       struct ib_uobject *uobj = filp->private_data;
+
+       /* NULL can only happen if the fput came from alloc_abort_fd_uobject() */
+       if (uobj)
+               return uverbs_uobject_release(uobj);
+
+       return 0;
+}
 EXPORT_SYMBOL(uverbs_uobject_fd_release);
 
 /*
diff --git a/drivers/infiniband/core/rdma_core.h b/drivers/infiniband/core/rdma_core.h
index a59b087611cb..55f1e3558856 100644
--- a/drivers/infiniband/core/rdma_core.h
+++ b/drivers/infiniband/core/rdma_core.h
@@ -156,6 +156,7 @@ extern const struct uapi_definition uverbs_def_obj_counters[];
 extern const struct uapi_definition uverbs_def_obj_cq[];
 extern const struct uapi_definition uverbs_def_obj_device[];
 extern const struct uapi_definition uverbs_def_obj_dm[];
+extern const struct uapi_definition uverbs_def_obj_dmabuf[];
 extern const struct uapi_definition uverbs_def_obj_dmah[];
 extern const struct uapi_definition uverbs_def_obj_flow_action[];
 extern const struct uapi_definition uverbs_def_obj_intf[];
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index 797e2fcc8072..66287e8e7ad7 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -133,6 +133,16 @@ struct ib_uverbs_completion_event_file {
        struct ib_uverbs_event_queue            ev_queue;
 };
 
+/*
+ * State of one UVERBS_OBJECT_DMABUF uobject. The same structure is installed
+ * as the priv pointer of the dma_buf exported by the alloc handler.
+ */
+struct ib_uverbs_dmabuf_file {
+       struct ib_uobject uobj;
+       /* dma_buf returned by dma_buf_export() */
+       struct dma_buf *dmabuf;
+       /* Link in rdma_user_mmap_entry::dmabufs, guarded by its dmabufs_lock */
+       struct list_head dmabufs_elm;
+       /* Entry the buffer was exported from; put when the uobject is destroyed */
+       struct rdma_user_mmap_entry *mmap_entry;
+       /* Physical range and P2P provider filled in by ops.mmap_get_pfns() */
+       struct dma_buf_phys_vec phys_vec;
+       struct p2pdma_provider *provider;
+       /* Set once the buffer is revoked; mapping then fails with -ENODEV */
+       u8 revoked :1;
+};
+
 struct ib_uverbs_event {
        union {
                struct ib_uverbs_async_event_desc       async;
diff --git a/drivers/infiniband/core/uverbs_std_types_dmabuf.c b/drivers/infiniband/core/uverbs_std_types_dmabuf.c
new file mode 100644
index 000000000000..05980f4fa500
--- /dev/null
+++ b/drivers/infiniband/core/uverbs_std_types_dmabuf.c
@@ -0,0 +1,176 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved
+ */
+
+#include <linux/dma-buf-mapping.h>
+#include <linux/pci-p2pdma.h>
+#include <linux/dma-resv.h>
+#include <rdma/uverbs_std_types.h>
+#include "rdma_core.h"
+#include "uverbs.h"
+
+/*
+ * dma_buf_ops.attach: accept only importers that set peer2peer on their
+ * attachment; everything else is refused with -EOPNOTSUPP.
+ */
+static int uverbs_dmabuf_attach(struct dma_buf *dmabuf,
+                               struct dma_buf_attachment *attachment)
+{
+       return attachment->peer2peer ? 0 : -EOPNOTSUPP;
+}
+
+/*
+ * dma_buf_ops.map_dma_buf: build an sg_table covering the single physical
+ * range that backs the buffer. The dma_buf reservation lock must be held.
+ * Returns ERR_PTR(-ENODEV) once the buffer has been revoked.
+ */
+static struct sg_table *
+uverbs_dmabuf_map(struct dma_buf_attachment *attachment,
+                 enum dma_data_direction dir)
+{
+       struct ib_uverbs_dmabuf_file *uverbs_dmabuf = attachment->dmabuf->priv;
+       struct dma_buf_phys_vec *vec = &uverbs_dmabuf->phys_vec;
+
+       dma_resv_assert_held(uverbs_dmabuf->dmabuf->resv);
+
+       if (!uverbs_dmabuf->revoked)
+               return dma_buf_phys_vec_to_sgt(attachment,
+                                              uverbs_dmabuf->provider, vec, 1,
+                                              vec->len, dir);
+
+       return ERR_PTR(-ENODEV);
+}
+
+/*
+ * dma_buf_ops.unmap_dma_buf: release an sg_table that uverbs_dmabuf_map()
+ * handed out, by forwarding it to dma_buf_free_sgt().
+ */
+static void uverbs_dmabuf_unmap(struct dma_buf_attachment *attachment,
+                               struct sg_table *sgt,
+                               enum dma_data_direction dir)
+{
+       dma_buf_free_sgt(attachment, sgt, dir);
+}
+
+/* dma_buf_ops.pin: pinning is not supported, always returns -EOPNOTSUPP. */
+static int uverbs_dmabuf_pin(struct dma_buf_attachment *attach)
+{
+       return -EOPNOTSUPP;
+}
+
+/*
+ * dma_buf_ops.release: pass the embedded uobject to uverbs_uobject_release(),
+ * unless the uobject has no context attached.
+ */
+static void uverbs_dmabuf_release(struct dma_buf *dmabuf)
+{
+       struct ib_uverbs_dmabuf_file *uverbs_dmabuf = dmabuf->priv;
+       struct ib_uobject *uobj = &uverbs_dmabuf->uobj;
+
+       /*
+        * A NULL context can only happen if the fput came from
+        * alloc_abort_fd_uobject()
+        */
+       if (uobj->context)
+               uverbs_uobject_release(uobj);
+}
+
+/* Exporter callbacks of the dma-bufs created by UVERBS_METHOD_DMABUF_ALLOC. */
+static const struct dma_buf_ops uverbs_dmabuf_ops = {
+       .attach = uverbs_dmabuf_attach,
+       .map_dma_buf = uverbs_dmabuf_map,
+       .unmap_dma_buf = uverbs_dmabuf_unmap,
+       .pin = uverbs_dmabuf_pin,
+       .release = uverbs_dmabuf_release,
+};
+
+/*
+ * UVERBS_METHOD_DMABUF_ALLOC handler: export the memory behind a driver mmap
+ * entry as a dma-buf and bind it to the new FD uobject.
+ *
+ * The driver resolves UVERBS_ATTR_ALLOC_DMABUF_PGOFF to an
+ * rdma_user_mmap_entry and supplies its physical range and P2P provider.
+ * The exported dma_buf is linked to the entry so that it is revoked when
+ * the entry is removed.
+ *
+ * Return: 0 on success, -EINVAL when no mmap entry matches the page offset,
+ * -EIO when the driver already removed the entry, or the error returned by
+ * uverbs_get_const(), ops.mmap_get_pfns() or dma_buf_export().
+ */
+static int UVERBS_HANDLER(UVERBS_METHOD_DMABUF_ALLOC)(
+       struct uverbs_attr_bundle *attrs)
+{
+       struct ib_uobject *uobj =
+               uverbs_attr_get(attrs, UVERBS_ATTR_ALLOC_DMABUF_HANDLE)
+                       ->obj_attr.uobject;
+       struct ib_uverbs_dmabuf_file *uverbs_dmabuf =
+               container_of(uobj, struct ib_uverbs_dmabuf_file, uobj);
+       struct ib_device *ib_dev = attrs->context->device;
+       struct rdma_user_mmap_entry *mmap_entry;
+       DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
+       off_t pg_off;
+       int ret;
+
+       ret = uverbs_get_const(&pg_off, attrs, UVERBS_ATTR_ALLOC_DMABUF_PGOFF);
+       if (ret)
+               return ret;
+
+       /*
+        * The entry comes back with a reference held; it is dropped on the
+        * error paths below or when the uobject is destroyed.
+        */
+       mmap_entry = ib_dev->ops.pgoff_to_mmap_entry(attrs->context, pg_off);
+       if (!mmap_entry)
+               return -EINVAL;
+
+       ret = ib_dev->ops.mmap_get_pfns(mmap_entry, &uverbs_dmabuf->phys_vec,
+                                       &uverbs_dmabuf->provider);
+       if (ret)
+               goto err;
+
+       exp_info.ops = &uverbs_dmabuf_ops;
+       exp_info.size = uverbs_dmabuf->phys_vec.len;
+       exp_info.flags = O_CLOEXEC;
+       exp_info.priv = uverbs_dmabuf;
+
+       uverbs_dmabuf->dmabuf = dma_buf_export(&exp_info);
+       if (IS_ERR(uverbs_dmabuf->dmabuf)) {
+               ret = PTR_ERR(uverbs_dmabuf->dmabuf);
+               goto err;
+       }
+
+       /*
+        * Link the DMABUF to the entry under dmabufs_lock, which
+        * rdma_user_mmap_entry_remove() holds while it sets driver_removed
+        * and walks the list. An entry that is already removed must not get
+        * new DMABUFs.
+        */
+       INIT_LIST_HEAD(&uverbs_dmabuf->dmabufs_elm);
+       mutex_lock(&mmap_entry->dmabufs_lock);
+       if (mmap_entry->driver_removed)
+               ret = -EIO;
+       else
+               list_add_tail(&uverbs_dmabuf->dmabufs_elm, &mmap_entry->dmabufs);
+       mutex_unlock(&mmap_entry->dmabufs_lock);
+       if (ret)
+               goto err_revoked;
+
+       /* The uobject's file is the one created by the dma-buf core. */
+       uobj->object = uverbs_dmabuf->dmabuf->file;
+       uverbs_dmabuf->mmap_entry = mmap_entry;
+       uverbs_finalize_uobj_create(attrs, UVERBS_ATTR_ALLOC_DMABUF_HANDLE);
+       return 0;
+
+err_revoked:
+       /*
+        * NOTE(review): dropping the last dma_buf reference ends up in
+        * uverbs_dmabuf_release(), which reads uobj.context - confirm the
+        * uobject is guaranteed to still be valid when that callback runs.
+        */
+       dma_buf_put(uverbs_dmabuf->dmabuf);
+err:
+       rdma_user_mmap_entry_put(mmap_entry);
+       return ret;
+}
+
+/*
+ * UVERBS_METHOD_DMABUF_ALLOC attributes: the new DMABUF FD object and the
+ * u64 page offset of the mmap entry to export. Both are mandatory.
+ */
+DECLARE_UVERBS_NAMED_METHOD(
+       UVERBS_METHOD_DMABUF_ALLOC,
+       UVERBS_ATTR_FD(UVERBS_ATTR_ALLOC_DMABUF_HANDLE,
+                      UVERBS_OBJECT_DMABUF,
+                      UVERBS_ACCESS_NEW,
+                      UA_MANDATORY),
+       UVERBS_ATTR_PTR_IN(UVERBS_ATTR_ALLOC_DMABUF_PGOFF,
+                          UVERBS_ATTR_TYPE(u64),
+                          UA_MANDATORY));
+
+/*
+ * Destroy callback registered for the DMABUF uobject through
+ * UVERBS_TYPE_ALLOC_FD().
+ *
+ * Revokes the dma-buf unless rdma_user_mmap_entry_remove() already did so,
+ * then drops the mmap entry reference owned by the uobject. @why is not
+ * used.
+ */
+static void uverbs_dmabuf_fd_destroy_uobj(struct ib_uobject *uobj,
+                                         enum rdma_remove_reason why)
+{
+       struct ib_uverbs_dmabuf_file *uverbs_dmabuf =
+               container_of(uobj, struct ib_uverbs_dmabuf_file, uobj);
+
+       /* Same lock order as rdma_user_mmap_entry_remove(): mutex, then resv. */
+       mutex_lock(&uverbs_dmabuf->mmap_entry->dmabufs_lock);
+       dma_resv_lock(uverbs_dmabuf->dmabuf->resv, NULL);
+       /* A revoked DMABUF was already unlinked from the entry's list. */
+       if (!uverbs_dmabuf->revoked) {
+               uverbs_dmabuf->revoked = true;
+               list_del(&uverbs_dmabuf->dmabufs_elm);
+               dma_buf_move_notify(uverbs_dmabuf->dmabuf);
+               dma_resv_wait_timeout(uverbs_dmabuf->dmabuf->resv,
+                                     DMA_RESV_USAGE_BOOKKEEP, false,
+                                     MAX_SCHEDULE_TIMEOUT);
+       }
+       dma_resv_unlock(uverbs_dmabuf->dmabuf->resv);
+       mutex_unlock(&uverbs_dmabuf->mmap_entry->dmabufs_lock);
+
+       /* Matches the get done as part of pgoff_to_mmap_entry() */
+       rdma_user_mmap_entry_put(uverbs_dmabuf->mmap_entry);
+}
+
+/*
+ * FD-based uobject type for exported dma-bufs, with a single method
+ * (UVERBS_METHOD_DMABUF_ALLOC). The two NULL arguments are presumably the
+ * fops and name slots, left empty because the file comes from
+ * dma_buf_export() - TODO confirm against UVERBS_TYPE_ALLOC_FD().
+ */
+DECLARE_UVERBS_NAMED_OBJECT(
+       UVERBS_OBJECT_DMABUF,
+       UVERBS_TYPE_ALLOC_FD(sizeof(struct ib_uverbs_dmabuf_file),
+                            uverbs_dmabuf_fd_destroy_uobj,
+                            NULL, NULL, O_RDONLY),
+       &UVERBS_METHOD(UVERBS_METHOD_DMABUF_ALLOC));
+
+/*
+ * uapi chain for UVERBS_OBJECT_DMABUF. The two UAPI_DEF_OBJ_NEEDS_FN()
+ * entries are separate array elements that follow the object entry, not
+ * arguments of UAPI_DEF_CHAIN_OBJ_TREE_NAMED().
+ */
+const struct uapi_definition uverbs_def_obj_dmabuf[] = {
+       UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_DMABUF),
+       UAPI_DEF_OBJ_NEEDS_FN(mmap_get_pfns),
+       UAPI_DEF_OBJ_NEEDS_FN(pgoff_to_mmap_entry),
+       {}
+};
diff --git a/drivers/infiniband/core/uverbs_uapi.c b/drivers/infiniband/core/uverbs_uapi.c
index e00ea63175bd..38d0bbbee796 100644
--- a/drivers/infiniband/core/uverbs_uapi.c
+++ b/drivers/infiniband/core/uverbs_uapi.c
@@ -631,6 +631,7 @@ static const struct uapi_definition uverbs_core_api[] = {
        UAPI_DEF_CHAIN(uverbs_def_obj_cq),
        UAPI_DEF_CHAIN(uverbs_def_obj_device),
        UAPI_DEF_CHAIN(uverbs_def_obj_dm),
+       UAPI_DEF_CHAIN(uverbs_def_obj_dmabuf),
        UAPI_DEF_CHAIN(uverbs_def_obj_dmah),
        UAPI_DEF_CHAIN(uverbs_def_obj_flow_action),
        UAPI_DEF_CHAIN(uverbs_def_obj_intf),
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 6c372a37c482..5be67013c8ae 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -43,6 +43,7 @@
 #include <uapi/rdma/rdma_user_ioctl.h>
 #include <uapi/rdma/ib_user_ioctl_verbs.h>
 #include <linux/pci-tph.h>
+#include <linux/dma-buf.h>
 
 #define IB_FW_VERSION_NAME_MAX ETHTOOL_FWVERS_LEN
 
@@ -2363,6 +2364,9 @@ struct rdma_user_mmap_entry {
        unsigned long start_pgoff;
        size_t npages;
        bool driver_removed;
+       /* protects access to dmabufs */
+       struct mutex dmabufs_lock;
+       struct list_head dmabufs;
 };
 
 /* Return the offset (in bytes) the user should pass to libc's mmap() */
@@ -2500,6 +2504,11 @@ struct ib_device_ops {
         * Therefore needs to be implemented by the driver in mmap_free.
         */
        void (*mmap_free)(struct rdma_user_mmap_entry *entry);
+       int (*mmap_get_pfns)(struct rdma_user_mmap_entry *entry,
+                            struct dma_buf_phys_vec *phys_vec,
+                            struct p2pdma_provider **provider);
+       struct rdma_user_mmap_entry *(*pgoff_to_mmap_entry)(struct ib_ucontext 
*ucontext,
+                                                           off_t pg_off);
        void (*disassociate_ucontext)(struct ib_ucontext *ibcontext);
        int (*alloc_pd)(struct ib_pd *pd, struct ib_udata *udata);
        int (*dealloc_pd)(struct ib_pd *pd, struct ib_udata *udata);
diff --git a/include/rdma/uverbs_types.h b/include/rdma/uverbs_types.h
index 26ba919ac245..6a253b7dc5ea 100644
--- a/include/rdma/uverbs_types.h
+++ b/include/rdma/uverbs_types.h
@@ -186,6 +186,7 @@ struct ib_uverbs_file {
 extern const struct uverbs_obj_type_class uverbs_idr_class;
 extern const struct uverbs_obj_type_class uverbs_fd_class;
 int uverbs_uobject_fd_release(struct inode *inode, struct file *filp);
+int uverbs_uobject_release(struct ib_uobject *uobj);
 
 #define UVERBS_BUILD_BUG_ON(cond) (sizeof(char[1 - 2 * !!(cond)]) -    \
                                   sizeof(char))
diff --git a/include/uapi/rdma/ib_user_ioctl_cmds.h b/include/uapi/rdma/ib_user_ioctl_cmds.h
index 35da4026f452..72041c1b0ea5 100644
--- a/include/uapi/rdma/ib_user_ioctl_cmds.h
+++ b/include/uapi/rdma/ib_user_ioctl_cmds.h
@@ -56,6 +56,7 @@ enum uverbs_default_objects {
        UVERBS_OBJECT_COUNTERS,
        UVERBS_OBJECT_ASYNC_EVENT,
        UVERBS_OBJECT_DMAH,
+       UVERBS_OBJECT_DMABUF,
 };
 
 enum {
@@ -263,6 +264,15 @@ enum uverbs_methods_dmah {
        UVERBS_METHOD_DMAH_FREE,
 };
 
+/* Attribute IDs of UVERBS_METHOD_DMABUF_ALLOC */
+enum uverbs_attrs_alloc_dmabuf_cmd_attr_ids {
+       /* The new DMABUF file descriptor object */
+       UVERBS_ATTR_ALLOC_DMABUF_HANDLE,
+       /* u64 input: page offset identifying the driver mmap entry to export */
+       UVERBS_ATTR_ALLOC_DMABUF_PGOFF,
+};
+
+/* Methods of UVERBS_OBJECT_DMABUF */
+enum uverbs_methods_dmabuf {
+       UVERBS_METHOD_DMABUF_ALLOC,
+};
+
 enum uverbs_attrs_reg_dm_mr_cmd_attr_ids {
        UVERBS_ATTR_REG_DM_MR_HANDLE,
        UVERBS_ATTR_REG_DM_MR_OFFSET,

-- 
2.49.0

Reply via email to