Add a dma-buf heap for DT coherent reserved-memory regions (i.e., 'shared-dma-pool' without the 'reusable' property), exposing one heap per region from which userspace can allocate buffers.
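
Such a region is declared under the /reserved-memory node, e.g. (node
name, addresses and size below are illustrative; 'no-map' is required
on some architectures, and note the absence of 'reusable'):

  reserved-memory {
          #address-cells = <2>;
          #size-cells = <2>;
          ranges;

          mm-pool@60000000 {
                  compatible = "shared-dma-pool";
                  reg = <0x0 0x60000000 0x0 0x4000000>;
                  no-map;
          };
  };

A region like the one above then shows up as a heap chardev under
/dev/dma_heap/, with its name derived from the reserved-memory node
name ("coherent_<node-name>").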
The heap binds a synthetic platform device to each region so that coherent allocations use the correct dev->dma_mem, and it defers registration until late_initcall, when the normal memory allocators are available. Allocations made through the coherent heap are charged to the dmem cgroup.

Signed-off-by: Albert Esteve <[email protected]>
---
This patch introduces a new driver to expose DT coherent reserved-memory regions as dma-buf heaps, allowing userspace to create buffers backed by them. Since these regions are device-dependent, we bind a synthetic platform device to each region so that coherent allocations use the correct dev->dma_mem.

Following Eric's [1] and Maxime's [2] work on charging DMA buffers allocated from userspace to cgroups (dmem), this patch applies the same charging pattern used by the CMA heap patch. Charging is done only through the dma-buf heap interface so that usage can be attributed to the userspace allocator. This allows each device-specific reserved-memory region to enforce its own limits.

[1] https://lore.kernel.org/all/[email protected]/
[2] https://lore.kernel.org/all/[email protected]/
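
For illustration, a minimal userspace sketch that allocates from one of
these heaps (the heap path is a placeholder for whatever region name the
DT defines; error handling trimmed):

  #include <fcntl.h>
  #include <stdio.h>
  #include <sys/ioctl.h>
  #include <unistd.h>
  #include <linux/dma-heap.h>

  int main(void)
  {
          struct dma_heap_allocation_data data = {
                  .len = 4096,
                  .fd_flags = O_RDWR | O_CLOEXEC,
          };
          /* One chardev per region; "mm-pool" is a placeholder name. */
          int heap = open("/dev/dma_heap/coherent_mm-pool",
                          O_RDONLY | O_CLOEXEC);

          if (heap < 0)
                  return 1;
          if (ioctl(heap, DMA_HEAP_IOCTL_ALLOC, &data) < 0) {
                  close(heap);
                  return 1;
          }
          /* data.fd is now a dma-buf fd backed by the reserved region. */
          printf("dma-buf fd: %d\n", data.fd);
          close(data.fd);
          close(heap);
          return 0;
  }

With CONFIG_CGROUP_DMEM enabled, such allocations are accounted against
the "coh/<region>" dmem region, so per-region limits can be set through
the cgroup dmem.max interface.
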
---
 drivers/dma-buf/heaps/Kconfig         |  17 ++
 drivers/dma-buf/heaps/Makefile        |   1 +
 drivers/dma-buf/heaps/coherent_heap.c | 486 ++++++++++++++++++++++++++++++++++
 include/linux/dma-heap.h              |  11 +
 kernel/dma/coherent.c                 |   9 +
 5 files changed, 524 insertions(+)

diff --git a/drivers/dma-buf/heaps/Kconfig b/drivers/dma-buf/heaps/Kconfig
index a5eef06c42264..93765dca164e3 100644
--- a/drivers/dma-buf/heaps/Kconfig
+++ b/drivers/dma-buf/heaps/Kconfig
@@ -12,3 +12,20 @@ config DMABUF_HEAPS_CMA
 	  Choose this option to enable dma-buf CMA heap. This heap is backed
 	  by the Contiguous Memory Allocator (CMA). If your system has these
 	  regions, you should say Y here.
+
+config DMABUF_HEAPS_COHERENT
+	bool "DMA-BUF Coherent Reserved-Memory Heap"
+	depends on DMABUF_HEAPS && OF_RESERVED_MEM && DMA_DECLARE_COHERENT
+	help
+	  Choose this option to enable coherent reserved-memory dma-buf heaps.
+	  This heap is backed by non-reusable DT "shared-dma-pool" regions.
+	  If your system defines coherent reserved-memory regions, you should
+	  say Y here.
+
+config COHERENT_AREAS_DEFERRED
+	int "Max deferred coherent reserved-memory regions"
+	depends on DMABUF_HEAPS_COHERENT
+	default 16
+	help
+	  Maximum number of coherent reserved-memory regions that can be
+	  deferred for later registration during early boot.
diff --git a/drivers/dma-buf/heaps/Makefile b/drivers/dma-buf/heaps/Makefile
index 974467791032f..96bda7a65f041 100644
--- a/drivers/dma-buf/heaps/Makefile
+++ b/drivers/dma-buf/heaps/Makefile
@@ -1,3 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0
 obj-$(CONFIG_DMABUF_HEAPS_SYSTEM)	+= system_heap.o
 obj-$(CONFIG_DMABUF_HEAPS_CMA)		+= cma_heap.o
+obj-$(CONFIG_DMABUF_HEAPS_COHERENT)	+= coherent_heap.o
diff --git a/drivers/dma-buf/heaps/coherent_heap.c b/drivers/dma-buf/heaps/coherent_heap.c
new file mode 100644
index 0000000000000..870b2b89aefcb
--- /dev/null
+++ b/drivers/dma-buf/heaps/coherent_heap.c
@@ -0,0 +1,486 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * DMABUF heap for coherent reserved-memory regions
+ *
+ * Copyright (C) 2026 Red Hat, Inc.
+ * Author: Albert Esteve <[email protected]>
+ *
+ */
+
+#include <linux/cgroup_dmem.h>
+#include <linux/dma-buf.h>
+#include <linux/dma-heap.h>
+#include <linux/dma-mapping.h>
+#include <linux/err.h>
+#include <linux/highmem.h>
+#include <linux/iosys-map.h>
+#include <linux/module.h>
+#include <linux/of_reserved_mem.h>
+#include <linux/platform_device.h>
+#include <linux/scatterlist.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+
+#define DEFERRED_AREAS_MAX CONFIG_COHERENT_AREAS_DEFERRED
+
+/*
+ * Early init can't use normal memory management yet (memblock is used
+ * instead), so keep a small deferred list and retry at late_initcall.
+ */
+static struct reserved_mem *rmem_areas_deferred[DEFERRED_AREAS_MAX];
+static unsigned int rmem_areas_deferred_num;
+
+static int coherent_heap_add_deferred(struct reserved_mem *rmem)
+{
+	if (rmem_areas_deferred_num >= DEFERRED_AREAS_MAX) {
+		pr_warn("Deferred heap areas list full, dropping %s\n",
+			rmem->name ? rmem->name : "unknown");
+		return -EINVAL;
+	}
+	rmem_areas_deferred[rmem_areas_deferred_num++] = rmem;
+	return 0;
+}
+
+struct coherent_heap {
+	struct dma_heap *heap;
+	struct reserved_mem *rmem;
+	char *name;
+	struct device *dev;
+	struct platform_device *pdev;
+#if IS_ENABLED(CONFIG_CGROUP_DMEM)
+	struct dmem_cgroup_region *cg;
+#endif
+};
+
+struct coherent_heap_buffer {
+	struct coherent_heap *heap;
+	struct list_head attachments;
+	struct mutex lock;
+	unsigned long len;
+	dma_addr_t dma_addr;
+	void *alloc_vaddr;
+	struct page **pages;
+	pgoff_t pagecount;
+	int vmap_cnt;
+	void *vaddr;
+#if IS_ENABLED(CONFIG_CGROUP_DMEM)
+	struct dmem_cgroup_pool_state *pool;
+#endif
+};
+
+struct dma_heap_attachment {
+	struct device *dev;
+	struct sg_table table;
+	struct list_head list;
+	bool mapped;
+};
+
+static int coherent_heap_attach(struct dma_buf *dmabuf,
+				struct dma_buf_attachment *attachment)
+{
+	struct coherent_heap_buffer *buffer = dmabuf->priv;
+	struct dma_heap_attachment *a;
+	int ret;
+
+	a = kzalloc(sizeof(*a), GFP_KERNEL);
+	if (!a)
+		return -ENOMEM;
+
+	ret = sg_alloc_table_from_pages(&a->table, buffer->pages,
+					buffer->pagecount, 0,
+					buffer->pagecount << PAGE_SHIFT,
+					GFP_KERNEL);
+	if (ret) {
+		kfree(a);
+		return ret;
+	}
+
+	a->dev = attachment->dev;
+	INIT_LIST_HEAD(&a->list);
+	a->mapped = false;
+
+	attachment->priv = a;
+
+	mutex_lock(&buffer->lock);
+	list_add(&a->list, &buffer->attachments);
+	mutex_unlock(&buffer->lock);
+
+	return 0;
+}
+
+static void coherent_heap_detach(struct dma_buf *dmabuf,
+				 struct dma_buf_attachment *attachment)
+{
+	struct coherent_heap_buffer *buffer = dmabuf->priv;
+	struct dma_heap_attachment *a = attachment->priv;
+
+	mutex_lock(&buffer->lock);
+	list_del(&a->list);
+	mutex_unlock(&buffer->lock);
+
+	sg_free_table(&a->table);
+	kfree(a);
+}
+
+static struct sg_table *coherent_heap_map_dma_buf(struct dma_buf_attachment *attachment,
+						  enum dma_data_direction direction)
+{
+	struct dma_heap_attachment *a = attachment->priv;
+	struct sg_table *table = &a->table;
+	int ret;
+
+	ret = dma_map_sgtable(attachment->dev, table, direction, 0);
+	if (ret)
+		return ERR_PTR(-ENOMEM);
+	a->mapped = true;
+
+	return table;
+}
+
+static void coherent_heap_unmap_dma_buf(struct dma_buf_attachment *attachment,
+					struct sg_table *table,
+					enum dma_data_direction direction)
+{
+	struct dma_heap_attachment *a = attachment->priv;
+
+	a->mapped = false;
+	dma_unmap_sgtable(attachment->dev, table, direction, 0);
+}
+
+static int coherent_heap_dma_buf_begin_cpu_access(struct dma_buf *dmabuf,
+						  enum dma_data_direction direction)
+{
+	struct coherent_heap_buffer *buffer = dmabuf->priv;
+	struct dma_heap_attachment *a;
+
+	mutex_lock(&buffer->lock);
+	if (buffer->vmap_cnt)
+		invalidate_kernel_vmap_range(buffer->vaddr, buffer->len);
+
+	list_for_each_entry(a, &buffer->attachments, list) {
+		if (!a->mapped)
+			continue;
+		dma_sync_sgtable_for_cpu(a->dev, &a->table, direction);
+	}
+	mutex_unlock(&buffer->lock);
+
+	return 0;
+}
+
+static int coherent_heap_dma_buf_end_cpu_access(struct dma_buf *dmabuf,
+						enum dma_data_direction direction)
+{
+	struct coherent_heap_buffer *buffer = dmabuf->priv;
+	struct dma_heap_attachment *a;
+
+	mutex_lock(&buffer->lock);
+	if (buffer->vmap_cnt)
+		flush_kernel_vmap_range(buffer->vaddr, buffer->len);
+
+	list_for_each_entry(a, &buffer->attachments, list) {
+		if (!a->mapped)
+			continue;
+		dma_sync_sgtable_for_device(a->dev, &a->table, direction);
+	}
+	mutex_unlock(&buffer->lock);
+
+	return 0;
+}
+
+static int coherent_heap_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma)
+{
+	struct coherent_heap_buffer *buffer = dmabuf->priv;
+	struct coherent_heap *coh_heap = buffer->heap;
+
+	return dma_mmap_coherent(coh_heap->dev, vma, buffer->alloc_vaddr,
+				 buffer->dma_addr, buffer->len);
+}
+
+static void *coherent_heap_do_vmap(struct coherent_heap_buffer *buffer)
+{
+	void *vaddr;
+
+	vaddr = vmap(buffer->pages, buffer->pagecount, VM_MAP, PAGE_KERNEL);
+	if (!vaddr)
+		return ERR_PTR(-ENOMEM);
+
+	return vaddr;
+}
+
+static int coherent_heap_vmap(struct dma_buf *dmabuf, struct iosys_map *map)
+{
+	struct coherent_heap_buffer *buffer = dmabuf->priv;
+	void *vaddr;
+	int ret = 0;
+
+	mutex_lock(&buffer->lock);
+	if (buffer->vmap_cnt) {
+		buffer->vmap_cnt++;
+		iosys_map_set_vaddr(map, buffer->vaddr);
+		goto out;
+	}
+
+	vaddr = coherent_heap_do_vmap(buffer);
+	if (IS_ERR(vaddr)) {
+		ret = PTR_ERR(vaddr);
+		goto out;
+	}
+
+	buffer->vaddr = vaddr;
+	buffer->vmap_cnt++;
+	iosys_map_set_vaddr(map, buffer->vaddr);
+out:
+	mutex_unlock(&buffer->lock);
+
+	return ret;
+}
+
+static void coherent_heap_vunmap(struct dma_buf *dmabuf, struct iosys_map *map)
+{
+	struct coherent_heap_buffer *buffer = dmabuf->priv;
+
+	mutex_lock(&buffer->lock);
+	if (!--buffer->vmap_cnt) {
+		vunmap(buffer->vaddr);
+		buffer->vaddr = NULL;
+	}
+	mutex_unlock(&buffer->lock);
+	iosys_map_clear(map);
+}
+
+static void coherent_heap_dma_buf_release(struct dma_buf *dmabuf)
+{
+	struct coherent_heap_buffer *buffer = dmabuf->priv;
+	struct coherent_heap *coh_heap = buffer->heap;
+
+	if (buffer->vmap_cnt > 0) {
+		WARN(1, "%s: buffer still mapped in the kernel\n", __func__);
+		vunmap(buffer->vaddr);
+		buffer->vaddr = NULL;
+		buffer->vmap_cnt = 0;
+	}
+
+	if (buffer->alloc_vaddr)
+		dma_free_coherent(coh_heap->dev, buffer->len, buffer->alloc_vaddr,
+				  buffer->dma_addr);
+	kfree(buffer->pages);
+#if IS_ENABLED(CONFIG_CGROUP_DMEM)
+	dmem_cgroup_uncharge(buffer->pool, buffer->len);
+#endif
+	kfree(buffer);
+}
+
+static const struct dma_buf_ops coherent_heap_buf_ops = {
+	.attach = coherent_heap_attach,
+	.detach = coherent_heap_detach,
+	.map_dma_buf = coherent_heap_map_dma_buf,
+	.unmap_dma_buf = coherent_heap_unmap_dma_buf,
+	.begin_cpu_access = coherent_heap_dma_buf_begin_cpu_access,
+	.end_cpu_access = coherent_heap_dma_buf_end_cpu_access,
+	.mmap = coherent_heap_mmap,
+	.vmap = coherent_heap_vmap,
+	.vunmap = coherent_heap_vunmap,
+	.release = coherent_heap_dma_buf_release,
+};
+
+static struct dma_buf *coherent_heap_allocate(struct dma_heap *heap,
+					      unsigned long len,
+					      u32 fd_flags,
+					      u64 heap_flags)
+{
+	struct coherent_heap *coh_heap;
+	struct coherent_heap_buffer *buffer;
+	DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
+	size_t size = PAGE_ALIGN(len);
+	pgoff_t pagecount = size >> PAGE_SHIFT;
+	struct dma_buf *dmabuf;
+	int ret = -ENOMEM;
+	pgoff_t pg;
+
+	coh_heap = dma_heap_get_drvdata(heap);
+	if (!coh_heap)
+		return ERR_PTR(-EINVAL);
+	if (!coh_heap->dev)
+		return ERR_PTR(-ENODEV);
+
+	buffer = kzalloc(sizeof(*buffer), GFP_KERNEL);
+	if (!buffer)
+		return ERR_PTR(-ENOMEM);
+
+	INIT_LIST_HEAD(&buffer->attachments);
+	mutex_init(&buffer->lock);
+	buffer->len = size;
+	buffer->heap = coh_heap;
+	buffer->pagecount = pagecount;
+
+#if IS_ENABLED(CONFIG_CGROUP_DMEM)
+	if (mem_accounting) {
+		ret = dmem_cgroup_try_charge(coh_heap->cg, size,
+					     &buffer->pool, NULL);
+		if (ret)
+			goto free_buffer;
+	}
+#endif
+
+	buffer->alloc_vaddr = dma_alloc_coherent(coh_heap->dev, buffer->len,
+						 &buffer->dma_addr, GFP_KERNEL);
+	if (!buffer->alloc_vaddr) {
+		ret = -ENOMEM;
+#if IS_ENABLED(CONFIG_CGROUP_DMEM)
+		goto uncharge_cgroup;
+#else
+		goto free_buffer;
+#endif
+	}
+
+	buffer->pages = kmalloc_array(pagecount, sizeof(*buffer->pages),
+				      GFP_KERNEL);
+	if (!buffer->pages) {
+		ret = -ENOMEM;
+		goto free_dma;
+	}
+
+	for (pg = 0; pg < pagecount; pg++)
+		buffer->pages[pg] = virt_to_page((char *)buffer->alloc_vaddr +
+						 (pg * PAGE_SIZE));
+
+	/* create the dmabuf */
+	exp_info.exp_name = dma_heap_get_name(heap);
+	exp_info.ops = &coherent_heap_buf_ops;
+	exp_info.size = buffer->len;
+	exp_info.flags = fd_flags;
+	exp_info.priv = buffer;
+	dmabuf = dma_buf_export(&exp_info);
+	if (IS_ERR(dmabuf)) {
+		ret = PTR_ERR(dmabuf);
+		goto free_pages;
+	}
+	return dmabuf;
+
+free_pages:
+	kfree(buffer->pages);
+free_dma:
+	dma_free_coherent(coh_heap->dev, buffer->len, buffer->alloc_vaddr,
+			  buffer->dma_addr);
+#if IS_ENABLED(CONFIG_CGROUP_DMEM)
+uncharge_cgroup:
+	dmem_cgroup_uncharge(buffer->pool, size);
+#endif
+free_buffer:
+	kfree(buffer);
+	return ERR_PTR(ret);
+}
+
+static const struct dma_heap_ops coherent_heap_ops = {
+	.allocate = coherent_heap_allocate,
+};
+
+static int __coherent_heap_register(struct reserved_mem *rmem)
+{
+	struct dma_heap_export_info exp_info;
+	struct coherent_heap *coh_heap;
+#if IS_ENABLED(CONFIG_CGROUP_DMEM)
+	struct dmem_cgroup_region *region;
+#endif
+	const char *rmem_name;
+	int ret;
+
+	if (!rmem)
+		return -EINVAL;
+
+	rmem_name = rmem->name ? rmem->name : "unknown";
+
+	coh_heap = kzalloc(sizeof(*coh_heap), GFP_KERNEL);
+	if (!coh_heap)
+		return -ENOMEM;
+
+	coh_heap->name = kasprintf(GFP_KERNEL, "coherent_%s", rmem_name);
+	if (!coh_heap->name) {
+		ret = -ENOMEM;
+		goto free_coherent_heap;
+	}
+
+	coh_heap->rmem = rmem;
+
+	/* create a platform device per rmem and bind it */
+	coh_heap->pdev = platform_device_register_simple("coherent-heap",
+							 PLATFORM_DEVID_AUTO,
+							 NULL, 0);
+	if (IS_ERR(coh_heap->pdev)) {
+		ret = PTR_ERR(coh_heap->pdev);
+		goto free_name;
+	}
+
+	if (rmem->ops && rmem->ops->device_init) {
+		ret = rmem->ops->device_init(rmem, &coh_heap->pdev->dev);
+		if (ret)
+			goto pdev_unregister;
+	}
+
+	coh_heap->dev = &coh_heap->pdev->dev;
+#if IS_ENABLED(CONFIG_CGROUP_DMEM)
+	region = dmem_cgroup_register_region(rmem->size, "coh/%s", rmem_name);
+	if (IS_ERR(region)) {
+		ret = PTR_ERR(region);
+		goto pdev_unregister;
+	}
+	coh_heap->cg = region;
+#endif
+
+	exp_info.name = coh_heap->name;
+	exp_info.ops = &coherent_heap_ops;
+	exp_info.priv = coh_heap;
+
+	coh_heap->heap = dma_heap_add(&exp_info);
+	if (IS_ERR(coh_heap->heap)) {
+		ret = PTR_ERR(coh_heap->heap);
+		goto cg_unregister;
+	}
+
+	return 0;
+
+cg_unregister:
+#if IS_ENABLED(CONFIG_CGROUP_DMEM)
+	dmem_cgroup_unregister_region(coh_heap->cg);
+#endif
+pdev_unregister:
+	platform_device_unregister(coh_heap->pdev);
+	coh_heap->pdev = NULL;
+free_name:
+	kfree(coh_heap->name);
+free_coherent_heap:
+	kfree(coh_heap);
+
+	return ret;
+}
+
+int dma_heap_coherent_register(struct reserved_mem *rmem)
+{
+	int ret;
+
+	ret = __coherent_heap_register(rmem);
+	if (ret == -ENOMEM)
+		return coherent_heap_add_deferred(rmem);
+	return ret;
+}
+
+static int __init coherent_heap_register_deferred(void)
+{
+	unsigned int i;
+	int ret;
+
+	for (i = 0; i < rmem_areas_deferred_num; i++) {
+		struct reserved_mem *rmem = rmem_areas_deferred[i];
+
+		ret = __coherent_heap_register(rmem);
+		if (ret) {
+			pr_warn("Failed to add coherent heap %s\n",
+				rmem->name ? rmem->name : "unknown");
+			continue;
+		}
+	}
+
+	return 0;
+}
+late_initcall(coherent_heap_register_deferred);
+MODULE_DESCRIPTION("DMA-BUF heap for coherent reserved-memory regions");
diff --git a/include/linux/dma-heap.h b/include/linux/dma-heap.h
index 648328a64b27e..e894cfa1ecf1a 100644
--- a/include/linux/dma-heap.h
+++ b/include/linux/dma-heap.h
@@ -9,9 +9,11 @@
 #ifndef _DMA_HEAPS_H
 #define _DMA_HEAPS_H
 
+#include <linux/errno.h>
 #include <linux/types.h>
 
 struct dma_heap;
+struct reserved_mem;
 
 /**
  * struct dma_heap_ops - ops to operate on a given heap
@@ -48,4 +50,13 @@ struct dma_heap *dma_heap_add(const struct dma_heap_export_info *exp_info);
 
 extern bool mem_accounting;
 
+#if IS_ENABLED(CONFIG_DMABUF_HEAPS_COHERENT)
+int dma_heap_coherent_register(struct reserved_mem *rmem);
+#else
+static inline int dma_heap_coherent_register(struct reserved_mem *rmem)
+{
+	return -EOPNOTSUPP;
+}
+#endif
+
 #endif /* _DMA_HEAPS_H */
diff --git a/kernel/dma/coherent.c b/kernel/dma/coherent.c
index 1147497bc512c..f49d13e460e4b 100644
--- a/kernel/dma/coherent.c
+++ b/kernel/dma/coherent.c
@@ -9,6 +9,7 @@
 #include <linux/module.h>
 #include <linux/dma-direct.h>
 #include <linux/dma-map-ops.h>
+#include <linux/dma-heap.h>
 
 struct dma_coherent_mem {
 	void *virt_base;
@@ -393,6 +394,14 @@ static int __init rmem_dma_setup(struct reserved_mem *rmem)
 	rmem->ops = &rmem_dma_ops;
 	pr_info("Reserved memory: created DMA memory pool at %pa, size %ld MiB\n",
 		&rmem->base, (unsigned long)rmem->size / SZ_1M);
+
+	if (IS_ENABLED(CONFIG_DMABUF_HEAPS_COHERENT)) {
+		int ret = dma_heap_coherent_register(rmem);
+
+		if (ret)
+			pr_warn("Reserved memory: failed to register coherent heap for %s (%d)\n",
+				rmem->name ? rmem->name : "unknown", ret);
+	}
 	return 0;
 }
 
---
base-commit: 6de23f81a5e08be8fbf5e8d7e9febc72a5b5f27f
change-id: 20260223-b4-dmabuf-heap-coherent-rmem-91fd3926afe9

Best regards,
-- 
Albert Esteve <[email protected]>
