This patch introduces the initial integration of `pcache`, a Linux kernel block layer module that leverages persistent memory (PMem) as a high-performance caching layer for traditional block devices (e.g., SSDs, HDDs).
- Persistent Memory as Cache: - `pcache` uses DAX-enabled persistent memory (e.g., `/dev/pmemX`) to provide fast, byte-addressable, non-volatile caching for block devices. - Supports both direct-mapped and vmap-based access depending on DAX capabilities. - Modular Architecture: - `cache_dev`: represents a persistent memory device used as a cache. - `backing_dev`: represents an individual block device being cached. - `logic_dev`: exposes a block device (`/dev/pcacheX`) to userspace, serving as the frontend interface for I/O. - `cache`: implements core caching logic (hit/miss, writeback, GC, etc.). Design Motivation: `pcache` is designed to bridge the performance gap between slow-but-large storage (HDDs, SATA/NVMe SSDs) and emerging byte-addressable persistent memory. Compared to traditional block layer caching, `pcache` is persistent, low-latency, highly concurrent, and more amenable to modern storage-class memory devices than legacy caching designs. This patch finalizes the series by wiring up the initialization entry point (`pcache_init()`), sysfs bus registration, root device handling, and Kconfig glue. With this, the `pcache` subsystem is ready to load as a kernel module and serve as a cache engine for block I/O. Signed-off-by: Dongsheng Yang <dongsheng.y...@linux.dev> --- MAINTAINERS | 8 ++ drivers/block/Kconfig | 2 + drivers/block/Makefile | 2 + drivers/block/pcache/Kconfig | 16 +++ drivers/block/pcache/Makefile | 4 + drivers/block/pcache/main.c | 194 ++++++++++++++++++++++++++++++++++ 6 files changed, 226 insertions(+) create mode 100644 drivers/block/pcache/Kconfig create mode 100644 drivers/block/pcache/Makefile create mode 100644 drivers/block/pcache/main.c diff --git a/MAINTAINERS b/MAINTAINERS index 00e94bec401e..5ee5879072b9 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -18026,6 +18026,14 @@ S: Maintained F: drivers/leds/leds-pca9532.c F: include/linux/leds-pca9532.h +PCACHE (Pmem as cache for block device) +M: Dongsheng Yang <dongsheng.y...@linux.dev> +M: Zheng Gu <cen...@gmail.com> +R: Linggang Zeng <linggang.li...@gmail.com> +L: linux-block@vger.kernel.org +S: Maintained +F: drivers/block/pcache/ + PCI DRIVER FOR AARDVARK (Marvell Armada 3700) M: Thomas Petazzoni <thomas.petazz...@bootlin.com> M: Pali Rohár <p...@kernel.org> diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index a97f2c40c640..27731dbed7f6 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -192,6 +192,8 @@ config BLK_DEV_LOOP_MIN_COUNT source "drivers/block/drbd/Kconfig" +source "drivers/block/pcache/Kconfig" + config BLK_DEV_NBD tristate "Network block device support" depends on NET diff --git a/drivers/block/Makefile b/drivers/block/Makefile index 1105a2d4fdcb..40b96ccbd414 100644 --- a/drivers/block/Makefile +++ b/drivers/block/Makefile @@ -43,3 +43,5 @@ obj-$(CONFIG_BLK_DEV_NULL_BLK) += null_blk/ obj-$(CONFIG_BLK_DEV_UBLK) += ublk_drv.o swim_mod-y := swim.o swim_asm.o + +obj-$(CONFIG_BLK_DEV_PCACHE) += pcache/ diff --git a/drivers/block/pcache/Kconfig b/drivers/block/pcache/Kconfig new file mode 100644 index 000000000000..2dc77354a4b1 --- /dev/null +++ b/drivers/block/pcache/Kconfig @@ -0,0 +1,16 @@ +config BLK_DEV_PCACHE + tristate "Persistent memory for cache of Block Device (Experimental)" + depends on DEV_DAX && FS_DAX + help + PCACHE provides a mechanism to use persistent memory (e.g., CXL persistent memory, + DAX-enabled devices) as a high-performance cache layer in front of + traditional block devices such as SSDs or HDDs. + + PCACHE is implemented as a kernel module that integrates with the block + layer and supports direct access (DAX) to persistent memory for low-latency, + byte-addressable caching. + + Note: This feature is experimental and should be tested thoroughly + before use in production environments. + + If unsure, say 'N'. diff --git a/drivers/block/pcache/Makefile b/drivers/block/pcache/Makefile new file mode 100644 index 000000000000..0e7316ae20e1 --- /dev/null +++ b/drivers/block/pcache/Makefile @@ -0,0 +1,4 @@ +pcache-y := main.o cache_dev.o backing_dev.o segment.o meta_segment.o logic_dev.o cache.o cache_segment.o cache_key.o cache_req.o cache_writeback.o cache_gc.o + +obj-$(CONFIG_BLK_DEV_PCACHE) += pcache.o + diff --git a/drivers/block/pcache/main.c b/drivers/block/pcache/main.c new file mode 100644 index 000000000000..d0430c64aff3 --- /dev/null +++ b/drivers/block/pcache/main.c @@ -0,0 +1,194 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright(C) 2025, Dongsheng Yang <dongsheng.y...@linux.dev> + */ + +#include <linux/capability.h> +#include <linux/device.h> +#include <linux/kernel.h> +#include <linux/parser.h> + +#include "pcache_internal.h" +#include "cache_dev.h" +#include "logic_dev.h" + +enum { + PCACHE_REG_OPT_ERR = 0, + PCACHE_REG_OPT_FORCE, + PCACHE_REG_OPT_FORMAT, + PCACHE_REG_OPT_PATH, +}; + +static const match_table_t register_opt_tokens = { + { PCACHE_REG_OPT_FORCE, "force=%u" }, + { PCACHE_REG_OPT_FORMAT, "format=%u" }, + { PCACHE_REG_OPT_PATH, "path=%s" }, + { PCACHE_REG_OPT_ERR, NULL } +}; + +static int parse_register_options(char *buf, + struct pcache_cache_dev_register_options *opts) +{ + substring_t args[MAX_OPT_ARGS]; + char *o, *p; + int token, ret = 0; + + o = buf; + + while ((p = strsep(&o, ",\n")) != NULL) { + if (!*p) + continue; + + token = match_token(p, register_opt_tokens, args); + switch (token) { + case PCACHE_REG_OPT_PATH: + if (match_strlcpy(opts->path, &args[0], + PCACHE_PATH_LEN) == 0) { + ret = -EINVAL; + break; + } + break; + case PCACHE_REG_OPT_FORCE: + if (match_uint(args, &token)) { + ret = -EINVAL; + goto out; + } + opts->force = (token != 0); + break; + case PCACHE_REG_OPT_FORMAT: + if (match_uint(args, &token)) { + ret = -EINVAL; + goto out; + } + opts->format = (token != 0); + break; + default: + pr_err("unknown parameter or missing value '%s'\n", p); + ret = -EINVAL; + goto out; + } + } + +out: + return ret; +} + +static ssize_t cache_dev_unregister_store(const struct bus_type *bus, const char *ubuf, + size_t size) +{ + u32 cache_dev_id; + int ret; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (sscanf(ubuf, "cache_dev_id=%u", &cache_dev_id) != 1) + return -EINVAL; + + ret = cache_dev_unregister(cache_dev_id); + if (ret < 0) + return ret; + + return size; +} + +static ssize_t cache_dev_register_store(const struct bus_type *bus, const char *ubuf, + size_t size) +{ + struct pcache_cache_dev_register_options opts = { 0 }; + char *buf; + int ret; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + buf = kmemdup(ubuf, size + 1, GFP_KERNEL); + if (IS_ERR(buf)) { + pr_err("failed to dup buf for adm option: %d", (int)PTR_ERR(buf)); + return PTR_ERR(buf); + } + buf[size] = '\0'; + + ret = parse_register_options(buf, &opts); + if (ret < 0) { + kfree(buf); + return ret; + } + kfree(buf); + + ret = cache_dev_register(&opts); + if (ret < 0) + return ret; + + return size; +} + +static BUS_ATTR_WO(cache_dev_unregister); +static BUS_ATTR_WO(cache_dev_register); + +static struct attribute *pcache_bus_attrs[] = { + &bus_attr_cache_dev_unregister.attr, + &bus_attr_cache_dev_register.attr, + NULL, +}; + +static const struct attribute_group pcache_bus_group = { + .attrs = pcache_bus_attrs, +}; +__ATTRIBUTE_GROUPS(pcache_bus); + +const struct bus_type pcache_bus_type = { + .name = "pcache", + .bus_groups = pcache_bus_groups, +}; + +static void pcache_root_dev_release(struct device *dev) +{ +} + +struct device pcache_root_dev = { + .init_name = "pcache", + .release = pcache_root_dev_release, +}; + +static int __init pcache_init(void) +{ + int ret; + + ret = device_register(&pcache_root_dev); + if (ret < 0) { + put_device(&pcache_root_dev); + goto err; + } + + ret = bus_register(&pcache_bus_type); + if (ret < 0) + goto device_unregister; + + ret = pcache_blkdev_init(); + if (ret < 0) + goto bus_unregister; + + return 0; + +bus_unregister: + bus_unregister(&pcache_bus_type); +device_unregister: + device_unregister(&pcache_root_dev); +err: + + return ret; +} + +static void pcache_exit(void) +{ + pcache_blkdev_exit(); + bus_unregister(&pcache_bus_type); + device_unregister(&pcache_root_dev); +} + +MODULE_AUTHOR("Dongsheng Yang <dongsheng.y...@linux.dev>"); +MODULE_DESCRIPTION("PMem for Cache of block device"); +MODULE_LICENSE("GPL v2"); +module_init(pcache_init); +module_exit(pcache_exit); -- 2.34.1