On Thu, Jun 12, 2025 at 02:12:42PM -0700, jane....@oracle.com wrote: > > On 6/12/2025 1:31 AM, Mike Rapoport wrote: > > From: "Mike Rapoport (Microsoft)" <r...@kernel.org> > > > > There are use cases, for example virtual machine hosts, that create > > "persistent" memory regions using the memmap= option on x86 or dummy > > pmem-region device tree nodes on DT-based systems. > > > > Both these options are inflexible because they create static regions and > > the layout of the "persistent" memory cannot be adjusted without a reboot. > > > > Add a ramdax driver that allows creation of DIMM devices on top of > > E820_TYPE_PRAM regions and devicetree pmem-region nodes. > > > > The DIMMs support label space management on the "device" and provide a > > flexible way to access RAM using fsdax and devdax. > > Just curious, how does the new driver work with Michal Clapinski's recent > patch that adds > "nd_e820.pmem=ss[KMG],nn[KMG][,mode=fsdax/devdax,align=aa[KMG]]" kernel > parameter?
The new driver and nd_e820 are mutually exclusive. > thanks, > -jane > > > > Signed-off-by: Mike Rapoport (Microsoft) <r...@kernel.org> > > --- > > drivers/nvdimm/Kconfig | 15 +++ > > drivers/nvdimm/Makefile | 1 + > > drivers/nvdimm/ramdax.c | 279 ++++++++++++++++++++++++++++++++++++++++ > > 3 files changed, 295 insertions(+) > > create mode 100644 drivers/nvdimm/ramdax.c > > > > diff --git a/drivers/nvdimm/Kconfig b/drivers/nvdimm/Kconfig > > index fde3e17c836c..7aae74a29f10 100644 > > --- a/drivers/nvdimm/Kconfig > > +++ b/drivers/nvdimm/Kconfig > > @@ -97,6 +97,21 @@ config OF_PMEM > > Select Y if unsure. > > +config RAMDAX > > + tristate "Support persistent memory interfaces on RAM carveouts" > > + depends on OF || (X86 && X86_PMEM_LEGACY=n) > > + select X86_PMEM_LEGACY_DEVICE > > + default LIBNVDIMM > > + help > > + Allows creation of DAX devices on RAM carveouts. > > + > > + Memory ranges that are manually specified by the > > + 'memmap=nn[KMG]!ss[KMG]' kernel command line or defined by dummy > > + pmem-region device tree nodes would be managed by this driver as DIMM > > + devices with support for dynamic layout of namespaces. > > + > > + Select N if unsure. 
> > + > > config NVDIMM_KEYS > > def_bool y > > depends on ENCRYPTED_KEYS > > diff --git a/drivers/nvdimm/Makefile b/drivers/nvdimm/Makefile > > index ba0296dca9db..8c268814936c 100644 > > --- a/drivers/nvdimm/Makefile > > +++ b/drivers/nvdimm/Makefile > > @@ -5,6 +5,7 @@ obj-$(CONFIG_ND_BTT) += nd_btt.o > > obj-$(CONFIG_X86_PMEM_LEGACY) += nd_e820.o > > obj-$(CONFIG_OF_PMEM) += of_pmem.o > > obj-$(CONFIG_VIRTIO_PMEM) += virtio_pmem.o nd_virtio.o > > +obj-$(CONFIG_RAMDAX) += ramdax.o > > nd_pmem-y := pmem.o > > diff --git a/drivers/nvdimm/ramdax.c b/drivers/nvdimm/ramdax.c > > new file mode 100644 > > index 000000000000..67b0a240c0ae > > --- /dev/null > > +++ b/drivers/nvdimm/ramdax.c > > @@ -0,0 +1,279 @@ > > +// SPDX-License-Identifier: GPL-2.0-only > > +/* > > + * Copyright (c) 2015, Mike Rapoport, Microsoft > > + * > > + * Based on e820 pmem driver: > > + * Copyright (c) 2015, Christoph Hellwig. > > + * Copyright (c) 2015, Intel Corporation. > > + */ > > +#include <linux/platform_device.h> > > +#include <linux/memory_hotplug.h> > > +#include <linux/libnvdimm.h> > > +#include <linux/module.h> > > +#include <linux/numa.h> > > +#include <linux/io.h> > > +#include <linux/of.h> > > + > > +#include <uapi/linux/ndctl.h> > > + > > +#define LABEL_AREA_SIZE SZ_128K > > + > > +struct ramdax_dimm { > > + struct nvdimm *nvdimm; > > + void *label_area; > > +}; > > + > > +static void ramdax_remove(struct platform_device *pdev) > > +{ > > + struct nvdimm_bus *nvdimm_bus = platform_get_drvdata(pdev); > > + > > + /* FIXME: cleanup dimm and region devices */ > > + > > + nvdimm_bus_unregister(nvdimm_bus); > > +} > > + > > +static int ramdax_register_region(struct resource *res, > > + struct nvdimm *nvdimm, > > + struct nvdimm_bus *nvdimm_bus) > > +{ > > + struct nd_mapping_desc mapping; > > + struct nd_region_desc ndr_desc; > > + struct nd_interleave_set *nd_set; > > + int nid = phys_to_target_node(res->start); > > + > > + nd_set = kzalloc(sizeof(*nd_set), GFP_KERNEL); > > + if 
(!nd_set) > > + return -ENOMEM; > > + > > + nd_set->cookie1 = get_random_u64(); > > + nd_set->cookie2 = nd_set->cookie1; > > + > > + memset(&mapping, 0, sizeof(mapping)); > > + mapping.nvdimm = nvdimm; > > + mapping.start = 0; > > + mapping.size = resource_size(res) - LABEL_AREA_SIZE; > > + > > + memset(&ndr_desc, 0, sizeof(ndr_desc)); > > + ndr_desc.res = res; > > + ndr_desc.numa_node = numa_map_to_online_node(nid); > > + ndr_desc.target_node = nid; > > + ndr_desc.num_mappings = 1; > > + ndr_desc.mapping = &mapping; > > + ndr_desc.nd_set = nd_set; > > + > > + if (!nvdimm_pmem_region_create(nvdimm_bus, &ndr_desc)) > > + goto err_free_nd_set; > > + > > + return 0; > > + > > +err_free_nd_set: > > + kfree(nd_set); > > + return -ENXIO; > > +} > > + > > +static int ramdax_register_dimm(struct resource *res, void *data) > > +{ > > + resource_size_t start = res->start; > > + resource_size_t size = resource_size(res); > > + unsigned long flags = 0, cmd_mask = 0; > > + struct nvdimm_bus *nvdimm_bus = data; > > + struct ramdax_dimm *dimm; > > + int err; > > + > > + dimm = kzalloc(sizeof(*dimm), GFP_KERNEL); > > + if (!dimm) > > + return -ENOMEM; > > + > > + dimm->label_area = memremap(start + size - LABEL_AREA_SIZE, > > + LABEL_AREA_SIZE, MEMREMAP_WB); > > + if (!dimm->label_area) > > + goto err_free_dimm; > > + > > + set_bit(NDD_LABELING, &flags); > > + set_bit(NDD_REGISTER_SYNC, &flags); > > + set_bit(ND_CMD_GET_CONFIG_SIZE, &cmd_mask); > > + set_bit(ND_CMD_GET_CONFIG_DATA, &cmd_mask); > > + set_bit(ND_CMD_SET_CONFIG_DATA, &cmd_mask); > > + dimm->nvdimm = nvdimm_create(nvdimm_bus, dimm, > > + /* dimm_attribute_groups */ NULL, > > + flags, cmd_mask, 0, NULL); > > + if (!dimm->nvdimm) { > > + err = -ENOMEM; > > + goto err_unmap_label; > > + } > > + > > + err = ramdax_register_region(res, dimm->nvdimm, nvdimm_bus); > > + if (err) > > + goto err_remove_nvdimm; > > + > > + return 0; > > + > > +err_remove_nvdimm: > > + nvdimm_delete(dimm->nvdimm); > > +err_unmap_label: > > + 
memunmap(dimm->label_area); > > +err_free_dimm: > > + kfree(dimm); > > + return err; > > +} > > + > > +static int ramdax_get_config_size(struct nvdimm *nvdimm, int buf_len, > > + struct nd_cmd_get_config_size *cmd) > > +{ > > + if (sizeof(*cmd) > buf_len) > > + return -EINVAL; > > + > > + *cmd = (struct nd_cmd_get_config_size){ > > + .status = 0, > > + .config_size = LABEL_AREA_SIZE, > > + .max_xfer = 8, > > + }; > > + > > + return 0; > > +} > > + > > +static int ramdax_get_config_data(struct nvdimm *nvdimm, int buf_len, > > + struct nd_cmd_get_config_data_hdr *cmd) > > +{ > > + struct ramdax_dimm *dimm = nvdimm_provider_data(nvdimm); > > + > > + if (sizeof(*cmd) > buf_len) > > + return -EINVAL; > > + if (struct_size(cmd, out_buf, cmd->in_length) > buf_len) > > + return -EINVAL; > > + if (cmd->in_offset + cmd->in_length > LABEL_AREA_SIZE) > > + return -EINVAL; > > + > > + memcpy(cmd->out_buf, dimm->label_area + cmd->in_offset, buf_len); > > + > > + return 0; > > +} > > + > > +static int ramdax_set_config_data(struct nvdimm *nvdimm, int buf_len, > > + struct nd_cmd_set_config_hdr *cmd) > > +{ > > + struct ramdax_dimm *dimm = nvdimm_provider_data(nvdimm); > > + > > + if (sizeof(*cmd) > buf_len) > > + return -EINVAL; > > + if (struct_size(cmd, in_buf, cmd->in_length) > buf_len) > > + return -EINVAL; > > + if (cmd->in_offset + cmd->in_length > LABEL_AREA_SIZE) > > + return -EINVAL; > > + > > + memcpy(dimm->label_area + cmd->in_offset, cmd->in_buf, buf_len); > > + > > + return 0; > > +} > > + > > +static int ramdax_nvdimm_ctl(struct nvdimm *nvdimm, unsigned int cmd, > > + void *buf, unsigned int buf_len) > > +{ > > + unsigned long cmd_mask = nvdimm_cmd_mask(nvdimm); > > + > > + if (!test_bit(cmd, &cmd_mask)) > > + return -ENOTTY; > > + > > + switch (cmd) { > > + case ND_CMD_GET_CONFIG_SIZE: > > + return ramdax_get_config_size(nvdimm, buf_len, buf); > > + case ND_CMD_GET_CONFIG_DATA: > > + return ramdax_get_config_data(nvdimm, buf_len, buf); > > + case 
ND_CMD_SET_CONFIG_DATA: > > + return ramdax_set_config_data(nvdimm, buf_len, buf); > > + default: > > + return -ENOTTY; > > + } > > +} > > + > > +static int ramdax_ctl(struct nvdimm_bus_descriptor *nd_desc, > > + struct nvdimm *nvdimm, unsigned int cmd, void *buf, > > + unsigned int buf_len, int *cmd_rc) > > +{ > > + /* > > + * No firmware response to translate, let the transport error > > + * code take precedence. > > + */ > > + *cmd_rc = 0; > > + > > + if (!nvdimm) > > + return -ENOTTY; > > + return ramdax_nvdimm_ctl(nvdimm, cmd, buf, buf_len); > > +} > > + > > +static int ramdax_probe_of(struct platform_device *pdev, > > + struct nvdimm_bus *bus, struct device_node *np) > > +{ > > + int err; > > + > > + for (int i = 0; i < pdev->num_resources; i++) { > > + err = ramdax_register_dimm(&pdev->resource[i], bus); > > + if (err) > > + goto err_unregister; > > + } > > + > > + return 0; > > + > > +err_unregister: > > + /* > > + * FIXME: should we unregister the dimms that were registered > > + * successfully > > + */ > > + return err; > > +} > > + > > +static int ramdax_probe(struct platform_device *pdev) > > +{ > > + static struct nvdimm_bus_descriptor nd_desc; > > + struct device *dev = &pdev->dev; > > + struct nvdimm_bus *nvdimm_bus; > > + struct device_node *np; > > + int rc = -ENXIO; > > + > > + nd_desc.provider_name = "ramdax"; > > + nd_desc.module = THIS_MODULE; > > + nd_desc.ndctl = ramdax_ctl; > > + nvdimm_bus = nvdimm_bus_register(dev, &nd_desc); > > + if (!nvdimm_bus) > > + goto err; > > + > > + np = dev_of_node(&pdev->dev); > > + if (np) > > + rc = ramdax_probe_of(pdev, nvdimm_bus, np); > > + else > > + rc = walk_iomem_res_desc(IORES_DESC_PERSISTENT_MEMORY_LEGACY, > > + IORESOURCE_MEM, 0, -1, nvdimm_bus, > > + ramdax_register_dimm); > > + if (rc) > > + goto err; > > + > > + platform_set_drvdata(pdev, nvdimm_bus); > > + > > + return 0; > > +err: > > + nvdimm_bus_unregister(nvdimm_bus); > > + return rc; > > +} > > + > > +#ifdef CONFIG_OF > > +static const 
struct of_device_id ramdax_of_matches[] = { > > + { .compatible = "pmem-region", }, > > + { }, > > +}; > > +MODULE_DEVICE_TABLE(of, ramdax_of_matches); > > +#endif > > + > > +static struct platform_driver ramdax_driver = { > > + .probe = ramdax_probe, > > + .remove = ramdax_remove, > > + .driver = { > > + .name = "e820_pmem", > > + .of_match_table = of_match_ptr(ramdax_of_matches), > > + }, > > +}; > > + > > +module_platform_driver(ramdax_driver); > > + > > +MODULE_DESCRIPTION("NVDIMM support for e820 type-12 memory and OF > > pmem-region"); > > +MODULE_LICENSE("GPL"); > > +MODULE_AUTHOR("Microsoft Corporation"); > -- Sincerely yours, Mike.