DC DAX regions are populated with dax_resource children that each carry a
backing tag uuid and a per-allocation sequence number (seq_num). Add the
userspace claim semantics that resolve those tagged groups into DAX
devices.
A DC region's seed dax device is created at 0-size on probe; userspace
populates it by writing to its 'uuid' attribute:
* A non-null UUID claims every dax_resource on this region whose tag
matches, in seq_num order via uuid_claim_tagged(). The match set
must form a dense 1..n sequence (no gap, no duplicate); the CXL
side maintains this invariant for both sharable allocations (where
the device stamps shared_extn_seq) and non-sharable allocations
(where cxl_add_pending assigns arrival-order seq). The resulting
DAX device's size equals the sum of every member extent's size.
* "0" claims a single untagged dax_resource via
uuid_claim_untagged(). Untagged extents are independent
allocations; collapsing several would aggregate unrelated capacity,
so each uuid="0" write consumes exactly one untagged resource.
* A write that matches no dax_resource returns -ENOENT; the device
stays at size 0.
uuid_show() reads back the backing tag uuid, or "0" when the device holds
no tagged extent (e.g. an untagged claim). The attribute is read-only
(0444) on non-DC dax devices; writes to it on non-DC regions return
-EOPNOTSUPP.
dev_dax_visible() leaves the uuid attribute writable only on DC dax
devices, and makes 'align' read-only on DC regions as well.
Based on an original patch by Navneet Singh.
Signed-off-by: Ira Weiny <[email protected]>
Signed-off-by: Anisa Su <[email protected]>
---
Changes:
[anisa: split out from the original "Surface dc_extents" commit;
userspace tag-claim semantics only.]
---
drivers/dax/bus.c | 260 +++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 256 insertions(+), 4 deletions(-)
diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c
index c030eb103ad0..1dccb3e5cd0f 100644
--- a/drivers/dax/bus.c
+++ b/drivers/dax/bus.c
@@ -5,6 +5,7 @@
#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/slab.h>
+#include <linux/sort.h>
#include <linux/dax.h>
#include <linux/io.h>
#include "dax-private.h"
@@ -1316,6 +1317,89 @@ static ssize_t dev_dax_resize(struct dax_region *dax_region,
return 0;
}
+/*
+ * Report whether @dax_resource has already been claimed. DC extents are
+ * all-or-nothing, so a positive use count means the whole extent is taken.
+ */
+static bool dax_resource_in_use(const struct dax_resource *dax_resource)
+{
+	if (dax_resource->use_cnt > 0)
+		return true;
+
+	return false;
+}
+
+/* Search key for find_uuid_extent(): the owning region and the tag to match. */
+struct dax_uuid_match {
+	const struct dax_region *dax_region;
+	const uuid_t *uuid;
+};
+
+/*
+ * Shared filter for the find/count/collect callbacks below.
+ *
+ * Return the dax_resource stored as drvdata of @dev when @dev is an extent
+ * of @dax_region (per dc_ops->is_extent()), the resource is not in use, and
+ * its tag equals @uuid. Return NULL otherwise.
+ */
+static struct dax_resource *
+dax_match_free_extent(const struct dax_region *dax_region, struct device *dev,
+		      const uuid_t *uuid)
+{
+	struct dax_resource *dax_resource;
+
+	if (!dax_region->dc_ops->is_extent(dev))
+		return NULL;
+
+	dax_resource = dev_get_drvdata(dev);
+	if (!dax_resource || dax_resource_in_use(dax_resource))
+		return NULL;
+	if (!uuid_equal(&dax_resource->uuid, uuid))
+		return NULL;
+
+	return dax_resource;
+}
+
+/*
+ * device_find_child() callback. @data is a struct dax_uuid_match.
+ * Returns non-zero for the first free extent whose tag matches.
+ */
+static int find_uuid_extent(struct device *dev, const void *data)
+{
+	const struct dax_uuid_match *match = data;
+
+	return dax_match_free_extent(match->dax_region, dev,
+				     match->uuid) != NULL;
+}
+
+/*
+ * State shared by count_uuid_extent() and collect_uuid_extent().
+ * @arr and @cap are only consulted by the collect pass; the count pass
+ * just increments @count.
+ */
+struct dax_tag_collect {
+	const struct dax_region *dax_region;
+	const uuid_t *uuid;
+	struct dax_resource **arr;
+	unsigned int count;
+	unsigned int cap;
+};
+
+/*
+ * device_for_each_child() callback. @data is a struct dax_tag_collect.
+ * Appends each matching free extent to c->arr. Returns -ENOSPC, which
+ * stops the iteration, if a match is found once the array already holds
+ * c->cap entries.
+ */
+static int collect_uuid_extent(struct device *dev, void *data)
+{
+	struct dax_tag_collect *c = data;
+	struct dax_resource *dax_resource;
+
+	dax_resource = dax_match_free_extent(c->dax_region, dev, c->uuid);
+	if (!dax_resource)
+		return 0;
+
+	if (c->count == c->cap)
+		return -ENOSPC;
+	c->arr[c->count++] = dax_resource;
+	return 0;
+}
+
+/*
+ * device_for_each_child() callback. @data is a struct dax_tag_collect.
+ * Counts matching free extents in c->count; always returns 0 so every
+ * child is visited.
+ */
+static int count_uuid_extent(struct device *dev, void *data)
+{
+	struct dax_tag_collect *c = data;
+
+	if (dax_match_free_extent(c->dax_region, dev, c->uuid))
+		c->count++;
+	return 0;
+}
+
+/*
+ * sort() comparator for an array of dax_resource pointers: orders the
+ * entries by ascending seq_num.
+ */
+static int dax_resource_seq_cmp(const void *a, const void *b)
+{
+	const struct dax_resource *lhs = *(const struct dax_resource * const *)a;
+	const struct dax_resource *rhs = *(const struct dax_resource * const *)b;
+
+	/* Yields -1, 0 or 1 without relying on subtraction. */
+	return (lhs->seq_num > rhs->seq_num) - (lhs->seq_num < rhs->seq_num);
+}
+
static ssize_t size_store(struct device *dev, struct device_attribute *attr,
const char *buf, size_t len)
{
@@ -1548,13 +1632,177 @@ static DEVICE_ATTR_RO(numa_node);
+/*
+ * Emit the tag of the first range whose dax_resource carries a non-null
+ * uuid, or "0" when no range has one. Runs under dax_dev_rwsem held for
+ * read; returns the error from down_read_interruptible() if the wait is
+ * interrupted.
+ */
 static ssize_t uuid_show(struct device *dev,
 			 struct device_attribute *attr, char *buf)
 {
-	return sysfs_emit(buf, "%d\n", 0);
+	struct dev_dax *dev_dax = to_dev_dax(dev);
+	const uuid_t *tag = NULL;
+	int idx;
+	int rc;
+
+	rc = down_read_interruptible(&dax_dev_rwsem);
+	if (rc)
+		return rc;
+
+	for (idx = 0; idx < dev_dax->nr_range; idx++) {
+		struct dax_resource *res = dev_dax->ranges[idx].dax_resource;
+
+		if (!res || uuid_is_null(&res->uuid))
+			continue;
+		tag = &res->uuid;
+		break;
+	}
+
+	if (tag)
+		rc = sysfs_emit(buf, "%pUb\n", tag);
+	else
+		rc = sysfs_emit(buf, "0\n");
+
+	up_read(&dax_dev_rwsem);
+	return rc;
+}
+
+/*
+ * uuid_claim_untagged() - back @dev_dax with one free untagged extent
+ * @dax_region: region whose child devices are searched
+ * @dev_dax: device that receives the extent
+ *
+ * Looks up the first free child extent whose tag is the null uuid and
+ * hands its full size to __dev_dax_resize(). Exactly one extent is
+ * consumed per call. uuid_store() calls this with dax_region_rwsem and
+ * dax_dev_rwsem held for write.
+ *
+ * Return: 0 on success, -ENOENT if no free untagged extent exists or
+ * nothing was allocated, or the negative errno from __dev_dax_resize().
+ */
+static ssize_t uuid_claim_untagged(struct dax_region *dax_region,
+				   struct dev_dax *dev_dax)
+{
+	struct dax_uuid_match match = {
+		.dax_region = dax_region,
+		.uuid = &uuid_null,
+	};
+	struct dax_resource *dax_resource;
+	resource_size_t to_alloc;
+	struct device *extent_dev;
+	ssize_t alloc;
+	ssize_t rc = 0;
+
+	extent_dev = device_find_child(dax_region->dev, &match,
+				       find_uuid_extent);
+	if (!extent_dev)
+		return -ENOENT;
+
+	dax_resource = dev_get_drvdata(extent_dev);
+	to_alloc = resource_size(dax_resource->res);
+	alloc = __dev_dax_resize(dax_resource->res, dev_dax, to_alloc,
+				 dax_resource);
+	if (alloc < 0)
+		rc = alloc;
+	else if (alloc == 0)
+		rc = -ENOENT;
+	else
+		dax_resource->use_cnt++;
+
+	/*
+	 * dax_resource is the extent device's drvdata, so keep the reference
+	 * taken by device_find_child() until the last access above is done.
+	 */
+	put_device(extent_dev);
+	return rc;
+}
+
+/*
+ * uuid_claim_tagged() - claim every free extent tagged @uuid for @dev_dax
+ * @dax_region: region whose child devices are searched
+ * @dev_dax: device that receives the extents as ranges
+ * @uuid: tag to match
+ *
+ * Matching extents are counted, gathered into a scratch array, sorted by
+ * seq_num, checked to form a dense 1..n sequence, and then passed to
+ * __dev_dax_resize() one by one in that order. If any step of the last
+ * phase fails, the ranges added by this call are trimmed again.
+ * uuid_store() calls this with dax_region_rwsem and dax_dev_rwsem held
+ * for write.
+ *
+ * Return: 0 on success, -ENOENT if nothing matches, -ENOMEM if the scratch
+ * array cannot be allocated, -EINVAL if the sequence check fails, -ENOSPC
+ * if an extent yields no capacity or the collect pass sees more matches
+ * than the count pass, or the negative errno from __dev_dax_resize().
+ */
+static ssize_t uuid_claim_tagged(struct dax_region *dax_region,
+				 struct dev_dax *dev_dax, const uuid_t *uuid)
+{
+	struct dax_tag_collect c = {
+		.dax_region = dax_region,
+		.uuid = uuid,
+	};
+	unsigned int i;
+	ssize_t rc;
+
+	/* Two-pass: count, then collect into a sized array. */
+	device_for_each_child(dax_region->dev, &c, count_uuid_extent);
+	if (!c.count)
+		return -ENOENT;
+
+	c.arr = kmalloc_array(c.count, sizeof(*c.arr), GFP_KERNEL);
+	if (!c.arr)
+		return -ENOMEM;
+	c.cap = c.count;
+	c.count = 0;
+
+	rc = device_for_each_child(dax_region->dev, &c, collect_uuid_extent);
+	if (rc)
+		goto out;
+
+	sort(c.arr, c.count, sizeof(*c.arr), dax_resource_seq_cmp, NULL);
+
+	/*
+	 * Tagged groups carry a dense 1..n @seq_num regardless of source
+	 * (sharable: device-stamped; non-sharable: host-assigned in
+	 * arrival order — see &struct dax_resource). A gap or
+	 * out-of-range value here means an extent went missing on the
+	 * cxl side (e.g. a per-extent failure in cxl_add_pending) or a
+	 * cxl-side validation gap; in either case refuse the whole
+	 * group rather than carve a partial allocation.
+	 */
+	for (i = 0; i < c.count; i++) {
+		if (c.arr[i]->seq_num != i + 1) {
+			dev_WARN_ONCE(dax_region->dev, 1,
+				      "tag %pUb seq invariant violated at slot %u (got %u)\n",
+				      uuid, i, c.arr[i]->seq_num);
+			rc = -EINVAL;
+			goto out;
+		}
+	}
+
+	for (i = 0; i < c.count; i++) {
+		resource_size_t to_alloc = resource_size(c.arr[i]->res);
+		ssize_t alloc;
+
+		alloc = __dev_dax_resize(c.arr[i]->res, dev_dax, to_alloc,
+					 c.arr[i]);
+		if (alloc < 0) {
+			rc = alloc;
+			goto rollback;
+		}
+		if (alloc == 0) {
+			rc = -ENOSPC;
+			goto rollback;
+		}
+		c.arr[i]->use_cnt++;
+	}
+	rc = 0;
+	goto out;
+
+rollback:
+	/*
+	 * Partial failure: trim every range we added in this attempt.
+	 * trim_dev_dax_range pops the most-recently-appended range from
+	 * dev_dax->ranges[] and decrements its dax_resource->use_cnt, so
+	 * looping until we have undone @i additions restores both
+	 * dev_dax->ranges[] and the matched dax_resources' use_cnt.
+	 */
+	while (i-- > 0)
+		trim_dev_dax_range(dev_dax);
+out:
+	kfree(c.arr);
+	return rc;
 }
+/*
+ * Claim extents for this dax device according to the value written.
+ *
+ * "0" (or a uuid that parses to the null uuid) claims one untagged extent
+ * via uuid_claim_untagged(); any other uuid claims the matching tagged
+ * group via uuid_claim_tagged(). The claim runs with dax_region_rwsem and
+ * then dax_dev_rwsem held for write.
+ *
+ * Return: @len on success, -EOPNOTSUPP on a non-dynamic region, the
+ * uuid_parse() error for malformed input, -ENXIO if the region device has
+ * no driver bound, or the error from taking a lock or from the claim
+ * helper.
+ */
 static ssize_t uuid_store(struct device *dev, struct device_attribute *attr,
 			  const char *buf, size_t len)
 {
-	return -EOPNOTSUPP;
+	struct dev_dax *dev_dax = to_dev_dax(dev);
+	struct dax_region *dax_region = dev_dax->region;
+	uuid_t tag;
+	ssize_t rc;
+
+	if (!is_dynamic(dax_region))
+		return -EOPNOTSUPP;
+
+	if (!sysfs_streq(buf, "0")) {
+		rc = uuid_parse(buf, &tag);
+		if (rc)
+			return rc;
+	} else {
+		uuid_copy(&tag, &uuid_null);
+	}
+
+	rc = down_write_killable(&dax_region_rwsem);
+	if (rc)
+		return rc;
+
+	if (!dax_region->dev->driver) {
+		rc = -ENXIO;
+		goto unlock_region;
+	}
+
+	rc = down_write_killable(&dax_dev_rwsem);
+	if (rc)
+		goto unlock_region;
+
+	if (!uuid_is_null(&tag))
+		rc = uuid_claim_tagged(dax_region, dev_dax, &tag);
+	else
+		rc = uuid_claim_untagged(dax_region, dev_dax);
+
+	up_write(&dax_dev_rwsem);
+unlock_region:
+	up_write(&dax_region_rwsem);
+
+	if (rc < 0)
+		return rc;
+	return len;
 }
static DEVICE_ATTR_RW(uuid);
@@ -1614,8 +1862,12 @@ static umode_t dev_dax_visible(struct kobject *kobj, struct attribute *a, int n)
return 0;
if (a == &dev_attr_mapping.attr && is_dynamic(dax_region))
return 0;
- if ((a == &dev_attr_align.attr ||
- a == &dev_attr_size.attr) && is_static(dax_region))
+ if (a == &dev_attr_uuid.attr && !is_dynamic(dax_region))
+ return 0444;
+ if (a == &dev_attr_align.attr &&
+ (is_static(dax_region) || is_dynamic(dax_region)))
+ return 0444;
+ if (a == &dev_attr_size.attr && is_static(dax_region))
return 0444;
return a->mode;
}
--
2.43.0