On 2025/12/23 09:56, Hongbo Li wrote:
From: Hongzhen Luo <[email protected]>
Currently, reading files with different paths (or names) but the same
content will consume multiple copies of the page cache, even if the
content of these page caches is the same. For example, reading
identical files (e.g., *.so files) from two different minor versions of
container images will cost multiple copies of the same page cache,
since different containers have different mount points. Therefore,
sharing the page cache for files with the same content can save memory.
This introduces the page cache share feature in erofs. It allocates a
deduplicated inode and uses its page cache as the shared one. Reads for files
with identical content will ultimately be routed to the page cache of
the deduplicated inode. In this way, a single page cache satisfies
multiple read requests for different files with the same contents.
Signed-off-by: Hongzhen Luo <[email protected]>
Signed-off-by: Hongbo Li <[email protected]>
---
fs/erofs/Makefile | 1 +
fs/erofs/internal.h | 29 ++++++
fs/erofs/ishare.c | 211 ++++++++++++++++++++++++++++++++++++++++++++
fs/erofs/super.c | 34 ++++++-
4 files changed, 272 insertions(+), 3 deletions(-)
create mode 100644 fs/erofs/ishare.c
diff --git a/fs/erofs/Makefile b/fs/erofs/Makefile
index 549abc424763..a80e1762b607 100644
--- a/fs/erofs/Makefile
+++ b/fs/erofs/Makefile
@@ -10,3 +10,4 @@ erofs-$(CONFIG_EROFS_FS_ZIP_ZSTD) += decompressor_zstd.o
erofs-$(CONFIG_EROFS_FS_ZIP_ACCEL) += decompressor_crypto.o
erofs-$(CONFIG_EROFS_FS_BACKED_BY_FILE) += fileio.o
erofs-$(CONFIG_EROFS_FS_ONDEMAND) += fscache.o
+erofs-$(CONFIG_EROFS_FS_PAGE_CACHE_SHARE) += ishare.o
diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
index 99e2857173c3..ae9560434324 100644
--- a/fs/erofs/internal.h
+++ b/fs/erofs/internal.h
@@ -304,6 +304,22 @@ struct erofs_inode {
};
#endif /* CONFIG_EROFS_FS_ZIP */
};
+#ifdef CONFIG_EROFS_FS_PAGE_CACHE_SHARE
+ union {
+ /* internal dedup inode */
+ struct {
+ char *fingerprint;
+ spinlock_t lock;
+ /* all backing inodes */
+ struct list_head backing_head;
+ };
+
+ struct {
+ struct inode *ishare;
+ struct list_head backing_link;
+ };
I think it would be better to restructure this as below:
struct erofs_inode_fingerprint {
u8 *opaque;
int size;
};
struct list_head ishare_list;
union {
struct {
struct erofs_inode_fingerprint fingerprint;
spinlock_t ishare_lock;
};
struct inode *realinode;
};
+ };
+#endif
/* the corresponding vfs inode */
struct inode vfs_inode;
};
@@ -410,6 +426,7 @@ extern const struct inode_operations erofs_dir_iops;
extern const struct file_operations erofs_file_fops;
extern const struct file_operations erofs_dir_fops;
+extern const struct file_operations erofs_ishare_fops;
extern const struct iomap_ops z_erofs_iomap_report_ops;
@@ -541,6 +558,18 @@ static inline struct bio *erofs_fscache_bio_alloc(struct erofs_map_dev *mdev) {
static inline void erofs_fscache_submit_bio(struct bio *bio) {}
#endif
+#ifdef CONFIG_EROFS_FS_PAGE_CACHE_SHARE
+int __init erofs_init_ishare(void);
+void erofs_exit_ishare(void);
+bool erofs_ishare_fill_inode(struct inode *inode);
+void erofs_ishare_free_inode(struct inode *inode);
+#else
+static inline int erofs_init_ishare(void) { return 0; }
+static inline void erofs_exit_ishare(void) {}
+static inline bool erofs_ishare_fill_inode(struct inode *inode) { return
false; }
+static inline void erofs_ishare_free_inode(struct inode *inode) {}
+#endif // CONFIG_EROFS_FS_PAGE_CACHE_SHARE
+
long erofs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
long erofs_compat_ioctl(struct file *filp, unsigned int cmd,
unsigned long arg);
diff --git a/fs/erofs/ishare.c b/fs/erofs/ishare.c
new file mode 100644
index 000000000000..4b46016bcd03
--- /dev/null
+++ b/fs/erofs/ishare.c
@@ -0,0 +1,211 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2024, Alibaba Cloud
+ */
+#include <linux/xxhash.h>
+#include <linux/refcount.h>
+#include <linux/mount.h>
+#include <linux/mutex.h>
+#include <linux/ramfs.h>
+#include "internal.h"
+#include "xattr.h"
+
+#include "../internal.h"
+
+static struct vfsmount *erofs_ishare_mnt;
+
+static int erofs_ishare_iget5_eq(struct inode *inode, void *data)
+{
+ struct erofs_inode *vi = EROFS_I(inode);
struct erofs_inode_fingerprint *fp1 = &EROFS_I(inode)->fingerprint;
struct erofs_inode_fingerprint *fp2 = data;
return fp1->size == fp2->size &&
!memcmp(fp1->opaque, fp2->opaque, fp2->size);
return vi->fingerprint.opaque && memcmp(vi->
+
+ return vi->fingerprint && memcmp(vi->fingerprint, data,
+ sizeof(size_t) + *(size_t *)data) == 0;
+}
+
+static int erofs_ishare_iget5_set(struct inode *inode, void *data)
+{
+ struct erofs_inode *vi = EROFS_I(inode);
+> + vi->fingerprint = data;
vi->fingerprint = *(struct erofs_inode_fingerprint *)data;
+ INIT_LIST_HEAD(&vi->backing_head);
+ spin_lock_init(&vi->lock);
+ return 0;
+}
+
+bool erofs_ishare_fill_inode(struct inode *inode)
+{
+ struct erofs_sb_info *sbi = EROFS_SB(inode->i_sb);
+ struct erofs_xattr_prefix_item *ishare_prefix;
Just calling it
struct erofs_xattr_prefix_item *prefix;
is fine, since it's unambiguous.
+ struct erofs_inode *vi = EROFS_I(inode);
+ struct inode *idedup;
+ /*
+ * fingerprint layout:
+ * fingerprint length + fingerprint content (xattr_value + domain_id)
+ */
That is too hard to follow; just convert it to what I mentioned above:
struct erofs_inode_fingerprint fp;
+ char *ishare_key, *fingerprint;
char *infix;
+ ssize_t ishare_vlen;
size_t valuelen;
+ unsigned long hash;
+ int key_idx;
int base_index;
+
+ if (!sbi->domain_id || !erofs_sb_has_ishare_xattrs(sbi))
+ return false;
+
+ ishare_prefix = sbi->xattr_prefixes + sbi->ishare_xattr_pfx;
+ ishare_key = ishare_prefix->prefix->infix;
+ key_idx = ishare_prefix->prefix->base_index;
+ ishare_vlen = erofs_getxattr(inode, key_idx, ishare_key, NULL, 0);
+ if (ishare_vlen <= 0 || ishare_vlen > (1 << sbi->blkszbits))
+ return false;
+
Then:
fp.size = valuelen + strlen(sbi->domain_id);
fp.opaque = kmalloc(fp.size, GFP_KERNEL);
And fix up the remaining logic accordingly.
Thanks,
Gao Xiang