[f2fs-dev] [PATCH v4 1/2] libfs: reduce the number of memory allocations in generic_ci_match
During path traversal, the generic_ci_match function may be called many times. The memory allocations and frees it performs account for a relatively high proportion of the flamegraph. This patch significantly reduces the number of memory allocations in generic_ci_match through pre-allocation. Signed-off-by: Yuwen Chen --- fs/ext4/namei.c| 2 +- fs/f2fs/dir.c | 2 +- fs/libfs.c | 33 ++--- include/linux/fs.h | 8 +++- 4 files changed, 39 insertions(+), 6 deletions(-) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index a178ac2294895..f235693bd71aa 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1443,7 +1443,7 @@ static bool ext4_match(struct inode *parent, return generic_ci_match(parent, fname->usr_fname, &fname->cf_name, de->name, - de->name_len) > 0; + de->name_len, NULL) > 0; } #endif diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index c36b3b22bfffd..4c6611fbd9574 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -197,7 +197,7 @@ static inline int f2fs_match_name(const struct inode *dir, if (fname->cf_name.name) return generic_ci_match(dir, fname->usr_fname, &fname->cf_name, - de_name, de_name_len); + de_name, de_name_len, NULL); #endif f.usr_fname = fname->usr_fname; diff --git a/fs/libfs.c b/fs/libfs.c index 9ea0ecc325a81..293b605971bbf 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -1863,6 +1863,26 @@ static const struct dentry_operations generic_ci_dentry_ops = { #endif }; +#define DECRYPTED_NAME_PREALLOC_MIN_LEN 64 +static inline char *decrypted_name_prealloc_resize( + struct decrypted_name_prealloc *prealloc, + size_t wantlen) +{ + char *retbuf = NULL; + + if (prealloc->name && wantlen <= prealloc->namelen) + return prealloc->name; + + retbuf = kmalloc(wantlen + DECRYPTED_NAME_PREALLOC_MIN_LEN, GFP_KERNEL); + if (!retbuf) + return NULL; + + kfree(prealloc->name); + prealloc->name = retbuf; + prealloc->namelen = wantlen + DECRYPTED_NAME_PREALLOC_MIN_LEN; + return retbuf; +} + /** * generic_ci_match() - Match a name (case-insensitively) with a 
dirent. * This is a filesystem helper for comparison with directory entries. @@ -1873,6 +1893,7 @@ static const struct dentry_operations generic_ci_dentry_ops = { * @folded_name: Optional pre-folded name under lookup * @de_name: Dirent name. * @de_name_len: dirent name length. + * @prealloc: decrypted name memory buffer * * Test whether a case-insensitive directory entry matches the filename * being searched. If @folded_name is provided, it is used instead of @@ -1884,7 +1905,8 @@ static const struct dentry_operations generic_ci_dentry_ops = { int generic_ci_match(const struct inode *parent, const struct qstr *name, const struct qstr *folded_name, -const u8 *de_name, u32 de_name_len) +const u8 *de_name, u32 de_name_len, +struct decrypted_name_prealloc *prealloc) { const struct super_block *sb = parent->i_sb; const struct unicode_map *um = sb->s_encoding; @@ -1899,7 +1921,11 @@ int generic_ci_match(const struct inode *parent, if (WARN_ON_ONCE(!fscrypt_has_encryption_key(parent))) return -EINVAL; - decrypted_name.name = kmalloc(de_name_len, GFP_KERNEL); + if (!prealloc) + decrypted_name.name = kmalloc(de_name_len, GFP_KERNEL); + else + decrypted_name.name = decrypted_name_prealloc_resize( + prealloc, de_name_len); if (!decrypted_name.name) return -ENOMEM; res = fscrypt_fname_disk_to_usr(parent, 0, 0, &encrypted_name, @@ -1928,7 +1954,8 @@ int generic_ci_match(const struct inode *parent, res = utf8_strncasecmp(um, name, &dirent); out: - kfree(decrypted_name.name); + if (!prealloc) + kfree(decrypted_name.name); if (res < 0 && sb_has_strict_encoding(sb)) { pr_err_ratelimited("Directory contains filename that is invalid UTF-8"); return 0; diff --git a/include/linux/fs.h b/include/linux/fs.h index 4ec77da65f144..65307c8c11485 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3651,10 +3651,16 @@ extern int generic_file_fsync(struct file *, loff_t, loff_t, int); extern int generic_check_addressable(unsigned, u64); extern void generic_set
Re: [f2fs-dev] [PATCH v3 1/2] libfs: reduce the number of memory allocations in generic_ci_match
On Sun, 6 Jul 2025 22:27:17 -0700 Christoph Hellwig wrote: > But I wonder why generic_ci_match is even called that often. Both ext4 > and f2fs support hashed lookups, so you should usually only see it called > for the main match, plus the occasional hash false positive, which should > be rare if the hash works. At present, in the latest version of Linux, f2fs still uses linear search in some scenarios. Linear search was introduced by commit 91b587ba79e1 ("f2fs: Introduce linear search for dentries"). That commit was designed to solve the problem of hashes being inconsistent before and after the rollback of commit 5c26d2f1d3f5 ("unicode: Don't special case ignorable code points"), which led to files being inaccessible. To reduce the impact of linear search, relatively new versions have also introduced logic to turn linear search off. However, the conditions for triggering this turn-off logic on f2fs are rather strict: 1. Use the latest version of the fsck.f2fs tool to correct the file system. 2. Use a relatively new version of the kernel. (For example, linear search cannot be turned off in v6.6.) The performance gain of this commit is very obvious in scenarios where linear search is not turned off. In scenarios where linear search is turned off, it does not introduce any performance problems either. ___ Linux-f2fs-devel mailing list Linux-f2fs-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
[f2fs-dev] [PATCH] f2fs: improve the performance of f2fs_lookup
On the Android system, the file creation operation will call the f2fs_lookup function. When there are too many files in a directory, the generic_ci_match operation will be called repeatedly in large quantities. In extreme cases, the file creation speed will drop to three times per second. Signed-off-by: Yuwen Chen --- fs/ext4/namei.c| 2 +- fs/f2fs/dir.c | 24 +--- fs/f2fs/f2fs.h | 3 ++- fs/f2fs/inline.c | 3 ++- fs/libfs.c | 32 +--- include/linux/fs.h | 8 +++- 6 files changed, 58 insertions(+), 14 deletions(-) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index a178ac2294895..f235693bd71aa 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1443,7 +1443,7 @@ static bool ext4_match(struct inode *parent, return generic_ci_match(parent, fname->usr_fname, &fname->cf_name, de->name, - de->name_len) > 0; + de->name_len, NULL) > 0; } #endif diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index c36b3b22bfffd..ee0cbeb80debd 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -176,6 +176,7 @@ static struct f2fs_dir_entry *find_in_block(struct inode *dir, struct folio *dentry_folio, const struct f2fs_filename *fname, int *max_slots, + struct decrypted_name_prealloc *prealloc, bool use_hash) { struct f2fs_dentry_block *dentry_blk; @@ -184,12 +185,13 @@ static struct f2fs_dir_entry *find_in_block(struct inode *dir, dentry_blk = folio_address(dentry_folio); make_dentry_ptr_block(dir, &d, dentry_blk); - return f2fs_find_target_dentry(&d, fname, max_slots, use_hash); + return f2fs_find_target_dentry(&d, fname, max_slots, prealloc, use_hash); } static inline int f2fs_match_name(const struct inode *dir, const struct f2fs_filename *fname, - const u8 *de_name, u32 de_name_len) + const u8 *de_name, u32 de_name_len, + struct decrypted_name_prealloc *prealloc) { struct fscrypt_name f; @@ -197,7 +199,7 @@ static inline int f2fs_match_name(const struct inode *dir, if (fname->cf_name.name) return generic_ci_match(dir, fname->usr_fname, &fname->cf_name, - de_name, de_name_len); + de_name, 
de_name_len, prealloc); #endif f.usr_fname = fname->usr_fname; @@ -210,6 +212,7 @@ static inline int f2fs_match_name(const struct inode *dir, struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d, const struct f2fs_filename *fname, int *max_slots, + struct decrypted_name_prealloc *prealloc, bool use_hash) { struct f2fs_dir_entry *de; @@ -236,7 +239,8 @@ struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d, if (!use_hash || de->hash_code == fname->hash) { res = f2fs_match_name(d->inode, fname, d->filename[bit_pos], - le16_to_cpu(de->name_len)); + le16_to_cpu(de->name_len), + prealloc); if (res < 0) return ERR_PTR(res); if (res) @@ -261,6 +265,7 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, unsigned int level, const struct f2fs_filename *fname, struct folio **res_folio, + struct decrypted_name_prealloc *prealloc, bool use_hash) { int s = GET_DENTRY_SLOTS(fname->disk_name.len); @@ -296,7 +301,8 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, } } - de = find_in_block(dir, dentry_folio, fname, &max_slots, use_hash); + de = find_in_block(dir, dentry_folio, fname, &max_slots, prealloc, + use_hash); if (IS_ERR(de)) { *res_folio = ERR_CAST(de); de = NULL; @@ -336,6 +342,7 @@ struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir, unsigned int max_depth; unsigned int level; bool use_hash = tru
[f2fs-dev] [PATCH v2] f2fs: improve the performance of f2fs_lookup
On Android, the file creation operation calls the f2fs_lookup function. When a directory contains a large number of files, generic_ci_match is called repeatedly, a very large number of times. In extreme cases, the file creation rate drops to three files per second. Use the following program to conduct a file-creation test in the private program directory (/data/media/0/Android/data/*) of Android. int main(int argc, char **argv) { size_t fcnt = 0; char path[PATH_MAX]; char buf[4096] = {0}; int i, fd; if (argc < 2) return -EINVAL; fcnt = atoi(argv[1]); for (i = 0; i < fcnt; i++) { snprintf(path, sizeof(path), "./%d", i); fd = open(path, O_RDWR | O_CREAT, 0600); if (fd < 0) return -1; write(fd, buf, sizeof(buf)); close(fd); } return 0; } The test platform is Snapdragon 8s Gen4, running kernel v6.16 and a userdebug build. Before this patch was applied, the results when creating 2000 files were as follows: $ time /data/file_creater 2000 0m14.83s real 0m00.00s user 0m14.30s system 0m15.61s real 0m00.00s user 0m15.04s system 0m14.72s real 0m00.01s user 0m14.18s system After this patch was applied, the results are as follows: $ time /data/file_creater 2000 0m08.17s real 0m00.00s user 0m07.86s system 0m08.16s real 0m00.01s user 0m07.86s system 0m08.15s real 0m00.00s user 0m07.86s system perf showed that the generic_ci_match function was called a large number of times, and that most of the time was spent on memory allocation and release. Because the implementation of cts_cbc_decrypt performs a flush_dcache operation, this memory cannot be allocated on the stack. 
Signed-off-by: Yuwen Chen --- fs/ext4/namei.c| 2 +- fs/f2fs/dir.c | 24 +--- fs/f2fs/f2fs.h | 3 ++- fs/f2fs/inline.c | 3 ++- fs/libfs.c | 32 +--- include/linux/fs.h | 8 +++- 6 files changed, 58 insertions(+), 14 deletions(-) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index a178ac2294895..f235693bd71aa 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1443,7 +1443,7 @@ static bool ext4_match(struct inode *parent, return generic_ci_match(parent, fname->usr_fname, &fname->cf_name, de->name, - de->name_len) > 0; + de->name_len, NULL) > 0; } #endif diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index c36b3b22bfffd..ee0cbeb80debd 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -176,6 +176,7 @@ static struct f2fs_dir_entry *find_in_block(struct inode *dir, struct folio *dentry_folio, const struct f2fs_filename *fname, int *max_slots, + struct decrypted_name_prealloc *prealloc, bool use_hash) { struct f2fs_dentry_block *dentry_blk; @@ -184,12 +185,13 @@ static struct f2fs_dir_entry *find_in_block(struct inode *dir, dentry_blk = folio_address(dentry_folio); make_dentry_ptr_block(dir, &d, dentry_blk); - return f2fs_find_target_dentry(&d, fname, max_slots, use_hash); + return f2fs_find_target_dentry(&d, fname, max_slots, prealloc, use_hash); } static inline int f2fs_match_name(const struct inode *dir, const struct f2fs_filename *fname, - const u8 *de_name, u32 de_name_len) + const u8 *de_name, u32 de_name_len, + struct decrypted_name_prealloc *prealloc) { struct fscrypt_name f; @@ -197,7 +199,7 @@ static inline int f2fs_match_name(const struct inode *dir, if (fname->cf_name.name) return generic_ci_match(dir, fname->usr_fname, &fname->cf_name, - de_name, de_name_len); + de_name, de_name_len, prealloc); #endif f.usr_fname = fname->usr_fname; @@ -210,6 +212,7 @@ static inline int f2fs_match_name(const struct inode *dir, struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d, const struct f2fs_filename *fname, int *max_slots, + struct 
decrypted_name_prealloc *prealloc, bool use_hash) { struct f2fs_dir_entry *de; @@ -236,7 +239,8 @@ struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d, if (!use_hash || de->hash_code == fname->hash) { res = f2fs_match_name(d-&g
[f2fs-dev] [PATCH v3 2/2] f2fs: improve the performance of f2fs_lookup
On Android, the file creation operation calls the f2fs_lookup function. When a directory contains a large number of files, generic_ci_match is called repeatedly, a very large number of times. In extreme cases, the file creation rate drops to three files per second. Use the following program to conduct a file-creation test in the private program directory (/data/media/0/Android/data/*) of Android. int main(int argc, char **argv) { size_t fcnt = 0; char path[PATH_MAX]; char buf[4096] = {0}; int i, fd; if (argc < 2) return -EINVAL; fcnt = atoi(argv[1]); for (i = 0; i < fcnt; i++) { snprintf(path, sizeof(path), "./%d", i); fd = open(path, O_RDWR | O_CREAT, 0600); if (fd < 0) return -1; write(fd, buf, sizeof(buf)); close(fd); } return 0; } The test platform is Snapdragon 8s Gen4, running kernel v6.6 and a userdebug build. Before this patch was applied, the results when creating 2000 files were as follows: $ time /data/file_creater 2000 0m14.83s real 0m00.00s user 0m14.30s system 0m15.61s real 0m00.00s user 0m15.04s system 0m14.72s real 0m00.01s user 0m14.18s system After this patch was applied, the results are as follows: $ time /data/file_creater 2000 0m08.17s real 0m00.00s user 0m07.86s system 0m08.16s real 0m00.01s user 0m07.86s system 0m08.15s real 0m00.00s user 0m07.86s system perf showed that the generic_ci_match function was called a large number of times, and that most of the time was spent on memory allocation and release. Because the implementation of cts_cbc_decrypt performs a flush_dcache operation, this memory cannot be allocated on the stack. 
Signed-off-by: Yuwen Chen --- fs/f2fs/dir.c| 24 +--- fs/f2fs/f2fs.h | 3 ++- fs/f2fs/inline.c | 3 ++- 3 files changed, 21 insertions(+), 9 deletions(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 4c6611fbd9574..ee0cbeb80debd 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -176,6 +176,7 @@ static struct f2fs_dir_entry *find_in_block(struct inode *dir, struct folio *dentry_folio, const struct f2fs_filename *fname, int *max_slots, + struct decrypted_name_prealloc *prealloc, bool use_hash) { struct f2fs_dentry_block *dentry_blk; @@ -184,12 +185,13 @@ static struct f2fs_dir_entry *find_in_block(struct inode *dir, dentry_blk = folio_address(dentry_folio); make_dentry_ptr_block(dir, &d, dentry_blk); - return f2fs_find_target_dentry(&d, fname, max_slots, use_hash); + return f2fs_find_target_dentry(&d, fname, max_slots, prealloc, use_hash); } static inline int f2fs_match_name(const struct inode *dir, const struct f2fs_filename *fname, - const u8 *de_name, u32 de_name_len) + const u8 *de_name, u32 de_name_len, + struct decrypted_name_prealloc *prealloc) { struct fscrypt_name f; @@ -197,7 +199,7 @@ static inline int f2fs_match_name(const struct inode *dir, if (fname->cf_name.name) return generic_ci_match(dir, fname->usr_fname, &fname->cf_name, - de_name, de_name_len, NULL); + de_name, de_name_len, prealloc); #endif f.usr_fname = fname->usr_fname; @@ -210,6 +212,7 @@ static inline int f2fs_match_name(const struct inode *dir, struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d, const struct f2fs_filename *fname, int *max_slots, + struct decrypted_name_prealloc *prealloc, bool use_hash) { struct f2fs_dir_entry *de; @@ -236,7 +239,8 @@ struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d, if (!use_hash || de->hash_code == fname->hash) { res = f2fs_match_name(d->inode, fname, d->filename[bit_pos], - le16_to_cpu(de->name_len)); + le16_to_cpu(de->name_len), + prealloc); if (res < 0) return ERR_PTR(res); if (res) @@ -261,6 
+265,7 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, unsigned int level, const struct f2fs_f
[f2fs-dev] [PATCH v3 1/2] libfs: reduce the number of memory allocations in generic_ci_match
During path traversal, the generic_ci_match function may be called many times. The memory allocations and frees it performs account for a relatively high proportion of the flamegraph. This patch significantly reduces the number of memory allocations in generic_ci_match through pre-allocation. Signed-off-by: Yuwen Chen --- fs/ext4/namei.c| 2 +- fs/f2fs/dir.c | 2 +- fs/libfs.c | 33 ++--- include/linux/fs.h | 8 +++- 4 files changed, 39 insertions(+), 6 deletions(-) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index a178ac2294895..f235693bd71aa 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1443,7 +1443,7 @@ static bool ext4_match(struct inode *parent, return generic_ci_match(parent, fname->usr_fname, &fname->cf_name, de->name, - de->name_len) > 0; + de->name_len, NULL) > 0; } #endif diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index c36b3b22bfffd..4c6611fbd9574 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -197,7 +197,7 @@ static inline int f2fs_match_name(const struct inode *dir, if (fname->cf_name.name) return generic_ci_match(dir, fname->usr_fname, &fname->cf_name, - de_name, de_name_len); + de_name, de_name_len, NULL); #endif f.usr_fname = fname->usr_fname; diff --git a/fs/libfs.c b/fs/libfs.c index 9ea0ecc325a81..d2a6b2a4fe11c 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -1863,6 +1863,26 @@ static const struct dentry_operations generic_ci_dentry_ops = { #endif }; +#define DECRYPTED_NAME_PREALLOC_MIN_LEN 64 +static inline char *decrypted_name_prealloc_resize( + struct decrypted_name_prealloc *prealloc, + size_t wantlen) +{ + char *retbuf = NULL; + + if (prealloc->name && wantlen >= prealloc->namelen) + return prealloc->name; + + retbuf = kmalloc(wantlen + DECRYPTED_NAME_PREALLOC_MIN_LEN, GFP_KERNEL); + if (!retbuf) + return NULL; + + kfree(prealloc->name); + prealloc->name = retbuf; + prealloc->namelen = wantlen + DECRYPTED_NAME_PREALLOC_MIN_LEN; + return retbuf; +} + /** * generic_ci_match() - Match a name (case-insensitively) with a 
dirent. * This is a filesystem helper for comparison with directory entries. @@ -1873,6 +1893,7 @@ static const struct dentry_operations generic_ci_dentry_ops = { * @folded_name: Optional pre-folded name under lookup * @de_name: Dirent name. * @de_name_len: dirent name length. + * @prealloc: decrypted name memory buffer * * Test whether a case-insensitive directory entry matches the filename * being searched. If @folded_name is provided, it is used instead of @@ -1884,7 +1905,8 @@ static const struct dentry_operations generic_ci_dentry_ops = { int generic_ci_match(const struct inode *parent, const struct qstr *name, const struct qstr *folded_name, -const u8 *de_name, u32 de_name_len) +const u8 *de_name, u32 de_name_len, +struct decrypted_name_prealloc *prealloc) { const struct super_block *sb = parent->i_sb; const struct unicode_map *um = sb->s_encoding; @@ -1899,7 +1921,11 @@ int generic_ci_match(const struct inode *parent, if (WARN_ON_ONCE(!fscrypt_has_encryption_key(parent))) return -EINVAL; - decrypted_name.name = kmalloc(de_name_len, GFP_KERNEL); + if (!prealloc) + decrypted_name.name = kmalloc(de_name_len, GFP_KERNEL); + else + decrypted_name.name = decrypted_name_prealloc_resize( + prealloc, de_name_len); if (!decrypted_name.name) return -ENOMEM; res = fscrypt_fname_disk_to_usr(parent, 0, 0, &encrypted_name, @@ -1928,7 +1954,8 @@ int generic_ci_match(const struct inode *parent, res = utf8_strncasecmp(um, name, &dirent); out: - kfree(decrypted_name.name); + if (!prealloc) + kfree(decrypted_name.name); if (res < 0 && sb_has_strict_encoding(sb)) { pr_err_ratelimited("Directory contains filename that is invalid UTF-8"); return 0; diff --git a/include/linux/fs.h b/include/linux/fs.h index 4ec77da65f144..65307c8c11485 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3651,10 +3651,16 @@ extern int generic_file_fsync(struct file *, loff_t, loff_t, int); extern int generic_check_addressable(unsigned, u64); extern void generic_set