[PATCH v3 1/2] hugetlbfs: support tracepoint

2024-07-22 Thread Hongbo Li
Add basic tracepoints for {alloc, evict, free}_inode, setattr and
fallocate. These can help users to debug hugetlbfs more conveniently.

Signed-off-by: Hongbo Li 
---
 MAINTAINERS  |   1 +
 include/trace/events/hugetlbfs.h | 156 +++
 2 files changed, 157 insertions(+)
 create mode 100644 include/trace/events/hugetlbfs.h

diff --git a/MAINTAINERS b/MAINTAINERS
index a0baccca11de..362e846e1294 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10351,6 +10351,7 @@ F:  Documentation/mm/hugetlbfs_reserv.rst
 F: Documentation/mm/vmemmap_dedup.rst
 F: fs/hugetlbfs/
 F: include/linux/hugetlb.h
+F: include/trace/events/hugetlbfs.h
 F: mm/hugetlb.c
 F: mm/hugetlb_vmemmap.c
 F: mm/hugetlb_vmemmap.h
diff --git a/include/trace/events/hugetlbfs.h b/include/trace/events/hugetlbfs.h
new file mode 100644
index ..8331c904a9ba
--- /dev/null
+++ b/include/trace/events/hugetlbfs.h
@@ -0,0 +1,156 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM hugetlbfs
+
+#if !defined(_TRACE_HUGETLBFS_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_HUGETLBFS_H
+
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(hugetlbfs_alloc_inode,
+
+   TP_PROTO(struct inode *inode, struct inode *dir, int mode),
+
+   TP_ARGS(inode, dir, mode),
+
+   TP_STRUCT__entry(
+   __field(dev_t,  dev)
+   __field(ino_t,  ino)
+   __field(ino_t,  dir)
+   __field(__u16,  mode)
+   ),
+
+   TP_fast_assign(
+   __entry->dev= inode->i_sb->s_dev;
+   __entry->ino= inode->i_ino;
+   __entry->dir= dir->i_ino;
+   __entry->mode   = mode;
+   ),
+
+   TP_printk("dev %d,%d ino %lu dir %lu mode 0%o",
+   MAJOR(__entry->dev), MINOR(__entry->dev),
+   (unsigned long) __entry->ino,
+   (unsigned long) __entry->dir, __entry->mode)
+);
+
+DECLARE_EVENT_CLASS(hugetlbfs__inode,
+
+   TP_PROTO(struct inode *inode),
+
+   TP_ARGS(inode),
+
+   TP_STRUCT__entry(
+   __field(dev_t,  dev)
+   __field(ino_t,  ino)
+   __field(__u16,  mode)
+   __field(loff_t, size)
+   __field(unsigned int,   nlink)
+   __field(unsigned int,   seals)
+   __field(blkcnt_t,   blocks)
+   ),
+
+   TP_fast_assign(
+   __entry->dev= inode->i_sb->s_dev;
+   __entry->ino= inode->i_ino;
+   __entry->mode   = inode->i_mode;
+   __entry->size   = inode->i_size;
+   __entry->nlink  = inode->i_nlink;
+   __entry->seals  = HUGETLBFS_I(inode)->seals;
+   __entry->blocks = inode->i_blocks;
+   ),
+
+   TP_printk("dev %d,%d ino %lu mode 0%o size %lld nlink %u seals %u 
blocks %llu",
+   MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) 
__entry->ino,
+   __entry->mode, __entry->size, __entry->nlink, __entry->seals,
+   (unsigned long long)__entry->blocks)
+);
+
+DEFINE_EVENT(hugetlbfs__inode, hugetlbfs_evict_inode,
+
+   TP_PROTO(struct inode *inode),
+
+   TP_ARGS(inode)
+);
+
+DEFINE_EVENT(hugetlbfs__inode, hugetlbfs_free_inode,
+
+   TP_PROTO(struct inode *inode),
+
+   TP_ARGS(inode)
+);
+
+TRACE_EVENT(hugetlbfs_setattr,
+
+   TP_PROTO(struct inode *inode, struct dentry *dentry,
+   struct iattr *attr),
+
+   TP_ARGS(inode, dentry, attr),
+
+   TP_STRUCT__entry(
+   __field(dev_t,  dev)
+   __field(ino_t,  ino)
+   __field(unsigned int,   d_len)
+   __string(d_name,dentry->d_name.name)
+   __field(unsigned int,   ia_valid)
+   __field(unsigned int,   ia_mode)
+   __field(loff_t, old_size)
+   __field(loff_t, ia_size)
+   ),
+
+   TP_fast_assign(
+   __entry->dev= inode->i_sb->s_dev;
+   __entry->ino= inode->i_ino;
+   __entry->d_len  = dentry->d_name.len;
+   __assign_str(d_name);
+   __entry->ia_valid   = attr->ia_valid;
+   __entry->ia_mode= attr->ia_mode;
+   __entry->old_size   = inode->i_size;
+   __entry->ia_size= attr->ia_size;
+   ),
+
+   TP_printk("dev %d,%d ino %lu name %.*s valid %#x mode 0%o old_size %lld 
size %lld",
+   MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned 
long)__

[PATCH v3 2/2] hugetlbfs: use tracepoints in hugetlbfs functions.

2024-07-22 Thread Hongbo Li
Here we use the hugetlbfs tracepoints to track the call stack. The
output in the trace is as follows:

```
   touch-5265[005] .43.246550: hugetlbfs_alloc_inode: dev 0,51 ino 
24621 dir 21959 mode 0100644
   touch-5265[005] .43.246638: hugetlbfs_setattr: dev 0,51 ino 
24621 name testfile valid 0x20070 mode 017 old_size 0 size 
-51622648042749952
truncate-5266[005] .45.590890: hugetlbfs_setattr: dev 0,51 ino 
24621 name testfile valid 0x2068 mode 00 old_size 0 size 2097152
rm-5273[007] .   110.052783: hugetlbfs_evict_inode: dev 0,51 ino 24621 
mode 0100644 size 2097152 nlink 0 seals 1 blocks 0
  -0   [007] ..s1.   110.059441: hugetlbfs_free_inode: dev 0,51 ino 
24621 mode 0100644 size 2097152 nlink 0 seals 1 blocks 0
```

Signed-off-by: Hongbo Li 
---
 fs/hugetlbfs/inode.c | 17 +++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 9f6cff356796..1689c01a11a0 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -39,6 +39,9 @@
 #include 
 #include 
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/hugetlbfs.h>
+
 static const struct address_space_operations hugetlbfs_aops;
 static const struct file_operations hugetlbfs_file_operations;
 static const struct inode_operations hugetlbfs_dir_inode_operations;
@@ -687,6 +690,7 @@ static void hugetlbfs_evict_inode(struct inode *inode)
 {
struct resv_map *resv_map;
 
+   trace_hugetlbfs_evict_inode(inode);
remove_inode_hugepages(inode, 0, LLONG_MAX);
 
/*
@@ -814,8 +818,10 @@ static long hugetlbfs_fallocate(struct file *file, int 
mode, loff_t offset,
if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
return -EOPNOTSUPP;
 
-   if (mode & FALLOC_FL_PUNCH_HOLE)
-   return hugetlbfs_punch_hole(inode, offset, len);
+   if (mode & FALLOC_FL_PUNCH_HOLE) {
+   error = hugetlbfs_punch_hole(inode, offset, len);
+   goto out_nolock;
+   }
 
/*
 * Default preallocate case.
@@ -919,6 +925,9 @@ static long hugetlbfs_fallocate(struct file *file, int 
mode, loff_t offset,
inode_set_ctime_current(inode);
 out:
inode_unlock(inode);
+
+out_nolock:
+   trace_hugetlbfs_fallocate(inode, mode, offset, len, error);
return error;
 }
 
@@ -935,6 +944,8 @@ static int hugetlbfs_setattr(struct mnt_idmap *idmap,
if (error)
return error;
 
+   trace_hugetlbfs_setattr(inode, dentry, attr);
+
if (ia_valid & ATTR_SIZE) {
loff_t oldsize = inode->i_size;
loff_t newsize = attr->ia_size;
@@ -1033,6 +1044,7 @@ static struct inode *hugetlbfs_get_inode(struct 
super_block *sb,
break;
}
lockdep_annotate_inode_mutex_key(inode);
+   trace_hugetlbfs_alloc_inode(inode, dir, mode);
} else {
if (resv_map)
kref_put(&resv_map->refs, resv_map_release);
@@ -1272,6 +1284,7 @@ static struct inode *hugetlbfs_alloc_inode(struct 
super_block *sb)
 
 static void hugetlbfs_free_inode(struct inode *inode)
 {
+   trace_hugetlbfs_free_inode(inode);
kmem_cache_free(hugetlbfs_inode_cachep, HUGETLBFS_I(inode));
 }
 
-- 
2.34.1




[PATCH v3 0/2] Introduce tracepoint for hugetlbfs

2024-07-22 Thread Hongbo Li
Here we add some basic tracepoints for debugging hugetlbfs: {alloc, free,
evict}_inode, setattr and fallocate.

v2 can be found at:
https://lore.kernel.org/all/zoyy-sfj5jvs8...@casper.infradead.org/T/

Changes since v2:
  - Simplify the tracepoint output for setattr.
  - Make every token be space separated.


v1 can be found at:
https://lore.kernel.org/linux-mm/20240701194906.3a9b6...@gandalf.local.home/T/

Changes since v1:
  - Reduce the number of parameters of the setattr tracepoint, as suggested
    by Steve and Mathieu.
  - Replace current_user_ns() with init_user_ns when translating uid/gid.

Hongbo Li (2):
  hugetlbfs: support tracepoint
  hugetlbfs: use tracepoints in hugetlbfs functions.

 MAINTAINERS  |   1 +
 fs/hugetlbfs/inode.c |  17 +++-
 include/trace/events/hugetlbfs.h | 156 +++
 3 files changed, 172 insertions(+), 2 deletions(-)
 create mode 100644 include/trace/events/hugetlbfs.h

-- 
2.34.1




Re: [PATCH v2 1/2] hugetlbfs: support tracepoint

2024-07-05 Thread Hongbo Li




On 2024/7/5 7:34, Dave Chinner wrote:

On Thu, Jul 04, 2024 at 10:13:22AM -0400, Steven Rostedt wrote:

On Thu, 4 Jul 2024 22:56:29 +1000
Dave Chinner  wrote:


Having to do this is additional work when writing use-once scripts
that get thrown away when the tracepoint output analysis is done
is painful, and it's completely unnecessary if the tracepoint output
is completely space separated from the start.


If you are using scripts to parse the output, then you could just
enable the "fields" options, which will just ignore the TP_printk() and
print the fields in both their hex and decimal values:

  # trace-cmd start -e filemap -O fields

// the above fields change can also be done with:
//  echo 1 > /sys/kernel/tracing/options/fields

  # trace-cmd show
# tracer: nop
#
# entries-in-buffer/entries-written: 8/8   #P:8
#
#_-=> irqs-off/BH-disabled
#   / _=> need-resched
#  | / _---=> hardirq/softirq
#  || / _--=> preempt-depth
#  ||| / _-=> migrate-disable
#   / delay
#   TASK-PID CPU#  |  TIMESTAMP  FUNCTION
#  | | |   | | |
 less-2527[004] . 61949.896458: 
mm_filemap_add_to_page_cache: pfn=0x144625 (1328677) i_ino=0x335c6 (210374) 
index=0x0 (0) s_dev=0xfe3 (266338307) order=(0)
 less-2527[004] d..2. 61949.896926: 
mm_filemap_delete_from_page_cache: pfn=0x152b07 (1387271) i_ino=0x2d73a 
(186170) index=0x0 (0) s_dev=0xfe3 (266338307) order=(0)
  jbd2/vda3-8-268 [005] . 61954.461964: 
mm_filemap_add_to_page_cache: pfn=0x152b70 (1387376) i_ino=0xfe3 
(266338307) index=0x30bd33 (3194163) s_dev=0x3 (3) order=(0)
  jbd2/vda3-8-268 [005] . 61954.462669: 
mm_filemap_add_to_page_cache: pfn=0x15335b (1389403) i_ino=0xfe3 
(266338307) index=0x30bd40 (3194176) s_dev=0x3 (3) order=(0)
  jbd2/vda3-8-268 [005] . 62001.565391: 
mm_filemap_add_to_page_cache: pfn=0x13a996 (1288598) i_ino=0xfe3 
(266338307) index=0x30bd41 (3194177) s_dev=0x3 (3) order=(0)
  jbd2/vda3-8-268 [005] . 62001.566081: 
mm_filemap_add_to_page_cache: pfn=0x1446b5 (1328821) i_ino=0xfe3 
(266338307) index=0x30bd43 (3194179) s_dev=0x3 (3) order=(0)
 less-2530[004] . 62033.182309: 
mm_filemap_add_to_page_cache: pfn=0x13d755 (1300309) i_ino=0x2d73a (186170) 
index=0x0 (0) s_dev=0xfe3 (266338307) order=(0)
 less-2530[004] d..2. 62033.182801: 
mm_filemap_delete_from_page_cache: pfn=0x144625 (1328677) i_ino=0x335c6 
(210374) index=0x0 (0) s_dev=0xfe3 (266338307) order=(0)


Yes, I know about that. But this just makes things harder, because
now there are *3* different formats that have to be handled (i.e.
now we also have to strip "()" around numbers).
Perhaps users who want a uniform format to filter on could enable the 
"fields" option. As for TP_printk(), its format depends on how best to 
display the data used for debugging.


Thanks,
Hongbo


-Dave.




Re: [PATCH v2 1/2] hugetlbfs: support tracepoint

2024-07-04 Thread Hongbo Li




On 2024/7/4 11:37, Matthew Wilcox wrote:

On Thu, Jul 04, 2024 at 11:07:03AM +0800, Hongbo Li wrote:

+   TP_printk("dev = (%d,%d), ino = %lu, dir = %lu, mode = 0%o",
+   MAJOR(__entry->dev), MINOR(__entry->dev),
+   (unsigned long) __entry->ino,
+   (unsigned long) __entry->dir, __entry->mode)


erofs and f2fs are the only two places that print devices like this.

"dev=%d:%d inode=%lx"

Why do we need dir and mode?

Thanks for reviewing!

Here dir and mode are used to track the creation of the directory tree.


Actually, why do we need a tracepoint on alloc_inode at all?  What
does it help us debug, and why does no other filesystem need an
alloc_inode tracepoint?

In fact, f2fs and ext4 have already added such tracepoints: 
trace_f2fs_new_inode (in f2fs) and trace_ext4_allocate_inode (in ext4). 
They make it possible to trace the lifecycle of an inode comprehensively. 
These tracepoints are used to debug some closed application scenarios, and 
they also help developers debug the filesystem logic in hugetlbfs.


Thanks,
Hongbo



[PATCH v2 2/2] hugetlbfs: use tracepoints in hugetlbfs functions.

2024-07-03 Thread Hongbo Li
Here we use the hugetlbfs tracepoints to track the call stack. The
output in the trace is as follows:

```
touch-5250[002] .   123.557640: hugetlbfs_alloc_inode: dev = (0,49), 
ino = 29897, dir = 24079, mode = 0100644
touch-5250[002] .   123.557650: hugetlbfs_setattr: dev = (0,49), ino = 
29897, name = test1, ia_valid = 131184, ia_mode = 036720, ia_uid = 2544251907, 
ia_gid = 65534, old_size = 0, ia_size = 4064
truncate-5251[002] .   142.939424: hugetlbfs_setattr: dev = (0,49), ino 
= 29897, name = test1, ia_valid = 8296, ia_mode = 00, ia_uid = 0, ia_gid = 0, 
old_size = 0, ia_size = 2097152
rm-5273[002] .   412.618383: hugetlbfs_evict_inode: dev = (0,49), ino = 
29897, i_mode = 0100644, i_size = 2097152, i_nlink = 0, seals = 1, i_blocks = 0
-0   [002] ..s1.   412.634518: hugetlbfs_free_inode: dev = (0,49), 
ino = 29897, i_mode = 0100644, i_size = 2097152, i_nlink = 0, seals = 1, 
i_blocks = 0
```

Signed-off-by: Hongbo Li 
---
 fs/hugetlbfs/inode.c | 17 +++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 412f295acebe..2e826bbcb6ed 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -39,6 +39,9 @@
 #include 
 #include 
 
+#define CREATE_TRACE_POINTS
+#include 
+
 static const struct address_space_operations hugetlbfs_aops;
 static const struct file_operations hugetlbfs_file_operations;
 static const struct inode_operations hugetlbfs_dir_inode_operations;
@@ -686,6 +689,7 @@ static void hugetlbfs_evict_inode(struct inode *inode)
 {
struct resv_map *resv_map;
 
+   trace_hugetlbfs_evict_inode(inode);
remove_inode_hugepages(inode, 0, LLONG_MAX);
 
/*
@@ -813,8 +817,10 @@ static long hugetlbfs_fallocate(struct file *file, int 
mode, loff_t offset,
if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
return -EOPNOTSUPP;
 
-   if (mode & FALLOC_FL_PUNCH_HOLE)
-   return hugetlbfs_punch_hole(inode, offset, len);
+   if (mode & FALLOC_FL_PUNCH_HOLE) {
+   error = hugetlbfs_punch_hole(inode, offset, len);
+   goto out_nolock;
+   }
 
/*
 * Default preallocate case.
@@ -918,6 +924,9 @@ static long hugetlbfs_fallocate(struct file *file, int 
mode, loff_t offset,
inode_set_ctime_current(inode);
 out:
inode_unlock(inode);
+
+out_nolock:
+   trace_hugetlbfs_fallocate(inode, mode, offset, len, error);
return error;
 }
 
@@ -934,6 +943,8 @@ static int hugetlbfs_setattr(struct mnt_idmap *idmap,
if (error)
return error;
 
+   trace_hugetlbfs_setattr(inode, dentry, attr);
+
if (ia_valid & ATTR_SIZE) {
loff_t oldsize = inode->i_size;
loff_t newsize = attr->ia_size;
@@ -1032,6 +1043,7 @@ static struct inode *hugetlbfs_get_inode(struct 
super_block *sb,
break;
}
lockdep_annotate_inode_mutex_key(inode);
+   trace_hugetlbfs_alloc_inode(inode, dir, mode);
} else {
if (resv_map)
kref_put(_map->refs, resv_map_release);
@@ -1274,6 +1286,7 @@ static struct inode *hugetlbfs_alloc_inode(struct 
super_block *sb)
 
 static void hugetlbfs_free_inode(struct inode *inode)
 {
+   trace_hugetlbfs_free_inode(inode);
kmem_cache_free(hugetlbfs_inode_cachep, HUGETLBFS_I(inode));
 }
 
-- 
2.34.1




[PATCH v2 0/2] Introduce tracepoint for hugetlbfs

2024-07-03 Thread Hongbo Li
Here we add some basic tracepoints for debugging hugetlbfs: {alloc, free,
evict}_inode, setattr and fallocate.

v1 can be found at:
https://lore.kernel.org/linux-mm/20240701194906.3a9b6...@gandalf.local.home/T/

Changes since v1:
  - Reduce the number of parameters of the setattr tracepoint, as suggested
    by Steve and Mathieu.
  - Replace current_user_ns() with init_user_ns when translating uid/gid.

Hongbo Li (2):
  hugetlbfs: support tracepoint
  hugetlbfs: use tracepoints in hugetlbfs functions.

 MAINTAINERS  |   1 +
 fs/hugetlbfs/inode.c |  17 +++-
 include/trace/events/hugetlbfs.h | 160 +++
 3 files changed, 176 insertions(+), 2 deletions(-)
 create mode 100644 include/trace/events/hugetlbfs.h

-- 
2.34.1




[PATCH v2 1/2] hugetlbfs: support tracepoint

2024-07-03 Thread Hongbo Li
Add basic tracepoints for {alloc, evict, free}_inode, setattr and
fallocate. These can help users to debug hugetlbfs more conveniently.

Signed-off-by: Hongbo Li 
---
 MAINTAINERS  |   1 +
 include/trace/events/hugetlbfs.h | 160 +++
 2 files changed, 161 insertions(+)
 create mode 100644 include/trace/events/hugetlbfs.h

diff --git a/MAINTAINERS b/MAINTAINERS
index cd2ca0c3158e..865c48e92d40 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10188,6 +10188,7 @@ F:  Documentation/mm/hugetlbfs_reserv.rst
 F: Documentation/mm/vmemmap_dedup.rst
 F: fs/hugetlbfs/
 F: include/linux/hugetlb.h
+F: include/trace/events/hugetlbfs.h
 F: mm/hugetlb.c
 F: mm/hugetlb_vmemmap.c
 F: mm/hugetlb_vmemmap.h
diff --git a/include/trace/events/hugetlbfs.h b/include/trace/events/hugetlbfs.h
new file mode 100644
index ..975f584f6f51
--- /dev/null
+++ b/include/trace/events/hugetlbfs.h
@@ -0,0 +1,160 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM hugetlbfs
+
+#if !defined(_TRACE_HUGETLBFS_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_HUGETLBFS_H
+
+#include 
+
+TRACE_EVENT(hugetlbfs_alloc_inode,
+
+   TP_PROTO(struct inode *inode, struct inode *dir, int mode),
+
+   TP_ARGS(inode, dir, mode),
+
+   TP_STRUCT__entry(
+   __field(dev_t,  dev)
+   __field(ino_t,  ino)
+   __field(ino_t,  dir)
+   __field(__u16,  mode)
+   ),
+
+   TP_fast_assign(
+   __entry->dev= inode->i_sb->s_dev;
+   __entry->ino= inode->i_ino;
+   __entry->dir= dir->i_ino;
+   __entry->mode   = mode;
+   ),
+
+   TP_printk("dev = (%d,%d), ino = %lu, dir = %lu, mode = 0%o",
+   MAJOR(__entry->dev), MINOR(__entry->dev),
+   (unsigned long) __entry->ino,
+   (unsigned long) __entry->dir, __entry->mode)
+);
+
+DECLARE_EVENT_CLASS(hugetlbfs__inode,
+
+   TP_PROTO(struct inode *inode),
+
+   TP_ARGS(inode),
+
+   TP_STRUCT__entry(
+   __field(dev_t,  dev)
+   __field(ino_t,  ino)
+   __field(__u16,  mode)
+   __field(loff_t, size)
+   __field(unsigned int,   nlink)
+   __field(unsigned int,   seals)
+   __field(blkcnt_t,   blocks)
+   ),
+
+   TP_fast_assign(
+   __entry->dev= inode->i_sb->s_dev;
+   __entry->ino= inode->i_ino;
+   __entry->mode   = inode->i_mode;
+   __entry->size   = inode->i_size;
+   __entry->nlink  = inode->i_nlink;
+   __entry->seals  = HUGETLBFS_I(inode)->seals;
+   __entry->blocks = inode->i_blocks;
+   ),
+
+   TP_printk("dev = (%d,%d), ino = %lu, i_mode = 0%o, i_size = %lld, 
i_nlink = %u, seals = %u, i_blocks = %llu",
+   MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) 
__entry->ino,
+   __entry->mode, __entry->size, __entry->nlink, __entry->seals,
+   (unsigned long long)__entry->blocks)
+);
+
+DEFINE_EVENT(hugetlbfs__inode, hugetlbfs_evict_inode,
+
+   TP_PROTO(struct inode *inode),
+
+   TP_ARGS(inode)
+);
+
+DEFINE_EVENT(hugetlbfs__inode, hugetlbfs_free_inode,
+
+   TP_PROTO(struct inode *inode),
+
+   TP_ARGS(inode)
+);
+
+TRACE_EVENT(hugetlbfs_setattr,
+
+   TP_PROTO(struct inode *inode, struct dentry *dentry,
+   struct iattr *attr),
+
+   TP_ARGS(inode, dentry, attr),
+
+   TP_STRUCT__entry(
+   __field(dev_t,  dev)
+   __field(ino_t,  ino)
+   __field(unsigned int,   d_len)
+   __string(d_name,dentry->d_name.name)
+   __field(unsigned int,   ia_valid)
+   __field(unsigned int,   ia_mode)
+   __field(unsigned int,   ia_uid)
+   __field(unsigned int,   ia_gid)
+   __field(loff_t, old_size)
+   __field(loff_t, ia_size)
+   ),
+
+   TP_fast_assign(
+   __entry->dev= inode->i_sb->s_dev;
+   __entry->ino= inode->i_ino;
+   __entry->d_len  = dentry->d_name.len;
+   __assign_str(d_name);
+   __entry->ia_valid   = attr->ia_valid;
+   __entry->ia_mode= attr->ia_mode;
+   __entry->ia_uid = from_kuid_munged(current_user_ns(), 
attr->ia_uid);
+   __entry->ia_gid = from_kgid_munged(current_user_ns(), 
attr->ia_

Re: [PATCH 2/2] hugetlbfs: use tracepoints in hugetlbfs functions.

2024-07-02 Thread Hongbo Li




On 2024/7/2 21:30, Mathieu Desnoyers wrote:

On 2024-07-02 07:55, Hongbo Li wrote:



On 2024/7/2 7:49, Steven Rostedt wrote:

On Wed, 12 Jun 2024 09:11:56 +0800
Hongbo Li  wrote:

@@ -934,6 +943,12 @@ static int hugetlbfs_setattr(struct mnt_idmap 
*idmap,

  if (error)
  return error;
+    trace_hugetlbfs_setattr(inode, dentry->d_name.len, 
dentry->d_name.name,

+    attr->ia_valid, attr->ia_mode,
+    from_kuid(_user_ns, attr->ia_uid),
+    from_kgid(_user_ns, attr->ia_gid),
+    inode->i_size, attr->ia_size);
+


That's a lot of parameters to pass to a tracepoint. Why not just pass 
the
dentry and attr and do the above in the TP_fast_assign() logic? That 
would

put less pressure on the icache for the code part.


Thanks for reviewing!

Some logic, such as the kuid_t --> uid_t translation, might more reasonably 
be done in the filesystem layer. Passing the dentry and attr requires the 
tracepoint to know the layout of those structures; perhaps the tracepoint 
should be aware of the members of these structures as little as possible.


As maintainer of the LTTng out-of-tree kernel tracer, I appreciate the
effort to decouple instrumentation from the subsystem instrumentation,
but as long as the structure sits in public headers and the global
variables used within the TP_fast_assign() logic (e.g. init_user_ns)
are export-gpl, this is enough to make it easy for tracer integration

Thank you for your friendly elaboration and suggestion!
I will update this part based on your suggestion in next version.

Thanks,
Hongbo

and it keeps the tracepoint caller code footprint to a minimum.

The TRACE_EVENT definitions are specific to the subsystem anyway,
so I don't think it matters that the TRACE_EVENT() need to access
the dentry and attr structures.

So I agree with Steven's suggestion. However, just as a precision,
I suspect it will have mainly an impact on code size, but not
necessarily on icache footprint, because it will shrink the code
size within the tracepoint unlikely branch (cold instructions).

Thanks,

Mathieu



Thanks,
Hongbo



-- Steve







Re: [PATCH 2/2] hugetlbfs: use tracepoints in hugetlbfs functions.

2024-07-02 Thread Hongbo Li




On 2024/7/2 7:49, Steven Rostedt wrote:

On Wed, 12 Jun 2024 09:11:56 +0800
Hongbo Li  wrote:


@@ -934,6 +943,12 @@ static int hugetlbfs_setattr(struct mnt_idmap *idmap,
if (error)
return error;
  
+	trace_hugetlbfs_setattr(inode, dentry->d_name.len, dentry->d_name.name,

+   attr->ia_valid, attr->ia_mode,
+   from_kuid(_user_ns, attr->ia_uid),
+   from_kgid(_user_ns, attr->ia_gid),
+   inode->i_size, attr->ia_size);
+


That's a lot of parameters to pass to a tracepoint. Why not just pass the
dentry and attr and do the above in the TP_fast_assign() logic? That would
put less pressure on the icache for the code part.


Thanks for reviewing!

Some logic, such as the kuid_t --> uid_t translation, might more reasonably 
be done in the filesystem layer. Passing the dentry and attr requires the 
tracepoint to know the layout of those structures; perhaps the tracepoint 
should be aware of the members of these structures as little as possible.

Thanks,
Hongbo



-- Steve





Re: [PATCH 2/2] hugetlbfs: use tracepoints in hugetlbfs functions.

2024-06-20 Thread Hongbo Li

Just a friendly ping to the patch :)

https://lore.kernel.org/all/20240612011156.2891254-1-lihongb...@huawei.com/

Thanks,
Hongbo

On 2024/6/12 9:11, Hongbo Li wrote:

Here we use the hugetlbfs tracepoint to track the call stack. And
the output in trace is as follows:

```
touch-5307[004] .  1402.167607: hugetlbfs_alloc_inode: dev = (0,50), 
ino = 21380, dir = 16921, mode = 0100644
touch-5307[004] .  1402.167638: hugetlbfs_setattr: dev = (0,50), ino = 
21380, name = testfile1, ia_valid = 131184, ia_mode = 0132434, ia_uid = 
2863018275, ia_gid = 4294967295, old_size = 0, ia_size = 4064
truncate-5328[003] .  1436.031054: hugetlbfs_setattr: dev = (0,50), ino 
= 21380, name = testfile1, ia_valid = 8296, ia_mode = 017, ia_uid = 
2862574544, ia_gid = 4294967295, old_size = 0, ia_size = 2097152
rm-5338[004] .  1484.426247: hugetlbfs_evict_inode: dev = (0,50), ino = 
21380, i_mode = 0100644, i_size = 2097152, i_nlink = 0, seals = 1, i_blocks = 0
-0   [004] ..s1.  1484.446668: hugetlbfs_free_inode: dev = (0,50), 
ino = 21380, i_mode = 0100644, i_size = 2097152, i_nlink = 0, seals = 1, i_blocks = 0
```

Signed-off-by: Hongbo Li 
---
  fs/hugetlbfs/inode.c | 21 +++--
  1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 412f295acebe..f3399c6a02ca 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -39,6 +39,9 @@
  #include 
  #include 
  
+#define CREATE_TRACE_POINTS

+#include 
+
  static const struct address_space_operations hugetlbfs_aops;
  static const struct file_operations hugetlbfs_file_operations;
  static const struct inode_operations hugetlbfs_dir_inode_operations;
@@ -686,6 +689,7 @@ static void hugetlbfs_evict_inode(struct inode *inode)
  {
struct resv_map *resv_map;
  
+	trace_hugetlbfs_evict_inode(inode);

remove_inode_hugepages(inode, 0, LLONG_MAX);
  
  	/*

@@ -813,8 +817,10 @@ static long hugetlbfs_fallocate(struct file *file, int 
mode, loff_t offset,
if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
return -EOPNOTSUPP;
  
-	if (mode & FALLOC_FL_PUNCH_HOLE)

-   return hugetlbfs_punch_hole(inode, offset, len);
+   if (mode & FALLOC_FL_PUNCH_HOLE) {
+   error = hugetlbfs_punch_hole(inode, offset, len);
+   goto out_nolock;
+   }
  
  	/*

 * Default preallocate case.
@@ -918,6 +924,9 @@ static long hugetlbfs_fallocate(struct file *file, int 
mode, loff_t offset,
inode_set_ctime_current(inode);
  out:
inode_unlock(inode);
+
+out_nolock:
+   trace_hugetlbfs_fallocate(inode, mode, offset, len, error);
return error;
  }
  
@@ -934,6 +943,12 @@ static int hugetlbfs_setattr(struct mnt_idmap *idmap,

if (error)
return error;
  
+	trace_hugetlbfs_setattr(inode, dentry->d_name.len, dentry->d_name.name,

+   attr->ia_valid, attr->ia_mode,
+   from_kuid(_user_ns, attr->ia_uid),
+   from_kgid(_user_ns, attr->ia_gid),
+   inode->i_size, attr->ia_size);
+
if (ia_valid & ATTR_SIZE) {
loff_t oldsize = inode->i_size;
loff_t newsize = attr->ia_size;
@@ -1032,6 +1047,7 @@ static struct inode *hugetlbfs_get_inode(struct 
super_block *sb,
break;
}
lockdep_annotate_inode_mutex_key(inode);
+   trace_hugetlbfs_alloc_inode(inode, dir, mode);
} else {
if (resv_map)
kref_put(_map->refs, resv_map_release);
@@ -1274,6 +1290,7 @@ static struct inode *hugetlbfs_alloc_inode(struct 
super_block *sb)
  
  static void hugetlbfs_free_inode(struct inode *inode)

  {
+   trace_hugetlbfs_free_inode(inode);
kmem_cache_free(hugetlbfs_inode_cachep, HUGETLBFS_I(inode));
  }
  




[PATCH 1/2] hugetlbfs: support tracepoint

2024-06-11 Thread Hongbo Li
Add basic tracepoints for {alloc, evict, free}_inode, setattr and
fallocate. These can help users to debug hugetlbfs more conveniently.

Signed-off-by: Hongbo Li 
---
 MAINTAINERS  |   1 +
 include/trace/events/hugetlbfs.h | 164 +++
 2 files changed, 165 insertions(+)
 create mode 100644 include/trace/events/hugetlbfs.h

diff --git a/MAINTAINERS b/MAINTAINERS
index aacccb376c28..df6fe4aa0f50 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10187,6 +10187,7 @@ F:  Documentation/mm/hugetlbfs_reserv.rst
 F: Documentation/mm/vmemmap_dedup.rst
 F: fs/hugetlbfs/
 F: include/linux/hugetlb.h
+F: include/trace/events/hugetlbfs.h
 F: mm/hugetlb.c
 F: mm/hugetlb_vmemmap.c
 F: mm/hugetlb_vmemmap.h
diff --git a/include/trace/events/hugetlbfs.h b/include/trace/events/hugetlbfs.h
new file mode 100644
index ..a4d785c87155
--- /dev/null
+++ b/include/trace/events/hugetlbfs.h
@@ -0,0 +1,164 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM hugetlbfs
+
+#if !defined(_TRACE_HUGETLBFS_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_HUGETLBFS_H
+
+#include 
+
+TRACE_EVENT(hugetlbfs_alloc_inode,
+
+   TP_PROTO(struct inode *inode, struct inode *dir, int mode),
+
+   TP_ARGS(inode, dir, mode),
+
+   TP_STRUCT__entry(
+   __field(dev_t,  dev)
+   __field(ino_t,  ino)
+   __field(ino_t,  dir)
+   __field(__u16,  mode)
+   ),
+
+   TP_fast_assign(
+   __entry->dev= inode->i_sb->s_dev;
+   __entry->ino= inode->i_ino;
+   __entry->dir= dir->i_ino;
+   __entry->mode   = mode;
+   ),
+
+   TP_printk("dev = (%d,%d), ino = %lu, dir = %lu, mode = 0%o",
+   MAJOR(__entry->dev), MINOR(__entry->dev),
+   (unsigned long) __entry->ino,
+   (unsigned long) __entry->dir, __entry->mode)
+);
+
+DECLARE_EVENT_CLASS(hugetlbfs__inode,
+
+   TP_PROTO(struct inode *inode),
+
+   TP_ARGS(inode),
+
+   TP_STRUCT__entry(
+   __field(dev_t,  dev)
+   __field(ino_t,  ino)
+   __field(__u16,  mode)
+   __field(loff_t, size)
+   __field(unsigned int,   nlink)
+   __field(unsigned int,   seals)
+   __field(blkcnt_t,   blocks)
+   ),
+
+   TP_fast_assign(
+   __entry->dev= inode->i_sb->s_dev;
+   __entry->ino= inode->i_ino;
+   __entry->mode   = inode->i_mode;
+   __entry->size   = inode->i_size;
+   __entry->nlink  = inode->i_nlink;
+   __entry->seals  = HUGETLBFS_I(inode)->seals;
+   __entry->blocks = inode->i_blocks;
+   ),
+
+   TP_printk("dev = (%d,%d), ino = %lu, i_mode = 0%o, i_size = %lld, 
i_nlink = %u, seals = %u, i_blocks = %llu",
+   MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) 
__entry->ino,
+   __entry->mode, __entry->size, __entry->nlink, __entry->seals,
+   (unsigned long long)__entry->blocks)
+);
+
+DEFINE_EVENT(hugetlbfs__inode, hugetlbfs_evict_inode,
+
+   TP_PROTO(struct inode *inode),
+
+   TP_ARGS(inode)
+);
+
+DEFINE_EVENT(hugetlbfs__inode, hugetlbfs_free_inode,
+
+   TP_PROTO(struct inode *inode),
+
+   TP_ARGS(inode)
+);
+
+TRACE_EVENT(hugetlbfs_setattr,
+
+   TP_PROTO(struct inode *inode,
+   unsigned int d_len, const unsigned char *d_name,
+   unsigned int ia_valid, unsigned int ia_mode,
+   unsigned int ia_uid, unsigned int ia_gid,
+   loff_t old_size, loff_t ia_size),
+
+   TP_ARGS(inode, d_len, d_name,
+   ia_valid, ia_mode, ia_uid, ia_gid, old_size, ia_size),
+
+   TP_STRUCT__entry(
+   __field(dev_t,  dev)
+   __field(ino_t,  ino)
+   __field(unsigned int,   d_len)
+   __string(d_name,d_name)
+   __field(unsigned int,   ia_valid)
+   __field(unsigned int,   ia_mode)
+   __field(unsigned int,   ia_uid)
+   __field(unsigned int,   ia_gid)
+   __field(loff_t, old_size)
+   __field(loff_t, ia_size)
+   ),
+
+   TP_fast_assign(
+   __entry->dev= inode->i_sb->s_dev;
+   __entry->ino= inode->i_ino;
+   __entry->d_len  = d_len;
+   __assign_str(d_name);
+   __entry->ia_valid   = ia_valid;
+   __entry->ia_mode= ia_mode

[PATCH 2/2] hugetlbfs: use tracepoints in hugetlbfs functions.

2024-06-11 Thread Hongbo Li
Here we use the hugetlbfs tracepoints to track the call stack. The
output in the trace is as follows:

```
touch-5307[004] .  1402.167607: hugetlbfs_alloc_inode: dev = (0,50), 
ino = 21380, dir = 16921, mode = 0100644
touch-5307[004] .  1402.167638: hugetlbfs_setattr: dev = (0,50), ino = 
21380, name = testfile1, ia_valid = 131184, ia_mode = 0132434, ia_uid = 
2863018275, ia_gid = 4294967295, old_size = 0, ia_size = 4064
truncate-5328[003] .  1436.031054: hugetlbfs_setattr: dev = (0,50), ino 
= 21380, name = testfile1, ia_valid = 8296, ia_mode = 017, ia_uid = 
2862574544, ia_gid = 4294967295, old_size = 0, ia_size = 2097152
rm-5338[004] .  1484.426247: hugetlbfs_evict_inode: dev = (0,50), ino = 
21380, i_mode = 0100644, i_size = 2097152, i_nlink = 0, seals = 1, i_blocks = 0
-0   [004] ..s1.  1484.446668: hugetlbfs_free_inode: dev = (0,50), 
ino = 21380, i_mode = 0100644, i_size = 2097152, i_nlink = 0, seals = 1, 
i_blocks = 0
```

Signed-off-by: Hongbo Li 
---
 fs/hugetlbfs/inode.c | 21 +++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 412f295acebe..f3399c6a02ca 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -39,6 +39,9 @@
 #include 
 #include 
 
+#define CREATE_TRACE_POINTS
+#include 
+
 static const struct address_space_operations hugetlbfs_aops;
 static const struct file_operations hugetlbfs_file_operations;
 static const struct inode_operations hugetlbfs_dir_inode_operations;
@@ -686,6 +689,7 @@ static void hugetlbfs_evict_inode(struct inode *inode)
 {
struct resv_map *resv_map;
 
+   trace_hugetlbfs_evict_inode(inode);
remove_inode_hugepages(inode, 0, LLONG_MAX);
 
/*
@@ -813,8 +817,10 @@ static long hugetlbfs_fallocate(struct file *file, int 
mode, loff_t offset,
if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
return -EOPNOTSUPP;
 
-   if (mode & FALLOC_FL_PUNCH_HOLE)
-   return hugetlbfs_punch_hole(inode, offset, len);
+   if (mode & FALLOC_FL_PUNCH_HOLE) {
+   error = hugetlbfs_punch_hole(inode, offset, len);
+   goto out_nolock;
+   }
 
/*
 * Default preallocate case.
@@ -918,6 +924,9 @@ static long hugetlbfs_fallocate(struct file *file, int 
mode, loff_t offset,
inode_set_ctime_current(inode);
 out:
inode_unlock(inode);
+
+out_nolock:
+   trace_hugetlbfs_fallocate(inode, mode, offset, len, error);
return error;
 }
 
@@ -934,6 +943,12 @@ static int hugetlbfs_setattr(struct mnt_idmap *idmap,
if (error)
return error;
 
+   trace_hugetlbfs_setattr(inode, dentry->d_name.len, dentry->d_name.name,
+   attr->ia_valid, attr->ia_mode,
+   from_kuid(&init_user_ns, attr->ia_uid),
+   from_kgid(&init_user_ns, attr->ia_gid),
+   inode->i_size, attr->ia_size);
+
if (ia_valid & ATTR_SIZE) {
loff_t oldsize = inode->i_size;
loff_t newsize = attr->ia_size;
@@ -1032,6 +1047,7 @@ static struct inode *hugetlbfs_get_inode(struct 
super_block *sb,
break;
}
lockdep_annotate_inode_mutex_key(inode);
+   trace_hugetlbfs_alloc_inode(inode, dir, mode);
} else {
if (resv_map)
kref_put(_map->refs, resv_map_release);
@@ -1274,6 +1290,7 @@ static struct inode *hugetlbfs_alloc_inode(struct 
super_block *sb)
 
 static void hugetlbfs_free_inode(struct inode *inode)
 {
+   trace_hugetlbfs_free_inode(inode);
kmem_cache_free(hugetlbfs_inode_cachep, HUGETLBFS_I(inode));
 }
 
-- 
2.34.1




[PATCH 0/2] Introduce tracepoint for hugetlbfs

2024-06-11 Thread Hongbo Li
Here we add some basic tracepoints for debugging hugetlbfs: {alloc, free,
evict}_inode, setattr and fallocate.

Hongbo Li (2):
  hugetlbfs: support tracepoint
  hugetlbfs: use tracepoints in hugetlbfs functions.

 MAINTAINERS  |   1 +
 fs/hugetlbfs/inode.c |  21 +++-
 include/trace/events/hugetlbfs.h | 164 +++
 3 files changed, 184 insertions(+), 2 deletions(-)
 create mode 100644 include/trace/events/hugetlbfs.h

-- 
2.34.1