[tip:x86/urgent] x86/vdso: Fix flip/flop vdso build bug

2019-07-12 Thread tip-bot for Naohiro Aota
Commit-ID:  e9a1379f9219be439f47a0f063431a92dc529eda
Gitweb: https://git.kernel.org/tip/e9a1379f9219be439f47a0f063431a92dc529eda
Author: Naohiro Aota 
AuthorDate: Fri, 12 Jul 2019 19:15:55 +0900
Committer:  Thomas Gleixner 
CommitDate: Fri, 12 Jul 2019 17:35:07 +0200

x86/vdso: Fix flip/flop vdso build bug

Two consecutive "make" on an already compiled kernel tree will show
different behavior:

$ make
  CALL    scripts/checksyscalls.sh
  CALL    scripts/atomic/check-atomics.sh
  DESCEND  objtool
  CHK     include/generated/compile.h
  VDSOCHK arch/x86/entry/vdso/vdso64.so.dbg
  VDSOCHK arch/x86/entry/vdso/vdso32.so.dbg
Kernel: arch/x86/boot/bzImage is ready  (#3)
  Building modules, stage 2.
  MODPOST 12 modules

$ make
make
  CALL    scripts/checksyscalls.sh
  CALL    scripts/atomic/check-atomics.sh
  DESCEND  objtool
  CHK     include/generated/compile.h
  VDSO    arch/x86/entry/vdso/vdso64.so.dbg
  OBJCOPY arch/x86/entry/vdso/vdso64.so
  VDSO2C  arch/x86/entry/vdso/vdso-image-64.c
  CC      arch/x86/entry/vdso/vdso-image-64.o
  VDSO    arch/x86/entry/vdso/vdso32.so.dbg
  OBJCOPY arch/x86/entry/vdso/vdso32.so
  VDSO2C  arch/x86/entry/vdso/vdso-image-32.c
  CC      arch/x86/entry/vdso/vdso-image-32.o
  AR      arch/x86/entry/vdso/built-in.a
  AR      arch/x86/entry/built-in.a
  AR      arch/x86/built-in.a
  GEN     .version
  CHK     include/generated/compile.h
  UPD     include/generated/compile.h
  CC      init/version.o
  AR      init/built-in.a
  LD      vmlinux.o


This is causing "LD vmlinux" once every two times even without any
modifications. This is the same bug fixed in commit 92a4728608a8
("x86/boot: Fix if_changed build flip/flop bug"). Two "if_changed" cannot
be used in one target.

Fix this by merging the two commands into one function.

Fixes: 7ac870747988 ("x86/vdso: Switch to generic vDSO implementation")
Signed-off-by: Naohiro Aota 
Signed-off-by: Thomas Gleixner 
Tested-by: Vincenzo Frascino 
Reviewed-by: Vincenzo Frascino 
Reviewed-by: Masahiro Yamada 
Link: https://lkml.kernel.org/r/20190712101556.17833-1-naohiro.a...@wdc.com

---
 arch/x86/entry/vdso/Makefile | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index 39106111be86..34773395139a 100644
--- a/arch/x86/entry/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile
@@ -56,8 +56,7 @@ VDSO_LDFLAGS_vdso.lds = -m elf_x86_64 -soname linux-vdso.so.1 
--no-undefined \
-z max-page-size=4096
 
 $(obj)/vdso64.so.dbg: $(obj)/vdso.lds $(vobjs) FORCE
-   $(call if_changed,vdso)
-   $(call if_changed,vdso_check)
+   $(call if_changed,vdso_and_check)
 
 HOST_EXTRACFLAGS += -I$(srctree)/tools/include -I$(srctree)/include/uapi 
-I$(srctree)/arch/$(SUBARCH)/include/uapi
 hostprogs-y+= vdso2c
@@ -127,8 +126,7 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE
$(call if_changed,objcopy)
 
 $(obj)/vdsox32.so.dbg: $(obj)/vdsox32.lds $(vobjx32s) FORCE
-   $(call if_changed,vdso)
-   $(call if_changed,vdso_check)
+   $(call if_changed,vdso_and_check)
 
 CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds)
 VDSO_LDFLAGS_vdso32.lds = -m elf_i386 -soname linux-gate.so.1
@@ -167,8 +165,7 @@ $(obj)/vdso32.so.dbg: FORCE \
  $(obj)/vdso32/note.o \
  $(obj)/vdso32/system_call.o \
  $(obj)/vdso32/sigreturn.o
-   $(call if_changed,vdso)
-   $(call if_changed,vdso_check)
+   $(call if_changed,vdso_and_check)
 
 #
 # The DSO images are built using a special linker script.
@@ -184,6 +181,9 @@ VDSO_LDFLAGS = -shared $(call ld-option, --hash-style=both) 
\
-Bsymbolic
 GCOV_PROFILE := n
 
+quiet_cmd_vdso_and_check = VDSO$@
+  cmd_vdso_and_check = $(cmd_vdso); $(cmd_vdso_check)
+
 #
 # Install the unstripped copies of vdso*.so.  If our toolchain supports
 # build-id, install .build-id links as well.


[PATCH v2 2/2] arm64/vdso: fix flip/flop vdso build bug

2019-07-12 Thread Naohiro Aota
Running "make" on an already compiled kernel tree will rebuild the kernel
even without any modifications:

$ make ARCH=arm64 CROSS_COMPILE=/usr/bin/aarch64-unknown-linux-gnu-
arch/arm64/Makefile:58: CROSS_COMPILE_COMPAT not defined or empty, the compat 
vDSO will not be built
  CALLscripts/checksyscalls.sh
  CALLscripts/atomic/check-atomics.sh
  VDSOCHK arch/arm64/kernel/vdso/vdso.so.dbg
  VDSOSYM include/generated/vdso-offsets.h
  CHK include/generated/compile.h
  CC  arch/arm64/kernel/signal.o
  CC  arch/arm64/kernel/vdso.o
  CC  arch/arm64/kernel/signal32.o
  LD  arch/arm64/kernel/vdso/vdso.so.dbg
  OBJCOPY arch/arm64/kernel/vdso/vdso.so
  AS  arch/arm64/kernel/vdso/vdso.o
  AR  arch/arm64/kernel/vdso/built-in.a
  AR  arch/arm64/kernel/built-in.a
  GEN .version
  CHK include/generated/compile.h
  UPD include/generated/compile.h
  CC  init/version.o
  AR  init/built-in.a
  LD  vmlinux.o

This is the same bug fixed in commit 92a4728608a8 ("x86/boot: Fix
if_changed build flip/flop bug"). We cannot use two "if_changed" in one
target. Fix this build bug by merging two commands into one function.

Cc: Masahiro Yamada 
Fixes: 28b1a824a4f4 ("arm64: vdso: Substitute gettimeofday() with C 
implementation")
Signed-off-by: Naohiro Aota 
---
 arch/arm64/kernel/vdso/Makefile | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile
index 4ab863045188..068c614b1231 100644
--- a/arch/arm64/kernel/vdso/Makefile
+++ b/arch/arm64/kernel/vdso/Makefile
@@ -57,8 +57,7 @@ $(obj)/vdso.o : $(obj)/vdso.so
 
 # Link rule for the .so file, .lds has to be first
 $(obj)/vdso.so.dbg: $(obj)/vdso.lds $(obj-vdso) FORCE
-   $(call if_changed,ld)
-   $(call if_changed,vdso_check)
+   $(call if_changed,ld_and_vdso_check)
 
 # Strip rule for the .so file
 $(obj)/%.so: OBJCOPYFLAGS := -S
@@ -77,6 +76,9 @@ include/generated/vdso-offsets.h: $(obj)/vdso.so.dbg FORCE
 quiet_cmd_vdsocc = VDSOCC   $@
   cmd_vdsocc = $(CC) $(a_flags) $(c_flags) -c -o $@ $<
 
+quiet_cmd_ld_and_vdso_check = LD  $@
+  cmd_ld_and_vdso_check = $(cmd_ld); $(cmd_vdso_check)
+
 # Install commands for the unstripped file
 quiet_cmd_vdso_install = INSTALL $@
   cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@
-- 
2.22.0



[PATCH v2 1/2] x86/vdso: fix flip/flop vdso build bug

2019-07-12 Thread Naohiro Aota
Two consecutive "make" on an already compiled kernel tree will show
different behavior:

$ make
  CALLscripts/checksyscalls.sh
  CALLscripts/atomic/check-atomics.sh
  DESCEND  objtool
  CHK include/generated/compile.h
  VDSOCHK arch/x86/entry/vdso/vdso64.so.dbg
  VDSOCHK arch/x86/entry/vdso/vdso32.so.dbg
Kernel: arch/x86/boot/bzImage is ready  (#3)
  Building modules, stage 2.
  MODPOST 12 modules

$ make
make
  CALLscripts/checksyscalls.sh
  CALLscripts/atomic/check-atomics.sh
  DESCEND  objtool
  CHK include/generated/compile.h
  VDSOarch/x86/entry/vdso/vdso64.so.dbg
  OBJCOPY arch/x86/entry/vdso/vdso64.so
  VDSO2C  arch/x86/entry/vdso/vdso-image-64.c
  CC  arch/x86/entry/vdso/vdso-image-64.o
  VDSOarch/x86/entry/vdso/vdso32.so.dbg
  OBJCOPY arch/x86/entry/vdso/vdso32.so
  VDSO2C  arch/x86/entry/vdso/vdso-image-32.c
  CC  arch/x86/entry/vdso/vdso-image-32.o
  AR  arch/x86/entry/vdso/built-in.a
  AR  arch/x86/entry/built-in.a
  AR  arch/x86/built-in.a
  GEN .version
  CHK include/generated/compile.h
  UPD include/generated/compile.h
  CC  init/version.o
  AR  init/built-in.a
  LD  vmlinux.o


This is causing "LD vmlinux" once every two times even without any
modifications. This is the same bug fixed in commit 92a4728608a8
("x86/boot: Fix if_changed build flip/flop bug"). We cannot use two
"if_changed" in one target. Fix this build bug by merging two commands into
one function.

Cc: Masahiro Yamada 
Fixes: 7ac870747988 ("x86/vdso: Switch to generic vDSO implementation")
Signed-off-by: Naohiro Aota 
---
 arch/x86/entry/vdso/Makefile | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index 39106111be86..34773395139a 100644
--- a/arch/x86/entry/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile
@@ -56,8 +56,7 @@ VDSO_LDFLAGS_vdso.lds = -m elf_x86_64 -soname linux-vdso.so.1 
--no-undefined \
-z max-page-size=4096
 
 $(obj)/vdso64.so.dbg: $(obj)/vdso.lds $(vobjs) FORCE
-   $(call if_changed,vdso)
-   $(call if_changed,vdso_check)
+   $(call if_changed,vdso_and_check)
 
 HOST_EXTRACFLAGS += -I$(srctree)/tools/include -I$(srctree)/include/uapi 
-I$(srctree)/arch/$(SUBARCH)/include/uapi
 hostprogs-y+= vdso2c
@@ -127,8 +126,7 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE
$(call if_changed,objcopy)
 
 $(obj)/vdsox32.so.dbg: $(obj)/vdsox32.lds $(vobjx32s) FORCE
-   $(call if_changed,vdso)
-   $(call if_changed,vdso_check)
+   $(call if_changed,vdso_and_check)
 
 CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds)
 VDSO_LDFLAGS_vdso32.lds = -m elf_i386 -soname linux-gate.so.1
@@ -167,8 +165,7 @@ $(obj)/vdso32.so.dbg: FORCE \
  $(obj)/vdso32/note.o \
  $(obj)/vdso32/system_call.o \
  $(obj)/vdso32/sigreturn.o
-   $(call if_changed,vdso)
-   $(call if_changed,vdso_check)
+   $(call if_changed,vdso_and_check)
 
 #
 # The DSO images are built using a special linker script.
@@ -184,6 +181,9 @@ VDSO_LDFLAGS = -shared $(call ld-option, --hash-style=both) 
\
-Bsymbolic
 GCOV_PROFILE := n
 
+quiet_cmd_vdso_and_check = VDSO$@
+  cmd_vdso_and_check = $(cmd_vdso); $(cmd_vdso_check)
+
 #
 # Install the unstripped copies of vdso*.so.  If our toolchain supports
 # build-id, install .build-id links as well.
-- 
2.22.0



Re: [PATCH] x86/vdso, arm64/vdso: fix flip/flop vdso build bug

2019-07-12 Thread Naohiro Aota

On Fri, Jul 12, 2019 at 03:24:01PM +0900, Masahiro Yamada wrote:

On Fri, Jul 12, 2019 at 2:46 PM Naohiro Aota  wrote:


Two consecutive "make" on an already compiled kernel tree will show
different behavior:

$ make
  CALLscripts/checksyscalls.sh
  CALLscripts/atomic/check-atomics.sh
  DESCEND  objtool
  CHK include/generated/compile.h
  VDSOCHK arch/x86/entry/vdso/vdso64.so.dbg
  VDSOCHK arch/x86/entry/vdso/vdso32.so.dbg
Kernel: arch/x86/boot/bzImage is ready  (#3)
  Building modules, stage 2.
  MODPOST 12 modules

$ make
  CALLscripts/checksyscalls.sh
  CALLscripts/atomic/check-atomics.sh
  DESCEND  objtool
  CHK include/generated/compile.h
  VDSOarch/x86/entry/vdso/vdso64.so.dbg
  OBJCOPY arch/x86/entry/vdso/vdso64.so
  VDSO2C  arch/x86/entry/vdso/vdso-image-64.c
  CC  arch/x86/entry/vdso/vdso-image-64.o
  VDSOarch/x86/entry/vdso/vdso32.so.dbg
  OBJCOPY arch/x86/entry/vdso/vdso32.so
  VDSO2C  arch/x86/entry/vdso/vdso-image-32.c
  CC  arch/x86/entry/vdso/vdso-image-32.o
  AR  arch/x86/entry/vdso/built-in.a
  AR  arch/x86/entry/built-in.a
  AR  arch/x86/built-in.a
  GEN .version
  CHK include/generated/compile.h
  UPD include/generated/compile.h
  CC  init/version.o
  AR  init/built-in.a
  LD  vmlinux.o


This is causing "LD vmlinux" once every two times even without any
modifications. This is the same bug fixed in commit 92a4728608a8
("x86/boot: Fix if_changed build flip/flop bug").  We cannot use two
"if_changed" in one target. Fix this build bug by merging two commands
into one function.

Signed-off-by: Naohiro Aota 



The code looks OK, but you should split this
into two patches, for arm64 and x86,
and then add Fixes: for each of them.


Thanks, I'll split and add the tags.




--
Best Regards
Masahiro Yamada


[PATCH] x86/vdso, arm64/vdso: fix flip/flop vdso build bug

2019-07-11 Thread Naohiro Aota
Two consecutive "make" on an already compiled kernel tree will show
different behavior:

$ make
  CALLscripts/checksyscalls.sh
  CALLscripts/atomic/check-atomics.sh
  DESCEND  objtool
  CHK include/generated/compile.h
  VDSOCHK arch/x86/entry/vdso/vdso64.so.dbg
  VDSOCHK arch/x86/entry/vdso/vdso32.so.dbg
Kernel: arch/x86/boot/bzImage is ready  (#3)
  Building modules, stage 2.
  MODPOST 12 modules

$ make
  CALLscripts/checksyscalls.sh
  CALLscripts/atomic/check-atomics.sh
  DESCEND  objtool
  CHK include/generated/compile.h
  VDSOarch/x86/entry/vdso/vdso64.so.dbg
  OBJCOPY arch/x86/entry/vdso/vdso64.so
  VDSO2C  arch/x86/entry/vdso/vdso-image-64.c
  CC  arch/x86/entry/vdso/vdso-image-64.o
  VDSOarch/x86/entry/vdso/vdso32.so.dbg
  OBJCOPY arch/x86/entry/vdso/vdso32.so
  VDSO2C  arch/x86/entry/vdso/vdso-image-32.c
  CC  arch/x86/entry/vdso/vdso-image-32.o
  AR  arch/x86/entry/vdso/built-in.a
  AR  arch/x86/entry/built-in.a
  AR  arch/x86/built-in.a
  GEN .version
  CHK include/generated/compile.h
  UPD include/generated/compile.h
  CC  init/version.o
  AR  init/built-in.a
  LD  vmlinux.o


This is causing "LD vmlinux" once every two times even without any
modifications. This is the same bug fixed in commit 92a4728608a8
("x86/boot: Fix if_changed build flip/flop bug").  We cannot use two
"if_changed" in one target. Fix this build bug by merging two commands
into one function.

Signed-off-by: Naohiro Aota 
---
 arch/arm64/kernel/vdso/Makefile |  6 --
 arch/x86/entry/vdso/Makefile| 12 ++--
 2 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile
index 4ab863045188..068c614b1231 100644
--- a/arch/arm64/kernel/vdso/Makefile
+++ b/arch/arm64/kernel/vdso/Makefile
@@ -57,8 +57,7 @@ $(obj)/vdso.o : $(obj)/vdso.so
 
 # Link rule for the .so file, .lds has to be first
 $(obj)/vdso.so.dbg: $(obj)/vdso.lds $(obj-vdso) FORCE
-   $(call if_changed,ld)
-   $(call if_changed,vdso_check)
+   $(call if_changed,ld_and_vdso_check)
 
 # Strip rule for the .so file
 $(obj)/%.so: OBJCOPYFLAGS := -S
@@ -77,6 +76,9 @@ include/generated/vdso-offsets.h: $(obj)/vdso.so.dbg FORCE
 quiet_cmd_vdsocc = VDSOCC   $@
   cmd_vdsocc = $(CC) $(a_flags) $(c_flags) -c -o $@ $<
 
+quiet_cmd_ld_and_vdso_check = LD  $@
+  cmd_ld_and_vdso_check = $(cmd_ld); $(cmd_vdso_check)
+
 # Install commands for the unstripped file
 quiet_cmd_vdso_install = INSTALL $@
   cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@
diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index 39106111be86..34773395139a 100644
--- a/arch/x86/entry/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile
@@ -56,8 +56,7 @@ VDSO_LDFLAGS_vdso.lds = -m elf_x86_64 -soname linux-vdso.so.1 
--no-undefined \
-z max-page-size=4096
 
 $(obj)/vdso64.so.dbg: $(obj)/vdso.lds $(vobjs) FORCE
-   $(call if_changed,vdso)
-   $(call if_changed,vdso_check)
+   $(call if_changed,vdso_and_check)
 
 HOST_EXTRACFLAGS += -I$(srctree)/tools/include -I$(srctree)/include/uapi 
-I$(srctree)/arch/$(SUBARCH)/include/uapi
 hostprogs-y+= vdso2c
@@ -127,8 +126,7 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE
$(call if_changed,objcopy)
 
 $(obj)/vdsox32.so.dbg: $(obj)/vdsox32.lds $(vobjx32s) FORCE
-   $(call if_changed,vdso)
-   $(call if_changed,vdso_check)
+   $(call if_changed,vdso_and_check)
 
 CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds)
 VDSO_LDFLAGS_vdso32.lds = -m elf_i386 -soname linux-gate.so.1
@@ -167,8 +165,7 @@ $(obj)/vdso32.so.dbg: FORCE \
  $(obj)/vdso32/note.o \
  $(obj)/vdso32/system_call.o \
  $(obj)/vdso32/sigreturn.o
-   $(call if_changed,vdso)
-   $(call if_changed,vdso_check)
+   $(call if_changed,vdso_and_check)
 
 #
 # The DSO images are built using a special linker script.
@@ -184,6 +181,9 @@ VDSO_LDFLAGS = -shared $(call ld-option, --hash-style=both) 
\
-Bsymbolic
 GCOV_PROFILE := n
 
+quiet_cmd_vdso_and_check = VDSO$@
+  cmd_vdso_and_check = $(cmd_vdso); $(cmd_vdso_check)
+
 #
 # Install the unstripped copies of vdso*.so.  If our toolchain supports
 # build-id, install .build-id links as well.
-- 
2.22.0



Re: [PATCH 02/19] btrfs: Get zone information of zoned block devices

2019-06-18 Thread Naohiro Aota
On 2019/06/13 22:58, Josef Bacik wrote:
> On Fri, Jun 07, 2019 at 10:10:08PM +0900, Naohiro Aota wrote:
>> If a zoned block device is found, get its zone information (number of zones
>> and zone size) using the new helper function btrfs_get_dev_zonetypes().  To
>> avoid costly run-time zone report commands to test the device zones type
>> during block allocation, attach the seqzones bitmap to the device structure
>> to indicate if a zone is sequential or accepts random writes.
>>
>> This patch also introduces the helper function btrfs_dev_is_sequential() to
>> test if the zone storing a block is a sequential write required zone.
>>
>> Signed-off-by: Damien Le Moal 
>> Signed-off-by: Naohiro Aota 
>> ---
>>   fs/btrfs/volumes.c | 143 +
>>   fs/btrfs/volumes.h |  33 +++
>>   2 files changed, 176 insertions(+)
>>
> 
> We have enough problems with giant files already, please just add a separate
> hmzoned.c or whatever and put all the zone specific code in there.  That'll save
> me time when I go and break a bunch of stuff out.  Thanks,
> 
> Josef
> 

Thank you for the reviews.

I'll add hmzoned.c and put the things (with more helpers/abstraction) there in 
the next version.

Thanks.


[PATCH 10/12] btrfs-progs: mkfs: Zoned block device support

2019-06-07 Thread Naohiro Aota
This patch makes the size of the temporary system group chunk equal to the
device zone size. It also enables PREP_DEVICE_HMZONED if the user enables
the HMZONED feature.

Enabling HMZONED feature is done using option "-O hmzoned". This feature is
incompatible for now with source directory setup.

Signed-off-by: Naohiro Aota 
---
 mkfs/common.c | 12 +++-
 mkfs/common.h |  1 +
 mkfs/main.c   | 45 +++--
 3 files changed, 47 insertions(+), 11 deletions(-)

diff --git a/mkfs/common.c b/mkfs/common.c
index f7e3badcf2b9..12af54c1d886 100644
--- a/mkfs/common.c
+++ b/mkfs/common.c
@@ -152,6 +152,7 @@ int make_btrfs(int fd, struct btrfs_mkfs_config *cfg)
int skinny_metadata = !!(cfg->features &
 BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA);
u64 num_bytes;
+   u64 system_group_size;
 
buf = malloc(sizeof(*buf) + max(cfg->sectorsize, cfg->nodesize));
if (!buf)
@@ -312,12 +313,14 @@ int make_btrfs(int fd, struct btrfs_mkfs_config *cfg)
btrfs_set_item_offset(buf, btrfs_item_nr(nritems), itemoff);
btrfs_set_item_size(buf, btrfs_item_nr(nritems), item_size);
 
+   system_group_size = (cfg->features & BTRFS_FEATURE_INCOMPAT_HMZONED) ?
+   cfg->zone_size : BTRFS_MKFS_SYSTEM_GROUP_SIZE;
+
dev_item = btrfs_item_ptr(buf, nritems, struct btrfs_dev_item);
btrfs_set_device_id(buf, dev_item, 1);
btrfs_set_device_generation(buf, dev_item, 0);
btrfs_set_device_total_bytes(buf, dev_item, num_bytes);
-   btrfs_set_device_bytes_used(buf, dev_item,
-   BTRFS_MKFS_SYSTEM_GROUP_SIZE);
+   btrfs_set_device_bytes_used(buf, dev_item, system_group_size);
btrfs_set_device_io_align(buf, dev_item, cfg->sectorsize);
btrfs_set_device_io_width(buf, dev_item, cfg->sectorsize);
btrfs_set_device_sector_size(buf, dev_item, cfg->sectorsize);
@@ -345,7 +348,7 @@ int make_btrfs(int fd, struct btrfs_mkfs_config *cfg)
btrfs_set_item_size(buf, btrfs_item_nr(nritems), item_size);
 
chunk = btrfs_item_ptr(buf, nritems, struct btrfs_chunk);
-   btrfs_set_chunk_length(buf, chunk, BTRFS_MKFS_SYSTEM_GROUP_SIZE);
+   btrfs_set_chunk_length(buf, chunk, system_group_size);
btrfs_set_chunk_owner(buf, chunk, BTRFS_EXTENT_TREE_OBJECTID);
btrfs_set_chunk_stripe_len(buf, chunk, BTRFS_STRIPE_LEN);
btrfs_set_chunk_type(buf, chunk, BTRFS_BLOCK_GROUP_SYSTEM);
@@ -411,8 +414,7 @@ int make_btrfs(int fd, struct btrfs_mkfs_config *cfg)
(unsigned long)btrfs_dev_extent_chunk_tree_uuid(dev_extent),
BTRFS_UUID_SIZE);
 
-   btrfs_set_dev_extent_length(buf, dev_extent,
-   BTRFS_MKFS_SYSTEM_GROUP_SIZE);
+   btrfs_set_dev_extent_length(buf, dev_extent, system_group_size);
nritems++;
 
btrfs_set_header_bytenr(buf, cfg->blocks[MKFS_DEV_TREE]);
diff --git a/mkfs/common.h b/mkfs/common.h
index 28912906d0a9..d0e4c7b2c906 100644
--- a/mkfs/common.h
+++ b/mkfs/common.h
@@ -53,6 +53,7 @@ struct btrfs_mkfs_config {
u64 features;
/* Size of the filesystem in bytes */
u64 num_bytes;
+   u64 zone_size;
 
/* Output fields, set during creation */
 
diff --git a/mkfs/main.c b/mkfs/main.c
index 93c0b71c864e..cbfd45bee836 100644
--- a/mkfs/main.c
+++ b/mkfs/main.c
@@ -61,8 +61,12 @@ static int create_metadata_block_groups(struct btrfs_root 
*root, int mixed,
u64 bytes_used;
u64 chunk_start = 0;
u64 chunk_size = 0;
+   u64 system_group_size = 0;
int ret;
 
+   system_group_size = fs_info->fs_devices->hmzoned ?
+   fs_info->fs_devices->zone_size : BTRFS_MKFS_SYSTEM_GROUP_SIZE;
+
trans = btrfs_start_transaction(root, 1);
BUG_ON(IS_ERR(trans));
bytes_used = btrfs_super_bytes_used(fs_info->super_copy);
@@ -75,8 +79,8 @@ static int create_metadata_block_groups(struct btrfs_root 
*root, int mixed,
ret = btrfs_make_block_group(trans, fs_info, bytes_used,
 BTRFS_BLOCK_GROUP_SYSTEM,
 BTRFS_BLOCK_RESERVED_1M_FOR_SUPER,
-BTRFS_MKFS_SYSTEM_GROUP_SIZE);
-   allocation->system += BTRFS_MKFS_SYSTEM_GROUP_SIZE;
+system_group_size);
+   allocation->system += system_group_size;
if (ret)
return ret;
 
@@ -761,6 +765,7 @@ int main(int argc, char **argv)
int metadata_profile_opt = 0;
int discard = 1;
int ssd = 0;
+   int hmzoned = 0;
int force_overwrite = 0;
char *source_dir = NULL;
bool source_dir_set = false;
@@ -774,6 +779,7 @@ int main(int argc, char **argv)
u64 features = BTRFS_MKFS_DEFAULT_FEATURES;
struct mkfs_alloca

[PATCH 05/12] btrfs-progs: load and check zone information

2019-06-07 Thread Naohiro Aota
This patch checks if a device added to btrfs is a zoned block device. If it
is, load zones information and the zone size for the device.

For a btrfs volume composed of multiple zoned block devices, all devices
must have the same zone size.

Signed-off-by: Naohiro Aota 
---
 utils.c   | 10 ++
 volumes.c | 18 ++
 volumes.h |  3 +++
 3 files changed, 31 insertions(+)

diff --git a/utils.c b/utils.c
index d50304b1be80..a26fe7a5743c 100644
--- a/utils.c
+++ b/utils.c
@@ -250,6 +250,16 @@ int btrfs_add_to_fsid(struct btrfs_trans_handle *trans,
goto out;
}
 
+   ret = btrfs_get_zone_info(fd, path, fs_info->fs_devices->hmzoned,
+ &device->zinfo);
+   if (ret)
+   goto out;
+   if (device->zinfo.zone_size != fs_info->fs_devices->zone_size) {
+   error("Device zone size differ\n");
+   ret = -EINVAL;
+   goto out;
+   }
+
disk_super = (struct btrfs_super_block *)buf;
dev_item = &disk_super->dev_item;
 
diff --git a/volumes.c b/volumes.c
index 3a91b43b378b..f6d1b1e9dc7f 100644
--- a/volumes.c
+++ b/volumes.c
@@ -168,6 +168,8 @@ static int device_list_add(const char *path,
u64 found_transid = btrfs_super_generation(disk_super);
bool metadata_uuid = (btrfs_super_incompat_flags(disk_super) &
BTRFS_FEATURE_INCOMPAT_METADATA_UUID);
+   int hmzoned = btrfs_super_incompat_flags(disk_super) &
+   BTRFS_FEATURE_INCOMPAT_HMZONED;
 
if (metadata_uuid)
fs_devices = find_fsid(disk_super->fsid,
@@ -257,6 +259,8 @@ static int device_list_add(const char *path,
if (fs_devices->lowest_devid > devid) {
fs_devices->lowest_devid = devid;
}
+   if (hmzoned)
+   fs_devices->hmzoned = 1;
*fs_devices_ret = fs_devices;
return 0;
 }
@@ -327,6 +331,8 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, 
int flags)
struct btrfs_device *device;
int ret;
 
+   fs_devices->zone_size = 0;
+
list_for_each_entry(device, &fs_devices->devices, dev_list) {
if (!device->name) {
printk("no name for device %llu, skip it now\n", 
device->devid);
@@ -350,6 +356,18 @@ int btrfs_open_devices(struct btrfs_fs_devices 
*fs_devices, int flags)
device->fd = fd;
if (flags & O_RDWR)
device->writeable = 1;
+
+   ret = btrfs_get_zone_info(fd, device->name, fs_devices->hmzoned,
+ &device->zinfo);
+   if (ret != 0)
+   goto fail;
+   if (!fs_devices->zone_size) {
+   fs_devices->zone_size = device->zinfo.zone_size;
+   } else if (device->zinfo.zone_size != fs_devices->zone_size) {
+   fprintf(stderr, "Device zone size differ\n");
+   ret = -EINVAL;
+   goto fail;
+   }
}
return 0;
 fail:
diff --git a/volumes.h b/volumes.h
index c9262ceaea93..6ec83fe43cfe 100644
--- a/volumes.h
+++ b/volumes.h
@@ -115,6 +115,9 @@ struct btrfs_fs_devices {
 
int seeding;
struct btrfs_fs_devices *seed;
+
+   u64 zone_size;
+   unsigned int hmzoned:1;
 };
 
 struct btrfs_bio_stripe {
-- 
2.21.0



[PATCH 11/12] btrfs-progs: device-add: support HMZONED device

2019-06-07 Thread Naohiro Aota
This patch checks if the target file system is flagged as HMZONED. If it is,
the device to be added is flagged PREP_DEVICE_HMZONED.  Also add checks to
prevent mixing non-zoned devices and zoned devices.

Signed-off-by: Naohiro Aota 
---
 cmds-device.c | 29 +++--
 1 file changed, 27 insertions(+), 2 deletions(-)

diff --git a/cmds-device.c b/cmds-device.c
index e3e30b6d5ded..86ffb1a2a5c2 100644
--- a/cmds-device.c
+++ b/cmds-device.c
@@ -57,6 +57,9 @@ static int cmd_device_add(int argc, char **argv)
int discard = 1;
int force = 0;
int last_dev;
+   int res;
+   int hmzoned;
+   struct btrfs_ioctl_feature_flags feature_flags;
 
optind = 0;
while (1) {
@@ -92,12 +95,33 @@ static int cmd_device_add(int argc, char **argv)
if (fdmnt < 0)
return 1;
 
+   res = ioctl(fdmnt, BTRFS_IOC_GET_FEATURES, &feature_flags);
+   if (res) {
+   error("error getting feature flags '%s': %m", mntpnt);
+   return 1;
+   }
+   hmzoned = feature_flags.incompat_flags & BTRFS_FEATURE_INCOMPAT_HMZONED;
+
for (i = optind; i < last_dev; i++){
struct btrfs_ioctl_vol_args ioctl_args;
-   int devfd, res;
+   int devfd;
u64 dev_block_count = 0;
char *path;
 
+   if (hmzoned && zoned_model(argv[i]) == ZONED_NONE) {
+   error("cannot add non-zoned device to HMZONED file 
system '%s'",
+ argv[i]);
+   ret++;
+   continue;
+   }
+
+   if (!hmzoned && zoned_model(argv[i]) == ZONED_HOST_MANAGED) {
+   error("cannot add host managed zoned device to 
non-HMZONED file system '%s'",
+ argv[i]);
+   ret++;
+   continue;
+   }
+
res = test_dev_for_mkfs(argv[i], force);
if (res) {
ret++;
@@ -113,7 +137,8 @@ static int cmd_device_add(int argc, char **argv)
 
res = btrfs_prepare_device(devfd, argv[i], &dev_block_count, 0,
PREP_DEVICE_ZERO_END | PREP_DEVICE_VERBOSE |
-   (discard ? PREP_DEVICE_DISCARD : 0));
+   (discard ? PREP_DEVICE_DISCARD : 0) |
+   (hmzoned ? PREP_DEVICE_HMZONED : 0));
close(devfd);
if (res) {
ret++;
-- 
2.21.0



[PATCH 07/19] btrfs: do sequential extent allocation in HMZONED mode

2019-06-07 Thread Naohiro Aota
On HMZONED drives, writes must always be sequential and directed at a block
group zone write pointer position. Thus, block allocation in a block group
must also be done sequentially using an allocation pointer equal to the
block group zone write pointer plus the number of blocks allocated but not
yet written.

The sequential allocation function find_free_extent_seq() bypasses the checks
in find_free_extent() and increases the reserved byte counter by itself. It is
impossible to revert a region once it has been allocated sequentially,
since doing so might race with other allocations and leave an allocation hole,
which breaks the sequential write rule.

Furthermore, this commit introduces two new variables to struct
btrfs_block_group_cache. "wp_broken" indicates that the write pointer is broken
(e.g. not synced on a RAID1 block group) and marks that block group read
only. "unusable" keeps track of the size of regions that were once allocated
and then freed. Such regions are never usable until the underlying zones are
reset.

Signed-off-by: Naohiro Aota 
---
 fs/btrfs/ctree.h|  24 +++
 fs/btrfs/extent-tree.c  | 378 ++--
 fs/btrfs/free-space-cache.c |  33 
 fs/btrfs/free-space-cache.h |   5 +
 4 files changed, 426 insertions(+), 14 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 6c00101407e4..f4bcd2a6ec12 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -582,6 +582,20 @@ struct btrfs_full_stripe_locks_tree {
struct mutex lock;
 };
 
+/* Block group allocation types */
+enum btrfs_alloc_type {
+
+   /* Regular first fit allocation */
+   BTRFS_ALLOC_FIT = 0,
+
+   /*
+* Sequential allocation: this is for HMZONED mode and
+* will result in ignoring free space before a block
+* group allocation offset.
+*/
+   BTRFS_ALLOC_SEQ = 1,
+};
+
 struct btrfs_block_group_cache {
struct btrfs_key key;
struct btrfs_block_group_item item;
@@ -592,6 +606,7 @@ struct btrfs_block_group_cache {
u64 reserved;
u64 delalloc_bytes;
u64 bytes_super;
+   u64 unusable;
u64 flags;
u64 cache_generation;
 
@@ -621,6 +636,7 @@ struct btrfs_block_group_cache {
unsigned int iref:1;
unsigned int has_caching_ctl:1;
unsigned int removed:1;
+   unsigned int wp_broken:1;
 
int disk_cache_state;
 
@@ -694,6 +710,14 @@ struct btrfs_block_group_cache {
 
/* Record locked full stripes for RAID5/6 block group */
struct btrfs_full_stripe_locks_tree full_stripe_locks_root;
+
+   /*
+* Allocation offset for the block group to implement sequential
+* allocation. This is used only with HMZONED mode enabled and if
+* the block group resides on a sequential zone.
+*/
+   enum btrfs_alloc_type alloc_type;
+   u64 alloc_offset;
 };
 
 /* delayed seq elem */
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 363db58f56b8..ebd0d6eae038 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -28,6 +28,7 @@
 #include "sysfs.h"
 #include "qgroup.h"
 #include "ref-verify.h"
+#include "rcu-string.h"
 
 #undef SCRAMBLE_DELAYED_REFS
 
@@ -590,6 +591,8 @@ static int cache_block_group(struct btrfs_block_group_cache 
*cache,
struct btrfs_caching_control *caching_ctl;
int ret = 0;
 
+   WARN_ON(cache->alloc_type == BTRFS_ALLOC_SEQ);
+
caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_NOFS);
if (!caching_ctl)
return -ENOMEM;
@@ -6555,6 +6558,19 @@ void btrfs_wait_block_group_reservations(struct 
btrfs_block_group_cache *bg)
wait_var_event(&bg->reservations, !atomic_read(&bg->reservations));
 }
 
+static void __btrfs_add_reserved_bytes(struct btrfs_block_group_cache *cache,
+  u64 ram_bytes, u64 num_bytes,
+  int delalloc)
+{
+   struct btrfs_space_info *space_info = cache->space_info;
+
+   cache->reserved += num_bytes;
+   space_info->bytes_reserved += num_bytes;
+   update_bytes_may_use(space_info, -ram_bytes);
+   if (delalloc)
+   cache->delalloc_bytes += num_bytes;
+}
+
 /**
  * btrfs_add_reserved_bytes - update the block_group and space info counters
  * @cache: The cache we are manipulating
@@ -6573,17 +6589,16 @@ static int btrfs_add_reserved_bytes(struct 
btrfs_block_group_cache *cache,
struct btrfs_space_info *space_info = cache->space_info;
int ret = 0;
 
+   /* should handled by find_free_extent_seq */
+   WARN_ON(cache->alloc_type == BTRFS_ALLOC_SEQ);
+
spin_lock(&space_info->lock);
spin_lock(&cache->lock);
-   if (cache->ro) {
+   if (cache->ro)
ret = -EAGAIN;
-   } else {
-   cache->reserved += num_bytes

[PATCH 04/19] btrfs: disable fallocate in HMZONED mode

2019-06-07 Thread Naohiro Aota
fallocate() is implemented by reserving actual extents instead of
reservations. This can result in exposing the sequential write constraint
of host-managed zoned block devices to the application, which would break
the POSIX semantic for the fallocated file.  To avoid this, report
fallocate() as not supported when in HMZONED mode for now.

In the future, we may be able to implement "in-memory" fallocate() in
HMZONED mode by utilizing space_info->bytes_may_use or so.

Signed-off-by: Naohiro Aota 
---
 fs/btrfs/file.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 89f5be2bfb43..e664b5363697 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -3027,6 +3027,10 @@ static long btrfs_fallocate(struct file *file, int mode,
alloc_end = round_up(offset + len, blocksize);
cur_offset = alloc_start;
 
+   /* Do not allow fallocate in HMZONED mode */
+   if (btrfs_fs_incompat(btrfs_sb(inode->i_sb), HMZONED))
+   return -EOPNOTSUPP;
+
/* Make sure we aren't being give some crap mode */
if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
 FALLOC_FL_ZERO_RANGE))
-- 
2.21.0



[PATCH 05/19] btrfs: disable direct IO in HMZONED mode

2019-06-07 Thread Naohiro Aota
Direct write I/Os can be directed at existing extents that have already
been written. Such write requests are prohibited on host-managed zoned
block devices. So disable direct IO support for a volume with HMZONED mode
enabled.

Signed-off-by: Naohiro Aota 
---
 fs/btrfs/inode.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 6bebc0ca751d..89542c19d09e 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -8520,6 +8520,9 @@ static ssize_t check_direct_IO(struct btrfs_fs_info 
*fs_info,
unsigned int blocksize_mask = fs_info->sectorsize - 1;
ssize_t retval = -EINVAL;
 
+   if (btrfs_fs_incompat(fs_info, HMZONED))
+   goto out;
+
if (offset & blocksize_mask)
goto out;
 
-- 
2.21.0



[PATCH 03/19] btrfs: Check and enable HMZONED mode

2019-06-07 Thread Naohiro Aota
HMZONED mode cannot be used together with the RAID5/6 profile for now.
Introduce the function btrfs_check_hmzoned_mode() to check this. This
function will also check if HMZONED flag is enabled on the file system and
if the file system consists of zoned devices with equal zone size.

Additionally, as updates to the space cache are in-place, the space cache
cannot be located over sequential zones and there is no guarantee that the
device will have enough conventional zones to store this cache. Resolve
this problem by completely disabling the space cache.  This does not
introduce any problems with sequential block groups: all the free space is
located after the allocation pointer and there is no free space before the
pointer, so there is no need to have such a cache.

Signed-off-by: Damien Le Moal 
Signed-off-by: Naohiro Aota 
---
 fs/btrfs/ctree.h   |  3 ++
 fs/btrfs/dev-replace.c |  7 +++
 fs/btrfs/disk-io.c |  7 +++
 fs/btrfs/super.c   | 12 ++---
 fs/btrfs/volumes.c | 99 ++
 fs/btrfs/volumes.h |  1 +
 6 files changed, 124 insertions(+), 5 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index b81c331b28fa..6c00101407e4 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -806,6 +806,9 @@ struct btrfs_fs_info {
struct btrfs_root *uuid_root;
struct btrfs_root *free_space_root;
 
+   /* Zone size when in HMZONED mode */
+   u64 zone_size;
+
/* the log root tree is a directory of all the other log roots */
struct btrfs_root *log_root_tree;
 
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index ee0989c7e3a9..fbe5ea2a04ed 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -201,6 +201,13 @@ static int btrfs_init_dev_replace_tgtdev(struct 
btrfs_fs_info *fs_info,
return PTR_ERR(bdev);
}
 
+   if ((bdev_zoned_model(bdev) == BLK_ZONED_HM &&
+!btrfs_fs_incompat(fs_info, HMZONED)) ||
+   (!bdev_is_zoned(bdev) && btrfs_fs_incompat(fs_info, HMZONED))) {
+   ret = -EINVAL;
+   goto error;
+   }
+
filemap_write_and_wait(bdev->bd_inode->i_mapping);
 
devices = &fs_info->fs_devices->devices;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 663efce22d98..7c1404c76768 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3086,6 +3086,13 @@ int open_ctree(struct super_block *sb,
 
btrfs_free_extra_devids(fs_devices, 1);
 
+   ret = btrfs_check_hmzoned_mode(fs_info);
+   if (ret) {
+   btrfs_err(fs_info, "failed to init hmzoned mode: %d",
+   ret);
+   goto fail_block_groups;
+   }
+
ret = btrfs_sysfs_add_fsid(fs_devices, NULL);
if (ret) {
btrfs_err(fs_info, "failed to init sysfs fsid interface: %d",
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 2c66d9ea6a3b..740a701f16c5 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -435,11 +435,13 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char 
*options,
bool saved_compress_force;
int no_compress = 0;
 
-   cache_gen = btrfs_super_cache_generation(info->super_copy);
-   if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
-   btrfs_set_opt(info->mount_opt, FREE_SPACE_TREE);
-   else if (cache_gen)
-   btrfs_set_opt(info->mount_opt, SPACE_CACHE);
+   if (!btrfs_fs_incompat(info, HMZONED)) {
+   cache_gen = btrfs_super_cache_generation(info->super_copy);
+   if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
+   btrfs_set_opt(info->mount_opt, FREE_SPACE_TREE);
+   else if (cache_gen)
+   btrfs_set_opt(info->mount_opt, SPACE_CACHE);
+   }
 
/*
 * Even the options are empty, we still need to do extra check
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index b673178718e3..b6f367d19dc9 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1524,6 +1524,83 @@ int btrfs_open_devices(struct btrfs_fs_devices 
*fs_devices,
return ret;
 }
 
+int btrfs_check_hmzoned_mode(struct btrfs_fs_info *fs_info)
+{
+   struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
+   struct btrfs_device *device;
+   u64 hmzoned_devices = 0;
+   u64 nr_devices = 0;
+   u64 zone_size = 0;
+   int incompat_hmzoned = btrfs_fs_incompat(fs_info, HMZONED);
+   int ret = 0;
+
+   /* Count zoned devices */
+   list_for_each_entry(device, &fs_devices->devices, dev_list) {
+   if (!device->bdev)
+   continue;
+   if (bdev_zoned_model(device->bdev) == BLK_ZONED_HM ||
+   (bdev_zoned_model(device->bdev) == BLK_ZONED_HA &&
+incompat_hmzoned)) {
+   hmzoned_device

Re: [PATCH][RESEND] fs: always set I_DIRTY_TIME to fsync correctly on lazytime

2017-03-16 Thread Naohiro Aota
Hello, all

What is the status of this patch? Can this be picked up for some tree?

Regards,
Naohiro

2016-11-01 7:46 GMT+09:00 Jan Kara :
> On Tue 01-11-16 04:02:45, Naohiro Aota wrote:
>> While lazytime states that "The on-disk timestamps are updated only
>> when: ... - the application employs fsync(2), syncfs(2), or sync(2)"
>> [1], it does not write a timestamp update on fsync().
>>
>> [1] http://manpages.ubuntu.com/manpages/xenial/man8/mount.8.html
>>
>> The following commands will reproduce the problem:
>>
>> $ mount -o noatime,lazytime ext4.img /mnt/tmp
>> $ cd /mnt/tmp
>> (create an 128M file to fio, not to observe size update)
>> $ dd if=/dev/zero of=wxyz.0.0 bs=1M count=128
>> (do write/fsync)
>> $ fio --name wxyz --direct=1 --buffered=0 --size=128m --bs=64k --rw=write \
>>   --ioengine=sync --numjobs=1 --fsync=5
>>
>> Since fio invokes 1 fsync per 5 writes, we should see rapid journal
>> commits for timestamp update by tracing jbd2:jbd2_end_commit trace
>> point. Only we can see are, however, some periodic (~5 sec) commits from
>> bdi flush like below.
>>
>> $ trace jbd2:jbd2_end_commit
>> jbd2/loop0-8-1617  [002] 96.637351: jbd2_end_commit: dev 7,0 
>> transaction 5393 sync 0 head 5393
>> jbd2/loop0-8-1617  [000]    101.679411: jbd2_end_commit: dev 7,0 
>> transaction 5394 sync 0 head 5393
>> jbd2/loop0-8-1617  [003]    106.743628: jbd2_end_commit: dev 7,0 
>> transaction 5395 sync 0 head 5393
>> jbd2/loop0-8-1617  [001]    111.801964: jbd2_end_commit: dev 7,0 
>> transaction 5396 sync 0 head 5393
>> ...
>>
>> The problem is __mark_inode_dirty() does not always flag I_DIRTY_TIME.
>> It seems that it is no use to mark an inode I_DIRTY_TIME when the inode
>> is already I_DIRTY_INODE. However, by that decision, we're skipping
>> journal write if we invoke two fsync()s between two bdi flushes. As the
>> following table shows, any fsync after the first fsync do nothing (if
>> there's no update other than timestamp).
>>
>> Event| i_state  | journal
>> -+--+
>>| I_DIRTY_TIME | no write (lazytime)
>>   | I_DIRTY_SYNC | write timestamp update
>>| I_DIRTY_SYNC | no write (lazytime)
>>   | I_DIRTY_SYNC | no write *BUG*
>> ...
>>   | 0|
>>| I_DIRTY_TIME | no write (lazytime)
>>   | I_DIRTY_SYNC | write timestamp update
>>
>> We should set I_DIRTY_TIME on the second timestamp update to let fsync()
>> notice there's a timestamp update after the last inode writeout.
>>
>> After this patch, we can see rapid trace of journal commit:
>> $ trace jbd2:jbd2_end_commit
>> jbd2/loop0-8-1879  [002]    208.275057: jbd2_end_commit: dev 7,0 
>> transaction 5364 sync 0 head 3343
>> jbd2/loop0-8-1879  [000] ....   208.302539: jbd2_end_commit: dev 7,0 
>> transaction 5365 sync 0 head 3343
>> jbd2/loop0-8-1879  [000]    208.327238: jbd2_end_commit: dev 7,0 
>> transaction 5366 sync 0 head 3343
>> jbd2/loop0-8-1879  [003]    208.347618: jbd2_end_commit: dev 7,0 
>> transaction 5367 sync 0 head 3343
>> ...
>>
>> Reported-by: Asraa Ali Mardan 
>> Signed-off-by: Naohiro Aota 
>
> Thanks for the patch. It makes sense. You can add:
>
> Reviewed-by: Jan Kara 
>
> Jens, can you please merge the patch? Thanks!
>
> Honza
>> ---
>>
>>  fs/fs-writeback.c | 7 ---
>>  1 file changed, 4 insertions(+), 3 deletions(-)
>>
>> diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
>> index 05713a5..ace628c 100644
>> --- a/fs/fs-writeback.c
>> +++ b/fs/fs-writeback.c
>> @@ -2100,16 +2100,17 @@ void __mark_inode_dirty(struct inode *inode, int 
>> flags)
>>*/
>>   smp_mb();
>>
>> - if (((inode->i_state & flags) == flags) ||
>> - (dirtytime && (inode->i_state & I_DIRTY_INODE)))
>> + if ((inode->i_state & flags) == flags)
>>   return;
>>
>>   if (unlikely(block_dump))
>>   block_dump___mark_inode_dirty(inode);
>>
>>   spin_lock(&inode->i_lock);
>> - if (dirtytime && (inode->i_state & I_DIRTY_INODE))
>> + if (dirtytime && (inode->i_state & I_DIRTY_INODE)) {
>> + inode->i_state |= I_DIRTY_TIME;
>>   goto out_unlock_inode;
>> + }
>>   if ((inode->i_state & flags) != flags) {
>>   const int was_dirty = inode->i_state & I_DIRTY;
>>
>> --
>> 2.8.2
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
>> the body of a message to majord...@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> --
> Jan Kara 
> SUSE Labs, CR


[PATCH] uapi: add missing install of userio.h

2017-03-08 Thread Naohiro Aota
While commit 5523662edd4f ("Input: add userio module") added userio.h
under the uapi/ directory, it forgot to add the header file to Kbuild.
Thus, the file is missing from header installation. This patch just
resolves the issue by adding the missing entry.

Signed-off-by: Naohiro Aota 
---
 include/uapi/linux/Kbuild | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
index dd9820b1c779..f8d9fed17ba9 100644
--- a/include/uapi/linux/Kbuild
+++ b/include/uapi/linux/Kbuild
@@ -445,6 +445,7 @@ header-y += unistd.h
 header-y += unix_diag.h
 header-y += usbdevice_fs.h
 header-y += usbip.h
+header-y += userio.h
 header-y += utime.h
 header-y += utsname.h
 header-y += uuid.h
-- 
2.11.0



[PATCH][RESEND] fs: always set I_DIRTY_TIME to fsync correctly on lazytime

2016-10-31 Thread Naohiro Aota
While lazytime states that "The on-disk timestamps are updated only
when: ... - the application employs fsync(2), syncfs(2), or sync(2)"
[1], it does not write a timestamp update on fsync().

[1] http://manpages.ubuntu.com/manpages/xenial/man8/mount.8.html

The following commands will reproduce the problem:

$ mount -o noatime,lazytime ext4.img /mnt/tmp
$ cd /mnt/tmp
(create an 128M file to fio, not to observe size update)
$ dd if=/dev/zero of=wxyz.0.0 bs=1M count=128
(do write/fsync)
$ fio --name wxyz --direct=1 --buffered=0 --size=128m --bs=64k --rw=write \
  --ioengine=sync --numjobs=1 --fsync=5

Since fio invokes 1 fsync per 5 writes, we should see rapid journal
commits for timestamp updates when tracing the jbd2:jbd2_end_commit
tracepoint. However, all we can see are some periodic (~5 sec) commits
from bdi flush, as shown below.

$ trace jbd2:jbd2_end_commit
jbd2/loop0-8-1617  [002] 96.637351: jbd2_end_commit: dev 7,0 
transaction 5393 sync 0 head 5393
jbd2/loop0-8-1617  [000]    101.679411: jbd2_end_commit: dev 7,0 
transaction 5394 sync 0 head 5393
jbd2/loop0-8-1617  [003]    106.743628: jbd2_end_commit: dev 7,0 
transaction 5395 sync 0 head 5393
jbd2/loop0-8-1617  [001]    111.801964: jbd2_end_commit: dev 7,0 
transaction 5396 sync 0 head 5393
...

The problem is that __mark_inode_dirty() does not always set I_DIRTY_TIME.
It may seem pointless to mark an inode I_DIRTY_TIME when the inode
is already I_DIRTY_INODE. However, because of that decision, we skip the
journal write if we invoke two fsync()s between two bdi flushes. As the
following table shows, any fsync after the first fsync does nothing (if
there's no update other than the timestamp).

Event | i_state      | journal
------+--------------+------------------------
write | I_DIRTY_TIME | no write (lazytime)
fsync | I_DIRTY_SYNC | write timestamp update
write | I_DIRTY_SYNC | no write (lazytime)
fsync | I_DIRTY_SYNC | no write *BUG*
...
flush | 0            |
write | I_DIRTY_TIME | no write (lazytime)
fsync | I_DIRTY_SYNC | write timestamp update

We should set I_DIRTY_TIME on the second timestamp update to let fsync()
notice there's a timestamp update after the last inode writeout.

After this patch, we can see rapid trace of journal commit:
$ trace jbd2:jbd2_end_commit
jbd2/loop0-8-1879  [002]    208.275057: jbd2_end_commit: dev 7,0 
transaction 5364 sync 0 head 3343
jbd2/loop0-8-1879  [000]    208.302539: jbd2_end_commit: dev 7,0 
transaction 5365 sync 0 head 3343
jbd2/loop0-8-1879  [000]    208.327238: jbd2_end_commit: dev 7,0 
transaction 5366 sync 0 head 3343
jbd2/loop0-8-1879  [003]    208.347618: jbd2_end_commit: dev 7,0 
transaction 5367 sync 0 head 3343
...

Reported-by: Asraa Ali Mardan 
Signed-off-by: Naohiro Aota 
---

 fs/fs-writeback.c | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 05713a5..ace628c 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -2100,16 +2100,17 @@ void __mark_inode_dirty(struct inode *inode, int flags)
 */
smp_mb();
 
-   if (((inode->i_state & flags) == flags) ||
-   (dirtytime && (inode->i_state & I_DIRTY_INODE)))
+   if ((inode->i_state & flags) == flags)
return;
 
if (unlikely(block_dump))
block_dump___mark_inode_dirty(inode);
 
spin_lock(&inode->i_lock);
-   if (dirtytime && (inode->i_state & I_DIRTY_INODE))
+   if (dirtytime && (inode->i_state & I_DIRTY_INODE)) {
+   inode->i_state |= I_DIRTY_TIME;
goto out_unlock_inode;
+   }
if ((inode->i_state & flags) != flags) {
const int was_dirty = inode->i_state & I_DIRTY;
 
-- 
2.8.2



[PATCH] fs: always set I_DIRTY_TIME to fsync correctly on lazytime

2016-10-16 Thread Naohiro Aota
While lazytime states that "The on-disk timestamps are updated only
when: ... - the application employs fsync(2), syncfs(2), or sync(2)"
[1], it does not write a timestamp update on fsync().

[1] http://manpages.ubuntu.com/manpages/xenial/man8/mount.8.html

The following commands will reproduce the problem:

$ mount -o noatime,lazytime ext4.img /mnt/tmp
$ cd /mnt/tmp
(create an 128M file to fio, not to observe size update)
$ dd if=/dev/zero of=wxyz.0.0 bs=1M count=128
(do write/fsync)
$ fio --name wxyz --direct=1 --buffered=0 --size=128m --bs=64k --rw=write \
  --ioengine=sync --numjobs=1 --fsync=5

Since fio invokes 1 fsync per 5 writes, we should see rapid journal
commits for timestamp updates when tracing the jbd2:jbd2_end_commit
tracepoint. However, all we can see are some periodic (~5 sec) commits
from bdi flush, as shown below.

$ trace jbd2:jbd2_end_commit
jbd2/loop0-8-1617  [002] 96.637351: jbd2_end_commit: dev 7,0 
transaction 5393 sync 0 head 5393
jbd2/loop0-8-1617  [000]    101.679411: jbd2_end_commit: dev 7,0 
transaction 5394 sync 0 head 5393
jbd2/loop0-8-1617  [003]    106.743628: jbd2_end_commit: dev 7,0 
transaction 5395 sync 0 head 5393
jbd2/loop0-8-1617  [001]    111.801964: jbd2_end_commit: dev 7,0 
transaction 5396 sync 0 head 5393
...

The problem is that __mark_inode_dirty() does not always set I_DIRTY_TIME.
It may seem pointless to mark an inode I_DIRTY_TIME when the inode
is already I_DIRTY_INODE. However, because of that decision, we skip the
journal write if we invoke two fsync()s between two bdi flushes. As the
following table shows, any fsync after the first fsync does nothing (if
there's no update other than the timestamp).

Event | i_state      | journal
------+--------------+------------------------
write | I_DIRTY_TIME | no write (lazytime)
fsync | I_DIRTY_SYNC | write timestamp update
write | I_DIRTY_SYNC | no write (lazytime)
fsync | I_DIRTY_SYNC | no write *BUG*
...
flush | 0            |
write | I_DIRTY_TIME | no write (lazytime)
fsync | I_DIRTY_SYNC | write timestamp update

We should set I_DIRTY_TIME on the second timestamp update to let fsync()
notice there's a timestamp update after the last inode writeout.

After this patch, we can see rapid trace of journal commit:
$ trace jbd2:jbd2_end_commit
jbd2/loop0-8-1879  [002]    208.275057: jbd2_end_commit: dev 7,0 
transaction 5364 sync 0 head 3343
jbd2/loop0-8-1879  [000]    208.302539: jbd2_end_commit: dev 7,0 
transaction 5365 sync 0 head 3343
jbd2/loop0-8-1879  [000]    208.327238: jbd2_end_commit: dev 7,0 
transaction 5366 sync 0 head 3343
jbd2/loop0-8-1879  [003]    208.347618: jbd2_end_commit: dev 7,0 
transaction 5367 sync 0 head 3343
...

Reported-by: Asraa Ali Mardan 
Signed-off-by: Naohiro Aota 
---
 fs/fs-writeback.c | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 05713a5..ace628c 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -2100,16 +2100,17 @@ void __mark_inode_dirty(struct inode *inode, int flags)
 */
smp_mb();
 
-   if (((inode->i_state & flags) == flags) ||
-   (dirtytime && (inode->i_state & I_DIRTY_INODE)))
+   if ((inode->i_state & flags) == flags)
return;
 
if (unlikely(block_dump))
block_dump___mark_inode_dirty(inode);
 
spin_lock(&inode->i_lock);
-   if (dirtytime && (inode->i_state & I_DIRTY_INODE))
+   if (dirtytime && (inode->i_state & I_DIRTY_INODE)) {
+   inode->i_state |= I_DIRTY_TIME;
goto out_unlock_inode;
+   }
if ((inode->i_state & flags) != flags) {
const int was_dirty = inode->i_state & I_DIRTY;
 
-- 
2.8.2



Re: [PATCH] btrfs: let btrfs_delete_unused_bgs() to clean relocated bgs

2016-09-04 Thread Naohiro Aota
2016-09-02 (金) の 09:35 -0400 に Josef Bacik さんは書きました:
> On 09/02/2016 03:46 AM, Naohiro Aota wrote:
> > 
> > Currently, btrfs_relocate_chunk() is removing relocated BG by
> > itself. But
> > the work can be done by btrfs_delete_unused_bgs() (and it's better
> > since it
> > trim the BG). Let's dedupe the code.
> > 
> > While btrfs_delete_unused_bgs() is already hitting the relocated
> > BG, it
> > skip the BG since the BG has "ro" flag set (to keep balancing BG
> > intact).
> > On the other hand, btrfs cannot drop "ro" flag here to prevent
> > additional
> > writes. So this patch make use of "removed" flag.
> > btrfs_delete_unused_bgs() now detect the flag to distinguish
> > whether a
> > read-only BG is relocating or not.
> > 
> 
> This seems racey to me.  We remove the last part of the block group,
> it ends up 
> on the unused_bgs_list, we process this list, see that removed isn't
> set and we 
> skip it, then later we set removed, but it's too late.  I think the
> right way is 
> to actually do a transaction, set ->removed, manually add it to the 
> unused_bgs_list if it's not already, then end the transaction.  This
> way we are 
> guaranteed to have the bg on the list when it is ready to be
> removed.  This is 
> my analysis after looking at it for 10 seconds after being awake for
> like 30 
> minutes so if I'm missing something let me know.  Thanks,

I don't think a race will happen. Since we are holding
delete_unused_bgs_mutex here, btrfs_delete_unused_bgs() checks the ->removed
flag only after we unlock the mutex, i.e. after we have set up the flag
properly. In the case where btrfs_delete_unused_bgs() checks the BG before
we take delete_unused_bgs_mutex, that BG is removed by it (if it's empty)
and btrfs_relocate_chunk() should never see it.

Regards,
Naohiro


[PATCH] btrfs: let btrfs_delete_unused_bgs() to clean relocated bgs

2016-09-02 Thread Naohiro Aota
Currently, btrfs_relocate_chunk() removes the relocated BG by itself. But
the work can be done by btrfs_delete_unused_bgs() (and that is better since
it also trims the BG). Let's dedupe the code.

While btrfs_delete_unused_bgs() already encounters the relocated BG, it
skips the BG since the BG has the "ro" flag set (to keep a balancing BG
intact). On the other hand, btrfs cannot drop the "ro" flag here, since it
is needed to prevent additional writes. So this patch makes use of the
"removed" flag. btrfs_delete_unused_bgs() now checks the flag to distinguish
whether a read-only BG is still being relocated or not.

Signed-off-by: Naohiro Aota 
---
 fs/btrfs/extent-tree.c |  2 +-
 fs/btrfs/volumes.c | 24 ++--
 2 files changed, 11 insertions(+), 15 deletions(-)

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 843ed27..d382735 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -10971,7 +10971,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info 
*fs_info)
spin_lock(&block_group->lock);
if (block_group->reserved ||
btrfs_block_group_used(&block_group->item) ||
-   block_group->ro ||
+   (block_group->ro && !block_group->removed) ||
list_is_singular(&block_group->list)) {
/*
 * We want to bail if we made new allocations or have
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 7e6399f..1a6789d 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2931,8 +2931,8 @@ out:
 static int btrfs_relocate_chunk(struct btrfs_root *root, u64 chunk_offset)
 {
struct btrfs_root *extent_root;
-   struct btrfs_trans_handle *trans;
int ret;
+   struct btrfs_block_group_cache *block_group;
 
root = root->fs_info->chunk_root;
extent_root = root->fs_info->extent_root;
@@ -2962,21 +2962,17 @@ static int btrfs_relocate_chunk(struct btrfs_root 
*root, u64 chunk_offset)
if (ret)
return ret;
 
-   trans = btrfs_start_trans_remove_block_group(root->fs_info,
-chunk_offset);
-   if (IS_ERR(trans)) {
-   ret = PTR_ERR(trans);
-   btrfs_handle_fs_error(root->fs_info, ret, NULL);
-   return ret;
-   }
-
/*
-* step two, delete the device extents and the
-* chunk tree entries
+* step two, flag the chunk as removed and let
+* btrfs_delete_unused_bgs() remove it.
 */
-   ret = btrfs_remove_chunk(trans, root, chunk_offset);
-   btrfs_end_transaction(trans, extent_root);
-   return ret;
+   block_group = btrfs_lookup_block_group(root->fs_info, chunk_offset);
+   spin_lock(&block_group->lock);
+   block_group->removed = 1;
+   spin_unlock(&block_group->lock);
+   btrfs_put_block_group(block_group);
+
+   return 0;
 }
 
 static int btrfs_relocate_sys_chunks(struct btrfs_root *root)
-- 
2.7.3



[tip:perf/urgent] perf probe: Support signedness casting

2016-08-09 Thread tip-bot for Naohiro Aota
Commit-ID:  19f00b011729417f69e4df53cc3fe5ecc25134a4
Gitweb: http://git.kernel.org/tip/19f00b011729417f69e4df53cc3fe5ecc25134a4
Author: Naohiro Aota 
AuthorDate: Tue, 9 Aug 2016 11:40:08 +0900
Committer:  Arnaldo Carvalho de Melo 
CommitDate: Tue, 9 Aug 2016 10:52:22 -0300

perf probe: Support signedness casting

The 'perf probe' tool detects a variable's type and uses the detected
type to add a new probe. Then, kprobes prints the variable in
hexadecimal format if the variable is unsigned and in decimal if
it is signed.

We sometimes want to see an unsigned variable in decimal format (e.g.
sector_t or size_t). In that case, we need to investigate the variable's
size manually just to specify its signedness.

This patch adds signedness casting support. By specifying "s" or "u" as a
type, perf-probe will investigate the variable size as usual and use the
specified signedness.

E.g. without this:

  $ perf probe -a 'submit_bio bio->bi_iter.bi_sector'
  Added new event:
probe:submit_bio (on submit_bio with bi_sector=bio->bi_iter.bi_sector)
  You can now use it in all perf tools, such as:
  perf record -e probe:submit_bio -aR sleep 1
  $ cat trace_pipe|head
  dbench-9692  [003] d..1   971.096633: submit_bio: 
(submit_bio+0x0/0x140) bi_sector=0x3a3d00
  dbench-9692  [003] d..1   971.096685: submit_bio: 
(submit_bio+0x0/0x140) bi_sector=0x1a3d80
  dbench-9692  [003] d..1   971.096687: submit_bio: 
(submit_bio+0x0/0x140) bi_sector=0x3a3d80
...
  // need to investigate the variable size
  $ perf probe -a 'submit_bio bio->bi_iter.bi_sector:s64'
  Added new event:
probe:submit_bio (on submit_bio with 
bi_sector=bio->bi_iter.bi_sector:s64)
  You can now use it in all perf tools, such as:
perf record -e probe:submit_bio -aR sleep 1

  With this:

  // just use "s" to cast its signedness
  $ perf probe -v -a 'submit_bio bio->bi_iter.bi_sector:s'
  Added new event:
probe:submit_bio (on submit_bio with bi_sector=bio->bi_iter.bi_sector:s)
  You can now use it in all perf tools, such as:
  perf record -e probe:submit_bio -aR sleep 1
  $ cat trace_pipe|head
  dbench-9689  [001] d..1  1212.391237: submit_bio: 
(submit_bio+0x0/0x140) bi_sector=128
  dbench-9689  [001] d..1  1212.391252: submit_bio: 
(submit_bio+0x0/0x140) bi_sector=131072
  dbench-9697  [006] d..1  1212.398611: submit_bio: 
(submit_bio+0x0/0x140) bi_sector=30208

  This commit also updates perf-probe.txt to describe "types". Most parts
  are based on existing documentation: Documentation/trace/kprobetrace.txt

Committer note:

Testing using 'perf trace':

  # perf probe -a 'submit_bio bio->bi_iter.bi_sector'
  Added new event:
probe:submit_bio (on submit_bio with bi_sector=bio->bi_iter.bi_sector)

  You can now use it in all perf tools, such as:

perf record -e probe:submit_bio -aR sleep 1

  # trace --no-syscalls --ev probe:submit_bio
  0.000 probe:submit_bio:(ac3aee00) bi_sector=0xc133c0)
   3181.861 probe:submit_bio:(ac3aee00) bi_sector=0x6cffb8)
   3181.881 probe:submit_bio:(ac3aee00) bi_sector=0x6cffc0)
   3184.488 probe:submit_bio:(ac3aee00) bi_sector=0x6cffc8)

   4717.927 probe:submit_bio:(ac3aee00) bi_sector=0x4dc7a88)
   4717.970 probe:submit_bio:(ac3aee00) bi_sector=0x4dc7880)
  ^C[root@jouet ~]#

Now, using this new feature:

[root@jouet ~]# perf probe -a 'submit_bio bio->bi_iter.bi_sector:s'
Added new event:
  probe:submit_bio (on submit_bio with bi_sector=bio->bi_iter.bi_sector:s)

You can now use it in all perf tools, such as:

perf record -e probe:submit_bio -aR sleep 1

  [root@jouet ~]# trace --no-syscalls --ev probe:submit_bio
 0.000 probe:submit_bio:(ac3aee00) bi_sector=7145704)
 0.017 probe:submit_bio:(ac3aee00) bi_sector=7145712)
 0.019 probe:submit_bio:(ac3aee00) bi_sector=7145720)
 2.567 probe:submit_bio:(ac3aee00) bi_sector=7145728)
  5631.919 probe:submit_bio:(ac3aee00) bi_sector=0)
  5631.941 probe:submit_bio:(ac3aee00) bi_sector=8)
  5631.945 probe:submit_bio:(ac3aee00) bi_sector=16)
  5631.948 probe:submit_bio:(ac3aee00) bi_sector=24)
  ^C#

With callchains:

  # trace --no-syscalls --ev probe:submit_bio/max-stack=10/
 0.000 probe:submit_bio:(ac3aee00) bi_sector=50662544)
   submit_bio+0xa821 ([kernel.kallsyms])
   submit_bh+0xa8200013 ([kernel.kallsyms])
   
jbd2_journal_commit_transaction+0xa8200691 ([kernel.kallsyms])
   kjournald2+0xa82000ca ([kernel.kallsyms])
   kthread+0xa82000d8 ([kernel.kallsyms])

[PATCH v3] perf probe: Support signedness casting

2016-08-08 Thread Naohiro Aota
Perf-probe detects a variable's type and uses the detected type to add a new
probe. Then, kprobes prints the variable in hexadecimal format if the
variable is unsigned and in decimal if it is signed.

We sometimes want to see an unsigned variable in decimal format (e.g.
sector_t or size_t). In that case, we need to investigate the variable's
size manually just to specify its signedness.

This patch adds signedness casting support. By specifying "s" or "u" as a
type, perf-probe will investigate the variable size as usual and use
the specified signedness.

E.g. without this:

$ perf probe -a 'submit_bio bio->bi_iter.bi_sector'
Added new event:
  probe:submit_bio (on submit_bio with bi_sector=bio->bi_iter.bi_sector)
You can now use it in all perf tools, such as:
perf record -e probe:submit_bio -aR sleep 1
$ cat trace_pipe|head
  dbench-9692  [003] d..1   971.096633: submit_bio: 
(submit_bio+0x0/0x140) bi_sector=0x3a3d00
  dbench-9692  [003] d..1   971.096685: submit_bio: 
(submit_bio+0x0/0x140) bi_sector=0x1a3d80
  dbench-9692  [003] d..1   971.096687: submit_bio: 
(submit_bio+0x0/0x140) bi_sector=0x3a3d80
...
// need to investigate the variable size
$ perf probe -a 'submit_bio bio->bi_iter.bi_sector:s64'
Added new event:
  probe:submit_bio (on submit_bio with bi_sector=bio->bi_iter.bi_sector:s64)
You can now use it in all perf tools, such as:
perf record -e probe:submit_bio -aR sleep 1

With this:

// just use "s" to cast its signedness
$ perf probe -v -a 'submit_bio bio->bi_iter.bi_sector:s'
Added new event:
  probe:submit_bio (on submit_bio with bi_sector=bio->bi_iter.bi_sector:s)
You can now use it in all perf tools, such as:
perf record -e probe:submit_bio -aR sleep 1
$ cat trace_pipe|head
  dbench-9689  [001] d..1  1212.391237: submit_bio: 
(submit_bio+0x0/0x140) bi_sector=128
  dbench-9689  [001] d..1  1212.391252: submit_bio: 
(submit_bio+0x0/0x140) bi_sector=131072
  dbench-9697  [006] d..1  1212.398611: submit_bio: 
(submit_bio+0x0/0x140) bi_sector=30208

This commit also updates perf-probe.txt to describe "types". Most parts
are based on existing documentation: Documentation/trace/kprobetrace.txt

Signed-off-by: Naohiro Aota 
---
 tools/perf/Documentation/perf-probe.txt | 10 +-
 tools/perf/util/probe-finder.c  | 15 ---
 2 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/tools/perf/Documentation/perf-probe.txt 
b/tools/perf/Documentation/perf-probe.txt
index 736da44..b303bcd 100644
--- a/tools/perf/Documentation/perf-probe.txt
+++ b/tools/perf/Documentation/perf-probe.txt
@@ -176,10 +176,18 @@ Each probe argument follows below syntax.
 
 'NAME' specifies the name of this argument (optional). You can use the name of 
local variable, local data structure member (e.g. var->field, var.field2), 
local array with fixed index (e.g. array[1], var->array[0], var->pointer[2]), 
or kprobe-tracer argument format (e.g. $retval, %ax, etc). Note that the name 
of this argument will be set as the last member name if you specify a local 
data structure member (e.g. field2 for 'var->field1.field2'.)
 '$vars' and '$params' special arguments are also available for NAME, '$vars' 
is expanded to the local variables (including function parameters) which can 
access at given probe point. '$params' is expanded to only the function 
parameters.
-'TYPE' casts the type of this argument (optional). If omitted, perf probe 
automatically set the type based on debuginfo. You can specify 'string' type 
only for the local variable or structure member which is an array of or a 
pointer to 'char' or 'unsigned char' type.
+'TYPE' casts the type of this argument (optional). If omitted, perf probe 
automatically set the type based on debuginfo. Currently, basic types 
(u8/u16/u32/u64/s8/s16/s32/s64), signedness casting (u/s), "string" and 
bitfield are supported. (see TYPES for detail)
 
 On x86 systems %REG is always the short form of the register: for example %AX. 
%RAX or %EAX is not valid.
 
+TYPES
+-
+Basic types (u8/u16/u32/u64/s8/s16/s32/s64) are integer types. Prefix 's' and 
'u' means those types are signed and unsigned respectively. Traced arguments 
are shown in decimal (signed) or hex (unsigned). You can also use 's' or 'u' to 
specify only signedness and leave its size auto-detected by perf probe.
+String type is a special type, which fetches a "null-terminated" string from 
kernel space. This means it will fail and store NULL if the string container 
has been paged out. You can specify 'string' type only for the local variable 
or structure member which is an array of or a pointer to 'char' or 'unsigned 
char

[PATCH v2] perf probe: Support signedness casting

2016-08-05 Thread Naohiro Aota
Perf-probe detects a variable's type and uses the detected type to add a new
probe. Then, kprobes prints the variable in hexadecimal format if the
variable is unsigned and in decimal if it is signed.

We sometimes want to see an unsigned variable in decimal format (e.g.
sector_t or size_t). In that case, we need to investigate the variable's
size manually just to specify its signedness.

This patch adds signedness casting support. By specifying "s" or "u" as a
type, perf-probe will investigate the variable size as usual and use
the specified signedness.

E.g. without this:

$ perf probe -a 'submit_bio bio->bi_iter.bi_sector'
Added new event:
  probe:submit_bio (on submit_bio with bi_sector=bio->bi_iter.bi_sector)
You can now use it in all perf tools, such as:
perf record -e probe:submit_bio -aR sleep 1
$ cat trace_pipe|head
  dbench-9692  [003] d..1   971.096633: submit_bio: 
(submit_bio+0x0/0x140) bi_sector=0x3a3d00
  dbench-9692  [003] d..1   971.096685: submit_bio: 
(submit_bio+0x0/0x140) bi_sector=0x1a3d80
  dbench-9692  [003] d..1   971.096687: submit_bio: 
(submit_bio+0x0/0x140) bi_sector=0x3a3d80
...
// need to investigate the variable size
$ perf probe -a 'submit_bio bio->bi_iter.bi_sector:s64'
Added new event:
  probe:submit_bio (on submit_bio with bi_sector=bio->bi_iter.bi_sector:s64)
You can now use it in all perf tools, such as:
perf record -e probe:submit_bio -aR sleep 1

With this:

// just use "s" to cast its signedness
$ perf probe -v -a 'submit_bio bio->bi_iter.bi_sector:s'
Added new event:
  probe:submit_bio (on submit_bio with bi_sector=bio->bi_iter.bi_sector:s)
You can now use it in all perf tools, such as:
perf record -e probe:submit_bio -aR sleep 1
$ cat trace_pipe|head
  dbench-9689  [001] d..1  1212.391237: submit_bio: 
(submit_bio+0x0/0x140) bi_sector=128
  dbench-9689  [001] d..1  1212.391252: submit_bio: 
(submit_bio+0x0/0x140) bi_sector=131072
  dbench-9697  [006] d..1  1212.398611: submit_bio: 
(submit_bio+0x0/0x140) bi_sector=30208

This commit also updates perf-probe.txt to describe "types". Most parts
are based on the existing documentation: Documentation/trace/kprobetrace.txt

Signed-off-by: Naohiro Aota 
---
 tools/perf/Documentation/perf-probe.txt | 10 +-
 tools/perf/util/probe-finder.c  | 15 ---
 2 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/tools/perf/Documentation/perf-probe.txt 
b/tools/perf/Documentation/perf-probe.txt
index 736da44..a23b124 100644
--- a/tools/perf/Documentation/perf-probe.txt
+++ b/tools/perf/Documentation/perf-probe.txt
@@ -176,10 +176,18 @@ Each probe argument follows below syntax.
 
 'NAME' specifies the name of this argument (optional). You can use the name of 
local variable, local data structure member (e.g. var->field, var.field2), 
local array with fixed index (e.g. array[1], var->array[0], var->pointer[2]), 
or kprobe-tracer argument format (e.g. $retval, %ax, etc). Note that the name 
of this argument will be set as the last member name if you specify a local 
data structure member (e.g. field2 for 'var->field1.field2'.)
 '$vars' and '$params' special arguments are also available for NAME, '$vars' 
is expanded to the local variables (including function parameters) which can 
access at given probe point. '$params' is expanded to only the function 
parameters.
-'TYPE' casts the type of this argument (optional). If omitted, perf probe 
automatically set the type based on debuginfo. You can specify 'string' type 
only for the local variable or structure member which is an array of or a 
pointer to 'char' or 'unsigned char' type.
+'TYPE' casts the type of this argument (optional). If omitted, perf probe 
automatically set the type based on debuginfo. Currently, basic types 
(u8/u16/u32/u64/s8/s16/s32/s64), "string" and bitfield are supported. (see 
TYPES for detail)
 
 On x86 systems %REG is always the short form of the register: for example %AX. 
%RAX or %EAX is not valid.
 
+TYPES
+-
+Basic types (u8/u16/u32/u64/s8/s16/s32/s64) are integer types. Prefix 's' and 
'u' means those types are signed and unsigned respectively. Traced arguments 
are shown in decimal (signed) or hex (unsigned). You can also use 's' or 'u' to 
specify only signedness and leave its size auto-detected by perf probe.
+String type is a special type, which fetches a "null-terminated" string from 
kernel space. This means it will fail and store NULL if the string container 
has been paged out. You can specify 'string' type only for the local variable 
or structure member which is an array of or a pointer to 'char' or 'unsigned 
char' type.
+Bitfield i

Re: [PATCH] perf probe: support signedness casting

2016-08-05 Thread Naohiro Aota
2016-08-05 (金) の 18:10 +0900 に Masami Hiramatsu さんは書きました:
> On Fri, 5 Aug 2016 14:33:53 +0900
> Naohiro Aota  wrote:
> 
> > 
> > Perf-probe detects a variable's type and use the detected type to
> > add new
> > probe. Then, kprobes prints its variable in hexadecimal format if
> > the
> > variable is unsigned and prints in decimal if it is signed.
> > 
> > We sometimes want to see unsigned variable in decimal format (e.g.
> > sector_t or size_t). In that case, we need to investigate
> > variable's
> > size manually to specify just signedness.
> > 
> > This patch add signedness casting support. By specifying "s" or "u"
> > as a
> > type, perf-probe will investigate variable size as usual and use
> > the specified signedness.
> OK, I could understand what the patch does from code. Please add an
> example, and update tools/perf/Documentation/perf-probe.txt too.

Thanks for the review. I'm posting an updated patch with type
descriptions.

> Thank you,
> 
> > 
> > 
> > Signed-off-by: Naohiro Aota 
> > ---
> >  tools/perf/util/probe-finder.c | 15 ---
> >  1 file changed, 12 insertions(+), 3 deletions(-)
> > 
> > diff --git a/tools/perf/util/probe-finder.c
> > b/tools/perf/util/probe-finder.c
> > index f2d9ff0..5c290c6 100644
> > --- a/tools/perf/util/probe-finder.c
> > +++ b/tools/perf/util/probe-finder.c
> > @@ -297,10 +297,13 @@ static int convert_variable_type(Dwarf_Die
> > *vr_die,
> >     char sbuf[STRERR_BUFSIZE];
> >     int bsize, boffs, total;
> >     int ret;
> > +   char sign;
> >  
> >     /* TODO: check all types */
> > -   if (cast && strcmp(cast, "string") != 0) {
> > +   if (cast && strcmp(cast, "string") != 0 &&
> > +   strcmp(cast, "s") != 0 && strcmp(cast, "u") != 0) {
> >     /* Non string type is OK */
> > +   /* and respect signedness cast */
> >     tvar->type = strdup(cast);
> >     return (tvar->type == NULL) ? -ENOMEM : 0;
> >     }
> > @@ -361,6 +364,13 @@ static int convert_variable_type(Dwarf_Die
> > *vr_die,
> >     return (tvar->type == NULL) ? -ENOMEM : 0;
> >     }
> >  
> > +   if (cast && (strcmp(cast, "u") == 0))
> > +   sign = 'u';
> > +   else if (cast && (strcmp(cast, "s") == 0))
> > +   sign = 's';
> > +   else
> > +   sign = die_is_signed_type(&type) ? 's' : 'u';
> > +
> >     ret = dwarf_bytesize(&type);
> >     if (ret <= 0)
> >     /* No size ... try to use default type */
> > @@ -373,8 +383,7 @@ static int convert_variable_type(Dwarf_Die
> > *vr_die,
> >     dwarf_diename(&type),
> > MAX_BASIC_TYPE_BITS);
> >     ret = MAX_BASIC_TYPE_BITS;
> >     }
> > -   ret = snprintf(buf, 16, "%c%d",
> > -      die_is_signed_type(&type) ? 's' : 'u',
> > ret);
> > +   ret = snprintf(buf, 16, "%c%d", sign, ret);
> >  
> >  formatted:
> >     if (ret < 0 || ret >= 16) {
> > -- 
> > 2.7.3
> > 
> 
Western Digital Corporation (and its subsidiaries) E-mail Confidentiality 
Notice & Disclaimer:

This e-mail and any files transmitted with it may contain confidential or 
legally privileged information of WDC and/or its affiliates, and are intended 
solely for the use of the individual or entity to which they are addressed. If 
you are not the intended recipient, any disclosure, copying, distribution or 
any action taken or omitted to be taken in reliance on it, is prohibited. If 
you have received this e-mail in error, please notify the sender immediately 
and delete the e-mail in its entirety from your system.


[PATCH] perf probe: support signedness casting

2016-08-04 Thread Naohiro Aota
Perf-probe detects a variable's type and uses the detected type to add a new
probe. Then, kprobes prints the variable in hexadecimal format if the
variable is unsigned and in decimal format if it is signed.

We sometimes want to see an unsigned variable in decimal format (e.g.
sector_t or size_t). In that case, we need to investigate the variable's
size manually just to specify its signedness.

This patch adds signedness casting support. By specifying "s" or "u" as a
type, perf-probe will detect the variable size as usual and use
the specified signedness.

Signed-off-by: Naohiro Aota 
---
 tools/perf/util/probe-finder.c | 15 ---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index f2d9ff0..5c290c6 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -297,10 +297,13 @@ static int convert_variable_type(Dwarf_Die *vr_die,
char sbuf[STRERR_BUFSIZE];
int bsize, boffs, total;
int ret;
+   char sign;
 
/* TODO: check all types */
-   if (cast && strcmp(cast, "string") != 0) {
+   if (cast && strcmp(cast, "string") != 0 &&
+   strcmp(cast, "s") != 0 && strcmp(cast, "u") != 0) {
/* Non string type is OK */
+   /* and respect signedness cast */
tvar->type = strdup(cast);
return (tvar->type == NULL) ? -ENOMEM : 0;
}
@@ -361,6 +364,13 @@ static int convert_variable_type(Dwarf_Die *vr_die,
return (tvar->type == NULL) ? -ENOMEM : 0;
}
 
+   if (cast && (strcmp(cast, "u") == 0))
+   sign = 'u';
+   else if (cast && (strcmp(cast, "s") == 0))
+   sign = 's';
+   else
+   sign = die_is_signed_type(&type) ? 's' : 'u';
+
ret = dwarf_bytesize(&type);
if (ret <= 0)
/* No size ... try to use default type */
@@ -373,8 +383,7 @@ static int convert_variable_type(Dwarf_Die *vr_die,
dwarf_diename(&type), MAX_BASIC_TYPE_BITS);
ret = MAX_BASIC_TYPE_BITS;
}
-   ret = snprintf(buf, 16, "%c%d",
-  die_is_signed_type(&type) ? 's' : 'u', ret);
+   ret = snprintf(buf, 16, "%c%d", sign, ret);
 
 formatted:
if (ret < 0 || ret >= 16) {
-- 
2.7.3

Western Digital Corporation (and its subsidiaries) E-mail Confidentiality 
Notice & Disclaimer:

This e-mail and any files transmitted with it may contain confidential or 
legally privileged information of WDC and/or its affiliates, and are intended 
solely for the use of the individual or entity to which they are addressed. If 
you are not the intended recipient, any disclosure, copying, distribution or 
any action taken or omitted to be taken in reliance on it, is prohibited. If 
you have received this e-mail in error, please notify the sender immediately 
and delete the e-mail in its entirety from your system.



Re: [PATCH][RESEND] btrfs: fix search key advancing condition

2015-07-29 Thread Naohiro Aota
Hello, list.

Could anyone take a look at this? I believe this is an issue slowing
down ioctl(BTRFS_IOC_TREE_SEARCH) if the target key is missing.

On Tue, Jun 30, 2015 at 11:25 AM, Naohiro Aota  wrote:
> The search key advancing condition used in copy_to_sk() is loose. It can
> advance the key even if it reaches sk->max_*: e.g. when the max key = (512,
> 1024, -1) and the current key = (512, 1025, 10), it increments the
> offset by 1, continues hopeless search from (512, 1025, 11). This issue
> make ioctl() to take unexpectedly long time scanning all the leaf a blocks
> one by one.
>
> This commit fix the problem using standard way of key comparison:
> btrfs_comp_cpu_keys()
>
> Signed-off-by: Naohiro Aota 
> ---
>  fs/btrfs/ioctl.c | 12 +---
>  1 file changed, 9 insertions(+), 3 deletions(-)
>
> diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
> index 1c22c65..07dc01d 100644
> --- a/fs/btrfs/ioctl.c
> +++ b/fs/btrfs/ioctl.c
> @@ -1932,6 +1932,7 @@ static noinline int copy_to_sk(struct btrfs_root *root,
> u64 found_transid;
> struct extent_buffer *leaf;
> struct btrfs_ioctl_search_header sh;
> +   struct btrfs_key test;
> unsigned long item_off;
> unsigned long item_len;
> int nritems;
> @@ -2015,12 +2016,17 @@ static noinline int copy_to_sk(struct btrfs_root 
> *root,
> }
>  advance_key:
> ret = 0;
> -   if (key->offset < (u64)-1 && key->offset < sk->max_offset)
> +   test.objectid = sk->max_objectid;
> +   test.type = sk->max_type;
> +   test.offset = sk->max_offset;
> +   if (btrfs_comp_cpu_keys(key, &test) >= 0)
> +   ret = 1;
> +   else if (key->offset < (u64)-1)
> key->offset++;
> -   else if (key->type < (u8)-1 && key->type < sk->max_type) {
> +   else if (key->type < (u8)-1) {
> key->offset = 0;
> key->type++;
> -   } else if (key->objectid < (u64)-1 && key->objectid < 
> sk->max_objectid) {
> +   } else if (key->objectid < (u64)-1) {
> key->offset = 0;
> key->type = 0;
> key->objectid++;
> --
> 2.4.4
>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH][RESEND] btrfs: fix search key advancing condition

2015-06-29 Thread Naohiro Aota
The search key advancing condition used in copy_to_sk() is loose. It can
advance the key even if it reaches sk->max_*: e.g. when the max key = (512,
1024, -1) and the current key = (512, 1025, 10), it increments the
offset by 1 and continues a hopeless search from (512, 1025, 11). This issue
makes ioctl() take an unexpectedly long time scanning all the leaf blocks
one by one.

This commit fixes the problem using the standard way of key comparison:
btrfs_comp_cpu_keys()

Signed-off-by: Naohiro Aota 
---
 fs/btrfs/ioctl.c | 12 +---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 1c22c65..07dc01d 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1932,6 +1932,7 @@ static noinline int copy_to_sk(struct btrfs_root *root,
u64 found_transid;
struct extent_buffer *leaf;
struct btrfs_ioctl_search_header sh;
+   struct btrfs_key test;
unsigned long item_off;
unsigned long item_len;
int nritems;
@@ -2015,12 +2016,17 @@ static noinline int copy_to_sk(struct btrfs_root *root,
}
 advance_key:
ret = 0;
-   if (key->offset < (u64)-1 && key->offset < sk->max_offset)
+   test.objectid = sk->max_objectid;
+   test.type = sk->max_type;
+   test.offset = sk->max_offset;
+   if (btrfs_comp_cpu_keys(key, &test) >= 0)
+   ret = 1;
+   else if (key->offset < (u64)-1)
key->offset++;
-   else if (key->type < (u8)-1 && key->type < sk->max_type) {
+   else if (key->type < (u8)-1) {
key->offset = 0;
key->type++;
-   } else if (key->objectid < (u64)-1 && key->objectid < sk->max_objectid) 
{
+   } else if (key->objectid < (u64)-1) {
key->offset = 0;
key->type = 0;
key->objectid++;
-- 
2.4.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] btrfs: tweak key advancing condition

2015-06-03 Thread Naohiro Aota
The key advancing condition used in copy_to_sk() is loose. It can
advance the key even if it reaches sk->max_*: e.g. when the max key = (512,
1024, -1) and the current key = (512, 1025, 10), it increments the
offset by 1 and continues a hopeless search from (512, 1025, 11). This issue
makes ioctl() take a lot of time scanning all the leaf blocks one by
one.

This commit fixes the problem using the standard way of key comparison:
btrfs_comp_cpu_keys()

Signed-off-by: Naohiro Aota 
---
 fs/btrfs/ioctl.c | 12 +---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 1c22c65..07dc01d 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1932,6 +1932,7 @@ static noinline int copy_to_sk(struct btrfs_root *root,
u64 found_transid;
struct extent_buffer *leaf;
struct btrfs_ioctl_search_header sh;
+   struct btrfs_key test;
unsigned long item_off;
unsigned long item_len;
int nritems;
@@ -2015,12 +2016,17 @@ static noinline int copy_to_sk(struct btrfs_root *root,
}
 advance_key:
ret = 0;
-   if (key->offset < (u64)-1 && key->offset < sk->max_offset)
+   test.objectid = sk->max_objectid;
+   test.type = sk->max_type;
+   test.offset = sk->max_offset;
+   if (btrfs_comp_cpu_keys(key, &test) >= 0)
+   ret = 1;
+   else if (key->offset < (u64)-1)
key->offset++;
-   else if (key->type < (u8)-1 && key->type < sk->max_type) {
+   else if (key->type < (u8)-1) {
key->offset = 0;
key->type++;
-   } else if (key->objectid < (u64)-1 && key->objectid < sk->max_objectid) 
{
+   } else if (key->objectid < (u64)-1) {
key->offset = 0;
key->type = 0;
key->objectid++;
-- 
2.4.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[tip:perf/urgent] perf probe: Find compilation directory path for lazy matching

2015-04-14 Thread tip-bot for Naohiro Aota
Commit-ID:  09ed8975c4b13be4469899b210f0e0936021ee8f
Gitweb: http://git.kernel.org/tip/09ed8975c4b13be4469899b210f0e0936021ee8f
Author: Naohiro Aota 
AuthorDate: Fri, 13 Mar 2015 14:18:40 +0900
Committer:  Arnaldo Carvalho de Melo 
CommitDate: Mon, 13 Apr 2015 20:11:05 -0300

perf probe: Find compilation directory path for lazy matching

If we use lazy matching, perf fails to open a source file when the perf
command is invoked outside of the compilation directory:

$ perf probe -a '__schedule;clear_*'
Failed to open kernel/sched/core.c: No such file or directory
  Error: Failed to add events. (-2)

OTOH, other commands like "probe -L" can resolve the source directory by
themselves. Let's make it possible for lazy matching too!

Signed-off-by: Naohiro Aota 
Acked-by: Masami Hiramatsu 
Cc: He Kuang 
Cc: Jiri Olsa 
Cc: Namhyung Kim 
Cc: Paul Mackerras 
Cc: Peter Zijlstra 
Link: http://lkml.kernel.org/r/1426223923-1493-1-git-send-email-na...@elisp.net
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/perf/util/probe-event.c  | 59 ---
 tools/perf/util/probe-finder.c | 71 +-
 tools/perf/util/probe-finder.h |  4 +++
 3 files changed, 74 insertions(+), 60 deletions(-)

diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 5483d98..d8bb616 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -655,65 +655,6 @@ static int try_to_find_probe_trace_events(struct 
perf_probe_event *pev,
return ntevs;
 }
 
-/*
- * Find a src file from a DWARF tag path. Prepend optional source path prefix
- * and chop off leading directories that do not exist. Result is passed back as
- * a newly allocated path on success.
- * Return 0 if file was found and readable, -errno otherwise.
- */
-static int get_real_path(const char *raw_path, const char *comp_dir,
-char **new_path)
-{
-   const char *prefix = symbol_conf.source_prefix;
-
-   if (!prefix) {
-   if (raw_path[0] != '/' && comp_dir)
-   /* If not an absolute path, try to use comp_dir */
-   prefix = comp_dir;
-   else {
-   if (access(raw_path, R_OK) == 0) {
-   *new_path = strdup(raw_path);
-   return *new_path ? 0 : -ENOMEM;
-   } else
-   return -errno;
-   }
-   }
-
-   *new_path = malloc((strlen(prefix) + strlen(raw_path) + 2));
-   if (!*new_path)
-   return -ENOMEM;
-
-   for (;;) {
-   sprintf(*new_path, "%s/%s", prefix, raw_path);
-
-   if (access(*new_path, R_OK) == 0)
-   return 0;
-
-   if (!symbol_conf.source_prefix) {
-   /* In case of searching comp_dir, don't retry */
-   zfree(new_path);
-   return -errno;
-   }
-
-   switch (errno) {
-   case ENAMETOOLONG:
-   case ENOENT:
-   case EROFS:
-   case EFAULT:
-   raw_path = strchr(++raw_path, '/');
-   if (!raw_path) {
-   zfree(new_path);
-   return -ENOENT;
-   }
-   continue;
-
-   default:
-   zfree(new_path);
-   return -errno;
-   }
-   }
-}
-
 #define LINEBUF_SIZE 256
 #define NR_ADDITIONAL_LINES 2
 
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index 7831e2d..ff7865c 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -855,11 +855,22 @@ static int probe_point_lazy_walker(const char *fname, int 
lineno,
 static int find_probe_point_lazy(Dwarf_Die *sp_die, struct probe_finder *pf)
 {
int ret = 0;
+   char *fpath;
 
if (intlist__empty(pf->lcache)) {
+   const char *comp_dir;
+
+   comp_dir = cu_get_comp_dir(&pf->cu_die);
+   ret = get_real_path(pf->fname, comp_dir, &fpath);
+   if (ret < 0) {
+   pr_warning("Failed to find source file path.\n");
+   return ret;
+   }
+
/* Matching lazy line pattern */
-   ret = find_lazy_match_lines(pf->lcache, pf->fname,
+   ret = find_lazy_match_lines(pf->lcache, fpath,
pf->pev->point.lazy_line);
+   free(fpath);
if (ret <= 0)
return ret;
}
@@ -1622,3 +1633,61 @@ found:
return (ret < 0) ? ret : lf.found;
 }
 
+/*
+ * Find a src file from a DWARF tag path. Prepend o

[PATCH v3] perf probe: Find compilation directory path for lazy matching

2015-03-12 Thread Naohiro Aota
If we use lazy matching, perf fails to open a source file when the perf
command is invoked outside of the compilation directory:

$ perf probe -a '__schedule;clear_*'
Failed to open kernel/sched/core.c: No such file or directory
  Error: Failed to add events. (-2)

OTOH, other commands like "probe -L" can resolve the source directory by
themselves. Let's make it possible for lazy matching too!

Signed-off-by: Naohiro Aota 
---
 tools/perf/util/probe-event.c  | 59 ---
 tools/perf/util/probe-finder.c | 71 +-
 tools/perf/util/probe-finder.h |  4 +++
 3 files changed, 74 insertions(+), 60 deletions(-)

diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index f272a71..32a429b 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -648,65 +648,6 @@ static int try_to_find_probe_trace_events(struct 
perf_probe_event *pev,
return ntevs;
 }
 
-/*
- * Find a src file from a DWARF tag path. Prepend optional source path prefix
- * and chop off leading directories that do not exist. Result is passed back as
- * a newly allocated path on success.
- * Return 0 if file was found and readable, -errno otherwise.
- */
-static int get_real_path(const char *raw_path, const char *comp_dir,
-char **new_path)
-{
-   const char *prefix = symbol_conf.source_prefix;
-
-   if (!prefix) {
-   if (raw_path[0] != '/' && comp_dir)
-   /* If not an absolute path, try to use comp_dir */
-   prefix = comp_dir;
-   else {
-   if (access(raw_path, R_OK) == 0) {
-   *new_path = strdup(raw_path);
-   return *new_path ? 0 : -ENOMEM;
-   } else
-   return -errno;
-   }
-   }
-
-   *new_path = malloc((strlen(prefix) + strlen(raw_path) + 2));
-   if (!*new_path)
-   return -ENOMEM;
-
-   for (;;) {
-   sprintf(*new_path, "%s/%s", prefix, raw_path);
-
-   if (access(*new_path, R_OK) == 0)
-   return 0;
-
-   if (!symbol_conf.source_prefix) {
-   /* In case of searching comp_dir, don't retry */
-   zfree(new_path);
-   return -errno;
-   }
-
-   switch (errno) {
-   case ENAMETOOLONG:
-   case ENOENT:
-   case EROFS:
-   case EFAULT:
-   raw_path = strchr(++raw_path, '/');
-   if (!raw_path) {
-   zfree(new_path);
-   return -ENOENT;
-   }
-   continue;
-
-   default:
-   zfree(new_path);
-   return -errno;
-   }
-   }
-}
-
 #define LINEBUF_SIZE 256
 #define NR_ADDITIONAL_LINES 2
 
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index 46f009a..0fd2df4 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -849,11 +849,22 @@ static int probe_point_lazy_walker(const char *fname, int 
lineno,
 static int find_probe_point_lazy(Dwarf_Die *sp_die, struct probe_finder *pf)
 {
int ret = 0;
+   char *fpath;
 
if (intlist__empty(pf->lcache)) {
+   const char *comp_dir;
+
+   comp_dir = cu_get_comp_dir(&pf->cu_die);
+   ret = get_real_path(pf->fname, comp_dir, &fpath);
+   if (ret < 0) {
+   pr_warning("Failed to find source file path.\n");
+   return ret;
+   }
+
/* Matching lazy line pattern */
-   ret = find_lazy_match_lines(pf->lcache, pf->fname,
+   ret = find_lazy_match_lines(pf->lcache, fpath,
pf->pev->point.lazy_line);
+   free(fpath);
if (ret <= 0)
return ret;
}
@@ -1616,3 +1627,61 @@ found:
return (ret < 0) ? ret : lf.found;
 }
 
+/*
+ * Find a src file from a DWARF tag path. Prepend optional source path prefix
+ * and chop off leading directories that do not exist. Result is passed back as
+ * a newly allocated path on success.
+ * Return 0 if file was found and readable, -errno otherwise.
+ */
+int get_real_path(const char *raw_path, const char *comp_dir,
+char **new_path)
+{
+   const char *prefix = symbol_conf.source_prefix;
+
+   if (!prefix) {
+   if (raw_path[0] != '/' && comp_dir)
+   /* If not an absolute path, try to use comp_dir */
+   prefi

Re: Re: [PATCH v2] perf probe: Find compilation directory path for lazy matching

2015-03-12 Thread Naohiro Aota
On Thu, Mar 12, 2015 at 10:42 AM, Masami Hiramatsu
 wrote:
> (2015/03/11 22:30), Arnaldo Carvalho de Melo wrote:
>> Em Wed, Mar 04, 2015 at 09:34:38PM +0900, Masami Hiramatsu escreveu:
>>> (2015/03/04 16:52), Naohiro Aota wrote:
>>>> If we use lazy matching, it failed to open a souce file if perf command
>>>> is invoked outside of compilation directory:
>>>>
>>>> $ perf probe -a '__schedule;clear_*'
>>>> Failed to open kernel/sched/core.c: No such file or directory
>>>>   Error: Failed to add events. (-2)
>>>>
>>>> OTOH, other commands like "probe -L" can solve the souce directory by
>>>> themselves. Let's make it possible for lazy matching too!
>>>>
>>>
>>> Looks good to me :)
>>>
>>> Acked-by: Masami Hiramatsu 
>>
>> This doesn't make sense... se below:
>>
>>> Thank you!
>>>
>>>> diff --git a/tools/perf/util/probe-finder.c 
>>>> b/tools/perf/util/probe-finder.c
>>>> index 46f009a..e6c0262 100644
>>>> --- a/tools/perf/util/probe-finder.c
>>>> +++ b/tools/perf/util/probe-finder.c
>>>> @@ -849,11 +849,22 @@ static int probe_point_lazy_walker(const char 
>>>> *fname, int lineno,
>>>>  static int find_probe_point_lazy(Dwarf_Die *sp_die, struct probe_finder 
>>>> *pf)
>>>>  {
>>>> int ret = 0;
>>>> +   char *fpath;
>>>>
>>>> if (intlist__empty(pf->lcache)) {
>>>> +   const char *comp_dir;
>>>> +
>>>> +   comp_dir = cu_get_comp_dir(&pf->cu_die);
>>>> +   ret = get_real_path(pf->fname, comp_dir, &fpath);
>>>> +   if (ret < 0) {
>>>> +   pr_warning("Failed to find source file path.\n");
>>>> +   return ret;
>>>> +   }
>>>> +
>>>> /* Matching lazy line pattern */
>>>> -   ret = find_lazy_match_lines(pf->lcache, pf->fname,
>>>> +   ret = find_lazy_match_lines(pf->lcache, fpath,
>>>> pf->pev->point.lazy_line);
>>>> +   free(fpath);
>>>> if (ret <= 0)
>>>> return ret;
>>>> }
>>>> @@ -1616,3 +1627,61 @@ found:
>>>> return (ret < 0) ? ret : lf.found;
>>>>  }
>>>>
>>>> +/*
>>>> + * Find a src file from a DWARF tag path. Prepend optional source path 
>>>> prefix
>>>> + * and chop off leading directories that do not exist. Result is passed 
>>>> back as
>>>> + * a newly allocated path on success.
>>>> + * Return 0 if file was found and readable, -errno otherwise.
>>>> + */
>>>> +static int get_real_path(const char *raw_path, const char *comp_dir,
>>>> +char **new_path)
>>
>> The function is marked "static"
>>
>>>> +{
>>>> +   const char *prefix = symbol_conf.source_prefix;
>>>> +
>>>> +   if (!prefix) {
>>>> +   if (raw_path[0] != '/' && comp_dir)
>>>> +   /* If not an absolute path, try to use comp_dir */
>>>> +   prefix = comp_dir;
>>>> +   else {
>>>> +   if (access(raw_path, R_OK) == 0) {
>>>> +   *new_path = strdup(raw_path);
>>>> +   return *new_path ? 0 : -ENOMEM;
>>>> +   } else
>>>> +   return -errno;
>>>> +   }
>>>> +   }
>>>> +
>>>> +   *new_path = malloc((strlen(prefix) + strlen(raw_path) + 2));
>>>> +   if (!*new_path)
>>>> +   return -ENOMEM;
>>>> +
>>>> +   for (;;) {
>>>> +   sprintf(*new_path, "%s/%s", prefix, raw_path);
>>>> +
>>>> +   if (access(*new_path, R_OK) == 0)
>>>> +   return 0;
>>>> +
>>>> +   if (!symbol_conf.source_prefix) {
>>>> +   /* In case of searching comp_dir, don't retry */
>>>> +   zfree(new_path);
>>>> +   return -errno;
>>>> +   }
>>>> +
>>>&

[PATCH v2] perf probe: Find compilation directory path for lazy matching

2015-03-03 Thread Naohiro Aota
If we use lazy matching, perf fails to open a source file when the perf
command is invoked outside of the compilation directory:

$ perf probe -a '__schedule;clear_*'
Failed to open kernel/sched/core.c: No such file or directory
  Error: Failed to add events. (-2)

OTOH, other commands like "probe -L" can resolve the source directory by
themselves. Let's make it possible for lazy matching too!

Signed-off-by: Naohiro Aota 
---
 tools/perf/util/probe-event.c  | 59 ---
 tools/perf/util/probe-finder.c | 71 +-
 tools/perf/util/probe-finder.h |  4 +++
 3 files changed, 74 insertions(+), 60 deletions(-)

diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 1c570c2..adb8d1f 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -515,65 +515,6 @@ static int try_to_find_probe_trace_events(struct 
perf_probe_event *pev,
return ntevs;
 }
 
-/*
- * Find a src file from a DWARF tag path. Prepend optional source path prefix
- * and chop off leading directories that do not exist. Result is passed back as
- * a newly allocated path on success.
- * Return 0 if file was found and readable, -errno otherwise.
- */
-static int get_real_path(const char *raw_path, const char *comp_dir,
-char **new_path)
-{
-   const char *prefix = symbol_conf.source_prefix;
-
-   if (!prefix) {
-   if (raw_path[0] != '/' && comp_dir)
-   /* If not an absolute path, try to use comp_dir */
-   prefix = comp_dir;
-   else {
-   if (access(raw_path, R_OK) == 0) {
-   *new_path = strdup(raw_path);
-   return *new_path ? 0 : -ENOMEM;
-   } else
-   return -errno;
-   }
-   }
-
-   *new_path = malloc((strlen(prefix) + strlen(raw_path) + 2));
-   if (!*new_path)
-   return -ENOMEM;
-
-   for (;;) {
-   sprintf(*new_path, "%s/%s", prefix, raw_path);
-
-   if (access(*new_path, R_OK) == 0)
-   return 0;
-
-   if (!symbol_conf.source_prefix) {
-   /* In case of searching comp_dir, don't retry */
-   zfree(new_path);
-   return -errno;
-   }
-
-   switch (errno) {
-   case ENAMETOOLONG:
-   case ENOENT:
-   case EROFS:
-   case EFAULT:
-   raw_path = strchr(++raw_path, '/');
-   if (!raw_path) {
-   zfree(new_path);
-   return -ENOENT;
-   }
-   continue;
-
-   default:
-   zfree(new_path);
-   return -errno;
-   }
-   }
-}
-
 #define LINEBUF_SIZE 256
 #define NR_ADDITIONAL_LINES 2
 
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index 46f009a..e6c0262 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -849,11 +849,22 @@ static int probe_point_lazy_walker(const char *fname, int 
lineno,
 static int find_probe_point_lazy(Dwarf_Die *sp_die, struct probe_finder *pf)
 {
int ret = 0;
+   char *fpath;
 
if (intlist__empty(pf->lcache)) {
+   const char *comp_dir;
+
+   comp_dir = cu_get_comp_dir(&pf->cu_die);
+   ret = get_real_path(pf->fname, comp_dir, &fpath);
+   if (ret < 0) {
+   pr_warning("Failed to find source file path.\n");
+   return ret;
+   }
+
/* Matching lazy line pattern */
-   ret = find_lazy_match_lines(pf->lcache, pf->fname,
+   ret = find_lazy_match_lines(pf->lcache, fpath,
pf->pev->point.lazy_line);
+   free(fpath);
if (ret <= 0)
return ret;
}
@@ -1616,3 +1627,61 @@ found:
return (ret < 0) ? ret : lf.found;
 }
 
+/*
+ * Find a src file from a DWARF tag path. Prepend optional source path prefix
+ * and chop off leading directories that do not exist. Result is passed back as
+ * a newly allocated path on success.
+ * Return 0 if file was found and readable, -errno otherwise.
+ */
+static int get_real_path(const char *raw_path, const char *comp_dir,
+char **new_path)
+{
+   const char *prefix = symbol_conf.source_prefix;
+
+   if (!prefix) {
+   if (raw_path[0] != '/' && comp_dir)
+   /* If not an absolute path, try to 

[PATCH 1/2] perf probe: export get_real_path

2015-02-25 Thread Naohiro Aota
Export get_real_path() so that it can be used from util/probe-finder.c

Signed-off-by: Naohiro Aota 
---
 tools/perf/util/probe-event.c | 2 +-
 tools/perf/util/probe-event.h | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 919937e..1d0d505 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -520,7 +520,7 @@ static int try_to_find_probe_trace_events(struct 
perf_probe_event *pev,
  * a newly allocated path on success.
  * Return 0 if file was found and readable, -errno otherwise.
  */
-static int get_real_path(const char *raw_path, const char *comp_dir,
+int get_real_path(const char *raw_path, const char *comp_dir,
 char **new_path)
 {
const char *prefix = symbol_conf.source_prefix;
diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h
index e01e994..30a3391 100644
--- a/tools/perf/util/probe-event.h
+++ b/tools/perf/util/probe-event.h
@@ -135,6 +135,8 @@ extern int show_available_vars(struct perf_probe_event 
*pevs, int npevs,
   struct strfilter *filter, bool externs);
 extern int show_available_funcs(const char *module, struct strfilter *filter,
bool user);
+extern int get_real_path(const char *raw_path, const char *comp_dir,
+char **new_path);
 
 /* Maximum index number of event-name postfix */
 #define MAX_EVENT_INDEX1024
-- 
2.3.0

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 2/2] perf probe: Find compilation directory path for lazy matching

2015-02-25 Thread Naohiro Aota
If we use lazy matching, it fails to open a source file if the perf command
is invoked outside of the compilation directory:

$ perf probe -a '__schedule;clear_*'
Failed to open kernel/sched/core.c: No such file or directory
  Error: Failed to add events. (-2)

OTOH, other commands like "probe -L" can resolve the source directory by
themselves. Let's make it possible for lazy matching too!

Signed-off-by: Naohiro Aota 
---
 tools/perf/util/probe-finder.c | 15 ++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index b5247d7..8e0714c 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -39,6 +39,7 @@
 #include "util.h"
 #include "symbol.h"
 #include "probe-finder.h"
+#include "probe-event.h"
 
 /* Kprobe tracer basic type is up to u64 */
 #define MAX_BASIC_TYPE_BITS64
@@ -849,11 +850,23 @@ static int probe_point_lazy_walker(const char *fname, int 
lineno,
 static int find_probe_point_lazy(Dwarf_Die *sp_die, struct probe_finder *pf)
 {
int ret = 0;
+   char *fpath;
 
if (intlist__empty(pf->lcache)) {
+   const char *comp_dir;
+
+   comp_dir = cu_get_comp_dir(&pf->cu_die);
+   ret = get_real_path(pf->fname, comp_dir, &fpath);
+   if (ret < 0) {
+   free(fpath);
+   pr_warning("Failed to find source file path.\n");
+   return ret;
+   }
+
/* Matching lazy line pattern */
-   ret = find_lazy_match_lines(pf->lcache, pf->fname,
+   ret = find_lazy_match_lines(pf->lcache, fpath,
pf->pev->point.lazy_line);
+   free(fpath);
if (ret <= 0)
return ret;
}
-- 
2.3.0

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] btrfs: clear bio reference after submit_one_bio()

2015-01-05 Thread Naohiro Aota
After submit_one_bio(), `bio' can go away. However, submit_extent_page()
leaves `bio' referable if submit_one_bio() fails (e.g. -ENOMEM on OOM).
This causes an invalid paging request the next time submit_extent_page()
is called.

I reproduced the ENOMEM case with the following script (needs
CONFIG_FAIL_PAGE_ALLOC and CONFIG_FAULT_INJECTION_DEBUG_FS).

  #!/bin/bash

  dmesgout=dmesg.txt
  start=10
  end=30
  step=1000

  # btrfs options
  device=/dev/vdb1
  directory=/mnt/btrfs

  # fault-injection options
  percent=100
  times=3

  mkdir -p $directory || exit 1
  mount -o compress $device $directory || exit 1

  rm -f $directory/file || exit 1
  dd if=/dev/zero of=$directory/file bs=1M count=512 || exit 1

  for interval in `seq $start $step $end`; do
  dmesg -C
  echo 1 > /proc/sys/vm/drop_caches
  sync
  export FAILCMD_TYPE=fail_page_alloc
  ./failcmd.sh -p $percent -t $times -i $interval \
  --ignore-gfp-highmem=N --ignore-gfp-wait=N --min-order=0 \
  -- \
  cat $directory/file > /dev/null
  dmesg > ${dmesgout}
  if grep -q BUG: ${dmesgout}; then
  cat ${dmesgout}
  exit 1
  fi
  done

  umount $directory
  exit 0

Signed-off-by: Naohiro Aota 
---
 fs/btrfs/extent_io.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 4ebabd2..4421161 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2816,8 +2816,10 @@ static int submit_extent_page(int rw, struct 
extent_io_tree *tree,
bio_add_page(bio, page, page_size, offset) < page_size) {
ret = submit_one_bio(rw, bio, mirror_num,
 prev_bio_flags);
-   if (ret < 0)
+   if (ret < 0) {
+   *bio_ret = NULL;
return ret;
+   }
bio = NULL;
} else {
return 0;
-- 
2.2.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[tip:perf/core] perf python: Fix dependency for python/perf.so

2013-04-02 Thread tip-bot for Naohiro Aota
Commit-ID:  707ef2e69a53f2dc60cfa2ff5cb4bf5474206f33
Gitweb: http://git.kernel.org/tip/707ef2e69a53f2dc60cfa2ff5cb4bf5474206f33
Author: Naohiro Aota 
AuthorDate: Sun, 24 Mar 2013 22:37:54 +0900
Committer:  Arnaldo Carvalho de Melo 
CommitDate: Mon, 1 Apr 2013 12:23:01 -0300

perf python: Fix dependency for python/perf.so

The python/perf.so binding lacks a dependency on libtraceevent.a, which
causes the following error when building python/perf.so. This patch
introduces the dependency.

   $ make python/perf.so
   CHK -fstack-protector-all
   CHK -Wstack-protector
   CHK -Wvolatile-register-var
   CHK -D_FORTIFY_SOURCE=2
   CHK bionic
   CHK libelf
   CHK libdw
   CHK libunwind
   CHK -DLIBELF_MMAP
   CHK libaudit
   CHK libnewt
   CHK gtk2
   CHK -DHAVE_GTK_INFO_BAR
   CHK perl
   CHK python
   CHK python version
   CHK libbfd
   CHK -DHAVE_STRLCPY
   CHK -DHAVE_ON_EXIT
   CHK -DBACKTRACE_SUPPORT
   CHK libnuma
   GEN python/perf.so
   x86_64-pc-linux-gnu-gcc: error: ../lib/traceevent/libtraceevent.a: No such 
file or directory
   error: command 'x86_64-pc-linux-gnu-gcc' failed with exit status 1
   cp: cannot stat 'python_ext_build/lib/perf.so': No such file or directory
   make: *** [python/perf.so] Error 1

Signed-off-by: Naohiro Aota 
Cc: Ingo Molnar 
Cc: Paul Mackerras 
Cc: Peter Zijlstra 
Link: 
http://lkml.kernel.org/r/87wqswzznx.fsf@locke.i-did-not-set--mail-host-address--so-tickle-me
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/perf/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 07feae7..8b4c952 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -272,7 +272,7 @@ export PYTHON_EXTBUILD_LIB PYTHON_EXTBUILD_TMP
 python-clean := rm -rf $(PYTHON_EXTBUILD) $(OUTPUT)python/perf.so
 
 PYTHON_EXT_SRCS := $(shell grep -v ^\# util/python-ext-sources)
-PYTHON_EXT_DEPS := util/python-ext-sources util/setup.py
+PYTHON_EXT_DEPS := util/python-ext-sources util/setup.py $(LIBTRACEEVENT)
 
 $(OUTPUT)python/perf.so: $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS)
$(QUIET_GEN)CFLAGS='$(BASIC_CFLAGS)' $(PYTHON_WORD) util/setup.py \
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] perf: Fix dependency for python/perf.so

2013-03-24 Thread Naohiro Aota
python/perf.so lacks a dependency on libtraceevent.a, which causes
the following error when building python/perf.so. This patch introduces
the dependency.

   $ make python/perf.so
   CHK -fstack-protector-all
   CHK -Wstack-protector
   CHK -Wvolatile-register-var
   CHK -D_FORTIFY_SOURCE=2
   CHK bionic
   CHK libelf
   CHK libdw
   CHK libunwind
   CHK -DLIBELF_MMAP
   CHK libaudit
   CHK libnewt
   CHK gtk2
   CHK -DHAVE_GTK_INFO_BAR
   CHK perl
   CHK python
   CHK python version
   CHK libbfd
   CHK -DHAVE_STRLCPY
   CHK -DHAVE_ON_EXIT
   CHK -DBACKTRACE_SUPPORT
   CHK libnuma
   GEN python/perf.so
   x86_64-pc-linux-gnu-gcc: error: ../lib/traceevent/libtraceevent.a: No such 
file or directory
   error: command 'x86_64-pc-linux-gnu-gcc' failed with exit status 1
   cp: cannot stat 'python_ext_build/lib/perf.so': No such file or directory
   make: *** [python/perf.so] Error 1

Signed-off-by: Naohiro Aota 
---
 tools/perf/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index bb74c79..d10ccda 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -262,7 +262,7 @@ export PYTHON_EXTBUILD_LIB PYTHON_EXTBUILD_TMP
 python-clean := rm -rf $(PYTHON_EXTBUILD) $(OUTPUT)python/perf.so
 
 PYTHON_EXT_SRCS := $(shell grep -v ^\# util/python-ext-sources)
-PYTHON_EXT_DEPS := util/python-ext-sources util/setup.py
+PYTHON_EXT_DEPS := util/python-ext-sources util/setup.py $(LIBTRACEEVENT)
 
 $(OUTPUT)python/perf.so: $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS)
$(QUIET_GEN)CFLAGS='$(BASIC_CFLAGS)' $(PYTHON_WORD) util/setup.py \
-- 
1.8.1.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/