[PATCH 1/3] Support instrumenting returns of instrumented functions

2018-11-04 Thread Andi Kleen
From: Andi Kleen 

When instrumenting programs using __fentry__ it is often useful
to instrument the function return too. Traditionally this
has been done by patching the return address on the stack
frame on entry. However this is fairly complicated (trace
function has to emulate a stack) and also slow because
it causes a branch misprediction on every return.

Add an option to generate call or nop instrumentation for
every return instead, including patch sections.

This will increase the program size slightly, but can be a
lot faster and simpler.

This version only instruments true returns, not sibling
calls or tail recursion. This matches the semantics of the
original stack.

gcc/:

2018-11-04  Andi Kleen  

* config/i386/i386-opts.h (enum instrument_return): Add.
* config/i386/i386.c (output_return_instrumentation): Add.
(ix86_output_function_return): Call output_return_instrumentation.
(ix86_output_call_insn): Call output_return_instrumentation.
* config/i386/i386.opt: Add -minstrument-return=.
* doc/invoke.texi (-minstrument-return): Document.

gcc/testsuite/:

2018-11-04  Andi Kleen  

* gcc.target/i386/returninst1.c: New test.
* gcc.target/i386/returninst2.c: New test.
* gcc.target/i386/returninst3.c: New test.
---
 gcc/config/i386/i386-opts.h |  6 
 gcc/config/i386/i386.c  | 36 +
 gcc/config/i386/i386.opt| 21 
 gcc/doc/invoke.texi | 14 
 gcc/testsuite/gcc.target/i386/returninst1.c | 14 
 gcc/testsuite/gcc.target/i386/returninst2.c | 21 
 gcc/testsuite/gcc.target/i386/returninst3.c |  9 ++
 7 files changed, 121 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/returninst1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/returninst2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/returninst3.c

diff --git a/gcc/config/i386/i386-opts.h b/gcc/config/i386/i386-opts.h
index 46366cbfa72..35e9413100e 100644
--- a/gcc/config/i386/i386-opts.h
+++ b/gcc/config/i386/i386-opts.h
@@ -119,4 +119,10 @@ enum indirect_branch {
   indirect_branch_thunk_extern
 };
 
+enum instrument_return {
+  instrument_return_none = 0,
+  instrument_return_call,
+  instrument_return_nop5
+};
+
 #endif
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index f9ef0b4445b..f7cd94a8139 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -28336,12 +28336,47 @@ ix86_output_indirect_jmp (rtx call_op)
 return "%!jmp\t%A0";
 }
 
+/* Output return instrumentation for current function if needed.  */
+
+static void
+output_return_instrumentation (void)
+{
+  if (ix86_instrument_return != instrument_return_none
+  && flag_fentry
+  && !DECL_NO_INSTRUMENT_FUNCTION_ENTRY_EXIT (cfun->decl))
+{
+  if (ix86_flag_record_return)
+   fprintf (asm_out_file, "1:\n");
+  switch (ix86_instrument_return)
+   {
+   case instrument_return_call:
+ fprintf (asm_out_file, "\tcall\t__return__\n");
+ break;
+   case instrument_return_nop5:
+ /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1)  */
+ fprintf (asm_out_file, ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n");
+ break;
+   case instrument_return_none:
+ break;
+   }
+
+  if (ix86_flag_record_return)
+   {
+ fprintf (asm_out_file, "\t.section __return_loc, \"a\",@progbits\n");
+ fprintf (asm_out_file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
+ fprintf (asm_out_file, "\t.previous\n");
+   }
+}
+}
+
 /* Output function return.  CALL_OP is the jump target.  Add a REP
prefix to RET if LONG_P is true and function return is kept.  */
 
 const char *
 ix86_output_function_return (bool long_p)
 {
+  output_return_instrumentation ();
+
   if (cfun->machine->function_return_type != indirect_branch_keep)
 {
   char thunk_name[32];
@@ -28454,6 +28489,7 @@ ix86_output_call_insn (rtx_insn *insn, rtx call_op)
 
   if (SIBLING_CALL_P (insn))
 {
+  output_return_instrumentation ();
   if (direct_p)
{
  if (ix86_nopic_noplt_attribute_p (call_op))
diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
index e7fbf9b6f99..5925b75244f 100644
--- a/gcc/config/i386/i386.opt
+++ b/gcc/config/i386/i386.opt
@@ -1063,3 +1063,24 @@ Support WAITPKG built-in functions and code generation.
 mcldemote
 Target Report Mask(ISA_CLDEMOTE) Var(ix86_isa_flags2) Save
 Support CLDEMOTE built-in functions and code generation.
+
+minstrument-return=
+Target Report RejectNegative Joined Enum(instrument_return) 
Var(ix86_instrument_return) Init(instrument_return_none)
+Instrument function exit in instrumented functions with __fentry__.
+
+Enum
+Name(instrument_return) Type(enum instrument_return)
+Known choices for return instrumentation with -minstrument-return=
+
+EnumValue
+Enum(instrument_return) String(none) Val

[PING] Re: [PATCH 1/3] Support instrumenting returns of instrumented functions

2018-11-08 Thread Andi Kleen
Andi Kleen  writes:

Ping!

> From: Andi Kleen 
>
> When instrumenting programs using __fentry__ it is often useful
> to instrument the function return too. Traditionally this
> has been done by patching the return address on the stack
> frame on entry. However this is fairly complicated (trace
> function has to emulate a stack) and also slow because
> it causes a branch misprediction on every return.
>
> Add an option to generate call or nop instrumentation for
> every return instead, including patch sections.
>
> This will increase the program size slightly, but can be a
> lot faster and simpler.
>
> This version only instruments true returns, not sibling
> calls or tail recursion. This matches the semantics of the
> original stack.
>
> gcc/:
>
> 2018-11-04  Andi Kleen  
>
>   * config/i386/i386-opts.h (enum instrument_return): Add.
>   * config/i386/i386.c (output_return_instrumentation): Add.
>   (ix86_output_function_return): Call output_return_instrumentation.
>   (ix86_output_call_insn): Call output_return_instrumentation.
>   * config/i386/i386.opt: Add -minstrument-return=.
>   * doc/invoke.texi (-minstrument-return): Document.
>
> gcc/testsuite/:
>
> 2018-11-04  Andi Kleen  
>
>   * gcc.target/i386/returninst1.c: New test.
>   * gcc.target/i386/returninst2.c: New test.
>   * gcc.target/i386/returninst3.c: New test.
> ---
>  gcc/config/i386/i386-opts.h |  6 
>  gcc/config/i386/i386.c  | 36 +
>  gcc/config/i386/i386.opt| 21 
>  gcc/doc/invoke.texi | 14 
>  gcc/testsuite/gcc.target/i386/returninst1.c | 14 
>  gcc/testsuite/gcc.target/i386/returninst2.c | 21 
>  gcc/testsuite/gcc.target/i386/returninst3.c |  9 ++
>  7 files changed, 121 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.target/i386/returninst1.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/returninst2.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/returninst3.c
>
> diff --git a/gcc/config/i386/i386-opts.h b/gcc/config/i386/i386-opts.h
> index 46366cbfa72..35e9413100e 100644
> --- a/gcc/config/i386/i386-opts.h
> +++ b/gcc/config/i386/i386-opts.h
> @@ -119,4 +119,10 @@ enum indirect_branch {
>indirect_branch_thunk_extern
>  };
>  
> +enum instrument_return {
> +  instrument_return_none = 0,
> +  instrument_return_call,
> +  instrument_return_nop5
> +};
> +
>  #endif
> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> index f9ef0b4445b..f7cd94a8139 100644
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -28336,12 +28336,47 @@ ix86_output_indirect_jmp (rtx call_op)
>  return "%!jmp\t%A0";
>  }
>  
> +/* Output return instrumentation for current function if needed.  */
> +
> +static void
> +output_return_instrumentation (void)
> +{
> +  if (ix86_instrument_return != instrument_return_none
> +  && flag_fentry
> +  && !DECL_NO_INSTRUMENT_FUNCTION_ENTRY_EXIT (cfun->decl))
> +{
> +  if (ix86_flag_record_return)
> + fprintf (asm_out_file, "1:\n");
> +  switch (ix86_instrument_return)
> + {
> + case instrument_return_call:
> +   fprintf (asm_out_file, "\tcall\t__return__\n");
> +   break;
> + case instrument_return_nop5:
> +   /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1)  */
> +   fprintf (asm_out_file, ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n");
> +   break;
> + case instrument_return_none:
> +   break;
> + }
> +
> +  if (ix86_flag_record_return)
> + {
> +   fprintf (asm_out_file, "\t.section __return_loc, \"a\",@progbits\n");
> +   fprintf (asm_out_file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
> +   fprintf (asm_out_file, "\t.previous\n");
> + }
> +}
> +}
> +
>  /* Output function return.  CALL_OP is the jump target.  Add a REP
> prefix to RET if LONG_P is true and function return is kept.  */
>  
>  const char *
>  ix86_output_function_return (bool long_p)
>  {
> +  output_return_instrumentation ();
> +
>if (cfun->machine->function_return_type != indirect_branch_keep)
>  {
>char thunk_name[32];
> @@ -28454,6 +28489,7 @@ ix86_output_call_insn (rtx_insn *insn, rtx call_op)
>  
>if (SIBLING_CALL_P (insn))
>  {
> +  output_return_instrumentation ();
>if (direct_p)
>   {
> if (ix86_nopic_noplt_attribute_p (call_op))
> diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
> index e7fbf9b6f99..5925b75244f 100644
> --- a/gcc/config/i386/i386.opt
> +++ b/gcc/config/i386/i386.opt
> @@ -1063,3 +1063,24 @@ Support WAITPKG built-in functions and code generation.
>  mcldemote
>  Target Report Mask(ISA_CLDEMOTE) Var(ix86_isa_flags2) Save
>  Support CLDEMOTE built-in functions and code generation.
> +
> +minstrument-return=
> +Target Report RejectNegative Joined Enum(instrument_return) 
> Var(ix86_instrument_return) Init(instrument_return_none)
> +Instrument func

[PING^3] Re: [PATCH 1/3] Support instrumenting returns of instrumented functions

2018-11-21 Thread Andi Kleen
Andi Kleen  writes:

Ping^3!

> Andi Kleen  writes:
>
> Ping!^2
>
>> Andi Kleen  writes:
>>
>> Ping!
>>
>>> From: Andi Kleen 
>>>
>>> When instrumenting programs using __fentry__ it is often useful
>>> to instrument the function return too. Traditionally this
>>> has been done by patching the return address on the stack
>>> frame on entry. However this is fairly complicated (trace
>>> function has to emulate a stack) and also slow because
>>> it causes a branch misprediction on every return.
>>>
>>> Add an option to generate call or nop instrumentation for
>>> every return instead, including patch sections.
>>>
>>> This will increase the program size slightly, but can be a
>>> lot faster and simpler.
>>>
>>> This version only instruments true returns, not sibling
>>> calls or tail recursion. This matches the semantics of the
>>> original stack.
>>>
>>> gcc/:
>>>
>>> 2018-11-04  Andi Kleen  
>>>
>>> * config/i386/i386-opts.h (enum instrument_return): Add.
>>> * config/i386/i386.c (output_return_instrumentation): Add.
>>> (ix86_output_function_return): Call output_return_instrumentation.
>>> (ix86_output_call_insn): Call output_return_instrumentation.
>>> * config/i386/i386.opt: Add -minstrument-return=.
>>> * doc/invoke.texi (-minstrument-return): Document.
>>>
>>> gcc/testsuite/:
>>>
>>> 2018-11-04  Andi Kleen  
>>>
>>> * gcc.target/i386/returninst1.c: New test.
>>> * gcc.target/i386/returninst2.c: New test.
>>> * gcc.target/i386/returninst3.c: New test.
>>> ---
>>>  gcc/config/i386/i386-opts.h |  6 
>>>  gcc/config/i386/i386.c  | 36 +
>>>  gcc/config/i386/i386.opt| 21 
>>>  gcc/doc/invoke.texi | 14 
>>>  gcc/testsuite/gcc.target/i386/returninst1.c | 14 
>>>  gcc/testsuite/gcc.target/i386/returninst2.c | 21 
>>>  gcc/testsuite/gcc.target/i386/returninst3.c |  9 ++
>>>  7 files changed, 121 insertions(+)
>>>  create mode 100644 gcc/testsuite/gcc.target/i386/returninst1.c
>>>  create mode 100644 gcc/testsuite/gcc.target/i386/returninst2.c
>>>  create mode 100644 gcc/testsuite/gcc.target/i386/returninst3.c
>>>
>>> diff --git a/gcc/config/i386/i386-opts.h b/gcc/config/i386/i386-opts.h
>>> index 46366cbfa72..35e9413100e 100644
>>> --- a/gcc/config/i386/i386-opts.h
>>> +++ b/gcc/config/i386/i386-opts.h
>>> @@ -119,4 +119,10 @@ enum indirect_branch {
>>>indirect_branch_thunk_extern
>>>  };
>>>  
>>> +enum instrument_return {
>>> +  instrument_return_none = 0,
>>> +  instrument_return_call,
>>> +  instrument_return_nop5
>>> +};
>>> +
>>>  #endif
>>> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
>>> index f9ef0b4445b..f7cd94a8139 100644
>>> --- a/gcc/config/i386/i386.c
>>> +++ b/gcc/config/i386/i386.c
>>> @@ -28336,12 +28336,47 @@ ix86_output_indirect_jmp (rtx call_op)
>>>  return "%!jmp\t%A0";
>>>  }
>>>  
>>> +/* Output return instrumentation for current function if needed.  */
>>> +
>>> +static void
>>> +output_return_instrumentation (void)
>>> +{
>>> +  if (ix86_instrument_return != instrument_return_none
>>> +  && flag_fentry
>>> +  && !DECL_NO_INSTRUMENT_FUNCTION_ENTRY_EXIT (cfun->decl))
>>> +{
>>> +  if (ix86_flag_record_return)
>>> +   fprintf (asm_out_file, "1:\n");
>>> +  switch (ix86_instrument_return)
>>> +   {
>>> +   case instrument_return_call:
>>> + fprintf (asm_out_file, "\tcall\t__return__\n");
>>> + break;
>>> +   case instrument_return_nop5:
>>> + /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1)  */
>>> + fprintf (asm_out_file, ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n");
>>> + break;
>>> +   case instrument_return_none:
>>> + break;
>>> +   }
>>> +
>>> +  if (ix86_flag_record_return)
>>> +   {
>>> + fprintf (asm_out_file, "\t.section __return_loc, \"a\",@progbits\n");
>>> + fprintf (asm_out_file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
>>> + fprintf (asm_out_file, "\t.previous\n");
>>> +   }
>>> +}
>>> +}
>>> +
>>>  /* Output function return.  CALL_OP is the jump target.  Add a REP
>>> prefix to RET if LONG_P is true and function return is kept.  */
>>>  
>>>  const char *
>>>  ix86_output_function_return (bool long_p)
>>>  {
>>> +  output_return_instrumentation ();
>>> +
>>>if (cfun->machine->function_return_type != indirect_branch_keep)
>>>  {
>>>char thunk_name[32];
>>> @@ -28454,6 +28489,7 @@ ix86_output_call_insn (rtx_insn *insn, rtx call_op)
>>>  
>>>if (SIBLING_CALL_P (insn))
>>>  {
>>> +  output_return_instrumentation ();
>>>if (direct_p)
>>> {
>>>   if (ix86_nopic_noplt_attribute_p (call_op))
>>> diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
>>> index e7fbf9b6f99..5925b75244f 100644
>>> --- a/gcc/config/i386/i386.opt
>>> +++ b/gcc/config/i386/i386.opt
>>> @@ -1063,3 +1063,24 @@ Support WAITPKG built-in functions and code 
>>> generation.
>>>  mcldemote
>>>

Re: [PING^3] Re: [PATCH 1/3] Support instrumenting returns of instrumented functions

2018-11-28 Thread Jan Hubicka
Hi,
I wonder how __return_loc sections work.  In general it would be nice to
avoid direct output of assembler code in favour of adding instructions
to rtl stream which should be possible to insert the call and return.
__return_loc seems harder, also I wonder if it makes unwind information
possible?

Honza


Re: [PING^3] Re: [PATCH 1/3] Support instrumenting returns of instrumented functions

2018-11-28 Thread Andi Kleen
On Wed, Nov 28, 2018 at 04:59:25PM +0100, Jan Hubicka wrote:
> Hi,
> I wonder how __return_loc sections work.  In general it would be nice to

You mean how it's used? It's for patching in/out return instrumentation
at runtime, and to find them we use the sections.

> avoid direct output of assembler code in favour of adding instructions
> to rtl stream which should be possible to insert the call and return.

That would be a full rewrite how all these hooks work. I was 
hoping extending the existing frame work would be good enough for now.

> __return_loc seems harder, also I wonder if it makes unwind information
> possible?

Do we need unwind information for calls? I thought it was only
for other stack manipulation.

-Andi


Re: [PING^3] Re: [PATCH 1/3] Support instrumenting returns of instrumented functions

2018-11-28 Thread Jan Hubicka
> On Wed, Nov 28, 2018 at 04:59:25PM +0100, Jan Hubicka wrote:
> > Hi,
> > I wonder how __return_loc sections work.  In general it would be nice to
> 
> You mean how it's used? It's for patching in/out return instrumentation
> at runtime, and to find them we use the sections.
> 
> > avoid direct output of assembler code in favour of adding instructions
> > to rtl stream which should be possible to insert the call and return.
> 
> That would be a full rewrite how all these hooks work. I was 
> hoping extending the existing frame work would be good enough for now.

Yep, it is ugly and it hits us ocassionaly as with the CET
instrumentation, but I suppose we should care about this in stage1.
> 
> > __return_loc seems harder, also I wonder if it makes unwind information
> > possible?
> 
> Do we need unwind information for calls? I thought it was only
> for other stack manipulation.

I just wonder if backtraces are right if something triggers in the extra
code.  If you make control flow to jump into separate section, I suppose
backtraces won't be right.  Not sure if that is necessarily issue?

Honza
> 
> -Andi


Re: [PING^3] Re: [PATCH 1/3] Support instrumenting returns of instrumented functions

2018-11-28 Thread Andi Kleen
On Wed, Nov 28, 2018 at 05:24:37PM +0100, Jan Hubicka wrote:
> > On Wed, Nov 28, 2018 at 04:59:25PM +0100, Jan Hubicka wrote:
> > > Hi,
> > > I wonder how __return_loc sections work.  In general it would be nice to
> > 
> > You mean how it's used? It's for patching in/out return instrumentation
> > at runtime, and to find them we use the sections.
> > 
> > > avoid direct output of assembler code in favour of adding instructions
> > > to rtl stream which should be possible to insert the call and return.
> > 
> > That would be a full rewrite how all these hooks work. I was 
> > hoping extending the existing frame work would be good enough for now.
> 
> Yep, it is ugly and it hits us ocassionaly as with the CET
> instrumentation, but I suppose we should care about this in stage1.
> > 
> > > __return_loc seems harder, also I wonder if it makes unwind information
> > > possible?
> > 
> > Do we need unwind information for calls? I thought it was only
> > for other stack manipulation.
> 
> I just wonder if backtraces are right if something triggers in the extra
> code.  If you make control flow to jump into separate section, I suppose
> backtraces won't be right.  Not sure if that is necessarily issue?

I don't think it is because the instrumentation only adds calls, and
calls don't get annotated in DWARF. The only issue I could think of
if is something patches in push instructions through the nops, 
but there is really nothing the compiler could do about that.

I tested gdb and it can backtrace through the return instrumentation.

Breakpoint 1, 0x00401182 in __return__ ()
(gdb) bt
#0  0x00401182 in __return__ ()
#1  0x004011a3 in f2 ()
#2  0x004011b7 in main ()
Backtrace stopped: previous frame inner to this frame (corrupt stack?)

-Andi


Re: [PING^3] Re: [PATCH 1/3] Support instrumenting returns of instrumented functions

2018-11-29 Thread Jan Hubicka
> 
> I don't think it is because the instrumentation only adds calls, and
> calls don't get annotated in DWARF. The only issue I could think of
> if is something patches in push instructions through the nops, 
> but there is really nothing the compiler could do about that.
> 
> I tested gdb and it can backtrace through the return instrumentation.
> 
> Breakpoint 1, 0x00401182 in __return__ ()
> (gdb) bt
> #0  0x00401182 in __return__ ()
> #1  0x004011a3 in f2 ()
> #2  0x004011b7 in main ()
> Backtrace stopped: previous frame inner to this frame (corrupt stack?)

:) Not most user friendly message though.
The patch is OK.

Honza
> 
> -Andi


Re: [PING^3] Re: [PATCH 1/3] Support instrumenting returns of instrumented functions

2018-12-02 Thread Dominique d'Humières
Hi Andi,

% gcc9 -S -pg -mfentry -minstrument-return=call -mrecord-return 
/opt/gcc/work/gcc/testsuite/gcc.target/i386/returninst1.c -m32
cc1: sorry, unimplemented: -mfentry isn't supported for 32-bit in combination 
with -fpic

The tests should be protected.

TIA

Dominique

Re: [PING^3] Re: [PATCH 1/3] Support instrumenting returns of instrumented functions

2018-12-02 Thread Andi Kleen
On Sun, Dec 02, 2018 at 05:26:32PM +0100, Dominique d'Humières wrote:
> Hi Andi,
> 
> % gcc9 -S -pg -mfentry -minstrument-return=call -mrecord-return 
> /opt/gcc/work/gcc/testsuite/gcc.target/i386/returninst1.c -m32
> cc1: sorry, unimplemented: -mfentry isn't supported for 32-bit in combination 
> with -fpic
> 
> The tests should be protected.

Thanks for the notice.
Here's a patch. Will commit as obvious unless someone objects.

-Andi


diff --git a/gcc/testsuite/gcc.target/i386/returninst1.c 
b/gcc/testsuite/gcc.target/i386/returninst1.c
index f970e75a774..a0ad155b88d 100644
--- a/gcc/testsuite/gcc.target/i386/returninst1.c
+++ b/gcc/testsuite/gcc.target/i386/returninst1.c
@@ -1,4 +1,4 @@
-/* { dg-do compile } */
+/* { dg-do compile { target { ! ia32 } } } } */
 /* { dg-options "-pg -mfentry -minstrument-return=call -mrecord-return" } */
 /* { dg-final { scan-assembler "call.*__return__" } } */
 /* { dg-final { scan-assembler "section.*return_loc" } } */
diff --git a/gcc/testsuite/gcc.target/i386/returninst2.c 
b/gcc/testsuite/gcc.target/i386/returninst2.c
index 716b38556dd..f55218798d8 100644
--- a/gcc/testsuite/gcc.target/i386/returninst2.c
+++ b/gcc/testsuite/gcc.target/i386/returninst2.c
@@ -1,4 +1,4 @@
-/* { dg-do compile } */
+/* { dg-do compile { target { ! ia32 } } } } */
 /* { dg-options "-pg -mfentry -minstrument-return=nop5 -mrecord-return" } */
 /* { dg-final { scan-assembler-times "0x0f, 0x1f, 0x44, 0x00, 0x00" 3 } } */
 /* { dg-final { scan-assembler "section.*return_loc" } } */
diff --git a/gcc/testsuite/gcc.target/i386/returninst3.c 
b/gcc/testsuite/gcc.target/i386/returninst3.c
index 5bbc60e8bd4..e0414d338cb 100644
--- a/gcc/testsuite/gcc.target/i386/returninst3.c
+++ b/gcc/testsuite/gcc.target/i386/returninst3.c
@@ -1,4 +1,4 @@
-/* { dg-do compile } */
+/* { dg-do compile { target { ! ia32 } } } } */
 /* { dg-options "-pg -mfentry -minstrument-return=call" } */
 /* { dg-final { scan-assembler-not "call.*__return__" } } */
 


Re: [PING^2] Re: [PATCH 1/3] Support instrumenting returns of instrumented functions

2018-11-15 Thread Andi Kleen
Andi Kleen  writes:

Ping!^2

> Andi Kleen  writes:
>
> Ping!
>
>> From: Andi Kleen 
>>
>> When instrumenting programs using __fentry__ it is often useful
>> to instrument the function return too. Traditionally this
>> has been done by patching the return address on the stack
>> frame on entry. However this is fairly complicated (trace
>> function has to emulate a stack) and also slow because
>> it causes a branch misprediction on every return.
>>
>> Add an option to generate call or nop instrumentation for
>> every return instead, including patch sections.
>>
>> This will increase the program size slightly, but can be a
>> lot faster and simpler.
>>
>> This version only instruments true returns, not sibling
>> calls or tail recursion. This matches the semantics of the
>> original stack.
>>
>> gcc/:
>>
>> 2018-11-04  Andi Kleen  
>>
>>  * config/i386/i386-opts.h (enum instrument_return): Add.
>>  * config/i386/i386.c (output_return_instrumentation): Add.
>>  (ix86_output_function_return): Call output_return_instrumentation.
>>  (ix86_output_call_insn): Call output_return_instrumentation.
>>  * config/i386/i386.opt: Add -minstrument-return=.
>>  * doc/invoke.texi (-minstrument-return): Document.
>>
>> gcc/testsuite/:
>>
>> 2018-11-04  Andi Kleen  
>>
>>  * gcc.target/i386/returninst1.c: New test.
>>  * gcc.target/i386/returninst2.c: New test.
>>  * gcc.target/i386/returninst3.c: New test.
>> ---
>>  gcc/config/i386/i386-opts.h |  6 
>>  gcc/config/i386/i386.c  | 36 +
>>  gcc/config/i386/i386.opt| 21 
>>  gcc/doc/invoke.texi | 14 
>>  gcc/testsuite/gcc.target/i386/returninst1.c | 14 
>>  gcc/testsuite/gcc.target/i386/returninst2.c | 21 
>>  gcc/testsuite/gcc.target/i386/returninst3.c |  9 ++
>>  7 files changed, 121 insertions(+)
>>  create mode 100644 gcc/testsuite/gcc.target/i386/returninst1.c
>>  create mode 100644 gcc/testsuite/gcc.target/i386/returninst2.c
>>  create mode 100644 gcc/testsuite/gcc.target/i386/returninst3.c
>>
>> diff --git a/gcc/config/i386/i386-opts.h b/gcc/config/i386/i386-opts.h
>> index 46366cbfa72..35e9413100e 100644
>> --- a/gcc/config/i386/i386-opts.h
>> +++ b/gcc/config/i386/i386-opts.h
>> @@ -119,4 +119,10 @@ enum indirect_branch {
>>indirect_branch_thunk_extern
>>  };
>>  
>> +enum instrument_return {
>> +  instrument_return_none = 0,
>> +  instrument_return_call,
>> +  instrument_return_nop5
>> +};
>> +
>>  #endif
>> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
>> index f9ef0b4445b..f7cd94a8139 100644
>> --- a/gcc/config/i386/i386.c
>> +++ b/gcc/config/i386/i386.c
>> @@ -28336,12 +28336,47 @@ ix86_output_indirect_jmp (rtx call_op)
>>  return "%!jmp\t%A0";
>>  }
>>  
>> +/* Output return instrumentation for current function if needed.  */
>> +
>> +static void
>> +output_return_instrumentation (void)
>> +{
>> +  if (ix86_instrument_return != instrument_return_none
>> +  && flag_fentry
>> +  && !DECL_NO_INSTRUMENT_FUNCTION_ENTRY_EXIT (cfun->decl))
>> +{
>> +  if (ix86_flag_record_return)
>> +fprintf (asm_out_file, "1:\n");
>> +  switch (ix86_instrument_return)
>> +{
>> +case instrument_return_call:
>> +  fprintf (asm_out_file, "\tcall\t__return__\n");
>> +  break;
>> +case instrument_return_nop5:
>> +  /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1)  */
>> +  fprintf (asm_out_file, ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n");
>> +  break;
>> +case instrument_return_none:
>> +  break;
>> +}
>> +
>> +  if (ix86_flag_record_return)
>> +{
>> +  fprintf (asm_out_file, "\t.section __return_loc, \"a\",@progbits\n");
>> +  fprintf (asm_out_file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
>> +  fprintf (asm_out_file, "\t.previous\n");
>> +}
>> +}
>> +}
>> +
>>  /* Output function return.  CALL_OP is the jump target.  Add a REP
>> prefix to RET if LONG_P is true and function return is kept.  */
>>  
>>  const char *
>>  ix86_output_function_return (bool long_p)
>>  {
>> +  output_return_instrumentation ();
>> +
>>if (cfun->machine->function_return_type != indirect_branch_keep)
>>  {
>>char thunk_name[32];
>> @@ -28454,6 +28489,7 @@ ix86_output_call_insn (rtx_insn *insn, rtx call_op)
>>  
>>if (SIBLING_CALL_P (insn))
>>  {
>> +  output_return_instrumentation ();
>>if (direct_p)
>>  {
>>if (ix86_nopic_noplt_attribute_p (call_op))
>> diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
>> index e7fbf9b6f99..5925b75244f 100644
>> --- a/gcc/config/i386/i386.opt
>> +++ b/gcc/config/i386/i386.opt
>> @@ -1063,3 +1063,24 @@ Support WAITPKG built-in functions and code 
>> generation.
>>  mcldemote
>>  Target Report Mask(ISA_CLDEMOTE) Var(ix86_isa_flags2) Save
>>  Support CLDEMOTE built-in functions and code generation.
>> +
>> +minstrument-re