Hi, Uros,

Thanks a lot for your comments.

> On Oct 19, 2020, at 2:30 PM, Uros Bizjak <ubiz...@gmail.com> wrote:
> 
>> 
>> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
>> index f684954..620114f 100644
>> --- a/gcc/config/i386/i386.c
>> +++ b/gcc/config/i386/i386.c
>> @@ -3551,6 +3551,161 @@ ix86_function_value_regno_p (const unsigned int 
>> regno)
>>  return false;
>> }
>> 
>> +/* Check whether the register REGNO should be zeroed on X86.
>> +   When ALL_SSE_ZEROED is true, all SSE registers have been zeroed
>> +   together, no need to zero it again.
>> +   Stack registers (st0-st7) and mm0-mm7 are aliased with each other,
>> +   which makes them very hard to zero individually, so don't zero
>> +   individual st or mm registers at this time.  */
>> +
>> +static bool
>> +zero_call_used_regno_p (const unsigned int regno,
>> + bool all_sse_zeroed)
>> +{
>> +  return GENERAL_REGNO_P (regno)
>> +  || (!all_sse_zeroed && SSE_REGNO_P (regno))
>> +  || MASK_REGNO_P (regno);
>> +}
>> +
>> +/* Return the machine_mode that is used to zero register REGNO.  */
>> +
>> +static machine_mode
>> +zero_call_used_regno_mode (const unsigned int regno)
>> +{
>> +  /* NB: We only need to zero the lower 32 bits for integer registers
>> +     and the lower 128 bits for vector registers since destination are
>> +     zero-extended to the full register width.  */
>> +  if (GENERAL_REGNO_P (regno))
>> +    return SImode;
>> +  else if (SSE_REGNO_P (regno))
>> +    return V4SFmode;
>> +  else
>> +    return HImode;
>> +}
>> +
>> +/* Generate a rtx to zero all vector registers together if possible,
>> +   otherwise, return NULL.  */
>> +
>> +static rtx
>> +zero_all_vector_registers (HARD_REG_SET need_zeroed_hardregs)
>> +{
>> +  if (!TARGET_AVX)
>> +    return NULL;
>> +
>> +  for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
>> +    if ((IN_RANGE (regno, FIRST_SSE_REG, LAST_SSE_REG)
>> +  || (TARGET_64BIT
>> +      && (REX_SSE_REGNO_P (regno)
>> +  || (TARGET_AVX512F && EXT_REX_SSE_REGNO_P (regno)))))
>> + && !TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
>> +      return NULL;
>> +
>> +  return gen_avx_vzeroall ();
>> +}
>> +
>> +/* Generate a rtx to zero all st and mm registers together if possible,
>> +   otherwise, return NULL.  */
>> +
>> +static rtx
>> +zero_all_st_mm_registers (HARD_REG_SET need_zeroed_hardregs)
>> +{
>> +  if (!TARGET_MMX)
>> +    return NULL;
>> +
>> +  for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
>> +    if ((STACK_REGNO_P (regno) || MMX_REGNO_P (regno))
>> + && !TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
>> +      return NULL;
>> +
>> +  return gen_mmx_emms ();
> 
> emms is not clearing any register, it only loads x87FPUTagWord with
> FFFFH. So I think, the above is useless, as far as register clearing
> is concerned.

Thanks for the info.

So, for mm and st registers, should we clear them, and how?


> 
>> +}
>> +
>> +/* TARGET_ZERO_CALL_USED_REGS.  */
>> +/* Generate a sequence of instructions that zero registers specified by
>> +   NEED_ZEROED_HARDREGS.  Return the ZEROED_HARDREGS that are actually
>> +   zeroed.  */
>> +static HARD_REG_SET
>> +ix86_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs)
>> +{
>> +  HARD_REG_SET zeroed_hardregs;
>> +  bool all_sse_zeroed = false;
>> +
>> +  /* first, let's see whether we can zero all vector registers together.  */
>> +  rtx zero_all_vec_insn = zero_all_vector_registers (need_zeroed_hardregs);
>> +  if (zero_all_vec_insn)
>> +    {
>> +      emit_insn (zero_all_vec_insn);
>> +      all_sse_zeroed = true;
>> +    }
>> +
>> +  /* then, let's see whether we can zero all st+mm registers together.  */
>> +  rtx zero_all_st_mm_insn = zero_all_st_mm_registers (need_zeroed_hardregs);
>> +  if (zero_all_st_mm_insn)
>> +    emit_insn (zero_all_st_mm_insn);
>> +
>> +  /* Now, generate instructions to zero all the registers.  */
>> +
>> +  CLEAR_HARD_REG_SET (zeroed_hardregs);
>> +  rtx zero_gpr = NULL_RTX;
>> +  rtx zero_vector = NULL_RTX;
>> +  rtx zero_mask = NULL_RTX;
>> +
>> +  for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
>> +    {
>> +      if (!TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
>> + continue;
>> +      if (!zero_call_used_regno_p (regno, all_sse_zeroed))
>> + continue;
>> +
>> +      SET_HARD_REG_BIT (zeroed_hardregs, regno);
>> +
>> +      rtx reg, tmp;
>> +      machine_mode mode = zero_call_used_regno_mode (regno);
>> +
>> +      reg = gen_rtx_REG (mode, regno);
>> +
>> +      if (mode == SImode)
>> + if (zero_gpr == NULL_RTX)
>> +   {
>> +     zero_gpr = reg;
>> +     tmp = gen_rtx_SET (reg, const0_rtx);
>> +     if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
> 
> No need to complicate here, there is a peephole2 pattern that will perform:
> 
> ;; Attempt to always use XOR for zeroing registers (including FP modes).
> (define_peephole2
>  [(set (match_operand 0 "general_reg_operand")
>    (match_operand 1 "const0_operand"))]
> 
> So, simply load a register with 0 and leave to the peephole to do its magic.

Since the new register-zeroing pass runs after the peephole2 pass, the above
peephole2 pattern cannot be applied to the insns it emits.

          NEXT_PASS (pass_peephole2);   ====> peephole2 
          NEXT_PASS (pass_if_after_reload);
          NEXT_PASS (pass_regrename);
          NEXT_PASS (pass_cprop_hardreg);
          NEXT_PASS (pass_fast_rtl_dce);
          NEXT_PASS (pass_reorder_blocks);
          NEXT_PASS (pass_leaf_regs);
          NEXT_PASS (pass_split_before_sched2);
          NEXT_PASS (pass_sched2);
          NEXT_PASS (pass_stack_regs);
          PUSH_INSERT_PASSES_WITHIN (pass_stack_regs)
              NEXT_PASS (pass_split_before_regstack);
              NEXT_PASS (pass_stack_regs_run);
          POP_INSERT_PASSES ()
      POP_INSERT_PASSES ()
      NEXT_PASS (pass_late_compilation);
      PUSH_INSERT_PASSES_WITHIN (pass_late_compilation)
          NEXT_PASS (pass_zero_call_used_regs);   ====> new zero registers pass.
          NEXT_PASS (pass_compute_alignments);
          NEXT_PASS (pass_variable_tracking);

So I think the current code is still necessary. Is that right?

Thanks.

Qing


> 
> Other than these two issues, the (relatively trivial) x86 part LGTM.
> 
> Uros.
> 
>> +       {
>> + rtx clob = gen_rtx_CLOBBER (VOIDmode,
>> +     gen_rtx_REG (CCmode,
>> +  FLAGS_REG));
>> + tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2,
>> +      tmp,
>> +      clob));
>> +       }
>> +     emit_insn (tmp);
>> +   }
>> + else
>> +   emit_move_insn (reg, zero_gpr);
>> +      else if (mode == V4SFmode)
>> + if (zero_vector == NULL_RTX)
>> +   {
>> +     zero_vector = reg;
>> +     tmp = gen_rtx_SET (reg, const0_rtx);
>> +     emit_insn (tmp);
>> +   }
>> + else
>> +   emit_move_insn (reg, zero_vector);
>> +      else if (mode == HImode)
>> + if (zero_mask == NULL_RTX)
>> +   {
>> +     zero_mask = reg;
>> +     tmp = gen_rtx_SET (reg, const0_rtx);
>> +     emit_insn (tmp);
>> +   }
>> + else
>> +   emit_move_insn (reg, zero_mask);
>> +      else
>> + gcc_unreachable ();
>> +    }
>> +  return zeroed_hardregs;
>> +}
>> +
>> /* Define how to find the value returned by a function.
>>   VALTYPE is the data type of the value (as a tree).
>>   If the precise function being called is known, FUNC is its FUNCTION_DECL;
>> @@ -23229,6 +23384,9 @@ ix86_run_selftests (void)
>> #undef TARGET_FUNCTION_VALUE_REGNO_P
>> #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
>> 
>> +#undef TARGET_ZERO_CALL_USED_REGS
>> +#define TARGET_ZERO_CALL_USED_REGS ix86_zero_call_used_regs
>> +
>> #undef TARGET_PROMOTE_FUNCTION_MODE
>> #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
>> 
>> diff --git a/gcc/coretypes.h b/gcc/coretypes.h
>> index 6b6cfcd..0ce5eb4 100644
>> --- a/gcc/coretypes.h
>> +++ b/gcc/coretypes.h
>> @@ -418,6 +418,19 @@ enum symbol_visibility
>>  VISIBILITY_INTERNAL
>> };
>> 
>> +/* Zero call-used registers type.  */
>> +enum zero_call_used_regs {
>> +  zero_call_used_regs_unset = 0,
>> +  zero_call_used_regs_skip,
>> +  zero_call_used_regs_used_gpr_arg,
>> +  zero_call_used_regs_used_arg,
>> +  zero_call_used_regs_all_arg,
>> +  zero_call_used_regs_used_gpr,
>> +  zero_call_used_regs_all_gpr,
>> +  zero_call_used_regs_used,
>> +  zero_call_used_regs_all
>> +};
>> +
>> /* enums used by the targetm.excess_precision hook.  */
>> 
>> enum flt_eval_method
>> diff --git a/gcc/df-scan.c b/gcc/df-scan.c
>> index 93b060f..630970b 100644
>> --- a/gcc/df-scan.c
>> +++ b/gcc/df-scan.c
>> @@ -3614,6 +3614,14 @@ df_update_entry_block_defs (void)
>> }
>> 
>> 
>> +/* Return true if REGNO is used by the epilogue.  */
>> +bool
>> +df_epilogue_uses_p (unsigned int regno)
>> +{
>> +    return (EPILOGUE_USES (regno)
>> +     || TEST_HARD_REG_BIT (crtl->zeroed_reg_set, regno));
>> +}
>> +
>> /* Set the bit for regs that are considered being used at the exit. */
>> 
>> static void
>> @@ -3661,7 +3669,7 @@ df_get_exit_block_use_set (bitmap exit_block_uses)
>>     epilogue as being live at the end of the function since they
>>     may be referenced by our caller.  */
>>  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
>> -    if (global_regs[i] || EPILOGUE_USES (i))
>> +    if (global_regs[i] || df_epilogue_uses_p (i))
>>      bitmap_set_bit (exit_block_uses, i);
>> 
>>  if (targetm.have_epilogue () && epilogue_completed)
>> @@ -3802,7 +3810,6 @@ df_hard_reg_init (void)
>>  initialized = true;
>> }
>> 
>> -
>> /* Recompute the parts of scanning that are based on regs_ever_live
>>   because something changed in that array.  */
>> 
>> @@ -3862,7 +3869,6 @@ df_regs_ever_live_p (unsigned int regno)
>>  return regs_ever_live[regno];
>> }
>> 
>> -
>> /* Set regs_ever_live[REGNO] to VALUE.  If this cause regs_ever_live
>>   to change, schedule that change for the next update.  */
>> 
>> diff --git a/gcc/df.h b/gcc/df.h
>> index 8b6ca8c..0f098d7 100644
>> --- a/gcc/df.h
>> +++ b/gcc/df.h
>> @@ -1085,6 +1085,7 @@ extern void df_update_entry_exit_and_calls (void);
>> extern bool df_hard_reg_used_p (unsigned int);
>> extern unsigned int df_hard_reg_used_count (unsigned int);
>> extern bool df_regs_ever_live_p (unsigned int);
>> +extern bool df_epilogue_uses_p (unsigned int);
>> extern void df_set_regs_ever_live (unsigned int, bool);
>> extern void df_compute_regs_ever_live (bool);
>> extern void df_scan_verify (void);
>> diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
>> index c9f7299..f56f61a 100644
>> --- a/gcc/doc/extend.texi
>> +++ b/gcc/doc/extend.texi
>> @@ -3992,6 +3992,30 @@ performing a link with relocatable output (i.e.@: 
>> @code{ld -r}) on them.
>> A declaration to which @code{weakref} is attached and that is associated
>> with a named @code{target} must be @code{static}.
>> 
>> +@item zero_call_used_regs ("@var{choice}")
>> +@cindex @code{zero_call_used_regs} function attribute
>> +
>> +The @code{zero_call_used_regs} attribute causes the compiler to zero
>> +call-used registers at function return according to @var{choice}.
>> +This is used to increase the program security by either mitigating
>> +Return-Oriented Programming (ROP) or preventing information leak
>> +through registers.
>> +@samp{skip} doesn't zero call-used registers.
>> +
>> +@samp{used-gpr-arg} zeros used call-used general purpose registers that
>> +pass parameters. @samp{used-arg} zeros used call-used registers that
>> +pass parameters. @samp{all-arg} zeros all call-used registers that pass
>> +parameters.  These 3 choices are used for ROP mitigation.
>> +
>> +@samp{used-gpr} zeros call-used general purpose registers
>> +which are used in function.  @samp{all-gpr} zeros all call-used
>> +general purpose registers.  @samp{used} zeros call-used registers which
>> +are used in function.  @samp{all} zeros all call-used registers.
>> +These 4 choices are used for preventing information leak through
>> +registers.
>> +
>> +The default for the attribute is controlled by 
>> @option{-fzero-call-used-regs}.
>> +
>> @end table
>> 
>> @c This is the end of the target-independent attribute table
>> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
>> index c049932..aa04a3c 100644
>> --- a/gcc/doc/invoke.texi
>> +++ b/gcc/doc/invoke.texi
>> @@ -550,7 +550,7 @@ Objective-C and Objective-C++ Dialects}.
>> -funit-at-a-time  -funroll-all-loops  -funroll-loops @gol
>> -funsafe-math-optimizations  -funswitch-loops @gol
>> -fipa-ra  -fvariable-expansion-in-unroller  -fvect-cost-model  -fvpt @gol
>> --fweb  -fwhole-program  -fwpa  -fuse-linker-plugin @gol
>> +-fweb  -fwhole-program  -fwpa  -fuse-linker-plugin -fzero-call-used-regs 
>> @gol
>> --param @var{name}=@var{value}
>> -O  -O0  -O1  -O2  -O3  -Os  -Ofast  -Og}
>> 
>> @@ -12550,6 +12550,29 @@ int foo (void)
>> 
>> Not all targets support this option.
>> 
>> +@item -fzero-call-used-regs=@var{choice}
>> +@opindex fzero-call-used-regs
>> +Zero call-used registers at function return to increase the program
>> +security by either mitigating Return-Oriented Programming (ROP) or
>> +preventing information leak through registers.
>> +
>> +@samp{skip}, which is the default, doesn't zero call-used registers.
>> +
>> +@samp{used-gpr-arg} zeros used call-used general purpose registers that
>> +pass parameters. @samp{used-arg} zeros used call-used registers that
>> +pass parameters. @samp{all-arg} zeros all call-used registers that pass
>> +parameters.  These 3 choices are used for ROP mitigation.
>> +
>> +@samp{used-gpr} zeros call-used general purpose registers
>> +which are used in function.  @samp{all-gpr} zeros all call-used
>> +general purpose registers.  @samp{used} zeros call-used registers which
>> +are used in function.  @samp{all} zeros all call-used registers.
>> +These 4 choices are used for preventing information leak through
>> +registers.
>> +
>> +You can control this behavior for a specific function by using the function
>> +attribute @code{zero_call_used_regs}.  @xref{Function Attributes}.
>> +
>> @item --param @var{name}=@var{value}
>> @opindex param
>> In some places, GCC uses various constants to control the amount of
>> diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
>> index 97437e8..7ecff05 100644
>> --- a/gcc/doc/tm.texi
>> +++ b/gcc/doc/tm.texi
>> @@ -12053,6 +12053,16 @@ argument list due to stack realignment.  Return 
>> @code{NULL} if no DRAP
>> is needed.
>> @end deftypefn
>> 
>> +@deftypefn {Target Hook} HARD_REG_SET TARGET_ZERO_CALL_USED_REGS 
>> (HARD_REG_SET @var{need_zeroed_hardregs})
>> +This target hook emits instructions to zero registers specified
>> +by @var{need_zeroed_hardregs} at function return, and returns the
>> +set of hard registers that are actually zeroed by the hook.
>> +Define this hook if the target has more efficient instructions to
>> +zero call-used registers, or if the target only tries to zero a subset
>> +of @var{need_zeroed_hardregs}.
>> +If the hook is not defined, the default_zero_call_used_regs will be used.
>> +@end deftypefn
>> +
>> @deftypefn {Target Hook} bool TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS (void)
>> When optimization is disabled, this hook indicates whether or not
>> arguments should be allocated to stack slots.  Normally, GCC allocates
>> diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
>> index 412e22c..a67dbea 100644
>> --- a/gcc/doc/tm.texi.in
>> +++ b/gcc/doc/tm.texi.in
>> @@ -8111,6 +8111,8 @@ and the associated definitions of those functions.
>> 
>> @hook TARGET_GET_DRAP_RTX
>> 
>> +@hook TARGET_ZERO_CALL_USED_REGS
>> +
>> @hook TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
>> 
>> @hook TARGET_CONST_ANCHOR
>> diff --git a/gcc/emit-rtl.h b/gcc/emit-rtl.h
>> index 92ad0dd6..2dbeace0 100644
>> --- a/gcc/emit-rtl.h
>> +++ b/gcc/emit-rtl.h
>> @@ -310,6 +310,9 @@ struct GTY(()) rtl_data {
>>     sets them.  */
>>  HARD_REG_SET asm_clobbers;
>> 
>> +  /* All hard registers that are zeroed at the return of the routine.  */
>> +  HARD_REG_SET zeroed_reg_set;
>> +
>>  /* The highest address seen during shorten_branches.  */
>>  int max_insn_address;
>> };
>> diff --git a/gcc/function.c b/gcc/function.c
>> index c612959..c8181bd 100644
>> --- a/gcc/function.c
>> +++ b/gcc/function.c
>> @@ -50,6 +50,7 @@ along with GCC; see the file COPYING3.  If not see
>> #include "emit-rtl.h"
>> #include "recog.h"
>> #include "rtl-error.h"
>> +#include "hard-reg-set.h"
>> #include "alias.h"
>> #include "fold-const.h"
>> #include "stor-layout.h"
>> @@ -5815,6 +5816,182 @@ make_prologue_seq (void)
>>  return seq;
>> }
>> 
>> +/* Check whether the hard register REGNO is live before the return insn 
>> RET.  */
>> +static bool
>> +is_live_reg_at_return (unsigned int regno, rtx_insn * ret)
>> +{
>> +  basic_block bb = BLOCK_FOR_INSN (ret);
>> +  auto_bitmap live_out;
>> +  bitmap_copy (live_out, df_get_live_out (bb));
>> +  df_simulate_one_insn_backwards (bb, ret, live_out);
>> +
>> +  if (REGNO_REG_SET_P (live_out, regno))
>> +    return true;
>> +
>> +  return false;
>> +}
>> +
>> +/* Emit a sequence of insns to zero the call-used-registers before RET.  */
>> +
>> +static void
>> +gen_call_used_regs_seq (rtx_insn *ret)
>> +{
>> +  bool gpr_only = true;
>> +  bool used_only = true;
>> +  bool arg_only = true;
>> +  enum zero_call_used_regs zero_regs_type = zero_call_used_regs_unset;
>> +  enum zero_call_used_regs attr_zero_regs_type
>> +     = zero_call_used_regs_unset;
>> +  tree attr_zero_regs
>> + = lookup_attribute ("zero_call_used_regs",
>> +     DECL_ATTRIBUTES (cfun->decl));
>> +
>> +  /* Get the type of zero_call_used_regs from function attribute.  */
>> +  if (attr_zero_regs)
>> +    {
>> +      /* The TREE_VALUE of an attribute is a TREE_LIST whose TREE_VALUE
>> +  is the attribute argument's value.  */
>> +      attr_zero_regs = TREE_VALUE (attr_zero_regs);
>> +      gcc_assert (TREE_CODE (attr_zero_regs) == TREE_LIST);
>> +      attr_zero_regs = TREE_VALUE (attr_zero_regs);
>> +      gcc_assert (TREE_CODE (attr_zero_regs) == STRING_CST);
>> +
>> +      if (strcmp (TREE_STRING_POINTER (attr_zero_regs), "skip") == 0)
>> + attr_zero_regs_type = zero_call_used_regs_skip;
>> +      else if (strcmp (TREE_STRING_POINTER (attr_zero_regs), "used-gpr-arg")
>> + == 0)
>> + attr_zero_regs_type = zero_call_used_regs_used_gpr_arg;
>> +      else if (strcmp (TREE_STRING_POINTER (attr_zero_regs), "used-arg") == 
>> 0)
>> + attr_zero_regs_type = zero_call_used_regs_used_arg;
>> +      else if (strcmp (TREE_STRING_POINTER (attr_zero_regs), "all-arg") == 
>> 0)
>> + attr_zero_regs_type = zero_call_used_regs_all_arg;
>> +      else if (strcmp (TREE_STRING_POINTER (attr_zero_regs), "used-gpr") == 
>> 0)
>> + attr_zero_regs_type = zero_call_used_regs_used_gpr;
>> +      else if (strcmp (TREE_STRING_POINTER (attr_zero_regs), "all-gpr") == 
>> 0)
>> + attr_zero_regs_type = zero_call_used_regs_all_gpr;
>> +      else if (strcmp (TREE_STRING_POINTER (attr_zero_regs), "used") == 0)
>> + attr_zero_regs_type = zero_call_used_regs_used;
>> +      else if (strcmp (TREE_STRING_POINTER (attr_zero_regs), "all") == 0)
>> + attr_zero_regs_type = zero_call_used_regs_all;
>> +      else
>> + gcc_assert (0);
>> +    }
>> +
>> +  if (flag_zero_call_used_regs)
>> +    if (!attr_zero_regs)
>> +      zero_regs_type = flag_zero_call_used_regs;
>> +    else
>> +      zero_regs_type = attr_zero_regs_type;
>> +  else
>> +    zero_regs_type = attr_zero_regs_type;
>> +
>> +  /* No need to zero call-used-regs when no user request is present.  */
>> +  if (zero_regs_type <= zero_call_used_regs_skip)
>> +    return;
>> +
>> +  /* No need to zero call-used-regs in main ().  */
>> +  if (MAIN_NAME_P (DECL_NAME (current_function_decl)))
>> +    return;
>> +
>> +  /* No need to zero call-used-regs if __builtin_eh_return is called
>> +     since it isn't a normal function return.  */
>> +  if (crtl->calls_eh_return)
>> +    return;
>> +
>> +  /* If gpr_only is true, only zero call-used-registers that are
>> +     general-purpose registers; if used_only is true, only zero
>> +     call-used-registers that are used in the current function;
>> +     if arg_only is true, only zero call-used-registers that pass
>> +     parameters.  */
>> +
>> +  switch (zero_regs_type)
>> +    {
>> +      case zero_call_used_regs_used_arg:
>> + gpr_only = false;
>> + break;
>> +      case zero_call_used_regs_all_arg:
>> + gpr_only = false;
>> + used_only = false;
>> + break;
>> +      case zero_call_used_regs_used_gpr:
>> + arg_only = false;
>> + break;
>> +      case zero_call_used_regs_all_gpr:
>> + used_only = false;
>> + arg_only = false;
>> + break;
>> +      case zero_call_used_regs_used:
>> + gpr_only = false;
>> + arg_only = false;
>> + break;
>> +      case zero_call_used_regs_all:
>> + gpr_only = false;
>> + used_only = false;
>> + arg_only = false;
>> + break;
>> +      default:
>> + break;
>> +    }
>> +
>> +  /* For each of the hard registers, check to see whether we should zero it 
>> if:
>> +     1. it is a call-used register;
>> + and 2. it is not a fixed register;
>> + and 3. it is not live at the return of the routine;
>> + and 4. it is a general register if gpr_only is true;
>> + and 5. it is used in the routine if used_only is true;
>> + and 6. it is a register that passes parameters if arg_only is true;
>> +   */
>> +
>> +  HARD_REG_SET need_zeroed_hardregs;
>> +  CLEAR_HARD_REG_SET (need_zeroed_hardregs);
>> +  for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
>> +    {
>> +      if (!this_target_hard_regs->x_call_used_regs[regno])
>> + continue;
>> +      if (fixed_regs[regno])
>> + continue;
>> +      if (is_live_reg_at_return (regno, ret))
>> + continue;
>> +      if (gpr_only
>> +   && !TEST_HARD_REG_BIT (reg_class_contents[GENERAL_REGS], regno))
>> + continue;
>> +      if (used_only && !df_regs_ever_live_p (regno))
>> + continue;
>> +      if (arg_only && !FUNCTION_ARG_REGNO_P (regno))
>> + continue;
>> +
>> +      /* Now this is a register that we might want to zero.  */
>> +      SET_HARD_REG_BIT (need_zeroed_hardregs, regno);
>> +    }
>> +
>> +  if (hard_reg_set_empty_p (need_zeroed_hardregs))
>> +    return;
>> +
>> +  /* Now we get a hard register set that need to be zeroed, pass it to
>> +     target to generate zeroing sequence.  */
>> +  HARD_REG_SET zeroed_hardregs;
>> +  start_sequence ();
>> +  zeroed_hardregs = targetm.calls.zero_call_used_regs 
>> (need_zeroed_hardregs);
>> +  rtx_insn *seq = get_insns ();
>> +  end_sequence ();
>> +  if (seq)
>> +    {
>> +      /* emit the memory blockage and register clobber asm volatile before
>> +  the whole sequence.  */
>> +      start_sequence ();
>> +      expand_asm_reg_clobber_mem_blockage (zeroed_hardregs);
>> +      rtx_insn *seq_barrier = get_insns ();
>> +      end_sequence ();
>> +
>> +      emit_insn_before (seq_barrier, ret);
>> +      emit_insn_before (seq, ret);
>> +
>> +      /* update the data flow information.  */
>> +      crtl->zeroed_reg_set |= zeroed_hardregs;
>> +      df_set_bb_dirty (EXIT_BLOCK_PTR_FOR_FN (cfun));
>> +    }
>> +  return;
>> +}
>> +
>> +
>> /* Return a sequence to be used as the epilogue for the current function,
>>   or NULL.  */
>> 
>> @@ -6486,7 +6663,75 @@ make_pass_thread_prologue_and_epilogue (gcc::context 
>> *ctxt)
>> {
>>  return new pass_thread_prologue_and_epilogue (ctxt);
>> }
>> -
>> 
>> +
>> +static unsigned int
>> +rest_of_zero_call_used_regs (void)
>> +{
>> +  basic_block bb;
>> +  rtx_insn *insn;
>> +
>> +  /* This pass needs data flow information.  */
>> +  df_analyze ();
>> +
>> +  /* Search all the "return"s in the routine, and insert instruction 
>> sequence to
>> +     zero the call used registers.  */
>> +  FOR_EACH_BB_REVERSE_FN (bb, cfun)
>> +    if (bb == EXIT_BLOCK_PTR_FOR_FN (cfun)
>> + || (single_succ_p (bb)
>> +     && single_succ (bb) == EXIT_BLOCK_PTR_FOR_FN (cfun)))
>> +      FOR_BB_INSNS_REVERSE (bb, insn)
>> + if (JUMP_P (insn) && ANY_RETURN_P (JUMP_LABEL (insn)))
>> +   {
>> +     /* Now we can insert the instruction sequence to zero the call used
>> +        registers before this insn.  */
>> +     gen_call_used_regs_seq (insn);
>> +     break;
>> +   }
>> +
>> +  return 0;
>> +}
>> +
>> +namespace {
>> +
>> +const pass_data pass_data_zero_call_used_regs =
>> +{
>> +  RTL_PASS, /* type */
>> +  "zero_call_used_regs", /* name */
>> +  OPTGROUP_NONE, /* optinfo_flags */
>> +  TV_NONE, /* tv_id */
>> +  0, /* properties_required */
>> +  0, /* properties_provided */
>> +  0, /* properties_destroyed */
>> +  0, /* todo_flags_start */
>> +  0, /* todo_flags_finish */
>> +};
>> +
>> +class pass_zero_call_used_regs: public rtl_opt_pass
>> +{
>> +public:
>> +  pass_zero_call_used_regs (gcc::context *ctxt)
>> +    : rtl_opt_pass (pass_data_zero_call_used_regs, ctxt)
>> +  {}
>> +
>> +  /* opt_pass methods: */
>> +  virtual bool gate (function *)
>> +    {
>> +      return flag_zero_call_used_regs > zero_call_used_regs_unset;
>> +    }
>> +  virtual unsigned int execute (function *)
>> +    {
>> +      return rest_of_zero_call_used_regs ();
>> +    }
>> +
>> +}; // class pass_zero_call_used_regs
>> +
>> +} // anon namespace
>> +
>> +rtl_opt_pass *
>> +make_pass_zero_call_used_regs (gcc::context *ctxt)
>> +{
>> +  return new pass_zero_call_used_regs (ctxt);
>> +}
>> 
>> /* If CONSTRAINT is a matching constraint, then return its number.
>>   Otherwise, return -1.  */
>> diff --git a/gcc/optabs.c b/gcc/optabs.c
>> index 8ad7f4b..57e5c5d 100644
>> --- a/gcc/optabs.c
>> +++ b/gcc/optabs.c
>> @@ -6484,6 +6484,49 @@ expand_memory_blockage (void)
>>    expand_asm_memory_blockage ();
>> }
>> 
>> +/* Generate asm volatile("" : : : "memory") as a memory blockage, at the
>> +   same time clobbering the register set specified by ZEROED_REGS.  */
>> +
>> +void
>> +expand_asm_reg_clobber_mem_blockage (HARD_REG_SET zeroed_regs)
>> +{
>> +  rtx asm_op, clob_mem, clob_reg;
>> +
>> +  unsigned int num_of_regs = 0;
>> +  for (unsigned int i = 0; i < FIRST_PSEUDO_REGISTER; i++)
>> +    if (TEST_HARD_REG_BIT (zeroed_regs, i))
>> +      num_of_regs++;
>> +
>> +  if (num_of_regs == 0)
>> +    return;
>> +
>> +  asm_op = gen_rtx_ASM_OPERANDS (VOIDmode, "", "", 0,
>> +  rtvec_alloc (0), rtvec_alloc (0),
>> +  rtvec_alloc (0), UNKNOWN_LOCATION);
>> +  MEM_VOLATILE_P (asm_op) = 1;
>> +
>> +  rtvec v = rtvec_alloc (num_of_regs + 2);
>> +
>> +  clob_mem = gen_rtx_SCRATCH (VOIDmode);
>> +  clob_mem = gen_rtx_MEM (BLKmode, clob_mem);
>> +  clob_mem = gen_rtx_CLOBBER (VOIDmode, clob_mem);
>> +
>> +  RTVEC_ELT (v,0) = asm_op;
>> +  RTVEC_ELT (v,1) = clob_mem;
>> +
>> +  unsigned int j = 2;
>> +  for (unsigned int i = 0; i < FIRST_PSEUDO_REGISTER; i++)
>> +    if (TEST_HARD_REG_BIT (zeroed_regs, i))
>> +      {
>> + clob_reg  = gen_rtx_CLOBBER (VOIDmode, regno_reg_rtx[i]);
>> + RTVEC_ELT (v,j) = clob_reg;
>> +  j++;
>> +      }
>> +  gcc_assert (j == (num_of_regs + 2));
>> +
>> +  emit_insn (gen_rtx_PARALLEL (VOIDmode, v));
>> +}
>> +
>> /* This routine will either emit the mem_thread_fence pattern or issue a
>>   sync_synchronize to generate a fence for memory model MEMMODEL.  */
>> 
>> diff --git a/gcc/optabs.h b/gcc/optabs.h
>> index 0b14700..bfa10c8 100644
>> --- a/gcc/optabs.h
>> +++ b/gcc/optabs.h
>> @@ -345,6 +345,8 @@ rtx expand_atomic_store (rtx, rtx, enum memmodel, bool);
>> rtx expand_atomic_fetch_op (rtx, rtx, rtx, enum rtx_code, enum memmodel,
>>      bool);
>> 
>> +extern void expand_asm_reg_clobber_mem_blockage (HARD_REG_SET);
>> +
>> extern bool insn_operand_matches (enum insn_code icode, unsigned int opno,
>>  rtx operand);
>> extern bool valid_multiword_target_p (rtx);
>> diff --git a/gcc/passes.def b/gcc/passes.def
>> index f865bdc..77d4676 100644
>> --- a/gcc/passes.def
>> +++ b/gcc/passes.def
>> @@ -492,6 +492,7 @@ along with GCC; see the file COPYING3.  If not see
>>      POP_INSERT_PASSES ()
>>      NEXT_PASS (pass_late_compilation);
>>      PUSH_INSERT_PASSES_WITHIN (pass_late_compilation)
>> +   NEXT_PASS (pass_zero_call_used_regs);
>>  NEXT_PASS (pass_compute_alignments);
>>  NEXT_PASS (pass_variable_tracking);
>>  NEXT_PASS (pass_free_cfg);
>> diff --git a/gcc/recog.c b/gcc/recog.c
>> index ce83b7f..472c2dc 100644
>> --- a/gcc/recog.c
>> +++ b/gcc/recog.c
>> @@ -923,6 +923,21 @@ validate_simplify_insn (rtx_insn *insn)
>>  return ((num_changes_pending () > 0) && (apply_change_group () > 0));
>> }
>> 
>> 
>> +
>> +bool
>> +valid_insn_p (rtx_insn *insn)
>> +{
>> +  recog_memoized (insn);
>> +  if (INSN_CODE (insn) < 0)
>> +    return false;
>> +  extract_insn (insn);
>> +  /* We don't know whether the insn will be in code that is optimized
>> +     for size or speed, so consider all enabled alternatives.  */
>> +  if (!constrain_operands (1, get_enabled_alternatives (insn)))
>> +    return false;
>> +  return true;
>> +}
>> +
>> /* Return 1 if OP is a valid general operand for machine mode MODE.
>>   This is either a register reference, a memory reference,
>>   or a constant.  In the case of a memory reference, the address
>> diff --git a/gcc/recog.h b/gcc/recog.h
>> index ae3675f..d87456c 100644
>> --- a/gcc/recog.h
>> +++ b/gcc/recog.h
>> @@ -113,6 +113,7 @@ extern void validate_replace_src_group (rtx, rtx, 
>> rtx_insn *);
>> extern bool validate_simplify_insn (rtx_insn *insn);
>> extern int num_changes_pending (void);
>> extern bool reg_fits_class_p (const_rtx, reg_class_t, int, machine_mode);
>> +extern bool valid_insn_p (rtx_insn *);
>> 
>> extern int offsettable_memref_p (rtx);
>> extern int offsettable_nonstrict_memref_p (rtx);
>> diff --git a/gcc/resource.c b/gcc/resource.c
>> index 0a9d594..90cf091 100644
>> --- a/gcc/resource.c
>> +++ b/gcc/resource.c
>> @@ -1186,7 +1186,7 @@ init_resource_info (rtx_insn *epilogue_insn)
>>       &end_of_function_needs, true);
>> 
>>  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
>> -    if (global_regs[i] || EPILOGUE_USES (i))
>> +    if (global_regs[i] || df_epilogue_uses_p (i))
>>      SET_HARD_REG_BIT (end_of_function_needs.regs, i);
>> 
>>  /* The registers required to be live at the end of the function are
>> diff --git a/gcc/target.def b/gcc/target.def
>> index ed2da15..7d6807d 100644
>> --- a/gcc/target.def
>> +++ b/gcc/target.def
>> @@ -5080,6 +5080,19 @@ argument list due to stack realignment.  Return 
>> @code{NULL} if no DRAP\n\
>> is needed.",
>> rtx, (void), NULL)
>> 
>> +/* Generate instruction sequence to zero call used registers.  */
>> +DEFHOOK
>> +(zero_call_used_regs,
>> + "This target hook emits instructions to zero registers specified\n\
>> +by @var{need_zeroed_hardregs} at function return, and returns the\n\
>> +set of hard registers that are actually zeroed by the hook.\n\
>> +Define this hook if the target has more efficient instructions to\n\
>> +zero call-used registers, or if the target only tries to zero a subset\n\
>> +of @var{need_zeroed_hardregs}.\n\
>> +If the hook is not defined, the default_zero_call_used_regs will be used.",
>> + HARD_REG_SET, (HARD_REG_SET need_zeroed_hardregs),
>> +default_zero_call_used_regs)
>> +
>> /* Return true if all function parameters should be spilled to the
>>   stack.  */
>> DEFHOOK
>> diff --git a/gcc/targhooks.c b/gcc/targhooks.c
>> index 5d94fce..2318c324 100644
>> --- a/gcc/targhooks.c
>> +++ b/gcc/targhooks.c
>> @@ -56,6 +56,9 @@ along with GCC; see the file COPYING3.  If not see
>> #include "tree-ssa-alias.h"
>> #include "gimple-expr.h"
>> #include "memmodel.h"
>> +#include "backend.h"
>> +#include "emit-rtl.h"
>> +#include "df.h"
>> #include "tm_p.h"
>> #include "stringpool.h"
>> #include "tree-vrp.h"
>> @@ -987,6 +990,38 @@ default_function_value_regno_p (const unsigned int 
>> regno ATTRIBUTE_UNUSED)
>> #endif
>> }
>> 
>> +/* The default hook for TARGET_ZERO_CALL_USED_REGS.  */
>> +
>> +HARD_REG_SET
>> +default_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs)
>> +{
>> +  HARD_REG_SET zeroed_hardregs;
>> +  gcc_assert (!hard_reg_set_empty_p (need_zeroed_hardregs));
>> +
>> +  CLEAR_HARD_REG_SET (zeroed_hardregs);
>> +  for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
>> +    if (TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
>> +      {
>> + rtx_insn *last_insn = get_last_insn ();
>> + machine_mode mode = GET_MODE (regno_reg_rtx[regno]);
>> + rtx zero = CONST0_RTX (mode);
>> + rtx_insn *insn = emit_move_insn (regno_reg_rtx[regno], zero);
>> + if (!valid_insn_p (insn))
>> +   {
>> +     static bool issued_error;
>> +     if (!issued_error)
>> +       {
>> + issued_error = true;
>> + sorry ("-fzero-call-used-regs not supported on this target");
>> +       }
>> +     delete_insns_since (last_insn);
>> +   }
>> + else
>> +   SET_HARD_REG_BIT (zeroed_hardregs, regno);
>> +      }
>> +  return zeroed_hardregs;
>> +}
>> +
>> rtx
>> default_internal_arg_pointer (void)
>> {
>> diff --git a/gcc/targhooks.h b/gcc/targhooks.h
>> index 44ab926..e0a925f 100644
>> --- a/gcc/targhooks.h
>> +++ b/gcc/targhooks.h
>> @@ -160,6 +160,7 @@ extern unsigned int default_function_arg_round_boundary 
>> (machine_mode,
>> const_tree);
>> extern bool hook_bool_const_rtx_commutative_p (const_rtx, int);
>> extern rtx default_function_value (const_tree, const_tree, bool);
>> +extern HARD_REG_SET default_zero_call_used_regs (HARD_REG_SET);
>> extern rtx default_libcall_value (machine_mode, const_rtx);
>> extern bool default_function_value_regno_p (const unsigned int);
>> extern rtx default_internal_arg_pointer (void);
>> diff --git a/gcc/testsuite/c-c++-common/zero-scratch-regs-1.c 
>> b/gcc/testsuite/c-c++-common/zero-scratch-regs-1.c
>> new file mode 100644
>> index 0000000..f44add9
>> --- /dev/null
>> +++ b/gcc/testsuite/c-c++-common/zero-scratch-regs-1.c
>> @@ -0,0 +1,15 @@
>> +/* { dg-do run } */
>> +/* { dg-options "-O2 -fzero-call-used-regs=all" } */
>> +
>> +volatile int result = 0;
>> +int
>> +__attribute__((noinline))
>> +foo (int x)
>> +{
>> +  return x;
>> +}
>> +int main()
>> +{
>> +  result = foo (2);
>> +  return 0;
>> +}
>> diff --git a/gcc/testsuite/c-c++-common/zero-scratch-regs-2.c 
>> b/gcc/testsuite/c-c++-common/zero-scratch-regs-2.c
>> new file mode 100644
>> index 0000000..7c8350b
>> --- /dev/null
>> +++ b/gcc/testsuite/c-c++-common/zero-scratch-regs-2.c
>> @@ -0,0 +1,16 @@
>> +/* { dg-do run } */
>> +/* { dg-options "-O2" } */
>> +
>> +volatile int result = 0;
>> +int
>> +__attribute__((noinline))
>> +__attribute__ ((zero_call_used_regs("all")))
>> +foo (int x)
>> +{
>> +  return x;
>> +}
>> +int main()
>> +{
>> +  result = foo (2);
>> +  return 0;
>> +}
>> diff --git a/gcc/testsuite/gcc.target/i386/zero-scratch-regs-1.c 
>> b/gcc/testsuite/gcc.target/i386/zero-scratch-regs-1.c
>> new file mode 100644
>> index 0000000..9f61dc4
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/i386/zero-scratch-regs-1.c
>> @@ -0,0 +1,12 @@
>> +/* { dg-do compile { target *-*-linux* } } */
>> +/* { dg-options "-O2 -fzero-call-used-regs=used" } */
>> +
>> +void
>> +foo (void)
>> +{
>> +}
>> +
>> +/* { dg-final { scan-assembler-not "vzeroall" } } */
>> +/* { dg-final { scan-assembler-not "%xmm" } } */
>> +/* { dg-final { scan-assembler-not "xorl\[ \t\]*%" } } */
>> +/* { dg-final { scan-assembler-not "movl\[ \t\]*%" } } */
>> diff --git a/gcc/testsuite/gcc.target/i386/zero-scratch-regs-10.c 
>> b/gcc/testsuite/gcc.target/i386/zero-scratch-regs-10.c
>> new file mode 100644
>> index 0000000..09048e5
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/i386/zero-scratch-regs-10.c
>> @@ -0,0 +1,21 @@
>> +/* { dg-do compile { target *-*-linux* } } */
>> +/* { dg-options "-O2 -fzero-call-used-regs=skip" } */
>> +
>> +extern int foo (int) __attribute__ ((zero_call_used_regs("all-gpr")));
>> +
>> +int
>> +foo (int x)
>> +{
>> +  return x;
>> +}
>> +
>> +/* { dg-final { scan-assembler-not "vzeroall" } } */
>> +/* { dg-final { scan-assembler-not "%xmm" } } */
>> +/* { dg-final { scan-assembler "xorl\[ \t\]*%edx, %edx" } } */
>> +/* { dg-final { scan-assembler "movl\[ \t\]*%edx, %ecx" } } */
>> +/* { dg-final { scan-assembler "movl\[ \t\]*%edx, %esi" { target { ! ia32 } 
>> } } } */
>> +/* { dg-final { scan-assembler "movl\[ \t\]*%edx, %edi" { target { ! ia32 } 
>> } } } */
>> +/* { dg-final { scan-assembler "movl\[ \t\]*%edx, %r8d" { target { ! ia32 } 
>> } } } */
>> +/* { dg-final { scan-assembler "movl\[ \t\]*%edx, %r9d" { target { ! ia32 } 
>> } } } */
>> +/* { dg-final { scan-assembler "movl\[ \t\]*%edx, %r10d" { target { ! ia32 
>> } } } } */
>> +/* { dg-final { scan-assembler "movl\[ \t\]*%edx, %r11d" { target { ! ia32 
>> } } } } */
>> diff --git a/gcc/testsuite/gcc.target/i386/zero-scratch-regs-11.c 
>> b/gcc/testsuite/gcc.target/i386/zero-scratch-regs-11.c
>> new file mode 100644
>> index 0000000..4862688
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/i386/zero-scratch-regs-11.c
>> @@ -0,0 +1,39 @@
>> +/* { dg-do run { target *-*-linux* } } */
>> +/* { dg-options "-O2 -fzero-call-used-regs=used-gpr" } */
>> +
>> +struct S { int i; };
>> +__attribute__((const, noinline, noclone))
>> +struct S foo (int x)
>> +{
>> +  struct S s;
>> +  s.i = x;
>> +  return s;
>> +}
>> +
>> +int a[2048], b[2048], c[2048], d[2048];
>> +struct S e[2048];
>> +
>> +__attribute__((noinline, noclone)) void
>> +bar (void)
>> +{
>> +  int i;
>> +  for (i = 0; i < 1024; i++)
>> +    {
>> +      e[i] = foo (i);
>> +      a[i+2] = a[i] + a[i+1];
>> +      b[10] = b[10] + i;
>> +      c[i] = c[2047 - i];
>> +      d[i] = d[i + 1];
>> +    }
>> +}
>> +
>> +int
>> +main ()
>> +{
>> +  int i;
>> +  bar ();
>> +  for (i = 0; i < 1024; i++)
>> +    if (e[i].i != i)
>> +      __builtin_abort ();
>> +  return 0;
>> +}
>> diff --git a/gcc/testsuite/gcc.target/i386/zero-scratch-regs-12.c 
>> b/gcc/testsuite/gcc.target/i386/zero-scratch-regs-12.c
>> new file mode 100644
>> index 0000000..500251b
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/i386/zero-scratch-regs-12.c
>> @@ -0,0 +1,39 @@
>> +/* { dg-do run { target *-*-linux* } } */
>> +/* { dg-options "-O2 -fzero-call-used-regs=all-gpr" } */
>> +
>> +struct S { int i; };
>> +__attribute__((const, noinline, noclone))
>> +struct S foo (int x)
>> +{
>> +  struct S s;
>> +  s.i = x;
>> +  return s;
>> +}
>> +
>> +int a[2048], b[2048], c[2048], d[2048];
>> +struct S e[2048];
>> +
>> +__attribute__((noinline, noclone)) void
>> +bar (void)
>> +{
>> +  int i;
>> +  for (i = 0; i < 1024; i++)
>> +    {
>> +      e[i] = foo (i);
>> +      a[i+2] = a[i] + a[i+1];
>> +      b[10] = b[10] + i;
>> +      c[i] = c[2047 - i];
>> +      d[i] = d[i + 1];
>> +    }
>> +}
>> +
>> +int
>> +main ()
>> +{
>> +  int i;
>> +  bar ();
>> +  for (i = 0; i < 1024; i++)
>> +    if (e[i].i != i)
>> +      __builtin_abort ();
>> +  return 0;
>> +}
>> diff --git a/gcc/testsuite/gcc.target/i386/zero-scratch-regs-13.c 
>> b/gcc/testsuite/gcc.target/i386/zero-scratch-regs-13.c
>> new file mode 100644
>> index 0000000..8b058e3
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/i386/zero-scratch-regs-13.c
>> @@ -0,0 +1,21 @@
>> +/* { dg-do compile { target *-*-linux* } } */
>> +/* { dg-options "-O2 -fzero-call-used-regs=all -march=corei7" } */
>> +
>> +void
>> +foo (void)
>> +{
>> +}
>> +
>> +/* { dg-final { scan-assembler-not "vzeroall" } } */
>> +/* { dg-final { scan-assembler "pxor\[ \t\]*%xmm0, %xmm0" } } */
>> +/* { dg-final { scan-assembler-times "movaps\[ \t\]*%xmm0, %xmm\[0-9\]+" 7 
>> { target { ia32 } } } } */
>> +/* { dg-final { scan-assembler-times "movaps\[ \t\]*%xmm0, %xmm\[0-9\]+" 15 
>> { target { ! ia32 } } } } */
>> +/* { dg-final { scan-assembler "xorl\[ \t\]*%eax, %eax" } } */
>> +/* { dg-final { scan-assembler "movl\[ \t\]*%eax, %edx" } } */
>> +/* { dg-final { scan-assembler "movl\[ \t\]*%eax, %ecx" } } */
>> +/* { dg-final { scan-assembler "movl\[ \t\]*%eax, %esi" { target { ! ia32 } 
>> } } } */
>> +/* { dg-final { scan-assembler "movl\[ \t\]*%eax, %edi" { target { ! ia32 } 
>> } } } */
>> +/* { dg-final { scan-assembler "movl\[ \t\]*%eax, %r8d" { target { ! ia32 } 
>> } } } */
>> +/* { dg-final { scan-assembler "movl\[ \t\]*%eax, %r9d" { target { ! ia32 } 
>> } } } */
>> +/* { dg-final { scan-assembler "movl\[ \t\]*%eax, %r10d" { target { ! ia32 
>> } } } } */
>> +/* { dg-final { scan-assembler "movl\[ \t\]*%eax, %r11d" { target { ! ia32 
>> } } } } */
>> diff --git a/gcc/testsuite/gcc.target/i386/zero-scratch-regs-14.c 
>> b/gcc/testsuite/gcc.target/i386/zero-scratch-regs-14.c
>> new file mode 100644
>> index 0000000..d4eaaf7
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/i386/zero-scratch-regs-14.c
>> @@ -0,0 +1,19 @@
>> +/* { dg-do compile { target *-*-linux* } } */
>> +/* { dg-options "-O2 -fzero-call-used-regs=all -march=corei7 -mavx" } */
>> +
>> +void
>> +foo (void)
>> +{
>> +}
>> +
>> +/* { dg-final { scan-assembler-times "vzeroall" 1 } } */
>> +/* { dg-final { scan-assembler-not "%xmm" } } */
>> +/* { dg-final { scan-assembler "xorl\[ \t\]*%eax, %eax" } } */
>> +/* { dg-final { scan-assembler "movl\[ \t\]*%eax, %edx" } } */
>> +/* { dg-final { scan-assembler "movl\[ \t\]*%eax, %ecx" } } */
>> +/* { dg-final { scan-assembler "movl\[ \t\]*%eax, %esi" { target { ! ia32 } 
>> } } } */
>> +/* { dg-final { scan-assembler "movl\[ \t\]*%eax, %edi" { target { ! ia32 } 
>> } } } */
>> +/* { dg-final { scan-assembler "movl\[ \t\]*%eax, %r8d" { target { ! ia32 } 
>> } } } */
>> +/* { dg-final { scan-assembler "movl\[ \t\]*%eax, %r9d" { target { ! ia32 } 
>> } } } */
>> +/* { dg-final { scan-assembler "movl\[ \t\]*%eax, %r10d" { target { ! ia32 
>> } } } } */
>> +/* { dg-final { scan-assembler "movl\[ \t\]*%eax, %r11d" { target { ! ia32 
>> } } } } */
>> diff --git a/gcc/testsuite/gcc.target/i386/zero-scratch-regs-15.c 
>> b/gcc/testsuite/gcc.target/i386/zero-scratch-regs-15.c
>> new file mode 100644
>> index 0000000..dd3bb90
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/i386/zero-scratch-regs-15.c
>> @@ -0,0 +1,14 @@
>> +/* { dg-do compile { target *-*-linux* } } */
>> +/* { dg-options "-O2 -fzero-call-used-regs=skip" } */
>> +
>> +extern void foo (void) __attribute__ ((zero_call_used_regs("used")));
>> +
>> +void
>> +foo (void)
>> +{
>> +}
>> +
>> +/* { dg-final { scan-assembler-not "vzeroall" } } */
>> +/* { dg-final { scan-assembler-not "%xmm" } } */
>> +/* { dg-final { scan-assembler-not "xorl\[ \t\]*%" } } */
>> +/* { dg-final { scan-assembler-not "movl\[ \t\]*%" } } */
>> diff --git a/gcc/testsuite/gcc.target/i386/zero-scratch-regs-16.c 
>> b/gcc/testsuite/gcc.target/i386/zero-scratch-regs-16.c
>> new file mode 100644
>> index 0000000..e2274f6
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/i386/zero-scratch-regs-16.c
>> @@ -0,0 +1,14 @@
>> +/* { dg-do compile { target *-*-linux* } } */
>> +/* { dg-options "-O2 -fzero-call-used-regs=all" } */
>> +
>> +extern void foo (void) __attribute__ ((zero_call_used_regs("skip")));
>> +
>> +void
>> +foo (void)
>> +{
>> +}
>> +
>> +/* { dg-final { scan-assembler-not "vzeroall" } } */
>> +/* { dg-final { scan-assembler-not "%xmm" } } */
>> +/* { dg-final { scan-assembler-not "xorl\[ \t\]*%" } } */
>> +/* { dg-final { scan-assembler-not "movl\[ \t\]*%" } } */
>> diff --git a/gcc/testsuite/gcc.target/i386/zero-scratch-regs-17.c 
>> b/gcc/testsuite/gcc.target/i386/zero-scratch-regs-17.c
>> new file mode 100644
>> index 0000000..7f5d153
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/i386/zero-scratch-regs-17.c
>> @@ -0,0 +1,13 @@
>> +/* { dg-do compile { target *-*-linux* } } */
>> +/* { dg-options "-O2 -fzero-call-used-regs=used" } */
>> +
>> +int
>> +foo (int x)
>> +{
>> +  return x;
>> +}
>> +
>> +/* { dg-final { scan-assembler-not "vzeroall" } } */
>> +/* { dg-final { scan-assembler-not "%xmm" } } */
>> +/* { dg-final { scan-assembler-not "xorl\[ \t\]*%" { target ia32 } } } */
>> +/* { dg-final { scan-assembler "xorl\[ \t\]*%edi, %edi" { target { ! ia32 } 
>> } } } */
>> diff --git a/gcc/testsuite/gcc.target/i386/zero-scratch-regs-18.c 
>> b/gcc/testsuite/gcc.target/i386/zero-scratch-regs-18.c
>> new file mode 100644
>> index 0000000..fe13d2b
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/i386/zero-scratch-regs-18.c
>> @@ -0,0 +1,13 @@
>> +/* { dg-do compile { target *-*-linux* } } */
>> +/* { dg-options "-O2 -fzero-call-used-regs=used -march=corei7" } */
>> +
>> +float
>> +foo (float z, float y, float x)
>> +{
>> +  return x + y;
>> +}
>> +
>> +/* { dg-final { scan-assembler-not "vzeroall" } } */
>> +/* { dg-final { scan-assembler "pxor\[ \t\]*%xmm1, %xmm1" { target { ! ia32 
>> } } } } */
>> +/* { dg-final { scan-assembler "movaps\[ \t\]*%xmm1, %xmm2" { target { ! 
>> ia32 } } } } */
>> +/* { dg-final { scan-assembler-not "xorl\[ \t\]*%" } } */
>> diff --git a/gcc/testsuite/gcc.target/i386/zero-scratch-regs-19.c 
>> b/gcc/testsuite/gcc.target/i386/zero-scratch-regs-19.c
>> new file mode 100644
>> index 0000000..205a532
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/i386/zero-scratch-regs-19.c
>> @@ -0,0 +1,12 @@
>> +/* { dg-do compile { target *-*-linux* } } */
>> +/* { dg-options "-O2 -fzero-call-used-regs=used -march=corei7" } */
>> +
>> +float
>> +foo (float z, float y, float x)
>> +{
>> +  return x;
>> +}
>> +
>> +/* { dg-final { scan-assembler-not "vzeroall" } } */
>> +/* { dg-final { scan-assembler "pxor\[ \t\]*%xmm2, %xmm2" { target { ! ia32 
>> } } } } */
>> +/* { dg-final { scan-assembler-not "xorl\[ \t\]*%" } } */
>> diff --git a/gcc/testsuite/gcc.target/i386/zero-scratch-regs-2.c 
>> b/gcc/testsuite/gcc.target/i386/zero-scratch-regs-2.c
>> new file mode 100644
>> index 0000000..e046684
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/i386/zero-scratch-regs-2.c
>> @@ -0,0 +1,19 @@
>> +/* { dg-do compile { target *-*-linux* } } */
>> +/* { dg-options "-O2 -fzero-call-used-regs=all-gpr" } */
>> +
>> +void
>> +foo (void)
>> +{
>> +}
>> +
>> +/* { dg-final { scan-assembler-not "vzeroall" } } */
>> +/* { dg-final { scan-assembler-not "%xmm" } } */
>> +/* { dg-final { scan-assembler "xorl\[ \t\]*%eax, %eax" } } */
>> +/* { dg-final { scan-assembler "movl\[ \t\]*%eax, %edx" } } */
>> +/* { dg-final { scan-assembler "movl\[ \t\]*%eax, %ecx" } } */
>> +/* { dg-final { scan-assembler "movl\[ \t\]*%eax, %esi" { target { ! ia32 } 
>> } } } */
>> +/* { dg-final { scan-assembler "movl\[ \t\]*%eax, %edi" { target { ! ia32 } 
>> } } } */
>> +/* { dg-final { scan-assembler "movl\[ \t\]*%eax, %r8d" { target { ! ia32 } 
>> } } } */
>> +/* { dg-final { scan-assembler "movl\[ \t\]*%eax, %r9d" { target { ! ia32 } 
>> } } } */
>> +/* { dg-final { scan-assembler "movl\[ \t\]*%eax, %r10d" { target { ! ia32 
>> } } } } */
>> +/* { dg-final { scan-assembler "movl\[ \t\]*%eax, %r11d" { target { ! ia32 
>> } } } } */
>> diff --git a/gcc/testsuite/gcc.target/i386/zero-scratch-regs-20.c 
>> b/gcc/testsuite/gcc.target/i386/zero-scratch-regs-20.c
>> new file mode 100644
>> index 0000000..4be8ff6
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/i386/zero-scratch-regs-20.c
>> @@ -0,0 +1,23 @@
>> +/* { dg-do compile { target *-*-linux* } } */
>> +/* { dg-options "-O2 -fzero-call-used-regs=all -march=corei7" } */
>> +
>> +float
>> +foo (float z, float y, float x)
>> +{
>> +  return x + y;
>> +}
>> +
>> +/* { dg-final { scan-assembler-not "vzeroall" } } */
>> +/* { dg-final { scan-assembler "pxor\[ \t\]*%xmm0, %xmm0" { target { ia32 } 
>> } } } */
>> +/* { dg-final { scan-assembler "pxor\[ \t\]*%xmm1, %xmm1" { target { ! ia32 
>> } } } } */
>> +/* { dg-final { scan-assembler-times "movaps\[ \t\]*%xmm0, %xmm\[0-9\]+" 7 
>> { target { ia32 } } } } */
>> +/* { dg-final { scan-assembler-times "movaps\[ \t\]*%xmm1, %xmm\[0-9\]+" 14 
>> { target { ! ia32 } } } } */
>> +/* { dg-final { scan-assembler "xorl\[ \t\]*%eax, %eax" } } */
>> +/* { dg-final { scan-assembler "movl\[ \t\]*%eax, %edx" } } */
>> +/* { dg-final { scan-assembler "movl\[ \t\]*%eax, %ecx" } } */
>> +/* { dg-final { scan-assembler "movl\[ \t\]*%eax, %esi" { target { ! ia32 } 
>> } } } */
>> +/* { dg-final { scan-assembler "movl\[ \t\]*%eax, %edi" { target { ! ia32 } 
>> } } } */
>> +/* { dg-final { scan-assembler "movl\[ \t\]*%eax, %r8d" { target { ! ia32 } 
>> } } } */
>> +/* { dg-final { scan-assembler "movl\[ \t\]*%eax, %r9d" { target { ! ia32 } 
>> } } } */
>> +/* { dg-final { scan-assembler "movl\[ \t\]*%eax, %r10d" { target { ! ia32 
>> } } } } */
>> +/* { dg-final { scan-assembler "movl\[ \t\]*%eax, %r11d" { target { ! ia32 
>> } } } } */
>> diff --git a/gcc/testsuite/gcc.target/i386/zero-scratch-regs-21.c 
>> b/gcc/testsuite/gcc.target/i386/zero-scratch-regs-21.c
>> new file mode 100644
>> index 0000000..0eb34e0
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/i386/zero-scratch-regs-21.c
>> @@ -0,0 +1,14 @@
>> +/* { dg-do compile { target *-*-linux* } } */
>> +/* { dg-options "-O2 -fzero-call-used-regs=skip -march=corei7" } */
>> +
>> +__attribute__ ((zero_call_used_regs("used")))
>> +float
>> +foo (float z, float y, float x)
>> +{
>> +  return x + y;
>> +}
>> +
>> +/* { dg-final { scan-assembler-not "vzeroall" } } */
>> +/* { dg-final { scan-assembler "pxor\[ \t\]*%xmm1, %xmm1" { target { ! ia32 
>> } } } } */
>> +/* { dg-final { scan-assembler "movaps\[ \t\]*%xmm1, %xmm2" { target { ! 
>> ia32 } } } } */
>> +/* { dg-final { scan-assembler-not "xorl\[ \t\]*%" } } */
>> diff --git a/gcc/testsuite/gcc.target/i386/zero-scratch-regs-22.c 
>> b/gcc/testsuite/gcc.target/i386/zero-scratch-regs-22.c
>> new file mode 100644
>> index 0000000..76742bb
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/i386/zero-scratch-regs-22.c
>> @@ -0,0 +1,20 @@
>> +/* { dg-do compile { target *-*-linux* } } */
>> +/* { dg-options "-O2 -fzero-call-used-regs=all -march=corei7 -mavx" } */
>> +
>> +void
>> +foo (void)
>> +{
>> +}
>> +
>> +/* { dg-final { scan-assembler "vzeroall" } } */
>> +/* { dg-final { scan-assembler "emms" } } */
>> +/* { dg-final { scan-assembler-not "%xmm" } } */
>> +/* { dg-final { scan-assembler "xorl\[ \t\]*%eax, %eax" } } */
>> +/* { dg-final { scan-assembler "movl\[ \t\]*%eax, %edx" } } */
>> +/* { dg-final { scan-assembler "movl\[ \t\]*%eax, %ecx" } } */
>> +/* { dg-final { scan-assembler "movl\[ \t\]*%eax, %esi" { target { ! ia32 } 
>> } } } */
>> +/* { dg-final { scan-assembler "movl\[ \t\]*%eax, %edi" { target { ! ia32 } 
>> } } } */
>> +/* { dg-final { scan-assembler "movl\[ \t\]*%eax, %r8d" { target { ! ia32 } 
>> } } } */
>> +/* { dg-final { scan-assembler "movl\[ \t\]*%eax, %r9d" { target { ! ia32 } 
>> } } } */
>> +/* { dg-final { scan-assembler "movl\[ \t\]*%eax, %r10d" { target { ! ia32 
>> } } } } */
>> +/* { dg-final { scan-assembler "movl\[ \t\]*%eax, %r11d" { target { ! ia32 
>> } } } } */
>> diff --git a/gcc/testsuite/gcc.target/i386/zero-scratch-regs-23.c 
>> b/gcc/testsuite/gcc.target/i386/zero-scratch-regs-23.c
>> new file mode 100644
>> index 0000000..18a5ffb
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/i386/zero-scratch-regs-23.c
>> @@ -0,0 +1,28 @@
>> +/* { dg-do compile { target *-*-linux* } } */
>> +/* { dg-options "-O2 -fzero-call-used-regs=all -march=corei7 -mavx512f" } */
>> +
>> +void
>> +foo (void)
>> +{
>> +}
>> +
>> +/* { dg-final { scan-assembler "vzeroall" } } */
>> +/* { dg-final { scan-assembler "emms" } } */
>> +/* { dg-final { scan-assembler-not "%xmm" } } */
>> +/* { dg-final { scan-assembler "xorl\[ \t\]*%eax, %eax" } } */
>> +/* { dg-final { scan-assembler "movl\[ \t\]*%eax, %edx" } } */
>> +/* { dg-final { scan-assembler "movl\[ \t\]*%eax, %ecx" } } */
>> +/* { dg-final { scan-assembler "movl\[ \t\]*%eax, %esi" { target { ! ia32 } 
>> } } } */
>> +/* { dg-final { scan-assembler "movl\[ \t\]*%eax, %edi" { target { ! ia32 } 
>> } } } */
>> +/* { dg-final { scan-assembler "movl\[ \t\]*%eax, %r8d" { target { ! ia32 } 
>> } } } */
>> +/* { dg-final { scan-assembler "movl\[ \t\]*%eax, %r9d" { target { ! ia32 } 
>> } } } */
>> +/* { dg-final { scan-assembler "movl\[ \t\]*%eax, %r10d" { target { ! ia32 
>> } } } } */
>> +/* { dg-final { scan-assembler "movl\[ \t\]*%eax, %r11d" { target { ! ia32 
>> } } } } */
>> +/* { dg-final { scan-assembler "kxorw\[ \t\]*%k0, %k0, %k0" { target { ! 
>> ia32 } } } } */
>> +/* { dg-final { scan-assembler "kmovw\[ \t\]*%k0, %k1" { target { ! ia32 } 
>> } } } */
>> +/* { dg-final { scan-assembler "kmovw\[ \t\]*%k0, %k2" { target { ! ia32 } 
>> } } } */
>> +/* { dg-final { scan-assembler "kmovw\[ \t\]*%k0, %k3" { target { ! ia32 } 
>> } } } */
>> +/* { dg-final { scan-assembler "kmovw\[ \t\]*%k0, %k4" { target { ! ia32 } 
>> } } } */
>> +/* { dg-final { scan-assembler "kmovw\[ \t\]*%k0, %k5" { target { ! ia32 } 
>> } } } */
>> +/* { dg-final { scan-assembler "kmovw\[ \t\]*%k0, %k6" { target { ! ia32 } 
>> } } } */
>> +/* { dg-final { scan-assembler "kmovw\[ \t\]*%k0, %k7" { target { ! ia32 } 
>> } } } */
>> diff --git a/gcc/testsuite/gcc.target/i386/zero-scratch-regs-24.c 
>> b/gcc/testsuite/gcc.target/i386/zero-scratch-regs-24.c
>> new file mode 100644
>> index 0000000..208633e
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/i386/zero-scratch-regs-24.c
>> @@ -0,0 +1,10 @@
>> +/* { dg-do compile { target *-*-linux* } } */
>> +/* { dg-options "-O2 -fzero-call-used-regs=used-gpr-arg" } */
>> +
>> +int
>> +foo (int x)
>> +{
>> +  return x;
>> +}
>> +
>> +/* { dg-final { scan-assembler "xorl\[ \t\]*%edi, %edi" } } */
>> diff --git a/gcc/testsuite/gcc.target/i386/zero-scratch-regs-25.c 
>> b/gcc/testsuite/gcc.target/i386/zero-scratch-regs-25.c
>> new file mode 100644
>> index 0000000..21e82c6
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/i386/zero-scratch-regs-25.c
>> @@ -0,0 +1,10 @@
>> +/* { dg-do compile { target *-*-linux* } } */
>> +/* { dg-options "-O2 -fzero-call-used-regs=used-arg" } */
>> +
>> +int
>> +foo (int x)
>> +{
>> +  return x;
>> +}
>> +
>> +/* { dg-final { scan-assembler "xorl\[ \t\]*%edi, %edi" } } */
>> diff --git a/gcc/testsuite/gcc.target/i386/zero-scratch-regs-26.c 
>> b/gcc/testsuite/gcc.target/i386/zero-scratch-regs-26.c
>> new file mode 100644
>> index 0000000..293d2fe
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/i386/zero-scratch-regs-26.c
>> @@ -0,0 +1,23 @@
>> +/* { dg-do compile { target *-*-linux* } } */
>> +/* { dg-options "-O2 -fzero-call-used-regs=all-arg" } */
>> +
>> +int
>> +foo (int x)
>> +{
>> +  return x;
>> +}
>> +
>> +/* { dg-final { scan-assembler "xorl\[ \t\]*%edx, %edx" } } */
>> +/* { dg-final { scan-assembler "movl\[ \t\]*%edx, %ecx" } } */
>> +/* { dg-final { scan-assembler "movl\[ \t\]*%edx, %esi" } } */
>> +/* { dg-final { scan-assembler "movl\[ \t\]*%edx, %edi" } } */
>> +/* { dg-final { scan-assembler "movl\[ \t\]*%edx, %r8d" } } */
>> +/* { dg-final { scan-assembler "movl\[ \t\]*%edx, %r9d" } } */
>> +/* { dg-final { scan-assembler "pxor\[ \t\]*%xmm0, %xmm0" } } */
>> +/* { dg-final { scan-assembler "movaps\[ \t\]*%xmm0, %xmm1" } } */
>> +/* { dg-final { scan-assembler "movaps\[ \t\]*%xmm0, %xmm2" } } */
>> +/* { dg-final { scan-assembler "movaps\[ \t\]*%xmm0, %xmm3" } } */
>> +/* { dg-final { scan-assembler "movaps\[ \t\]*%xmm0, %xmm4" } } */
>> +/* { dg-final { scan-assembler "movaps\[ \t\]*%xmm0, %xmm5" } } */
>> +/* { dg-final { scan-assembler "movaps\[ \t\]*%xmm0, %xmm6" } } */
>> +/* { dg-final { scan-assembler "movaps\[ \t\]*%xmm0, %xmm7" } } */
>> diff --git a/gcc/testsuite/gcc.target/i386/zero-scratch-regs-3.c 
>> b/gcc/testsuite/gcc.target/i386/zero-scratch-regs-3.c
>> new file mode 100644
>> index 0000000..de71223
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/i386/zero-scratch-regs-3.c
>> @@ -0,0 +1,12 @@
>> +/* { dg-do compile { target *-*-linux* } } */
>> +/* { dg-options "-O2 -fzero-call-used-regs=skip" } */
>> +
>> +void
>> +foo (void)
>> +{
>> +}
>> +
>> +/* { dg-final { scan-assembler-not "vzeroall" } } */
>> +/* { dg-final { scan-assembler-not "%xmm" } } */
>> +/* { dg-final { scan-assembler-not "xorl\[ \t\]*%" } } */
>> +/* { dg-final { scan-assembler-not "movl\[ \t\]*%" } } */
>> diff --git a/gcc/testsuite/gcc.target/i386/zero-scratch-regs-4.c 
>> b/gcc/testsuite/gcc.target/i386/zero-scratch-regs-4.c
>> new file mode 100644
>> index 0000000..ccfa441
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/i386/zero-scratch-regs-4.c
>> @@ -0,0 +1,14 @@
>> +/* { dg-do compile { target *-*-linux* } } */
>> +/* { dg-options "-O2 -fzero-call-used-regs=skip" } */
>> +
>> +extern void foo (void) __attribute__ ((zero_call_used_regs("used-gpr")));
>> +
>> +void
>> +foo (void)
>> +{
>> +}
>> +
>> +/* { dg-final { scan-assembler-not "vzeroall" } } */
>> +/* { dg-final { scan-assembler-not "%xmm" } } */
>> +/* { dg-final { scan-assembler-not "xorl\[ \t\]*%" } } */
>> +/* { dg-final { scan-assembler-not "movl\[ \t\]*%" } } */
>> diff --git a/gcc/testsuite/gcc.target/i386/zero-scratch-regs-5.c 
>> b/gcc/testsuite/gcc.target/i386/zero-scratch-regs-5.c
>> new file mode 100644
>> index 0000000..6b46ca3
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/i386/zero-scratch-regs-5.c
>> @@ -0,0 +1,20 @@
>> +/* { dg-do compile { target *-*-linux* } } */
>> +/* { dg-options "-O2 -fzero-call-used-regs=skip" } */
>> +
>> +__attribute__ ((zero_call_used_regs("all-gpr")))
>> +void
>> +foo (void)
>> +{
>> +}
>> +
>> +/* { dg-final { scan-assembler-not "vzeroall" } } */
>> +/* { dg-final { scan-assembler-not "%xmm" } } */
>> +/* { dg-final { scan-assembler "xorl\[ \t\]*%eax, %eax" } } */
>> +/* { dg-final { scan-assembler "movl\[ \t\]*%eax, %edx" } } */
>> +/* { dg-final { scan-assembler "movl\[ \t\]*%eax, %ecx" } } */
>> +/* { dg-final { scan-assembler "movl\[ \t\]*%eax, %esi" { target { ! ia32 } 
>> } } } */
>> +/* { dg-final { scan-assembler "movl\[ \t\]*%eax, %edi" { target { ! ia32 } 
>> } } } */
>> +/* { dg-final { scan-assembler "movl\[ \t\]*%eax, %r8d" { target { ! ia32 } 
>> } } } */
>> +/* { dg-final { scan-assembler "movl\[ \t\]*%eax, %r9d" { target { ! ia32 } 
>> } } } */
>> +/* { dg-final { scan-assembler "movl\[ \t\]*%eax, %r10d" { target { ! ia32 
>> } } } } */
>> +/* { dg-final { scan-assembler "movl\[ \t\]*%eax, %r11d" { target { ! ia32 
>> } } } } */
>> diff --git a/gcc/testsuite/gcc.target/i386/zero-scratch-regs-6.c 
>> b/gcc/testsuite/gcc.target/i386/zero-scratch-regs-6.c
>> new file mode 100644
>> index 0000000..0680f38
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/i386/zero-scratch-regs-6.c
>> @@ -0,0 +1,14 @@
>> +/* { dg-do compile { target *-*-linux* } } */
>> +/* { dg-options "-O2 -fzero-call-used-regs=all-gpr" } */
>> +
>> +extern void foo (void) __attribute__ ((zero_call_used_regs("skip")));
>> +
>> +void
>> +foo (void)
>> +{
>> +}
>> +
>> +/* { dg-final { scan-assembler-not "vzeroall" } } */
>> +/* { dg-final { scan-assembler-not "%xmm" } } */
>> +/* { dg-final { scan-assembler-not "xorl\[ \t\]*%" } } */
>> +/* { dg-final { scan-assembler-not "movl\[ \t\]*%" } } */
>> diff --git a/gcc/testsuite/gcc.target/i386/zero-scratch-regs-7.c 
>> b/gcc/testsuite/gcc.target/i386/zero-scratch-regs-7.c
>> new file mode 100644
>> index 0000000..534defa
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/i386/zero-scratch-regs-7.c
>> @@ -0,0 +1,13 @@
>> +/* { dg-do compile { target *-*-linux* } } */
>> +/* { dg-options "-O2 -fzero-call-used-regs=used-gpr" } */
>> +
>> +int
>> +foo (int x)
>> +{
>> +  return x;
>> +}
>> +
>> +/* { dg-final { scan-assembler-not "vzeroall" } } */
>> +/* { dg-final { scan-assembler-not "%xmm" } } */
>> +/* { dg-final { scan-assembler-not "xorl\[ \t\]*%" { target ia32 } } } */
>> +/* { dg-final { scan-assembler "xorl\[ \t\]*%edi, %edi" { target { ! ia32 } 
>> } } } */
>> diff --git a/gcc/testsuite/gcc.target/i386/zero-scratch-regs-8.c 
>> b/gcc/testsuite/gcc.target/i386/zero-scratch-regs-8.c
>> new file mode 100644
>> index 0000000..477bb19
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/i386/zero-scratch-regs-8.c
>> @@ -0,0 +1,19 @@
>> +/* { dg-do compile { target *-*-linux* } } */
>> +/* { dg-options "-O2 -fzero-call-used-regs=all-gpr" } */
>> +
>> +int
>> +foo (int x)
>> +{
>> +  return x;
>> +}
>> +
>> +/* { dg-final { scan-assembler-not "vzeroall" } } */
>> +/* { dg-final { scan-assembler-not "%xmm" } } */
>> +/* { dg-final { scan-assembler "xorl\[ \t\]*%edx, %edx" } } */
>> +/* { dg-final { scan-assembler "movl\[ \t\]*%edx, %ecx" } } */
>> +/* { dg-final { scan-assembler "movl\[ \t\]*%edx, %esi" { target { ! ia32 } 
>> } } } */
>> +/* { dg-final { scan-assembler "movl\[ \t\]*%edx, %edi" { target { ! ia32 } 
>> } } } */
>> +/* { dg-final { scan-assembler "movl\[ \t\]*%edx, %r8d" { target { ! ia32 } 
>> } } } */
>> +/* { dg-final { scan-assembler "movl\[ \t\]*%edx, %r9d" { target { ! ia32 } 
>> } } } */
>> +/* { dg-final { scan-assembler "movl\[ \t\]*%edx, %r10d" { target { ! ia32 
>> } } } } */
>> +/* { dg-final { scan-assembler "movl\[ \t\]*%edx, %r11d" { target { ! ia32 
>> } } } } */
>> diff --git a/gcc/testsuite/gcc.target/i386/zero-scratch-regs-9.c 
>> b/gcc/testsuite/gcc.target/i386/zero-scratch-regs-9.c
>> new file mode 100644
>> index 0000000..a305a60
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/i386/zero-scratch-regs-9.c
>> @@ -0,0 +1,15 @@
>> +/* { dg-do compile { target *-*-linux* } } */
>> +/* { dg-options "-O2 -fzero-call-used-regs=skip" } */
>> +
>> +extern int foo (int) __attribute__ ((zero_call_used_regs("used-gpr")));
>> +
>> +int
>> +foo (int x)
>> +{
>> +  return x;
>> +}
>> +
>> +/* { dg-final { scan-assembler-not "vzeroall" } } */
>> +/* { dg-final { scan-assembler-not "%xmm" } } */
>> +/* { dg-final { scan-assembler-not "xorl\[ \t\]*%" { target ia32 } } } */
>> +/* { dg-final { scan-assembler "xorl\[ \t\]*%edi, %edi" { target { ! ia32 } 
>> } } } */
>> diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
>> index 62e5b69..8afe8ee 100644
>> --- a/gcc/tree-pass.h
>> +++ b/gcc/tree-pass.h
>> @@ -592,6 +592,7 @@ extern rtl_opt_pass *make_pass_gcse2 (gcc::context 
>> *ctxt);
>> extern rtl_opt_pass *make_pass_split_after_reload (gcc::context *ctxt);
>> extern rtl_opt_pass *make_pass_thread_prologue_and_epilogue (gcc::context
>>     *ctxt);
>> +extern rtl_opt_pass *make_pass_zero_call_used_regs (gcc::context *ctxt);
>> extern rtl_opt_pass *make_pass_stack_adjustments (gcc::context *ctxt);
>> extern rtl_opt_pass *make_pass_sched_fusion (gcc::context *ctxt);
>> extern rtl_opt_pass *make_pass_peephole2 (gcc::context *ctxt);
>> --
>> 1.8.3.1

Reply via email to