[PATCH], Fix PR target/pr83862: Fix PowerPC long double signbit with -mabi=ieeelongdouble

2018-01-16 Thread Michael Meissner
PR target/83862 pointed out a problem I put into the 128-bit floating point
type signbit optimization.  The issue is we want to avoid doing a load to a
floating point/vector register and then a direct move to do signbit, so we
change the load to load the upper 64-bits of the floating point value to get
the sign bit.  Unfortunately, if the type is IEEE 128-bit and memory is
addressed with an indexed address on a little endian system, it generates an
illegal address and generates an internal compiler error.

I have tested this on a little endian power8 system, with bootstrap compilers.
There were no regressions, and the new test passes.  Can I install this into the
trunk?

The same code is also in GCC 6 and 7.  While -mabi=ieeelongdouble is not
supported in those releases, you can get a failure if you use an explicit
_Float128 type instead of long double.  Assuming that the bug shows up, can I
apply these patches to those branches as well?

[gcc]
2018-01-16  Michael Meissner  

PR target/83862
* config/rs6000/rs6000.c (rs6000_split_signbit): Do not create an
illegal address on little endian systems if the source value is in
memory addressed with indexed addressing.
* config/rs6000/rs6000.md (signbit2_dm): Likewise.
(signbit2_dm_ext): Likewise.

[gcc/testsuite]
2018-01-16  Michael Meissner  

PR target/83862
* gcc.target/powerpc/pr83862.c: New test.

-- 
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA
email: meiss...@linux.vnet.ibm.com, phone: +1 (978) 899-4797
Index: gcc/config/rs6000/rs6000.c
===
--- gcc/config/rs6000/rs6000.c  (revision 256753)
+++ gcc/config/rs6000/rs6000.c  (working copy)
@@ -23341,9 +23341,27 @@ rs6000_split_signbit (rtx dest, rtx src)
 
   if (MEM_P (src))
 {
-  rtx mem = (WORDS_BIG_ENDIAN
-? adjust_address (src, DImode, 0)
-: adjust_address (src, DImode, 8));
+  rtx addr = XEXP (src, 0);
+  rtx mem;
+
+  if (!WORDS_BIG_ENDIAN)
+   {
+ /* Do not create an illegal address for indexed addressing when we
+add in the 8 to address the second word where the sign bit is.
+Instead use the desitnation register as a base register.  */
+ if (GET_CODE (addr) == PLUS
+ && !rs6000_legitimate_offset_address_p (DImode, addr, true, true))
+   {
+ emit_insn (gen_rtx_SET (dest, addr));
+ mem = change_address (src, DImode,
+   gen_rtx_PLUS (Pmode, dest, GEN_INT (8)));
+   }
+ else
+   mem = adjust_address (src, DImode, 8);
+   }
+  else
+   mem = adjust_address (src, DImode, 0);
+
   emit_insn (gen_rtx_SET (dest_di, mem));
 }
 
Index: gcc/config/rs6000/rs6000.md
===
--- gcc/config/rs6000/rs6000.md (revision 256753)
+++ gcc/config/rs6000/rs6000.md (working copy)
@@ -4835,9 +4835,11 @@ (define_expand "copysign3"
   })
 
 ;; Optimize signbit on 64-bit systems with direct move to avoid doing the store
-;; and load.
+;; and load.  We restrict signbit coming from a load to use a base register for
+;; the destination, in case we need to use the base register as a tempoary
+;; address register.
 (define_insn_and_split "signbit2_dm"
-  [(set (match_operand:SI 0 "gpc_reg_operand" "=r,r,r")
+  [(set (match_operand:SI 0 "gpc_reg_operand" "=r,,r")
(unspec:SI
 [(match_operand:SIGNBIT 1 "input_operand" "wa,m,r")]
 UNSPEC_SIGNBIT))]
@@ -4853,7 +4855,7 @@ (define_insn_and_split "signbit2_d
   (set_attr "type" "mftgpr,load,integer")])
 
 (define_insn_and_split "*signbit2_dm_ext"
-  [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r,r")
+  [(set (match_operand:DI 0 "gpc_reg_operand" "=r,,r")
(any_extend:DI
 (unspec:SI
  [(match_operand:SIGNBIT 1 "input_operand" "wa,m,r")]
Index: gcc/testsuite/gcc.target/powerpc/pr83862.c
===
--- gcc/testsuite/gcc.target/powerpc/pr83862.c  (nonexistent)
+++ gcc/testsuite/gcc.target/powerpc/pr83862.c  (working copy)
@@ -0,0 +1,21 @@
+/* PR target/83862.c */
+/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
+/* { dg-require-effective-target ppc_float128_sw } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-mpower8-vector -O2 -mabi=ieeelongdouble -Wno-psabi" } */
+
+/* On little endian systems, optimizing signbit of IEEE 128-bit values from
+   memory could abort if the memory address was indexed (reg+reg).  The
+   optimization is only on 64-bit machines with direct move.
+
+   Compile with -g -O2 -mabi=ieeelongdouble -Wno-psabi.  */
+
+#ifndef TYPE
+#define TYPE long double
+#endif
+
+int sbr (TYPE a) { return __builtin_signbit (a); }
+int sbm 

[PATCH v2, rs6000] Implement 32- and 64-bit BE handling for -mno-speculate-indirect-jumps

2018-01-16 Thread Bill Schmidt
Hi,

This patch supersedes and extends 
https://gcc.gnu.org/ml/gcc-patches/2018-01/msg01479.html,
adding the remaining big-endian support for -mno-speculate-indirect-jumps.
This includes 32-bit support for indirect calls and sibling calls, and 
64-bit support for indirect calls.  The endian-neutral switch handling has
already been committed.

Using -m32 -O2 on safe-indirect-jumps-1.c results in a test for a sibling 
call, so this has been added as safe-indirect-jumps-8.c.  Also, 
safe-indirect-jumps-7.c adds a variant that will not generate a sibling 
call for -m32, so we still get indirect call coverage.

Bootstrapped and tested on powerpc64-linux-gnu and powerpc64le-linux-gnu 
with no regressions.  Is this okay for trunk?

Thanks,
Bill


[gcc]

2018-01-16  Bill Schmidt  

* config/rs6000/rs6000.md (*call_indirect_nonlocal_sysv):
Generate different code for -mno-speculate-indirect-jumps.
(*call_value_indirect_nonlocal_sysv): Likewise.
(*call_indirect_aix): Disable for
-mno-speculate-indirect-jumps.
(*call_indirect_aix_nospec): New define_insn.
(*call_value_indirect_aix): Disable for
-mno-speculate-indirect-jumps.
(*call_value_indirect_aix_nospec): New define_insn.
(*sibcall_nonlocal_sysv): Generate different code for
-mno-speculate-indirect-jumps.
(*sibcall_value_nonlocal_sysv): Likewise.

[gcc/testsuite]

2018-01-16  Bill Schmidt  

* gcc.target/powerpc/safe-indirect-jump-1.c: Remove endian
restriction, but still restrict to 64-bit.
* gcc.target/powerpc/safe-indirect-jump-7.c: New file.
* gcc.target/powerpc/safe-indirect-jump-8.c: New file.


Index: gcc/config/rs6000/rs6000.md
===
--- gcc/config/rs6000/rs6000.md (revision 256753)
+++ gcc/config/rs6000/rs6000.md (working copy)
@@ -10453,10 +10453,35 @@
   else if (INTVAL (operands[2]) & CALL_V4_CLEAR_FP_ARGS)
 output_asm_insn ("creqv 6,6,6", operands);
 
-  return "b%T0l";
+  if (rs6000_speculate_indirect_jumps
+  || which_alternative == 1 || which_alternative == 3)
+return "b%T0l";
+  else
+return "crset eq\;beq%T0l-";
 }
   [(set_attr "type" "jmpreg,jmpreg,jmpreg,jmpreg")
-   (set_attr "length" "4,4,8,8")])
+   (set (attr "length")
+   (cond [(and (eq (symbol_ref "which_alternative") (const_int 0))
+   (eq (symbol_ref "rs6000_speculate_indirect_jumps")
+   (const_int 1)))
+ (const_string "4")
+  (and (eq (symbol_ref "which_alternative") (const_int 0))
+   (eq (symbol_ref "rs6000_speculate_indirect_jumps")
+   (const_int 0)))
+ (const_string "8")
+  (eq (symbol_ref "which_alternative") (const_int 1))
+ (const_string "4")
+  (and (eq (symbol_ref "which_alternative") (const_int 2))
+   (eq (symbol_ref "rs6000_speculate_indirect_jumps")
+   (const_int 1)))
+ (const_string "8")
+  (and (eq (symbol_ref "which_alternative") (const_int 2))
+   (eq (symbol_ref "rs6000_speculate_indirect_jumps")
+   (const_int 0)))
+ (const_string "12")
+  (eq (symbol_ref "which_alternative") (const_int 3))
+ (const_string "8")]
+ (const_string "4")))])
 
 (define_insn_and_split "*call_nonlocal_sysv"
   [(call (mem:SI (match_operand:P 0 "symbol_ref_operand" "s,s"))
@@ -10541,10 +10566,35 @@
   else if (INTVAL (operands[3]) & CALL_V4_CLEAR_FP_ARGS)
 output_asm_insn ("creqv 6,6,6", operands);
 
-  return "b%T1l";
+  if (rs6000_speculate_indirect_jumps
+  || which_alternative == 1 || which_alternative == 3)
+return "b%T1l";
+  else
+return "crset eq\;beq%T1l-";
 }
   [(set_attr "type" "jmpreg,jmpreg,jmpreg,jmpreg")
-   (set_attr "length" "4,4,8,8")])
+   (set (attr "length")
+   (cond [(and (eq (symbol_ref "which_alternative") (const_int 0))
+   (eq (symbol_ref "rs6000_speculate_indirect_jumps")
+   (const_int 1)))
+ (const_string "4")
+  (and (eq (symbol_ref "which_alternative") (const_int 0))
+   (eq (symbol_ref "rs6000_speculate_indirect_jumps")
+   (const_int 0)))
+ (const_string "8")
+  (eq (symbol_ref "which_alternative") (const_int 1))
+ (const_string "4")
+  (and (eq (symbol_ref "which_alternative") (const_int 2))
+   (eq (symbol_ref "rs6000_speculate_indirect_jumps")
+   (const_int 1)))
+ (const_string "8")
+  (and (eq (symbol_ref "which_alternative") (const_int 2))
+   (eq (symbol_ref "rs6000_speculate_indirect_jumps")
+   

[committed] hppa: Switch hppa-linux to caller copies ABI

2018-01-16 Thread John David Anglin
The callee copies ABI used for 32-bit hppa causes no end of optimization 
issues and problems with
OpenMP.  The hppa target is only in Debian unstable and gentoo.  In both 
cases, packages are
rebuilt often.  So, Helge and I decided that it was better to break the 
ABI and accept whatever

problems that result from the switch.

Committed to trunk.

Dave

--
John David Anglin  dave.ang...@bell.net

2018-01-16  John David Anglin  

* config.gcc (hppa*-*-linux*): Change callee copies ABI to caller
copies.

Index: config.gcc
===
--- config.gcc  (revision 256716)
+++ config.gcc  (working copy)
@@ -1339,7 +1339,7 @@
gas=yes gnu_ld=yes
;;
 hppa*-*-linux*)
-   target_cpu_default="MASK_PA_11|MASK_NO_SPACE_REGS"
+   target_cpu_default="MASK_PA_11|MASK_NO_SPACE_REGS|MASK_CALLER_COPIES"
tm_file="${tm_file} dbxelf.h elfos.h gnu-user.h linux.h glibc-stdint.h 
pa/pa-linux.h \
 pa/pa32-regs.h pa/pa32-linux.h"
tmake_file="${tmake_file} pa/t-linux"


Re: [committed] hppa: Rework MALLOC_ABI_ALIGNMENT macro

2018-01-16 Thread Kamil Rytarowski
On 17.01.2018 02:52, John David Anglin wrote:
> When I defined MALLOC_ABI_ALIGNMENT, I inadvertently changed the default
> alignment for
> various hppa*-*-*bsd* targets.  Nick Hudson is still maintaining the
> netbsd target.
> 
> This patch corrects the default malloc alignment for 32-bit targets and
> moves the linux special
> case to its own file.
> 
> Committed to trunk.
> 
> Dave
> 

NetBSD supports hppa in Tier2.



signature.asc
Description: OpenPGP digital signature


[committed] hppa: Rework MALLOC_ABI_ALIGNMENT macro

2018-01-16 Thread John David Anglin
When I defined MALLOC_ABI_ALIGNMENT, I inadvertently changed the default 
alignment for
various hppa*-*-*bsd* targets.  Nick Hudson is still maintaining the 
netbsd target.


This patch corrects the default malloc alignment for 32-bit targets and 
moves the linux special

case to its own file.

Committed to trunk.

Dave

--
John David Anglin  dave.ang...@bell.net

2018-01-16  John David Anglin  

* config/pa.h (MALLOC_ABI_ALIGNMENT): Set 32-bit alignment default to
64 bits.
* config/pa/pa32-linux.h (MALLOC_ABI_ALIGNMENT): Set alignment to
128 bits.

Index: config/pa/pa.h
===
--- config/pa/pa.h  (revision 256561)
+++ config/pa/pa.h  (working copy)
@@ -307,7 +307,7 @@
POSIX types such as pthread_mutex_t require 16-byte alignment.  Again,
this is non critical since 16-byte alignment is no longer needed for
atomic operations.  */
-#define MALLOC_ABI_ALIGNMENT (TARGET_SOM ? 64 : 128)
+#define MALLOC_ABI_ALIGNMENT (TARGET_64BIT ? 128 : 64)
 
 /* Make arrays of chars word-aligned for the same reasons.  */
 #define DATA_ALIGNMENT(TYPE, ALIGN)\
Index: config/pa/pa32-linux.h
===
--- config/pa/pa32-linux.h  (revision 256561)
+++ config/pa/pa32-linux.h  (working copy)
@@ -62,3 +62,8 @@
 
 #undef  WCHAR_TYPE_SIZE
 #define WCHAR_TYPE_SIZE BITS_PER_WORD
+
+/* POSIX types such as pthread_mutex_t require 16-byte alignment to retain
+   layout compatibility with the original linux thread implementation.  */
+#undef MALLOC_ABI_ALIGNMENT
+#define MALLOC_ABI_ALIGNMENT 128


[committed] hppa: Cleanup ASM_DECLARE_FUNCTION_NAME define in som.h

2018-01-16 Thread John David Anglin

This shortens and simplifies a number of lines in ASM_DECLARE_FUNCTION_NAME.

Committed to trunk.

Dave

--
John David Anglin  dave.ang...@bell.net

2018-01-16  John David Anglin  

* config/pa/som.h (ASM_DECLARE_FUNCTION_NAME): Cleanup type and mode
variables.

Index: config/pa/som.h
===
--- config/pa/som.h (revision 256744)
+++ config/pa/som.h (working copy)
@@ -119,11 +119,11 @@
 for (parm = DECL_ARGUMENTS (DECL), i = 0; parm && i < 4;   \
  parm = DECL_CHAIN (parm)) \
   {\
-if (TYPE_MODE (DECL_ARG_TYPE (parm)) == SFmode \
-&& ! TARGET_SOFT_FLOAT)\
+tree type = DECL_ARG_TYPE (parm);  \
+machine_mode mode = TYPE_MODE (type);  \
+if (mode == SFmode && ! TARGET_SOFT_FLOAT) \
   fprintf (FILE, ",ARGW%d=FR", i++);   \
-else if (TYPE_MODE (DECL_ARG_TYPE (parm)) == DFmode\
- && ! TARGET_SOFT_FLOAT)   \
+else if (mode == DFmode && ! TARGET_SOFT_FLOAT)\
   {\
 if (i <= 2)\
   {\
@@ -135,13 +135,10 @@
   }\
 else   \
   {\
-int arg_size = \
-  pa_function_arg_size (TYPE_MODE (DECL_ARG_TYPE (parm)),\
-DECL_ARG_TYPE (parm)); \
+int arg_size = pa_function_arg_size (mode, type);  \
 /* Passing structs by invisible reference uses \
one general register.  */   \
-if (arg_size > 2   \
-|| TREE_ADDRESSABLE (DECL_ARG_TYPE (parm)))\
+if (arg_size > 2 || TREE_ADDRESSABLE (type))   \
   arg_size = 1;\
 if (arg_size == 2 && i <= 2)   \
   {\


[committed] hppa: Fix move of FUNCTION_ARG_SIZE to pa.c

2018-01-16 Thread John David Anglin

We need to apply CEIL to the case where mode != BLKmode.

Committed to trunk.

Dave
--
John David Anglin  dave.ang...@bell.net

2018-01-16  John David Anglin  

* config/pa/pa.c (pa_function_arg_size): Apply CEIL to GET_MODE_SIZE
return value.

Index: config/pa/pa.c
===
--- config/pa/pa.c  (revision 256744)
+++ config/pa/pa.c  (working copy)
@@ -10842,9 +10842,10 @@
 HOST_WIDE_INT
 pa_function_arg_size (machine_mode mode, const_tree type)
 {
-  if (mode != BLKmode)
-return GET_MODE_SIZE (mode);
-  return CEIL (int_size_in_bytes (type), UNITS_PER_WORD);
+  HOST_WIDE_INT size;
+
+  size = mode != BLKmode ? GET_MODE_SIZE (mode) : int_size_in_bytes (type); 
+  return CEIL (size, UNITS_PER_WORD);
 }
 
 #include "gt-pa.h"


libbacktrace patch committed: Fix handling of inflate default dist table

2018-01-16 Thread Ian Lance Taylor
I misunderstood how the dist codes are handled in block type 1.  I
also think'od the length of the codes for the default table.  This
patch fixes these problems, along with a test case that exposes them.
Bootstrapped and ran libbacktrace tests on x86_64-pc-linux-gnu.
Committed to mainline.

Ian

2018-01-16  Ian Lance Taylor  

* elf.c (codes) [GENERATE_FIXED_HUFFMAN_TABLE]: Fix size to be
288.
(main) [GENERATE_FIXED_HUFFMAN_TABLE]: Pass 288 to
elf_zlib_inflate_table.  Generate elf_zlib_default_dist_table.
(elf_zlib_default_table): Update.
(elf_zlib_default_dist_table): New static array.
(elf_zlib_inflate): Use elf_zlib_default_dist_table for dist table
for block type 1.
* ztest.c (struct zlib_test): Add uncompressed_len.
(tests): Initialize uncompressed_len field.  Add new test case.
(test_samples): Use uncompressed_len field.
Index: elf.c
===
--- elf.c   (revision 256593)
+++ elf.c   (working copy)
@@ -1461,7 +1461,7 @@ elf_zlib_inflate_table (unsigned char *c
 #include 
 
 static uint16_t table[ZDEBUG_TABLE_SIZE];
-static unsigned char codes[287];
+static unsigned char codes[288];
 
 int
 main ()
@@ -1476,7 +1476,7 @@ main ()
 codes[i] = 7;
   for (i = 280; i <= 287; ++i)
 codes[i] = 8;
-  if (!elf_zlib_inflate_table ([0], 287, [0], [0]))
+  if (!elf_zlib_inflate_table ([0], 288, [0], [0]))
 {
   fprintf (stderr, "elf_zlib_inflate_table failed\n");
   exit (EXIT_FAILURE);
@@ -1495,48 +1495,72 @@ main ()
   printf ("\n");
 }
   printf ("};\n");
+  printf ("\n");
+
+  for (i = 0; i < 32; ++i)
+codes[i] = 5;
+  if (!elf_zlib_inflate_table ([0], 32, [0], [0]))
+{
+  fprintf (stderr, "elf_zlib_inflate_table failed\n");
+  exit (EXIT_FAILURE);
+}
+
+  printf ("static const uint16_t elf_zlib_default_dist_table[%#zx] =\n",
+ final_next_secondary + 0x100);
+  printf ("{\n");
+  for (i = 0; i < final_next_secondary + 0x100; i += 8)
+{
+  size_t j;
+
+  printf (" ");
+  for (j = i; j < final_next_secondary + 0x100 && j < i + 8; ++j)
+   printf (" %#x,", table[j]);
+  printf ("\n");
+}
+  printf ("};\n");
+
   return 0;
 }
 
 #endif
 
-/* The fixed table generated by the #ifdef'ed out main function
+/* The fixed tables generated by the #ifdef'ed out main function
above.  */
 
 static const uint16_t elf_zlib_default_table[0x170] =
 {
-  0xd00, 0xe50, 0xe10, 0xf18, 0xd10, 0xe70, 0xe30, 0x1232,
-  0xd08, 0xe60, 0xe20, 0x1212, 0xe00, 0xe80, 0xe40, 0x1252,
-  0xd04, 0xe58, 0xe18, 0x1202, 0xd14, 0xe78, 0xe38, 0x1242,
-  0xd0c, 0xe68, 0xe28, 0x1222, 0xe08, 0xe88, 0xe48, 0x1262,
-  0xd02, 0xe54, 0xe14, 0xf1c, 0xd12, 0xe74, 0xe34, 0x123a,
-  0xd0a, 0xe64, 0xe24, 0x121a, 0xe04, 0xe84, 0xe44, 0x125a,
-  0xd06, 0xe5c, 0xe1c, 0x120a, 0xd16, 0xe7c, 0xe3c, 0x124a,
-  0xd0e, 0xe6c, 0xe2c, 0x122a, 0xe0c, 0xe8c, 0xe4c, 0x126a,
-  0xd01, 0xe52, 0xe12, 0xf1a, 0xd11, 0xe72, 0xe32, 0x1236,
-  0xd09, 0xe62, 0xe22, 0x1216, 0xe02, 0xe82, 0xe42, 0x1256,
-  0xd05, 0xe5a, 0xe1a, 0x1206, 0xd15, 0xe7a, 0xe3a, 0x1246,
-  0xd0d, 0xe6a, 0xe2a, 0x1226, 0xe0a, 0xe8a, 0xe4a, 0x1266,
-  0xd03, 0xe56, 0xe16, 0xf1e, 0xd13, 0xe76, 0xe36, 0x123e,
-  0xd0b, 0xe66, 0xe26, 0x121e, 0xe06, 0xe86, 0xe46, 0x125e,
-  0xd07, 0xe5e, 0xe1e, 0x120e, 0xd17, 0xe7e, 0xe3e, 0x124e,
-  0xd0f, 0xe6e, 0xe2e, 0x122e, 0xe0e, 0xe8e, 0xe4e, 0x126e,
-  0xd00, 0xe51, 0xe11, 0xf19, 0xd10, 0xe71, 0xe31, 0x1234,
-  0xd08, 0xe61, 0xe21, 0x1214, 0xe01, 0xe81, 0xe41, 0x1254,
-  0xd04, 0xe59, 0xe19, 0x1204, 0xd14, 0xe79, 0xe39, 0x1244,
-  0xd0c, 0xe69, 0xe29, 0x1224, 0xe09, 0xe89, 0xe49, 0x1264,
-  0xd02, 0xe55, 0xe15, 0xf1d, 0xd12, 0xe75, 0xe35, 0x123c,
-  0xd0a, 0xe65, 0xe25, 0x121c, 0xe05, 0xe85, 0xe45, 0x125c,
-  0xd06, 0xe5d, 0xe1d, 0x120c, 0xd16, 0xe7d, 0xe3d, 0x124c,
-  0xd0e, 0xe6d, 0xe2d, 0x122c, 0xe0d, 0xe8d, 0xe4d, 0x126c,
-  0xd01, 0xe53, 0xe13, 0xf1b, 0xd11, 0xe73, 0xe33, 0x1238,
-  0xd09, 0xe63, 0xe23, 0x1218, 0xe03, 0xe83, 0xe43, 0x1258,
-  0xd05, 0xe5b, 0xe1b, 0x1208, 0xd15, 0xe7b, 0xe3b, 0x1248,
-  0xd0d, 0xe6b, 0xe2b, 0x1228, 0xe0b, 0xe8b, 0xe4b, 0x1268,
-  0xd03, 0xe57, 0xe17, 0x1200, 0xd13, 0xe77, 0xe37, 0x1240,
-  0xd0b, 0xe67, 0xe27, 0x1220, 0xe07, 0xe87, 0xe47, 0x1260,
-  0xd07, 0xe5f, 0xe1f, 0x1210, 0xd17, 0xe7f, 0xe3f, 0x1250,
-  0xd0f, 0xe6f, 0xe2f, 0x1230, 0xe0f, 0xe8f, 0xe4f, 0,
+  0xd00, 0xe50, 0xe10, 0xf18, 0xd10, 0xe70, 0xe30, 0x1230,
+  0xd08, 0xe60, 0xe20, 0x1210, 0xe00, 0xe80, 0xe40, 0x1250,
+  0xd04, 0xe58, 0xe18, 0x1200, 0xd14, 0xe78, 0xe38, 0x1240,
+  0xd0c, 0xe68, 0xe28, 0x1220, 0xe08, 0xe88, 0xe48, 0x1260,
+  0xd02, 0xe54, 0xe14, 0xf1c, 0xd12, 0xe74, 0xe34, 0x1238,
+  0xd0a, 0xe64, 0xe24, 0x1218, 0xe04, 0xe84, 0xe44, 0x1258,
+  0xd06, 0xe5c, 0xe1c, 0x1208, 0xd16, 0xe7c, 0xe3c, 0x1248,
+  0xd0e, 0xe6c, 0xe2c, 0x1228, 0xe0c, 0xe8c, 0xe4c, 0x1268,
+  0xd01, 0xe52, 0xe12, 0xf1a, 0xd11, 0xe72, 0xe32, 0x1234,
+  0xd09, 0xe62, 0xe22, 0x1214, 0xe02, 0xe82, 0xe42, 0x1254,
+  0xd05, 

Re: [C++ Patch] PR 81054 ("[7/8 Regression] ICE with volatile variable in constexpr function") [Take 2]

2018-01-16 Thread Paolo Carlini

.. regression testing actually completed successfully.

Paolo.


Re: [PATCH] make -Wrestrict for strcat more meaningful (PR 83698)

2018-01-16 Thread Martin Sebor

On 01/16/2018 02:32 PM, Jakub Jelinek wrote:

On Tue, Jan 16, 2018 at 01:36:26PM -0700, Martin Sebor wrote:

--- gcc/gimple-ssa-warn-restrict.c  (revision 256752)
+++ gcc/gimple-ssa-warn-restrict.c  (working copy)
@@ -384,6 +384,12 @@ builtin_memref::builtin_memref (tree expr, tree si
  base = SSA_NAME_VAR (base);
   }

+  if (DECL_P (base) && TREE_CODE (TREE_TYPE (base)) == ARRAY_TYPE)
+{
+  if (offrange[0] < 0 && offrange[1] > 0)
+   offrange[0] = 0;
+}


Why the 2 nested ifs?


No particular reason.  There may have been more code in there
that I ended up removing.  Or a comment.  I can remove the
extra braces when the patch is approved.




@@ -1079,14 +1085,35 @@ builtin_access::strcat_overlap ()
 return false;

   /* When strcat overlap is certain it is always a single byte:
- the terminatinn NUL, regardless of offsets and sizes.  When
+ the terminating NUL, regardless of offsets and sizes.  When
  overlap is only possible its range is [0, 1].  */
   acs.ovlsiz[0] = dstref->sizrange[0] == dstref->sizrange[1] ? 1 : 0;
   acs.ovlsiz[1] = 1;
-  acs.ovloff[0] = (dstref->sizrange[0] + dstref->offrange[0]).to_shwi ();
-  acs.ovloff[1] = (dstref->sizrange[1] + dstref->offrange[1]).to_shwi ();


You use to_shwi many times in the patch, do the callers or something earlier
in this function guarantee that you aren't throwing away any bits (unlike
tree_to_shwi, to_shwi method doesn't ICE, just throws away upper bits).
Especially when you perform additions like here, even if both wide_ints fit
into a shwi, the result might not.


No, I'm not sure.  In fact, it wouldn't surprise me if it did
happen.  It doesn't cause false positives or negatives but it
can make the offsets less than meaningful in cases where they
are within valid bounds.  There are also cases where they are
meaningless to begin with and there is little the pass can do
about that.

IMO, the ideal solution to the first problem is to add a format
specifier for wide ints to the pretty printer and get rid of
the conversions.  It's probably too late for something like
that now but I'd like to do it for GCC 9.  Unless someone
files a bug/regression, it's also too late for me to go and
try to find and fix these conversions now.

Martin

PS While looking for a case you asked about I came up with
the following.  I don't think there's any slicing involved
but the offsets are just as meaningless as if there were.
I think the way to do significantly better is to detect
out-of-bounds offsets earlier (e.g., as in this patch:
https://gcc.gnu.org/ml/gcc-patches/2017-10/msg02143.html)

$ cat z.c && gcc -O2 -S -Warray-bounds -m32 z.c
extern int a[];

void f (__PTRDIFF_TYPE__ i)
{
  if (i < __PTRDIFF_MAX__ - 7 || __PTRDIFF_MAX__ - 5 < i)
i = __PTRDIFF_MAX__ -  7;

  const int *s = a + i;

  __builtin_memcpy (a, [i], 3);
}
z.c: In function ‘f’:
z.c:10:3: warning: ‘__builtin_memcpy’ offset [-64, -48] is out of the 
bounds of object ‘a’ with type ‘int[]’ [-Warray-bounds]

   __builtin_memcpy (a, [i], 3);
   ^~
z.c:1:12: note: ‘a’ declared here
 extern int a[];
^



Re: [testsuite] Tweak Wrestrict.c

2018-01-16 Thread Martin Sebor

On 01/16/2018 03:54 PM, Eric Botcazou wrote:

This test fails on strict-alignment platforms because a call to memcpy is not
turned into a simple move and thus yields an additional warning:

warning: '__builtin_memcpy' writing 4 bytes into a region of size 0 overflows
the destination [-Wstringop-overflow=]


There should be just one warning per call, and (as it is) -Wrestrict
should suppress -Wstringop-overflow.  This suppression was a recent
change (r256683).  Is it not working for you?  The new assertion
fails for me with your change:

FAIL: c-c++-common/Wrestrict.c  -Wc++-compat  memcpy (test for warnings, 
line 761)


Martin




The attached patch tweaks the test so that this call to memcpy is preserved on
the other platforms too (by adding one character to the string) and adds the
additional dg-warning directive.

Tested on visium-elf & x86_64-suse-linux, applied on the mainline as obvious.


2018-01-16  Eric Botcazou  

* c-c++-common/Wrestrict.c (test_strcpy_range): Bump string size of one
test and add dg-warning for the -Wstringop-overflow warning.





Re: [testsuite] XFAIL Warray-bounds-4.c on SPARC and Visium

2018-01-16 Thread Martin Sebor

On 01/16/2018 03:39 PM, Eric Botcazou wrote:

On these platforms, one of the instances of the constructor generated in
test_strcpy_bounds_memarray_range is put into the constant pool so the strlen
pass cannot do its magic.

Tested on visium-elf, SPARC64 and x86-64/Linux, applied on the mainline.


Sorry about these lingering failures.  I'm aware of the assertion
failing on a number of targets.  The failure is being tracked in
bug 83462 (along with some others) but I haven't yet gotten around
to dealing with it.  The ideal fix is to have string_constant() get
the string from the constructor (tracked in bug 83543) but it's too
late for such a change now.   It might be easiest to simply skip
the assertion on all non-x86_64 targets.  I'm hoping to get to it
soon.

Martin




2018-01-16  Eric Botcazou  

* c-c++-common/Warray-bounds-4.c (test_strcpy_bounds_memarray_range):
XFAIL last test on SPARC and Visium.





Fix Warray-bounds-3.c on Visium

2018-01-16 Thread Eric Botcazou
This test fails on Visium because of 3 separate issues.  The first one is 
benign, it's a warning about a pointer mismatch between int32_t* and int*
(On most newlib targets, int32_t is long int instead of int) in the test.
The other 2 are in the code itself (but cancel each other on most targets):
builtin_memref::builtin_memref contains these lines:

  /* Determine the base object or pointer of the reference
 and its constant offset from the beginning of the base.  */
  base = get_addr_base_and_unit_offset (oper, &off);

  HOST_WIDE_INT const_off;
  if (base && off.is_constant (&const_off))
{
  offrange[0] += const_off;
  offrange[1] += const_off;

  /* Stash the reference for offset validation.  */
  ref = oper;

  /* Also stash the constant offset for offset validation.  */
  tree_code code = TREE_CODE (oper);
  if (code == COMPONENT_REF)
{
  tree field = TREE_OPERAND (ref, 1);
  tree fldoff = DECL_FIELD_OFFSET (field);
  if (TREE_CODE (fldoff) == INTEGER_CST)
refoff = const_off + wi::to_offset (fldoff);
}
}

The first problem is that DECL_FIELD_OFFSET alone doesn't give the offset of a 
field; byte_position does, because DECL_FIELD_BIT_OFFSET needs to be taken into 
account as well:

/* In a FIELD_DECL, this is the offset, in bits, of the first bit of the
   field from DECL_FIELD_OFFSET.  This field may be nonzero even for fields
   that are not bit fields (since DECL_OFFSET_ALIGN may be larger than the
   natural alignment of the field's type).  */
#define DECL_FIELD_BIT_OFFSET(NODE) \
  (FIELD_DECL_CHECK (NODE)->field_decl.bit_offset)

But replacing DECL_FIELD_OFFSET with byte_position is actually worse, because 
the offset of the field is then counted twice in refoff since it is already 
comprised in const_off.  In the end, the correct thing to do is just to equate 
refoff and const_off.

Tested on visium-elf & x86_64-suse-linux, applied on the mainline as obvious.


2018-01-16  Eric Botcazou  

* gimple-ssa-warn-restrict.c (builtin_memref::builtin_memref): For an
ADDR_EXPR, do not count the offset of a COMPONENT_REF twice.


2018-01-16  Eric Botcazou  

* c-c++-common/Warray-bounds-3.c (test_memmove_bounds): Fix mismatch.

-- 
Eric Botcazou
Index: gimple-ssa-warn-restrict.c
===
--- gimple-ssa-warn-restrict.c	(revision 256562)
+++ gimple-ssa-warn-restrict.c	(working copy)
@@ -312,11 +312,11 @@ builtin_memref::builtin_memref (tree exp
   if (TREE_CODE (expr) == ADDR_EXPR)
 {
   poly_int64 off;
-  tree oper = TREE_OPERAND (expr, 0);
+  tree op = TREE_OPERAND (expr, 0);
 
   /* Determine the base object or pointer of the reference
 	 and its constant offset from the beginning of the base.  */
-  base = get_addr_base_and_unit_offset (oper, );
+  base = get_addr_base_and_unit_offset (op, );
 
   HOST_WIDE_INT const_off;
   if (base && off.is_constant (_off))
@@ -325,17 +325,11 @@ builtin_memref::builtin_memref (tree exp
 	  offrange[1] += const_off;
 
 	  /* Stash the reference for offset validation.  */
-	  ref = oper;
+	  ref = op;
 
 	  /* Also stash the constant offset for offset validation.  */
-	  tree_code code = TREE_CODE (oper);
-	  if (code == COMPONENT_REF)
-	{
-	  tree field = TREE_OPERAND (ref, 1);
-	  tree fldoff = DECL_FIELD_OFFSET (field);
-	  if (TREE_CODE (fldoff) == INTEGER_CST)
-		refoff = const_off + wi::to_offset (fldoff);
-	}
+	  if (TREE_CODE (op) == COMPONENT_REF)
+	refoff = const_off;
 	}
   else
 	{
Index: testsuite/c-c++-common/Warray-bounds-3.c
===
--- testsuite/c-c++-common/Warray-bounds-3.c	(revision 256562)
+++ testsuite/c-c++-common/Warray-bounds-3.c	(working copy)
@@ -199,7 +199,8 @@ void test_memmove_bounds (char *d, const
   T (int,  2, a + SR ( 1, 3), pi, n);
   T (int,  2, a + SR ( 2, 3), pi, n);
 
-  T (int32_t, 2, a + SR ( 3, 4), pi, n);  /* { dg-warning "offset \\\[12, 16] is out of the bounds \\\[0, 8] of object .\[^\n\r]+. with type .int32_t ?\\\[2]." } */
+  const int32_t *pi32 = (const int32_t*)s;
+  T (int32_t, 2, a + SR ( 3, 4), pi32, n);  /* { dg-warning "offset \\\[12, 16] is out of the bounds \\\[0, 8] of object .\[^\n\r]+. with type .int32_t ?\\\[2]." } */
 }
 
 


C++ PATCH for c++/83714, ICE checking return from template

2018-01-16 Thread Jason Merrill
Like my recent patch for 83186, we were missing a build_non_dependent_expr.

Tested x86_64-pc-linux-gnu, applying to trunk.
commit 5c63951fe987acd133bf59532896a4b397f49f12
Author: Jason Merrill 
Date:   Tue Jan 16 17:19:22 2018 -0500

PR c++/83714 - ICE checking return in template.

* typeck.c (check_return_expr): Call build_non_dependent_expr.

diff --git a/gcc/cp/typeck.c b/gcc/cp/typeck.c
index f0dc03de111..d0adb798278 100644
--- a/gcc/cp/typeck.c
+++ b/gcc/cp/typeck.c
@@ -9333,6 +9333,9 @@ check_return_expr (tree retval, bool *no_warning)
 to undo it so we can try to treat it as an rvalue below.  */
   retval = maybe_undo_parenthesized_ref (retval);
 
+  if (processing_template_decl)
+   retval = build_non_dependent_expr (retval);
+
   /* Under C++11 [12.8/32 class.copy], a returned lvalue is sometimes
 treated as an rvalue for the purposes of overload resolution to
 favor move constructors over copy constructors.
diff --git a/gcc/testsuite/g++.dg/cpp0x/alias-decl-61.C 
b/gcc/testsuite/g++.dg/cpp0x/alias-decl-61.C
new file mode 100644
index 000..670d91a158c
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/alias-decl-61.C
@@ -0,0 +1,16 @@
+// PR c++/83714
+// { dg-do compile { target c++11 } }
+
+class a {
+  typedef int b;
+  operator b();
+};
+struct c {
+  using d = a;
+};
+using e = c;
+
+template 
+e f(T) {
+  return e::d {};  // { dg-error "could not convert" }
+}


Re: [PATCH, rs6000] Bug fixes for the Power 9 stxvl and lxvl instructions.

2018-01-16 Thread Segher Boessenkool
Hi!

On Tue, Jan 16, 2018 at 11:57:14AM -0800, Carl Love wrote:
> The following patch contains fixes for the stxvl and lxvl instructions
> and XL_LEN_R builtin that were found while adding additional Power 9
> test cases for the various load and store builtins.  The new tests in
> builtins-5-p9-runnable.c and builtins-6-p9-runnable.c are included that
> exposed the bugs.

> gcc/ChangeLog:
> 
> 2018-01-16 Carl Love  
>   * config/rs6000/vsx.md (define_expand xl_len_r,
>   define_expand stxvl, define_expand *stxvl): Add match_dup
>   argument.

You should mention the rs6000-builtin.def change, too.  It is, uh,
important :-)  And maybe some words about the other changes in vsx.md?

> gcc/testsuite/ChangeLog:
> 
> 2018-01-16  Carl Love  
>   * gcc.target/powerpc/builtins-6-p9-runnable.c: Add additional tests.
>   Add debug print statements.
>   * gcc.target/powerpc/builtins-5-p9-runnable.c: Add test to do
>   16 byte vector load followed by a partial vector load.


> --- a/gcc/config/rs6000/vsx.md
> +++ b/gcc/config/rs6000/vsx.md
> @@ -4624,10 +4624,12 @@ (define_expand "first_mismatch_or_eos_index_"
>  ;; Load VSX Vector with Length
>  (define_expand "lxvl"
>[(set (match_dup 3)
> -(match_operand:DI 2 "register_operand"))
> +(ashift:DI (match_operand:DI 2 "register_operand")
> +   (const_int 56)))
> (set (match_operand:V16QI 0 "vsx_register_operand")
>   (unspec:V16QI
>[(match_operand:DI 1 "gpc_reg_operand")
> +  (mem:V16QI (match_dup 1))

Please use a tab instead of eight spaces at the start of line.  (Twice
here, once in lxvll -- well two tabs on one line there).

Looks great otherwise.  Okay for trunk with those nits fixed (if the tests
work out, of course).  Thanks!


Segher


[testsuite] Tweak Wrestrict.c

2018-01-16 Thread Eric Botcazou
This test fails on strict-alignment platforms because a call to memcpy is not 
turned into a simple move and thus yields an additional warning:

warning: '__builtin_memcpy' writing 4 bytes into a region of size 0 overflows 
the destination [-Wstringop-overflow=]

The attached patch tweaks the test so that this call to memcpy is preserved on 
the other platforms too (by adding one character to the string) and adds the 
additional dg-warning directive.

Tested on visium-elf & x86_64-suse-linux, applied on the mainline as obvious.


2018-01-16  Eric Botcazou  

* c-c++-common/Wrestrict.c (test_strcpy_range): Bump string size of one
test and add dg-warning for the -Wstringop-overflow warning.

-- 
Eric Botcazou
Index: c-c++-common/Wrestrict.c
===
--- c-c++-common/Wrestrict.c	(revision 256562)
+++ c-c++-common/Wrestrict.c	(working copy)
@@ -758,7 +758,8 @@ void test_strcpy_range (void)
   T (8, "012", a + r, a);/* { dg-warning "accessing 4 bytes at offsets \\\[3, \[0-9\]+] and 0 may overlap 1 byte at offset 3" "strcpy" } */
 
   r = SR (DIFF_MAX - 2, DIFF_MAX - 1);
-  T (8, "012", a + r, a);/* { dg-warning "accessing 4 bytes at offsets \\\[\[0-9\]+, \[0-9\]+] and 0 overlaps" "strcpy" } */
+  T (8, "0123", a + r, a);/* { dg-warning "accessing 5 bytes at offsets \\\[\[0-9\]+, \[0-9\]+] and 0 overlaps" "strcpy" } */
+  /* { dg-warning "writing 5 bytes into a region of size 0 overflows" "memcpy" { target *-*-* } .-1 } */
 
   /* Exercise the full range of ptrdiff_t.  */
   r = signed_value ();


[testsuite] XFAIL Warray-bounds-4.c on SPARC and Visium

2018-01-16 Thread Eric Botcazou
On these platforms, one of the instances of the constructor generated in 
test_strcpy_bounds_memarray_range is put into the constant pool so the strlen 
pass cannot do its magic.

Tested on visium-elf, SPARC64 and x86-64/Linux, applied on the mainline.


2018-01-16  Eric Botcazou  

* c-c++-common/Warray-bounds-4.c (test_strcpy_bounds_memarray_range): 
XFAIL last test on SPARC and Visium.

-- 
Eric Botcazou
Index: c-c++-common/Warray-bounds-4.c
===
--- c-c++-common/Warray-bounds-4.c	(revision 256562)
+++ c-c++-common/Warray-bounds-4.c	(working copy)
@@ -64,5 +64,5 @@ void test_strcpy_bounds_memarray_range (
   TM ("01", "",ma.a5 + i, ma.a5);
   TM ("012", "",   ma.a5 + i, ma.a5);
   TM ("0123", "",  ma.a5 + i, ma.a5); /* { dg-warning "offset 6 from the object at .ma. is out of the bounds of referenced subobject .a5. with type .char\\\[5]. at offset 0" "strcpy" { xfail *-*-* } } */
-  TM ("", "012345", ma.a7 + i, ma.a7);/* { dg-warning "offset 13 from the object at .ma. is out of the bounds of referenced subobject .\(MA::\)?a7. with type .char ?\\\[7]. at offset 5" } */
+  TM ("", "012345", ma.a7 + i, ma.a7);/* { dg-warning "offset 13 from the object at .ma. is out of the bounds of referenced subobject .\(MA::\)?a7. with type .char ?\\\[7]. at offset 5" "strcpy" { xfail sparc*-*-* visium-*-* } } */
 }


Re: [PATCH] avoid assuming known string length is constant (PR 83896)

2018-01-16 Thread Jakub Jelinek
On Tue, Jan 16, 2018 at 03:20:24PM -0700, Martin Sebor wrote:
> Thanks for looking at it!  I confess it's not completely clear
> to me in what type the pass tracks string lengths.  For string
> constants, get_stridx() returns an int with their length
> bit-flipped.  I tried to maintain that invariant in the change

That is because TREE_STRING_LENGTH is an int, so gcc doesn't allow
string literals longer than 2GB.  All other lengths are tracked as trees.

Jakub


Re: [PATCH, rs6000] (v2) Support for gimple folding of mergeh, mergel intrinsics

2018-01-16 Thread Segher Boessenkool
Hi!

On Tue, Jan 16, 2018 at 01:39:28PM -0600, Will Schmidt wrote:
> Sniff-tests of the target tests on a single system look OK.  Full regtests are
> currently running across assorted power systems.
> OK for trunk, pending successful results?

Just a few little things:

> 2018-01-16  Will Schmidt  
> 
>   * config/rs6000/rs6000.c: (rs6000_gimple_builtin) Add gimple folding
>   support for merge[hl].

The : goes after the ).

>  (define_insn "altivec_vmrghw_direct"
> -  [(set (match_operand:V4SI 0 "register_operand" "=v")
> -(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")
> -  (match_operand:V4SI 2 "register_operand" "v")]
> - UNSPEC_VMRGH_DIRECT))]
> +  [(set (match_operand:V4SI 0 "register_operand" "=v,wa")
> + (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v,wa")
> +   (match_operand:V4SI 2 "register_operand" "v,wa")]
> +  UNSPEC_VMRGH_DIRECT))]
>"TARGET_ALTIVEC"
> -  "vmrghw %0,%1,%2"
> +  "@
> +  vmrghw %0,%1,%2
> +  xxmrghw %x0,%x1,%x2"

Those last two lines should be indented one more space, so that everything
aligns (with the @).

> +  "@
> +  vmrglw %0,%1,%2
> +  xxmrglw %x0,%x1,%x2"

Same here of course.

> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/builtins-1-be-folded.c
> @@ -0,0 +1,11 @@
> +/* { dg-do compile { target { powerpc-*-* } } } */

Do you want powerpc*-*-*?  That is default in gcc.target/powerpc; dg-do
compile is default, too, so you can either say

/* { dg-do compile } */

or nothing at all, to taste.

But it looks like you want to restrict to BE?  We still don't have a
dejagnu thingy for that; you could put some #ifdef around it all (there
are some examples in other testcases).  Not ideal, but works.

> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-mergehl-float.c
> @@ -0,0 +1,26 @@
> +/* Verify that overloaded built-ins for vec_splat with float
> +   inputs produce the right code.  */
> +
> +/* { dg-do compile } */
> +/* { dg-require-effective-target powerpc_vsx_ok } */
> +/* { dg-options "-maltivec -O2" } */

Either powerpc_altivec_ok or -mvsx?

> new file mode 100644
> index 000..ab5f54e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-mergehl-longlong.c
> @@ -0,0 +1,48 @@
> +/* Verify that overloaded built-ins for vec_merge* with long long
> +   inputs produce the right code.  */
> +
> +/* { dg-do compile } */
> +/* { dg-require-effective-target powerpc_p8vector_ok } */
> +/* { dg-options "-mvsx -O2" } */

Either powerpc_vsx_ok or -mpower8-vector?

> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-mergehl-pixel.c
> @@ -0,0 +1,24 @@
> +/* Verify that overloaded built-ins for vec_splat with pixel
> +   inputs produce the right code.  */
> +
> +/* { dg-do compile } */
> +/* { dg-require-effective-target powerpc_vsx_ok } */
> +/* { dg-options "-maltivec -mvsx -O2" } */

-mvsx implies -maltivec (not wrong of course, just a bit weird).

Okay for trunk with those nits fixed.  Thanks!


Segher


Re: [PATCH] avoid assuming known string length is constant (PR 83896)

2018-01-16 Thread Martin Sebor

On 01/16/2018 02:26 PM, Jakub Jelinek wrote:

On Tue, Jan 16, 2018 at 07:37:30PM +0000, Richard Sandiford wrote:

-/* Check if RHS is string_cst possibly wrapped by mem_ref.  */
+/* If RHS, either directly or indirectly, refers to a string of constant
+   length, return it.  Otherwise return a negative value.  */
+
 static int
 get_string_len (tree rhs)
 {


I think this should be returning HOST_WIDE_INT given the unconstrained
tree_to_shwi return.  Same type change for rhslen in the caller.

(Not my call, but it might be better to have a more specific function name,
given that the file already had "get_string_length" before this function
was added.)


Yeah, certainly for both.


@@ -2789,7 +2791,8 @@ get_string_len (tree rhs)
  if (idx > 0)
{
  strinfo *si = get_strinfo (idx);
- if (si && si->full_string_p)
+ if (si && si->full_string_p
+ && TREE_CODE (si->nonzero_chars) == INTEGER_CST)
return tree_to_shwi (si->nonzero_chars);


tree_fits_shwi_p?


Surely that instead of TREE_CODE check, but even that will not make sure it
fits into host int, so yes, it should be HOST_WIDE_INT and the code should
make sure it is also >= 0.


I made these changes except for the last part:  How/when can
the length be negative?

Martin


Re: [PATCH] avoid assuming known string length is constant (PR 83896)

2018-01-16 Thread Martin Sebor

On 01/16/2018 12:37 PM, Richard Sandiford wrote:

Martin Sebor  writes:

Recent improvements to the strlen pass introduced the assumption
that when the length of a string has been recorded by the pass
the length is necessarily constant.  Bug 83896 shows that this
assumption is not always true, and that GCC fails with an ICE
when it doesn't hold.  To avoid the ICE the attached patch
removes the assumption.

x86_64-linux bootstrap successful, regression test in progress.

Martin

PR tree-optimization/83896 - ice in get_string_len on a call to strlen with 
non-constant length

gcc/ChangeLog:

PR tree-optimization/83896
* tree-ssa-strlen.c (get_string_len): Avoid assuming length is constant.

gcc/testsuite/ChangeLog:

PR tree-optimization/83896
* gcc.dg/strlenopt-43.c: New test.

Index: gcc/tree-ssa-strlen.c
===
--- gcc/tree-ssa-strlen.c   (revision 256752)
+++ gcc/tree-ssa-strlen.c   (working copy)
@@ -2772,7 +2772,9 @@ handle_pointer_plus (gimple_stmt_iterator *gsi)
 }
 }

-/* Check if RHS is string_cst possibly wrapped by mem_ref.  */
+/* If RHS, either directly or indirectly, refers to a string of constant
+   length, return it.  Otherwise return a negative value.  */
+
 static int
 get_string_len (tree rhs)
 {


I think this should be returning HOST_WIDE_INT given the unconstrained
tree_to_shwi return.  Same type change for rhslen in the caller.


Thanks for looking at it!  I confess it's not completely clear
to me in what type the pass tracks string lengths.  For string
constants, get_stridx() returns an int with their length
bit-flipped.  I tried to maintain that invariant in the change
I introduced in the block toward the end of the function (in
a different patch).  But then in other places the pass works
with HOST_WIDE_INT, so it looks like it would be appropriate
to use here as well.

I tried to come up with a test case that would exercise this
conversion but couldn't.  If you (or someone else) have an idea
for one I'd be more than happy to add it to the test suite.


(Not my call, but it might be better to have a more specific function name,
given that the file already had "get_string_length" before this function
was added.)


I renamed it (again), this time to get_string_cst_length().
Nothing better came to mind.




@@ -2789,7 +2791,8 @@ get_string_len (tree rhs)
  if (idx > 0)
{
  strinfo *si = get_strinfo (idx);
- if (si && si->full_string_p)
+ if (si && si->full_string_p
+ && TREE_CODE (si->nonzero_chars) == INTEGER_CST)
return tree_to_shwi (si->nonzero_chars);


tree_fits_shwi_p?


Sigh.  Yes.  I still keep forgetting about all these gotchas.
Dealing with integers is so painfully error-prone in GCC (as
evident from all the bug reports with ICEs for these things).

It would be much simpler and safer if tree_to_shwi() returned
true on success and false for error (e.g., null, non-const,
or overflow) and took an extra argument for the result.  Then
the code would become:

  HOST_WIDE_INT result;
  if (si && tree_to_shwi (&result, si->nonzero_chars))
return result;

and it would be nearly impossible to forget to check for bad
input.

Anyway, attached is an updated patch.

Martin



Thanks,
Richard


}
}
Index: gcc/testsuite/gcc.dg/strlenopt-43.c
===
--- gcc/testsuite/gcc.dg/strlenopt-43.c (nonexistent)
+++ gcc/testsuite/gcc.dg/strlenopt-43.c (working copy)
@@ -0,0 +1,13 @@
+/* PR tree-optimization/83896 - ice in get_string_len on a call to strlen
+   with non-constant length
+   { dg-do compile }
+   { dg-options "-O2 -Wall" } */
+
+extern char a[5];
+extern char b[];
+
+void f (void)
+{
+  if (__builtin_strlen (b) != 4)
+__builtin_memcpy (a, b, sizeof a);
+}


PR tree-optimization/83896 - ice in get_string_len on a call to strlen with non-constant length

gcc/ChangeLog:

	PR tree-optimization/83896
	* tree-ssa-strlen.c (get_string_len): Rename...
	(get_string_cst_length): ...to this.  Return HOST_WIDE_INT.
	Avoid assuming length is constant.
	(handle_char_store): Use HOST_WIDE_INT for string length.

gcc/testsuite/ChangeLog:

	PR tree-optimization/83896
	* gcc.dg/strlenopt-43.c: New test.

Index: gcc/tree-ssa-strlen.c
===
--- gcc/tree-ssa-strlen.c	(revision 256752)
+++ gcc/tree-ssa-strlen.c	(working copy)
@@ -2772,16 +2772,20 @@ handle_pointer_plus (gimple_stmt_iterator *gsi)
 }
 }
 
-/* Check if RHS is string_cst possibly wrapped by mem_ref.  */
-static int
-get_string_len (tree rhs)
+/* If RHS, either directly or indirectly, refers to a string of constant
+   length, return it.  Otherwise return a negative value.  */
+
+static HOST_WIDE_INT
+get_string_cst_length (tree rhs)
 {
   if (TREE_CODE 

Re: Compilation warning in simple-object-xcoff.c

2018-01-16 Thread Andreas Schwab
On Jan 16 2018, DJ Delorie  wrote:

> And it's not the host's bit size that counts; there are usually ways to
> get 64-bit file operations on 32-bit hosts.

If ACX_LARGEFILE doesn't succeed in enabling those 64-bit file
operations (thus making off_t a 64-bit type) then you are out of luck
(or AC_SYS_LARGEFILE doesn't support your host yet).

Andreas.

-- 
Andreas Schwab, sch...@linux-m68k.org
GPG Key fingerprint = 58CA 54C7 6D53 942B 1756  01D3 44D5 214B 8276 4ED5
"And now for something completely different."


One more patch for PR80481

2018-01-16 Thread Vladimir Makarov

The patch changes the test to exclude solaris for

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80481

The first patch solved the problem for solaris too but solaris gcc still 
generates vmovaps in some different part of the code (unrelated to the 
problem) where linux gcc does not.


Committed as rev. 256761.


Index: testsuite/ChangeLog
===
--- testsuite/ChangeLog	(revision 256760)
+++ testsuite/ChangeLog	(working copy)
@@ -1,3 +1,8 @@
+2018-01-16  Vladimir Makarov  
+
+	PR rtl-optimization/80481
+	* g++.dg/pr80481.C: Exclude solaris.
+
 2018-01-16  Eric Botcazou  
 
 	* c-c++-common/patchable_function_entry-decl.c: Use 3 NOPs on Visium.
Index: testsuite/g++.dg/pr80481.C
===
--- testsuite/g++.dg/pr80481.C	(revision 256760)
+++ testsuite/g++.dg/pr80481.C	(working copy)
@@ -1,4 +1,4 @@
-// { dg-do compile { target i?86-*-* x86_64-*-* } }
+// { dg-do compile { target { i?86-*-* x86_64-*-* }  && { ! *-*-solaris* } } }
 // { dg-options "-Ofast -funroll-loops -fopenmp -march=knl" }
 // { dg-final { scan-assembler-not "vmovaps" } }
 


Re: [C++ Patch] PR 81054 ("[7/8 Regression] ICE with volatile variable in constexpr function") [Take 2]

2018-01-16 Thread Paolo Carlini

Hi Jason

On 16/01/2018 22:35, Jason Merrill wrote:

On Tue, Jan 16, 2018 at 3:32 PM, Paolo Carlini  wrote:

thus I figured out what was badly wrong in my first try: I misread
ensure_literal_type_for_constexpr_object and missed that it can return
NULL_TREE without emitting an hard error. Thus my first try even caused
miscompilations :( Anyway, when DECL_DECLARED_CONSTEXPR_P is true we are
safe and indeed we want to clear it as matter of error recovery. Then, in
this safe case the only change in the below is returning early, thus
avoiding any internal inconsistencies later and also the redundant /
misleading diagnostic which I already mentioned.

I can't see how this could be right.  In the cases where we don't give
an error (e.g. because we're dealing with an instantiation of a
variable template) there is no error, so we need to proceed with the
rest of cp_finish_decl as normal.
The cases where we don't give an error all fall under 
DECL_DECLARED_CONSTEXPR_P == false, thus aren't affected at all.


Unless I'm again misreading ensure_literal_type_for_constexpr_object, I 
hope not.


Paolo.


Re: [C++ Patch] PR 81054 ("[7/8 Regression] ICE with volatile variable in constexpr function") [Take 2]

2018-01-16 Thread Jason Merrill
On Tue, Jan 16, 2018 at 3:32 PM, Paolo Carlini  wrote:
> thus I figured out what was badly wrong in my first try: I misread
> ensure_literal_type_for_constexpr_object and missed that it can return
> NULL_TREE without emitting an hard error. Thus my first try even caused
> miscompilations :( Anyway, when DECL_DECLARED_CONSTEXPR_P is true we are
> safe and indeed we want to clear it as matter of error recovery. Then, in
> this safe case the only change in the below is returning early, thus
> avoiding any internal inconsistencies later and also the redundant /
> misleading diagnostic which I already mentioned.

I can't see how this could be right.  In the cases where we don't give
an error (e.g. because we're dealing with an instantiation of a
variable template) there is no error, so we need to proceed with the
rest of cp_finish_decl as normal.

Jason


Re: [PATCH] make -Wrestrict for strcat more meaningful (PR 83698)

2018-01-16 Thread Jakub Jelinek
On Tue, Jan 16, 2018 at 01:36:26PM -0700, Martin Sebor wrote:
> --- gcc/gimple-ssa-warn-restrict.c(revision 256752)
> +++ gcc/gimple-ssa-warn-restrict.c(working copy)
> @@ -384,6 +384,12 @@ builtin_memref::builtin_memref (tree expr, tree si
> base = SSA_NAME_VAR (base);
>}
>  
> +  if (DECL_P (base) && TREE_CODE (TREE_TYPE (base)) == ARRAY_TYPE)
> +{
> +  if (offrange[0] < 0 && offrange[1] > 0)
> + offrange[0] = 0;
> +}

Why the 2 nested ifs?

> @@ -1079,14 +1085,35 @@ builtin_access::strcat_overlap ()
>  return false;
>  
>/* When strcat overlap is certain it is always a single byte:
> - the terminatinn NUL, regardless of offsets and sizes.  When
> + the terminating NUL, regardless of offsets and sizes.  When
>   overlap is only possible its range is [0, 1].  */
>acs.ovlsiz[0] = dstref->sizrange[0] == dstref->sizrange[1] ? 1 : 0;
>acs.ovlsiz[1] = 1;
> -  acs.ovloff[0] = (dstref->sizrange[0] + dstref->offrange[0]).to_shwi ();
> -  acs.ovloff[1] = (dstref->sizrange[1] + dstref->offrange[1]).to_shwi ();

You use to_shwi many times in the patch, do the callers or something earlier
in this function guarantee that you aren't throwing away any bits (unlike
tree_to_shwi, the to_shwi method doesn't ICE, just throws away upper bits)?
Especially when you perform additions like here, even if both wide_ints fit
into a shwi, the result might not.

Jakub


Re: [PATCH] avoid assuming known string length is constant (PR 83896)

2018-01-16 Thread Jakub Jelinek
On Tue, Jan 16, 2018 at 07:37:30PM +0000, Richard Sandiford wrote:
> > -/* Check if RHS is string_cst possibly wrapped by mem_ref.  */
> > +/* If RHS, either directly or indirectly, refers to a string of constant
> > +   length, return it.  Otherwise return a negative value.  */
> > +
> >  static int
> >  get_string_len (tree rhs)
> >  {
> 
> I think this should be returning HOST_WIDE_INT given the unconstrained
> tree_to_shwi return.  Same type change for rhslen in the caller.
> 
> (Not my call, but it might be better to have a more specific function name,
> given that the file already had "get_string_length" before this function
> was added.)

Yeah, certainly for both.

> > @@ -2789,7 +2791,8 @@ get_string_len (tree rhs)
> >   if (idx > 0)
> > {
> >   strinfo *si = get_strinfo (idx);
> > - if (si && si->full_string_p)
> > + if (si && si->full_string_p
> > + && TREE_CODE (si->nonzero_chars) == INTEGER_CST)
> > return tree_to_shwi (si->nonzero_chars);
> 
> tree_fits_shwi_p?

Surely that instead of TREE_CODE check, but even that will not make sure it
fits into host int, so yes, it should be HOST_WIDE_INT and the code should
make sure it is also >= 0.

Jakub


[testsuite] Tweak patchable function tests

2018-01-16 Thread Eric Botcazou
On Visium, the compiler sometimes emits a NOP to avoid a pipeline hazard.

Tested on visium-elf and x86_64-suse-linux, applied on the mainline.


2018-01-16  Eric Botcazou  

* c-c++-common/patchable_function_entry-decl.c: Use 3 NOPs on Visium.
* c-c++-common/patchable_function_entry-default.c: Use 4 NOPs on Visium.
* c-c++-common/patchable_function_entry-definition.c: Use 2 NOPs on 
Visium.

-- 
Eric Botcazou
Index: c-c++-common/patchable_function_entry-decl.c
===
--- c-c++-common/patchable_function_entry-decl.c	(revision 256562)
+++ c-c++-common/patchable_function_entry-decl.c	(working copy)
@@ -1,7 +1,8 @@
 /* { dg-do compile { target { ! nvptx*-*-* } } } */
 /* { dg-options "-O2 -fpatchable-function-entry=3,1" } */
-/* { dg-final { scan-assembler-times "nop" 2 { target { ! alpha*-*-* } } } } */
+/* { dg-final { scan-assembler-times "nop" 2 { target { ! { alpha*-*-* visium-*-* } } } } } */
 /* { dg-final { scan-assembler-times "bis" 2 { target alpha*-*-* } } } */
+/* { dg-final { scan-assembler-times "nop" 3 { target visium-*-* } } } */
 
 extern int a;
 
Index: c-c++-common/patchable_function_entry-default.c
===
--- c-c++-common/patchable_function_entry-default.c	(revision 256562)
+++ c-c++-common/patchable_function_entry-default.c	(working copy)
@@ -1,7 +1,8 @@
 /* { dg-do compile { target { ! nvptx*-*-* } } } */
 /* { dg-options "-O2 -fpatchable-function-entry=3,1" } */
-/* { dg-final { scan-assembler-times "nop" 3 { target { ! alpha*-*-* } } } } */
+/* { dg-final { scan-assembler-times "nop" 3 { target { ! { alpha*-*-* visium-*-* } } } } } */
 /* { dg-final { scan-assembler-times "bis" 3 { target alpha*-*-* } } } */
+/* { dg-final { scan-assembler-times "nop" 4 { target visium-*-* } } } */
 
 extern int a;
 
Index: c-c++-common/patchable_function_entry-definition.c
===
--- c-c++-common/patchable_function_entry-definition.c	(revision 256562)
+++ c-c++-common/patchable_function_entry-definition.c	(working copy)
@@ -1,7 +1,8 @@
 /* { dg-do compile { target { ! nvptx*-*-* } } } */
 /* { dg-options "-O2 -fpatchable-function-entry=3,1" } */
-/* { dg-final { scan-assembler-times "nop" 1 { target { ! alpha*-*-* } } } } */
+/* { dg-final { scan-assembler-times "nop" 1 { target { ! { alpha*-*-* visium-*-* } } } } } */
 /* { dg-final { scan-assembler-times "bis" 1 { target alpha*-*-* } } } */
+/* { dg-final { scan-assembler-times "nop" 2 { target visium-*-* } } } */
 
 extern int a;
 


[testsuite] Skip loop tests on Visium

2018-01-16 Thread Eric Botcazou
They either use too much space in the data segment or on the stack.

Tested on visium-elf, applied on the mainline.


2018-01-16  Eric Botcazou  

* gcc.dg/tree-ssa/ldist-27.c: Skip on Visium.
* gcc.dg/tree-ssa/loop-interchange-1.c: Likewise.
* gcc.dg/tree-ssa/loop-interchange-1b.c: Likewise.
* gcc.dg/tree-ssa/loop-interchange-2.c: Likewise.
* gcc.dg/tree-ssa/loop-interchange-3.c: Likewise.
* gcc.dg/tree-ssa/loop-interchange-4.c: Likewise.
* gcc.dg/tree-ssa/loop-interchange-5.c: Likewise.
* gcc.dg/tree-ssa/loop-interchange-6.c: Likewise.
* gcc.dg/tree-ssa/loop-interchange-7.c: Likewise.
* gcc.dg/tree-ssa/loop-interchange-8.c: Likewise.
* gcc.dg/tree-ssa/loop-interchange-9.c: Likewise.
* gcc.dg/tree-ssa/loop-interchange-10.c: Likewise.
* gcc.dg/tree-ssa/loop-interchange-11.c: Likewise.
* gcc.dg/tree-ssa/loop-interchange-14.c: Likewise.
* gcc.dg/tree-ssa/loop-interchange-15.c: Likewise.

-- 
Eric Botcazou
Index: gcc.dg/tree-ssa/ldist-27.c
===
--- gcc.dg/tree-ssa/ldist-27.c	(revision 256562)
+++ gcc.dg/tree-ssa/ldist-27.c	(working copy)
@@ -1,5 +1,6 @@
 /* { dg-do run } */
 /* { dg-options "-O3 -ftree-loop-distribute-patterns -fdump-tree-ldist-details" } */
+/* { dg-skip-if "too big data segment" { visium-*-* } } */
 
 #define M (300)
 #define N (200)
Index: gcc.dg/tree-ssa/loop-interchange-1.c
===
--- gcc.dg/tree-ssa/loop-interchange-1.c	(revision 256562)
+++ gcc.dg/tree-ssa/loop-interchange-1.c	(working copy)
@@ -1,5 +1,6 @@
 /* { dg-do run } */
 /* { dg-options "-O2 -floop-interchange -fassociative-math -fno-signed-zeros -fno-trapping-math -fdump-tree-linterchange-details" } */
+/* { dg-skip-if "too big data segment" { visium-*-* } } */
 
 /* Copied from graphite/interchange-4.c */
 
Index: gcc.dg/tree-ssa/loop-interchange-10.c
===
--- gcc.dg/tree-ssa/loop-interchange-10.c	(revision 256562)
+++ gcc.dg/tree-ssa/loop-interchange-10.c	(working copy)
@@ -1,5 +1,6 @@
 /* { dg-do run } */
 /* { dg-options "-O2 -floop-interchange -fdump-tree-linterchange-details" } */
+/* { dg-skip-if "too big data segment" { visium-*-* } } */
 
 #define M 256
 int a[M][M], b[M][M];
Index: gcc.dg/tree-ssa/loop-interchange-11.c
===
--- gcc.dg/tree-ssa/loop-interchange-11.c	(revision 256562)
+++ gcc.dg/tree-ssa/loop-interchange-11.c	(working copy)
@@ -1,5 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-O2 -floop-interchange -fdump-tree-linterchange-details" } */
+/* { dg-skip-if "too big data segment" { visium-*-* } } */
 
 #define M 256
 int a[M][M], b[M][M];
Index: gcc.dg/tree-ssa/loop-interchange-14.c
===
--- gcc.dg/tree-ssa/loop-interchange-14.c	(revision 256562)
+++ gcc.dg/tree-ssa/loop-interchange-14.c	(working copy)
@@ -1,6 +1,7 @@
 /* PR tree-optimization/83337 */
 /* { dg-do run { target int32plus } } */
 /* { dg-options "-O2 -floop-interchange -fdump-tree-linterchange-details" } */
+/* { dg-skip-if "too big data segment" { visium-*-* } } */
 
 /* Copied from graphite/interchange-5.c */
 
Index: gcc.dg/tree-ssa/loop-interchange-15.c
===
--- gcc.dg/tree-ssa/loop-interchange-15.c	(revision 256562)
+++ gcc.dg/tree-ssa/loop-interchange-15.c	(working copy)
@@ -2,6 +2,7 @@
 /* { dg-do run { target int32plus } } */
 /* { dg-options "-O2 -floop-interchange" } */
 /* { dg-require-effective-target alloca }  */
+/* { dg-skip-if "too big stack" { visium-*-* } } */
 
 /* Copied from graphite/interchange-5.c */
 
Index: gcc.dg/tree-ssa/loop-interchange-1b.c
===
--- gcc.dg/tree-ssa/loop-interchange-1b.c	(revision 256562)
+++ gcc.dg/tree-ssa/loop-interchange-1b.c	(working copy)
@@ -1,5 +1,6 @@
 /* { dg-do run } */
 /* { dg-options "-O2 -floop-interchange -fdump-tree-linterchange-details" } */
+/* { dg-skip-if "too big data segment" { visium-*-* } } */
 
 /* Copied from graphite/interchange-4.c */
 
Index: gcc.dg/tree-ssa/loop-interchange-2.c
===
--- gcc.dg/tree-ssa/loop-interchange-2.c	(revision 256562)
+++ gcc.dg/tree-ssa/loop-interchange-2.c	(working copy)
@@ -1,5 +1,6 @@
 /* { dg-do run } */
 /* { dg-options "-O2 -floop-interchange -fdump-tree-linterchange-details" } */
+/* { dg-skip-if "too big data segment" { visium-*-* } } */
 
 /* Copied from graphite/interchange-5.c */
 
Index: gcc.dg/tree-ssa/loop-interchange-3.c
===
--- gcc.dg/tree-ssa/loop-interchange-3.c	(revision 256562)
+++ gcc.dg/tree-ssa/loop-interchange-3.c	

[visium] Very minor tweak

2018-01-16 Thread Eric Botcazou
Tested on visium-elf, applied on the mainline.


2018-01-16  Eric Botcazou  

* config/visium/visium.md (nop): Tweak comment.
(hazard_nop): Likewise.

-- 
Eric Botcazou
Index: config/visium/visium.md
===
--- config/visium/visium.md	(revision 256562)
+++ config/visium/visium.md	(working copy)
@@ -2962,13 +2962,13 @@ (define_insn "dsi"
 (define_insn "nop"
   [(const_int 0)]
   ""
-  "nop			;generated nop"
+  "nop			;generated"
   [(set_attr "type" "nop")])
 
 (define_insn "hazard_nop"
   [(unspec_volatile [(const_int 0)] UNSPEC_NOP)]
   ""
-  "nop			;hazard avoidance nop"
+  "nop			;hazard avoidance"
   [(set_attr "type" "nop")])
 
 (define_insn "blockage"


Fix PR testsuite/77734 on SPARC

2018-01-16 Thread Eric Botcazou
We need to enable delayed-branch scheduling to have sibling calls on SPARC.

Tested on SPARC64/Linux, applied on the mainline and 7 branch.


2018-01-16  Eric Botcazou  

PR testsuite/77734
* gcc.dg/plugin/must-tail-call-1.c: Pass -fdelayed-branch on SPARC.

-- 
Eric Botcazou
Index: gcc.dg/plugin/must-tail-call-1.c
===
--- gcc.dg/plugin/must-tail-call-1.c	(revision 256562)
+++ gcc.dg/plugin/must-tail-call-1.c	(working copy)
@@ -1,3 +1,5 @@
+/* { dg-options "-fdelayed-branch" { target sparc*-*-* } } */
+
 extern void abort (void);
 
 int __attribute__((noinline,noclone))


[libstdc++] Fix 17_intro/names.cc on SPARC/Linux

2018-01-16 Thread Eric Botcazou
The SPARC-V8 architecture contains a Y register so a system header defines 
a structure with a 'y' field on Linux.

Tested on SPARC64/Linux, applied on the mainline and 7 branch as obvious.


2018-01-16  Eric Botcazou  

* testsuite/17_intro/names.cc: Undefine 'y' on SPARC/Linux.

-- 
Eric Botcazou
Index: testsuite/17_intro/names.cc
===
--- testsuite/17_intro/names.cc	(revision 256562)
+++ testsuite/17_intro/names.cc	(working copy)
@@ -112,4 +112,8 @@
 #undef r
 #endif
 
+#if defined (__linux__) && defined (__sparc__)
+#undef y
+#endif
+
 #include <bits/stdc++.h>


[PATCH] make -Wrestrict for strcat more meaningful (PR 83698)

2018-01-16 Thread Martin Sebor

PR 83698 - bogus offset in -Wrestrict messages for strcat of
unknown strings, points out that the offsets printed by
-Wrestrict for possibly overlapping strcat calls with unknown
strings don't look meaningful in some cases.  The root cause
of the bogus values is wrapping during the conversion from
offset_int in which the pass tracks numerical values to
HOST_WIDE_INT for printing.  (The problem will go away once
GCC's pretty-printer supports wide int formatting.)  For
instance, the following:

  extern char *d;
  strcat (d + 3, d + 5);

results in

  warning: ‘strcat’ accessing 0 or more bytes at offsets 3 and 5 may 
overlap 1 byte at offset [3, -9223372036854775806]


which, besides printing the bogus negative offset on LP64
targets, isn't correct because strcat always accesses at least
one byte (the nul) and there can be no overlap at offset 3.
To be more accurate, the warning should say something like:

  warning: ‘strcat’ accessing 3 or more bytes at offsets 3 and 5 may 
overlap 1 byte at offset 5 [-Wrestrict]


because the function must access at least 3 bytes in order to
cause an overlap, and when it does, the overlap starts at the
higher of the two offsets, i.e., 5.  (Though it's virtually
impossible to have a single sentence and a single set of
numbers cover all the cases with perfect accuracy.)

The attached patch fixes these issues to make the printed values
make more sense.  (It doesn't affect when diagnostics are printed.)

Although this isn't strictly a regression, it has an impact on
the readability of the warnings.  If left unchanged, the original
messages are likely to confuse users and lead to bug reports.

Martin
PR tree-optimization/83698 - bogus offset in -Wrestrict messages for strcat of unknown strings

gcc/ChangeLog:

	PR tree-optimization/83698
	* gimple-ssa-warn-restrict.c (builtin_memref::builtin_memref): For
	arrays constrain the offset range to their bounds.
	(builtin_access::strcat_overlap): Adjust the bounds of overlap offset.
	(builtin_access::overlap): Avoid setting the size of overlap if it's
	already been set.
	(maybe_diag_overlap): Also consider arrays when deciding what values
	of offsets to include in diagnostics.

gcc/testsuite/ChangeLog:

	PR tree-optimization/83698
	* gcc.dg/Wrestrict-7.c: New test.
	* c-c++-common/Wrestrict.c: Adjust expected values for strcat.
	* gcc.target/i386/chkp-stropt-17.c: Same.

Index: gcc/gimple-ssa-warn-restrict.c
===
--- gcc/gimple-ssa-warn-restrict.c	(revision 256752)
+++ gcc/gimple-ssa-warn-restrict.c	(working copy)
@@ -384,6 +384,12 @@ builtin_memref::builtin_memref (tree expr, tree si
 	  base = SSA_NAME_VAR (base);
   }
 
+  if (DECL_P (base) && TREE_CODE (TREE_TYPE (base)) == ARRAY_TYPE)
+{
+  if (offrange[0] < 0 && offrange[1] > 0)
+	offrange[0] = 0;
+}
+
   if (size)
 {
   tree range[2];
@@ -1079,14 +1085,35 @@ builtin_access::strcat_overlap ()
 return false;
 
   /* When strcat overlap is certain it is always a single byte:
- the terminatinn NUL, regardless of offsets and sizes.  When
+ the terminating NUL, regardless of offsets and sizes.  When
  overlap is only possible its range is [0, 1].  */
   acs.ovlsiz[0] = dstref->sizrange[0] == dstref->sizrange[1] ? 1 : 0;
   acs.ovlsiz[1] = 1;
-  acs.ovloff[0] = (dstref->sizrange[0] + dstref->offrange[0]).to_shwi ();
-  acs.ovloff[1] = (dstref->sizrange[1] + dstref->offrange[1]).to_shwi ();
 
-  acs.sizrange[0] = wi::smax (acs.dstsiz[0], srcref->sizrange[0]).to_shwi ();
+  offset_int endoff = dstref->offrange[0] + dstref->sizrange[0];
+  if (endoff <= srcref->offrange[0])
+acs.ovloff[0] = wi::smin (maxobjsize, srcref->offrange[0]).to_shwi ();
+  else
+acs.ovloff[0] = wi::smin (maxobjsize, endoff).to_shwi ();
+
+  acs.sizrange[0] = wi::smax (wi::abs (endoff - srcref->offrange[0]) + 1,
+			  srcref->sizrange[0]).to_shwi ();
+  if (dstref->offrange[0] == dstref->offrange[1])
+{
+  if (srcref->offrange[0] == srcref->offrange[1])
+	acs.ovloff[1] = acs.ovloff[0];
+  else
+	acs.ovloff[1]
+	  = wi::smin (maxobjsize,
+		  srcref->offrange[1] + srcref->sizrange[1]).to_shwi ();
+}
+  else
+acs.ovloff[1]
+  = wi::smin (maxobjsize,
+		  dstref->offrange[1] + dstref->sizrange[1]).to_shwi ();
+
+  if (acs.sizrange[0] == 0)
+acs.sizrange[0] = 1;
   acs.sizrange[1] = wi::smax (acs.dstsiz[1], srcref->sizrange[1]).to_shwi ();
   return true;
 }
@@ -1224,8 +1251,12 @@ builtin_access::overlap ()
   /* Call the appropriate function to determine the overlap.  */
   if ((this->*detect_overlap) ())
 {
-  sizrange[0] = wi::smax (acs.dstsiz[0], srcref->sizrange[0]).to_shwi ();
-  sizrange[1] = wi::smax (acs.dstsiz[1], srcref->sizrange[1]).to_shwi ();
+  if (!sizrange[1])
+	{
+	  /* Unless the access size range has already been set, do so here.  */
+	  sizrange[0] = wi::smax (acs.dstsiz[0], srcref->sizrange[0]).to_shwi ();
+	  sizrange[1] = wi::smax 

[C++ Patch] PR 81054 ("[7/8 Regression] ICE with volatile variable in constexpr function") [Take 2]

2018-01-16 Thread Paolo Carlini

Hi again,

thus I figured out what was badly wrong in my first try: I misread 
ensure_literal_type_for_constexpr_object and missed that it can return 
NULL_TREE without emitting a hard error. Thus my first try even caused 
miscompilations :( Anyway, when DECL_DECLARED_CONSTEXPR_P is true we are 
safe and indeed we want to clear it as a matter of error recovery. Then, 
in this safe case the only change in the below is returning early, thus 
avoiding any internal inconsistencies later and also the redundant / 
misleading diagnostic which I already mentioned. Testing on x86_64-linux 
is still in progress - in libstdc++ - but I separately checked that all 
the regressions are gone.


Thanks! Paolo.

/


/cp
2018-01-16  Paolo Carlini  

PR c++/81054
* decl.c (cp_finish_decl): Early return when
ensure_literal_type_for_constexpr_object returns NULL_TREE 
and DECL_DECLARED_CONSTEXPR_P is true.

/testsuite
2018-01-16  Paolo Carlini  

PR c++/81054
* g++.dg/cpp0x/constexpr-ice19.C: New.
Index: cp/decl.c
===
--- cp/decl.c   (revision 256753)
+++ cp/decl.c   (working copy)
@@ -6810,8 +6810,12 @@ cp_finish_decl (tree decl, tree init, bool init_co
   cp_apply_type_quals_to_decl (cp_type_quals (type), decl);
 }
 
-  if (!ensure_literal_type_for_constexpr_object (decl))
-DECL_DECLARED_CONSTEXPR_P (decl) = 0;
+  if (!ensure_literal_type_for_constexpr_object (decl)
+  && DECL_DECLARED_CONSTEXPR_P (decl))
+{
+  DECL_DECLARED_CONSTEXPR_P (decl) = 0;
+  return;
+}
 
   if (VAR_P (decl)
   && DECL_CLASS_SCOPE_P (decl)
Index: testsuite/g++.dg/cpp0x/constexpr-ice19.C
===
--- testsuite/g++.dg/cpp0x/constexpr-ice19.C(nonexistent)
+++ testsuite/g++.dg/cpp0x/constexpr-ice19.C(working copy)
@@ -0,0 +1,13 @@
+// PR c++/81054
+// { dg-do compile { target c++11 } }
+
+struct A
+{
+  volatile int i;
+  constexpr A() : i() {}
+};
+
+struct B
+{
+  static constexpr A a {};  // { dg-error "not literal" }
+};


Re: VIEW_CONVERT_EXPR slots for strict-align targets (PR 83884)

2018-01-16 Thread Eric Botcazou
> I'd assumed that variable-length types couldn't occur here, since it
> seems strange to view-convert a variable-length type to a fixed-length
> one.

This happens all the time in Ada when you convert an unconstrained type into 
one of its constrained subtypes (but the run-time sizes must match).

> But is this VIEW_CONVERT_EXPR really valid?  Maybe this is just
> papering over a deeper issue.  There again, the MAX in the old
> code was presumably there because the sizes can be different...

The problem is that Ada exposes VIEW_CONVERT_EXPR to the user and the user can 
do very weird things with it so you need to be prepared for the worst.

> 2018-01-16  Richard Sandiford  
> 
> gcc/
>   PR middle-end/83884
>   * expr.c (expand_expr_real_1): Use the size of GET_MODE (op0)
>   rather than the size of inner_type to determine the stack slot size
>   when handling VIEW_CONVERT_EXPRs on strict-alignment targets.

This looks good to me, thanks for fixing the problem.  Unexpectedly enough, I 
don't see the failures on SPARC (32-bit or 64-bit).

-- 
Eric Botcazou


Re: [PATCH] i386: More use reference of struct ix86_frame to avoid copy

2018-01-16 Thread H.J. Lu
On Tue, Jan 16, 2018 at 8:09 AM, H.J. Lu  wrote:
> On Tue, Jan 16, 2018 at 7:03 AM, Martin Liška  wrote:
>> On 01/16/2018 01:35 PM, H.J. Lu wrote:
>>> On Tue, Jan 16, 2018 at 3:40 AM, H.J. Lu  wrote:
 This patch has been used with my Spectre backport for GCC 7 for many
 weeks and has been checked into GCC 7 branch.  Should I revert it on
 GCC 7 branch or check it into trunk?
>>>
>>> Ada build failed with this on trunk:
>>>
>>> raised STORAGE_ERROR : stack overflow or erroneous memory access
>>> make[5]: *** 
>>> [/export/gnu/import/git/sources/gcc/gcc/ada/Make-generated.in:45:
>>> ada/sinfo.h] Error 1
>>
>> Hello.
>>
>> I know that you've already reverted the change, but it's possible to replace
>> struct ix86_frame &frame = cfun->machine->frame;
>>
>> with:
>> struct ix86_frame *frame = &cfun->machine->frame;
>>
>> And replace usages with pointer access operator (->). That would also avoid 
>> copying.
>
> Won't it be equivalent to reference?
>
>> One more question. After you switched to references, isn't the behavior 
>> of function
>> ix86_expand_epilogue changed, as it also contains a write to the frame struct like:
>>
>>  14799/* Special care must be taken for the normal return case of a 
>> function
>>  14800   using eh_return: the eax and edx registers are marked as saved, 
>> but
>>  14801   not restored along this path.  Adjust the save location to 
>> match.  */
>>  14802if (crtl->calls_eh_return && style != 2)
>>  14803  frame.reg_save_offset -= 2 * UNITS_PER_WORD;
>
> That could be the issue.  I will double check it.
>

Reverting the ix86_expand_epilogue change fixes the Ada build.  I opened:

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=83905


-- 
H.J.


[PATCH, rs6000] Bug fixes for the Power 9 stxvl and lxvl instructions.

2018-01-16 Thread Carl Love
GCC maintainers:

The following patch contains fixes for the stxvl and lxvl instructions
and XL_LEN_R builtin that were found while adding additional Power 9
test cases for the various load and store builtins.  The new tests in
builtins-5-p9-runnable.c and builtins-6-p9-runnable.c that exposed the
bugs are included.

The test cases have been run and verified by hand on Power 9 without
error.  The full regressions on Power 8 LE, Power 8 BE and Power 9 are
currently running.

Please let me know if the patch is acceptable provided the regression
testing completes cleanly.  Thanks.

 Carl Love


---


gcc/ChangeLog:

2018-01-16 Carl Love  
* config/rs6000/vsx.md (define_expand xl_len_r,
define_expand stxvl, define_expand *stxvl): Add match_dup
argument.

gcc/testsuite/ChangeLog:

2018-01-16  Carl Love  
* gcc.target/powerpc/builtins-6-p9-runnable.c: Add additional tests.
Add debug print statements.
* gcc.target/powerpc/builtins-5-p9-runnable.c: Add test to do
16 byte vector load followed by a partial vector load.
---
 gcc/config/rs6000/rs6000-builtin.def   |4 +-
 gcc/config/rs6000/vsx.md   |   26 +-
 .../gcc.target/powerpc/builtins-5-p9-runnable.c|  150 +-
 .../gcc.target/powerpc/builtins-6-p9-runnable.c| 1759 
 4 files changed, 1214 insertions(+), 725 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-builtin.def 
b/gcc/config/rs6000/rs6000-builtin.def
index 0f7da6a4a..b17036c5a 100644
--- a/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -2197,8 +2197,8 @@ BU_P9V_OVERLOAD_2 (VIEDP, "insert_exp_dp")
 BU_P9V_OVERLOAD_2 (VIESP,  "insert_exp_sp")
 
 /* 2 argument vector functions added in ISA 3.0 (power9).  */
-BU_P9V_64BIT_VSX_2 (LXVL,  "lxvl", CONST,  lxvl)
-BU_P9V_64BIT_VSX_2 (XL_LEN_R,  "xl_len_r", CONST,  xl_len_r)
+BU_P9V_64BIT_VSX_2 (LXVL,  "lxvl", PURE,   lxvl)
+BU_P9V_64BIT_VSX_2 (XL_LEN_R,  "xl_len_r", PURE,   xl_len_r)
 
 BU_P9V_AV_2 (VEXTUBLX, "vextublx", CONST,  vextublx)
 BU_P9V_AV_2 (VEXTUBRX, "vextubrx", CONST,  vextubrx)
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 0323e866f..03f8ec2d6 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -4624,10 +4624,12 @@ (define_expand "first_mismatch_or_eos_index_"
 ;; Load VSX Vector with Length
 (define_expand "lxvl"
   [(set (match_dup 3)
-(match_operand:DI 2 "register_operand"))
+(ashift:DI (match_operand:DI 2 "register_operand")
+   (const_int 56)))
(set (match_operand:V16QI 0 "vsx_register_operand")
(unspec:V16QI
 [(match_operand:DI 1 "gpc_reg_operand")
+  (mem:V16QI (match_dup 1))
  (match_dup 3)]
 UNSPEC_LXVL))]
   "TARGET_P9_VECTOR && TARGET_64BIT"
@@ -4639,16 +4641,17 @@ (define_insn "*lxvl"
   [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
(unspec:V16QI
 [(match_operand:DI 1 "gpc_reg_operand" "b")
- (match_operand:DI 2 "register_operand" "+r")]
+ (mem:V16QI (match_dup 1))
+ (match_operand:DI 2 "register_operand" "r")]
 UNSPEC_LXVL))]
   "TARGET_P9_VECTOR && TARGET_64BIT"
-  "sldi %2,%2, 56\; lxvl %x0,%1,%2"
-  [(set_attr "length" "8")
-   (set_attr "type" "vecload")])
+  "lxvl %x0,%1,%2"
+  [(set_attr "type" "vecload")])
 
 (define_insn "lxvll"
   [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
(unspec:V16QI [(match_operand:DI 1 "gpc_reg_operand" "b")
+   (mem:V16QI (match_dup 1))
   (match_operand:DI 2 "register_operand" "r")]
  UNSPEC_LXVLL))]
   "TARGET_P9_VECTOR"
@@ -4677,6 +4680,7 @@ (define_expand "xl_len_r"
 (define_insn "stxvll"
   [(set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand" "b"))
(unspec:V16QI [(match_operand:V16QI 0 "vsx_register_operand" "wa")
+  (mem:V16QI (match_dup 1))
   (match_operand:DI 2 "register_operand" "r")]
  UNSPEC_STXVLL))]
   "TARGET_P9_VECTOR"
@@ -4686,10 +4690,12 @@ (define_insn "stxvll"
 ;; Store VSX Vector with Length
 (define_expand "stxvl"
   [(set (match_dup 3)
-   (match_operand:DI 2 "register_operand"))
+   (ashift:DI (match_operand:DI 2 "register_operand")
+  (const_int 56)))
(set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand"))
(unspec:V16QI
 [(match_operand:V16QI 0 "vsx_register_operand")
+ (mem:V16QI (match_dup 1))
  (match_dup 3)]
 UNSPEC_STXVL))]
   "TARGET_P9_VECTOR && TARGET_64BIT"
@@ -4701,12 +4707,12 @@ (define_insn "*stxvl"
   [(set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand" "b"))
(unspec:V16QI
 [(match_operand:V16QI 0 

Re: [PATCH 0/5] x86: CVE-2017-5715, aka Spectre

2018-01-16 Thread Uros Bizjak
On Sun, Jan 14, 2018 at 5:43 PM, Uros Bizjak  wrote:
> On Sun, Jan 14, 2018 at 5:35 PM, H.J. Lu  wrote:
>> On Sun, Jan 14, 2018 at 8:19 AM, Uros Bizjak  wrote:
>>> On Fri, Jan 12, 2018 at 9:01 AM, Uros Bizjak  wrote:
 On Thu, Jan 11, 2018 at 2:28 PM, H.J. Lu  wrote:

> Hi Uros,
>
> Can you take a look at my x86 backend changes so that they are ready
> to check in once we have consensus.

 Please finish the talks about the correct approach first. Once the
 consensus is reached, please post the final version of the patches for
 review.

 BTW: I have no detailed insight in these issues, so I'll look mostly
 at the implementation details, probably early next week.
>>>
>>> One general remark is on the usage of -1 as an invalid register
>>
>> This has been rewritten.  The checked in patch no longer does that.
>
> I'm looking directly into current indirect_thunk_name,
> output_indirect_thunk and output_indirect_thunk_function functions in
> i386.c which have plenty of the mentioned checks.

Improved with attached patch.

2018-01-16  Uros Bizjak  

* config/i386/i386.c (indirect_thunk_name): Declare regno
as unsigned int.  Compare regno with INVALID_REGNUM.
(output_indirect_thunk): Ditto.
(output_indirect_thunk_function): Ditto.
(ix86_code_end): Declare regno as unsigned int.  Use INVALID_REGNUM
in the call to output_indirect_thunk_function.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

Uros.
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index ea9c462..7f233d1 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -10765,16 +10765,16 @@ static int indirect_thunks_bnd_used;
 /* Fills in the label name that should be used for the indirect thunk.  */
 
 static void
-indirect_thunk_name (char name[32], int regno, bool need_bnd_p,
-bool ret_p)
+indirect_thunk_name (char name[32], unsigned int regno,
+bool need_bnd_p, bool ret_p)
 {
-  if (regno >= 0 && ret_p)
+  if (regno != INVALID_REGNUM && ret_p)
 gcc_unreachable ();
 
   if (USE_HIDDEN_LINKONCE)
 {
   const char *bnd = need_bnd_p ? "_bnd" : "";
-  if (regno >= 0)
+  if (regno != INVALID_REGNUM)
{
  const char *reg_prefix;
  if (LEGACY_INT_REGNO_P (regno))
@@ -10792,7 +10792,7 @@ indirect_thunk_name (char name[32], int regno, bool 
need_bnd_p,
 }
   else
 {
-  if (regno >= 0)
+  if (regno != INVALID_REGNUM)
{
  if (need_bnd_p)
ASM_GENERATE_INTERNAL_LABEL (name, "LITBR", regno);
@@ -10844,7 +10844,7 @@ indirect_thunk_name (char name[32], int regno, bool 
need_bnd_p,
  */
 
 static void
-output_indirect_thunk (bool need_bnd_p, int regno)
+output_indirect_thunk (bool need_bnd_p, unsigned int regno)
 {
   char indirectlabel1[32];
   char indirectlabel2[32];
@@ -10874,7 +10874,7 @@ output_indirect_thunk (bool need_bnd_p, int regno)
 
   ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);
 
-  if (regno >= 0)
+  if (regno != INVALID_REGNUM)
 {
   /* MOV.  */
   rtx xops[2];
@@ -10898,12 +10898,12 @@ output_indirect_thunk (bool need_bnd_p, int regno)
 }
 
 /* Output a funtion with a call and return thunk for indirect branch.
-   If BND_P is true, the BND prefix is needed.   If REGNO != -1,  the
-   function address is in REGNO.  Otherwise, the function address is
+   If BND_P is true, the BND prefix is needed.  If REGNO != INVALID_REGNUM,
+   the function address is in REGNO.  Otherwise, the function address is
on the top of stack.  */
 
 static void
-output_indirect_thunk_function (bool need_bnd_p, int regno)
+output_indirect_thunk_function (bool need_bnd_p, unsigned int regno)
 {
   char name[32];
   tree decl;
@@ -10952,7 +10952,7 @@ output_indirect_thunk_function (bool need_bnd_p, int 
regno)
ASM_OUTPUT_LABEL (asm_out_file, name);
   }
 
-  if (regno < 0)
+  if (regno == INVALID_REGNUM)
 {
   /* Create alias for __x86.return_thunk/__x86.return_thunk_bnd.  */
   char alias[32];
@@ -11026,16 +11026,16 @@ static void
 ix86_code_end (void)
 {
   rtx xops[2];
-  int regno;
+  unsigned int regno;
 
   if (indirect_thunk_needed)
-output_indirect_thunk_function (false, -1);
+output_indirect_thunk_function (false, INVALID_REGNUM);
   if (indirect_thunk_bnd_needed)
-output_indirect_thunk_function (true, -1);
+output_indirect_thunk_function (true, INVALID_REGNUM);
 
   for (regno = FIRST_REX_INT_REG; regno <= LAST_REX_INT_REG; regno++)
 {
-  int i = regno - FIRST_REX_INT_REG + LAST_INT_REG + 1;
+  unsigned int i = regno - FIRST_REX_INT_REG + LAST_INT_REG + 1;
   if ((indirect_thunks_used & (1 << i)))
output_indirect_thunk_function (false, regno);
 


[PATCH, rs6000] (v2) Support for gimple folding of mergeh, mergel intrinsics

2018-01-16 Thread Will Schmidt
Hi,
  Add support for gimple folding of the mergeh, mergel intrinsics.  Since the
low and high versions are almost identical, a new helper function is added
so that code can be shared.

The changes introduced here affect the existing target testcases
gcc.target/powerpc/builtins-1-be.c and builtins-1-le.c, such that
a number of the scan-assembler tests would fail due to instruction counts
changing.  Since the purpose of that test is to primarily ensure those
intrinsics are accepted by the compiler, I have disabled gimple-folding for
the existing tests that count instructions, and created new variants of those
tests with folding enabled and a higher optimization level, that do not count
instructions.

V2 updates,
  * thanks for the feedback & hints in how to make these improvements :-)
  * Reworked to merge the xxmrg* instructions into the existing define_insn
  stanzas.
  * Reworked to use the tree-vector-builder.h helpers, eliminating some
  constructor and assign statements.
  * a few more cosmetic touch-ups in nearby define_insns.
  * update target stanza for builtins-1-be-folded.c test.

Sniff-tests of the target tests on a single system look OK.  Full regtests are
currently running across assorted power systems.
OK for trunk, pending successful results?

Thanks,
-Will

[gcc]

2018-01-16  Will Schmidt  

* config/rs6000/rs6000.c (rs6000_gimple_builtin): Add gimple folding
support for merge[hl].
(fold_mergehl_helper): New helper function.
(tree-vector-builder.h): New #include for tree_vector_builder usage.
* config/rs6000/altivec.md (altivec_vmrghw_direct): Add xxmrghw insn.
(altivec_vmrglw_direct): Add xxmrglw insn.

[testsuite]

2018-01-16  Will Schmidt  

* gcc.target/powerpc/fold-vec-mergehl-char.c: New.
* gcc.target/powerpc/fold-vec-mergehl-double.c: New.
* gcc.target/powerpc/fold-vec-mergehl-float.c: New.
* gcc.target/powerpc/fold-vec-mergehl-int.c: New.
* gcc.target/powerpc/fold-vec-mergehl-longlong.c: New.
* gcc.target/powerpc/fold-vec-mergehl-pixel.c: New.
* gcc.target/powerpc/fold-vec-mergehl-short.c: New.
* gcc.target/powerpc/builtins-1-be.c: Disable gimple-folding.
* gcc.target/powerpc/builtins-1-le.c: Disable gimple-folding.
* gcc.target/powerpc/builtins-1-be-folded.c: New.
* gcc.target/powerpc/builtins-1-le-folded.c: New.
* gcc.target/powerpc/builtins-1.fold.h: New.

diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 733d920..bb00583 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -995,12 +995,12 @@
 }
   [(set_attr "type" "vecperm")])
 
 (define_insn "altivec_vmrghb_direct"
   [(set (match_operand:V16QI 0 "register_operand" "=v")
-(unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v")
-   (match_operand:V16QI 2 "register_operand" "v")]
+   (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v")
+  (match_operand:V16QI 2 "register_operand" "v")]
  UNSPEC_VMRGH_DIRECT))]
   "TARGET_ALTIVEC"
   "vmrghb %0,%1,%2"
   [(set_attr "type" "vecperm")])
 
@@ -1102,16 +1102,18 @@
 return "vmrglw %0,%2,%1";
 }
   [(set_attr "type" "vecperm")])
 
 (define_insn "altivec_vmrghw_direct"
-  [(set (match_operand:V4SI 0 "register_operand" "=v")
-(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")
-  (match_operand:V4SI 2 "register_operand" "v")]
- UNSPEC_VMRGH_DIRECT))]
+  [(set (match_operand:V4SI 0 "register_operand" "=v,wa")
+   (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v,wa")
+ (match_operand:V4SI 2 "register_operand" "v,wa")]
+UNSPEC_VMRGH_DIRECT))]
   "TARGET_ALTIVEC"
-  "vmrghw %0,%1,%2"
+  "@
+  vmrghw %0,%1,%2
+  xxmrghw %x0,%x1,%x2"
   [(set_attr "type" "vecperm")])
 
 (define_insn "*altivec_vmrghsf"
   [(set (match_operand:V4SF 0 "register_operand" "=v")
 (vec_select:V4SF
@@ -1184,13 +1186,13 @@
 }
   [(set_attr "type" "vecperm")])
 
 (define_insn "altivec_vmrglb_direct"
   [(set (match_operand:V16QI 0 "register_operand" "=v")
-(unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v")
-  (match_operand:V16QI 2 "register_operand" "v")]
-  UNSPEC_VMRGL_DIRECT))]
+   (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v")
+  (match_operand:V16QI 2 "register_operand" "v")]
+ UNSPEC_VMRGL_DIRECT))]
   "TARGET_ALTIVEC"
   "vmrglb %0,%1,%2"
   [(set_attr "type" "vecperm")])
 
 (define_expand "altivec_vmrglh"
@@ -1242,11 +1244,11 @@
 
 (define_insn "altivec_vmrglh_direct"
   [(set (match_operand:V8HI 0 "register_operand" "=v")
 (unspec:V8HI [(match_operand:V8HI 1 "register_operand" "v")
  

Re: [PATCH] avoid assuming known string length is constant (PR 83896)

2018-01-16 Thread Richard Sandiford
Martin Sebor  writes:
> Recent improvements to the strlen pass introduced the assumption
> that when the length of a string has been recorded by the pass
> the length is necessarily constant.  Bug 83896 shows that this
> assumption is not always true, and that GCC fails with an ICE
> when it doesn't hold.  To avoid the ICE the attached patch
> removes the assumption.
>
> x86_64-linux bootstrap successful, regression test in progress.
>
> Martin
>
> PR tree-optimization/83896 - ice in get_string_len on a call to strlen with 
> non-constant length
>
> gcc/ChangeLog:
>
>   PR tree-optimization/83896
>   * tree-ssa-strlen.c (get_string_len): Avoid assuming length is constant.
>
> gcc/testsuite/ChangeLog:
>
>   PR tree-optimization/83896
>   * gcc.dg/strlenopt-43.c: New test.
>
> Index: gcc/tree-ssa-strlen.c
> ===
> --- gcc/tree-ssa-strlen.c (revision 256752)
> +++ gcc/tree-ssa-strlen.c (working copy)
> @@ -2772,7 +2772,9 @@ handle_pointer_plus (gimple_stmt_iterator *gsi)
>  }
>  }
>  
> -/* Check if RHS is string_cst possibly wrapped by mem_ref.  */
> +/* If RHS, either directly or indirectly, refers to a string of constant
> +   length, return it.  Otherwise return a negative value.  */
> +
>  static int
>  get_string_len (tree rhs)
>  {

I think this should be returning HOST_WIDE_INT given the unconstrained
tree_to_shwi return.  Same type change for rhslen in the caller.

(Not my call, but it might be better to have a more specific function name,
given that the file already had "get_string_length" before this function
was added.)

> @@ -2789,7 +2791,8 @@ get_string_len (tree rhs)
> if (idx > 0)
>   {
> strinfo *si = get_strinfo (idx);
> -   if (si && si->full_string_p)
> +   if (si && si->full_string_p
> +   && TREE_CODE (si->nonzero_chars) == INTEGER_CST)
>   return tree_to_shwi (si->nonzero_chars);

tree_fits_shwi_p?

Thanks,
Richard

>   }
>   }
> Index: gcc/testsuite/gcc.dg/strlenopt-43.c
> ===
> --- gcc/testsuite/gcc.dg/strlenopt-43.c   (nonexistent)
> +++ gcc/testsuite/gcc.dg/strlenopt-43.c   (working copy)
> @@ -0,0 +1,13 @@
> +/* PR tree-optimization/83896 - ice in get_string_len on a call to strlen
> +   with non-constant length
> +   { dg-do compile }
> +   { dg-options "-O2 -Wall" } */
> +
> +extern char a[5];
> +extern char b[];
> +
> +void f (void)
> +{
> +  if (__builtin_strlen (b) != 4)
> +__builtin_memcpy (a, b, sizeof a);
> +}


[PATCH] avoid assuming known string length is constant (PR 83896)

2018-01-16 Thread Martin Sebor

Recent improvements to the strlen pass introduced the assumption
that when the length of a string has been recorded by the pass
the length is necessarily constant.  Bug 83896 shows that this
assumption is not always true, and that GCC fails with an ICE
when it doesn't hold.  To avoid the ICE the attached patch
removes the assumption.

x86_64-linux bootstrap successful, regression test in progress.

Martin
PR tree-optimization/83896 - ice in get_string_len on a call to strlen with non-constant length

gcc/ChangeLog:

	PR tree-optimization/83896
	* tree-ssa-strlen.c (get_string_len): Avoid assuming length is constant.

gcc/testsuite/ChangeLog:

	PR tree-optimization/83896
	* gcc.dg/strlenopt-43.c: New test.

Index: gcc/tree-ssa-strlen.c
===
--- gcc/tree-ssa-strlen.c	(revision 256752)
+++ gcc/tree-ssa-strlen.c	(working copy)
@@ -2772,7 +2772,9 @@ handle_pointer_plus (gimple_stmt_iterator *gsi)
 }
 }
 
-/* Check if RHS is string_cst possibly wrapped by mem_ref.  */
+/* If RHS, either directly or indirectly, refers to a string of constant
+   length, return it.  Otherwise return a negative value.  */
+
 static int
 get_string_len (tree rhs)
 {
@@ -2789,7 +2791,8 @@ get_string_len (tree rhs)
 	  if (idx > 0)
 		{
 		  strinfo *si = get_strinfo (idx);
-		  if (si && si->full_string_p)
+		  if (si && si->full_string_p
+		  && TREE_CODE (si->nonzero_chars) == INTEGER_CST)
 		return tree_to_shwi (si->nonzero_chars);
 		}
 	}
Index: gcc/testsuite/gcc.dg/strlenopt-43.c
===
--- gcc/testsuite/gcc.dg/strlenopt-43.c	(nonexistent)
+++ gcc/testsuite/gcc.dg/strlenopt-43.c	(working copy)
@@ -0,0 +1,13 @@
+/* PR tree-optimization/83896 - ice in get_string_len on a call to strlen
+   with non-constant length
+   { dg-do compile }
+   { dg-options "-O2 -Wall" } */
+
+extern char a[5];
+extern char b[];
+
+void f (void)
+{
+  if (__builtin_strlen (b) != 4)
+__builtin_memcpy (a, b, sizeof a);
+}


[PATCH, rs6000] Implement ABI_AIX indirect call handling for -mno-speculate-indirect-jumps

2018-01-16 Thread Bill Schmidt
Hi,

This patch fills in a gap from the previous -mno-speculate-indirect-jumps
patch.  That patch didn't provide support for indirect calls using ABI_AIX
as the default ABI.  This fills in that missing support and changes the
one related powerpc64le-only test case to be compiled for all subtargets.

After some analysis, it doesn't appear possible for sibcalls to be
generated for ELFv1 or ELFv2 using a bctr, given the need for a local
call to avoid the required TOC restore afterwards.  I haven't been able
to find a way to get a bctr generated even when one could theoretically
prove the bctr must go to a local function.

This has been bootstrapped and tested on powerpc64le-linux-gnu with no
regressions.  Testing is still ongoing for powerpc64-linux-gnu.  Provided
that testing completes with no surprises, is this okay for trunk (and
shortly for backport to 7)?

Thanks!
Bill


[gcc]

2018-01-16  Bill Schmidt  

* config/rs6000/rs6000.md (*call_indirect_aix): Disable for
-mno-speculate-indirect-jumps.
(*call_indirect_aix_nospec): New define_insn.
(*call_value_indirect_aix): Disable for
-mno-speculate-indirect-jumps.
(*call_value_indirect_aix_nospec): New define_insn.

[gcc/testsuite]

2018-01-16  Bill Schmidt  

* gcc.target/powerpc/safe-indirect-jump-1.c: Remove
powerpc64le-only restriction.


Index: gcc/config/rs6000/rs6000.md
===
--- gcc/config/rs6000/rs6000.md (revision 256753)
+++ gcc/config/rs6000/rs6000.md (working copy)
@@ -10669,11 +10669,22 @@
(use (match_operand:P 2 "memory_operand" ","))
(set (reg:P TOC_REGNUM) (unspec:P [(match_operand:P 3 "const_int_operand" 
"n,n")] UNSPEC_TOCSLOT))
(clobber (reg:P LR_REGNO))]
-  "DEFAULT_ABI == ABI_AIX"
+  "DEFAULT_ABI == ABI_AIX && rs6000_speculate_indirect_jumps"
   " 2,%2\;b%T0l\; 2,%3(1)"
   [(set_attr "type" "jmpreg")
(set_attr "length" "12")])
 
+(define_insn "*call_indirect_aix_nospec"
+  [(call (mem:SI (match_operand:P 0 "register_operand" "c,*l"))
+(match_operand 1 "" "g,g"))
+   (use (match_operand:P 2 "memory_operand" ","))
+   (set (reg:P TOC_REGNUM) (unspec:P [(match_operand:P 3 "const_int_operand" 
"n,n")] UNSPEC_TOCSLOT))
+   (clobber (reg:P LR_REGNO))]
+  "DEFAULT_ABI == ABI_AIX && !rs6000_speculate_indirect_jumps"
+  "crset eq\; 2,%2\;beq%T0l-\; 2,%3(1)"
+  [(set_attr "type" "jmpreg")
+   (set_attr "length" "16")])
+
 (define_insn "*call_value_indirect_aix"
   [(set (match_operand 0 "" "")
(call (mem:SI (match_operand:P 1 "register_operand" "c,*l"))
@@ -10681,11 +10692,23 @@
(use (match_operand:P 3 "memory_operand" ","))
(set (reg:P TOC_REGNUM) (unspec:P [(match_operand:P 4 "const_int_operand" 
"n,n")] UNSPEC_TOCSLOT))
(clobber (reg:P LR_REGNO))]
-  "DEFAULT_ABI == ABI_AIX"
+  "DEFAULT_ABI == ABI_AIX && rs6000_speculate_indirect_jumps"
   " 2,%3\;b%T1l\; 2,%4(1)"
   [(set_attr "type" "jmpreg")
(set_attr "length" "12")])
 
+(define_insn "*call_value_indirect_aix_nospec"
+  [(set (match_operand 0 "" "")
+   (call (mem:SI (match_operand:P 1 "register_operand" "c,*l"))
+ (match_operand 2 "" "g,g")))
+   (use (match_operand:P 3 "memory_operand" ","))
+   (set (reg:P TOC_REGNUM) (unspec:P [(match_operand:P 4 "const_int_operand" 
"n,n")] UNSPEC_TOCSLOT))
+   (clobber (reg:P LR_REGNO))]
+  "DEFAULT_ABI == ABI_AIX && !rs6000_speculate_indirect_jumps"
+  "crset eq\; 2,%3\;beq%T1l-\; 2,%4(1)"
+  [(set_attr "type" "jmpreg")
+   (set_attr "length" "16")])
+
 ;; Call to indirect functions with the ELFv2 ABI.
 ;; Operand0 is the addresss of the function to call
 ;; Operand2 is the offset of the stack location holding the current TOC pointer
Index: gcc/testsuite/gcc.target/powerpc/safe-indirect-jump-1.c
===
--- gcc/testsuite/gcc.target/powerpc/safe-indirect-jump-1.c (revision 
256753)
+++ gcc/testsuite/gcc.target/powerpc/safe-indirect-jump-1.c (working copy)
@@ -1,4 +1,4 @@
-/* { dg-do compile { target { powerpc64le-*-* } } } */
+/* { dg-do compile } */
 /* { dg-additional-options "-mno-speculate-indirect-jumps" } */
 
 /* Test for deliberate misprediction of indirect calls for ELFv2.  */



Re: Compilation warning in simple-object-xcoff.c

2018-01-16 Thread DJ Delorie

Well, it should all work fine as long as the xcoff64 file is less than 4
Gb.

And it's not the host's bit size that counts; there are usually ways to
get 64-bit file operations on 32-bit hosts.


Re: Compilation warning in simple-object-xcoff.c

2018-01-16 Thread Eli Zaretskii
> From: DJ Delorie 
> Cc: gcc-patches@gcc.gnu.org, gdb-patc...@sourceware.org
> Date: Tue, 16 Jan 2018 13:00:48 -0500
> 
> 
> I think that warning is valid - the host has a 32-bit limit to file
> sizes (off_t) but it's trying to read a 64-bit offset (in that clause).
> It's warning you that you won't be able to handle files as large as the
> field implies.

If 32-bit off_t cannot handle this, then perhaps this file (or that
function) should not be compiled for a 32-bit host?


RE: [PATCH][WWWDOCS][AArch64][ARM] Update GCC 8 release notes

2018-01-16 Thread Tamar Christina
Hi Kyrill,

> 
> Hi Tamar,
> 
> On 16/01/18 16:56, Tamar Christina wrote:
> > The 01/16/2018 16:36, James Greenhalgh wrote:
> >> On Tue, Jan 16, 2018 at 02:21:30PM +0000, Tamar Christina wrote:
> >>> Hi Kyrill,
> >>>
>  xgene1 was added a few releases ago, better to use one of the new
> additions from the above list.
>  For example -mtune=cortex-r52.
> >>> Thanks, I have updated the patch. I'll wait for an ok from an AArch64
> maintainer and a Docs maintainer.
> >> OK. But you have the same issue in the AArch64 part.
> > Thanks, I've updated the patch, I'll wait for a bit for a doc reviewer
> > if I don't hear anything I'll assume the patch is OK.
> 
> Gerald has confirmed a few times in the past that port maintainers can
> approve target-specific changes to the web pages, and there are words to
> that effect at:
> https://gcc.gnu.org/svnwrite.html .
> So I'd recommend you commit your patch once you've got approval for
> aarch64 and arm.
> Unless there's some specific part of the patch you'd like the docs maintainer
> to give you feedback on...

Ah, thanks! I'll commit the patch then. 

> Thanks again for working on this.
> Kyrill
> 
> >
> > Thanks,
> > Tamar
> >> James
> >>
> >>> Index: htdocs/gcc-8/changes.html
> >>>
> ==
> =
> >>> RCS file: /cvs/gcc/wwwdocs/htdocs/gcc-8/changes.html,v
> >>> retrieving revision 1.26
> >>> diff -u -r1.26 changes.html
> >>> --- htdocs/gcc-8/changes.html 11 Jan 2018 09:31:53 -  1.26
> >>> +++ htdocs/gcc-8/changes.html 16 Jan 2018 14:12:57 -
> >>> @@ -147,7 +147,51 @@
> >>>
> >>>   AArch64
> >>>   
> >>> -  
> >>> +  
> >>> +The Armv8.4-A architecture is now supported.  It can be used by
> >>> +specifying the -march=armv8.4-a option.
> >>> +  
> >>> +  
> >>> +The Dot Product instructions are now supported as an optional
> extension to the
> >>> +Armv8.2-A architecture and newer and are mandatory on Armv8.4-A.
> The extension can be used by
> >>> +specifying the +dotprod architecture extension.  E.g.
> -march=armv8.2-a+dotprod.
> >>> +  
> >>> +  
> >>> +The Armv8-A +crypto extension has now been split
> into two extensions for finer grained control:
> >>> +
> >>> +   +aes which contains the Armv8-A AES
> cryptographic instructions.
> >>> +   +sha2 which contains the Armv8-A SHA2 and
> SHA1 cryptographic instructions.
> >>> +
> >>> +Using +crypto will now enable these two extensions.
> >>> +  
> >>> +  
> >>> +New Armv8.4-A FP16 Floating Point Multiplication Variant instructions
> have been added.  These instructions are
> >>> +mandatory in Armv8.4-A but available as an optional extension to
> Armv8.2-A and Armv8.3-A.  The new extension
> >>> +can be used by specifying the +fp16fml architectural
> extension on Armv8.2-A and Armv8.3-A. On Armv8.4-A
> >>> +the instructions can be enabled by specifying +fp16.
> >>> +  
> >>> +  
> >>> +New cryptographic instructions have been added as optional
> extensions to Armv8.2-A and newer.  These instructions can
> >>> +be enabled with:
> >>> +
> >>> +  +sha3 New SHA3 and SHA2 instructions from
> Armv8.4-A.  This implies +sha2.
> >>> +  +sm4 New SM3 and SM4 instructions from
> Armv8.4-A.
> >>> +
> >>> + 
> >>> +  
> >>> +   Support has been added for the following processors
> >>> +   (GCC identifiers in parentheses):
> >>> +   
> >>> + Arm Cortex-A75 (cortex-a75).
> >>> +  Arm Cortex-A55 (cortex-a55).
> >>> +  Arm Cortex-A55/Cortex-A75 DynamIQ big.LITTLE (cortex-
> a75.cortex-a55).
> >>> +   
> >>> +   The GCC identifiers can be used
> >>> +   as arguments to the -mcpu or -
> mtune options,
> >>> +   for example: -mcpu=cortex-a75 or
> >>> +   -mtune=thunderx2t99p1 or as arguments to the
> equivalent target
> >>> +   attributes and pragmas.
> >>> +  
> >>>   
> >>>
> >>>   ARM
> >>> @@ -169,14 +213,58 @@
> >>>   removed in a future release.
> >>> 
> >>> 
> >>> -The default link behavior for ARMv6 and ARMv7-R targets has been
> >>> +The default link behavior for Armv6 and Armv7-R targets has
> >>> + been
> >>>   changed to produce BE8 format when generating big-endian images.
> A new
> >>>   flag -mbe32 can be used to force the linker to
> produce
> >>>   legacy BE32 format images.  There is no change of behavior for
> >>> -ARMv6-m and other ARMv7 or later targets: these already defaulted
> >>> +Armv6-M and other Armv7 or later targets: these already
> >>> + defaulted
> >>>   to BE8 format.  This change brings GCC into alignment with other
> >>>   compilers for the ARM architecture.
> >>> 
> >>> +  
> >>> +The Armv8-R architecture is now supported.  It can be used by
> specifying the
> >>> +-march=armv8-r option.
> >>> +  
> >>> +  
> >>> +The Armv8.3-A architecture is now supported.  It can be used by
> >>> +specifying the -march=armv8.3-a option.
> >>> +  

Re: Compilation warning in simple-object-xcoff.c

2018-01-16 Thread DJ Delorie

I think that warning is valid - the host has a 32-bit limit to file
sizes (off_t) but it's trying to read a 64-bit offset (in that clause).
It's warning you that you won't be able to handle files as large as the
field implies.

Can we hide the warning?  Probably.  Should we?  Debatable, as long as
we want 64-bit xcoff support in 32-bit filesystems.

Otherwise, we'd need to detect off_t overflow somehow, down the slippery
slope of reporting the error to the caller...


Re: VIEW_CONVERT_EXPR slots for strict-align targets (PR 83884)

2018-01-16 Thread Richard Biener
On January 16, 2018 5:14:50 PM GMT+01:00, Richard Sandiford 
 wrote:
>This PR is about a case in which we VIEW_CONVERT a variable-sized
>unaligned record:
>
>sizes-gimplified type_7 BLK
>size 
>unit-size 
>align:8 ...>
>
>to an aligned 32-bit integer.  The strict-alignment handling of
>this case creates an aligned temporary slot, moves the operand
>into the slot in the operand's original mode, then accesses the
>slot in the more-aligned result mode.
>
>Previously the size of the temporary slot was calculated using:
>
>  HOST_WIDE_INT temp_size
>= MAX (int_size_in_bytes (inner_type),
>   (HOST_WIDE_INT) GET_MODE_SIZE (mode));
>
>int_size_in_bytes would return -1 for the variable-length type,
>so we'd use the size of the result mode for the slot.  r256152 replaced
>int_size_in_bytes with tree_to_poly_uint64, which triggered an ICE.
>
>I'd assumed that variable-length types couldn't occur here, since it
>seems strange to view-convert a variable-length type to a fixed-length
>one.  It also seemed strange that (with the old code) we'd ignore the
>size of the operand if it was a variable V but honour it if it was a
>constant C, even though it's presumably possible for V to equal that
>C at runtime.
>
>If op0 has BLKmode we do a block copy of GET_MODE_SIZE (mode) bytes
>and then convert the slot to "mode":
>
> poly_uint64 mode_size = GET_MODE_SIZE (mode);
> ...
> if (GET_MODE (op0) == BLKmode)
>   {
> rtx size_rtx = gen_int_mode (mode_size, Pmode);
> emit_block_move (new_with_op0_mode, op0, size_rtx,
>  (modifier == EXPAND_STACK_PARM
>   ? BLOCK_OP_CALL_PARM
>   : BLOCK_OP_NORMAL));
>   }
> else
>   ...
>
> op0 = new_rtx;
>   }
>   }
>
> op0 = adjust_address (op0, mode, 0);
>
>so I think in that case just the size of "mode" is enough, even if op0
>is a fixed-size type.  For non-BLKmode op0 we first move in op0's mode
>and then convert the slot to "mode":
>
>   emit_move_insn (new_with_op0_mode, op0);
>
> op0 = new_rtx;
>   }
>   }
>
> op0 = adjust_address (op0, mode, 0);
>
>so I think we want the maximum of the two mode sizes in that case
>(assuming they can be different sizes).
>
>But is this VIEW_CONVERT_EXPR really valid?  

IMHO it is on the border of being invalid (verify_gimple doesn't diagnose 
it). Using a BIT_FIELD_REF would be much better here. 

Richard. 

Maybe this is just
>papering over a deeper issue.  There again, the MAX in the old
>code was presumably there because the sizes can be different...
>
>Richard
>
>
>2018-01-16  Richard Sandiford  
>
>gcc/
>   PR middle-end/83884
>   * expr.c (expand_expr_real_1): Use the size of GET_MODE (op0)
>   rather than the size of inner_type to determine the stack slot size
>   when handling VIEW_CONVERT_EXPRs on strict-alignment targets.
>
>Index: gcc/expr.c
>===
>--- gcc/expr.c 2018-01-14 08:42:44.497155977 +
>+++ gcc/expr.c 2018-01-16 16:07:22.737883774 +
>@@ -11145,11 +11145,11 @@ expand_expr_real_1 (tree exp, rtx target
>   }
> else if (STRICT_ALIGNMENT)
>   {
>-tree inner_type = TREE_TYPE (treeop0);
> poly_uint64 mode_size = GET_MODE_SIZE (mode);
>-poly_uint64 op0_size
>-  = tree_to_poly_uint64 (TYPE_SIZE_UNIT (inner_type));
>-poly_int64 temp_size = upper_bound (op0_size, mode_size);
>+poly_uint64 temp_size = mode_size;
>+if (GET_MODE (op0) != BLKmode)
>+  temp_size = upper_bound (temp_size,
>+   GET_MODE_SIZE (GET_MODE (op0)));
> rtx new_rtx
>   = assign_stack_temp_for_type (mode, temp_size, type);
> rtx new_with_op0_mode



Re: [PATCH, rs6000] Fix ICE caused by recent patch: Generate lvx and stvx without swaps for aligned vector loads and stores

2018-01-16 Thread Segher Boessenkool
Hi Kelvin,

On Tue, Jan 16, 2018 at 11:15:12AM -0600, Kelvin Nilsen wrote:
> 
> A patch committed on 2018-01-10 is causing an ICE with existing test
> program $GCC_SRC/gcc/testsuite/gcc.target/powerpc/pr83399.c, when
> compiled with the -m32 option.  At the time of the commit, it was
> thought that this was a problem with the recent resolution of PR83399.
> However, further investigation revealed a problem with the patch that
> was just committed.  The generated code did not distinguish between 32-
> and 64-bit targets.
> 
> This patch corrects that problem.
> 
> This has been bootstrapped and tested without regressions on
> powerpc64le-unknown-linux (P8) and on powerpc64-unknown-linux (P7) with
> both -m32 and -m64 target options.  Is this ok for trunk?
> 
> 
> gcc/ChangeLog:
> 
> 2018-01-16  Kelvin Nilsen  
> 

PR target/83399
?  Or is there another PR?

>   * config/rs6000/rs6000-p8swap.c (rs6000_gen_stvx): Generate
>   different rtl trees depending on TARGET_64BIT.
>   (rs6000_gen_lvx): Likewise.
> 
> Index: gcc/config/rs6000/rs6000-p8swap.c
> ===
> --- gcc/config/rs6000/rs6000-p8swap.c (revision 256710)
> +++ gcc/config/rs6000/rs6000-p8swap.c (working copy)
> @@ -1554,23 +1554,31 @@ rs6000_gen_stvx (enum machine_mode mode, rtx dest_
>op1 = XEXP (memory_address, 0);
>op2 = XEXP (memory_address, 1);
>if (mode == V16QImode)
> - stvx = gen_altivec_stvx_v16qi_2op (src_exp, op1, op2);
> + stvx = TARGET_64BIT ? gen_altivec_stvx_v16qi_2op (src_exp, op1, op2)
> +   : gen_altivec_stvx_v16qi_2op_si (src_exp, op1, op2);

Please indent this like

stvx = TARGET_64BIT
   ? gen_altivec_stvx_v16qi_2op (src_exp, op1, op2)
   : gen_altivec_stvx_v16qi_2op_si (src_exp, op1, op2);

>if (mode == V16QImode)
> - stvx = gen_altivec_stvx_v16qi_1op (src_exp, memory_address);
> + stvx = TARGET_64BIT ?
> +   gen_altivec_stvx_v16qi_1op (src_exp, memory_address)
> +   : gen_altivec_stvx_v16qi_1op_si (src_exp, memory_address);

You should never have ? at the end of line; and ? and : indent with the
controlling expression.  So:

stvx = TARGET_64BIT
   ? gen_altivec_stvx_v16qi_1op (src_exp, memory_address)
   : gen_altivec_stvx_v16qi_1op_si (src_exp, memory_address);

Similar everywhere.  Okay with that changed.  Thanks!


Segher


Compilation warning in simple-object-xcoff.c

2018-01-16 Thread Eli Zaretskii
Compiling GDB 8.0.91 with mingw.org's MinGW GCC 6.0.3 produces this
warning in libiberty:

 gcc -c -DHAVE_CONFIG_H -O2 -gdwarf-4 -g3 -D__USE_MINGW_ACCESS  -I. 
-I./../include   -W -Wall -Wwrite-strings -Wc++-compat -Wstrict-prototypes 
-pedantic  -D_GNU_SOURCE ./simple-object-xcoff.c -o simple-object-xcoff.o
 ./simple-object-xcoff.c: In function 'simple_object_xcoff_find_sections':
 ./simple-object-xcoff.c:605:25: warning: left shift count >= width of type 
[-Wshift-count-overflow]
  x_scnlen = x_scnlen << 32
 ^~

And indeed x_scnlen is declared as a 32-bit data type off_t.

I'm willing to test patches if needed.

Thanks.


Re: [PATCH][WWWDOCS][AArch64][ARM] Update GCC 8 release notes

2018-01-16 Thread Kyrill Tkachov

Hi Tamar,

On 16/01/18 16:56, Tamar Christina wrote:

The 01/16/2018 16:36, James Greenhalgh wrote:

On Tue, Jan 16, 2018 at 02:21:30PM +0000, Tamar Christina wrote:

Hi Kyrill,


xgene1 was added a few releases ago, better to use one of the new additions 
from the above list.
For example -mtune=cortex-r52.

Thanks, I have updated the patch. I'll wait for an ok from an AArch64 
maintainer and a Docs maintainer.

OK. But you have the same issue in the AArch64 part.

Thanks, I've updated the patch, I'll wait for a bit for a doc reviewer if I 
don't hear anything I'll assume
the patch is OK.


Gerald has confirmed a few times in the past that port maintainers can approve
target-specific changes to the web pages, and there are words to that effect at:
https://gcc.gnu.org/svnwrite.html .
So I'd recommend you commit your patch once you've got approval for aarch64 and 
arm.
Unless there's some specific part of the patch you'd like the docs maintainer 
to give you feedback on...

Thanks again for working on this.
Kyrill



Thanks,
Tamar

James


Index: htdocs/gcc-8/changes.html
===
RCS file: /cvs/gcc/wwwdocs/htdocs/gcc-8/changes.html,v
retrieving revision 1.26
diff -u -r1.26 changes.html
--- htdocs/gcc-8/changes.html   11 Jan 2018 09:31:53 -  1.26
+++ htdocs/gcc-8/changes.html   16 Jan 2018 14:12:57 -
@@ -147,7 +147,51 @@
  
  AArch64

  
-  
+  
+The Armv8.4-A architecture is now supported.  It can be used by
+specifying the -march=armv8.4-a option.
+  
+  
+The Dot Product instructions are now supported as an optional extension to 
the
+Armv8.2-A architecture and newer and are mandatory on Armv8.4-A.  The 
extension can be used by
+specifying the +dotprod architecture extension.  E.g. 
-march=armv8.2-a+dotprod.
+  
+  
+The Armv8-A +crypto extension has now been split into two 
extensions for finer grained control:
+
+   +aes which contains the Armv8-A AES cryptographic 
instructions.
+   +sha2 which contains the Armv8-A SHA2 and SHA1 cryptographic 
instructions.
+
+Using +crypto will now enable these two extensions.
+  
+  
+New Armv8.4-A FP16 Floating Point Multiplication Variant instructions have 
been added.  These instructions are
+mandatory in Armv8.4-A but available as an optional extension to Armv8.2-A 
and Armv8.3-A.  The new extension
+can be used by specifying the +fp16fml architectural 
extension on Armv8.2-A and Armv8.3-A. On Armv8.4-A
+the instructions can be enabled by specifying +fp16.
+  
+  
+New cryptographic instructions have been added as optional extensions to 
Armv8.2-A and newer.  These instructions can
+be enabled with:
+
+  +sha3 New SHA3 and SHA2 instructions from Armv8.4-A.  This implies 
+sha2.
+  +sm4 New SM3 and SM4 instructions from Armv8.4-A.
+
+ 
+  
+   Support has been added for the following processors
+   (GCC identifiers in parentheses):
+   
+ Arm Cortex-A75 (cortex-a75).
+Arm Cortex-A55 (cortex-a55).
+Arm Cortex-A55/Cortex-A75 DynamIQ big.LITTLE 
(cortex-a75.cortex-a55).
+   
+   The GCC identifiers can be used
+   as arguments to the -mcpu or -mtune options,
+   for example: -mcpu=cortex-a75 or
+   -mtune=thunderx2t99p1 or as arguments to the equivalent 
target
+   attributes and pragmas.
+  
  
  
  ARM

@@ -169,14 +213,58 @@
  removed in a future release.


-The default link behavior for ARMv6 and ARMv7-R targets has been
+The default link behavior for Armv6 and Armv7-R targets has been
  changed to produce BE8 format when generating big-endian images.  A new
  flag -mbe32 can be used to force the linker to produce
  legacy BE32 format images.  There is no change of behavior for
-ARMv6-m and other ARMv7 or later targets: these already defaulted
+Armv6-M and other Armv7 or later targets: these already defaulted
  to BE8 format.  This change brings GCC into alignment with other
  compilers for the ARM architecture.

+  
+The Armv8-R architecture is now supported.  It can be used by specifying 
the
+-march=armv8-r option.
+  
+  
+The Armv8.3-A architecture is now supported.  It can be used by
+specifying the -march=armv8.3-a option.
+  
+  
+The Armv8.4-A architecture is now supported.  It can be used by
+specifying the -march=armv8.4-a option.
+  
+  
+ The Dot Product instructions are now supported as an optional extension 
to the
+ Armv8.2-A architecture and newer and are mandatory on Armv8.4-A.  The 
extension can be used by
+ specifying the +dotprod architecture extension.  E.g. 
-march=armv8.2-a+dotprod.
+  
+
+  
+Support for setting extensions and architectures using the GCC target 
pragma and attribute has been added.
+It can be used by specifying #pragma GCC target ("arch=..."), #pragma 
GCC target ("+extension"),
+

[PATCH, rs6000] Fix ICE caused by recent patch: Generate lvx and stvx without swaps for aligned vector loads and stores

2018-01-16 Thread Kelvin Nilsen

A patch committed on 2018-01-10 is causing an ICE with existing test
program $GCC_SRC/gcc/testsuite/gcc.target/powerpc/pr83399.c, when
compiled with the -m32 option.  At the time of the commit, it was
thought that this was a problem with the recent resolution of PR83399.
However, further investigation revealed a problem with the patch that
was just committed.  The generated code did not distinguish between 32-
and 64-bit targets.

This patch corrects that problem.

This has been bootstrapped and tested without regressions on
powerpc64le-unknown-linux (P8) and on powerpc64-unknown-linux (P7) with
both -m32 and -m64 target options.  Is this ok for trunk?


gcc/ChangeLog:

2018-01-16  Kelvin Nilsen  

* config/rs6000/rs6000-p8swap.c (rs6000_gen_stvx): Generate
different rtl trees depending on TARGET_64BIT.
(rs6000_gen_lvx): Likewise.

Index: gcc/config/rs6000/rs6000-p8swap.c
===
--- gcc/config/rs6000/rs6000-p8swap.c   (revision 256710)
+++ gcc/config/rs6000/rs6000-p8swap.c   (working copy)
@@ -1554,23 +1554,31 @@ rs6000_gen_stvx (enum machine_mode mode, rtx dest_
   op1 = XEXP (memory_address, 0);
   op2 = XEXP (memory_address, 1);
   if (mode == V16QImode)
-   stvx = gen_altivec_stvx_v16qi_2op (src_exp, op1, op2);
+   stvx = TARGET_64BIT ? gen_altivec_stvx_v16qi_2op (src_exp, op1, op2)
+ : gen_altivec_stvx_v16qi_2op_si (src_exp, op1, op2);
   else if (mode == V8HImode)
-   stvx = gen_altivec_stvx_v8hi_2op (src_exp, op1, op2);
+   stvx = TARGET_64BIT ? gen_altivec_stvx_v8hi_2op (src_exp, op1, op2)
+ : gen_altivec_stvx_v8hi_2op_si (src_exp, op1, op2);
 #ifdef HAVE_V8HFmode
   else if (mode == V8HFmode)
-   stvx = gen_altivec_stvx_v8hf_2op (src_exp, op1, op2);
+   stvx = TARGET_64BIT ? gen_altivec_stvx_v8hf_2op (src_exp, op1, op2)
+ : gen_altivec_stvx_v8hf_2op_si (src_exp, op1, op2);
 #endif
   else if (mode == V4SImode)
-   stvx = gen_altivec_stvx_v4si_2op (src_exp, op1, op2);
+   stvx = TARGET_64BIT ? gen_altivec_stvx_v4si_2op (src_exp, op1, op2)
+ : gen_altivec_stvx_v4si_2op_si (src_exp, op1, op2);
   else if (mode == V4SFmode)
-   stvx = gen_altivec_stvx_v4sf_2op (src_exp, op1, op2);
+   stvx = TARGET_64BIT ? gen_altivec_stvx_v4sf_2op (src_exp, op1, op2)
+ : gen_altivec_stvx_v4sf_2op_si (src_exp, op1, op2);
   else if (mode == V2DImode)
-   stvx = gen_altivec_stvx_v2di_2op (src_exp, op1, op2);
+   stvx = TARGET_64BIT ? gen_altivec_stvx_v2di_2op (src_exp, op1, op2)
+ : gen_altivec_stvx_v2di_2op_si (src_exp, op1, op2);
   else if (mode == V2DFmode)
-   stvx = gen_altivec_stvx_v2df_2op (src_exp, op1, op2);
+   stvx = TARGET_64BIT ? gen_altivec_stvx_v2df_2op (src_exp, op1, op2)
+ : gen_altivec_stvx_v2df_2op_si (src_exp, op1, op2);
   else if (mode == V1TImode)
-   stvx = gen_altivec_stvx_v1ti_2op (src_exp, op1, op2);
+   stvx = TARGET_64BIT ? gen_altivec_stvx_v1ti_2op (src_exp, op1, op2)
+ : gen_altivec_stvx_v1ti_2op_si (src_exp, op1, op2);
   else
/* KFmode, TFmode, other modes not expected in this context.  */
gcc_unreachable ();
@@ -1578,23 +1586,39 @@ rs6000_gen_stvx (enum machine_mode mode, rtx dest_
   else /* REG_P (memory_address) */
 {
   if (mode == V16QImode)
-   stvx = gen_altivec_stvx_v16qi_1op (src_exp, memory_address);
+   stvx = TARGET_64BIT ?
+ gen_altivec_stvx_v16qi_1op (src_exp, memory_address)
+ : gen_altivec_stvx_v16qi_1op_si (src_exp, memory_address);
   else if (mode == V8HImode)
-   stvx = gen_altivec_stvx_v8hi_1op (src_exp, memory_address);
+   stvx = TARGET_64BIT ?
+ gen_altivec_stvx_v8hi_1op (src_exp, memory_address)
+ : gen_altivec_stvx_v8hi_1op_si (src_exp, memory_address);
 #ifdef HAVE_V8HFmode
   else if (mode == V8HFmode)
-   stvx = gen_altivec_stvx_v8hf_1op (src_exp, memory_address);
+   stvx = TARGET_64BIT ?
+ gen_altivec_stvx_v8hf_1op (src_exp, memory_address)
+ : gen_altivec_stvx_v8hf_1op_si (src_exp, memory_address);
 #endif
   else if (mode == V4SImode)
-   stvx = gen_altivec_stvx_v4si_1op (src_exp, memory_address);
+   stvx =TARGET_64BIT ?
+ gen_altivec_stvx_v4si_1op (src_exp, memory_address)
+ : gen_altivec_stvx_v4si_1op_si (src_exp, memory_address);
   else if (mode == V4SFmode)
-   stvx = gen_altivec_stvx_v4sf_1op (src_exp, memory_address);
+   stvx = TARGET_64BIT ?
+ gen_altivec_stvx_v4sf_1op (src_exp, memory_address)
+ : gen_altivec_stvx_v4sf_1op_si (src_exp, memory_address);
   else if (mode == V2DImode)
-   stvx = gen_altivec_stvx_v2di_1op (src_exp, memory_address);
+   stvx = TARGET_64BIT ?
+ gen_altivec_stvx_v2di_1op (src_exp, memory_address)
+ : gen_altivec_stvx_v2di_1op_si 

Re: [PATCH] PR82964: Fix 128-bit immediate ICEs

2018-01-16 Thread James Greenhalgh
On Mon, Jan 15, 2018 at 11:34:19AM +0000, Wilco Dijkstra wrote:
> This fixes PR82964 which reports ICEs for some CONST_WIDE_INT immediates.
> It turns out decimal floating point CONST_DOUBLE get changed into
> CONST_WIDE_INT without checking the constraint on the operand, which 
> results in failures.  Avoid this by only allowing SF/DF/TF mode floating
> point constants in aarch64_legitimate_constant_p.  A similar issue can
> occur with 128-bit immediates which may be emitted even when disallowed
> in aarch64_legitimate_constant_p, and the constraints in movti_aarch64
> don't match.  Fix this with a new constraint and allowing valid immediates
> in aarch64_legitimate_constant_p.
> 
> Rather than allowing all 128-bit immediates and expanding in up to 8
> MOV/MOVK instructions, limit them to 4 instructions and use a literal
> load for other cases.  Improve the pr79041-2.c test to use a literal and
> skip it for -fpic.
> 
> This fixes all reported failures. OK for commit?

Most of this makes sense, but I don't understand this relaxation in
aarch64_legitimate_constant_p

> -  /* Do not allow wide int constants - this requires support in movti.  */
> +  /* Only allow simple 128-bit immediates.  */
>if (CONST_WIDE_INT_P (x))
> -return false;
> +return aarch64_mov128_immediate (x);

I can see why this could be correct, but it is unclear why it is necessary
to fix the bug. What goes wrong if we leave this as "return false"?

I think the patch looks OK otherwise, but I'd appreciate an answer on that
point before you commit.

Thanks,
James



Re: GCC 8.0.0 Status Report (2018-01-15), Trunk in Regression and Documentation fixes only mode

2018-01-16 Thread Joseph Myers
On Tue, 16 Jan 2018, Segher Boessenkool wrote:

> On Mon, Jan 15, 2018 at 09:21:07AM +0100, Richard Biener wrote:
> > We're still in pretty bad shape regression-wise.  Please also take
> > the opportunity to check the state of your favorite host/target
> > combination to make sure building and testing works appropriately.
> 
> I tested building Linux (the kernel) for all supported architectures.
> Everything builds (with my usual tweaks, link with libgcc etc.);
> except x86_64 and sh have more problems in the kernel, and mips has
> an ICE.  I'll open a PR for that one.

And all glibc architectures compile (and compile the testsuite) OK except 
for the sh4eb ICE reported in bug 83760 (and the longstanding coldfire 
issue, bug 68467).

-- 
Joseph S. Myers
jos...@codesourcery.com


GCC 6: i386: Move struct ix86_frame to machine_function

2018-01-16 Thread H.J. Lu
This is needed for GCC 6 backport of Spectre patches:

https://gcc.gnu.org/ml/gcc-patches/2018-01/msg01465.html
https://gcc.gnu.org/ml/gcc-patches/2018-01/msg01466.html
https://gcc.gnu.org/ml/gcc-patches/2018-01/msg01464.html

-- 
H.J.


[PATCH 1/2] GCC 6: i386: Move struct ix86_frame to machine_function

2018-01-16 Thread H.J. Lu
From: hjl 

Make ix86_frame available to i386 code generation.  This is needed to
backport the patch set of -mindirect-branch= to mitigate variant #2 of
the speculative execution vulnerabilities on x86 processors identified
by CVE-2017-5715, aka Spectre.

Backport from mainline
2017-06-01  Bernd Edlinger  

* config/i386/i386.c (ix86_frame): Moved to ...
* config/i386/i386.h (ix86_frame): Here.
(machine_function): Add frame.
* config/i386/i386.c (ix86_compute_frame_layout): Replace the
frame argument with &cfun->machine->frame.
(ix86_can_use_return_insn_p): Don't pass &frame to
ix86_compute_frame_layout.  Copy frame from cfun->machine->frame.
(ix86_can_eliminate): Likewise.
(ix86_expand_prologue): Likewise.
(ix86_expand_epilogue): Likewise.
(ix86_expand_split_stack_prologue): Likewise.
---
 gcc/config/i386/i386.c | 68 ++
 gcc/config/i386/i386.h | 53 ++-
 2 files changed, 65 insertions(+), 56 deletions(-)

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 8b5faac5129..a1ff32b648b 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -2434,53 +2434,6 @@ struct GTY(()) stack_local_entry {
   struct stack_local_entry *next;
 };
 
-/* Structure describing stack frame layout.
-   Stack grows downward:
-
-   [arguments]
-   <- ARG_POINTER
-   saved pc
-
-   saved static chain  if ix86_static_chain_on_stack
-
-   saved frame pointer if frame_pointer_needed
-   <- HARD_FRAME_POINTER
-   [saved regs]
-   <- regs_save_offset
-   [padding0]
-
-   [saved SSE regs]
-   <- sse_regs_save_offset
-   [padding1]  |
-  |<- FRAME_POINTER
-   [va_arg registers]  |
-  |
-   [frame]|
-  |
-   [padding2] | = to_allocate
-   <- STACK_POINTER
-  */
-struct ix86_frame
-{
-  int nsseregs;
-  int nregs;
-  int va_arg_size;
-  int red_zone_size;
-  int outgoing_arguments_size;
-
-  /* The offsets relative to ARG_POINTER.  */
-  HOST_WIDE_INT frame_pointer_offset;
-  HOST_WIDE_INT hard_frame_pointer_offset;
-  HOST_WIDE_INT stack_pointer_offset;
-  HOST_WIDE_INT hfp_save_offset;
-  HOST_WIDE_INT reg_save_offset;
-  HOST_WIDE_INT sse_reg_save_offset;
-
-  /* When save_regs_using_mov is set, emit prologue using
- move instead of push instructions.  */
-  bool save_regs_using_mov;
-};
-
 /* Which cpu are we scheduling for.  */
 enum attr_cpu ix86_schedule;
 
@@ -2572,7 +2525,7 @@ static unsigned int ix86_function_arg_boundary 
(machine_mode,
const_tree);
 static rtx ix86_static_chain (const_tree, bool);
 static int ix86_function_regparm (const_tree, const_tree);
-static void ix86_compute_frame_layout (struct ix86_frame *);
+static void ix86_compute_frame_layout (void);
 static bool ix86_expand_vector_init_one_nonzero (bool, machine_mode,
 rtx, rtx, int);
 static void ix86_add_new_builtins (HOST_WIDE_INT);
@@ -10944,7 +10897,8 @@ ix86_can_use_return_insn_p (void)
   if (crtl->args.pops_args && crtl->args.size >= 32768)
 return 0;
 
-  ix86_compute_frame_layout (&frame);
+  ix86_compute_frame_layout ();
+  frame = cfun->machine->frame;
   return (frame.stack_pointer_offset == UNITS_PER_WORD
  && (frame.nregs + frame.nsseregs) == 0);
 }
@@ -11355,8 +11309,8 @@ ix86_can_eliminate (const int from, const int to)
 HOST_WIDE_INT
 ix86_initial_elimination_offset (int from, int to)
 {
-  struct ix86_frame frame;
-  ix86_compute_frame_layout (&frame);
+  ix86_compute_frame_layout ();
+  struct ix86_frame frame = cfun->machine->frame;
 
   if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
 return frame.hard_frame_pointer_offset;
@@ -11395,8 +11349,9 @@ ix86_builtin_setjmp_frame_value (void)
 /* Fill structure ix86_frame about frame of currently computed function.  */
 
 static void
-ix86_compute_frame_layout (struct ix86_frame *frame)
+ix86_compute_frame_layout (void)
 {
+  struct ix86_frame *frame = &cfun->machine->frame;
   unsigned HOST_WIDE_INT stack_alignment_needed;
   HOST_WIDE_INT offset;
   unsigned HOST_WIDE_INT preferred_alignment;
@@ -12702,7 +12657,8 @@ ix86_expand_prologue (void)
   m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
   m->fs.sp_valid = true;
 
-  ix86_compute_frame_layout (&frame);
+  ix86_compute_frame_layout ();
+  frame = m->frame;
 
   if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
 {
@@ -13379,7 +13335,8 @@ ix86_expand_epilogue (int style)
   bool using_drap;
 
   ix86_finalize_stack_realign_flags ();
- 

[PATCH 0/2] GCC 6: i386: Move struct ix86_frame to machine_function

2018-01-16 Thread H.J. Lu
This patch set makes ix86_frame available to i386 code generation.  They
are needed to backport the patch set of -mindirect-branch= to mitigate
variant #2 of the speculative execution vulnerabilities on x86 processors
identified by CVE-2017-5715, aka Spectre.

Tested on Linux/i686 and Linux/x86-64.

hjl (2):
  i386: Move struct ix86_frame to machine_function
  i386: Use reference of struct ix86_frame to avoid copy

 gcc/config/i386/i386.c | 70 ++
 gcc/config/i386/i386.h | 53 +-
 2 files changed, 65 insertions(+), 58 deletions(-)

-- 
2.14.3



Re: [PATCH][WWWDOCS][AArch64][ARM] Update GCC 8 release notes

2018-01-16 Thread Tamar Christina
I seem to have forgotten the patch :)

The 01/16/2018 16:56, Tamar Christina wrote:
> The 01/16/2018 16:36, James Greenhalgh wrote:
> > On Tue, Jan 16, 2018 at 02:21:30PM +, Tamar Christina wrote:
> > > Hi Kyrill,
> > > 
> > > > 
> > > > xgene1 was added a few releases ago, better to use one of the new 
> > > > additions from the above list.
> > > > For example -mtune=cortex-r52.
> > > 
> > > Thanks, I have updated the patch. I'll wait for an ok from an AArch64 
> > > maintainer and a Docs maintainer.
> > 
> > OK. But you have the same issue in the AArch64 part.
> 
> Thanks, I've updated the patch, I'll wait for a bit for a doc reviewer if I 
> don't hear anything I'll assume
> the patch is OK.
> 
> Thanks,
> Tamar
> > 
> > James
> > 
> > > Index: htdocs/gcc-8/changes.html
> > > ===
> > > RCS file: /cvs/gcc/wwwdocs/htdocs/gcc-8/changes.html,v
> > > retrieving revision 1.26
> > > diff -u -r1.26 changes.html
> > > --- htdocs/gcc-8/changes.html 11 Jan 2018 09:31:53 -  1.26
> > > +++ htdocs/gcc-8/changes.html 16 Jan 2018 14:12:57 -
> > > @@ -147,7 +147,51 @@
> > >  
> > >  AArch64
> > >  
> > > -  
> > > +  
> > > +The Armv8.4-A architecture is now supported.  It can be used by
> > > +specifying the -march=armv8.4-a option.
> > > +  
> > > +  
> > > +The Dot Product instructions are now supported as an optional 
> > > extension to the
> > > +Armv8.2-A architecture and newer and are mandatory on Armv8.4-A.  
> > > The extension can be used by
> > > +specifying the +dotprod architecture extension.  E.g. 
> > > -march=armv8.2-a+dotprod.
> > > +  
> > > +  
> > > +The Armv8-A +crypto extension has now been split into 
> > > two extensions for finer grained control:
> > > +
> > > +   +aes which contains the Armv8-A AES cryptographic 
> > > instructions.
> > > +   +sha2 which contains the Armv8-A SHA2 and SHA1 
> > > cryptographic instructions.
> > > +
> > > +Using +crypto will now enable these two extensions.
> > > +  
> > > +  
> > > +New Armv8.4-A FP16 Floating Point Multiplication Variant 
> > > instructions have been added.  These instructions are
> > > +mandatory in Armv8.4-A but available as an optional extension to 
> > > Armv8.2-A and Armv8.3-A.  The new extension
> > > +can be used by specifying the +fp16fml architectural 
> > > extension on Armv8.2-A and Armv8.3-A. On Armv8.4-A
> > > +the instructions can be enabled by specifying +fp16.
> > > +  
> > > +  
> > > +New cryptographic instructions have been added as optional 
> > > extensions to Armv8.2-A and newer.  These instructions can
> > > +be enabled with:
> > > +
> > > +  +sha3 New SHA3 and SHA2 instructions from 
> > > Armv8.4-A.  This implies +sha2.
> > > +  +sm4 New SM3 and SM4 instructions from Armv8.4-A.
> > > +
> > > + 
> > > +  
> > > +   Support has been added for the following processors
> > > +   (GCC identifiers in parentheses):
> > > +   
> > > + Arm Cortex-A75 (cortex-a75).
> > > +  Arm Cortex-A55 (cortex-a55).
> > > +  Arm Cortex-A55/Cortex-A75 DynamIQ big.LITTLE 
> > > (cortex-a75.cortex-a55).
> > > +   
> > > +   The GCC identifiers can be used
> > > +   as arguments to the -mcpu or -mtune 
> > > options,
> > > +   for example: -mcpu=cortex-a75 or
> > > +   -mtune=thunderx2t99p1 or as arguments to the 
> > > equivalent target
> > > +   attributes and pragmas.
> > > +  
> > >  
> > >  
> > >  ARM
> > > @@ -169,14 +213,58 @@
> > >  removed in a future release.
> > >
> > >
> > > -The default link behavior for ARMv6 and ARMv7-R targets has been
> > > +The default link behavior for Armv6 and Armv7-R targets has been
> > >  changed to produce BE8 format when generating big-endian images.  A 
> > > new
> > >  flag -mbe32 can be used to force the linker to produce
> > >  legacy BE32 format images.  There is no change of behavior for
> > > -ARMv6-m and other ARMv7 or later targets: these already defaulted
> > > +Armv6-M and other Armv7 or later targets: these already defaulted
> > >  to BE8 format.  This change brings GCC into alignment with other
> > >  compilers for the ARM architecture.
> > >
> > > +  
> > > +The Armv8-R architecture is now supported.  It can be used by 
> > > specifying the
> > > +-march=armv8-r option.
> > > +  
> > > +  
> > > +The Armv8.3-A architecture is now supported.  It can be used by
> > > +specifying the -march=armv8.3-a option.
> > > +  
> > > +  
> > > +The Armv8.4-A architecture is now supported.  It can be used by
> > > +specifying the -march=armv8.4-a option.
> > > +  
> > > +  
> > > + The Dot Product instructions are now supported as an optional 
> > > extension to the
> > > + Armv8.2-A architecture and newer and are mandatory on Armv8.4-A.  
> > > The extension can be used by
> > > + specifying the +dotprod 

[PATCH 2/2] GCC 6: i386: Use reference of struct ix86_frame to avoid copy

2018-01-16 Thread H.J. Lu
From: hjl 

When there is no need to make a copy of ix86_frame, we can use reference
of struct ix86_frame to avoid copy.

Backport from mainline
2017-11-06  H.J. Lu  

* config/i386/i386.c (ix86_can_use_return_insn_p): Use reference
of struct ix86_frame.
(ix86_initial_elimination_offset): Likewise.
(ix86_expand_split_stack_prologue): Likewise.
---
 gcc/config/i386/i386.c | 8 +++-
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index a1ff32b648b..13ebf107e90 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -10887,7 +10887,6 @@ symbolic_reference_mentioned_p (rtx op)
 bool
 ix86_can_use_return_insn_p (void)
 {
-  struct ix86_frame frame;
 
   if (! reload_completed || frame_pointer_needed)
 return 0;
@@ -10898,7 +10897,7 @@ ix86_can_use_return_insn_p (void)
 return 0;
 
   ix86_compute_frame_layout ();
-  frame = cfun->machine->frame;
+  struct ix86_frame &frame = cfun->machine->frame;
   return (frame.stack_pointer_offset == UNITS_PER_WORD
  && (frame.nregs + frame.nsseregs) == 0);
 }
@@ -11310,7 +11309,7 @@ HOST_WIDE_INT
 ix86_initial_elimination_offset (int from, int to)
 {
   ix86_compute_frame_layout ();
-  struct ix86_frame frame = cfun->machine->frame;
+  struct ix86_frame &frame = cfun->machine->frame;
 
   if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
 return frame.hard_frame_pointer_offset;
@@ -13821,7 +13820,6 @@ static GTY(()) rtx split_stack_fn_large;
 void
 ix86_expand_split_stack_prologue (void)
 {
-  struct ix86_frame frame;
   HOST_WIDE_INT allocate;
   unsigned HOST_WIDE_INT args_size;
   rtx_code_label *label;
@@ -13834,7 +13832,7 @@ ix86_expand_split_stack_prologue (void)
 
   ix86_finalize_stack_realign_flags ();
   ix86_compute_frame_layout ();
-  frame = cfun->machine->frame;
+  struct ix86_frame &frame = cfun->machine->frame;
   allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
 
   /* This is the label we will branch to if we have enough stack
-- 
2.14.3



Re: [PATCH][WWWDOCS][AArch64][ARM] Update GCC 8 release notes

2018-01-16 Thread Tamar Christina
The 01/16/2018 16:36, James Greenhalgh wrote:
> On Tue, Jan 16, 2018 at 02:21:30PM +, Tamar Christina wrote:
> > Hi Kyrill,
> > 
> > > 
> > > xgene1 was added a few releases ago, better to use one of the new 
> > > additions from the above list.
> > > For example -mtune=cortex-r52.
> > 
> > Thanks, I have updated the patch. I'll wait for an ok from an AArch64 
> > maintainer and a Docs maintainer.
> 
> OK. But you have the same issue in the AArch64 part.

Thanks, I've updated the patch, I'll wait for a bit for a doc reviewer if I 
don't hear anything I'll assume
the patch is OK.

Thanks,
Tamar
> 
> James
> 
> > Index: htdocs/gcc-8/changes.html
> > ===
> > RCS file: /cvs/gcc/wwwdocs/htdocs/gcc-8/changes.html,v
> > retrieving revision 1.26
> > diff -u -r1.26 changes.html
> > --- htdocs/gcc-8/changes.html   11 Jan 2018 09:31:53 -  1.26
> > +++ htdocs/gcc-8/changes.html   16 Jan 2018 14:12:57 -
> > @@ -147,7 +147,51 @@
> >  
> >  AArch64
> >  
> > -  
> > +  
> > +The Armv8.4-A architecture is now supported.  It can be used by
> > +specifying the -march=armv8.4-a option.
> > +  
> > +  
> > +The Dot Product instructions are now supported as an optional 
> > extension to the
> > +Armv8.2-A architecture and newer and are mandatory on Armv8.4-A.  The 
> > extension can be used by
> > +specifying the +dotprod architecture extension.  E.g. 
> > -march=armv8.2-a+dotprod.
> > +  
> > +  
> > +The Armv8-A +crypto extension has now been split into two 
> > extensions for finer grained control:
> > +
> > +   +aes which contains the Armv8-A AES cryptographic 
> > instructions.
> > +   +sha2 which contains the Armv8-A SHA2 and SHA1 
> > cryptographic instructions.
> > +
> > +Using +crypto will now enable these two extensions.
> > +  
> > +  
> > +New Armv8.4-A FP16 Floating Point Multiplication Variant instructions 
> > have been added.  These instructions are
> > +mandatory in Armv8.4-A but available as an optional extension to 
> > Armv8.2-A and Armv8.3-A.  The new extension
> > +can be used by specifying the +fp16fml architectural 
> > extension on Armv8.2-A and Armv8.3-A. On Armv8.4-A
> > +the instructions can be enabled by specifying +fp16.
> > +  
> > +  
> > +New cryptographic instructions have been added as optional extensions 
> > to Armv8.2-A and newer.  These instructions can
> > +be enabled with:
> > +
> > +  +sha3 New SHA3 and SHA2 instructions from 
> > Armv8.4-A.  This implies +sha2.
> > +  +sm4 New SM3 and SM4 instructions from Armv8.4-A.
> > +
> > + 
> > +  
> > +   Support has been added for the following processors
> > +   (GCC identifiers in parentheses):
> > +   
> > + Arm Cortex-A75 (cortex-a75).
> > +Arm Cortex-A55 (cortex-a55).
> > +Arm Cortex-A55/Cortex-A75 DynamIQ big.LITTLE 
> > (cortex-a75.cortex-a55).
> > +   
> > +   The GCC identifiers can be used
> > +   as arguments to the -mcpu or -mtune 
> > options,
> > +   for example: -mcpu=cortex-a75 or
> > +   -mtune=thunderx2t99p1 or as arguments to the 
> > equivalent target
> > +   attributes and pragmas.
> > +  
> >  
> >  
> >  ARM
> > @@ -169,14 +213,58 @@
> >  removed in a future release.
> >
> >
> > -The default link behavior for ARMv6 and ARMv7-R targets has been
> > +The default link behavior for Armv6 and Armv7-R targets has been
> >  changed to produce BE8 format when generating big-endian images.  A new
> >  flag -mbe32 can be used to force the linker to produce
> >  legacy BE32 format images.  There is no change of behavior for
> > -ARMv6-m and other ARMv7 or later targets: these already defaulted
> > +Armv6-M and other Armv7 or later targets: these already defaulted
> >  to BE8 format.  This change brings GCC into alignment with other
> >  compilers for the ARM architecture.
> >
> > +  
> > +The Armv8-R architecture is now supported.  It can be used by 
> > specifying the
> > +-march=armv8-r option.
> > +  
> > +  
> > +The Armv8.3-A architecture is now supported.  It can be used by
> > +specifying the -march=armv8.3-a option.
> > +  
> > +  
> > +The Armv8.4-A architecture is now supported.  It can be used by
> > +specifying the -march=armv8.4-a option.
> > +  
> > +  
> > + The Dot Product instructions are now supported as an optional 
> > extension to the
> > + Armv8.2-A architecture and newer and are mandatory on Armv8.4-A.  The 
> > extension can be used by
> > + specifying the +dotprod architecture extension.  E.g. 
> > -march=armv8.2-a+dotprod.
> > +  
> > +
> > +  
> > +Support for setting extensions and architectures using the GCC target 
> > pragma and attribute has been added.
> > +It can be used by specifying #pragma GCC target 
> > ("arch=..."), #pragma GCC target ("+extension"),
> > +

Re: [PATCH][WWWDOCS][AArch64][ARM] Update GCC 8 release notes

2018-01-16 Thread James Greenhalgh
On Tue, Jan 16, 2018 at 02:21:30PM +, Tamar Christina wrote:
> Hi Kyrill,
> 
> > 
> > xgene1 was added a few releases ago, better to use one of the new additions 
> > from the above list.
> > For example -mtune=cortex-r52.
> 
> Thanks, I have updated the patch. I'll wait for an ok from an AArch64 
> maintainer and a Docs maintainer.

OK. But you have the same issue in the AArch64 part.

James

> Index: htdocs/gcc-8/changes.html
> ===
> RCS file: /cvs/gcc/wwwdocs/htdocs/gcc-8/changes.html,v
> retrieving revision 1.26
> diff -u -r1.26 changes.html
> --- htdocs/gcc-8/changes.html 11 Jan 2018 09:31:53 -  1.26
> +++ htdocs/gcc-8/changes.html 16 Jan 2018 14:12:57 -
> @@ -147,7 +147,51 @@
>  
>  AArch64
>  
> -  
> +  
> +The Armv8.4-A architecture is now supported.  It can be used by
> +specifying the -march=armv8.4-a option.
> +  
> +  
> +The Dot Product instructions are now supported as an optional extension 
> to the
> +Armv8.2-A architecture and newer and are mandatory on Armv8.4-A.  The 
> extension can be used by
> +specifying the +dotprod architecture extension.  E.g. 
> -march=armv8.2-a+dotprod.
> +  
> +  
> +The Armv8-A +crypto extension has now been split into two 
> extensions for finer grained control:
> +
> +   +aes which contains the Armv8-A AES cryptographic 
> instructions.
> +   +sha2 which contains the Armv8-A SHA2 and SHA1 
> cryptographic instructions.
> +
> +Using +crypto will now enable these two extensions.
> +  
> +  
> +New Armv8.4-A FP16 Floating Point Multiplication Variant instructions 
> have been added.  These instructions are
> +mandatory in Armv8.4-A but available as an optional extension to 
> Armv8.2-A and Armv8.3-A.  The new extension
> +can be used by specifying the +fp16fml architectural 
> extension on Armv8.2-A and Armv8.3-A. On Armv8.4-A
> +the instructions can be enabled by specifying +fp16.
> +  
> +  
> +New cryptographic instructions have been added as optional extensions to 
> Armv8.2-A and newer.  These instructions can
> +be enabled with:
> +
> +  +sha3 New SHA3 and SHA2 instructions from Armv8.4-A.  
> This implies +sha2.
> +  +sm4 New SM3 and SM4 instructions from Armv8.4-A.
> +
> + 
> +  
> +   Support has been added for the following processors
> +   (GCC identifiers in parentheses):
> +   
> + Arm Cortex-A75 (cortex-a75).
> +  Arm Cortex-A55 (cortex-a55).
> +  Arm Cortex-A55/Cortex-A75 DynamIQ big.LITTLE 
> (cortex-a75.cortex-a55).
> +   
> +   The GCC identifiers can be used
> +   as arguments to the -mcpu or -mtune options,
> +   for example: -mcpu=cortex-a75 or
> +   -mtune=thunderx2t99p1 or as arguments to the equivalent 
> target
> +   attributes and pragmas.
> +  
>  
>  
>  ARM
> @@ -169,14 +213,58 @@
>  removed in a future release.
>
>
> -The default link behavior for ARMv6 and ARMv7-R targets has been
> +The default link behavior for Armv6 and Armv7-R targets has been
>  changed to produce BE8 format when generating big-endian images.  A new
>  flag -mbe32 can be used to force the linker to produce
>  legacy BE32 format images.  There is no change of behavior for
> -ARMv6-m and other ARMv7 or later targets: these already defaulted
> +Armv6-M and other Armv7 or later targets: these already defaulted
>  to BE8 format.  This change brings GCC into alignment with other
>  compilers for the ARM architecture.
>
> +  
> +The Armv8-R architecture is now supported.  It can be used by specifying 
> the
> +-march=armv8-r option.
> +  
> +  
> +The Armv8.3-A architecture is now supported.  It can be used by
> +specifying the -march=armv8.3-a option.
> +  
> +  
> +The Armv8.4-A architecture is now supported.  It can be used by
> +specifying the -march=armv8.4-a option.
> +  
> +  
> + The Dot Product instructions are now supported as an optional extension 
> to the
> + Armv8.2-A architecture and newer and are mandatory on Armv8.4-A.  The 
> extension can be used by
> + specifying the +dotprod architecture extension.  E.g. 
> -march=armv8.2-a+dotprod.
> +  
> +
> +  
> +Support for setting extensions and architectures using the GCC target 
> pragma and attribute has been added.
> +It can be used by specifying #pragma GCC target 
> ("arch=..."), #pragma GCC target ("+extension"),
> +__attribute__((target("arch=..."))) or 
> __attribute__((target("+extension"))).
> +  
> +  
> +New Armv8.4-A FP16 Floating Point Multiplication Variant instructions 
> have been added.  These instructions are
> +mandatory in Armv8.4-A but available as an optional extension to 
> Armv8.2-A and Armv8.3-A.  The new extension
> +can be used by specifying the +fp16fml architectural 
> extension on Armv8.2-A and Armv8.3-A. On Armv8.4-A
> +the instructions can be 

[PATCH v2][AArch64] Remove remaining uses of * in patterns

2018-01-16 Thread Wilco Dijkstra
v2: Rebased after the big SVE commits

Remove the remaining uses of '*' from aarch64.md.
Using '*' in alternatives is typically incorrect as it tells the register
allocator to ignore those alternatives.  Also add a missing '?' so we
prefer a floating point register for same-size int<->fp conversions.

Passes regress & bootstrap, OK for commit?

ChangeLog:
2018-01-16  Wilco Dijkstra  

* config/aarch64/aarch64.md (mov): Remove '*' in alternatives.
(movsi_aarch64): Likewise.
(load_pairsi): Likewise.
(load_pairdi): Likewise.
(store_pairsi): Likewise.
(store_pairdi): Likewise.
(load_pairsf): Likewise.
(load_pairdf): Likewise.
(store_pairsf): Likewise.
(store_pairdf): Likewise.
(zero_extend): Likewise.
(fcvt_target): Add '?' to prefer w over r.

gcc/testsuite/
* gcc.target/aarch64/vfp-1.c: Update test.

--
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 
e52e8350a203b288208c1acb12c8b881d5e8039a..088ed8cb0aad0be08a7e19064708ea14499230f2
 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -907,8 +907,8 @@ (define_expand "mov"
 )
 
 (define_insn "*mov_aarch64"
-  [(set (match_operand:SHORT 0 "nonimmediate_operand" "=r,r,   *w,r ,r,*w, m, 
m, r,*w,*w")
-   (match_operand:SHORT 1 "aarch64_mov_operand"  " r,M,D,Usv,m, 
m,rZ,*w,*w, r,*w"))]
+  [(set (match_operand:SHORT 0 "nonimmediate_operand" "=r,r, w,r ,r,w, 
m,m,r,w,w")
+   (match_operand:SHORT 1 "aarch64_mov_operand"  " 
r,M,D,Usv,m,m,rZ,w,w,r,w"))]
   "(register_operand (operands[0], mode)
 || aarch64_reg_or_zero (operands[1], mode))"
 {
@@ -974,7 +974,7 @@ (define_expand "mov"
 
 (define_insn_and_split "*movsi_aarch64"
   [(set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,r, r,w, m, m,  
r,  r, w,r,w, w")
-   (match_operand:SI 1 "aarch64_mov_operand"  " 
r,r,k,M,n,Usv,m,m,rZ,*w,Usa,Ush,rZ,w,w,Ds"))]
+   (match_operand:SI 1 "aarch64_mov_operand"  " 
r,r,k,M,n,Usv,m,m,rZ,w,Usa,Ush,rZ,w,w,Ds"))]
   "(register_operand (operands[0], SImode)
 || aarch64_reg_or_zero (operands[1], SImode))"
   "@
@@ -1281,9 +1281,9 @@ (define_expand "movmemdi"
 ;; Operands 1 and 3 are tied together by the final condition; so we allow
 ;; fairly lax checking on the second memory operation.
 (define_insn "load_pairsi"
-  [(set (match_operand:SI 0 "register_operand" "=r,*w")
+  [(set (match_operand:SI 0 "register_operand" "=r,w")
(match_operand:SI 1 "aarch64_mem_pair_operand" "Ump,Ump"))
-   (set (match_operand:SI 2 "register_operand" "=r,*w")
+   (set (match_operand:SI 2 "register_operand" "=r,w")
(match_operand:SI 3 "memory_operand" "m,m"))]
   "rtx_equal_p (XEXP (operands[3], 0),
plus_constant (Pmode,
@@ -1297,9 +1297,9 @@ (define_insn "load_pairsi"
 )
 
 (define_insn "load_pairdi"
-  [(set (match_operand:DI 0 "register_operand" "=r,*w")
+  [(set (match_operand:DI 0 "register_operand" "=r,w")
(match_operand:DI 1 "aarch64_mem_pair_operand" "Ump,Ump"))
-   (set (match_operand:DI 2 "register_operand" "=r,*w")
+   (set (match_operand:DI 2 "register_operand" "=r,w")
(match_operand:DI 3 "memory_operand" "m,m"))]
   "rtx_equal_p (XEXP (operands[3], 0),
plus_constant (Pmode,
@@ -1317,9 +1317,9 @@ (define_insn "load_pairdi"
 ;; fairly lax checking on the second memory operation.
 (define_insn "store_pairsi"
   [(set (match_operand:SI 0 "aarch64_mem_pair_operand" "=Ump,Ump")
-   (match_operand:SI 1 "aarch64_reg_or_zero" "rZ,*w"))
+   (match_operand:SI 1 "aarch64_reg_or_zero" "rZ,w"))
(set (match_operand:SI 2 "memory_operand" "=m,m")
-   (match_operand:SI 3 "aarch64_reg_or_zero" "rZ,*w"))]
+   (match_operand:SI 3 "aarch64_reg_or_zero" "rZ,w"))]
   "rtx_equal_p (XEXP (operands[2], 0),
plus_constant (Pmode,
   XEXP (operands[0], 0),
@@ -1333,9 +1333,9 @@ (define_insn "store_pairsi"
 
 (define_insn "store_pairdi"
   [(set (match_operand:DI 0 "aarch64_mem_pair_operand" "=Ump,Ump")
-   (match_operand:DI 1 "aarch64_reg_or_zero" "rZ,*w"))
+   (match_operand:DI 1 "aarch64_reg_or_zero" "rZ,w"))
(set (match_operand:DI 2 "memory_operand" "=m,m")
-   (match_operand:DI 3 "aarch64_reg_or_zero" "rZ,*w"))]
+   (match_operand:DI 3 "aarch64_reg_or_zero" "rZ,w"))]
   "rtx_equal_p (XEXP (operands[2], 0),
plus_constant (Pmode,
   XEXP (operands[0], 0),
@@ -1350,9 +1350,9 @@ (define_insn "store_pairdi"
 ;; Operands 1 and 3 are tied together by the final condition; so we allow
 ;; fairly lax checking on the second memory operation.
 (define_insn "load_pairsf"
-  [(set (match_operand:SF 0 "register_operand" "=w,*r")
+  [(set (match_operand:SF 0 "register_operand" "=w,r")
(match_operand:SF 1 "aarch64_mem_pair_operand" "Ump,Ump"))
-   (set (match_operand:SF 2 "register_operand" "=w,*r")
+   (set 

VIEW_CONVERT_EXPR slots for strict-align targets (PR 83884)

2018-01-16 Thread Richard Sandiford
This PR is about a case in which we VIEW_CONVERT a variable-sized
unaligned record:

 
unit-size 
align:8 ...>

to an aligned 32-bit integer.  The strict-alignment handling of
this case creates an aligned temporary slot, moves the operand
into the slot in the operand's original mode, then accesses the
slot in the more-aligned result mode.

Previously the size of the temporary slot was calculated using:

  HOST_WIDE_INT temp_size
= MAX (int_size_in_bytes (inner_type),
   (HOST_WIDE_INT) GET_MODE_SIZE (mode));

int_size_in_bytes would return -1 for the variable-length type,
so we'd use the size of the result mode for the slot.  r256152 replaced
int_size_in_bytes with tree_to_poly_uint64, which triggered an ICE.

I'd assumed that variable-length types couldn't occur here, since it
seems strange to view-convert a variable-length type to a fixed-length
one.  It also seemed strange that (with the old code) we'd ignore the
size of the operand if it was a variable V but honour it if it was a
constant C, even though it's presumably possible for V to equal that
C at runtime.

If op0 has BLKmode we do a block copy of GET_MODE_SIZE (mode) bytes
and then convert the slot to "mode":

  poly_uint64 mode_size = GET_MODE_SIZE (mode);
  ...
  if (GET_MODE (op0) == BLKmode)
{
  rtx size_rtx = gen_int_mode (mode_size, Pmode);
  emit_block_move (new_with_op0_mode, op0, size_rtx,
   (modifier == EXPAND_STACK_PARM
? BLOCK_OP_CALL_PARM
: BLOCK_OP_NORMAL));
}
  else
...

  op0 = new_rtx;
}
}

  op0 = adjust_address (op0, mode, 0);

so I think in that case just the size of "mode" is enough, even if op0
is a fixed-size type.  For non-BLKmode op0 we first move in op0's mode
and then convert the slot to "mode":

emit_move_insn (new_with_op0_mode, op0);

  op0 = new_rtx;
}
}

  op0 = adjust_address (op0, mode, 0);

so I think we want the maximum of the two mode sizes in that case
(assuming they can be different sizes).

But is this VIEW_CONVERT_EXPR really valid?  Maybe this is just
papering over a deeper issue.  There again, the MAX in the old
code was presumably there because the sizes can be different...

Richard


2018-01-16  Richard Sandiford  

gcc/
PR middle-end/83884
* expr.c (expand_expr_real_1): Use the size of GET_MODE (op0)
rather than the size of inner_type to determine the stack slot size
when handling VIEW_CONVERT_EXPRs on strict-alignment targets.

Index: gcc/expr.c
===
--- gcc/expr.c  2018-01-14 08:42:44.497155977 +
+++ gcc/expr.c  2018-01-16 16:07:22.737883774 +
@@ -11145,11 +11145,11 @@ expand_expr_real_1 (tree exp, rtx target
}
  else if (STRICT_ALIGNMENT)
{
- tree inner_type = TREE_TYPE (treeop0);
  poly_uint64 mode_size = GET_MODE_SIZE (mode);
- poly_uint64 op0_size
-   = tree_to_poly_uint64 (TYPE_SIZE_UNIT (inner_type));
- poly_int64 temp_size = upper_bound (op0_size, mode_size);
+ poly_uint64 temp_size = mode_size;
+ if (GET_MODE (op0) != BLKmode)
+   temp_size = upper_bound (temp_size,
+GET_MODE_SIZE (GET_MODE (op0)));
  rtx new_rtx
= assign_stack_temp_for_type (mode, temp_size, type);
  rtx new_with_op0_mode


Re: [PATCH] i386: More use reference of struct ix86_frame to avoid copy

2018-01-16 Thread H.J. Lu
On Tue, Jan 16, 2018 at 7:03 AM, Martin Liška  wrote:
> On 01/16/2018 01:35 PM, H.J. Lu wrote:
>> On Tue, Jan 16, 2018 at 3:40 AM, H.J. Lu  wrote:
>>> This patch has been used with my Spectre backport for GCC 7 for many
>>> weeks and has been checked into GCC 7 branch.  Should I revert it on
>>> GCC 7 branch or check it into trunk?
>>
>> Ada build failed with this on trunk:
>>
>> raised STORAGE_ERROR : stack overflow or erroneous memory access
>> make[5]: *** 
>> [/export/gnu/import/git/sources/gcc/gcc/ada/Make-generated.in:45:
>> ada/sinfo.h] Error 1
>
> Hello.
>
> I know that you've already reverted the change, but it's possible to replace
> struct ix86_frame &frame = cfun->machine->frame;
>
> with:
> struct ix86_frame *frame = &cfun->machine->frame;
>
> And replace usages with point access operator (->). That would also avoid 
> copying.

Won't it be equivalent to reference?

> One other question. After you switched to references, isn't the behavior of
> function ix86_expand_epilogue changed, as it also contains a write to the
> frame struct like:
>
>  14799/* Special care must be taken for the normal return case of a 
> function
>  14800   using eh_return: the eax and edx registers are marked as saved, 
> but
>  14801   not restored along this path.  Adjust the save location to 
> match.  */
>  14802if (crtl->calls_eh_return && style != 2)
>  14803  frame.reg_save_offset -= 2 * UNITS_PER_WORD;

That could be the issue.  I will double check it.

Thanks.

H.J.
> Thanks for clarification.
> Martin
>
>>
>> Let me revert it on gcc-7-branch.
>>
>> H.J.
>>> H.J.
>>> ---
>>> When there is no need to make a copy of ix86_frame, we can use reference
>>> of struct ix86_frame to avoid copy.
>>>
>>> * config/i386/i386.c (ix86_expand_prologue): Use reference of
>>> struct ix86_frame.
>>> (ix86_expand_epilogue): Likewise.
>>> ---
>>>  gcc/config/i386/i386.c | 6 ++
>>>  1 file changed, 2 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
>>> index bfb31db8752..9eba3ffd5d6 100644
>>> --- a/gcc/config/i386/i386.c
>>> +++ b/gcc/config/i386/i386.c
>>> @@ -13385,7 +13385,6 @@ ix86_expand_prologue (void)
>>>  {
>>>struct machine_function *m = cfun->machine;
>>>rtx insn, t;
>>> -  struct ix86_frame frame;
>>>HOST_WIDE_INT allocate;
>>>bool int_registers_saved;
>>>bool sse_registers_saved;
>>> @@ -13413,7 +13412,7 @@ ix86_expand_prologue (void)
>>>m->fs.sp_valid = true;
>>>m->fs.sp_realigned = false;
>>>
>>> -  frame = m->frame;
>>> +  struct ix86_frame &frame = cfun->machine->frame;
>>>
>>>if (!TARGET_64BIT && ix86_function_ms_hook_prologue 
>>> (current_function_decl))
>>>  {
>>> @@ -14291,7 +14290,6 @@ ix86_expand_epilogue (int style)
>>>  {
>>>struct machine_function *m = cfun->machine;
>>>struct machine_frame_state frame_state_save = m->fs;
>>> -  struct ix86_frame frame;
>>>bool restore_regs_via_mov;
>>>bool using_drap;
>>>bool restore_stub_is_tail = false;
>>> @@ -14304,7 +14302,7 @@ ix86_expand_epilogue (int style)
>>>  }
>>>
>>>ix86_finalize_stack_frame_flags ();
>>> -  frame = m->frame;
>>> +  struct ix86_frame &frame = cfun->machine->frame;
>>>
>>>m->fs.sp_realigned = stack_realign_fp;
>>>m->fs.sp_valid = stack_realign_fp
>>> --
>>> 2.14.3
>>>
>>
>>
>>
>



-- 
H.J.


Re: [PATCH v2] Change default to -fno-math-errno

2018-01-16 Thread Wilco Dijkstra
Joseph Myers wrote:

> Another question to consider: what about configurations (mostly 
> soft-float) where floating-point exceptions are not supported?  (glibc 
> wrongly defines math_errhandling to include MATH_ERREXCEPT there, but the 
> only option actually permitted by C99 in that case would be to define it 
> to MATH_ERRNO.)
> 
> If we wish to distinguish that case, the 
> targetm.float_exceptions_rounding_supported_p hook is the one to use (in 
> the absence of anyone identifying a target that supports exceptions but 
> not rounding modes) - possibly together with flag_iso.

I looked into this and the issue is that calling targetm functions is not 
possible until
the backend is fully initialized (whether the pattern exists or not is not 
sufficient,
the pattern condition must be valid to evaluate as well), and that happens after
option parsing.

In general soft-float is used on tiny targets which don't use errno at all (as 
in
remove all the code dealing with it, including the errno variable itself!), so I
believe it's best to let people explicitly enable -fmath-errno in the rare case
when they really want to.

>> lroundf in GLIBC doesn't set errno, so all the inefficiency was for nothing:
>
> (glibc bug 6797.)

I see, that explains it! A decade old bug - it shows the popularity of errno...

Wilco

Re: [PATCH] rtlanal: dead_or_set_regno_p should handle CLOBBER (PR83424)

2018-01-16 Thread Jeff Law
On 01/16/2018 06:41 AM, Segher Boessenkool wrote:
> On Mon, Dec 18, 2017 at 12:16:13PM -0700, Jeff Law wrote:
>> On 12/16/2017 02:03 PM, Segher Boessenkool wrote:
>>> In PR83424 combine's move_deaths puts a REG_DEAD note in the wrong place
>>> because dead_or_set_regno_p does not account for CLOBBER insns.  This
>>> fixes it.
>>>
>>> Bootstrapped and tested on powerpc64-linux {-m32,-m64} and on x86_64-linux.
>>> Is this okay for trunk?
>>>
>>>
>>> Segher
>>>
>>>
>>> 2017-12-16  Segher Boessenkool  
>>>
>>> PR rtl-optimization/83424
>>> * rtlanal.c (dead_or_set_regno_p): Handle CLOBBER just like SET.
>>>
>>> gcc/testsuite/
>>> PR rtl-optimization/83424
>>> * gcc.dg/pr83424.c: New testsuite.
>> OK.
> 
> Is this okay for backports to 7 and 6, too?
Yes.
jeff


[PATCH] Fix PR83867

2018-01-16 Thread Richard Biener

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied.

Richard.

2018-01-16  Richard Biener  

PR tree-optimization/83867
* tree-vect-stmts.c (vect_transform_stmt): Precompute
nested_in_vect_loop_p since the scalar stmt may get invalidated.

* gcc.dg/vect/pr83867.c: New testcase.

Index: gcc/tree-vect-stmts.c
===
--- gcc/tree-vect-stmts.c   (revision 256722)
+++ gcc/tree-vect-stmts.c   (working copy)
@@ -9426,6 +9426,11 @@ vect_transform_stmt (gimple *stmt, gimpl
   gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
   gimple *old_vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
 
+  bool nested_p = (STMT_VINFO_LOOP_VINFO (stmt_info)
+  && nested_in_vect_loop_p
+   (LOOP_VINFO_LOOP (STMT_VINFO_LOOP_VINFO (stmt_info)),
+stmt));
+
   switch (STMT_VINFO_TYPE (stmt_info))
 {
 case type_demotion_vec_info_type:
@@ -9525,9 +9530,7 @@ vect_transform_stmt (gimple *stmt, gimpl
   /* Handle inner-loop stmts whose DEF is used in the loop-nest that
  is being vectorized, but outside the immediately enclosing loop.  */
   if (vec_stmt
-  && STMT_VINFO_LOOP_VINFO (stmt_info)
-  && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
-STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
+  && nested_p
   && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
   && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
   || STMT_VINFO_RELEVANT (stmt_info) ==
Index: gcc/testsuite/gcc.dg/vect/pr83867.c
===
--- gcc/testsuite/gcc.dg/vect/pr83867.c (nonexistent)
+++ gcc/testsuite/gcc.dg/vect/pr83867.c (working copy)
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O -ftrapv" } */
+
+int
+k5 (int u5, int aw)
+{
+  int v6;
+
+  while (u5 < 1)
+{
+  while (v6 < 4)
+   ++v6;
+
+  v6 = 0;
+  aw += u5 > 0;
+  ++u5;
+}
+
+  return aw;
+}


Re: Two fixes for live-out SLP inductions (PR 83857)

2018-01-16 Thread Richard Biener
On Tue, Jan 16, 2018 at 2:29 PM, Richard Sandiford
 wrote:
> vect_analyze_loop_operations was calling vectorizable_live_operation
> for all live-out phis, which led to a bogus ncopies calculation in
> the pure SLP case.  I think v_a_l_o should only be passing phis
> that are vectorised using normal loop vectorisation, since
> vect_slp_analyze_node_operations handles the SLP side (and knows
> the correct slp_index and slp_node arguments to pass in, via
> vect_analyze_stmt).
>
> With that fixed we hit an older bug that vectorizable_live_operation
> didn't handle live-out SLP inductions.  Fixed by using gimple_phi_result
> rather than gimple_get_lhs for phis.
>
> Tested on aarch64-linux-gnu.  OK to install?

Ok.

Richard.

> Richard
>
>
> 2018-01-16  Richard Sandiford  
>
> gcc/
> PR tree-optimization/83857
> * tree-vect-loop.c (vect_analyze_loop_operations): Don't call
> vectorizable_live_operation for pure SLP statements.
> (vectorizable_live_operation): Handle PHIs.
>
> gcc/testsuite/
> PR tree-optimization/83857
> * gcc.dg/vect/pr83857.c: New test.
>
> Index: gcc/tree-vect-loop.c
> ===
> --- gcc/tree-vect-loop.c2018-01-13 18:02:00.950360196 +
> +++ gcc/tree-vect-loop.c2018-01-16 13:24:33.022528019 +
> @@ -1851,7 +1851,10 @@ vect_analyze_loop_operations (loop_vec_i
> ok = vectorizable_reduction (phi, NULL, NULL, NULL, NULL);
>  }
>
> - if (ok && STMT_VINFO_LIVE_P (stmt_info))
> + /* SLP PHIs are tested by vect_slp_analyze_node_operations.  */
> + if (ok
> + && STMT_VINFO_LIVE_P (stmt_info)
> + && !PURE_SLP_STMT (stmt_info))
> ok = vectorizable_live_operation (phi, NULL, NULL, -1, NULL);
>
>if (!ok)
> @@ -8217,7 +8220,11 @@ vectorizable_live_operation (gimple *stm
>gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
>
>/* Get the correct slp vectorized stmt.  */
> -  vec_lhs = gimple_get_lhs (SLP_TREE_VEC_STMTS (slp_node)[vec_entry]);
> +  gimple *vec_stmt = SLP_TREE_VEC_STMTS (slp_node)[vec_entry];
> +  if (gphi *phi = dyn_cast <gphi *> (vec_stmt))
> +   vec_lhs = gimple_phi_result (phi);
> +  else
> +   vec_lhs = gimple_get_lhs (vec_stmt);
>
>/* Get entry to use.  */
>bitstart = bitsize_int (vec_index);
> Index: gcc/testsuite/gcc.dg/vect/pr83857.c
> ===
> --- /dev/null   2018-01-15 18:48:25.844002736 +
> +++ gcc/testsuite/gcc.dg/vect/pr83857.c 2018-01-16 13:24:33.021528058 +
> @@ -0,0 +1,30 @@
> +/* { dg-do run } */
> +/* { dg-additional-options "-ffast-math" } */
> +
> +#define N 100
> +
> +double __attribute__ ((noinline, noclone))
> +f (double *x, double y)
> +{
> +  double a = 0;
> +  for (int i = 0; i < N; ++i)
> +{
> +  a += y;
> +  x[i * 2] += a;
> +  x[i * 2 + 1] += a;
> +}
> +  return a - y;
> +}
> +
> +double x[N * 2];
> +
> +int
> +main (void)
> +{
> +  if (f (x, 5) != (N - 1) * 5)
> +__builtin_abort ();
> +  return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump "Loop contains only SLP stmts" "vect" { 
> target vect_double } } } */
> +/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" { target 
> vect_double } } } */


Re: [C++ PATCH] Fix ICE in member_vec_dedup (PR c++/83825)

2018-01-16 Thread Nathan Sidwell

On 01/15/2018 04:46 PM, Jakub Jelinek wrote:

Hi!

As the testcase shows, calls to member_vec_dedup and qsort are just guarded
by the vector being non-NULL, which doesn't mean it must be non-empty,
so we can't do (*member_vec)[0] on it.  Fixed by the second hunk, the
rest is just a small cleanup to use the vec.h methods.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?


Ok.  I'm a little surprised we get this case, but I think we've both found 
other strange boundary cases here.  Thanks.


nathan
--
Nathan Sidwell


Re: [PATCH] Fix gimplify_one_sizepos (PR libgomp/83590, take 4)

2018-01-16 Thread Richard Biener
On Tue, 16 Jan 2018, Jakub Jelinek wrote:

> Hi!
> 
> After lengthy IRC discussions, here is an updated patch, which should also
> fix the problem that variably_modified_type_p on a REAL_TYPE returns true
> even when it has constant maximum and minimum.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

Ok.

Richard.

> 2018-01-16  Jakub Jelinek  
>   Richard Biener  
> 
>   PR libgomp/83590
>   * gimplify.c (gimplify_one_sizepos): For is_gimple_constant (expr)
>   return early, inline manually is_gimple_sizepos.  Make sure if we
>   call gimplify_expr we don't end up with a gimple constant.
>   * tree.c (variably_modified_type_p): Don't return true for
>   is_gimple_constant (_t).  Inline manually is_gimple_sizepos.
>   * gimplify.h (is_gimple_sizepos): Remove.
> 
> --- gcc/gimplify.c.jj 2018-01-12 16:38:50.705238254 +0100
> +++ gcc/gimplify.c2018-01-16 12:21:15.895859416 +0100
> @@ -12562,7 +12562,10 @@ gimplify_one_sizepos (tree *expr_p, gimp
>   a VAR_DECL.  If it's a VAR_DECL from another function, the gimplifier
>   will want to replace it with a new variable, but that will cause 
> problems
>   if this type is from outside the function.  It's OK to have that here.  
> */
> -  if (is_gimple_sizepos (expr))
> +  if (expr == NULL_TREE
> +  || is_gimple_constant (expr)
> +  || TREE_CODE (expr) == VAR_DECL
> +  || CONTAINS_PLACEHOLDER_P (expr))
>  return;
>  
>*expr_p = unshare_expr (expr);
> @@ -12570,6 +12573,12 @@ gimplify_one_sizepos (tree *expr_p, gimp
>/* SSA names in decl/type fields are a bad idea - they'll get reclaimed
>   if the def vanishes.  */
>gimplify_expr (expr_p, stmt_p, NULL, is_gimple_val, fb_rvalue, false);
> +
> +  /* If expr wasn't already is_gimple_sizepos or is_gimple_constant from the
> + FE, ensure that it is a VAR_DECL, otherwise we might handle some decls
> + as gimplify_vla_decl even when they would have all sizes INTEGER_CSTs.  
> */
> +  if (is_gimple_constant (*expr_p))
> +*expr_p = get_initialized_tmp_var (*expr_p, stmt_p, NULL, false);
>  }
>  
>  /* Gimplify the body of statements of FNDECL and return a GIMPLE_BIND node
> --- gcc/tree.c.jj 2018-01-15 10:01:40.830186474 +0100
> +++ gcc/tree.c2018-01-16 12:24:11.254821615 +0100
> @@ -8825,11 +8825,12 @@ variably_modified_type_p (tree type, tre
>do { tree _t = (T);
> \
>  if (_t != NULL_TREE  
> \
>   && _t != error_mark_node\
> - && TREE_CODE (_t) != INTEGER_CST\
> + && !CONSTANT_CLASS_P (_t)   \
>   && TREE_CODE (_t) != PLACEHOLDER_EXPR   \
>   && (!fn \
>   || (!TYPE_SIZES_GIMPLIFIED (type)   \
> - && !is_gimple_sizepos (_t)) \
> + && (TREE_CODE (_t) != VAR_DECL  \
> + && !CONTAINS_PLACEHOLDER_P (_t)))   \
>   || walk_tree (&_t, find_var_from_fn, fn, NULL)))\
>return true;  } while (0)
>  
> --- gcc/gimplify.h.jj 2018-01-03 10:19:53.757533721 +0100
> +++ gcc/gimplify.h2018-01-16 12:24:51.995812831 +0100
> @@ -85,23 +85,4 @@ extern enum gimplify_status gimplify_va_
> gimple_seq *);
>  gimple *gimplify_assign (tree, tree, gimple_seq *);
>  
> -/* Return true if gimplify_one_sizepos doesn't need to gimplify
> -   expr (when in TYPE_SIZE{,_UNIT} and similar type/decl size/bitsize
> -   fields).  */
> -
> -static inline bool
> -is_gimple_sizepos (tree expr)
> -{
> -  /* gimplify_one_sizepos doesn't need to do anything if the value isn't 
> there,
> - is constant, or contains A PLACEHOLDER_EXPR.  We also don't want to do
> - anything if it's already a VAR_DECL.  If it's a VAR_DECL from another
> - function, the gimplifier will want to replace it with a new variable,
> - but that will cause problems if this type is from outside the function.
> - It's OK to have that here.  */
> -  return (expr == NULL_TREE
> -   || TREE_CODE (expr) == INTEGER_CST
> -   || TREE_CODE (expr) == VAR_DECL
> -   || CONTAINS_PLACEHOLDER_P (expr));
> -}
> -
>  #endif /* GCC_GIMPLIFY_H */
> 
>   Jakub
> 
> 

-- 
Richard Biener 
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 
21284 (AG Nuernberg)


Re: [PATCH] i386: More use reference of struct ix86_frame to avoid copy

2018-01-16 Thread Martin Liška
On 01/16/2018 01:35 PM, H.J. Lu wrote:
> On Tue, Jan 16, 2018 at 3:40 AM, H.J. Lu  wrote:
>> This patch has been used with my Spectre backport for GCC 7 for many
>> weeks and has been checked into GCC 7 branch.  Should I revert it on
>> GCC 7 branch or check it into trunk?
> 
> Ada build failed with this on trunk:
> 
> raised STORAGE_ERROR : stack overflow or erroneous memory access
> make[5]: *** [/export/gnu/import/git/sources/gcc/gcc/ada/Make-generated.in:45:
> ada/sinfo.h] Error 1

Hello.

I know that you've already reverted the change, but it's possible to replace
struct ix86_frame &frame = cfun->machine->frame;

with:
struct ix86_frame *frame = &cfun->machine->frame;

And replace usages with the pointer member access operator (->). That would also avoid 
copying.

One more question: after you switched to references, isn't the behavior of 
function ix86_expand_epilogue changed, as it also contains a write to the
frame struct like:

 14799/* Special care must be taken for the normal return case of a function
 14800   using eh_return: the eax and edx registers are marked as saved, but
 14801   not restored along this path.  Adjust the save location to match.  
*/
 14802if (crtl->calls_eh_return && style != 2)
 14803  frame.reg_save_offset -= 2 * UNITS_PER_WORD;

Thanks for clarification.
Martin

> 
> Let me revert it on gcc-7-branch.
> 
> H.J.
>> H.J.
>> ---
>> When there is no need to make a copy of ix86_frame, we can use reference
>> of struct ix86_frame to avoid copy.
>>
>> * config/i386/i386.c (ix86_expand_prologue): Use reference of
>> struct ix86_frame.
>> (ix86_expand_epilogue): Likewise.
>> ---
>>  gcc/config/i386/i386.c | 6 ++
>>  1 file changed, 2 insertions(+), 4 deletions(-)
>>
>> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
>> index bfb31db8752..9eba3ffd5d6 100644
>> --- a/gcc/config/i386/i386.c
>> +++ b/gcc/config/i386/i386.c
>> @@ -13385,7 +13385,6 @@ ix86_expand_prologue (void)
>>  {
>>struct machine_function *m = cfun->machine;
>>rtx insn, t;
>> -  struct ix86_frame frame;
>>HOST_WIDE_INT allocate;
>>bool int_registers_saved;
>>bool sse_registers_saved;
>> @@ -13413,7 +13412,7 @@ ix86_expand_prologue (void)
>>m->fs.sp_valid = true;
>>m->fs.sp_realigned = false;
>>
>> -  frame = m->frame;
>> +  struct ix86_frame &frame = cfun->machine->frame;
>>
>>if (!TARGET_64BIT && ix86_function_ms_hook_prologue 
>> (current_function_decl))
>>  {
>> @@ -14291,7 +14290,6 @@ ix86_expand_epilogue (int style)
>>  {
>>struct machine_function *m = cfun->machine;
>>struct machine_frame_state frame_state_save = m->fs;
>> -  struct ix86_frame frame;
>>bool restore_regs_via_mov;
>>bool using_drap;
>>bool restore_stub_is_tail = false;
>> @@ -14304,7 +14302,7 @@ ix86_expand_epilogue (int style)
>>  }
>>
>>ix86_finalize_stack_frame_flags ();
>> -  frame = m->frame;
>> +  struct ix86_frame &frame = cfun->machine->frame;
>>
>>m->fs.sp_realigned = stack_realign_fp;
>>m->fs.sp_valid = stack_realign_fp
>> --
>> 2.14.3
>>
> 
> 
> 



Re: [PATCH 0/5] x86: CVE-2017-5715, aka Spectre

2018-01-16 Thread Rainer Orth
Hi Richard,

> I'm quite sure Solaris supports comdats, after all it invented ELF ;)

true: gcc/configure.ac has

  # Sun ld has COMDAT group support since Solaris 9, but it doesn't
  # interoperate with GNU as until Solaris 11 build 130, i.e. ld
  # version 1.688.
  #
  # If using Sun as for COMDAT group as emitted by GCC, one needs at
  # least ld version 1.2267.

> I've also seen
> comdats in debugging early LTO issues.  We might run into Solaris as
> issues though.

The Solaris code has been taught to deal with that, so it should
hopefully be hidden from the rest of the compiler.

Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


Re: Move pa.h FUNCTION_ARG_SIZE to pa.c (PR83858)

2018-01-16 Thread John David Anglin

On 2018-01-16 9:48 AM, Richard Sandiford wrote:

Oops, yes.  Installed with that change, thanks.
Oops, I just realized the CEIL function needs to be applied to the 
GET_MODE_SIZE

return as well...

Dave

--
John David Anglin  dave.ang...@bell.net



Re: [PATCH 0/5] x86: CVE-2017-5715, aka Spectre

2018-01-16 Thread Rainer Orth
Hi Jan,

>> It makes the option using thunks unusable though, right?  Can you simply make
>> them hidden on systems without comdat support?  That duplicates them per TU
>> but at least the feature works.  Or those systems should provide the
>> thunks via
>> libgcc.
>> 
>> I agree we can followup with a fix for Solaris given lack of a public
>> testing machine.
>
> My memory is bit dim, but I am convinced I was fixing specific errors for
> comdats
> on Solaris, so I think the toolchain supports them in some sort, just is more
> restrictive/different from GNU implementation.

comdat does work just fine in Solaris 11, but the Solaris 10 linker has
problems with what gcc generates.

> Indeed, i think just producing sorry, unimplemented message is what we should 
> do
> if we can't support retpoline on given target.

Certainly, coupled with an appropriate effective-target keyword to limit
testcases appropriately.

Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


Re: [PATCH 0/5] x86: CVE-2017-5715, aka Spectre

2018-01-16 Thread Rainer Orth
Hi Richard,

>>> Backport is blocked by
>>>
>>> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=83838
>>>
>>> There are many test failures due to lack of comdat support in linker on
>>> Solaris.

actually this is lack of hidden .gnu.linkonce support right now.
Currently that's disabled for all but gld; I'm looking to make that
dynamic on newer versions of Solaris 11.

>>> I can limit these tests to Linux.
>>
>> These are testcase issues and shouldn't block backport to GCC 7.
>
> It makes the option using thunks unusable though, right?  Can you simply make
> them hidden on systems without comdat support?  That duplicates them per TU
> but at least the feature works.  Or those systems should provide the thunks 
> via
> libgcc.
>
> I agree we can followup with a fix for Solaris given lack of a public
> testing machine.

I do have both an x86 and sparc machine running Solaris 11 around to
serve as testing machines.  Still checking with legal how best to handle
external access, either locally or integrated into the compile farm.

Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


[PATCH] Fix gimplify_one_sizepos (PR libgomp/83590, take 4)

2018-01-16 Thread Jakub Jelinek
Hi!

After lengthy IRC discussions, here is an updated patch, which should also
fix the problem that variably_modified_type_p on a REAL_TYPE returns true
even when it has constant maximum and minimum.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2018-01-16  Jakub Jelinek  
Richard Biener  

PR libgomp/83590
* gimplify.c (gimplify_one_sizepos): For is_gimple_constant (expr)
return early, inline manually is_gimple_sizepos.  Make sure if we
call gimplify_expr we don't end up with a gimple constant.
* tree.c (variably_modified_type_p): Don't return true for
is_gimple_constant (_t).  Inline manually is_gimple_sizepos.
* gimplify.h (is_gimple_sizepos): Remove.

--- gcc/gimplify.c.jj   2018-01-12 16:38:50.705238254 +0100
+++ gcc/gimplify.c  2018-01-16 12:21:15.895859416 +0100
@@ -12562,7 +12562,10 @@ gimplify_one_sizepos (tree *expr_p, gimp
  a VAR_DECL.  If it's a VAR_DECL from another function, the gimplifier
  will want to replace it with a new variable, but that will cause problems
  if this type is from outside the function.  It's OK to have that here.  */
-  if (is_gimple_sizepos (expr))
+  if (expr == NULL_TREE
+  || is_gimple_constant (expr)
+  || TREE_CODE (expr) == VAR_DECL
+  || CONTAINS_PLACEHOLDER_P (expr))
 return;
 
   *expr_p = unshare_expr (expr);
@@ -12570,6 +12573,12 @@ gimplify_one_sizepos (tree *expr_p, gimp
   /* SSA names in decl/type fields are a bad idea - they'll get reclaimed
  if the def vanishes.  */
   gimplify_expr (expr_p, stmt_p, NULL, is_gimple_val, fb_rvalue, false);
+
+  /* If expr wasn't already is_gimple_sizepos or is_gimple_constant from the
+ FE, ensure that it is a VAR_DECL, otherwise we might handle some decls
+ as gimplify_vla_decl even when they would have all sizes INTEGER_CSTs.  */
+  if (is_gimple_constant (*expr_p))
+*expr_p = get_initialized_tmp_var (*expr_p, stmt_p, NULL, false);
 }
 
 /* Gimplify the body of statements of FNDECL and return a GIMPLE_BIND node
--- gcc/tree.c.jj   2018-01-15 10:01:40.830186474 +0100
+++ gcc/tree.c  2018-01-16 12:24:11.254821615 +0100
@@ -8825,11 +8825,12 @@ variably_modified_type_p (tree type, tre
   do { tree _t = (T);  \
 if (_t != NULL_TREE
\
&& _t != error_mark_node\
-   && TREE_CODE (_t) != INTEGER_CST\
+   && !CONSTANT_CLASS_P (_t)   \
&& TREE_CODE (_t) != PLACEHOLDER_EXPR   \
&& (!fn \
|| (!TYPE_SIZES_GIMPLIFIED (type)   \
-   && !is_gimple_sizepos (_t)) \
+   && (TREE_CODE (_t) != VAR_DECL  \
+   && !CONTAINS_PLACEHOLDER_P (_t)))   \
|| walk_tree (&_t, find_var_from_fn, fn, NULL)))\
   return true;  } while (0)
 
--- gcc/gimplify.h.jj   2018-01-03 10:19:53.757533721 +0100
+++ gcc/gimplify.h  2018-01-16 12:24:51.995812831 +0100
@@ -85,23 +85,4 @@ extern enum gimplify_status gimplify_va_
  gimple_seq *);
 gimple *gimplify_assign (tree, tree, gimple_seq *);
 
-/* Return true if gimplify_one_sizepos doesn't need to gimplify
-   expr (when in TYPE_SIZE{,_UNIT} and similar type/decl size/bitsize
-   fields).  */
-
-static inline bool
-is_gimple_sizepos (tree expr)
-{
-  /* gimplify_one_sizepos doesn't need to do anything if the value isn't there,
- is constant, or contains A PLACEHOLDER_EXPR.  We also don't want to do
- anything if it's already a VAR_DECL.  If it's a VAR_DECL from another
- function, the gimplifier will want to replace it with a new variable,
- but that will cause problems if this type is from outside the function.
- It's OK to have that here.  */
-  return (expr == NULL_TREE
- || TREE_CODE (expr) == INTEGER_CST
- || TREE_CODE (expr) == VAR_DECL
- || CONTAINS_PLACEHOLDER_P (expr));
-}
-
 #endif /* GCC_GIMPLIFY_H */

Jakub


Re: Move pa.h FUNCTION_ARG_SIZE to pa.c (PR83858)

2018-01-16 Thread Richard Sandiford
John David Anglin  writes:
> On 2018-01-16 5:52 AM, Richard Sandiford wrote:
>> 2018-01-16  Richard Sandiford
>>
>> gcc/
>>  PR target/83858
>>  * config/pa/pa.h (FUNCTION_ARG_SIZE): Delete.
>>  * config/pa/pa-protos.h (pa_function_arg_size): Declare.
>>  * config/pa/som.h (ASM_DECLARE_FUNCTION_NAME): Use
>>  pa_function_arg_size instead of FUNCTION_ARG_SIZE.
>>  * config/pa/pa.c (pa_function_arg_advance): Likewise.
>>  (pa_function_arg, pa_arg_partial_bytes): Likewise.
>>  (pa_function_arg_size): New function.
> Thanks Richard.  I started a build yesterday evening with essentially 
> the same change.
>
> Two little nits.  I believe a declaration for pa_function_arg_size needs 
> to be added to pa-protos.h.

The patch did have this.

> Secondly, the comment for pa_function_arg_size needs to be updated to
> say "function" instead of "macro". Otherwise, the change is okay.

Oops, yes.  Installed with that change, thanks.

Richard



Re: Move pa.h FUNCTION_ARG_SIZE to pa.c (PR83858)

2018-01-16 Thread John David Anglin

On 2018-01-16 5:52 AM, Richard Sandiford wrote:

2018-01-16  Richard Sandiford

gcc/
PR target/83858
* config/pa/pa.h (FUNCTION_ARG_SIZE): Delete.
* config/pa/pa-protos.h (pa_function_arg_size): Declare.
* config/pa/som.h (ASM_DECLARE_FUNCTION_NAME): Use
pa_function_arg_size instead of FUNCTION_ARG_SIZE.
* config/pa/pa.c (pa_function_arg_advance): Likewise.
(pa_function_arg, pa_arg_partial_bytes): Likewise.
(pa_function_arg_size): New function.
Thanks Richard.  I started a build yesterday evening with essentially 
the same change.


Two little nits.  I believe a declaration for pa_function_arg_size needs 
to be added to pa-protos.h.  Secondly, the comment for 
pa_function_arg_size
needs to be updated to say "function" instead of "macro". Otherwise, the 
change

is okay.

I want to see if ASM_DECLARE_FUNCTION_NAME can be turned into a function in
pa.c as well. This would allow pa_function_arg_size to be static.

Dave

--
John David Anglin  dave.ang...@bell.net



Re: [PATCH v3, rs6000] Add -mspeculate-indirect-jumps option and implement non-speculating bctr / bctrl

2018-01-16 Thread Bill Schmidt
On Jan 16, 2018, at 6:13 AM, Segher Boessenkool  
wrote:
> 
> Hi!
> 
> On Tue, Jan 16, 2018 at 09:29:13AM +0100, Richard Biener wrote:
>> Did you consider simply removing the tablejump/casesi support so
>> expansion always
>> expands to a balanced tree?  At least if we have any knobs to tune we
>> should probably
>> tweak them away from the indirect jump using variants with
>> -mno-speculate-indirect-jumps,
>> right?
> 
> We can generate indirect jumps for other situations so this patch will
> still be needed.

Also, I'm not convinced that a balanced tree for a large jump table
is a slam dunk better performer than this (adding hundreds of poorly
predictable branches that can clog up hardware predictors for, say,
an interpreter loop).  I'd want to do some performance testing to look
for crossover points (as you say, tuning knobs).  But for smaller tables
this is a good idea.

Thanks,
Bill
> 
>> Performance optimization, so shouldn't block this patch - I just
>> thought I should probably
>> mention this.
> 
> Yeah let's get this done first :-)
> 
> 
> Segher
> 



Re: [PATCH][WWWDOCS][AArch64][ARM] Update GCC 8 release notes

2018-01-16 Thread Tamar Christina
Hi Kyrill,

> 
> xgene1 was added a few releases ago, better to use one of the new additions 
> from the above list.
> For example -mtune=cortex-r52.

Thanks, I have updated the patch. I'll wait for an ok from an AArch64 
maintainer and a Docs maintainer.

> 
> With that nit the arm changes look ok to me.
> Thanks for compiling this!
> Kyrill
> 

Cheers,
Tamar

-- 
Index: htdocs/gcc-8/changes.html
===
RCS file: /cvs/gcc/wwwdocs/htdocs/gcc-8/changes.html,v
retrieving revision 1.26
diff -u -r1.26 changes.html
--- htdocs/gcc-8/changes.html	11 Jan 2018 09:31:53 -	1.26
+++ htdocs/gcc-8/changes.html	16 Jan 2018 14:12:57 -
@@ -147,7 +147,51 @@
 
 AArch64
 
-  
+  
+The Armv8.4-A architecture is now supported.  It can be used by
+specifying the -march=armv8.4-a option.
+  
+  
+The Dot Product instructions are now supported as an optional extension to the
+Armv8.2-A architecture and newer and are mandatory on Armv8.4-A.  The extension can be used by
+specifying the +dotprod architecture extension.  E.g. -march=armv8.2-a+dotprod.
+  
+  
+The Armv8-A +crypto extension has now been split into two extensions for finer grained control:
+
+   +aes which contains the Armv8-A AES cryptographic instructions.
+   +sha2 which contains the Armv8-A SHA2 and SHA1 cryptographic instructions.
+
+Using +crypto will now enable these two extensions.
+  
+  
+New Armv8.4-A FP16 Floating Point Multiplication Variant instructions have been added.  These instructions are
+mandatory in Armv8.4-A but available as an optional extension to Armv8.2-A and Armv8.3-A.  The new extension
+can be used by specifying the +fp16fml architectural extension on Armv8.2-A and Armv8.3-A. On Armv8.4-A
+the instructions can be enabled by specifying +fp16.
+  
+  
+New cryptographic instructions have been added as optional extensions to Armv8.2-A and newer.  These instructions can
+be enabled with:
+
+  +sha3 New SHA3 and SHA2 instructions from Armv8.4-A.  This implies +sha2.
+  +sm4 New SM3 and SM4 instructions from Armv8.4-A.
+
+ 
+  
+   Support has been added for the following processors
+   (GCC identifiers in parentheses):
+   
+ Arm Cortex-A75 (cortex-a75).
+	 Arm Cortex-A55 (cortex-a55).
+	 Arm Cortex-A55/Cortex-A75 DynamIQ big.LITTLE (cortex-a75.cortex-a55).
+   
+   The GCC identifiers can be used
+   as arguments to the -mcpu or -mtune options,
+   for example: -mcpu=cortex-a75 or
+   -mtune=thunderx2t99p1 or as arguments to the equivalent target
+   attributes and pragmas.
+  
 
 
 ARM
@@ -169,14 +213,58 @@
 removed in a future release.
   
   
-The default link behavior for ARMv6 and ARMv7-R targets has been
+The default link behavior for Armv6 and Armv7-R targets has been
 changed to produce BE8 format when generating big-endian images.  A new
 flag -mbe32 can be used to force the linker to produce
 legacy BE32 format images.  There is no change of behavior for
-ARMv6-m and other ARMv7 or later targets: these already defaulted
+Armv6-M and other Armv7 or later targets: these already defaulted
 to BE8 format.  This change brings GCC into alignment with other
 compilers for the ARM architecture.
   
+  
+The Armv8-R architecture is now supported.  It can be used by specifying the
+-march=armv8-r option.
+  
+  
+The Armv8.3-A architecture is now supported.  It can be used by
+specifying the -march=armv8.3-a option.
+  
+  
+The Armv8.4-A architecture is now supported.  It can be used by
+specifying the -march=armv8.4-a option.
+  
+  
+ The Dot Product instructions are now supported as an optional extension to the
+ Armv8.2-A architecture and newer and are mandatory on Armv8.4-A.  The extension can be used by
+ specifying the +dotprod architecture extension.  E.g. -march=armv8.2-a+dotprod.
+  
+
+  
+Support for setting extensions and architectures using the GCC target pragma and attribute has been added.
+It can be used by specifying #pragma GCC target ("arch=..."), #pragma GCC target ("+extension"),
+__attribute__((target("arch=..."))) or __attribute__((target("+extension"))).
+  
+  
+New Armv8.4-A FP16 Floating Point Multiplication Variant instructions have been added.  These instructions are
+mandatory in Armv8.4-A but available as an optional extension to Armv8.2-A and Armv8.3-A.  The new extension
+can be used by specifying the +fp16fml architectural extension on Armv8.2-A and Armv8.3-A. On Armv8.4-A
+the instructions can be enabled by specifying +fp16.
+  
+  
+   Support has been added for the following processors
+   (GCC identifiers in parentheses):
+   
+	 Arm Cortex-A75 (cortex-a75).
+	 Arm Cortex-A55 (cortex-a55).
+	 Arm Cortex-A55/Cortex-A75 DynamIQ big.LITTLE (cortex-a75.cortex-a55).
+	 Arm Cortex-R52 for Armv8-R 

Re: [PATCH] rtlanal: dead_or_set_regno_p should handle CLOBBER (PR83424)

2018-01-16 Thread Segher Boessenkool
On Mon, Dec 18, 2017 at 12:16:13PM -0700, Jeff Law wrote:
> On 12/16/2017 02:03 PM, Segher Boessenkool wrote:
> > In PR83424 combine's move_deaths puts a REG_DEAD note in the wrong place
> > because dead_or_set_regno_p does not account for CLOBBER insns.  This
> > fixes it.
> > 
> > Bootstrapped and tested on powerpc64-linux {-m32,-m64} and on x86_64-linux.
> > Is this okay for trunk?
> > 
> > 
> > Segher
> > 
> > 
> > 2017-12-16  Segher Boessenkool  
> > 
> > PR rtl-optimization/83424
> > * rtlanal.c (dead_or_set_regno_p): Handle CLOBBER just like SET.
> > 
> > gcc/testsuite/
> > PR rtl-optimization/83424
> > * gcc.dg/pr83424.c: New testsuite.
> OK.

Is this okay for backports to 7 and 6, too?


Segher


Two fixes for live-out SLP inductions (PR 83857)

2018-01-16 Thread Richard Sandiford
vect_analyze_loop_operations was calling vectorizable_live_operation
for all live-out phis, which led to a bogus ncopies calculation in
the pure SLP case.  I think v_a_l_o should only be passing phis
that are vectorised using normal loop vectorisation, since
vect_slp_analyze_node_operations handles the SLP side (and knows
the correct slp_index and slp_node arguments to pass in, via
vect_analyze_stmt).

With that fixed we hit an older bug that vectorizable_live_operation
didn't handle live-out SLP inductions.  Fixed by using gimple_phi_result
rather than gimple_get_lhs for phis.

Tested on aarch64-linux-gnu.  OK to install?

Richard


2018-01-16  Richard Sandiford  

gcc/
PR tree-optimization/83857
* tree-vect-loop.c (vect_analyze_loop_operations): Don't call
vectorizable_live_operation for pure SLP statements.
(vectorizable_live_operation): Handle PHIs.

gcc/testsuite/
PR tree-optimization/83857
* gcc.dg/vect/pr83857.c: New test.

Index: gcc/tree-vect-loop.c
===
--- gcc/tree-vect-loop.c2018-01-13 18:02:00.950360196 +
+++ gcc/tree-vect-loop.c2018-01-16 13:24:33.022528019 +
@@ -1851,7 +1851,10 @@ vect_analyze_loop_operations (loop_vec_i
ok = vectorizable_reduction (phi, NULL, NULL, NULL, NULL);
 }
 
- if (ok && STMT_VINFO_LIVE_P (stmt_info))
+ /* SLP PHIs are tested by vect_slp_analyze_node_operations.  */
+ if (ok
+ && STMT_VINFO_LIVE_P (stmt_info)
+ && !PURE_SLP_STMT (stmt_info))
ok = vectorizable_live_operation (phi, NULL, NULL, -1, NULL);
 
   if (!ok)
@@ -8217,7 +8220,11 @@ vectorizable_live_operation (gimple *stm
   gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
 
   /* Get the correct slp vectorized stmt.  */
-  vec_lhs = gimple_get_lhs (SLP_TREE_VEC_STMTS (slp_node)[vec_entry]);
+  gimple *vec_stmt = SLP_TREE_VEC_STMTS (slp_node)[vec_entry];
+  if (gphi *phi = dyn_cast <gphi *> (vec_stmt))
+   vec_lhs = gimple_phi_result (phi);
+  else
+   vec_lhs = gimple_get_lhs (vec_stmt);
 
   /* Get entry to use.  */
   bitstart = bitsize_int (vec_index);
Index: gcc/testsuite/gcc.dg/vect/pr83857.c
===
--- /dev/null   2018-01-15 18:48:25.844002736 +
+++ gcc/testsuite/gcc.dg/vect/pr83857.c 2018-01-16 13:24:33.021528058 +
@@ -0,0 +1,30 @@
+/* { dg-do run } */
+/* { dg-additional-options "-ffast-math" } */
+
+#define N 100
+
+double __attribute__ ((noinline, noclone))
+f (double *x, double y)
+{
+  double a = 0;
+  for (int i = 0; i < N; ++i)
+{
+  a += y;
+  x[i * 2] += a;
+  x[i * 2 + 1] += a;
+}
+  return a - y;
+}
+
+double x[N * 2];
+
+int
+main (void)
+{
+  if (f (x, 5) != (N - 1) * 5)
+__builtin_abort ();
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump "Loop contains only SLP stmts" "vect" { target vect_double } } } */
+/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" { target vect_double } } } */


Re: [C++ Patch] PR 81054 ("[7/8 Regression] ICE with volatile variable in constexpr function")

2018-01-16 Thread Paolo Carlini
.. nevermind, this requires more work: my simple patchlet would cause a 
few regressions in the libstdc++-v3 testsuite (the assert at the 
beginning of finish_expr_stmt triggers)


Paolo.


Re: [PATCH] Fix store-merging for ~ of bswap (PR tree-optimization/83843)

2018-01-16 Thread Christophe Lyon
On 15 January 2018 at 22:44, Jakub Jelinek  wrote:
> Hi!
>
> When using the bswap pass infrastructure, BIT_NOT_EXPRs aren't allowed in
> the middle, but due to the way process_store handles those it can appear
> around the value, which is something output_merged_store didn't handle.
>
> Fixed thusly, where we handle not just the case when the bswap (or nop)
> value needs inversion as whole, but also cases where only a few portions of
> it need xoring with some mask.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>
> 2018-01-15  Jakub Jelinek  
>
> PR tree-optimization/83843
> * gimple-ssa-store-merging.c
> (imm_store_chain_info::output_merged_store): Handle bit_not_p on
> store_immediate_info for bswap/nop orig_stores.
>
> * gcc.dg/store_merging_18.c: New test.
>
Hi Jakub,

I've noticed that this new test fails on arm, eg:
arm-none-linux-gnueabihf
--with-mode arm
--with-cpu cortex-a9
--with-fpu neon-fp16
FAIL: gcc.dg/store_merging_18.c scan-tree-dump-times store-merging
"Merging successful" 3 (found 0 times)

Do you want me to file a PR?

Christophe



> --- gcc/gimple-ssa-store-merging.c.jj   2018-01-04 00:43:17.629703230 +0100
> +++ gcc/gimple-ssa-store-merging.c  2018-01-15 12:29:14.105789381 +0100
> @@ -3619,6 +3619,15 @@ imm_store_chain_info::output_merged_stor
>   gimple_seq_add_stmt_without_update (&seq, stmt);
>   src = gimple_assign_lhs (stmt);
> }
> + inv_op = invert_op (split_store, 2, int_type, xor_mask);
> + if (inv_op != NOP_EXPR)
> +   {
> + stmt = gimple_build_assign (make_ssa_name (int_type),
> + inv_op, src, xor_mask);
> + gimple_set_location (stmt, loc);
> + gimple_seq_add_stmt_without_update (&seq, stmt);
> + src = gimple_assign_lhs (stmt);
> +   }
>   break;
> default:
>   src = ops[0];
> --- gcc/testsuite/gcc.dg/store_merging_18.c.jj  2018-01-15 12:43:49.607227365 
> +0100
> +++ gcc/testsuite/gcc.dg/store_merging_18.c 2018-01-15 12:43:24.882245004 
> +0100
> @@ -0,0 +1,51 @@
> +/* PR tree-optimization/83843 */
> +/* { dg-do run } */
> +/* { dg-options "-O2 -fdump-tree-store-merging" } */
> +/* { dg-final { scan-tree-dump-times "Merging successful" 3 "store-merging" 
> { target store_merge } } } */
> +
> +__attribute__((noipa)) void
> +foo (unsigned char *buf, unsigned char *tab)
> +{
> +  unsigned v = tab[1] ^ (tab[0] << 8);
> +  buf[0] = ~(v >> 8);
> +  buf[1] = ~v;
> +}
> +
> +__attribute__((noipa)) void
> +bar (unsigned char *buf, unsigned char *tab)
> +{
> +  unsigned v = tab[1] ^ (tab[0] << 8);
> +  buf[0] = (v >> 8);
> +  buf[1] = ~v;
> +}
> +
> +__attribute__((noipa)) void
> +baz (unsigned char *buf, unsigned char *tab)
> +{
> +  unsigned v = tab[1] ^ (tab[0] << 8);
> +  buf[0] = ~(v >> 8);
> +  buf[1] = v;
> +}
> +
> +int
> +main ()
> +{
> +  volatile unsigned char l1 = 0;
> +  volatile unsigned char l2 = 1;
> +  unsigned char buf[2];
> +  unsigned char tab[2] = { l1 + 1, l2 * 2 };
> +  foo (buf, tab);
> +  if (buf[0] != (unsigned char) ~1 || buf[1] != (unsigned char) ~2)
> +__builtin_abort ();
> +  buf[0] = l1 + 7;
> +  buf[1] = l2 * 8;
> +  bar (buf, tab);
> +  if (buf[0] != 1 || buf[1] != (unsigned char) ~2)
> +__builtin_abort ();
> +  buf[0] = l1 + 9;
> +  buf[1] = l2 * 10;
> +  baz (buf, tab);
> +  if (buf[0] != (unsigned char) ~1 || buf[1] != 2)
> +__builtin_abort ();
> +  return 0;
> +}
>
> Jakub


Re: GCC 8.0.0 Status Report (2018-01-15), Trunk in Regression and Documentation fixes only mode

2018-01-16 Thread Segher Boessenkool
On Mon, Jan 15, 2018 at 09:21:07AM +0100, Richard Biener wrote:
> We're still in pretty bad shape regression-wise.  Please also take
> the opportunity to check the state of your favorite host/target
> combination to make sure building and testing works appropriately.

I tested building Linux (the kernel) for all supported architectures.
Everything builds (with my usual tweaks, link with libgcc etc.);
except x86_64 and sh have more problems in the kernel, and mips has
an ICE.  I'll open a PR for that one.


Segher


[PATCH] PR libstdc++/83834 replace wildcard pattern in linker script

2018-01-16 Thread Jonathan Wakely

The soon-to-be-released binutils 2.30 makes a small change to how
lambda functions are demangled, which causes some unwanted symbols to
match a wildcard pattern in the GLIBCXX_3.4 version node of our linker
script. The only symbol that is supposed to match the pattern is
std::cerr so we should just name that explicitly. That prevents other
new symbols matching and being added to the old version.

See PR 83893 for the general problem, which we should fix later.

PR libstdc++/83834
* config/abi/pre/gnu.ver (GLIBCXX_3.4): Replace std::c[a-g]* wildcard
pattern with exact match for std::cerr.

Tested powerpc64le-linux with binutils 2.25.1-32.base.el7_4.1 and on
x86_64-linux with a binutils-2.3.0.0 snapshot from 2018-01-13.

Committed to trunk, backports to follow.


commit f8896e7451cd61008e0ceb0ac9a770d5cb77d85b
Author: Jonathan Wakely 
Date:   Tue Jan 16 12:01:36 2018 +

PR libstdc++/83834 replace wildcard pattern in linker script

PR libstdc++/83834
* config/abi/pre/gnu.ver (GLIBCXX_3.4): Replace std::c[a-g]* 
wildcard
pattern with exact match for std::cerr.

diff --git a/libstdc++-v3/config/abi/pre/gnu.ver 
b/libstdc++-v3/config/abi/pre/gnu.ver
index 774bedec9bc..5e66dc5cc3f 100644
--- a/libstdc++-v3/config/abi/pre/gnu.ver
+++ b/libstdc++-v3/config/abi/pre/gnu.ver
@@ -60,7 +60,7 @@ GLIBCXX_3.4 {
   std::basic_[t-z]*;
   std::ba[t-z]*;
   std::b[b-z]*;
-  std::c[a-g]*;
+  std::cerr;
 # std::char_traits;
 # std::c[i-z]*;
   std::c[i-n]*;


Re: [PATCH] i386: More use reference of struct ix86_frame to avoid copy

2018-01-16 Thread H.J. Lu
On Tue, Jan 16, 2018 at 3:40 AM, H.J. Lu  wrote:
> This patch has been used with my Spectre backport for GCC 7 for many
> weeks and has been checked into GCC 7 branch.  Should I revert it on
> GCC 7 branch or check it into trunk?

Ada build failed with this on trunk:

raised STORAGE_ERROR : stack overflow or erroneous memory access
make[5]: *** [/export/gnu/import/git/sources/gcc/gcc/ada/Make-generated.in:45:
ada/sinfo.h] Error 1

Let me revert it on gcc-7-branch.

H.J.
> H.J.
> ---
> When there is no need to make a copy of ix86_frame, we can use reference
> of struct ix86_frame to avoid copy.
>
> * config/i386/i386.c (ix86_expand_prologue): Use reference of
> struct ix86_frame.
> (ix86_expand_epilogue): Likewise.
> ---
>  gcc/config/i386/i386.c | 6 ++
>  1 file changed, 2 insertions(+), 4 deletions(-)
>
> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> index bfb31db8752..9eba3ffd5d6 100644
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -13385,7 +13385,6 @@ ix86_expand_prologue (void)
>  {
>struct machine_function *m = cfun->machine;
>rtx insn, t;
> -  struct ix86_frame frame;
>HOST_WIDE_INT allocate;
>bool int_registers_saved;
>bool sse_registers_saved;
> @@ -13413,7 +13412,7 @@ ix86_expand_prologue (void)
>m->fs.sp_valid = true;
>m->fs.sp_realigned = false;
>
> -  frame = m->frame;
> +  struct ix86_frame &frame = cfun->machine->frame;
>
>if (!TARGET_64BIT && ix86_function_ms_hook_prologue 
> (current_function_decl))
>  {
> @@ -14291,7 +14290,6 @@ ix86_expand_epilogue (int style)
>  {
>struct machine_function *m = cfun->machine;
>struct machine_frame_state frame_state_save = m->fs;
> -  struct ix86_frame frame;
>bool restore_regs_via_mov;
>bool using_drap;
>bool restore_stub_is_tail = false;
> @@ -14304,7 +14302,7 @@ ix86_expand_epilogue (int style)
>  }
>
>ix86_finalize_stack_frame_flags ();
> -  frame = m->frame;
> +  struct ix86_frame &frame = cfun->machine->frame;
>
>m->fs.sp_realigned = stack_realign_fp;
>m->fs.sp_valid = stack_realign_fp
> --
> 2.14.3
>



-- 
H.J.


Re: [PATCH v3, rs6000] Add -mspeculate-indirect-jumps option and implement non-speculating bctr / bctrl

2018-01-16 Thread Segher Boessenkool
Hi!

On Mon, Jan 15, 2018 at 05:09:06PM -0600, Bill Schmidt wrote:
> @@ -12933,9 +12974,27 @@
>""
>  {
>if (TARGET_32BIT)
> -emit_jump_insn (gen_tablejumpsi (operands[0], operands[1]));
> +{
> +  if (rs6000_speculate_indirect_jumps)
> + emit_jump_insn (gen_tablejumpsi (operands[0], operands[1]));
> +  else
> + {
> +   rtx ccreg = gen_reg_rtx (CCmode);
> +   rtx jump = gen_tablejumpsi_nospec (operands[0], operands[1], ccreg);
> +   emit_jump_insn (jump);
> + }
> +}
>else
> -emit_jump_insn (gen_tablejumpdi (operands[0], operands[1]));
> +{
> +  if (rs6000_speculate_indirect_jumps)
> + emit_jump_insn (gen_tablejumpdi (operands[0], operands[1]));
> +  else
> + {
> +   rtx ccreg = gen_reg_rtx (CCmode);
> +   rtx jump = gen_tablejumpdi_nospec (operands[0], operands[1], ccreg);
> +   emit_jump_insn (jump);
> + }
> +}
>DONE;
>  })

This is easier to read if you swap the "if"s (put the
rs6000_speculate_indirect_jumps test on the outside).

Okay for trunk with or without such a change.  Also okay for the branches
after some testing (esp. on other ABIs, it is easy to break those together
with -mno-speculate-indirect-branches since no one sane would use that
combo on purpose).

Thanks!


Segher


[PATCH, committed] Add myself to MAINTAINERS

2018-01-16 Thread Sebastian Perta
Hi,

Just added myself to MAINTAINERS (write after approval)

Best Regards,
Sebastian

Index: ChangeLog
===
--- ChangeLog(revision 256737)
+++ ChangeLog(working copy)
@@ -1,3 +1,7 @@
+2018-01-16  Sebastian Perta  
+
+* MAINTAINERS (write after approval): Add myself.
+
 2018-01-03  Jakub Jelinek  

 Update copyright years.
Index: MAINTAINERS
===
--- MAINTAINERS(revision 256737)
+++ MAINTAINERS(working copy)
@@ -535,6 +535,7 @@
 Devang Patel
 Andris Pavenis
 Fernando Pereira
+Sebastian Perta
 Sebastian Peryt
 Kaushik Phatak
 Nicolas Pitre



Renesas Electronics Europe Ltd, Dukes Meadow, Millboard Road, Bourne End, 
Buckinghamshire, SL8 5FH, UK. Registered in England & Wales under Registered 
No. 04586709.


Re: [PATCH v3, rs6000] Add -mspeculate-indirect-jumps option and implement non-speculating bctr / bctrl

2018-01-16 Thread Segher Boessenkool
Hi!

On Tue, Jan 16, 2018 at 09:29:13AM +0100, Richard Biener wrote:
> Did you consider simply removing the tablejump/casesi support so
> expansion always
> expands to a balanced tree?  At least if we have any knobs to tune we
> should probably
> tweak them away from the indirect jump using variants with
> -mno-speculate-indirect-jumps,
> right?

We can generate indirect jumps for other situations so this patch will
still be needed.

> Performance optimization, so shouldn't block this patch - I just
> thought I should probably
> mention this.

Yeah let's get this done first :-)


Segher


Re: Avoid GCC 4.1 build failure in fold-const.c

2018-01-16 Thread Jakub Jelinek
On Tue, Jan 16, 2018 at 12:11:28PM +, Richard Sandiford wrote:
> We had:
> 
> tree t = fold_vec_perm (type, arg1, arg2,
> vec_perm_indices (sel, 2, nelts));
> 
> where fold_vec_perm takes a const vec_perm_indices &.  GCC 4.1 apparently
> required a public copy constructor:
> 
> gcc/vec-perm-indices.h:85: error: 'vec_perm_indices::vec_perm_indices(const 
> vec_perm_indices&)' is private
> gcc/fold-const.c:11410: error: within this context
> 
> even though no copy should be made here.  This patch tries to work
> around that by constructing the vec_perm_indices separately.
> 
> Tested on aarch64-linux-gnu.  OK to install?
> 
> Richard
> 
> 
> 2018-01-16  Richard Sandiford  
> 
> gcc/
>   * fold-const.c (fold_ternary_loc): Construct the vec_perm_indices
>   in a separate statement.

Ok, thanks.

> Index: gcc/fold-const.c
> ===
> --- gcc/fold-const.c  2018-01-15 12:38:28.967896418 +
> +++ gcc/fold-const.c  2018-01-16 12:08:10.08501 +
> @@ -11406,8 +11406,8 @@ fold_ternary_loc (location_t loc, enum t
> else /* Currently unreachable.  */
>   return NULL_TREE;
>   }
> -   tree t = fold_vec_perm (type, arg1, arg2,
> -   vec_perm_indices (sel, 2, nelts));
> +   vec_perm_indices indices (sel, 2, nelts);
> +   tree t = fold_vec_perm (type, arg1, arg2, indices);
> if (t != NULL_TREE)
>   return t;
>   }

Jakub


Avoid GCC 4.1 build failure in fold-const.c

2018-01-16 Thread Richard Sandiford
We had:

  tree t = fold_vec_perm (type, arg1, arg2,
  vec_perm_indices (sel, 2, nelts));

where fold_vec_perm takes a const vec_perm_indices &.  GCC 4.1 apparently
required a public copy constructor:

gcc/vec-perm-indices.h:85: error: 'vec_perm_indices::vec_perm_indices(const 
vec_perm_indices&)' is private
gcc/fold-const.c:11410: error: within this context

even though no copy should be made here.  This patch tries to work
around that by constructing the vec_perm_indices separately.

Tested on aarch64-linux-gnu.  OK to install?

Richard


2018-01-16  Richard Sandiford  

gcc/
* fold-const.c (fold_ternary_loc): Construct the vec_perm_indices
in a separate statement.

Index: gcc/fold-const.c
===
--- gcc/fold-const.c2018-01-15 12:38:28.967896418 +
+++ gcc/fold-const.c2018-01-16 12:08:10.08501 +
@@ -11406,8 +11406,8 @@ fold_ternary_loc (location_t loc, enum t
  else /* Currently unreachable.  */
return NULL_TREE;
}
- tree t = fold_vec_perm (type, arg1, arg2,
- vec_perm_indices (sel, 2, nelts));
+ vec_perm_indices indices (sel, 2, nelts);
+ tree t = fold_vec_perm (type, arg1, arg2, indices);
  if (t != NULL_TREE)
return t;
}


[PATCH] i386: More use reference of struct ix86_frame to avoid copy

2018-01-16 Thread H.J. Lu
This patch has been used with my Spectre backport for GCC 7 for many
weeks and has been checked into GCC 7 branch.  Should I revert it on
GCC 7 branch or check it into trunk?

H.J.
---
When there is no need to make a copy of ix86_frame, we can use reference
of struct ix86_frame to avoid copy.

* config/i386/i386.c (ix86_expand_prologue): Use reference of
struct ix86_frame.
(ix86_expand_epilogue): Likewise.
---
 gcc/config/i386/i386.c | 6 ++
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index bfb31db8752..9eba3ffd5d6 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -13385,7 +13385,6 @@ ix86_expand_prologue (void)
 {
   struct machine_function *m = cfun->machine;
   rtx insn, t;
-  struct ix86_frame frame;
   HOST_WIDE_INT allocate;
   bool int_registers_saved;
   bool sse_registers_saved;
@@ -13413,7 +13412,7 @@ ix86_expand_prologue (void)
   m->fs.sp_valid = true;
   m->fs.sp_realigned = false;
 
-  frame = m->frame;
+  struct ix86_frame &frame = cfun->machine->frame;
 
   if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
 {
@@ -14291,7 +14290,6 @@ ix86_expand_epilogue (int style)
 {
   struct machine_function *m = cfun->machine;
   struct machine_frame_state frame_state_save = m->fs;
-  struct ix86_frame frame;
   bool restore_regs_via_mov;
   bool using_drap;
   bool restore_stub_is_tail = false;
@@ -14304,7 +14302,7 @@ ix86_expand_epilogue (int style)
 }
 
   ix86_finalize_stack_frame_flags ();
-  frame = m->frame;
+  struct ix86_frame &frame = cfun->machine->frame;
 
   m->fs.sp_realigned = stack_realign_fp;
   m->fs.sp_valid = stack_realign_fp
-- 
2.14.3



Re: [PATCH][WWWDOCS][AArch64][ARM] Update GCC 8 release notes

2018-01-16 Thread Kyrill Tkachov

Hi Tamar,

On 16/01/18 10:04, Tamar Christina wrote:

Hi All,

This patch updates the GCC 8 release notes for ARM and AArch64.

Ok for cvs?

Thanks,
Tamar

--



+  
+  
+New Armv8.4-A FP16 Floating Point Multiplication Variant instructions have 
been added.  These instructions are
+mandatory in Armv8.4-A but available as an optional extension to Armv8.2-A 
and Armv8.3-A.  The new extension
+can be used by specifying the +fp16fml architectural 
extension on Armv8.2-A and Armv8.3-A. On Armv8.4-A
+the instructions can be enabled by specifying +fp16.
+  
+  
+   Support has been added for the following processors
+   (GCC identifiers in parentheses):
+   
+Arm Cortex-A75 (cortex-a75).
+Arm Cortex-A55 (cortex-a55).
+Arm Cortex-A55/Cortex-A75 DynamIQ big.LITTLE 
(cortex-a75.cortex-a55).
+Arm Cortex-R52 for Armv8-R (cortex-r52).
+   
+   The GCC identifiers can be used
+   as arguments to the -mcpu or -mtune options,
+   for example: -mcpu=cortex-a75 or
+   -mtune=xgene1 or as arguments to the equivalent target

xgene1 was added a few releases ago, better to use one of the new additions 
from the above list.
For example -mtune=cortex-r52.

With that nit the arm changes look ok to me.
Thanks for compiling this!
Kyrill



Re: [PATCH] Fix warn_if_not_align ICE (PR c/83844)

2018-01-16 Thread Richard Sandiford
Jakub Jelinek  writes:
> On Tue, Jan 16, 2018 at 08:57:38AM +0100, Richard Biener wrote:
>> > -  unsigned HOST_WIDE_INT off
>> > -= (tree_to_uhwi (DECL_FIELD_OFFSET (field))
>> > -   + tree_to_uhwi (DECL_FIELD_BIT_OFFSET (field)) / BITS_PER_UNIT);
>> > -  if ((off % warn_if_not_align) != 0)
>> > -warning (opt_w, "%q+D offset %wu in %qT isn't aligned to %u",
>> > +  tree off = byte_position (field);
>> > +  if (!multiple_of_p (TREE_TYPE (off), off, size_int (warn_if_not_align)))
>> 
>> multiple_of_p also returns 0 if it doesn't know (for the non-constant
>> case obviously), so the warning should say "may be not aligned"?  Or
>> we don't want any false positives which means multiple_of_p should get
>> a worker factored out that returns a tri-state value?
>
> tri-state sounds optimizing for the very uncommon case, I think it must be
> very rare in practice when we could prove it must be not aligned and
> especially we'd need to extend it a lot to handle those cases.
>
> Here is an updated patch which says may not be aligned if off is
> non-constant.  When extending the testcase, I've noticed we don't handle
> IMHO quite important case in multiple_of_p, so the patch handles that too.
> I've tried not to increase asymptotic complexity of multiple_of_p, so except
> for the cases where both arguments are INTEGER_CSTs it shouldn't call
> multiple_of_p more times than before.
>
> Ok for trunk if this passes bootstrap/regtest?
>
> 2018-01-16  Jakub Jelinek  
>
>   PR c/83844
>   * stor-layout.c (handle_warn_if_not_align): Use byte_position and
>   multiple_of_p instead of unchecked tree_to_uhwi and UHWI check.
>   If off is not INTEGER_CST, issue a may not be aligned warning
>   rather than isn't aligned.  Use isn%'t rather than isn't.
>   * fold-const.c (multiple_of_p) <case BIT_AND_EXPR>: Don't fall through
>   into MULT_EXPR.
>   <case MULT_EXPR>: Improve the case when bottom and one of the
>   MULT_EXPR operands are INTEGER_CSTs and bottom is multiple of that
>   operand, in that case check if the other operand is multiple of
>   bottom divided by the INTEGER_CST operand.
>
>   * gcc.dg/pr83844.c: New test.
>
> --- gcc/stor-layout.c.jj  2018-01-15 22:40:14.009263280 +0100
> +++ gcc/stor-layout.c 2018-01-16 10:01:48.135111031 +0100
> @@ -1150,12 +1150,16 @@ handle_warn_if_not_align (tree field, un
>  warning (opt_w, "alignment %u of %qT is less than %u",
>record_align, context, warn_if_not_align);
>  
> -  unsigned HOST_WIDE_INT off
> -= (tree_to_uhwi (DECL_FIELD_OFFSET (field))
> -   + tree_to_uhwi (DECL_FIELD_BIT_OFFSET (field)) / BITS_PER_UNIT);
> -  if ((off % warn_if_not_align) != 0)
> -warning (opt_w, "%q+D offset %wu in %qT isn't aligned to %u",
> -  field, off, context, warn_if_not_align);
> +  tree off = byte_position (field);
> +  if (!multiple_of_p (TREE_TYPE (off), off, size_int (warn_if_not_align)))
> +{
> +  if (TREE_CODE (off) == INTEGER_CST)
> + warning (opt_w, "%q+D offset %E in %qT isn%'t aligned to %u",
> +  field, off, context, warn_if_not_align);
> +  else
> + warning (opt_w, "%q+D offset %E in %qT may not be aligned to %u",
> +  field, off, context, warn_if_not_align);
> +}
>  }
>  
>  /* Called from place_field to handle unions.  */
> --- gcc/fold-const.c.jj   2018-01-15 10:02:04.119181355 +0100
> +++ gcc/fold-const.c  2018-01-16 10:48:10.444360796 +0100
> @@ -12595,9 +12595,34 @@ multiple_of_p (tree type, const_tree top
>a multiple of BOTTOM then TOP is a multiple of BOTTOM.  */
>if (!integer_pow2p (bottom))
>   return 0;
> -  /* FALLTHRU */
> +  return (multiple_of_p (type, TREE_OPERAND (top, 1), bottom)
> +   || multiple_of_p (type, TREE_OPERAND (top, 0), bottom));
>  
>  case MULT_EXPR:
> +  if (TREE_CODE (bottom) == INTEGER_CST)
> + {
> +   op1 = TREE_OPERAND (top, 0);
> +   op2 = TREE_OPERAND (top, 1);
> +   if (TREE_CODE (op1) == INTEGER_CST)
> + std::swap (op1, op2);
> +   if (TREE_CODE (op2) == INTEGER_CST)
> + {
> +   if (multiple_of_p (type, op2, bottom))
> + return 1;
> +   /* Handle multiple_of_p ((x * 2 + 2) * 4, 8).  */
> +   if (multiple_of_p (type, bottom, op2))
> + {
> +   widest_int w = wi::sdiv_trunc (wi::to_widest (bottom),
> +  wi::to_widest (op2));
> +   if (wi::fits_to_tree_p (w, TREE_TYPE (bottom)))
> + {
> +   op2 = wide_int_to_tree (TREE_TYPE (bottom), w);
> +   return multiple_of_p (type, op1, op2);
> + }
> + }

It doesn't really matter since this isn't performance-critical code,
but FWIW, there's a wi::multiple_of_p that would avoid the recursion
and do the sdiv_trunc as a side-effect.

Thanks,
Richard


Re: [PATCH 0/5] x86: CVE-2017-5715, aka Spectre

2018-01-16 Thread H.J. Lu
On Tue, Jan 16, 2018 at 12:34 AM, Jan Hubicka  wrote:
>> On Mon, Jan 15, 2018 at 5:53 PM, H.J. Lu  wrote:
>> > On Mon, Jan 15, 2018 at 3:38 AM, H.J. Lu  wrote:
>> >> On Mon, Jan 15, 2018 at 12:31 AM, Richard Biener
>> >>  wrote:
>> >>> On Sun, Jan 14, 2018 at 4:08 PM, H.J. Lu  wrote:
>>  Now my patch set has been checked into trunk.  Here is a patch set
>>  to move struct ix86_frame to machine_function on GCC 7, which is
>>  needed to backport the patch set to GCC 7:
>> 
>>  https://gcc.gnu.org/ml/gcc-patches/2018-01/msg01239.html
>>  https://gcc.gnu.org/ml/gcc-patches/2018-01/msg01240.html
>>  https://gcc.gnu.org/ml/gcc-patches/2018-01/msg01241.html
>> 
>>  OK for gcc-7-branch?
>> >>>
>> >>> Yes, backporting is ok - please watch for possible fallout on trunk and 
>> >>> make
>> >>> sure to adjust the backport accordingly.  I plan to do GCC 7.3 RC1 on
>> >>> Wednesday now with the final release about a week later if no issue shows
>> >>> up.
>> >>>
>> >>
>> >> Backport is blocked by
>> >>
>> >> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=83838
>> >>
>> >> There are many test failures due to lack of comdat support in linker on 
>> >> Solaris.
>> >> I can limit these tests to Linux.
>> >
>> > These are testcase issues and shouldn't block backport to GCC 7.
>>
>> It makes the option using thunks unusable though, right?  Can you simply make
>> them hidden on systems without comdat support?  That duplicates them per TU
>> but at least the feature works.  Or those systems should provide the thunks 
>> via
>> libgcc.
>>
>> I agree we can followup with a fix for Solaris given lack of a public
>> testing machine.
>
> My memory is a bit dim, but I am convinced I was fixing specific errors for 
> comdats
> on Solaris, so I think the toolchain supports them in some sort, just is more
> restrictive/different from GNU implementation.
>
> Indeed, i think just producing sorry, unimplemented message is what we should 
> do
> if we can't support retpoline on given target.
>

It still works without comdat.  GCC just generates a local thunk in each object
file.

-- 
H.J.


Move pa.h FUNCTION_ARG_SIZE to pa.c (PR83858)

2018-01-16 Thread Richard Sandiford
The port-local FUNCTION_ARG_SIZE:

  ((((MODE) != BLKmode \
 ? (HOST_WIDE_INT) GET_MODE_SIZE (MODE) \
 : int_size_in_bytes (TYPE)) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)

is used by code in pa.c and by ASM_DECLARE_FUNCTION_NAME in som.h.
Treating GET_MODE_SIZE as a constant is OK for the former but not
the latter, which is used in target-independent code.  This caused
a build failure on hppa2.0w-hp-hpux11.11.

Tested with a cross build of hppa2.0w-hp-hpux11.11.  OK to install?

Richard


2018-01-16  Richard Sandiford  

gcc/
PR target/83858
* config/pa/pa.h (FUNCTION_ARG_SIZE): Delete.
* config/pa/pa-protos.h (pa_function_arg_size): Declare.
* config/pa/som.h (ASM_DECLARE_FUNCTION_NAME): Use
pa_function_arg_size instead of FUNCTION_ARG_SIZE.
* config/pa/pa.c (pa_function_arg_advance): Likewise.
(pa_function_arg, pa_arg_partial_bytes): Likewise.
(pa_function_arg_size): New function.

Index: gcc/config/pa/pa.h
===
--- gcc/config/pa/pa.h  2018-01-03 11:12:55.202783713 +
+++ gcc/config/pa/pa.h  2018-01-16 10:50:31.245063090 +
@@ -592,15 +592,6 @@ #define INIT_CUMULATIVE_INCOMING_ARGS(CU
   (CUM).indirect = 0,  \
   (CUM).nargs_prototype = 1000
 
-/* Figure out the size in words of the function argument.  The size
-   returned by this macro should always be greater than zero because
-   we pass variable and zero sized objects by reference.  */
-
-#define FUNCTION_ARG_SIZE(MODE, TYPE)  \
-  ((((MODE) != BLKmode \
- ? (HOST_WIDE_INT) GET_MODE_SIZE (MODE) \
- : int_size_in_bytes (TYPE)) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
-
 /* Determine where to put an argument to a function.
Value is zero to push the argument on the stack,
or a hard register in which to store the argument.
Index: gcc/config/pa/pa-protos.h
===
--- gcc/config/pa/pa-protos.h   2018-01-03 11:12:55.198783870 +
+++ gcc/config/pa/pa-protos.h   2018-01-16 10:50:31.244063125 +
@@ -107,5 +107,6 @@ extern void pa_asm_output_aligned_local
 unsigned int);
 extern void pa_hpux_asm_output_external (FILE *, tree, const char *);
 extern HOST_WIDE_INT pa_initial_elimination_offset (int, int);
+extern HOST_WIDE_INT pa_function_arg_size (machine_mode, const_tree);
 
 extern const int pa_magic_milli[];
Index: gcc/config/pa/som.h
===
--- gcc/config/pa/som.h 2018-01-03 11:12:55.191784145 +
+++ gcc/config/pa/som.h 2018-01-16 10:50:31.246063055 +
@@ -136,8 +136,8 @@ #define ASM_DECLARE_FUNCTION_NAME(FILE,
 else   \
   {\
 int arg_size = \
-  FUNCTION_ARG_SIZE (TYPE_MODE (DECL_ARG_TYPE (parm)),\
- DECL_ARG_TYPE (parm));\
+  pa_function_arg_size (TYPE_MODE (DECL_ARG_TYPE (parm)),\
+DECL_ARG_TYPE (parm)); \
 /* Passing structs by invisible reference uses \
one general register.  */   \
 if (arg_size > 2   \
Index: gcc/config/pa/pa.c
===
--- gcc/config/pa/pa.c  2018-01-03 11:12:55.201783752 +
+++ gcc/config/pa/pa.c  2018-01-16 10:50:31.245063090 +
@@ -9485,7 +9485,7 @@ pa_function_arg_advance (cumulative_args
 const_tree type, bool named ATTRIBUTE_UNUSED)
 {
   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
-  int arg_size = FUNCTION_ARG_SIZE (mode, type);
+  int arg_size = pa_function_arg_size (mode, type);
 
   cum->nargs_prototype--;
   cum->words += (arg_size
@@ -9517,7 +9517,7 @@ pa_function_arg (cumulative_args_t cum_v
   if (mode == VOIDmode)
 return NULL_RTX;
 
-  arg_size = FUNCTION_ARG_SIZE (mode, type);
+  arg_size = pa_function_arg_size (mode, type);
 
   /* If this arg would be passed partially or totally on the stack, then
  this routine should return zero.  pa_arg_partial_bytes will
@@ -9724,10 +9724,10 @@ pa_arg_partial_bytes (cumulative_args_t
   if (!TARGET_64BIT)
 return 0;
 
-  if (FUNCTION_ARG_SIZE (mode, type) > 1 && (cum->words & 1))
+  if (pa_function_arg_size (mode, type) > 1 && (cum->words & 1))
 offset = 1;
 
-  if (cum->words + offset + FUNCTION_ARG_SIZE (mode, type) <= max_arg_words)
+  if (cum->words + offset + pa_function_arg_size (mode, type) <= max_arg_words)
 /* Arg fits fully into registers.  */
 return 0;
   else if (cum->words + offset >= max_arg_words)
@@ -10835,4 +10835,16 @@ 

Re: [PATCH 3/6] [ARC] Add support for "register file 16" reduced register set

2018-01-16 Thread Andrew Burgess
* Claudiu Zissulescu  [2017-11-02 13:30:32 
+0100]:

> gcc/
> 2017-03-20  Claudiu Zissulescu  
> 
>   * config/arc/arc-arches.def: Option mrf16 valid for all
>   architectures.
>   * config/arc/arc-c.def (__ARC_RF16__): New predefined macro.
>   * config/arc/arc-cpus.def (em_mini): New cpu with rf16 on.
>   * config/arc/arc-options.def (FL_RF16): Add mrf16 option.
>   * config/arc/arc-tables.opt: Regenerate.
>   * config/arc/arc.c (arc_conditional_register_usage): Handle
>   reduced register file case.
>   (arc_file_start): Set must have build attributes.
>   * config/arc/arc.h (MAX_ARC_PARM_REGS): Conditional define using
>   mrf16 option value.
>   * config/arc/arc.opt (mrf16): Add new option.
>   * config/arc/elf.h (ATTRIBUTE_PCS): Define.
>   * config/arc/genmultilib.awk: Handle new mrf16 option.
>   * config/arc/linux.h (ATTRIBUTE_PCS): Define.
>   * config/arc/t-multilib: Regenerate.
>   * doc/invoke.texi (ARC Options): Document mrf16 option.
> 
> gcc/testsuite/
> 2017-03-20  Claudiu Zissulescu  
> 
>   * gcc.dg/builtin-apply2.c: Change for the ARC's reduced register
>   set file case.
> 
> libgcc/
> 2017-09-18  Claudiu Zissulescu  
> 
>   * config/arc/lib1funcs.S (__udivmodsi4): Use safe version for RF16
>   option.
>   (__divsi3): Use RF16 safe registers.
>   (__modsi3): Likewise.

Looks fine, except I think that the new 'em_mini' cpu needs to be
added to the -mcpu= description in doc/invoke.texi.

Thanks,
Andrew




> ---
>  gcc/config/arc/arc-arches.def |  8 
>  gcc/config/arc/arc-c.def  |  1 +
>  gcc/config/arc/arc-cpus.def   |  1 +
>  gcc/config/arc/arc-options.def|  2 +-
>  gcc/config/arc/arc-tables.opt |  3 +++
>  gcc/config/arc/arc.c  | 27 +++
>  gcc/config/arc/arc.h  |  2 +-
>  gcc/config/arc/arc.opt|  4 
>  gcc/config/arc/elf.h  |  4 
>  gcc/config/arc/genmultilib.awk|  2 ++
>  gcc/config/arc/linux.h|  9 +
>  gcc/config/arc/t-multilib |  4 ++--
>  gcc/doc/invoke.texi   |  8 +++-
>  gcc/testsuite/gcc.dg/builtin-apply2.c |  8 +++-
>  libgcc/config/arc/lib1funcs.S | 22 +++---
>  15 files changed, 84 insertions(+), 21 deletions(-)
> 
> diff --git a/gcc/config/arc/arc-arches.def b/gcc/config/arc/arc-arches.def
> index 29cb9c4..a0d585b 100644
> --- a/gcc/config/arc/arc-arches.def
> +++ b/gcc/config/arc/arc-arches.def
> @@ -40,15 +40,15 @@
>  
>  ARC_ARCH ("arcem", em, FL_MPYOPT_1_6 | FL_DIVREM | FL_CD | FL_NORM   \
> | FL_BS | FL_SWAP | FL_FPUS | FL_SPFP | FL_DPFP   \
> -   | FL_SIMD | FL_FPUDA | FL_QUARK, 0)
> +   | FL_SIMD | FL_FPUDA | FL_QUARK | FL_RF16, 0)
>  ARC_ARCH ("archs", hs, FL_MPYOPT_7_9 | FL_DIVREM | FL_NORM | FL_CD   \
> | FL_ATOMIC | FL_LL64 | FL_BS | FL_SWAP   \
> -   | FL_FPUS | FL_FPUD,  \
> +   | FL_FPUS | FL_FPUD | FL_RF16,\
> FL_CD | FL_ATOMIC | FL_BS | FL_NORM | FL_SWAP)
>  ARC_ARCH ("arc6xx", 6xx, FL_BS | FL_NORM | FL_SWAP | FL_MUL64 | FL_MUL32x16 \
> -   | FL_SPFP | FL_ARGONAUT | FL_DPFP, 0)
> +   | FL_SPFP | FL_ARGONAUT | FL_DPFP | FL_RF16, 0)
>  ARC_ARCH ("arc700", 700, FL_ATOMIC | FL_BS | FL_NORM | FL_SWAP | FL_EA \
> -   | FL_SIMD | FL_SPFP | FL_ARGONAUT | FL_DPFP, \
> +   | FL_SIMD | FL_SPFP | FL_ARGONAUT | FL_DPFP | FL_RF16,   \
> FL_BS | FL_NORM | FL_SWAP)
>  
>  /* Local Variables: */
> diff --git a/gcc/config/arc/arc-c.def b/gcc/config/arc/arc-c.def
> index 8c5097e..c9443c9 100644
> --- a/gcc/config/arc/arc-c.def
> +++ b/gcc/config/arc/arc-c.def
> @@ -28,6 +28,7 @@ ARC_C_DEF ("__ARC_NORM__",  TARGET_NORM)
>  ARC_C_DEF ("__ARC_MUL64__",  TARGET_MUL64_SET)
>  ARC_C_DEF ("__ARC_MUL32BY16__", TARGET_MULMAC_32BY16_SET)
>  ARC_C_DEF ("__ARC_SIMD__",   TARGET_SIMD_SET)
> +ARC_C_DEF ("__ARC_RF16__",   TARGET_RF16)
>  
>  ARC_C_DEF ("__ARC_BARREL_SHIFTER__", TARGET_BARREL_SHIFTER)
>  
> diff --git a/gcc/config/arc/arc-cpus.def b/gcc/config/arc/arc-cpus.def
> index 60b4045..c2b0062 100644
> --- a/gcc/config/arc/arc-cpus.def
> +++ b/gcc/config/arc/arc-cpus.def
> @@ -46,6 +46,7 @@
> TUNE  Tune value for the given configuration, otherwise NONE.  */
>  
>  ARC_CPU (em, em, 0, NONE)
> +ARC_CPU (em_mini,   em, FL_RF16, NONE)
>  ARC_CPU (arcem,  em, FL_MPYOPT_2|FL_CD|FL_BS, NONE)
>  ARC_CPU (em4,em, FL_CD, NONE)
>  ARC_CPU (em4_dmips, em, FL_MPYOPT_2|FL_CD|FL_DIVREM|FL_NORM|FL_SWAP|FL_BS, 
> NONE)
> diff --git a/gcc/config/arc/arc-options.def b/gcc/config/arc/arc-options.def
> index be51614..8fc7b50 100644
> --- 

Re: [PATCH PR82096] Fix ICE in int_mode_for_mode, at stor-layout.c:403 with arm-linux-gnueabi

2018-01-16 Thread Sudakshina Das

Hi Jeff

On 12/01/18 23:00, Jeff Law wrote:

On 01/12/2018 01:45 AM, Christophe Lyon wrote:

Hi,

On 11 January 2018 at 11:58, Sudakshina Das  wrote:

Hi Jeff


On 10/01/18 21:08, Jeff Law wrote:


On 01/10/2018 09:25 AM, Sudakshina Das wrote:


Hi Jeff

On 10/01/18 10:44, Sudakshina Das wrote:


Hi Jeff

On 09/01/18 23:43, Jeff Law wrote:


On 01/05/2018 12:25 PM, Sudakshina Das wrote:


Hi Jeff

On 05/01/18 18:44, Jeff Law wrote:


On 01/04/2018 08:35 AM, Sudakshina Das wrote:


Hi

The bug reported a particular test, di-longlong64-sync-1.c, failing when
run on arm-linux-gnueabi with options -mthumb -march=armv5t -O[g,1,2,3]
and -mthumb -march=armv6 -O[g,1,2,3].

According to what I could see, the crash was caused by the explicit
VOIDmode argument that was passed to emit_store_flag_force ().
Since the comparison argument was a long long, it was being forced into a
VOID type register before the comparison (in prepare_cmp_insn ()) is done.



As pointed out by Kyrill, there is a comment on emit_store_flag()
which
says "MODE is the mode to use for OP0 and OP1 should they be
CONST_INTs.
 If it is VOIDmode, they cannot both be CONST_INT". This
condition is
not true in this case and thus I think it is suitable to change the
argument.

Testing done: Checked for regressions on bootstrapped
arm-none-linux-gnueabi and arm-none-linux-gnueabihf and added new
test
cases.

Sudi

ChangeLog entries:

*** gcc/ChangeLog ***

2017-01-04  Sudakshina Das  

PR target/82096
* optabs.c (expand_atomic_compare_and_swap): Change argument
to emit_store_flag_force.

*** gcc/testsuite/ChangeLog ***

2017-01-04  Sudakshina Das  

PR target/82096
* gcc.c-torture/compile/pr82096-1.c: New test.
* gcc.c-torture/compile/pr82096-2.c: Likewise.


In the case where both (op0/op1) to
emit_store_flag/emit_store_flag_force are constants, don't we know
the
result of the comparison and shouldn't we have optimized the store
flag
to something simpler?

I feel like I must be missing something here.



emit_store_flag_force () is comparing a register to op0.


?
/* Emit a store-flags instruction for comparison CODE on OP0 and OP1
  and storing in TARGET.  Normally return TARGET.
  Return 0 if that cannot be done.

  MODE is the mode to use for OP0 and OP1 should they be
CONST_INTs.  If
  it is VOIDmode, they cannot both be CONST_INT.


So we're comparing op0 and op1 AFAICT.  One, but not both can be a
CONST_INT.  If both are a CONST_INT, then you need to address the
problem in the caller (by optimizing away the condition).  If you've
got
a REG and a CONST_INT, then the mode should be taken from the REG
operand.







The 2 constant arguments are to the expand_atomic_compare_and_swap ()
function. emit_store_flag_force () is used in case when this
function is
called by the bool variant of the built-in function where the bool
return value is computed by comparing the result register with the
expected op0.


So if only one of the two objects is a CONST_INT, then the mode should
come from the other object.  I think that's the fundamental problem
here
and that you're just papering over it by changing the caller.


I think my earlier explanation was a bit misleading and I may have
rushed into quoting the comment about both operands being const for
emit_store_flag_force(). The problem is with the function, and I do
agree that your suggestion of changing the function to add the code
below is a better approach than changing the caller. I will
change the patch and test it.



This is the updated patch according to your suggestions.

Testing: Checked for regressions on arm-none-linux-gnueabihf and added
new test case.

Thanks
Sudi

ChangeLog entries:

*** gcc/ChangeLog ***

2017-01-10  Sudakshina Das  

  PR target/82096
  * expmed.c (emit_store_flag_force): Swap if const op0
  and change VOIDmode to mode of op0.

*** gcc/testsuite/ChangeLog ***

2017-01-10  Sudakshina Das  

  PR target/82096
  * gcc.c-torture/compile/pr82096.c: New test.


OK.



Thanks. Committed as r256526.
Sudi



Could you add a guard like in other tests to skip it if the user added
-mfloat-abi=XXX when running the tests?

For instance, I have a configuration where I add
-mthumb/-march=armv8-a/-mfpu=crypto-neon-fp-armv8/-mfloat-abi=hard
and the new test fails because:
xgcc: error: -mfloat-abi=soft and -mfloat-abi=hard may not be used together

It's starting to feel like the test should move into gcc.target/arm :-)
  I nearly suggested that already.  Consider moving it into
gcc.target/arm pre-approved along with adding the -O
to the options and whatever is needed to skip the test at the
appropriate time.


My initial thought was also to put the test in gcc.target/arm. But I 
wanted to put it in a torture suite as this was failing at different 
optimization levels. Creating several tests for different 

[C++ Patch] PR 81054 ("[7/8 Regression] ICE with volatile variable in constexpr function")

2018-01-16 Thread Paolo Carlini

Hi,

in this error recovery regression we ICE when we end-up in an 
inconsistent state after meaningful diagnostic emitted by 
ensure_literal_type_for_constexpr_object and then some redundant / 
slightly misleading one emitted by check_static_variable_definition. I 
think we can just return early from cp_finish_decl and solve the primary 
and the secondary issue. I also checked that clang too doesn't emit an 
error for line #28 of constexpr-diag3.C, after the hard error for co1 
itself at line #27. Tested x86_64-linux.


Thanks, Paolo.

//

/cp
2018-01-16  Paolo Carlini  

PR c++/81054
* decl.c (cp_finish_decl): Early return when the
ensure_literal_type_for_constexpr_object fails.

/testsuite
2018-01-16  Paolo Carlini  

PR c++/81054
* g++.dg/cpp0x/constexpr-ice19.C: New.
* g++.dg/cpp0x/constexpr-diag3.C: Adjust.
Index: cp/decl.c
===
--- cp/decl.c   (revision 256728)
+++ cp/decl.c   (working copy)
@@ -6811,7 +6811,11 @@ cp_finish_decl (tree decl, tree init, bool init_co
 }
 
   if (!ensure_literal_type_for_constexpr_object (decl))
-DECL_DECLARED_CONSTEXPR_P (decl) = 0;
+{
+  DECL_DECLARED_CONSTEXPR_P (decl) = 0;
+  TREE_TYPE (decl) = error_mark_node;
+  return;
+}
 
   if (VAR_P (decl)
   && DECL_CLASS_SCOPE_P (decl)
Index: testsuite/g++.dg/cpp0x/constexpr-diag3.C
===
--- testsuite/g++.dg/cpp0x/constexpr-diag3.C(revision 256728)
+++ testsuite/g++.dg/cpp0x/constexpr-diag3.C(working copy)
@@ -25,7 +25,7 @@ struct complex// { dg-message "no 
.constexpr.
 };
 
 constexpr complex co1(0, 1);  // { dg-error "not literal" }
-constexpr double dd2 = co1.real(); // { dg-error "|in .constexpr. expansion of " }
+constexpr double dd2 = co1.real();
 
 // 
 
Index: testsuite/g++.dg/cpp0x/constexpr-ice19.C
===
--- testsuite/g++.dg/cpp0x/constexpr-ice19.C(nonexistent)
+++ testsuite/g++.dg/cpp0x/constexpr-ice19.C(working copy)
@@ -0,0 +1,13 @@
+// PR c++/81054
+// { dg-do compile { target c++11 } }
+
+struct A
+{
+  volatile int i;
+  constexpr A() : i() {}
+};
+
+struct B
+{
+  static constexpr A a {};  // { dg-error "not literal" }
+};


Re: [PATCH 2/6] [ARC] Add SJLI support.

2018-01-16 Thread Andrew Burgess
* Claudiu Zissulescu  [2017-11-02 13:30:31 
+0100]:

> gcc/
> 2017-02-20  Claudiu Zissulescu  
> 
>   * config/arc/arc-protos.h: Add arc_is_secure_call_p proto.
>   * config/arc/arc.c (arc_handle_secure_attribute): New function.
>   (arc_attribute_table): Add 'secure_call' attribute.
>   (arc_print_operand): Print secure call operand.
>   (arc_function_ok_for_sibcall): Don't optimize tail calls when
>   secure.
>   (arc_is_secure_call_p): New function.
>   * config/arc/arc.md (call_i): Add support for sjli instruction.
>   (call_value_i): Likewise.
>   * config/arc/constraints.md (Csc): New constraint.
> ---
>  gcc/config/arc/arc-protos.h   |   1 +
>  gcc/config/arc/arc.c  | 164 
> +++---
>  gcc/config/arc/arc.md |  32 +
>  gcc/config/arc/constraints.md |   7 ++
>  gcc/doc/extend.texi   |   6 ++
>  5 files changed, 155 insertions(+), 55 deletions(-)

Looks fine, few comments inline below.

Thanks
Andrew

> 
> @@ -3939,6 +3985,9 @@ arc_print_operand (FILE *file, rtx x, int code)
>   : NULL_TREE);
> if (lookup_attribute ("jli_fixed", attrs))
>   {
> +   /* No special treatment for jli_fixed functions.  */
> +   if (code == 'j' )

Extra space before ')'.

> + break;
> fprintf (file, "%ld\t; @",
>  TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attrs))));
> assemble_name (file, XSTR (x, 0));
> @@ -3947,6 +3996,22 @@ arc_print_operand (FILE *file, rtx x, int code)
>   }
> fprintf (file, "@__jli.");
> assemble_name (file, XSTR (x, 0));
> +   if (code == 'j')
> + arc_add_jli_section (x);
> +   return;
> + }
> +  if (GET_CODE (x) == SYMBOL_REF
> +   && arc_is_secure_call_p (x))
> + {
> +   /* No special treatment for secure functions.  */
> +   if (code == 'j' )
> + break;
> +   tree attrs = (TREE_TYPE (SYMBOL_REF_DECL (x)) != error_mark_node
> + ? TYPE_ATTRIBUTES (TREE_TYPE (SYMBOL_REF_DECL (x)))
> + : NULL_TREE);
> +   fprintf (file, "%ld\t; @",
> +TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attrs))));
> +   assemble_name (file, XSTR (x, 0));
> return;
>   }
>break;
> @@ -6897,6 +6962,8 @@ arc_function_ok_for_sibcall (tree decl,
>   return false;
>if (lookup_attribute ("jli_fixed", attrs))
>   return false;
> +  if (lookup_attribute ("secure_call", attrs))
> + return false;
>  }
>  
>/* Everything else is ok.  */
> @@ -7594,46 +7661,6 @@ arc_reorg_loops (void)
>reorg_loops (true, &arc_doloop_hooks);
>  }
>  
> -/* Add the given function declaration to emit code in JLI section.  */
> -
> -static void
> -arc_add_jli_section (rtx pat)
> -{
> -  const char *name;
> -  tree attrs;
> -  arc_jli_section *sec = arc_jli_sections, *new_section;
> -  tree decl = SYMBOL_REF_DECL (pat);
> -
> -  if (!pat)
> -return;
> -
> -  if (decl)
> -{
> -  /* For fixed locations do not generate the jli table entry.  It
> -  should be provided by the user as an asm file.  */
> -  attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
> -  if (lookup_attribute ("jli_fixed", attrs))
> - return;
> -}
> -
> -  name = XSTR (pat, 0);
> -
> -  /* Don't insert the same symbol twice.  */
> -  while (sec != NULL)
> -{
> -  if(strcmp (name, sec->name) == 0)
> - return;
> -  sec = sec->next;
> -}
> -
> -  /* New name, insert it.  */
> -  new_section = (arc_jli_section *) xmalloc (sizeof (arc_jli_section));
> -  gcc_assert (new_section != NULL);
> -  new_section->name = name;
> -  new_section->next = arc_jli_sections;
> -  arc_jli_sections = new_section;
> -}
> -
>  /* Scan all calls and add symbols to be emitted in the jli section if
> needed.  */
>  
> @@ -10968,6 +10995,63 @@ arc_handle_jli_attribute (tree *node 
> ATTRIBUTE_UNUSED,
> return NULL_TREE;
>  }
>  
> +/* Handle a "secure_call" attribute; arguments as in struct
> +   attribute_spec.handler.  */
> +
> +static tree
> +arc_handle_secure_attribute (tree *node ATTRIBUTE_UNUSED,
> +   tree name, tree args, int,
> +   bool *no_add_attrs)
> +{
> +  if (!TARGET_EM)
> +{
> +  warning (OPT_Wattributes,
> +"%qE attribute only valid for ARC EM architecture",
> +name);
> +  *no_add_attrs = true;
> +}
> +
> +  if (args == NULL_TREE)
> +{
> +  warning (OPT_Wattributes,
> +"argument of %qE attribute is missing",
> +name);
> +  *no_add_attrs = true;
> +}
> +  else
> +{
> +  if (TREE_CODE (TREE_VALUE (args)) == NON_LVALUE_EXPR)
> + TREE_VALUE (args) = TREE_OPERAND (TREE_VALUE (args), 0);
> +  tree arg = TREE_VALUE (args);
> +  if 

  1   2   >