Re: [PATCH 3/5] Run profile feedback tests with autofdo

2016-05-21 Thread Andi Kleen
On Sat, May 21, 2016 at 10:55:21PM +0200, Bernhard Reutner-Fischer wrote:
> >@@ -313,6 +332,7 @@ proc profopt-execute { src } {
> > # valid, by running it after dg-additional-files-options.
> > foreach ext $prof_ext {
> > profopt-target-cleanup $tmpdir $base $ext
> >+profopt-target-cleanup $tmpdir perf data
> > }
> 
> I think I've asked this before.. Why do you need to run cleanup of perf data 
> in this loop (and not outside)?
> Also why the asymmetry ...

perf generates a backup file if a file already exists. 

> >@@ -399,7 +460,9 @@ proc profopt-execute { src } {
> > # Remove the profiling data files.
> > foreach ext $prof_ext {
> > profopt-target-cleanup $tmpdir $base $ext
> >+profopt-target-cleanup $tmpdir perf data
> > }
> >+file delete "perf.data" 

The extra one is not needed. I'll remove it.

Thanks
-Andi

-- 
a...@linux.intel.com -- Speaking for myself only.


Re: [PATCH 3/5] Run profile feedback tests with autofdo

2016-05-21 Thread Bernhard Reutner-Fischer
On May 21, 2016 6:36:24 PM GMT+02:00, Andi Kleen  wrote:
>From: Andi Kleen 

>diff --git a/gcc/testsuite/lib/profopt.exp
>b/gcc/testsuite/lib/profopt.exp
>index 0aea6c4..4ddb10a 100644
>--- a/gcc/testsuite/lib/profopt.exp
>+++ b/gcc/testsuite/lib/profopt.exp
>@@ -36,7 +36,7 @@ load_lib gcc-dg.exp
> global PROFOPT_OPTIONS perf_delta
> 
> # The including .exp file must define these.
>-global tool profile_option feedback_option prof_ext
>+global tool profile_option feedback_option prof_ext profile_wrapper
> if ![info exists tool] {
> error "Tools is not specified."
> }
>@@ -229,6 +229,18 @@ proc profopt-get-options { src } {
> return ${dg-extra-tool-flags}
> }
> 
>+# auto-profopt-execute -- Compile for auot profiling and then
>feedback, then normal.

s/auot/auto/

>+# SRC is the full path name of the testcase.
>+proc auto-profopt-execute { src } {
>+set profile_wrapper [profopt-perf-wrapper]
>+set profile_options "-g"
>+set feedback_options "-fauto-profile"
>+set run_autofdo 1
>+profopt-execute $src
>+set run_autofdo ""
>+set profile_wrapper ""
>+}
>+
> #
># c-prof-execute -- compile for profiling and then feedback, then
>normal
> #
>@@ -238,6 +250,7 @@ proc profopt-execute { src } {
> global srcdir tmpdir
> global PROFOPT_OPTIONS
>global tool profile_option feedback_option prof_ext perf_ext perf_delta
>+global profile_wrapper run_autofdo ld_library_path
> global generate_final_code use_final_code
> global verbose
> global testname_with_flags
>@@ -248,6 +261,12 @@ proc profopt-execute { src } {
> if ![info exists feedback_option] {
> error "No feedback option specified for second compile."
> }
>+if ![info exists profile_wrapper] {
>+  set profile_wrapper ""
>+}
>+if ![info exists run_autofdo] {
>+  set run_autofdo ""
>+}
> 
> # Use the default option list or one defined for a set of tests.
> if ![info exists PROFOPT_OPTIONS] {
>@@ -313,6 +332,7 @@ proc profopt-execute { src } {
>   # valid, by running it after dg-additional-files-options.
>   foreach ext $prof_ext {
>   profopt-target-cleanup $tmpdir $base $ext
>+  profopt-target-cleanup $tmpdir perf data
>   }

I think I've asked this before.. Why do you need to run cleanup of perf data in 
this loop (and not outside)?
Also why the asymmetry ...
 
>   # Tree profiling requires TLS runtime support, which may need
>@@ -335,12 +355,49 @@ proc profopt-execute { src } {
>   }
> 
>   # Run the profiled test.
>+  if { $run_autofdo == 1 } {
>+  if { ![info exists ld_library_path]} {
>+  set ld_library_path ""
>+  }
>+  set orig_ld_library_path "[getenv LD_LIBRARY_PATH]"
>+  setenv LD_LIBRARY_PATH "$ld_library_path:$orig_ld_library_path"
>+  verbose "Running $profile_wrapper $execname1"
>+  set id [remote_spawn "" "$profile_wrapper $execname1" "readonly"]
>+  setenv LD_LIBRARY_PATH $orig_ld_library_path
>+  if { $id < 0 } {
>+  warning "Failed to run profiler"
>+  set status "fail"
>+  } else {
>+  set result [remote_wait "" 300]
>+  set status [lindex $result 0]
>+  verbose "perf result $result"
>+  if { $status == 0 } {
>+  set status "pass"
>+  } else {
>+  set status "fail"
>+  }
>+  }
>+  } else {
>+  set result [${tool}_load $execname1 "" ""]
>+  set status [lindex $result 0]
>+  }
> 
>-  set result [${tool}_load $execname1 "" ""]
>-  set status [lindex $result 0]
>   set missing_file 0
>   # Make sure the profile data was generated, and fail if not.
>   if { $status == "pass" } {
>+  # convert profile
>+  if { $run_autofdo == 1 } {
>+  set cmd "create_gcov --binary $execname1 --profile=perf.data
>-gcov_version=1 --gcov=$tmpdir/$base.$ext"
>+  verbose "Running $cmd"
>+  set id [remote_spawn "" $cmd]
>+  if { $id < 0 } {
>+  set status "fail"
>+  fail "$testcase: Cannot run $cmd"
>+  }
>+  set status [remote_wait "" 300]
>+  set status "pass"
>+  }
>+
>   foreach ext $prof_ext {
>   remote_upload target $tmpdir/$base.$ext
>   set files [glob -nocomplain $base.$ext]
>@@ -375,6 +432,10 @@ proc profopt-execute { src } {
>   set options "$extra_options"
>   lappend options "additional_flags=$option $extra_flags
>$feedback_option"
>   set optstr "$option $feedback_option"
>+  if { [string first "-fauto-profile" $options] >= 0} {
>+  set options [regsub -- "-fauto-profile" $options
>"-fauto-profile=$tmpdir/$base.$ext"]
>+  }
>+
>   set comp_output [${tool}_target_compile "$src" "$execname2"
>"executable" $options]
> 

Re: [PATCH 2/5] Don't cause ICEs when auto profile file is not found with checking

2016-05-21 Thread Bernhard Reutner-Fischer
On May 21, 2016 6:36:23 PM GMT+02:00, Andi Kleen  wrote:
>From: Andi Kleen 
>
>Currently, on a checking enabled compiler when -fauto-profile does
>not find the profile feedback file it errors out with assertation
>failures. Add proper errors for this case.

Please s/mathch/match/ while at it.

thanks,



Re: [PATCH 1/5] Add gcc-auto-profile script

2016-05-21 Thread Bernhard Reutner-Fischer
On May 21, 2016 6:36:22 PM GMT+02:00, Andi Kleen  wrote:
>From: Andi Kleen 

>+if [ "$1" = "--kernel" ] ; then
>+  FLAGS=k
>+  shift
>+fi
>+if [ "$1" == "--all" ] ; then

== is legacy, s/==/=/

>+  FLAGS=uk
>+  shift
>+fi
>+
>+if ! grep -q Intel /proc/cpuinfo ] ; then
>+  echo >&2 "Only Intel CPUs supported"
>+  exit 1
>+fi
>+
>+if grep -q hypervisor /proc/cpuinfo ; then
>+  echo >&2 "Warning: branch profiling may not be functional in VMs"
>+fi

grep && echo would do but OK.

>+
>+case `egrep -q "^cpu family\s*: 6" /proc/cpuinfo &&
>+  egrep "^model\s*:" /proc/cpuinfo | head -1` in'''

head and tail both require -n nowadays (in fact since susv2, IIRC), so please 
head -n1

thanks,



[PATCH] Fix up a few i386 tests

2016-05-21 Thread Jakub Jelinek
Hi!

While trying to look for bugs using the
https://sourceware.org/ml/binutils/2016-05/msg00328.html
hacks, in order to achieve more testing I've also turned all
dg-do compile tests into dg-do assemble, so that they would be assembled and
I could watch for diagnostics.  There are about 2 tests that use complete
garbage in inline asm, which is fine, but I guess that in the following ones
the invalid assembly is unintended.
The first one, for -m64 it would be enough to use "r" (1LL) or "r" (2LL),
but for -m32 kmovq supports just loading from m64.
The second one has -mavx512f only enabled, so I've replaced the avx512dq
instruction used in there with an avx512f one.
And the third one contains asm template that is only valid for 32-bit code.

Tested on x86_64-linux and i686-linux, ok for trunk?

2016-05-21  Jakub Jelinek  

* gcc.target/i386/avx512bw-kunpckdq-1.c (avx512bw_test): Use "m"
constraint instead of "r".
* gcc.target/i386/avx512f-additional-reg-names.c (foo): Use vpxord
insn instead of vxorpd.
* gcc.target/i386/strinline.c (__mempcpy_by2): Use empty asm template
string for x86_64.

--- gcc/testsuite/gcc.target/i386/avx512bw-kunpckdq-1.c.jj  2014-12-03 
15:06:06.469866209 +0100
+++ gcc/testsuite/gcc.target/i386/avx512bw-kunpckdq-1.c 2016-05-21 
18:35:34.269533825 +0200
@@ -8,9 +8,10 @@ void
 avx512bw_test () {
   __mmask64 k1, k2, k3;
   volatile __m512i x;
+  long long one = 1, two = 2;
 
-  __asm__( "kmovq %1, %0" : "=k" (k1) : "r" (1) );
-  __asm__( "kmovq %1, %0" : "=k" (k2) : "r" (2) );
+  __asm__( "kmovq %1, %0" : "=k" (k1) : "m" (one) );
+  __asm__( "kmovq %1, %0" : "=k" (k2) : "m" (two) );
 
   k3 = _mm512_kunpackd (k1, k2);
   x = _mm512_mask_avg_epu8 (x, k3, x, x);
--- gcc/testsuite/gcc.target/i386/avx512f-additional-reg-names.c.jj 
2014-10-01 16:27:25.838134349 +0200
+++ gcc/testsuite/gcc.target/i386/avx512f-additional-reg-names.c
2016-05-21 18:37:46.505781090 +0200
@@ -5,5 +5,5 @@ void foo ()
 {
   register int zmm_var asm ("zmm6") __attribute__((unused));
 
-  __asm__ __volatile__("vxorpd %%zmm0, %%zmm0, %%zmm7\n" : : : "zmm7" );
+  __asm__ __volatile__("vpxord %%zmm0, %%zmm0, %%zmm7\n" : : : "zmm7" );
 }
--- gcc/testsuite/gcc.target/i386/strinline.c.jj2014-09-25 
15:02:06.703336175 +0200
+++ gcc/testsuite/gcc.target/i386/strinline.c   2016-05-21 18:37:07.454298661 
+0200
@@ -8,7 +8,11 @@ __mempcpy_by2 (char *__dest, __const cha
   register char *__tmp = __dest;
   register unsigned long int __d0, __d1;
   __asm__ __volatile__
-("shrl  $1,%3\n\t"
+(
+#ifdef __x86_64__
+ ""
+#else
+ "shrl  $1,%3\n\t"
  "jz2f\n"
  "1:\n\t"
  "movl  (%2),%0\n\t"
@@ -20,6 +24,7 @@ __mempcpy_by2 (char *__dest, __const cha
  "2:\n\t"
  "movw  (%2),%w0\n\t"
  "movw  %w0,(%1)"
+#endif
  : "=" (__d0), "=r" (__tmp), "=" (__src), "=" (__d1),
"=m" ( *(struct { __extension__ char __x[__srclen]; } *)__dest)
  : "1" (__tmp), "2" (__src), "3" (__srclen / 2),

Jakub


Updated autofdo bootstrap and testing patches

2016-05-21 Thread Andi Kleen
Here's an updated version of the patchkit to enable autofdo bootstrap
and testing. It also fixes some autofdo issues. The last patch is more of a
workaround (to make autofdo bootstrap not ICE), and may need a better fix.

The main motivation is to get better test coverage for autofdo
and also a useful benchmark (speed of generated compiler) for it.
If you want the absolutely fastest compiler, using profiledbootstrap
is still the way to go.

I addressed most of the earlier review comments. The python script
is still python 2 for better compatibility with old systems.

Ok to commit?



[PATCH 5/5] workaround for PR70427

2016-05-21 Thread Andi Kleen
From: Andi Kleen 

This makes autofdo bootstrap not crash.

This is probably not the right fix, but for now it works for me.
Not for submission.
---
 gcc/ipa-profile.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/gcc/ipa-profile.c b/gcc/ipa-profile.c
index da17bcd..c7d7792 100644
--- a/gcc/ipa-profile.c
+++ b/gcc/ipa-profile.c
@@ -201,6 +201,8 @@ ipa_profile_generate_summary (void)
if (h->hvalue.counters[2])
  {
struct cgraph_edge * e = node->get_edge (stmt);
+   if (!e)
+ continue;
if (e && !e->indirect_unknown_callee)
  continue;
e->indirect_info->common_target_id
-- 
2.8.2



[PATCH 1/5] Add gcc-auto-profile script

2016-05-21 Thread Andi Kleen
From: Andi Kleen 

Using autofdo is currently somewhat difficult. It requires using the
model-specific branches-taken event, which differs between CPUs.
The example shown in the manual requires a specially patched version of
perf that is non-standard, and also will likely not work everywhere.

This patch adds a new gcc-auto-profile script that figures out the
correct event and runs perf.

This is needed to actually make use of autofdo in a generic way
in the build system and in the test suite.

Since maintaining the script would be somewhat tedious (needs changes
every time a new CPU comes out) I auto generated it from the online
Intel event database. The script to do that is in contrib and can be
rerun.

Right now configure does not test whether perf works. This
would vary depending on the build and target system, and since
it currently doesn't work under virtualization and needs an up-to-date
kernel, it may often fail in common distribution build setups.

So far the script is not installed.

gcc/:
2016-05-21  Andi Kleen  

* doc/invoke.texi: Document gcc-auto-profile
* config/i386/gcc-auto-profile: New file.

contrib/:

2016-05-21  Andi Kleen  

* gen_autofdo_event.py: New file to regenerate
gcc-auto-profile.
---
 contrib/gen_autofdo_event.py | 155 +++
 gcc/config/i386/gcc-auto-profile |  70 ++
 gcc/doc/invoke.texi  |  31 ++--
 3 files changed, 251 insertions(+), 5 deletions(-)
 create mode 100755 contrib/gen_autofdo_event.py
 create mode 100755 gcc/config/i386/gcc-auto-profile

diff --git a/contrib/gen_autofdo_event.py b/contrib/gen_autofdo_event.py
new file mode 100755
index 000..907430d
--- /dev/null
+++ b/contrib/gen_autofdo_event.py
@@ -0,0 +1,155 @@
+#!/usr/bin/python
+# Generate Intel taken branches Linux perf event script for autofdo profiling.
+
+# Copyright (C) 2016 Free Software Foundation, Inc.
+#
+# GCC is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 3, or (at your option) any later
+# version.
+#
+# GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+# for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3.  If not see
+# .  */
+
+# Run it with perf record -b -e EVENT program ...
+# The Linux Kernel needs to support the PMU of the current CPU, and
+# It will likely not work in VMs.
+# Add --all to print for all cpus, otherwise for current cpu.
+# Add --script to generate shell script to run correct event.
+#
+# Requires internet (https) access. This may require setting up a proxy
+# with export https_proxy=...
+#
+import urllib2
+import sys
+import json
+import argparse
+import collections
+
+baseurl = "https://download.01.org/perfmon;
+
+target_events = (u'BR_INST_RETIRED.NEAR_TAKEN',
+ u'BR_INST_EXEC.TAKEN',
+ u'BR_INST_RETIRED.TAKEN_JCC',
+ u'BR_INST_TYPE_RETIRED.COND_TAKEN')
+
+ap = argparse.ArgumentParser()
+ap.add_argument('--all', '-a', help='Print for all CPUs', action='store_true')
+ap.add_argument('--script', help='Generate shell script', action='store_true')
+args = ap.parse_args()
+
+eventmap = collections.defaultdict(list)
+
+def get_cpu_str():
+with open('/proc/cpuinfo', 'r') as c:
+vendor, fam, model = None, None, None
+for j in c:
+n = j.split()
+if n[0] == 'vendor_id':
+vendor = n[2]
+elif n[0] == 'model' and n[1] == ':':
+model = int(n[2])
+elif n[0] == 'cpu' and n[1] == 'family':
+fam = int(n[3])
+if vendor and fam and model:
+return "%s-%d-%X" % (vendor, fam, model), model
+return None, None
+
+def find_event(eventurl, model):
+print >>sys.stderr, "Downloading", eventurl
+u = urllib2.urlopen(eventurl)
+events = json.loads(u.read())
+u.close()
+
+found = 0
+for j in events:
+if j[u'EventName'] in target_events:
+event = "cpu/event=%s,umask=%s/" % (j[u'EventCode'], j[u'UMask'])
+if u'PEBS' in j and j[u'PEBS'] > 0:
+event += "p"
+if args.script:
+eventmap[event].append(model)
+else:
+print j[u'EventName'], "event for model", model, "is", event
+found += 1
+return found
+
+if not args.all:
+cpu, model = get_cpu_str()
+if not cpu:
+sys.exit("Unknown CPU type")
+
+url = baseurl + "/mapfile.csv"
+print >>sys.stderr, "Downloading", url
+u = urllib2.urlopen(url)
+found = 0

[PATCH 3/5] Run profile feedback tests with autofdo

2016-05-21 Thread Andi Kleen
From: Andi Kleen 

Extend the existing bprob and tree-prof tests to also run with autofdo.
The test runtimes are really a bit too short for autofdo, but it's
a reasonable sanity check.

This only works natively for now.

dejagnu doesn't seem to support a wrapper for unix tests, so I had
to open code running these tests.  That should be ok due to the
native run restrictions.

gcc/testsuite/:

2016-05-21  Andi Kleen  

* g++.dg/bprob/bprob.exp: Support autofdo.
* g++.dg/tree-prof/tree-prof.exp: dito.
* gcc.dg/tree-prof/tree-prof.exp: dito.
* gcc.misc-tests/bprob.exp: dito.
* gfortran.dg/prof/prof.exp: dito.
* lib/profopt.exp: dito.
* lib/target-supports.exp: Check for autofdo.
---
 gcc/testsuite/g++.dg/bprob/bprob.exp | 10 
 gcc/testsuite/g++.dg/tree-prof/tree-prof.exp | 10 
 gcc/testsuite/gcc.dg/tree-prof/tree-prof.exp | 10 
 gcc/testsuite/gcc.misc-tests/bprob.exp   | 14 ++
 gcc/testsuite/gfortran.dg/prof/prof.exp  |  9 
 gcc/testsuite/lib/profopt.exp| 69 ++--
 gcc/testsuite/lib/target-supports.exp| 31 +
 7 files changed, 150 insertions(+), 3 deletions(-)

diff --git a/gcc/testsuite/g++.dg/bprob/bprob.exp 
b/gcc/testsuite/g++.dg/bprob/bprob.exp
index d07..e45d965 100644
--- a/gcc/testsuite/g++.dg/bprob/bprob.exp
+++ b/gcc/testsuite/g++.dg/bprob/bprob.exp
@@ -53,6 +53,7 @@ if $tracelevel then {
 
 set profile_options "-fprofile-arcs"
 set feedback_options "-fbranch-probabilities"
+set profile_wrapper ""
 
 # Main loop.
 foreach profile_option $profile_options feedback_option $feedback_options {
@@ -65,4 +66,13 @@ foreach profile_option $profile_options feedback_option 
$feedback_options {
 }
 }
 
+foreach profile_option $profile_options feedback_option $feedback_options {
+foreach src [lsort [glob -nocomplain $srcdir/$subdir/bprob-*.c]] {
+if ![runtest_file_p $runtests $src] then {
+continue
+}
+auto-profopt-execute $src
+}
+}
+
 set PROFOPT_OPTIONS $bprob_save_profopt_options
diff --git a/gcc/testsuite/g++.dg/tree-prof/tree-prof.exp 
b/gcc/testsuite/g++.dg/tree-prof/tree-prof.exp
index 7a4b5cb..ea08602 100644
--- a/gcc/testsuite/g++.dg/tree-prof/tree-prof.exp
+++ b/gcc/testsuite/g++.dg/tree-prof/tree-prof.exp
@@ -44,6 +44,7 @@ set PROFOPT_OPTIONS [list {}]
 # profile data.
 set profile_option "-fprofile-generate -D_PROFILE_GENERATE"
 set feedback_option "-fprofile-use -D_PROFILE_USE"
+set profile_wrapper ""
 
 foreach src [lsort [glob -nocomplain $srcdir/$subdir/*.C]] {
 # If we're only testing specific files and this isn't one of them, skip it.
@@ -53,4 +54,13 @@ foreach src [lsort [glob -nocomplain $srcdir/$subdir/*.C]] {
 profopt-execute $src
 }
 
+foreach profile_option $profile_options feedback_option $feedback_options {
+foreach src [lsort [glob -nocomplain $srcdir/$subdir/bprob-*.c]] {
+if ![runtest_file_p $runtests $src] then {
+continue
+}
+auto-profopt-execute $src
+}
+}
+
 set PROFOPT_OPTIONS $treeprof_save_profopt_options
diff --git a/gcc/testsuite/gcc.dg/tree-prof/tree-prof.exp 
b/gcc/testsuite/gcc.dg/tree-prof/tree-prof.exp
index 650ad8d..abf7231 100644
--- a/gcc/testsuite/gcc.dg/tree-prof/tree-prof.exp
+++ b/gcc/testsuite/gcc.dg/tree-prof/tree-prof.exp
@@ -44,6 +44,7 @@ set PROFOPT_OPTIONS [list {}]
 # profile data.
 set profile_option "-fprofile-generate -D_PROFILE_GENERATE"
 set feedback_option "-fprofile-use -D_PROFILE_USE"
+set profile_wrapper ""
 
 foreach src [lsort [glob -nocomplain $srcdir/$subdir/*.c]] {
 # If we're only testing specific files and this isn't one of them, skip it.
@@ -53,4 +54,13 @@ foreach src [lsort [glob -nocomplain $srcdir/$subdir/*.c]] {
 profopt-execute $src
 }
 
+foreach profile_option $profile_options feedback_option $feedback_options {
+foreach src [lsort [glob -nocomplain $srcdir/$subdir/bprob-*.c]] {
+if ![runtest_file_p $runtests $src] then {
+continue
+}
+auto-profopt-execute $src
+}
+}
+
 set PROFOPT_OPTIONS $treeprof_save_profopt_options
diff --git a/gcc/testsuite/gcc.misc-tests/bprob.exp 
b/gcc/testsuite/gcc.misc-tests/bprob.exp
index 52dcb1f..f43f011 100644
--- a/gcc/testsuite/gcc.misc-tests/bprob.exp
+++ b/gcc/testsuite/gcc.misc-tests/bprob.exp
@@ -41,6 +41,7 @@ load_lib profopt.exp
 set bprob_save_profopt_options $PROFOPT_OPTIONS
 set PROFOPT_OPTIONS [list { -O2 } { -O3  }]
 
+set profile_wrapper ""
 set profile_options "-fprofile-arcs"
 set feedback_options "-fbranch-probabilities"
 
@@ -54,4 +55,17 @@ foreach profile_option $profile_options feedback_option 
$feedback_options {
 }
 }
 
+if { ! [check_profiling_available "-fauto-profile"] } {
+set PROFOPT_OPTIONS $bprob_save_profopt_options
+return
+}
+
+foreach profile_option $profile_options feedback_option $feedback_options {
+foreach 

[PATCH 2/5] Don't cause ICEs when auto profile file is not found with checking

2016-05-21 Thread Andi Kleen
From: Andi Kleen 

Currently, on a checking enabled compiler when -fauto-profile does
not find the profile feedback file it errors out with assertion
failures. Add proper errors for this case.

gcc/:

2016-05-21  Andi Kleen  

* auto-profile.c (read_profile): Replace asserts with errors
when file does not exist.
* gcov-io.c (gcov_read_words): Dito.
---
 gcc/auto-profile.c | 32 +---
 gcc/gcov-io.c  |  4 +++-
 2 files changed, 28 insertions(+), 8 deletions(-)

diff --git a/gcc/auto-profile.c b/gcc/auto-profile.c
index cd82ab4..9e3fd02 100644
--- a/gcc/auto-profile.c
+++ b/gcc/auto-profile.c
@@ -884,16 +884,25 @@ static void
 read_profile (void)
 {
   if (gcov_open (auto_profile_file, 1) == 0)
-error ("Cannot open profile file %s.", auto_profile_file);
+{
+  error ("Cannot open profile file %s.", auto_profile_file);
+  return;
+}
 
   if (gcov_read_unsigned () != GCOV_DATA_MAGIC)
-error ("AutoFDO profile magic number does not mathch.");
+{
+  error ("AutoFDO profile magic number does not mathch.");
+  return;
+}
 
   /* Skip the version number.  */
   unsigned version = gcov_read_unsigned ();
   if (version != AUTO_PROFILE_VERSION)
-error ("AutoFDO profile version %u does match %u.",
-   version, AUTO_PROFILE_VERSION);
+{
+  error ("AutoFDO profile version %u does match %u.",
+version, AUTO_PROFILE_VERSION);
+  return;
+}
 
   /* Skip the empty integer.  */
   gcov_read_unsigned ();
@@ -901,19 +910,28 @@ read_profile (void)
   /* string_table.  */
   afdo_string_table = new string_table ();
   if (!afdo_string_table->read())
-error ("Cannot read string table from %s.", auto_profile_file);
+{
+  error ("Cannot read string table from %s.", auto_profile_file);
+  return;
+}
 
   /* autofdo_source_profile.  */
   afdo_source_profile = autofdo_source_profile::create ();
   if (afdo_source_profile == NULL)
-error ("Cannot read function profile from %s.", auto_profile_file);
+{
+  error ("Cannot read function profile from %s.", auto_profile_file);
+  return;
+}
 
   /* autofdo_module_profile.  */
   fake_read_autofdo_module_profile ();
 
   /* Read in the working set.  */
   if (gcov_read_unsigned () != GCOV_TAG_AFDO_WORKING_SET)
-error ("Cannot read working set from %s.", auto_profile_file);
+{
+  error ("Cannot read working set from %s.", auto_profile_file);
+  return;
+}
 
   /* Skip the length of the section.  */
   gcov_read_unsigned ();
diff --git a/gcc/gcov-io.c b/gcc/gcov-io.c
index 17fcae0..95ead22 100644
--- a/gcc/gcov-io.c
+++ b/gcc/gcov-io.c
@@ -493,7 +493,9 @@ gcov_read_words (unsigned words)
   const gcov_unsigned_t *result;
   unsigned excess = gcov_var.length - gcov_var.offset;
 
-  gcov_nonruntime_assert (gcov_var.mode > 0);
+  if (gcov_var.mode <= 0)
+return NULL;
+
   if (excess < words)
 {
   gcov_var.start += gcov_var.offset;
-- 
2.8.2



[PR other/70945] Handle function_glibc_finite_math in offloading

2016-05-21 Thread Thomas Schwinge
Hi!

As discussed in  "Offloading: compatibility
of target and offloading toolchains", there are situations where we have
to do more work to ensure compatibility between target and offloading
toolchains.

The first thing I'm working on is math functions usage in offloaded
regions.

Here is a first patch, addressing glibc's finite math optimizations: if
-ffinite-math-only (as implied by -ffast-math, or -Ofast) is in effect,
glibc's <math.h> is known to include <bits/math-finite.h> for "special
entry points to use when the compiler got told to only expect finite
results".  This diverts the math functions' assembler names from
"[function]" to "__[function]_finite".  This, obviously, is incompatible
with offloading targets that don't use glibc, and thus don't provide
these "__[function]_finite" entry points.

In response to Alexander's
, I argue this
does belong into the generic offloading data handling, instead of a
nvptx-specific pass, for the reason that it is not a nvptx-specific
transformation but addresses a general target vs. offloading target
configuration mismatch.

If I understand him correctly, Joseph in
 confirms my idea
about how this may use some extension (attributes), allowing us to get
rid of the "__[function]_finite" name matching heuristic.  That's indeed
something to work on, but as it will take a rather long time until glibc
changes make their way into distributions that end users are using, I'd
like to start with the heuristic as implemented, and adjust this later
on.

OK for trunk?  I'm working on a test case, too.

commit 0f65dbe65e883d2294c055631eccb07869bc5375
Author: Thomas Schwinge 
Date:   Fri May 13 17:02:30 2016 +0200

[PR other/70945] Handle function_glibc_finite_math in offloading

gcc/
PR other/70945
* targhooks.c (default_libc_has_function): Update comment.
* target.def (libc_has_function): Likewise.
* doc/tm.texi: Regenerate.
* coretypes.h (enum function_class): Add
function_glibc_finite_math.
* config/darwin.c (darwin_libc_has_function): Handle it.
* lto-streamer.h (enum lto_section_type): Rename
LTO_section_offload_table to LTO_section_offload_data.  Adjust all
users.
* lto-cgraph.c (void output_offload_data): New function, split out
of output_offload_tables.  Adjust all users.  Stream the target's
function_glibc_finite_math property.
(input_offload_data): New function, split out of
input_offload_tables.  Adjust all users.  Handle mismatch between
the target's and the offloading target's
function_glibc_finite_math property.
---
 gcc/config/darwin.c|   2 +
 gcc/coretypes.h|  11 ++-
 gcc/doc/tm.texi|   2 +-
 gcc/lto-cgraph.c   | 181 -
 gcc/lto-streamer-out.c |   2 +-
 gcc/lto-streamer.h |   6 +-
 gcc/lto/lto.c  |   2 +-
 gcc/target.def |   2 +-
 gcc/targhooks.c|   2 +-
 9 files changed, 152 insertions(+), 58 deletions(-)

diff --git gcc/config/darwin.c gcc/config/darwin.c
index 0055d80..92fe3e5 100644
--- gcc/config/darwin.c
+++ gcc/config/darwin.c
@@ -3401,6 +3401,8 @@ darwin_libc_has_function (enum function_class fn_class)
   || fn_class == function_c99_misc)
 return (TARGET_64BIT
|| strverscmp (darwin_macosx_version_min, "10.3") >= 0);
+  if (fn_class == function_glibc_finite_math)
+return false;
 
   return true;
 }
diff --git gcc/coretypes.h gcc/coretypes.h
index b3a91a6..aa48b5a 100644
--- gcc/coretypes.h
+++ gcc/coretypes.h
@@ -305,14 +305,21 @@ union _dont_use_tree_here_;
 
 #endif
 
-/* Classes of functions that compiler needs to check
+/* Properties, such as classes of functions that the compiler can check
whether they are present at the runtime or not.  */
 enum function_class {
   function_c94,
   function_c99_misc,
   function_c99_math_complex,
   function_sincos,
-  function_c11_misc
+  function_c11_misc,
+  /* If -ffinite-math-only (as implied by -ffast-math, or -Ofast) is in effect,
+ glibc's  is known to include  for "special
+ entry points to use when the compiler got told to only expect finite
+ results".  This divertes the math functions' assembler names from
+ "[function]" to "__[function]_finite".  This property indicates whether
+ such diversion may occur, not whether it actually has.  */
+  function_glibc_finite_math
 };
 
 /* Enumerate visibility settings.  This is deliberately ordered from most
diff --git gcc/doc/tm.texi gcc/doc/tm.texi
index 8c7f2a1..4ce3a43 100644
--- gcc/doc/tm.texi
+++ gcc/doc/tm.texi
@@ -5308,7 +5308,7 @@ macro, a reasonable default is used.
 @end defmac
 
 @deftypefn {Target Hook} bool TARGET_LIBC_HAS_FUNCTION (enum function_class 
@var{fn_class})
-This hook determines whether a function from a 

Re: [Patch, avr] Include INCOMING_FRAME_SP_OFFSET when printing stack usage

2016-05-21 Thread Denis Chertykov
2016-05-19 16:10 GMT+03:00 Senthil Kumar Selvaraj
:
> Ping!
>
> Regards
> Senthil
>
> Senthil Kumar Selvaraj writes:
>
>> Hi,
>>
>>   This trivial patch adds INCOMING_FRAME_SP_OFFSET to
>>   current_function_static_stack_size, thus fixing the 2 (or 3, for
>>   3 byte PC devices) byte difference between reported and actual
>>   values when using -fstack-usage.
>>
>>   The patch came about because of this discussion
>>   (https://gcc.gnu.org/ml/gcc/2016-05/msg00107.html). For AVRs, the
>>   return address gets pushed into the stack as part of the call
>>   instruction, and the number of bytes pushed varies by PC width.
>>   This is already taken care of when defining INCOMING_FRAME_SP_OFFSET,
>>   so I just add it to the previously computed value when setting
>>   current_function_static_stack_size.
>>
>>   If this is ok, could someone commit please? I don't have commit
>>   access.
>>
>> Regards
>> Senthil
>>
>> gcc/ChangeLog
>>
>> 2016-05-13  Senthil Kumar Selvaraj  
>>
>>   * config/avr/avr.c (avr_expand_prologue): Add INCOMING_FRAME_SP_OFFSET
>>   to computed stack_usage.
>>
>>
>> diff --git gcc/config/avr/avr.c gcc/config/avr/avr.c
>> index 8de39e0..ba5cd91 100644
>> --- gcc/config/avr/avr.c
>> +++ gcc/config/avr/avr.c
>> @@ -1484,7 +1484,7 @@ avr_expand_prologue (void)
>>avr_prologue_setup_frame (size, set);
>>
>>if (flag_stack_usage_info)
>> -current_function_static_stack_size = cfun->machine->stack_usage;
>> +current_function_static_stack_size = cfun->machine->stack_usage + 
>> INCOMING_FRAME_SP_OFFSET;
>>  }
>>
>>
>

Committed.


Re: Ping: [patch, avr] Fix unrecognizable insn ICE for avr (PR71103)

2016-05-21 Thread Denis Chertykov
2016-05-20 16:13 GMT+03:00 Pitchumani Sivanupandi
:
> Ping!
>
> Note: Removed the garbled characters and added ChangeLog
> --
> avr-gcc crashes for following test as it couldn't recognize the
> instruction pattern.
>
> struct st {
>   unsigned char uc1;
>   unsigned int *ui1;
> };
>
> unsigned int ui1;
> struct st foo () {
>   struct st ret;
>   ret.uc1 = 6;
>   ret.ui1 = &ui1;
>   return ret;
> }
>
> $ avr-gcc -mmcu=atmega328p -O1 test.c
> (-- snip --)
> test.c: In function 'foo':
> test.c:12:1: error: unrecognizable insn:
>  }
>  ^
> (insn 6 5 7 2 (set (subreg:QI (reg:PSI 42 [ D.1499 ]) 1)
> (subreg:QI (symbol_ref:HI ("ui1")  ui1>) 0)) test.c:11 -1
>  (nil))
> test.c:12:1: internal compiler error: in extract_insn, at recog.c:2287
> 0xd51195 _fatal_insn(char const*, rtx_def const*, char const*, int,
> char const*)
> /home/rudran/code/gcc/gcc/rtl-error.c:108
> (--snip--)
>
> There is no valid pattern in avr to match the "subreg:QI
> (symbol_ref:HI)". Attached patch forces the symbol_ref of subreg
> operand to register so that it will become register operand and movqi
> pattern shall recognize it.
>
> Ran gcc regression test with internal simulators. No new regressions
> found.
>
> If Ok, could someone commit please?
>
> Regards,
> Pitchumani
>
> gcc/ChangeLog
> 2016-05-20  Pitchumani Sivanupandi  
>
> PR target/71103
> * config/avr/avr.md (define_expand "mov"): If the source
> operand is
> subreg (symbol_ref) then move the symbol ref to register.
>
> gcc/testsuite/ChangeLog
> 2016-05-20  Pitchumani Sivanupandi  
>
> PR target/71103
> * gcc.target/avr/pr71103.c: New test.

Committed.

PS: I'm sorry for the long delay. I was on vacation.


Re: [PATCH 2/3] Implement CALL_EXPR_MUST_TAIL_CALL

2016-05-21 Thread Andreas Schwab
David Malcolm  writes:

> diff --git a/gcc/testsuite/gcc.dg/plugin/must-tail-call-2.c 
> b/gcc/testsuite/gcc.dg/plugin/must-tail-call-2.c
> new file mode 100644
> index 000..c5504f8
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/plugin/must-tail-call-2.c
> @@ -0,0 +1,58 @@
> +/* Allow nested functions.  */
> +/* { dg-options "-Wno-pedantic" } */
> +
> +struct box { char field[64]; int i; };
> +
> +struct box __attribute__((noinline,noclone))
> +returns_struct (int i)
> +{
> +  struct box b;
> +  b.i = i * i;
> +  return b;
> +}
> +
> +int __attribute__((noinline,noclone))
> +test_1 (int i)
> +{
> +  return returns_struct (i * 5).i; /* { dg-error "cannot tail-call: callee 
> returns a structure" } */
> +}
> +
> +int __attribute__((noinline,noclone))
> +test_2_callee (int i, struct box b)
> +{
> +  if (b.field[0])
> +return 5;
> +  return i * i;
> +}
> +
> +int __attribute__((noinline,noclone))
> +test_2_caller (int i)
> +{
> +  struct box b;
> +  return test_2_callee (i + 1, b); /* { dg-error "cannot tail-call: callee 
> required more stack slots than the caller" } */
> +}
> +
> +extern void setjmp (void);
> +void
> +test_3 (void)
> +{
> +  setjmp (); /* { dg-error "cannot tail-call: callee returns twice" } */
> +}
> +
> +void
> +test_4 (void)
> +{
> +  void nested (void)
> +  {
> +  }
> +  nested (); /* { dg-error "cannot tail-call: nested function" } */
> +}
> +
> +typedef void (fn_ptr_t) (void);
> +volatile fn_ptr_t fn_ptr;
> +
> +void
> +test_5 (void)
> +{
> +  fn_ptr (); /* { dg-error "cannot tail-call: callee does not return" } */
> +}

On aarch64:

FAIL: gcc.dg/plugin/must-tail-call-2.c -fplugin=./must_tail_call_plugin.so  
(test for errors, line 32)
FAIL: gcc.dg/plugin/must-tail-call-2.c -fplugin=./must_tail_call_plugin.so 
(test for excess errors)
Excess errors:
gcc.dg/plugin/must-tail-call-2.c:32:10: error: cannot tail-call: argument must 
be passed by copying

Andreas.

-- 
Andreas Schwab, sch...@linux-m68k.org
GPG Key fingerprint = 58CA 54C7 6D53 942B 1756  01D3 44D5 214B 8276 4ED5
"And now for something completely different."


Re: [PATCH 2/3] Implement CALL_EXPR_MUST_TAIL_CALL

2016-05-21 Thread Andreas Schwab
David Malcolm  writes:

> diff --git a/gcc/testsuite/gcc.dg/plugin/must-tail-call-2.c 
> b/gcc/testsuite/gcc.dg/plugin/must-tail-call-2.c
> new file mode 100644
> index 000..c5504f8
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/plugin/must-tail-call-2.c
> @@ -0,0 +1,58 @@
> +/* Allow nested functions.  */
> +/* { dg-options "-Wno-pedantic" } */
> +
> +struct box { char field[64]; int i; };
> +
> +struct box __attribute__((noinline,noclone))
> +returns_struct (int i)
> +{
> +  struct box b;
> +  b.i = i * i;
> +  return b;
> +}
> +
> +int __attribute__((noinline,noclone))
> +test_1 (int i)
> +{
> +  return returns_struct (i * 5).i; /* { dg-error "cannot tail-call: callee 
> returns a structure" } */
> +}
> +
> +int __attribute__((noinline,noclone))
> +test_2_callee (int i, struct box b)
> +{
> +  if (b.field[0])
> +return 5;
> +  return i * i;
> +}
> +
> +int __attribute__((noinline,noclone))
> +test_2_caller (int i)
> +{
> +  struct box b;
> +  return test_2_callee (i + 1, b); /* { dg-error "cannot tail-call: callee 
> required more stack slots than the caller" } */
> +}
> +
> +extern void setjmp (void);
> +void
> +test_3 (void)
> +{
> +  setjmp (); /* { dg-error "cannot tail-call: callee returns twice" } */
> +}
> +
> +void
> +test_4 (void)
> +{
> +  void nested (void)
> +  {
> +  }
> +  nested (); /* { dg-error "cannot tail-call: nested function" } */
> +}
> +
> +typedef void (fn_ptr_t) (void);
> +volatile fn_ptr_t fn_ptr;
> +
> +void
> +test_5 (void)
> +{
> +  fn_ptr (); /* { dg-error "cannot tail-call: callee does not return" } */
> +}

On ia64:

FAIL: gcc.dg/plugin/must-tail-call-2.c -fplugin=./must_tail_call_plugin.so  
(test for errors, line 39)
FAIL: gcc.dg/plugin/must-tail-call-2.c -fplugin=./must_tail_call_plugin.so  
(test for errors, line 57)
FAIL: gcc.dg/plugin/must-tail-call-2.c -fplugin=./must_tail_call_plugin.so 
(test for excess errors)
Excess errors:
gcc.dg/plugin/must-tail-call-2.c:39:3: error: cannot tail-call: target is not 
able to optimize the call into a sibling call
gcc.dg/plugin/must-tail-call-2.c:57:3: error: cannot tail-call: target is not 
able to optimize the call into a sibling call

On m68k:

FAIL: gcc.dg/plugin/must-tail-call-2.c -fplugin=./must_tail_call_plugin.so  
(test for errors, line 48)
FAIL: gcc.dg/plugin/must-tail-call-2.c -fplugin=./must_tail_call_plugin.so 
(test for excess errors)
Excess errors:
gcc.dg/plugin/must-tail-call-2.c:48:3: error: cannot tail-call: target is not 
able to optimize the call into a sibling call

Andreas.

-- 
Andreas Schwab, sch...@linux-m68k.org
GPG Key fingerprint = 58CA 54C7 6D53 942B 1756  01D3 44D5 214B 8276 4ED5
"And now for something completely different."


Re: [nios2, committed] fix bad assertion

2016-05-21 Thread Chung-Lin Tang
On 2015/6/30 1:01 AM, Sandra Loosemore wrote:
> When I was preparing to regression-test something else in a nios2-linux-gnu 
> build, I discovered it was ICE'ing while building
> shared libraries with -fpic (glibc, libgomp).  I tracked this down to having 
> started with r224048, but on further investigation
> I decided that commit merely exposed a latent bug.
> 
> The trouble is that the assertion in nios2_delegitimize_address is too 
> restrictive compared to what nios2_legitimize_address can
> produce.  It's expecting to find a SYMBOL_REF underneath but in one case it 
> was crashing on a LABEL_REF (for a computed goto),
> and in another case it was a symbol + offset expression which is even 
> documented with a big block of comments in
> nios2_legitimize_address.  I've checked in this patch to relax the assertion; 
> it allows the toolchain to build again, and test
> results look decent.
> 
> -Sandra
> 

This was backported to gcc-5-branch, to fix that ICE Marek encountered.

Chung-Lin



Re: [PATCH][Testsuite] Force testing of vectorized builtins rather than inlined i387 asm

2016-05-21 Thread Uros Bizjak
On Fri, May 20, 2016 at 8:01 PM, Ilya Verbin  wrote:
> Hi!
>
> In some cases the i387 version of a math function may be inlined from math.h,
> and the testcase (like gcc.target/i386/sse4_1-ceil-vec.c) will actually test
> inlined asm instead of vectorized builtin.  To fix this I've created a new 
> file
> gcc.dg/mathfunc.h (similar to gcc.dg/strlenopt.h) and changed vectorization
> tests so that they include it instead of math.h.
> Regtested on x86_64-linux and i686-linux.  Is it OK for trunk?

No, please just #define NO_MATH_INLINES before math.h is included.
This will solve unwanted inlining.

Uros.

> gcc/testsuite/
> * gcc.dg/mathfunc.h: New file.
> * gcc.target/i386/avx-ceil-sfix-2-vec.c: Do not skip if there is no 
> M_PI
> for vxworks_kernel.  Include mathfunc.h instead of math.h.  Remove
> declaration.
> * gcc.target/i386/avx-cvt-2-vec.c: Likewise.
> * gcc.target/i386/avx-floor-sfix-2-vec.c: Likewise.
> * gcc.target/i386/avx-rint-sfix-2-vec.c: Likewise.
> * gcc.target/i386/avx-round-sfix-2-vec.c: Likewise.
> * gcc.target/i386/avx512f-ceil-sfix-vec-1.c: Likewise.
> * gcc.target/i386/avx512f-floor-sfix-vec-1.c: Likewise.
> * gcc.target/i386/sse2-cvt-vec.c: Likewise.
> * gcc.target/i386/sse4_1-ceil-sfix-vec.c: Likewise.
> * gcc.target/i386/sse4_1-ceil-vec.c: Likewise.
> * gcc.target/i386/sse4_1-ceilf-sfix-vec.c: Likewise.
> * gcc.target/i386/sse4_1-ceilf-vec.c: Likewise.
> * gcc.target/i386/sse4_1-floor-sfix-vec.c: Likewise.
> * gcc.target/i386/sse4_1-floor-vec.c: Likewise.
> * gcc.target/i386/sse4_1-rint-sfix-vec.c: Likewise.
> * gcc.target/i386/sse4_1-rint-vec.c: Likewise.
> * gcc.target/i386/sse4_1-rintf-sfix-vec.c: Likewise.
> * gcc.target/i386/sse4_1-rintf-vec.c: Likewise.
> * gcc.target/i386/sse4_1-round-sfix-vec.c: Likewise.
> * gcc.target/i386/sse4_1-round-vec.c: Likewise.
> * gcc.target/i386/sse4_1-roundf-sfix-vec.c: Likewise.
> * gcc.target/i386/sse4_1-roundf-vec.c: Likewise.
> * gcc.target/i386/sse4_1-trunc-vec.c: Likewise.
> * gcc.target/i386/sse4_1-truncf-vec.c: Likewise.
> * gcc.target/i386/sse4_1-floorf-sfix-vec.c: Likewise.  Use floorf
> instead of __builtin_floorf.
> * gcc.target/i386/sse4_1-floorf-vec.c: Likewise.
>
>
> diff --git a/gcc/testsuite/gcc.dg/mathfunc.h b/gcc/testsuite/gcc.dg/mathfunc.h
> new file mode 100644
> index 000..1c1b7bc
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/mathfunc.h
> @@ -0,0 +1,20 @@
> +/* This is a replacement of needed parts from math.h for testing 
> vectorization,
> +   to ensure we are testing the builtins rather than whatever the OS has in 
> its
> +   headers.  */
> +
> +#define M_PI  3.14159265358979323846
> +
> +extern double ceil (double);
> +extern float ceilf (float);
> +
> +extern double floor (double);
> +extern float floorf (float);
> +
> +extern double trunc (double);
> +extern float truncf (float);
> +
> +extern double round (double);
> +extern float roundf (float);
> +
> +extern double rint (double);
> +extern float rintf (float);
> diff --git a/gcc/testsuite/gcc.target/i386/avx-ceil-sfix-2-vec.c 
> b/gcc/testsuite/gcc.target/i386/avx-ceil-sfix-2-vec.c
> index bf48b80..567a16d 100644
> --- a/gcc/testsuite/gcc.target/i386/avx-ceil-sfix-2-vec.c
> +++ b/gcc/testsuite/gcc.target/i386/avx-ceil-sfix-2-vec.c
> @@ -1,7 +1,6 @@
>  /* { dg-do run } */
>  /* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx" } */
>  /* { dg-require-effective-target avx } */
> -/* { dg-skip-if "no M_PI" { vxworks_kernel } } */
>
>  #ifndef CHECK_H
>  #define CHECK_H "avx-check.h"
> @@ -13,9 +12,7 @@
>
>  #include CHECK_H
>
> -#include <math.h>
> -
> -extern double ceil (double);
> +#include "../../gcc.dg/mathfunc.h"
>
>  #define NUM 4
>
> diff --git a/gcc/testsuite/gcc.target/i386/avx-cvt-2-vec.c 
> b/gcc/testsuite/gcc.target/i386/avx-cvt-2-vec.c
> index 0081dcf..8a8369b 100644
> --- a/gcc/testsuite/gcc.target/i386/avx-cvt-2-vec.c
> +++ b/gcc/testsuite/gcc.target/i386/avx-cvt-2-vec.c
> @@ -1,7 +1,6 @@
>  /* { dg-do run } */
>  /* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx" } */
>  /* { dg-require-effective-target avx } */
> -/* { dg-skip-if "no M_PI" { vxworks_kernel } } */
>
>  #ifndef CHECK_H
>  #define CHECK_H "avx-check.h"
> @@ -13,7 +12,7 @@
>
>  #include CHECK_H
>
> -#include <math.h>
> +#include "../../gcc.dg/mathfunc.h"
>
>  #define NUM 4
>
> diff --git a/gcc/testsuite/gcc.target/i386/avx-floor-sfix-2-vec.c 
> b/gcc/testsuite/gcc.target/i386/avx-floor-sfix-2-vec.c
> index 275199c..44002b4 100644
> --- a/gcc/testsuite/gcc.target/i386/avx-floor-sfix-2-vec.c
> +++ b/gcc/testsuite/gcc.target/i386/avx-floor-sfix-2-vec.c
> @@ -1,7 +1,6 @@
>  /* { dg-do run } */
>  /* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx" } */
>  /* { dg-require-effective-target avx } */
> -/* { dg-skip-if "no M_PI" { 

Re: [PATCH] Use flag_general_regs_only with -mgeneral-regs-only

2016-05-21 Thread Uros Bizjak
On Fri, May 20, 2016 at 7:49 PM, H.J. Lu  wrote:
> On Fri, May 20, 2016 at 10:15 AM, Rainer Orth
>  wrote:
>> "H.J. Lu"  writes:
>>
>>> On Thu, May 12, 2016 at 10:54 AM, H.J. Lu  wrote:
>> Here is a patch to add
>> -mgeneral-regs-only option to x86 backend.   We can update
>> spec for interrupt handle to recommend compiling interrupt handler
>> with -mgeneral-regs-only option and add a note for compiler
>> implementers.
>>
>> OK for trunk if there is no regression?
>
>
> I can't comment on the code patch, but for the documentation part:
>
>> @@ -24242,6 +24242,12 @@ opcodes, to mitigate against certain forms of
>> attack. At the moment,
>>  this option is limited in what it can do and should not be relied
>>  on to provide serious protection.
>>
>> +@item -mgeneral-regs-only
>> +@opindex mgeneral-regs-only
>> +Generate code which uses only the general-purpose registers.  This will
>
>
> s/which/that/
>
>> +prevent the compiler from using floating-point, vector, mask and bound
>
>
> s/will prevent/prevents/
>
>> +registers, but will not impose any restrictions on the assembler.
>
>
> Maybe you mean to say "does not restrict use of those registers in inline
> assembly code"?  In any case, please get rid of the future tense here, 
> too.

 I changed it to

 ---
 @item -mgeneral-regs-only
 @opindex mgeneral-regs-only
 Generate code that uses only the general-purpose registers.  This
 prevents the compiler from using floating-point, vector, mask and bound
 registers.
 ---

>>>
>>> Here is the updated patch.  Tested on x86-64.  OK for trunk?
>>
>> This patch broke {i386,x86_64}-apple-darwin15.5.0 bootstrap:
>>
>> In file included from ./tm.h:16:0,
>>  from /vol/gcc/src/hg/trunk/local/gcc/genattrtab.c:108:
>> ./options.h:5443:2: error: #error too many target masks
>>  #error too many target masks
>>   ^
>> Makefile:2497: recipe for target 'build/genattrtab.o' failed
>> make[3]: *** [build/genattrtab.o] Error 1
>>
>> options.h has
>>
>> #define OPTION_MASK_ISA_XSAVES (HOST_WIDE_INT_1 << 62)
>> #error too many target masks
>>
>> The tree bootstraps just fine at the previous revision.
>>
>
> Tested on x86-64.  OK for trunk?

No, this is a flag, not a variable. Let's figure out how to extend
target flags to more than 63 flags first.

Please revert the original patch in the mean time.

Thanks,
Uros.


Re: [PATCH][Testsuite] Fix mips dsp testsuite mistakes

2016-05-21 Thread Maciej W. Rozycki
On Sat, 21 May 2016, Paul Hua wrote:

> There are some mistakes in mips dsp testsuite.
> 
> This patch fixing it.

 Thank you for your contribution, however you need to be more explicit 
with patch descriptions, and explain in detail what problem your change 
addresses, in this case what mistakes you have corrected.  For example: 
"Code does this and that and this is wrong, because...  Correct it by 
doing this and that instead."

 Please see the individual questions below.

 Also please don't make your ChangeLog entry a part of the patch submitted 
as it makes it difficult for the committer to apply the patch, because 
ChangeLog changes constantly.  Instead include it in the e-mail body and 
the committer will prepend it to ChangeLog at commit time.

> Index: gcc/testsuite/gcc.target/mips/mips32-dsp-run.c
> ===
> --- gcc/testsuite/gcc.target/mips/mips32-dsp-run.c  (revision 236553)
> +++ gcc/testsuite/gcc.target/mips/mips32-dsp-run.c  (working copy)
> @@ -394,7 +394,7 @@ NOMIPS16 void test_MIPS_DSP ()
> 
>v2q15_a = (v2q15) {0x1234, 0x5678};
>i32_b = 1;
> -  v2q15_s = (v2q15) {0x2468, 0x7fff};
> +  v2q15_s = (v2q15) {0x2468, 0xacf0};
>v2q15_r = __builtin_mips_shll_s_ph (v2q15_a, i32_b);
>r = (int) v2q15_r;
>s = (int) v2q15_s;

 The shift operation requested results in a signed integer overflow and 
consequently saturation triggers.  Why do you think the original result 
expected is wrong?

> @@ -409,7 +409,7 @@ NOMIPS16 void test_MIPS_DSP ()
> 
>q31_a = 0x7000;
>i32_b = 1;
> -  q31_s = 0x7fff;
> +  q31_s = 0xe000;
>q31_r = __builtin_mips_shll_s_w (q31_a, i32_b);
>if (q31_r != q31_s)
>  abort ();

 Likewise, same question as above.

> @@ -961,9 +961,9 @@ NOMIPS16 void test_MIPS_DSP ()
>  abort ();
>  #endif
> 
> -  i32_a = 0x1357a468;
> +  i32_a = 0x13572468;
>__builtin_mips_wrdsp (i32_a, 63);
> -  i32_s = 0x03572428;
> +  i32_s = 0x13572468;
>i32_r = __builtin_mips_rddsp (63);
>if (i32_r != i32_s)
>  abort ();

 This undoubtedly verifies that reserved bits read back as zeros, so it 
does not look like a mistake to me.  How did you verify your change?

  Maciej


[PATCH][Testsuite] Fix mips dsp testsuite mistakes

2016-05-21 Thread Paul Hua
Hi,

There are some mistakes in the MIPS DSP testsuite.

This patch fixes them.

Ok to commit?


[mips] Fix mips dsp testsuite mistake.

gcc/testsuite/gcc.target/mips/
*mips32-dsp-run.c: Fix mistake.

Index: gcc/testsuite/ChangeLog
===
--- gcc/testsuite/ChangeLog (revision 236553)
+++ gcc/testsuite/ChangeLog (working copy)
@@ -1,3 +1,7 @@
+2016-05-21  Chenghua Xu  
+
+   * gcc.target/mips/mips32-dsp-run.c: Fix mistake.
+
 2016-05-20  Martin Sebor  

PR c/71115
Index: gcc/testsuite/gcc.target/mips/mips32-dsp-run.c
===
--- gcc/testsuite/gcc.target/mips/mips32-dsp-run.c  (revision 236553)
+++ gcc/testsuite/gcc.target/mips/mips32-dsp-run.c  (working copy)
@@ -394,7 +394,7 @@ NOMIPS16 void test_MIPS_DSP ()

   v2q15_a = (v2q15) {0x1234, 0x5678};
   i32_b = 1;
-  v2q15_s = (v2q15) {0x2468, 0x7fff};
+  v2q15_s = (v2q15) {0x2468, 0xacf0};
   v2q15_r = __builtin_mips_shll_s_ph (v2q15_a, i32_b);
   r = (int) v2q15_r;
   s = (int) v2q15_s;
@@ -409,7 +409,7 @@ NOMIPS16 void test_MIPS_DSP ()

   q31_a = 0x7000;
   i32_b = 1;
-  q31_s = 0x7fff;
+  q31_s = 0xe000;
   q31_r = __builtin_mips_shll_s_w (q31_a, i32_b);
   if (q31_r != q31_s)
 abort ();
@@ -961,9 +961,9 @@ NOMIPS16 void test_MIPS_DSP ()
 abort ();
 #endif

-  i32_a = 0x1357a468;
+  i32_a = 0x13572468;
   __builtin_mips_wrdsp (i32_a, 63);
-  i32_s = 0x03572428;
+  i32_s = 0x13572468;
   i32_r = __builtin_mips_rddsp (63);
   if (i32_r != i32_s)
 abort ();



Regards,
Chenghua


Re: [PATCH] Fix PR tree-optimization/71170

2016-05-21 Thread Kugan Vivekanandarajah
On 20 May 2016 at 21:07, Richard Biener  wrote:
> On Fri, May 20, 2016 at 1:51 AM, Kugan Vivekanandarajah
>  wrote:
>> Hi Richard,
>>
>>> I think it should have the same rank as op or op + 1 which is the current
>>> behavior.  Sth else doesn't work correctly here I think, like inserting the
>>> multiplication not near the definition of op.
>>>
>>> Well, the whole "clever insertion" logic is simply flawed.
>>
>> What I meant to say was that the simple logic we have now wouldn’t
>> work. "Clever logic" is knowing exactly where it is needed and
>> inserting there.  I think that's what you are suggesting below in a
>> simple-to-implement way.
>>
>>> I'd say that ideally we would delay inserting the multiplication to
>>> rewrite_expr_tree time.  For example by adding a ops->stmt_to_insert
>>> member.
>>>
>>
>> Here is an implementation based on above. Bootstrap on x86-linux-gnu
>> is OK. regression testing is ongoing.
>
> I like it.  Please push the insertion code to a helper as I think you need
> to post-pone setting the stmts UID to that point.
>
> Ideally we'd make use of the same machinery in attempt_builtin_powi,
> removing the special-casing of powi_result.  (same as I said that ideally
> the plus->mult stuff would use the repeat-ops machinery...)
>
> I'm not 100% convinced the place you insert the stmt is correct but I
> haven't spent too much time to decipher reassoc in this area.


Hi Richard,

Thanks. Here is a tested version of the patch. I had missed one place,
which I have now fixed (transform_stmt_to_copy). I also created a
function to do the insertion.


Bootstrap and regression testing on x86_64-linux-gnu are fine. Is this
OK for trunk?

Thanks,
Kugan


gcc/ChangeLog:

2016-05-21  Kugan Vivekanandarajah  

PR middle-end/71170
* tree-ssa-reassoc.c (struct operand_entry): Add field stmt_to_insert.
(add_to_ops_vec): Add stmt_to_insert.
(add_repeat_to_ops_vec): Init stmt_to_insert.
(insert_stmt_before_use): New.
(transform_add_to_multiply): Remove mult_stmt insertion and add it
to ops vector.
(get_ops): Init stmt_to_insert.
(maybe_optimize_range_tests): Likewise.
(rewrite_expr_tree): Insert  stmt_to_insert before use stmt.
(rewrite_expr_tree_parallel): Likewise.
(reassociate_bb): Likewise.
diff --git a/gcc/tree-ssa-reassoc.c b/gcc/tree-ssa-reassoc.c
index 3b5f36b..0b905e9 100644
--- a/gcc/tree-ssa-reassoc.c
+++ b/gcc/tree-ssa-reassoc.c
@@ -195,6 +195,7 @@ struct operand_entry
   int id;
   tree op;
   unsigned int count;
+  gimple *stmt_to_insert;
 };
 
 static object_allocator<operand_entry> operand_entry_pool
@@ -553,7 +554,7 @@ sort_by_operand_rank (const void *pa, const void *pb)
 /* Add an operand entry to *OPS for the tree operand OP.  */
 
 static void
-add_to_ops_vec (vec<operand_entry *> *ops, tree op)
+add_to_ops_vec (vec<operand_entry *> *ops, tree op, gimple *stmt_to_insert = 
NULL)
 {
   operand_entry *oe = operand_entry_pool.allocate ();
 
@@ -561,6 +562,7 @@ add_to_ops_vec (vec *ops, tree op)
   oe->rank = get_rank (op);
   oe->id = next_operand_entry_id++;
   oe->count = 1;
+  oe->stmt_to_insert = stmt_to_insert;
   ops->safe_push (oe);
 }
 
@@ -577,6 +579,7 @@ add_repeat_to_ops_vec (vec *ops, tree op,
   oe->rank = get_rank (op);
   oe->id = next_operand_entry_id++;
   oe->count = repeat;
+  oe->stmt_to_insert = NULL;
   ops->safe_push (oe);
 
   reassociate_stats.pows_encountered++;
@@ -1756,10 +1759,21 @@ eliminate_redundant_comparison (enum tree_code opcode,
   return false;
 }
 
+/* If the stmt that defines operand has to be inserted, insert it
+   before the use.  */
+static void
+insert_stmt_before_use (gimple *stmt, gimple *stmt_to_insert)
+{
+  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
+  gimple_set_uid (stmt_to_insert, gimple_uid (stmt));
+  gsi_insert_before (&gsi, stmt_to_insert, GSI_NEW_STMT);
+}
+
+
 /* Transform repeated addition of same values into multiply with
constant.  */
 static bool
-transform_add_to_multiply (gimple *stmt, vec<operand_entry *> *ops)
+transform_add_to_multiply (vec<operand_entry *> *ops)
 {
   operand_entry *oe;
   tree op = NULL_TREE;
@@ -1810,21 +1824,11 @@ transform_add_to_multiply (gimple *stmt, 
vec *ops)
ops->unordered_remove (i);
   tree tmp = make_ssa_name (TREE_TYPE (op));
   tree cst = build_int_cst (integer_type_node, count);
-  gimple *def_stmt = SSA_NAME_DEF_STMT (op);
   gassign *mul_stmt
= gimple_build_assign (tmp, MULT_EXPR,
   op, fold_convert (TREE_TYPE (op), cst));
-  if (gimple_code (def_stmt) == GIMPLE_NOP
- || gimple_bb (stmt) != gimple_bb (def_stmt))
-   {
- gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
- gimple_set_uid (mul_stmt, gimple_uid (stmt));
- gsi_insert_before (&gsi, mul_stmt, GSI_NEW_STMT);
-   }
-  else
-   insert_stmt_after (mul_stmt, def_stmt);
   gimple_set_visited (mul_stmt, true);
-  add_to_ops_vec (ops, tmp);
+  add_to_ops_vec (ops, tmp,