Move some complex simplifications to match.pd

2015-08-30 Thread Marc Glisse

Hello,

just trying to shrink fold-const.c a bit more.

The tests "if (TREE_CODE (TREE_TYPE (arg0)) != COMPLEX_TYPE)" seem 
useless, I did one bootstrap+testsuite with asserts there to make sure, so 
I am dropping them. The CONJ_EXPR simplifications don't seem very useful, 
as far as I can tell CONJ_EXPR is immediatly replaced with a piecewise 
operation (where the transformations are obvious), but it seemed easier to 
keep the transformations, in case they are not completely useless. I may 
have been a bit too enthusiastic with the :s on some transformations, but 
again they seem to be dead code... The converts also seem to be not so 
useful since they are expanded piecewise, even the ones that should be 
NOPS, so for

  _Complex unsigned f(_Complex int i){return i;}
we generate:
movl%edi, %eax
shrq$32, %rdi
salq$32, %rdi
orq %rdi, %rax
...
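
For reference, here is the kind of source these folds fire on (an
illustrative sketch only, not the new gcc.dg/tree-ssa/complex-7.c added
by the patch; the function names are made up):

  /* Sketch: after folding, the optimized dump should compute the
     real/imaginary parts directly and contain no CONJ_EXPR.  */
  _Complex double cc (_Complex double x)
  { return __builtin_conj (__builtin_conj (x)); } /* conj (conj (x)) -> x */

  double rplus (_Complex double x, _Complex double y)
  { return __real__ (x + y); }              /* real (x + y) -> real (x) + real (y) */

  double rconj (_Complex double x)
  { return __real__ (__builtin_conj (x)); } /* real (conj (x)) -> real (x) */

  double iconj (_Complex double x)
  { return __imag__ (__builtin_conj (x)); } /* imag (conj (x)) -> -imag (x) */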

Bootstrap+testsuite on ppc64le-redhat-linux.

2015-08-31  Marc Glisse  

gcc/
* match.pd (SIN, COS, TAN, COSH): Reorder for consistency.
(CEXPI): New operator list.
(real (conj (x)), imag (conj (x)), real (x +- y), real (cexpi (x)),
imag (cexpi (x)), conj (conj (x)), conj (complex (x, y))):
Converted from ...
* fold-const.c (fold_unary_loc, fold_binary_loc): ... here. Remove.

gcc/testsuite/
* gcc.dg/tree-ssa/complex-7.c: New file.

--
Marc Glisse

Index: gcc/fold-const.c
===
--- gcc/fold-const.c(revision 227316)
+++ gcc/fold-const.c(working copy)
@@ -7725,35 +7725,20 @@ fold_unary_loc (location_t loc, enum tre
   /* Strip sign ops from argument.  */
   if (TREE_CODE (type) == REAL_TYPE)
{
  tem = fold_strip_sign_ops (arg0);
  if (tem)
return fold_build1_loc (loc, ABS_EXPR, type,
fold_convert_loc (loc, type, tem));
}
   return NULL_TREE;
 
-case CONJ_EXPR:
-  if (TREE_CODE (TREE_TYPE (arg0)) != COMPLEX_TYPE)
-   return fold_convert_loc (loc, type, arg0);
-  if (TREE_CODE (arg0) == COMPLEX_EXPR)
-   {
- tree itype = TREE_TYPE (type);
- tree rpart = fold_convert_loc (loc, itype, TREE_OPERAND (arg0, 0));
- tree ipart = fold_convert_loc (loc, itype, TREE_OPERAND (arg0, 1));
- return fold_build2_loc (loc, COMPLEX_EXPR, type, rpart,
- negate_expr (ipart));
-   }
-  if (TREE_CODE (arg0) == CONJ_EXPR)
-   return fold_convert_loc (loc, type, TREE_OPERAND (arg0, 0));
-  return NULL_TREE;
-
 case BIT_NOT_EXPR:
   /* Convert ~(X ^ Y) to ~X ^ Y or X ^ ~Y if ~X or ~Y simplify.  */
   if (TREE_CODE (arg0) == BIT_XOR_EXPR
  && (tem = fold_unary_loc (loc, BIT_NOT_EXPR, type,
fold_convert_loc (loc, type,
  TREE_OPERAND (arg0, 0)
return fold_build2_loc (loc, BIT_XOR_EXPR, type, tem,
fold_convert_loc (loc, type,
  TREE_OPERAND (arg0, 1)));
   else if (TREE_CODE (arg0) == BIT_XOR_EXPR
@@ -7769,95 +7754,20 @@ fold_unary_loc (location_t loc, enum tre
 case TRUTH_NOT_EXPR:
   /* Note that the operand of this must be an int
 and its values must be 0 or 1.
 ("true" is a fixed value perhaps depending on the language,
 but we don't handle values other than 1 correctly yet.)  */
   tem = fold_truth_not_expr (loc, arg0);
   if (!tem)
return NULL_TREE;
   return fold_convert_loc (loc, type, tem);
 
-case REALPART_EXPR:
-  if (TREE_CODE (TREE_TYPE (arg0)) != COMPLEX_TYPE)
-   return fold_convert_loc (loc, type, arg0);
-  if (TREE_CODE (arg0) == PLUS_EXPR || TREE_CODE (arg0) == MINUS_EXPR)
-   {
- tree itype = TREE_TYPE (TREE_TYPE (arg0));
- tem = fold_build2_loc (loc, TREE_CODE (arg0), itype,
-fold_build1_loc (loc, REALPART_EXPR, itype,
- TREE_OPERAND (arg0, 0)),
-fold_build1_loc (loc, REALPART_EXPR, itype,
- TREE_OPERAND (arg0, 1)));
- return fold_convert_loc (loc, type, tem);
-   }
-  if (TREE_CODE (arg0) == CONJ_EXPR)
-   {
- tree itype = TREE_TYPE (TREE_TYPE (arg0));
- tem = fold_build1_loc (loc, REALPART_EXPR, itype,
-TREE_OPERAND (arg0, 0));
- return fold_convert_loc (loc, type, tem);
-   }
-  if (TREE_CODE (arg0) == CALL_EXPR)
-   {
- tree fn = get_callee_fndecl (arg0);
- if (fn && DECL_BUILT_IN_CLASS (fn) == BUILT_IN_NORMAL)
-   switch (DECL_FUNCTION_CODE (fn))
- {
- CASE_FLT_FN (BUILT_IN_CEXPI):
-   fn = mathfn_built_in (type, BUILT_IN_COS);
-   if (fn)
-   

Move some comparison simplifications to match.pd

2015-08-30 Thread Marc Glisse

Hello,

just trying to shrink fold-const.c a bit more.

initializer_zerop is close to what I was looking for with zerop, but I
wasn't sure it would be safe (it accepts some CONSTRUCTORs and
STRING_CSTs).  At some point I tried using sign_bit_p, but using the
return value of that function in the simplification confused the
machinery too much.  I added an "overload" of element_precision like the
one in element_mode, for convenience.
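
For what it's worth, the new predicate is only expected to be a thin
wrapper around the existing helpers; a minimal sketch, assuming it simply
dispatches to integer_zerop/real_zerop/fixed_zerop (see the tree.c hunk
in the patch for the actual definition):

  /* Sketch only: return true if EXPR is a zero constant of integral,
     real or fixed-point type.  Requires the usual tree.h context.  */
  bool
  zerop (const_tree expr)
  {
    return (integer_zerop (expr)
            || real_zerop (expr)
            || fixed_zerop (expr));
  }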


Bootstrap+testsuite on ppc64le-redhat-linux.


2015-08-31  Marc Glisse  

gcc/
* tree.h (zerop): New function.
* tree.c (zerop): Likewise.
(element_precision): Handle expressions.
* match.pd (define_predicates): Add zerop.
(x <= +Inf): Fix comment.
(abs (x) == 0, A & C == C, A & C != 0): Converted from ...
* fold-const.c (fold_binary_loc): ... here. Remove.

gcc/testsuite/
* gcc.dg/tree-ssa/cmp-1.c: New file.
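
Illustrative examples of the folds listed in the ChangeLog above (a
sketch only, not the actual cmp-1.c; function names are made up):

  #include <limits.h>
  int f1 (double x) { return __builtin_fabs (x) == 0.0; } /* abs (x) == 0 -> x == 0 */
  int f2 (unsigned a) { return (a & 4) == 4; }            /* (A & C) == C, C a power of 2 -> (A & C) != 0 */
  int f3 (int a) { return (a & INT_MIN) != 0; }           /* A & sign bit != 0 -> a < 0 */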

--
Marc Glisse

Index: gcc/fold-const.c
===
--- gcc/fold-const.c(revision 227316)
+++ gcc/fold-const.c(working copy)
@@ -10761,25 +10761,20 @@ fold_binary_loc (location_t loc,
1)),
  arg1, 0)
  && wi::extract_uhwi (TREE_OPERAND (arg0, 0), 0, 1) == 1)
{
  return omit_two_operands_loc (loc, type,
code == NE_EXPR
? boolean_true_node : boolean_false_node,
TREE_OPERAND (arg0, 1), arg1);
}
 
-  /* Convert ABS_EXPR == 0 or ABS_EXPR != 0 to x == 0 or x != 0.  */
-  if (TREE_CODE (arg0) == ABS_EXPR
- && (integer_zerop (arg1) || real_zerop (arg1)))
-   return fold_build2_loc (loc, code, type, TREE_OPERAND (arg0, 0), arg1);
-
   /* If this is an EQ or NE comparison with zero and ARG0 is
 (1 << foo) & bar, convert it to (bar >> foo) & 1.  Both require
 two operations, but the latter can be done in one less insn
 on machines that have only two-operand insns or on which a
 constant cannot be the first operand.  */
   if (TREE_CODE (arg0) == BIT_AND_EXPR
  && integer_zerop (arg1))
{
  tree arg00 = TREE_OPERAND (arg0, 0);
  tree arg01 = TREE_OPERAND (arg0, 1);
@@ -10868,35 +10863,20 @@ fold_binary_loc (location_t loc,
 ((X >> C1) & C2) != 0 is rewritten as (X,false), and
 ((X >> C1) & C2) == 0 is rewritten as (X,true).  */
  else
return omit_one_operand_loc (loc, type,
 code == EQ_EXPR ? integer_one_node
 : integer_zero_node,
 arg000);
}
}
 
-  /* If we have (A & C) == C where C is a power of 2, convert this into
-(A & C) != 0.  Similarly for NE_EXPR.  */
-  if (TREE_CODE (arg0) == BIT_AND_EXPR
- && integer_pow2p (TREE_OPERAND (arg0, 1))
- && operand_equal_p (TREE_OPERAND (arg0, 1), arg1, 0))
-   return fold_build2_loc (loc, code == EQ_EXPR ? NE_EXPR : EQ_EXPR, type,
-   arg0, fold_convert_loc (loc, TREE_TYPE (arg0),
-   integer_zero_node));
-
-  /* If we have (A & C) != 0 or (A & C) == 0 and C is the sign
-bit, then fold the expression into A < 0 or A >= 0.  */
-  tem = fold_single_bit_test_into_sign_test (loc, code, arg0, arg1, type);
-  if (tem)
-   return tem;
-
   /* If we have (A & C) == D where D & ~C != 0, convert this into 0.
 Similarly for NE_EXPR.  */
   if (TREE_CODE (arg0) == BIT_AND_EXPR
  && TREE_CODE (arg1) == INTEGER_CST
  && TREE_CODE (TREE_OPERAND (arg0, 1)) == INTEGER_CST)
{
  tree notc = fold_build1_loc (loc, BIT_NOT_EXPR,
   TREE_TYPE (TREE_OPERAND (arg0, 1)),
   TREE_OPERAND (arg0, 1));
  tree dandnotc
Index: gcc/match.pd
===
--- gcc/match.pd(revision 227316)
+++ gcc/match.pd(working copy)
@@ -21,20 +21,21 @@ for more details.
 You should have received a copy of the GNU General Public License
 along with GCC; see the file COPYING3.  If not see
 .  */
 
 
 /* Generic tree predicates we inherit.  */
 (define_predicates
integer_onep integer_zerop integer_all_onesp integer_minus_onep
integer_each_onep integer_truep integer_nonzerop
real_zerop real_onep real_minus_onep
+   zerop
CONSTANT_CLASS_P
tree_expr_nonnegative_p
integer_pow2p
HONOR_NANS)
 
 /* Operator lists.  */
 (define_operator_list tcc_comparison
   lt   le   eq ne ge   gt   unordered ordered   unlt unle ungt unge uneq ltgt)
 (define_operator_list inverted_tc

Re: [PATCH] fix --with-cpu for sh targets

2015-08-30 Thread Kaz Kojima
Rich Felker  wrote:
> A missing * in the pattern for sh targets prevents the --with-cpu
> configure option from being accepted for certain targets (e.g. ones
> with explicit endianness, like sh2eb).

Looks good to me, though we need the trunk patch first with
the appropriate ChangeLog entry.
Oleg, do you have any opinions?

Regards,
kaz


Re: [Patch] Add to the libgfortran/newlib bodge to "detect" ftruncate support in ARM/AArch64/SH

2015-08-30 Thread Hans-Peter Nilsson
(Pruned the CC list a bit as lists are included anyway)

On Fri, 28 Aug 2015, James Greenhalgh wrote:
> On Fri, Aug 28, 2015 at 10:40:31AM +0100, James Greenhalgh wrote:
> > On Tue, Aug 25, 2015 at 03:44:05PM +0100, FX wrote:
> > > > 2015-08-25  James Greenhalgh  
> > > >
> > > > * configure.ac: Auto-detect newlib function support unless we
> > > > know there are issues when configuring for a host.
> > > > * configure: Regenerate.
> > >
> > > Thanks for CC'ing the fortran list.
> > >
> > > Given that this is newlib-specific code, even though it's in libgfortran
> > > configury, you should decide and commit what's best. I don't think we have
> > > any newlib expert in the Fortran maintainers.
> > >
> > > Wait for 48 hours to see if anyone else objects, though.
> >
> > OK, it has been 48 hours and I haven't seen any objections. The newlib
> > patch has now been committed.
> >
> > I agree with Marcus' suggestion that we put the more comprehensive patch
> > (which requires the newlib fix) on trunk and my original patch (which does
> > not) on the release branches.
> >
> > I'll go ahead with that later today.
>
> Now in place on trunk (r227301), gcc-5-branch (r227302) and gcc-4_9-branch
> (r227304).
>
> Give me a shout if you see issues in your build systems.

Since you asked: I saw a build failure for cris-elf matching the
missing-kill-declaration issue, and I don't much like having to
take manual steps to force a new newlib version.  It isn't being
automatically updated because there are regressions in my gcc
test-suite results.  I guess autodetecting the kill-declaration
issue in libgfortran is unnecessarily complicated, in the presence
of a fixed newlib trunk.  All in all, I appreciate that you don't
force a new newlib on release branches.

brgds, H-P


Re: [PATCH] fix --with-cpu for sh targets

2015-08-30 Thread Oleg Endo

On 30 Aug 2015, at 19:41, Kaz Kojima  wrote:

> Rich Felker  wrote:
>> A missing * in the pattern for sh targets prevents the --with-cpu
>> configure option from being accepted for certain targets (e.g. ones
>> with explicit endianness, like sh2eb).
> 
> Looks good to me, though we need the trunk patch first with
> the appropriate ChangeLog entry.
> Oleg, do you have any opinions?

No, I don't.  If it doesn't break anything and fixes some inconvenience, please 
go ahead.

Cheers,
Oleg

[PATCH, rs6000] Improve swap optimization to modify general xxpermdi patterns

2015-08-30 Thread Bill Schmidt
Hi,

The VSX swap optimization currently misses opportunities to optimize
loops when expressions corresponding to xxpermdi instructions are
present (other than xxswapd instructions associated with loads and
stores).  These occur commonly when interleaving vector double or vector
unsigned long operands, or when concatenating two doubles or unsigned
longs to make a V2DI or V2DF result.  This patch adds logic to recognize
these insns and adjust them to account for swapped doublewords in the
computation.

Both opportunities arise in a simple test case that performs a reduction
on complex multiplications, which I've added here.
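
Something along these lines shows the pattern (illustrative only, not the
actual swaps-p8-19.c; the array names and size are made up):

  /* Reduction over complex multiplications; when vectorized for
     powerpc64le, interleaving the real/imaginary doublewords produces
     xxpermdi insns that the swap pass must now adjust instead of
     giving up on the loop.  */
  #define N 1024
  extern double ar[N], ai[N], br[N], bi[N];

  void
  cmul_accum (double *restrict cr, double *restrict ci)
  {
    double sr = 0.0, si = 0.0;
    for (int i = 0; i < N; i++)
      {
        sr += ar[i] * br[i] - ai[i] * bi[i];
        si += ar[i] * bi[i] + ai[i] * br[i];
      }
    *cr = sr;
    *ci = si;
  }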

Bootstrapped and tested on powerpc64le-unknown-linux-gnu with no
regressions.  Is this ok for trunk?

Thanks,
Bill


[gcc]

2015-08-30  Bill Schmidt  

* config/rs6000/rs6000.c (swap_web_entry): Enlarge
special_handling bitfield.
(special_handling_values): Add SH_XXPERMDI and SH_CONCAT.
(rtx_is_swappable_p): Add handling for vec_select/vec_concat
form
that represents a general xxpermdi.
(insn_is_swappable_p): Add handling for vec_concat of two
doublewords, which maps to a specific xxpermdi.
(adjust_xxpermdi): New function.
(adjust_concat): Likewise.
(handle_special_swappables): Call adjust_xxpermdi and
adjust_concat.
(dump_swap_insn_table): Handle SH_XXPERMDI and SH_CONCAT.

[gcc/testsuite]

2015-08-30  Bill Schmidt  

* gcc.target/powerpc/swaps-p8-19.c: New test.


Index: gcc/config/rs6000/rs6000.c
===
--- gcc/config/rs6000/rs6000.c  (revision 227326)
+++ gcc/config/rs6000/rs6000.c  (working copy)
@@ -34986,7 +34986,7 @@ class swap_web_entry : public web_entry_base
   /* A nonzero value indicates what kind of special handling for this
  insn is required if doublewords are swapped.  Undefined if
  is_swappable is not set.  */
-  unsigned int special_handling : 3;
+  unsigned int special_handling : 4;
   /* Set if the web represented by this entry cannot be optimized.  */
   unsigned int web_not_optimizable : 1;
   /* Set if this insn should be deleted.  */
@@ -35000,7 +35000,9 @@ enum special_handling_values {
   SH_NOSWAP_LD,
   SH_NOSWAP_ST,
   SH_EXTRACT,
-  SH_SPLAT
+  SH_SPLAT,
+  SH_XXPERMDI,
+  SH_CONCAT
 };
 
 /* Union INSN with all insns containing definitions that reach USE.
@@ -35192,6 +35194,20 @@ rtx_is_swappable_p (rtx op, unsigned int *special)
  *special = SH_EXTRACT;
  return 1;
}
+  /* An XXPERMDI is ok if we adjust the lanes.  Note that if the
+XXPERMDI is a swap operation, it will be identified by
+insn_is_swap_p and therefore we won't get here.  */
+  else if (GET_CODE (XEXP (op, 0)) == VEC_CONCAT
+  && (GET_MODE (XEXP (op, 0)) == V4DFmode
+  || GET_MODE (XEXP (op, 0)) == V4DImode)
+  && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
+  && XVECLEN (parallel, 0) == 2
+  && GET_CODE (XVECEXP (parallel, 0, 0)) == CONST_INT
+  && GET_CODE (XVECEXP (parallel, 0, 1)) == CONST_INT)
+   {
+ *special = SH_XXPERMDI;
+ return 1;
+   }
   else
return 0;
 
@@ -35369,6 +35385,17 @@ insn_is_swappable_p (swap_web_entry *insn_entry, r
   return 1;
 }
 
+  /* A concatenation of two doublewords is ok if we reverse the
+ order of the inputs.  */
+  if (GET_CODE (body) == SET
+  && GET_CODE (SET_SRC (body)) == VEC_CONCAT
+  && (GET_MODE (SET_SRC (body)) == V2DFmode
+ || GET_MODE (SET_SRC (body)) == V2DImode))
+{
+  *special = SH_CONCAT;
+  return 1;
+}
+
   /* Otherwise check the operands for vector lane violations.  */
   return rtx_is_swappable_p (body, special);
 }
@@ -35658,6 +35685,49 @@ adjust_splat (rtx_insn *insn)
 fprintf (dump_file, "Changing lane for splat %d\n", INSN_UID
(insn));
 }
 
+/* Given OP that contains an XXPERMDI operation (that is not a doubleword
+   swap), reverse the order of the source operands and adjust the indices
+   of the source lanes to account for doubleword reversal.  */
+static void
+adjust_xxpermdi (rtx_insn *insn)
+{
+  rtx set = PATTERN (insn);
+  rtx select = XEXP (set, 1);
+  rtx concat = XEXP (select, 0);
+  rtx src0 = XEXP (concat, 0);
+  XEXP (concat, 0) = XEXP (concat, 1);
+  XEXP (concat, 1) = src0;
+  rtx parallel = XEXP (select, 1);
+  int lane0 = INTVAL (XVECEXP (parallel, 0, 0));
+  int lane1 = INTVAL (XVECEXP (parallel, 0, 1));
+  int new_lane0 = 3 - lane1;
+  int new_lane1 = 3 - lane0;
+  XVECEXP (parallel, 0, 0) = GEN_INT (new_lane0);
+  XVECEXP (parallel, 0, 1) = GEN_INT (new_lane1);
+  INSN_CODE (insn) = -1; /* Force re-recognition.  */
+  df_insn_rescan (insn);
+
+  if (dump_file)
+fprintf (dump_file, "Changing lanes for xxpermdi %d\n", INSN_UID
(insn));
+}
+
+/* Given OP that contains a VEC_CONCAT operation of two doublewords,
+   reverse the orde

Re: [PATCH, rs6000] Improve swap optimization to modify general xxpermdi patterns

2015-08-30 Thread David Edelsohn
On Sun, Aug 30, 2015 at 7:19 PM, Bill Schmidt wrote:
> Hi,
>
> The VSX swap optimization currently misses opportunities to optimize
> loops when expressions corresponding to xxpermdi instructions are
> present (other than xxswapd instructions associated with loads and
> stores).  These occur commonly when interleaving vector double or vector
> unsigned long operands, or when concatenating two doubles or unsigned
> longs to make a V2DI or V2DF result.  This patch adds logic to recognize
> these insns and adjust them to account for swapped doublewords in the
> computation.
>
> Both opportunities arise in a simple test case that performs a reduction
> on complex multiplications, which I've added here.
>
> Bootstrapped and tested on powerpc64le-unknown-linux-gnu with no
> regressions.  Is this ok for trunk?
>
> Thanks,
> Bill
>
>
> [gcc]
>
> 2015-08-30  Bill Schmidt  
>
> * config/rs6000/rs6000.c (swap_web_entry): Enlarge
> special_handling bitfield.
> (special_handling_values): Add SH_XXPERMDI and SH_CONCAT.
> (rtx_is_swappable_p): Add handling for vec_select/vec_concat
> form
> that represents a general xxpermdi.
> (insn_is_swappable_p): Add handling for vec_concat of two
> doublewords, which maps to a specific xxpermdi.
> (adjust_xxpermdi): New function.
> (adjust_concat): Likewise.
> (handle_special_swappables): Call adjust_xxpermdi and
> adjust_concat.
> (dump_swap_insn_table): Handle SH_XXPERMDI and SH_CONCAT.
>
> [gcc/testsuite]
>
> 2015-08-30  Bill Schmidt  
>
> * gcc.target/powerpc/swaps-p8-19.c: New test.

Okay.

Thanks, David


Re: [PATCH] fix --with-cpu for sh targets

2015-08-30 Thread Kaz Kojima
I've committed the patch with the attached ChangeLog entry
after testing on sh-elf and i686-pc-linux-gnu.  I'd like to
backport it to the 5-branch after a week or so.
Rich, thanks for the patch.

Regards,
kaz
--
2015-08-30  Rich Felker 

* config.gcc (supported_defaults): Handle sh[123456ble]*-*-*
case instead of sh[123456ble]-*-*.

diff --git a/config.gcc b/config.gcc
index f8582eb..5712547 100644
--- a/config.gcc
+++ b/config.gcc
@@ -4200,7 +4200,7 @@ case "${target}" in
esac
;;
 
-   sh[123456ble]-*-* | sh-*-*)
+   sh[123456ble]*-*-* | sh-*-*)
supported_defaults="cpu"
case "`echo $with_cpu | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ_ 
abcdefghijklmnopqrstuvwxyz- | sed s/sh/m/`" in
"" | m1 | m2 | m2e | m3 | m3e | m4 | m4-single | m4-single-only 
| m4-nofpu )


Go patch committed: check for invalid UTF-8 in line comments

2015-08-30 Thread Ian Lance Taylor
This patch by Chris Manghane fixes the Go compiler to check for
invalid UTF-8 in line comments.  Otherwise the compiler might pass a
file that it shouldn't.  This fixes https://golang.org/issue/11527 .
Bootstrapped and ran Go testsuite on x86_64-unknown-linux-gnu.
Committed to mainline.

Ian
Index: gcc/go/gofrontend/MERGE
===
--- gcc/go/gofrontend/MERGE (revision 227299)
+++ gcc/go/gofrontend/MERGE (working copy)
@@ -1,4 +1,4 @@
-3aa2ea272e475010da8b480fc3095d0cd7254d12
+65672c16004c6d6d0247b6691881d282ffca89e3
 
 The first line of this file holds the git revision number of the last
 merge done from the gofrontend repository.
Index: gcc/go/gofrontend/lex.cc
===
--- gcc/go/gofrontend/lex.cc(revision 227299)
+++ gcc/go/gofrontend/lex.cc(working copy)
@@ -1689,6 +1689,16 @@ Lex::skip_cpp_comment()
   && memcmp(p, "line ", 5) == 0)
 {
   p += 5;
+
+  // Before finding FILE:LINENO, make sure line has valid characters.
+  const char* pcheck = p;
+  while (pcheck < pend)
+{
+  unsigned int c;
+  bool issued_error;
+  pcheck = this->advance_one_utf8_char(pcheck, &c, &issued_error);
+}
+
   while (p < pend && *p == ' ')
++p;
   const char* pcolon = static_cast<const char*>(memchr(p, ':', pend - p));


[PING][PATCH, PR 57195] Allow mode iterators inside angle brackets

2015-08-30 Thread Michael Collison

Ping. Originally posted here:

https://gcc.gnu.org/ml/gcc-patches/2015-08/msg01475.html

Regards,

Michael Collison





Re: [PATCH] [gomp] Simplify thread pool initialization

2015-08-30 Thread Sebastian Huber

Ping.

On 28/07/15 13:06, Sebastian Huber wrote:

Ping.

This is a pre-requisite for:

https://gcc.gnu.org/ml/gcc-patches/2015-07/msg02347.html

On 22/07/15 14:56, Sebastian Huber wrote:

Move the thread pool initialization from the team start to the team
creation.  This eliminates one conditional expression.  In addition this
is a preparation patch to enable shared thread pools which I would like
to use for RTEMS later.  No unexpected failures on
x86_64-unknown-linux-gnu.

libgomp/ChangeLog
2015-07-22  Sebastian Huber 

* team.c (gomp_new_thread_pool): Delete and move content to ...
(gomp_get_thread_pool): ... new function.  Allocate and
initialize thread pool on demand.
(get_last_team): Use gomp_get_thread_pool().
(gomp_team_start): Delete thread pool initialization.
---




--
Sebastian Huber, embedded brains GmbH

Address : Dornierstr. 4, D-82178 Puchheim, Germany
Phone   : +49 89 189 47 41-16
Fax : +49 89 189 47 41-09
E-Mail  : sebastian.hu...@embedded-brains.de
PGP : Public key available on request.

Diese Nachricht ist keine geschäftliche Mitteilung im Sinne des EHUG.



[PING^2][PATCH] Use single shared memory block pool for all pool allocators

2015-08-30 Thread Mikhail Maltsev
Ping.

On 08/03/2015 11:40 AM, Mikhail Maltsev wrote:
> On Jul 26, 2015, at 11:50 AM, Andi Kleen  wrote:
>> I've been compiling gcc with tcmalloc to do a similar speedup. It would be
>> interesting to compare that to your patch.
> I repeated the test with TCMalloc and jemalloc. TCMalloc shows nice results,
> though it required some tweaks: this allocator has a threshold block size 
> equal
> to 32 KB, larger blocks are allocated from global heap, rather than thread 
> cache
> (and this operation is expensive), so the original patch shows worse 
> performance
> when used with TCMalloc. In order to fix this, I reduced the block size to 8 
> KB.
> Here there are 5 columns for each value: pristine version, pristine version +
> TCMalloc (and the difference in parenthesis), and patched version with 
> TCMalloc
> (difference is relative to pristine version). Likewise, for memory usage.
> 
> 400.perlbench26.86  26.17 (  -2.57%)  26.17 (  -2.57%) user
>   0.56   0.64 ( +14.29%)   0.61 (  +8.93%) sys
>  27.45  26.84 (  -2.22%)  26.81 (  -2.33%) real
> 401.bzip2 2.532.5 (  -1.19%)   2.48 (  -1.98%) user
>   0.07   0.09 ( +28.57%)0.1 ( +42.86%) sys
>   2.612.6 (  -0.38%)   2.59 (  -0.77%) real
> 403.gcc  73.59  72.62 (  -1.32%)  71.72 (  -2.54%) user
>   1.59   1.88 ( +18.24%)   1.88 ( +18.24%) sys
>  75.27  74.58 (  -0.92%)  73.67 (  -2.13%) real
> 429.mcf0.4   0.41 (  +2.50%)0.4 (  +0.00%) user
>   0.03   0.05 ( +66.67%)   0.05 ( +66.67%) sys
>   0.44   0.47 (  +6.82%)   0.47 (  +6.82%) real
> 433.milc  3.22   3.24 (  +0.62%)   3.25 (  +0.93%) user
>   0.22   0.32 ( +45.45%)0.3 ( +36.36%) sys
>   3.48   3.59 (  +3.16%)   3.59 (  +3.16%) real
> 444.namd  7.54   7.41 (  -1.72%)   7.37 (  -2.25%) user
>0.1   0.15 ( +50.00%)   0.15 ( +50.00%) sys
>   7.66   7.58 (  -1.04%)   7.54 (  -1.57%) real
> 445.gobmk20.24  19.59 (  -3.21%)   19.6 (  -3.16%) user
>   0.52   0.67 ( +28.85%)   0.59 ( +13.46%) sys
>   20.8  20.29 (  -2.45%)  20.23 (  -2.74%) real
> 450.soplex   19.08  18.47 (  -3.20%)  18.51 (  -2.99%) user
>   0.87   1.11 ( +27.59%)   1.06 ( +21.84%) sys
>  19.99  19.62 (  -1.85%)   19.6 (  -1.95%) real
> 453.povray   42.27  41.42 (  -2.01%)  41.32 (  -2.25%) user
>   2.71   3.11 ( +14.76%)   3.09 ( +14.02%) sys
>  45.04  44.58 (  -1.02%)  44.47 (  -1.27%) real
> 456.hmmer 7.27   7.22 (  -0.69%)   7.15 (  -1.65%) user
>   0.31   0.36 ( +16.13%)   0.39 ( +25.81%) sys
>   7.61   7.61 (  +0.00%)   7.57 (  -0.53%) real
> 458.sjeng 3.22   3.14 (  -2.48%)   3.15 (  -2.17%) user
>   0.09   0.16 ( +77.78%)   0.14 ( +55.56%) sys
>   3.32   3.32 (  +0.00%)3.3 (  -0.60%) real
> 462.libquantum0.86   0.87 (  +1.16%)   0.85 (  -1.16%) user
>   0.05   0.08 ( +60.00%)   0.08 ( +60.00%) sys
>   0.92   0.96 (  +4.35%)   0.94 (  +2.17%) real
> 464.h264ref  27.62  27.27 (  -1.27%)  27.16 (  -1.67%) user
>   0.63   0.73 ( +15.87%)   0.75 ( +19.05%) sys
>  28.28  28.03 (  -0.88%)  27.95 (  -1.17%) real
> 470.lbm   0.27   0.27 (  +0.00%)   0.27 (  +0.00%) user
>   0.01   0.01 (  +0.00%)   0.01 (  +0.00%) sys
>   0.29   0.29 (  +0.00%)   0.29 (  +0.00%) real
> 471.omnetpp  28.29  27.63 (  -2.33%)  27.54 (  -2.65%) user
>1.5   1.57 (  +4.67%)   1.62 (  +8.00%) sys
>  29.84  29.25 (  -1.98%)  29.21 (  -2.11%) real
> 473.astar 1.14   1.12 (  -1.75%)   1.11 (  -2.63%) user
>   0.05   0.07 ( +40.00%)   0.09 ( +80.00%) sys
>   1.21   1.21 (  +0.00%)1.2 (  -0.83%) real
> 482.sphinx3   4.65   4.57 (  -1.72%)   4.59 (  -1.29%) user
>0.20.3 ( +50.00%)   0.26 ( +30.00%) sys
>   4.88   4.89 (  +0.20%)   4.88 (  +0.00%) real
> 483.xalancbmk284.5  276.4 (  -2.85%) 276.48 (  -2.82%) user
>  20.29  23.03 ( +13.50%)  22.82 ( +12.47%) sys
> 305.19 299.79 (  -1.77%) 299.67 (  -1.81%) real
> 
> 400.perlbench 102308kB123004kB  (  +20696kB)116104kB  (  +13796kB)
> 401.bzip2  74628kB 86936kB  (  +12308kB) 84316kB  (   +9688kB)
> 403.gcc   190284kB218180kB  (  +27896kB)212480kB  (  +22196kB)
> 429.mcf19804kB 24464kB  (   +4660kB) 24320kB  (   +4516kB)
> 433.milc