Go patch committed: Use backend interface for interface types

2011-05-06 Thread Ian Lance Taylor
This patch to the Go frontend uses the backend interface for interface
types.  Bootstrapped and ran Go testsuite on x86_64-unknown-linux-gnu.
Committed to mainline.

Ian

diff -r 37dae2a9c21b go/gogo-tree.cc
--- a/go/gogo-tree.cc	Wed May 04 08:38:47 2011 -0700
+++ b/go/gogo-tree.cc	Wed May 04 22:18:22 2011 -0700
@@ -1936,38 +1936,6 @@
   return build_fold_addr_expr(decl);
 }
 
-// Build the type of the struct that holds a slice for the given
-// element type.
-
-tree
-Gogo::slice_type_tree(tree element_type_tree)
-{
-  // We use int for the count and capacity fields in a slice header.
-  // This matches 6g.  The language definition guarantees that we
-  // can't allocate space of a size which does not fit in int
-  // anyhow. FIXME: integer_type_node is the C type "int" but is
-  // not necessarily the Go type "int".  They will differ when the C
-  // type "int" has fewer than 32 bits.
-  return Gogo::builtin_struct(NULL, "__go_slice", NULL_TREE, 3,
-			  "__values",
-			  build_pointer_type(element_type_tree),
-			  "__count",
-			  integer_type_node,
-			  "__capacity",
-			  integer_type_node);
-}
-
-// Given the tree for a slice type, return the tree for the type of
-// the elements of the slice.
-
-tree
-Gogo::slice_element_type_tree(tree slice_type_tree)
-{
-  go_assert(TREE_CODE(slice_type_tree) == RECORD_TYPE
-	    && POINTER_TYPE_P(TREE_TYPE(TYPE_FIELDS(slice_type_tree))));
-  return TREE_TYPE(TREE_TYPE(TYPE_FIELDS(slice_type_tree)));
-}
-
 // Build a constructor for a slice.  SLICE_TYPE_TREE is the type of
 // the slice.  VALUES is the value pointer and COUNT is the number of
 // entries.  If CAPACITY is not NULL, it is the capacity; otherwise
@@ -2011,21 +1979,6 @@
   return build_constructor(slice_type_tree, init);
 }
 
-// Build a constructor for an empty slice.
-
-tree
-Gogo::empty_slice_constructor(tree slice_type_tree)
-{
-  tree element_field = TYPE_FIELDS(slice_type_tree);
-  tree ret = Gogo::slice_constructor(slice_type_tree,
- fold_convert(TREE_TYPE(element_field),
-		  null_pointer_node),
- size_zero_node,
- size_zero_node);
-  TREE_CONSTANT(ret) = 1;
-  return ret;
-}
-
 // Build a map descriptor for a map of type MAPTYPE.
 
 tree
diff -r 37dae2a9c21b go/gogo.h
--- a/go/gogo.h	Wed May 04 08:38:47 2011 -0700
+++ b/go/gogo.h	Wed May 04 22:18:22 2011 -0700
@@ -465,16 +465,6 @@
   static void
   mark_fndecl_as_builtin_library(tree fndecl);
 
-  // Build the type of the struct that holds a slice for the given
-  // element type.
-  tree
-  slice_type_tree(tree element_type_tree);
-
-  // Given a tree for a slice type, return the tree for the element
-  // type.
-  static tree
-  slice_element_type_tree(tree slice_type_tree);
-
   // Build a constructor for a slice.  SLICE_TYPE_TREE is the type of
   // the slice.  VALUES points to the values.  COUNT is the size,
   // CAPACITY is the capacity.  If CAPACITY is NULL, it is set to
@@ -483,11 +473,6 @@
   slice_constructor(tree slice_type_tree, tree values, tree count,
 		tree capacity);
 
-  // Build a constructor for an empty slice.  SLICE_TYPE_TREE is the
-  // type of the slice.
-  static tree
-  empty_slice_constructor(tree slice_type_tree);
-
   // Build a map descriptor.
   tree
   map_descriptor(Map_type*);
diff -r 37dae2a9c21b go/types.cc
--- a/go/types.cc	Wed May 04 08:38:47 2011 -0700
+++ b/go/types.cc	Wed May 04 22:18:22 2011 -0700
@@ -4399,6 +4399,41 @@
  return this->length_tree_;
 }
 
+// Get the backend representation of the fields of a slice.  This is
+// not declared in types.h so that types.h doesn't have to #include
+// backend.h.
+//
+// We use int for the count and capacity fields.  This matches 6g.
+// The language more or less assumes that we can't allocate space of a
+// size which does not fit in int.
+
+static void
+get_backend_slice_fields(Gogo* gogo, Array_type* type,
+			 std::vector<Backend::Btyped_identifier>* bfields)
+{
+  bfields->resize(3);
+
+  Type* pet = Type::make_pointer_type(type->element_type());
+  Btype* pbet = tree_to_type(pet->get_tree(gogo));
+
+  Backend::Btyped_identifier* p = &(*bfields)[0];
+  p->name = "__values";
+  p->btype = pbet;
+  p->location = UNKNOWN_LOCATION;
+
+  Type* int_type = Type::lookup_integer_type("int");
+
+  p = &(*bfields)[1];
+  p->name = "__count";
+  p->btype = tree_to_type(int_type->get_tree(gogo));
+  p->location = UNKNOWN_LOCATION;
+
+  p = &(*bfields)[2];
+  p->name = "__capacity";
+  p->btype = tree_to_type(int_type->get_tree(gogo));
+  p->location = UNKNOWN_LOCATION;
+}
+
 // Get a tree for the type of this array.  A fixed array is simply
 // represented as ARRAY_TYPE with the appropriate index--i.e., it is
 // just like an array in C.  An open array is a struct with three
@@ -4409,8 +,9 @@
 {
   if (this-length_ == NULL)
 {
-  tree struct_type = gogo->slice_type_tree(void_type_node);
-  return this->fill_in_slice_tree(gogo, struct_type);
+  std::vector<Backend::Btyped_identifier> bfields;
+  get_backend_slice_fields(gogo, 

Re: RFC: A new MIPS64 ABI

2011-05-06 Thread Alexandre Oliva
Reviewing some old e-mail...

On Feb 21, 2011, David Daney dda...@caviumnetworks.com wrote:

 Everything identical to n32, except Pmode == DImode and
 POINTERS_EXTEND_UNSIGNED == true.

 Here is a patch that allows me to generate plausible looking assembly
 for trivial programs.

Neat!

Just one suggestion: instead of NB32 (what does that stand for?) how
about naming it u32?  It's shorter, clear (to me), and there's the fun
factor that the lower-case u looks like an upside-down n.

-- 
Alexandre Oliva, freedom fighterhttp://FSFLA.org/~lxoliva/
You must be the change you wish to see in the world. -- Gandhi
Be Free! -- http://FSFLA.org/   FSF Latin America board member
Free Software Evangelist  Red Hat Brazil Compiler Engineer


[committed] Fix var-tracking.c compilation on PCC_STATIC_STRUCT_RETURN targets (PR debug/48902)

2011-05-06 Thread Jakub Jelinek
Hi!

Committed as obvious:

2011-05-06  Jakub Jelinek  ja...@redhat.com

PR debug/48902
* var-tracking.c (prepare_call_arguments): Move else before #endif.

--- gcc/var-tracking.c.jj   2011-03-31 08:51:04.0 +0200
+++ gcc/var-tracking.c  2011-05-06 09:18:50.0 +0200
@@ -5646,8 +5646,8 @@ prepare_call_arguments (basic_block bb, 
  }
}
}
-#endif
  else
+#endif
INIT_CUMULATIVE_ARGS (args_so_far, type, NULL_RTX, fndecl,
  nargs);
   if (obj_type_ref && TYPE_ARG_TYPES (type) != void_list_node)

Jakub


Re: [google][RFA] add extra text to stack frame warnings (issue4479046)

2011-05-06 Thread Chris Demetriou
On Thu, May 5, 2011 at 12:19, Andrew Pinski pins...@gmail.com wrote:
 Is there a reason why this cannot be an option that someone passes on
 the command line of GCC instead of a configure option?

I don't think we ever considered that approach.
That's actually a great idea, I think better for our purposes than a
configuration option.
(Previously, it didn't much matter, since in our tree this was a small
local patch directly to final.c.)

Thank you, I'm going to do over taking the approach you suggested.


 Also can you
 show an example of why this message would be changed?

We use the stack frame size warning on some of our internal code.
(Obvious, I guess -- otherwise, why would I be messing with it.  8-)

In summary, -Wframe-larger-than does not always produce obvious results.  8-)

There are common questions, e.g.:
* why we care about this warning at all (i.e., why does stack frame
size matter?!).
* how to identify the cause of the warning (since it's not necessarily
obvious what's causing stack growth, and because the warning is
somewhat ... finicky thanks to inlining and thanks to
sometimes-less-than-great reuse of stack space from dead variables in
optimized and especially unoptimized code).
* how to work around, or if absolutely necessary disable the warning.

So, to help, when we output the frame-size warning, we also provide a
link to an internal documentation page to help with the stuff
mentioned above.

Of necessity, the doc link we provide explains our internal
circumstances and workarounds.  (Generic documentation wouldn't help
with a number of the questions.)


In theory, a more general warning-text-addition mechanism could be useful.
e.g. a flag that said when outputting a warning about flag 'foo',
output this additional text could be useful.
However, we haven't felt the need to do this for other warnings.

IMO, a general solution along these lines would be solving a problem
that ~nobody has.  8-)

If one wanted to dive into warning message changes, there are other,
more substantial changes IMO that would be generally useful and would
enable this type of functionality via external tools.
E.g., structured warnings with fixed identifiers (numbers, words,
whatever), blah blah blah.
If there were support for *that*, then people could write wrapper
tools that automatically annotate warnings with additional information
as necessary.
(it would also make parsing errors/warnings a lot easier.  8-)



Anyway, thanks for the suggestion.  8-)


chris


Re: RFA: Improve jump threading 5 of N

2011-05-06 Thread Richard Guenther
On Thu, May 5, 2011 at 6:11 PM, Jeff Law l...@redhat.com wrote:
 -BEGIN PGP SIGNED MESSAGE-
 Hash: SHA1


 I should have included these in the last patch of infrastructure changes.

 The main change is create_block_for_threading no longer calls
 remove_ctrl_stmt_and_useless_edges and instead its callers are expected
 to handle that, when needed.  This will allow me to use
 create_block_for_threading to duplicate the join block in a future patch.

 Additionally there was another place I should have been using a macro to
 access the edges stored in the aux field.

 Bootstrapped and regression tested on x86_64-unknown-linux-gnu.  OK for
 trunk?

Ok.

Thanks,
Richard.

 Thanks,
 Jeff
 -BEGIN PGP SIGNATURE-
 Version: GnuPG v1.4.11 (GNU/Linux)
 Comment: Using GnuPG with Fedora - http://enigmail.mozdev.org/

 iQEcBAEBAgAGBQJNwswlAAoJEBRtltQi2kC72U4H/Rup77S9Pi2bZgkT8k1wEY7x
 +teD8FOKAW52dhfFrYmI8pmOBsmC8WTvn3WlOX+a0/+eB+j2aX3OITDYAzxinu45
 6w+5jBHw96iJ3IvI1HIg6wsXo0HEJW40z6OeyPR06xz9AUh2xtJCh5Mh5WCC66Qf
 SPisgr/w5wteuHpDT/URsW/cPfhTS26SeB5x61QAXM7wwXDETBnI5nX+kGtZ7zTG
 x0qslTTePWvpYj4OqtlYzUSC/a0qKhc724ZRBsRlME+OQ/ClGh0ikAWD1kzjU899
 AmtrUWYf/NpYRe1XKLmylcAhN5qwYJ7rGNL5AdgD0lCzkjic63axOb9t3z6d3aY=
 =yU+L
 -END PGP SIGNATURE-



Re: [google] Patch to support calling multi-versioned functions via new GCC builtin. (issue4440078)

2011-05-06 Thread Richard Guenther
On Thu, May 5, 2011 at 7:02 PM, Xinliang David Li davi...@google.com wrote:
 On Thu, May 5, 2011 at 2:16 AM, Richard Guenther
 richard.guent...@gmail.com wrote:
 On Thu, May 5, 2011 at 12:19 AM, Xinliang David Li davi...@google.com 
 wrote:

 I can think of some more-or-less obvious high-level forms, one would
 for example simply stick a new DISPATCH tree into gimple_call_fn
 (similar to how we can have OBJ_TYPE_REF there), the DISPATCH
 tree would be of variable length, first operand the selector function
 and further operands function addresses.  That would keep the
 actual call visible (instead of a fake __builtin_dispatch call), something
 I'd really like to see.

 This sounds like a good long term solution.

 Thinking about it again maybe, similar to OBJ_TYPE_REF, have the
 selection itself lowered and only keep the set of functions as
 additional info.  Thus instead of having the selector function as
 first operand have a pointer to the selected function there (that also
 avoids too much knowledge about the return value of the selector).
 Thus,

  sel = selector ();
  switch (sel)
   {
   case A: fn = bar;
   case B: fn = foo;
   }
  val = (*DISPATCH (fn, bar, foo)) (...);

 that way regular optimizations can apply to the selection, eventually
 discard the dispatch if fn becomes a known direct function (similar
 to devirtualization).  At expansion time the call address is simply
 taken from the first operand and an indirect call is assembled.

 Does the above still provide enough knowledge for the IPA path isolation?


 I like your original proposal (extending call) better because related
 information are tied together and is easier to hoist and clean up.

 I want propose a more general solution.

 1) Generic Annotation Support for gcc IR -- it is used attach to
 application/optimization specific annotation to gimple statements and
 annotations can be passed around across passes. In gcc, I only see
 HISTOGRAM annotation for value profiling, which is not general enough
 2) Support of CallInfo for each callsite. This is an annotation, but
 more standardized. The callinfo can be used to record information such
 as call attributes, call side effects, mod-ref information etc ---
 current gimple_call_flags can be folded into this Info structure.

I don't like generic annotation facilities.  What should passes to with
annotated stmts that are a) transformed, b) removed?  See RTL notes
and all the interesting issues they cause.

 Similarly (not related to this discussion), LoopInfo structure can be
 introduced to annotate loop back edge jumps to allow FE to pass useful
 information at loop level. For floating pointer operations, things
 like the precision constraint, sensitivity to floating environment etc
 can be recorded in FPInfo.

Yes, the idea is to keep the loop structures live throughout the whole
compilation.  Just somebody needs to do the last 1% of work.

Richard.

 T


 Restricting ourselves to use the existing target attribute at the
 beginning (with a single, compiler-generated selector function)
 is probably good enough to get a prototype up and running.
 Extending it to arbitrary selector-function, value pairs using a
 new attribute is then probably easy (I don't see the exact use-case
 for that yet, but I suppose it exists if you say so).

 For the use cases, CPU model will be looked at instead of just the
 core architecture -- this will give use more information about the
 numbrer of cores, size of caches etc. Intel's runtime library does
 this checkiing at start up time so that the multi-versioned code can
 look at those and make the appropriate decisions.

 It will be even more complicated for arm processors -- which can have
 the same processor cores but configured differently w.r.t VFP, NEON
 etc.

 Ah, indeed.  I hadn't thought about the tuning for different variants
 as opposed to enabling HW features.  So the interface for overloading
 would be sth like

 enum X { Foo = 0, Bar = 5 };

 enum X select () { return Bar; }

 void foo (void) __attribute__((dispatch(select, Bar)));


 Yes, for overloading -- something like this looks good.

 Thanks,

 David



Re: [PATCH, ARM] PR47855 Compute attr length for thumb2 insns, 3/3 (issue4475042)

2011-05-06 Thread Carrot Wei
On Thu, May 5, 2011 at 5:42 PM, Richard Earnshaw rearn...@arm.com wrote:

 On Thu, 2011-05-05 at 14:51 +0800, Guozhi Wei wrote:
  Hi
 
  This is the third part of the fixing for
 
  http://gcc.gnu.org/bugzilla/show_bug.cgi?id=47855
 
  This patch contains the length computation/refinement for insn patterns
  *thumb2_movsi_insn, *thumb2_cbz and *thumb2_cbnz.
 
  At the same time this patch revealed two bugs. The first is the maximum 
  offset
  of cbz/cbnz, it should be 126, but it is 128 in patterns *thumb2_cbz and
  *thumb2_cbnz. The second is that only 2-register form of shift 
  instructions
  can be 16 bit, but 3-register form is allowed in *thumb2_shiftsi3_short 
  and
  related peephole2. The fix is also contained in this patch.
 
  The patch has been tested on arm qemu.
 
  thanks
  Carrot
 
 
  2011-05-05  Guozhi Wei  car...@google.com
 
        PR target/47855
        * config/arm/thumb2.md (thumb2_movsi_insn): Add length attribute.
        (thumb2_shiftsi3_short and peephole2): Remove 3-register case.
        (thumb2_cbz): Refine length computation.
        (thumb2_cbnz): Likewise.
 

 Hmm, although these changes are all related to length calculations, they
 are really three patches that are unrelated to each other.  It would be
 easier to review this if they were kept separate.

 1) thumb2_shiftsi3_short
 This appears to be a straight bug.  We are putting out a 32-bit
 instruction when we are claiming it to be only 16 bits.  This is OK.

 2) thumb2_movsi_insn
 There are two things here.
 a) Thumb2 has a 16-bit move instruction for all core
 register-to-register transfers, so the separation of alternatives 1 and
 2 is unnecessary -- just code these as rk.

done.


 b) The ldm form does not support unaligned memory accesses.  I'm aware
 that work is being done to add unaligned support to GCC for ARM, so I
 need to find out whether this patch will interfere with those changes.
 I'll try to find out what the situation is here and get back to you.

 3) thumb2_cbz and thumb2_cbnz
 The range calculations look wrong here.  Remember that the 'pc' as far
 as GCC is concerned is the address of the start of the insn.  So for a
 backwards branch you need to account for all the bytes in the insn
 pattern that occur before the branch instruction itself, and secondly
 you also have to remember that the 'pc' that the CPU uses is the address
 of the branch instruction plus 4.  All these conspire to reduce the
 backwards range of a short branch to several bytes less than the 256
 that you currently have coded.

The usage of 'pc' is more complex than I thought. I understood it after
reading the comment in file arm.md. And the description at
http://gcc.gnu.org/onlinedocs/gccint/Insn-Lengths.html#Insn-Lengths is not
right for forward branch cases. Now the ranges are modified accordingly.

It has been tested on arm qemu in thumb2 mode.

thanks
Carrot


2011-05-06  Guozhi Wei  car...@google.com

PR target/47855
* config/arm/thumb2.md (thumb2_movsi_insn): Add length attribute.
(thumb2_shiftsi3_short and peephole2): Remove 3-register case.
(thumb2_cbz): Refine length computation.
(thumb2_cbnz): Likewise.


Index: config/arm/thumb2.md
===
--- config/arm/thumb2.md(revision 173350)
+++ config/arm/thumb2.md(working copy)
@@ -165,23 +165,46 @@
 ;; regs.  The high register alternatives are not taken into account when
 ;; choosing register preferences in order to reflect their expense.
 (define_insn *thumb2_movsi_insn
-  [(set (match_operand:SI 0 nonimmediate_operand =rk,r,r,r,l ,*hk,m,*m)
-   (match_operand:SI 1 general_operand  rk ,I,K,j,mi,*mi,l,*hk))]
+  [(set (match_operand:SI 0 nonimmediate_operand =rk,r,r,r,l ,*rk,Uu,*m)
+   (match_operand:SI 1 general_operand  rk ,I,K,j,Uu,*mi,l ,*rk))]
   "TARGET_THUMB2 && ! TARGET_IWMMXT
    && !(TARGET_HARD_FLOAT && TARGET_VFP)
    && (   register_operand (operands[0], SImode)
        || register_operand (operands[1], SImode))"
-  @
-   mov%?\\t%0, %1
-   mov%?\\t%0, %1
-   mvn%?\\t%0, #%B1
-   movw%?\\t%0, %1
-   ldr%?\\t%0, %1
-   ldr%?\\t%0, %1
-   str%?\\t%1, %0
-   str%?\\t%1, %0
+  *
+  switch (which_alternative)
+{
+case 0: return \mov%?\\t%0, %1\;
+case 1: return \mov%?\\t%0, %1\;
+case 2: return \mvn%?\\t%0, #%B1\;
+case 3: return \movw%?\\t%0, %1\;
+
+case 4:
+  if (GET_CODE (XEXP (operands[1], 0)) == POST_INC)
+   {
+ operands[1] = XEXP (XEXP (operands[1], 0), 0);
+ return \ldm%(ia%)\t%1!, {%0}\;
+   }
+  else
+   return \ldr%?\\t%0, %1\;
+
+case 5: return \ldr%?\\t%0, %1\;
+
+case 6:
+  if (GET_CODE (XEXP (operands[0], 0)) == POST_INC)
+   {
+ operands[0] = XEXP (XEXP (operands[0], 0), 0);
+ return \stm%(ia%)\t%0!, {%1}\;
+   }
+  else
+   return \str%?\\t%1, %0\;
+
+case 7: return \str%?\\t%1, %0\;
+default: gcc_unreachable ();
+}

Re: [PATCH] Cleanup expand_shift

2011-05-06 Thread Richard Guenther
On Thu, 5 May 2011, Hans-Peter Nilsson wrote:

 On Thu, 5 May 2011, Richard Guenther wrote:
  On Wed, 4 May 2011, Richard Guenther wrote:
   On Wed, 4 May 2011, Eric Botcazou wrote:
   Hm.  I guess people will scream if something breaks (I can't imagine
   what though).
 
 AAAaaarghh!  Building cris-elf is now broken.
 
  I have applied the following after re-bootstrapping and testing on
  x86_64-unknown-linux-gnu and re-checking the mipsel cross testcase.
 
  Richard.
 
  2011-05-05  Richard Guenther  rguent...@suse.de
 
  * expmed.c (expand_variable_shift): Rename to ...
  (expand_shift_1): ... this.  Take an expanded shift amount.
  For rotates recurse directly not building trees for the shift amount.
  (expand_variable_shift): Wrap around expand_shift_1.
  (expand_shift): Adjust.
 
 PR 48908.

Ok, it seems simplify_gen_binary doesn't like VOIDmode.  The following
side-steps the issue of choosing an appropriate mode for a constant
shift amount and instead computes it in HWI.  Similar to the
SHIFT_COUNT_TRUNCATED path we don't bother about a CONST_DOUBLE shift
amount.

I'm going to bootstrap  regtest this on x86_64-unknown-linux-gnu
(with again zero testing coverage ...).  The patch fixes the
reported ICE with a cross to cris-elf, more testing is appreciated
(though I guess autotesters will pick it up).

Does it look sane?

Thanks,
Richard.

2011-05-06  Richard Guenther  rguent...@suse.de

PR middle-end/48908
* expmed.c (expand_shift_1): Compute adjusted constant shift
amount manually.

Index: gcc/expmed.c
===
*** gcc/expmed.c(revision 173473)
--- gcc/expmed.c(working copy)
*** expand_shift_1 (enum tree_code code, enu
*** 2141,2151 
  rtx new_amount, other_amount;
  rtx temp1;
  
  new_amount = op1;
! other_amount
!   = simplify_gen_binary (MINUS, GET_MODE (op1),
!  GEN_INT (GET_MODE_BITSIZE (mode)),
!  op1);
  
  shifted = force_reg (mode, shifted);
  
--- 2141,2156 
  rtx new_amount, other_amount;
  rtx temp1;
  
+ op1_mode = GET_MODE (op1);
  new_amount = op1;
! if (op1_mode == VOIDmode)
!   other_amount = GEN_INT (GET_MODE_BITSIZE (mode)
!   - INTVAL (op1));
! else
!   other_amount
! = simplify_gen_binary (MINUS, op1_mode,
!GEN_INT (GET_MODE_BITSIZE (mode)),
!op1);
  
  shifted = force_reg (mode, shifted);
  


Re: [patch, ARM] Fix PR target/48252

2011-05-06 Thread Richard Earnshaw

On Sun, 2011-05-01 at 10:30 +0300, Ira Rosen wrote:
 
 Ramana Radhakrishnan ramana.radhakrish...@linaro.org wrote on 07/04/2011
 03:16:44 PM:
 
 
  On 07/04/11 08:42, Ira Rosen wrote:
   Hi,
  
   This patch makes both outputs of neon_vzip/vuzp/vtrn_internal
   explicitly dependent on both inputs, preventing incorrect
   optimization:
   for
   (a,b)- vzip (c,d)
   and
   (e,f)- vzip (g,d)
   CSE decides that b==f, since b and f depend only on d.
  
   Tested on arm-linux-gnueabi. OK for trunk?
 
  This is OK for trunk.
 
   OK for 4.6 after testing?
 

I don't understand how it has happened, but the 4.6 patch that has been
committed is corrupt (the patch submitted here looks OK).

Please remember that it is essential to test release branches before
commits are made.

R.

  I have no objections to this going into 4.5 and 4.6 since it corrects
  the implementation of the neon intrinsics but please check with the
  release managers.
 
 OK to backport to 4.5 and 4.6 - both tested on arm-linux-gnueabi?
 
 Thanks,
 Ira
 
 4.5 and 4.6 ChangeLog:
 
   Backport from mainline:
   2011-04-18  Ulrich Weigand  ulrich.weig...@linaro.org
   Ira Rosen  ira.ro...@linaro.org
 
   PR target/48252
   * config/arm/arm.c (neon_emit_pair_result_insn): Swap arguments
   to match neon_vzip/vuzp/vtrn_internal.
   * config/arm/neon.md (neon_vtrnmode_internal): Make both
   outputs explicitly dependent on both inputs.
   (neon_vzipmode_internal, neon_vuzpmode_internal): Likewise.
 
 testsuite/Changelog:
 
   Backport from mainline:
   2011-04-18  Ulrich Weigand  ulrich.weig...@linaro.org
   Ira Rosen  ira.ro...@linaro.org
 
   PR target/48252
   * gcc.target/arm/pr48252.c: New test.
 
 
 4.5 patch:
 
 Index: config/arm/arm.c
 ===
 --- config/arm/arm.c(revision 172714)
 +++ config/arm/arm.c(working copy)
 @@ -18237,7 +18237,7 @@ neon_emit_pair_result_insn (enum machine_mode mode
rtx tmp1 = gen_reg_rtx (mode);
rtx tmp2 = gen_reg_rtx (mode);
 
 -  emit_insn (intfn (tmp1, op1, tmp2, op2));
 +  emit_insn (intfn (tmp1, op1, op2, tmp2));
 
emit_move_insn (mem, tmp1);
mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
 Index: config/arm/neon.md
 ===
 --- config/arm/neon.md  (revision 172714)
 +++ config/arm/neon.md  (working copy)
 @@ -3895,13 +3895,14 @@
 
  (define_insn neon_vtrnmode_internal
[(set (match_operand:VDQW 0 s_register_operand =w)
 -   (unspec:VDQW [(match_operand:VDQW 1 s_register_operand 0)]
 -UNSPEC_VTRN1))
 -   (set (match_operand:VDQW 2 s_register_operand =w)
 -(unspec:VDQW [(match_operand:VDQW 3 s_register_operand 2)]
 -UNSPEC_VTRN2))]
 +(unspec:VDQW [(match_operand:VDQW 1 s_register_operand 0)
 +  (match_operand:VDQW 2 s_register_operand w)]
 + UNSPEC_VTRN1))
 +   (set (match_operand:VDQW 3 s_register_operand =2)
 + (unspec:VDQW [(match_dup 1) (match_dup 2)]
 + UNSPEC_VTRN2))]
TARGET_NEON
 -  vtrn.V_sz_elem\t%V_reg0, %V_reg2
 +  vtrn.V_sz_elem\t%V_reg0, %V_reg3
[(set (attr neon_type)
(if_then_else (ne (symbol_ref Is_d_reg) (const_int 0))
  (const_string neon_bp_simple)
 @@ -3921,13 +3922,14 @@
 
  (define_insn neon_vzipmode_internal
[(set (match_operand:VDQW 0 s_register_operand =w)
 -   (unspec:VDQW [(match_operand:VDQW 1 s_register_operand 0)]
 -UNSPEC_VZIP1))
 -   (set (match_operand:VDQW 2 s_register_operand =w)
 -(unspec:VDQW [(match_operand:VDQW 3 s_register_operand 2)]
 -UNSPEC_VZIP2))]
 +(unspec:VDQW [(match_operand:VDQW 1 s_register_operand 0)
 +  (match_operand:VDQW 2 s_register_operand w)]
 + UNSPEC_VZIP1))
 +   (set (match_operand:VDQW 3 s_register_operand =2)
 +(unspec:VDQW [(match_dup 1) (match_dup 2)]
 + UNSPEC_VZIP2))]
TARGET_NEON
 -  vzip.V_sz_elem\t%V_reg0, %V_reg2
 +  vzip.V_sz_elem\t%V_reg0, %V_reg3
[(set (attr neon_type)
(if_then_else (ne (symbol_ref Is_d_reg) (const_int 0))
  (const_string neon_bp_simple)
 @@ -3947,13 +3949,14 @@
 
  (define_insn neon_vuzpmode_internal
[(set (match_operand:VDQW 0 s_register_operand =w)
 -   (unspec:VDQW [(match_operand:VDQW 1 s_register_operand 0)]
 +(unspec:VDQW [(match_operand:VDQW 1 s_register_operand 0)
 +  (match_operand:VDQW 2 s_register_operand w)]
   UNSPEC_VUZP1))
 -   (set (match_operand:VDQW 2 s_register_operand =w)
 -(unspec:VDQW [(match_operand:VDQW 3 s_register_operand 2)]
 -UNSPEC_VUZP2))]
 +   (set (match_operand:VDQW 3 s_register_operand =2)
 +(unspec:VDQW [(match_dup 1) (match_dup 2)]
 +   

Re: [PATCH] Canonicalize compares in combine [2/3] Modifications to try_combine()

2011-05-06 Thread Paolo Bonzini

On 04/22/2011 05:21 PM, Chung-Lin Tang wrote:

Also, instead of testing for XEXP(SET_SRC(PATTERN(i3)),1) == const0_rtx
at the top, it now allows CONST_INT_P(XEXP(SET_SRC(PATTERN(i3)),1)),
tries to adjust it by simplify_compare_const() from the last patch, and
then tests if op1 == const0_rtx. This is a small improvement in some cases.


I'm not sure why it doesn't allow both?

Paolo


[committed] Fix -fdefault-integer-8 omp_lib wrappers (PR fortran/pr48894)

2011-05-06 Thread Jakub Jelinek
Hi!

This patch fixes the -fdefault-integer-8 wrappers, tested on x86_64-linux,
committed to trunk and 4.[654] branches.

2011-05-06  Jakub Jelinek  ja...@redhat.com

PR fortran/48894
* fortran.c: Include limits.h.
(TO_INT): Define.
(omp_set_dynamic_8_, omp_set_num_threads_8_): Use !!*set instead of
*set.
(omp_set_num_threads_8_, omp_set_schedule_8_,
omp_set_max_active_levels_8_, omp_get_ancestor_thread_num_8_,
omp_get_team_size_8_): Use TO_INT macro.
* testsuite/libgomp.fortran/pr48894.f90: New test.

--- libgomp/fortran.c.jj2009-04-14 16:33:07.0 +0200
+++ libgomp/fortran.c   2011-05-06 10:13:46.0 +0200
@@ -1,4 +1,4 @@
-/* Copyright (C) 2005, 2007, 2008, 2009 Free Software Foundation, Inc.
+/* Copyright (C) 2005, 2007, 2008, 2009, 2011 Free Software Foundation, Inc.
Contributed by Jakub Jelinek ja...@redhat.com.
 
This file is part of the GNU OpenMP Library (libgomp).
@@ -27,6 +27,7 @@
 #include libgomp.h
 #include libgomp_f.h
 #include stdlib.h
+#include limits.h
 
 #ifdef HAVE_ATTRIBUTE_ALIAS
 /* Use internal aliases if possible.  */
@@ -244,6 +245,8 @@ omp_lock_symver (omp_unset_nest_lock_)
 omp_lock_symver (omp_test_nest_lock_)
 #endif
 
+#define TO_INT(x) ((x) > INT_MIN ? (x) < INT_MAX ? (x) : INT_MAX : INT_MIN)
+
 void
 omp_set_dynamic_ (const int32_t *set)
 {
@@ -253,7 +256,7 @@ omp_set_dynamic_ (const int32_t *set)
 void
 omp_set_dynamic_8_ (const int64_t *set)
 {
-  omp_set_dynamic (*set);
+  omp_set_dynamic (!!*set);
 }
 
 void
@@ -265,7 +268,7 @@ omp_set_nested_ (const int32_t *set)
 void
 omp_set_nested_8_ (const int64_t *set)
 {
-  omp_set_nested (*set);
+  omp_set_nested (!!*set);
 }
 
 void
@@ -277,7 +280,7 @@ omp_set_num_threads_ (const int32_t *set
 void
 omp_set_num_threads_8_ (const int64_t *set)
 {
-  omp_set_num_threads (*set);
+  omp_set_num_threads (TO_INT (*set));
 }
 
 int32_t
@@ -343,7 +346,7 @@ omp_set_schedule_ (const int32_t *kind, 
 void
 omp_set_schedule_8_ (const int32_t *kind, const int64_t *modifier)
 {
-  omp_set_schedule (*kind, *modifier);
+  omp_set_schedule (*kind, TO_INT (*modifier));
 }
 
 void
@@ -381,7 +384,7 @@ omp_set_max_active_levels_ (const int32_
 void
 omp_set_max_active_levels_8_ (const int64_t *levels)
 {
-  omp_set_max_active_levels (*levels);
+  omp_set_max_active_levels (TO_INT (*levels));
 }
 
 int32_t
@@ -405,7 +408,7 @@ omp_get_ancestor_thread_num_ (const int3
 int32_t
 omp_get_ancestor_thread_num_8_ (const int64_t *level)
 {
-  return omp_get_ancestor_thread_num (*level);
+  return omp_get_ancestor_thread_num (TO_INT (*level));
 }
 
 int32_t
@@ -417,7 +420,7 @@ omp_get_team_size_ (const int32_t *level
 int32_t
 omp_get_team_size_8_ (const int64_t *level)
 {
-  return omp_get_team_size (*level);
+  return omp_get_team_size (TO_INT (*level));
 }
 
 int32_t
--- libgomp/testsuite/libgomp.fortran/pr48894.f90.jj2011-05-06 
10:27:01.0 +0200
+++ libgomp/testsuite/libgomp.fortran/pr48894.f90   2011-05-06 
10:26:29.0 +0200
@@ -0,0 +1,23 @@
+! PR fortran/48894
+! { dg-do run }
+! { dg-options "-fdefault-integer-8" }
+
+  use omp_lib
+  integer, parameter :: zero = 0
+  integer :: err
+  logical :: l
+  err = 0
+  !$omp parallel
+!$omp parallel private (l)
+  l = omp_get_ancestor_thread_num (-HUGE (zero)) .ne. -1
+  l = l .or. (omp_get_ancestor_thread_num (HUGE (zero)) .ne. -1)
+  l = l .or. (omp_get_team_size (-HUGE (zero)) .ne. -1)
+  l = l .or. (omp_get_team_size (HUGE (zero)) .ne. -1)
+  if (l) then
+!$omp atomic
+  err = err + 1
+  endif
+!$omp end parallel
+  !$omp end parallel
+  if (err .ne. 0) call abort
+end

Jakub


Re: [PATCH][ARM] Thumb2 replicated constants

2011-05-06 Thread Richard Earnshaw

On Thu, 2011-04-21 at 12:23 +0100, Andrew Stubbs wrote:
 This patch is a repost of the one I previously posted here:
 
http://gcc.gnu.org/ml/gcc-patches/2010-12/msg00652.html
 
 As requested, I've broken out the other parts of the original patch, and 
 those have already been reposted yesterday (and one committed also).
 
 This (final) part is support for using Thumb2's replicated constants and 
 addw/subw instructions as part of split constant loads. Previously the 
 compiler could use these constants, but only where they would be loaded 
 in a single instruction.
 
 This patch must be applied on top of the addw/subw patch I posted yesterday.
 
 The patch also optimizes the use of inverted or negated constants as a 
 short-cut to the final value. The previous code did this in some cases, 
 but could not be easily adapted to replicated constants.
 
 The previous code also had a bug that prevented optimal use of shifted 
 constants in Thumb code by imposing the same restrictions as ARM code. 
 This has been fixed.
 
 Example 1: addw as part of a split constant load
 
 a + 0xf
 
 Before:
   movwr3, #65535   ; 0x0
   movtr3, 15   ; 0xf
   addsr3, r0, r3
 After:
   add r0, r0, #1044480 ; 0xff000
   addwr0, r0, #4095; 0x00fff
 
 Example 2: arbitrary shifts bug fix
 
 a - 0xfff1
 
 Before:
   sub r0, r0, #65024   ; 0xfe00
   sub r0, r0, #496 ; 0x01f0
   sub r0, r0, #1   ; 0x0001
 After:
   sub r0, r0, #65280   ; 0xff00
   sub r0, r0, #241 ; 0x00f1
 
 Example 3: 16-bit replicated patterns
 
 a + 0x44004401
 
 Before:
   movwr3, #17409  ; 0x4401
   movtr3, 17408   ; 0x4400
   addsr3, r0, r3
 After:
   add r0, r0, #1140868096 ; 0x44004400
   addsr0, r0, #1  ; 0x0001
 
 Example 4: 32-bit replicated patterns
 
 a & 0xaaaaaa00
 
 Before:
   mov r3, #43520   ; 0xaa00
   movtr3, 43690; 0x
   and r3, r0, r3
 After:
   and r0, r0, #-1431655766 ; 0x
   bic r0, r0, #170 ; 0x00aa
 
 The constant splitting code was duplicated in two places, and I would 
 have needed to modify both quite heavily, so I have taken the 
 opportunity to unify the two, and hopefully reduce the future 
 maintenance burden.
 
 Let me respond to a point Richard Earnshaw raised following the original 
 posting:
 
   A final note is that you may have missed some cases.  Now that we have
   movw,
   reg & ~(16-bit const)
   can now be done in at most 2 insns:
  movw t1, #16-bit const
  bic  Rd, reg, t1
 
 Actually, I think we can do better than that for a 16-bit constant.
 
 Given:
 
 a & ~(0xabcd)
 
 Before my changes, GCC gave:
 
  bic r0, r0, #43520
  bic r0, r0, #460
  bic r0, r0, #1
 
 and after applying my patch:
 
  bic r0, r0, #43776
  bic r0, r0, #205
 
 Two instructions and no temporary register.
 
   On thumb-2 you can also use ORN that way as well.
 
 It turns out that my previous patch was broken for ORN. I traced the 
 problem to some confusing code already in arm.c that set can_invert for 
 IOR, but then explicitly ignored it later (I had removed the second 
 part, but not the first). I posted, and committed a patch to fix this 
 yesterday.
 
 In fact ORN is only of limited use for this kind of thing. Like AND, you 
 can't use multiple ORNs to build a constant. The compiler already does 
 use ORN in some circumstances, and this patch has not changed that.
 
 Is the patch OK?
 
 Andrew

+   RETURN_SEQUENCE must be an int[4].

It would be a more robust coding style to define a struct with an int[4]
array as its only member.  Then it wouldn't be possible to pass an
undersized object to these routines.

OK with a change to do that.

R.




Re: Ping: Make 128 bits the default vector size for NEON

2011-05-06 Thread Richard Earnshaw

On Thu, 2011-04-21 at 09:02 +0300, Ira Rosen wrote:
 http://gcc.gnu.org/ml/gcc-patches/2011-03/msg02172.html
 
 The last version:
 
 ChangeLog:
 
  * doc/invoke.texi (preferred-vector-size): Document.
  * params.h (PREFERRED_VECTOR_SIZE): Define.
  * config/arm/arm.c (arm_preferred_simd_mode): Use param
  PREFERRED_VECTOR_SIZE instead of
  TARGET_NEON_VECTORIZE_QUAD. Make 128 bits the default.
  (arm_autovectorize_vector_sizes): Likewise.
  * config/arm/arm.opt (NEON_VECTORIZE_QUAD): Add
  RejectNegative.
  * params.def (PARAM_PREFERRED_VECTOR_SIZE): Define.
 
 testsuite/ChangeLog:
 
  * lib/target-supports.exp (check_effective_target_vect_multiple_sizes):
  New procedure.
  (add_options_for_quad_vectors): Replace with ...
  (add_options_for_double_vectors): ... this.
  * gfortran.dg/vect/pr19049.f90: Expect more printings on targets that
  support multiple vector sizes since the vectorizer attempts to
  vectorize with both vector sizes.
  * gcc.dg/vect/slp-reduc-6.c, gcc.dg/vect/no-vfa-vect-79.c,
  gcc.dg/vect/no-vfa-vect-102a.c, gcc.dg/vect/vect-outer-1a.c,
  gcc.dg/vect/vect-outer-1b.c, gcc.dg/vect/vect-outer-2b.c,
  gcc.dg/vect/vect-outer-3a.c, gcc.dg/vect/no-vfa-vect-37.c,
  gcc.dg/vect/vect-outer-3b.c, gcc.dg/vect/no-vfa-vect-101.c,
  gcc.dg/vect/no-vfa-vect-102.c, gcc.dg/vect/vect-reduc-dot-s8b.c,
  gcc.dg/vect/vect-outer-1.c, gcc.dg/vect/vect-104.c: Likewise.
  * gcc.dg/vect/vect-16.c: Rename to...
  * gcc.dg/vect/no-fast-math-vect-16.c: ... this to ensure that it runs
  without -ffast-math.
  * gcc.dg/vect/vect-42.c: Run with 64 bit vectors if applicable.
  * gcc.dg/vect/vect-multitypes-6.c, gcc.dg/vect/vect-52.c,
  gcc.dg/vect/vect-54.c, gcc.dg/vect/vect-46.c, gcc.dg/vect/vect-48.c,
  gcc.dg/vect/vect-96.c, gcc.dg/vect/vect-multitypes-3.c,
  gcc.dg/vect/vect-40.c: Likewise.
  * gcc.dg/vect/vect-outer-5.c: Remove quad-vectors option as
  redundant.
  * gcc.dg/vect/vect-109.c, gcc.dg/vect/vect-peel-1.c,
  gcc.dg/vect/vect-peel-2.c, gcc.dg/vect/slp-25.c,
  gcc.dg/vect/vect-multitypes-1.c, gcc.dg/vect/slp-3.c,
  gcc.dg/vect/no-vfa-pr29145.c, gcc.dg/vect/vect-multitypes-4.c:
  Likewise.
  * gcc.dg/vect/vect.exp: Run no-fast-math-vect*.c tests with
  -fno-fast-math.
 
 Thanks,
 Ira

+@item preferred-vector-size
+Preferred vector size in bits for targets that support multiple vector sizes.
+Invalid values are ignored.  The default is 128.
+

Shouldn't the preferred size be the largest size supported by the
target?  Setting it to 128 might be OK today, but who knows what might
happen in future?  

R.




[patch] PR 48837

2011-05-06 Thread Zdenek Dvorak
Hi,

when accumulator transformation is performed on a function like

foo(a)
{
  if (a > 0)
return 1 + foo (a - 1)

  return bla();
}

this becomes

foo(a)
{
  int tmp = 0;

  while (a > 0)
    tmp = 1 + tmp;

  return tmp + bla();
}

Before, bla was a tail-call, but after the optimization, it is not (since an 
addition
has to be performed after the result of bla is known).  However, we used to 
mark bla
as tail-call, leading to a miscompilation later.  Fixed by not marking 
tail-calls
when the transformation is performed.  Bootstrapped and regtested on i686.

Zdenek

PR tree-optimization/48837
* tree-tailcall.c (tree_optimize_tail_calls_1): Do not mark tailcalls
when accumulator transformation is performed.

* gcc.dg/pr48837.c: New testcase.

Index: tree-tailcall.c
===
--- tree-tailcall.c (revision 173354)
+++ tree-tailcall.c (working copy)
@@ -1021,6 +1021,14 @@ tree_optimize_tail_calls_1 (bool opt_tailcalls)
 integer_one_node);
 }
 
+  if (a_acc || m_acc)
+{
+  /* When the tail call elimination using accumulators is performed,
+statements adding the accumulated value are inserted at all exits.
+This turns all other tail calls to non-tail ones.  */
+  opt_tailcalls = false;
+}
+
   for (; tailcalls; tailcalls = next)
 {
   next = tailcalls-next;
Index: testsuite/gcc.dg/pr48837.c
===
--- testsuite/gcc.dg/pr48837.c  (revision 0)
+++ testsuite/gcc.dg/pr48837.c  (revision 0)
@@ -0,0 +1,30 @@
+/* PR tree-optimization/48837 */
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+
+void abort (void);
+
+__attribute__((noinline))
+int baz(void)
+{
+  return 1;
+}
+
+inline const int *bar(const int *a, const int *b)
+{
+ return *a ? a : b;
+}
+
+int foo(int a, int b)
+{
+   return a || b ? baz() : foo(*bar(&a, &b), 1) + foo(1, 0);
+}
+
+int main(void)
+{
+ if (foo(0, 0) != 2)
+   abort();
+
+ return 0;
+}
+


Re: Add an array_mode_supported_p target hook

2011-05-06 Thread Richard Earnshaw

On Fri, 2011-05-06 at 11:35 +0100, Richard Sandiford wrote:
 Richard Earnshaw rearn...@arm.com writes:
  +/* Implements target hook array_mode_supported_p.  */
  +
  +static bool
  +arm_array_mode_supported_p (enum machine_mode mode,
  +  unsigned HOST_WIDE_INT nelems)
  +{
  +  if (TARGET_NEON
  +      && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
  +      && (nelems >= 2 && nelems <= 4))
  +return true;
  +
  +  return false;
  +}
 
  I'm not sure I understand why this is limited to 4 or fewer elements. A
  Q reg of chars would surely be 16 elements.
 
 The mode here is the mode of the array element, which for the cases
 we're interested in would be something like V4HI (D) or V4SI (Q).
 nelems says how many of those (in our case, vector) elements there
 are in the array.
 
 The element range we want is 1-4 because that matches the number
 of vectors that can be loaded by the vld1-vld4 instructions.
 We don't include 1 because arrays of one element are already
 treated as having the same mode as their element.
 
 Richard

I understand now...

Ok.

R.
 




Re: [PATCH] Canonicalize compares in combine [2/3] Modifications to try_combine()

2011-05-06 Thread Chung-Lin Tang
On 2011/5/6 05:57 PM, Paolo Bonzini wrote:
 On 04/22/2011 05:21 PM, Chung-Lin Tang wrote:
 Also, instead of testing for XEXP(SET_SRC(PATTERN(i3)),1) == const0_rtx
 at the top, it now allows CONST_INT_P(XEXP(SET_SRC(PATTERN(i3)),1)),
 tries to adjust it by simplify_compare_const() from the last patch, and
 then tests if op1 == const0_rtx. This is a small improvement in some
 cases.
 
 I'm not sure why it doesn't allow both?
 
 Paolo

Hi Paolo, I'm not sure I understand your meaning of 'both', but before
this patch, it only tested for == const0_rtx, without any attempt of
other cases.

Now it tests CONST_INT_P(XEXP(SET_SRC(PATTERN(i3)),1)), attempts a
simplification which may change a non-zero constant to const0_rtx, then
test for const0_rtx. Supposedly, the new code should be strictly more
general.

Thanks,
Chung-Lin


[PATCH] Fix PR target/48807

2011-05-06 Thread Henderson, Stuart
Hi,
The attached patch is a fix for PR/48807, which is a segfault when compiling 
the bfin compiler.  The problem appears to be that we're not checking the 
return value of cgraph_local_info for null before attempting to dereference it. 
 This wasn't a problem before, but now cgraph_local_info calls cgraph_get_node 
(instead of the old cgraph_node), we cannot assume it will always return 
non-null.

Fix is in bfin specific code.

Ok to commit to trunk?

Stu


2011-05-06  Stuart Henderson  shend...@gcc.gnu.org

* config/bfin/bfin.c: Check return value of cgraph_local_info for null 
before
attempting to use it.




Index: gcc/config/bfin/bfin.c
===
--- gcc/config/bfin/bfin.c  (revision 173363)
+++ gcc/config/bfin/bfin.c  (working copy)
@@ -2077,6 +2077,8 @@

   this_func = cgraph_local_info (current_function_decl);
   called_func = cgraph_local_info (decl);
+  if (!called_func || !this_func)
+return false;
   return !called_func-local || this_func-local;
 }







Re: Add an array_mode_supported_p target hook

2011-05-06 Thread Richard Sandiford
Richard Earnshaw rearn...@arm.com writes:
 +/* Implements target hook array_mode_supported_p.  */
 +
 +static bool
 +arm_array_mode_supported_p (enum machine_mode mode,
 +unsigned HOST_WIDE_INT nelems)
 +{
+  if (TARGET_NEON
+      && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
+      && (nelems >= 2 && nelems <= 4))
 +return true;
 +
 +  return false;
 +}

 I'm not sure I understand why this is limited to 4 or fewer elements. A
 Q reg of chars would surely be 16 elements.

The mode here is the mode of the array element, which for the cases
we're interested in would be something like V4HI (D) or V4SI (Q).
nelems says how many of those (in our case, vector) elements there
are in the array.

The element range we want is 1-4 because that matches the number
of vectors that can be loaded by the vld1-vld4 instructions.
We don't include 1 because arrays of one element are already
treated as having the same mode as their element.

Richard


Re: [PATCH] Fix PR c++/48574

2011-05-06 Thread Dodji Seketeli
Jason Merrill ja...@redhat.com writes:

 How about type_dependent_expression_p_push instead?

Like this ?  Lightly tested.  A full bootstrap and regression test is
under way.

Thanks.

gcc/cp/

PR c++/48574 * class.c (fixed_type_or_null): Use
type_dependent_p_push to test if the instance has a dependent
initializer.

gcc/testsuite/

PR c++/48574
* g++.dg/template/dependent-expr8.C: New test case.
---
 gcc/cp/class.c  |2 +-
 gcc/testsuite/g++.dg/template/dependent-expr8.C |   25 +++
 2 files changed, 26 insertions(+), 1 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/template/dependent-expr8.C

diff --git a/gcc/cp/class.c b/gcc/cp/class.c
index a67b34a..6b08a03 100644
--- a/gcc/cp/class.c
+++ b/gcc/cp/class.c
@@ -5939,7 +5939,7 @@ fixed_type_or_null (tree instance, int *nonnull, int 
*cdtorp)
 itself.  */
  if (TREE_CODE (instance) == VAR_DECL
      && DECL_INITIAL (instance)
-     && !type_dependent_expression_p (DECL_INITIAL (instance))
+     && !type_dependent_expression_p_push (DECL_INITIAL (instance))
      && !htab_find (ht, instance))
{
  tree type;
diff --git a/gcc/testsuite/g++.dg/template/dependent-expr8.C 
b/gcc/testsuite/g++.dg/template/dependent-expr8.C
new file mode 100644
index 000..20014d6
--- /dev/null
+++ b/gcc/testsuite/g++.dg/template/dependent-expr8.C
@@ -0,0 +1,25 @@
+// Origin PR c++/48574
+// { dg-options -std=c++0x }
+// { dg-do compile }
+
+struct A
+{
+  virtual int foo();
+};
+
+void baz (int);
+
+template typename T
+void
+bar(T x)
+{
+  A b = *x;
+  baz (b.foo ());
+}
+
+void
+foo()
+{
+  A a;
+  bar(a);
+}
-- 
Dodji


[Comitted] S/390: Fix static chain trampolines with -m31 -mzarch

2011-05-06 Thread Andreas Krebbel
Hi,

the attached patch fixes a problem with trampolines in -m31 -mzarch
mode.  The elements of the trampoline are pointer size entities so
they have to depend on -m31/-m64 (UNITS_PER_LONG) but not on
-mesa/-mzarch (UNITS_PER_WORD). Fixed with the attached patch.

Committed to mainline and 4.6.

Bye,

-Andreas-


2011-05-06  Andreas Krebbel  andreas.kreb...@de.ibm.com

* config/s390/s390.c (s390_asm_trampoline_template): Comment
instruction sizes.
(s390_trampoline_init): Replace UNITS_PER_WORD with
UNITS_PER_LONG.


Index: gcc/config/s390/s390.c
===
*** gcc/config/s390/s390.c.orig
--- gcc/config/s390/s390.c
*** s390_asm_trampoline_template (FILE *file
*** 9281,9296 
  
if (TARGET_64BIT)
  {
!   output_asm_insn (basr\t%1,0, op);
!   output_asm_insn (lmg\t%0,%1,14(%1), op);
!   output_asm_insn (br\t%1, op);
ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 10));
  }
else
  {
!   output_asm_insn (basr\t%1,0, op);
!   output_asm_insn (lm\t%0,%1,6(%1), op);
!   output_asm_insn (br\t%1, op);
ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 8));
  }
  }
--- 9281,9296 
  
if (TARGET_64BIT)
  {
!   output_asm_insn (basr\t%1,0, op); /* 2 byte */
!   output_asm_insn (lmg\t%0,%1,14(%1), op);  /* 6 byte */
!   output_asm_insn (br\t%1, op); /* 2 byte */
ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 10));
  }
else
  {
!   output_asm_insn (basr\t%1,0, op); /* 2 byte */
!   output_asm_insn (lm\t%0,%1,6(%1), op);/* 4 byte */
!   output_asm_insn (br\t%1, op); /* 2 byte */
ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 8));
  }
  }
*** s390_trampoline_init (rtx m_tramp, tree 
*** 9306,9316 
rtx mem;
  
emit_block_move (m_tramp, assemble_trampoline_template (),
!  GEN_INT (2*UNITS_PER_WORD), BLOCK_OP_NORMAL);
  
!   mem = adjust_address (m_tramp, Pmode, 2*UNITS_PER_WORD);
emit_move_insn (mem, cxt);
!   mem = adjust_address (m_tramp, Pmode, 3*UNITS_PER_WORD);
emit_move_insn (mem, fnaddr);
  }
  
--- 9306,9316 
rtx mem;
  
emit_block_move (m_tramp, assemble_trampoline_template (),
!  GEN_INT (2 * UNITS_PER_LONG), BLOCK_OP_NORMAL);
  
!   mem = adjust_address (m_tramp, Pmode, 2 * UNITS_PER_LONG);
emit_move_insn (mem, cxt);
!   mem = adjust_address (m_tramp, Pmode, 3 * UNITS_PER_LONG);
emit_move_insn (mem, fnaddr);
  }
  


[PATCH, i386]: Apply * constraint modifier to r in *movdf_internal_nointeger

2011-05-06 Thread Uros Bizjak
Hello!

We should avoid to allocate r in *_nointeger instructions.
Constraint modifiers apply to a following character, not to the whole
alternative.

2011-05-06  Uros Bizjak  ubiz...@gmail.com

* config/i386/i386.md (*movdf_internal_nointeger): Apply *
constraint modifier to r.

Tested on x86_64-pc-linux-gnu {,-m32}, committed to mainline SVN and
(soon) 4.6 branch.

Uros.
Index: config/i386/i386.md
===
--- config/i386/i386.md (revision 173469)
+++ config/i386/i386.md (working copy)
@@ -3251,7 +3251,7 @@
   [(set (match_operand:DF 0 nonimmediate_operand
=f,m,f,*r  ,o  ,Y2*x,Y2*x,Y2*x ,m  )
(match_operand:DF 1 general_operand
-   fm,f,G,*roF,*Fr,C   ,Y2*x,mY2*x,Y2*x))]
+   fm,f,G,*roF,F*r,C   ,Y2*x,mY2*x,Y2*x))]
   !TARGET_64BIT  !(MEM_P (operands[0])  MEM_P (operands[1]))
 (optimize_function_for_size_p (cfun)
|| !TARGET_INTEGER_DFMODE_MOVES)


Re: [patch] PR 48837

2011-05-06 Thread Richard Guenther
On Fri, May 6, 2011 at 12:44 PM, Zdenek Dvorak rakd...@kam.mff.cuni.cz wrote:
 Hi,

 when accumulator transformation is performed on a function like

 foo(a)
 {
   if (a > 0)
    return 1 + foo (a - 1)

  return bla();
 }

 this becomes

 foo(a)
 {
  int tmp = 0;

   while (a > 0)
     tmp = 1 + tmp;

  return tmp + bla();
 }

 Before, bla was a tail-call, but after the optimization, it is not (since an 
 addition
 has to be performed after the result of bla is known).  However, we used to 
 mark bla
 as tail-call, leading to a miscompilation later.  Fixed by not marking 
 tail-calls
 when the transformation is performed.  Bootstrapped and regtested on i686.

Ok.

Thanks,
Richard.

 Zdenek

        PR tree-optimization/48837
        * tree-tailcall.c (tree_optimize_tail_calls_1): Do not mark tailcalls
        when accumulator transformation is performed.

        * gcc.dg/pr48837.c: New testcase.

 Index: tree-tailcall.c
 ===
 --- tree-tailcall.c     (revision 173354)
 +++ tree-tailcall.c     (working copy)
 @@ -1021,6 +1021,14 @@ tree_optimize_tail_calls_1 (bool opt_tailcalls)
                                             integer_one_node);
     }

 +  if (a_acc || m_acc)
 +    {
 +      /* When the tail call elimination using accumulators is performed,
 +        statements adding the accumulated value are inserted at all exits.
 +        This turns all other tail calls to non-tail ones.  */
 +      opt_tailcalls = false;
 +    }
 +
   for (; tailcalls; tailcalls = next)
     {
       next = tailcalls-next;
 Index: testsuite/gcc.dg/pr48837.c
 ===
 --- testsuite/gcc.dg/pr48837.c  (revision 0)
 +++ testsuite/gcc.dg/pr48837.c  (revision 0)
 @@ -0,0 +1,30 @@
 +/* PR tree-optimization/48837 */
 +/* { dg-do run } */
 +/* { dg-options "-O2" } */
 +
 +void abort (void);
 +
 +__attribute__((noinline))
 +int baz(void)
 +{
 +  return 1;
 +}
 +
 +inline const int *bar(const int *a, const int *b)
 +{
 + return *a ? a : b;
 +}
 +
 +int foo(int a, int b)
 +{
 +   return a || b ? baz() : foo(*bar(&a, &b), 1) + foo(1, 0);
 +}
 +
 +int main(void)
 +{
 + if (foo(0, 0) != 2)
 +   abort();
 +
 + return 0;
 +}
 +



[PATCH,c++] introduce {class,type}_of_this functions

2011-05-06 Thread Nathan Froyd
The patch below introduces simple accessors for getting at the class or
the type of the `this' parameter.  It hides a couple of TYPE_ARG_TYPES
usages and makes the code slightly more obvious, I think.

Tested on x86_64-unknown-linux-gnu.  OK to commit?

-Nathan

gcc/cp/
* cp-tree.h (type_of_this, class_of_this): New functions.
* call.c (standard_conversion): Call class_of_this.
* cxx-pretty-print.c (pp_cxx_implicit_parameter_type): Likewise.
(pp_cxx_direct_abstract_declarator): Likewise.
* decl2.c (change_return_type): Likewise.
(cp_reconstruct_complex_type): Likewise.
* error.c (dump_type_suffix, dump_function_decl): Likewise.
* mangle.c (write_function_type): Likewise.
* pt.c (unify): Likewise.
* typeck.c (merge_types, type_memfn_quals): Likewise.
* decl.c (build_this_parm): Call type_of_this.

diff --git a/gcc/cp/call.c b/gcc/cp/call.c
index f5bd521..7ad9279 100644
--- a/gcc/cp/call.c
+++ b/gcc/cp/call.c
@@ -1146,8 +1146,8 @@ standard_conversion (tree to, tree from, tree expr, bool 
c_cast_p,
 {
   tree fromfn = TREE_TYPE (TYPE_PTRMEMFUNC_FN_TYPE (from));
   tree tofn = TREE_TYPE (TYPE_PTRMEMFUNC_FN_TYPE (to));
-  tree fbase = TREE_TYPE (TREE_VALUE (TYPE_ARG_TYPES (fromfn)));
-  tree tbase = TREE_TYPE (TREE_VALUE (TYPE_ARG_TYPES (tofn)));
+  tree fbase = class_of_this (fromfn);
+  tree tbase = class_of_this (tofn);
 
   if (!DERIVED_FROM_P (fbase, tbase)
  || !same_type_p (TREE_TYPE (fromfn), TREE_TYPE (tofn))
diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h
index 9d13393..d410e02 100644
--- a/gcc/cp/cp-tree.h
+++ b/gcc/cp/cp-tree.h
@@ -4616,6 +4616,24 @@ struct GTY(()) tinst_level {
   bool in_system_header_p;
 };
 
+/* Return the type of the `this' parameter of FNTYPE.  */
+
+static inline tree
+type_of_this (const_tree fntype)
+{
+  function_args_iterator iter;
+  function_args_iter_init (iter, fntype);
+  return function_args_iter_cond (iter);
+}
+
+/* Return the class of the `this' parameter of FNTYPE.  */
+
+static inline tree
+class_of_this (const_tree fntype)
+{
+  return TREE_TYPE (type_of_this (fntype));
+}
+
 /* A parameter list indicating for a function with no parameters,
e.g  int f(void).  */
 extern cp_parameter_declarator *no_parameters;
diff --git a/gcc/cp/cxx-pretty-print.c b/gcc/cp/cxx-pretty-print.c
index bd0381b..eeb6d07 100644
--- a/gcc/cp/cxx-pretty-print.c
+++ b/gcc/cp/cxx-pretty-print.c
@@ -1363,7 +1363,7 @@ pp_cxx_ptr_operator (cxx_pretty_printer *pp, tree t)
 static inline tree
 pp_cxx_implicit_parameter_type (tree mf)
 {
-  return TREE_TYPE (TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (mf))));
+  return class_of_this (TREE_TYPE (mf));
 }
 
 /*
@@ -1652,8 +1652,7 @@ pp_cxx_direct_abstract_declarator (cxx_pretty_printer 
*pp, tree t)
   if (TREE_CODE (t) == METHOD_TYPE)
{
  pp_base (pp)-padding = pp_before;
- pp_cxx_cv_qualifier_seq
-   (pp, TREE_TYPE (TREE_VALUE (TYPE_ARG_TYPES (t))));
+ pp_cxx_cv_qualifier_seq (pp, class_of_this (t));
}
   pp_cxx_exception_specification (pp, t);
   break;
diff --git a/gcc/cp/decl.c b/gcc/cp/decl.c
index 3622c2c..962dd22 100644
--- a/gcc/cp/decl.c
+++ b/gcc/cp/decl.c
@@ -6924,7 +6924,7 @@ build_this_parm (tree type, cp_cv_quals quals)
   tree parm;
   cp_cv_quals this_quals;
 
-  this_type = TREE_VALUE (TYPE_ARG_TYPES (type));
+  this_type = type_of_this (type);
   /* The `this' parameter is implicitly `const'; it cannot be
  assigned to.  */
   this_quals = (quals  TYPE_QUAL_RESTRICT) | TYPE_QUAL_CONST;
diff --git a/gcc/cp/decl2.c b/gcc/cp/decl2.c
index ef8de31..02d9fd9 100644
--- a/gcc/cp/decl2.c
+++ b/gcc/cp/decl2.c
@@ -161,8 +161,7 @@ change_return_type (tree new_ret, tree fntype)
 }
   else
 newtype = build_method_type_directly
-  (TREE_TYPE (TREE_VALUE (TYPE_ARG_TYPES (fntype))),
-   new_ret, TREE_CHAIN (args));
+  (class_of_this (fntype), new_ret, TREE_CHAIN (args));
   if (raises)
 newtype = build_exception_variant (newtype, raises);
   if (attrs)
@@ -1249,8 +1248,7 @@ cp_reconstruct_complex_type (tree type, tree bottom)
 so we must compensate by getting rid of it.  */
   outer
= build_method_type_directly
-   (TREE_TYPE (TREE_VALUE (TYPE_ARG_TYPES (type))),
-inner,
+   (class_of_this (type), inner,
 TREE_CHAIN (TYPE_ARG_TYPES (type)));
 }
   else if (TREE_CODE (type) == OFFSET_TYPE)
diff --git a/gcc/cp/error.c b/gcc/cp/error.c
index fce7403..b364824 100644
--- a/gcc/cp/error.c
+++ b/gcc/cp/error.c
@@ -794,8 +794,7 @@ dump_type_suffix (tree t, int flags)
dump_parameters (arg, flags  ~TFF_FUNCTION_DEFAULT_ARGUMENTS);
 
if (TREE_CODE (t) == METHOD_TYPE)
- pp_cxx_cv_qualifier_seq
-   (cxx_pp, TREE_TYPE (TREE_VALUE (TYPE_ARG_TYPES (t))));
+ pp_cxx_cv_qualifier_seq (cxx_pp, class_of_this (t));
else
  

Re: [PATCH] Canonicalize compares in combine [2/3] Modifications to try_combine()

2011-05-06 Thread Paolo Bonzini

On 05/06/2011 12:56 PM, Chung-Lin Tang wrote:

  I'm not sure why it doesn't allow both?

  Paolo

Hi Paolo, I'm not sure I understand your meaning of 'both', but before
this patch, it only tested for == const0_rtx, without any attempt of
other cases.

Now it tests CONST_INT_P(XEXP(SET_SRC(PATTERN(i3)),1)), attempts a
simplification which may change a non-zero constant to const0_rtx, then
test for const0_rtx. Supposedly, the new code should be strictly more
general.


Uff.  Stupid question is stupid.

Paolo


[PATCH, ARM] Unaligned accesses for packed structures [1/2]

2011-05-06 Thread Julian Brown
Hi,

This is the first of two patches to add unaligned-access support to the
ARM backend. This is done somewhat differently to Jie Zhang's earlier
patch:

  http://gcc.gnu.org/ml/gcc-patches/2010-12/msg01890.html

In that with Jie's patch, *any* pointer dereference would be allowed to
access unaligned data. This has the undesirable side-effect of
disallowing instructions which don't support unaligned accesses (LDRD,
LDM etc.) when unaligned accesses are enabled.

Instead, this patch enables only packed-structure accesses to use
ldr/str/ldrh/strh, by taking a hint from the MIPS ldl/ldr
implementation. I figured the unaligned-access ARM case is kind of
similar to those, except that normal loads/stores are used, and the
shifting/merging happens in hardware.

The standard names extv/extzv/insv can take a memory
operand for the source/destination of the extract/insert operation, so
we just expand to unspec'ed versions of the load and store operations
when unaligned-access support is enabled: the benefit of doing that
rather than, say, expanding using the regular movsi pattern is that we
bypass any smartness in the compiler which might replace operations
which work for unaligned accesses (ldr/str/ldrh/strh) with operations
which don't work (ldrd/strd/ldm/stm/vldr/...). The downside is we might
potentially miss out on optimization opportunities (since these things
no longer look like plain memory accesses).

Doing things this way allows us to leave the settings for
STRICT_ALIGNMENT/SLOW_BYTE_ACCESS alone, avoiding the disruption that
changing them might cause.

The most awkward change in the patch is to generic code (expmed.c,
{store,extract}_bit_field_1): in big-endian mode, the existing behaviour
(when inserting/extracting a bitfield to a memory location) is
definitely bogus: unit is set to BITS_PER_UNIT for memory locations,
and if bitsize (the size of the field to insert/extract) is greater than
BITS_PER_UNIT (which isn't unusual at all), xbitpos becomes negative.
That can't possibly be intentional; I can only assume that this code
path is not exercised for machines which have memory alternatives for
bitfield insert/extract, and BITS_BIG_ENDIAN of 0 in BYTES_BIG_ENDIAN
mode.

The logic for choosing when to enable the unaligned-access support (and
the name of the option to override the default behaviour) is lifted from
Jie's patch.

Tested with cross to ARM Linux, and (on a branch) in both little 
big-endian mode cross to ARM EABI, with no regressions. OK to apply?

Thanks,

Julian

ChangeLog

gcc/
* config/arm/arm.c (arm_override_options): Add unaligned_access
support.
* config/arm/arm.md (UNSPEC_UNALIGNED_LOAD)
(UNSPEC_UNALIGNED_STORE): Add constants for unspecs.
(insv, extzv): Add unaligned-access support.
(extv): Change to expander. Likewise.
(unaligned_loadsi, unaligned_loadhis, unaligned_loadhiu)
(unaligned_storesi, unaligned_storehi): New.
(*extv_reg): New (previous extv implementation).
* config/arm/arm.opt (munaligned_access): Add option.
* expmed.c (store_bit_field_1): Don't tweak bitfield numbering for
memory locations if BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN.
(extract_bit_field_1): Likewise.
commit e76508ff702406fd63bc59465d9c7ab70dcb3266
Author: Julian Brown jul...@henry7.codesourcery.com
Date:   Wed May 4 10:06:25 2011 -0700

Permit regular ldr/str/ldrh/strh for packed-structure accesses etc.

diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 4f9c2aa..a18aea6 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -1833,6 +1833,22 @@ arm_option_override (void)
 	fix_cm3_ldrd = 0;
 }
 
+  /* Enable -munaligned-access by default for
+ - all ARMv6 architecture-based processors
+ - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
+
+ Disable -munaligned-access by default for
+ - all pre-ARMv6 architecture-based processors
+ - ARMv6-M architecture-based processors.  */
+
+  if (unaligned_access == 2)
+{
+  if (arm_arch6 && (arm_arch_notm || arm_arch7))
+	unaligned_access = 1;
+  else
+	unaligned_access = 0;
+}
+
   if (TARGET_THUMB1  flag_schedule_insns)
 {
   /* Don't warn since it's on by default in -O2.  */
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 40ebf35..7d37445 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -104,6 +104,10 @@
   UNSPEC_SYMBOL_OFFSET  ; The offset of the start of the symbol from
 ; another symbolic address.
   UNSPEC_MEMORY_BARRIER ; Represent a memory barrier.
+  UNSPEC_UNALIGNED_LOAD	; Used to represent ldr/ldrh instructions that access
+			; unaligned locations, on architectures which support
+			; that.
+  UNSPEC_UNALIGNED_STORE ; Same for str/strh.
 ])
 
 ;; UNSPEC_VOLATILE Usage:
@@ -2393,7 +2397,7 @@
 ;;; this insv pattern, so this pattern needs to be reevaluated.
 
 (define_expand insv
-  [(set (zero_extract:SI (match_operand:SI 0 s_register_operand )
+  [(set 

Re: Cgraph thunk reorg

2011-05-06 Thread Richard Guenther
On Fri, 6 May 2011, Jan Hubicka wrote:

 Hi,
 this patch implements thunks as real cgraph nodes instead of alias nodes.  I 
 am not
 entirely happy about it, but I can't come with anything better.
 
 The main problem is that thunks can be seen in two ways:
 
   1) As alternative entry points into functions
 
   This is how the existing code attempts to be structured: thunks do not
   appear in callgraph, instead of the callgraph edges points to the
   functions the thunk are associated with.
 
   The problem with current code is that none of IPA code nor rest of
   compiler is familiar with the concept of alternative entry points.
   Consequently the direct calls to thunks appears in the program
   in equivalent way as direct calls to function they are associated
   to that consequently may lead to miscompilations when we decide to
   inline and ignore thunk or do ipa-prop.
 
   As a temporary measure, we declared direct calls to thunk invalid.
   This lead to need for devirtualization code to inline the thunk
   when devirtualizing the call or to not devirtualize.  For simple thunks
   this is not big deal to do, but for covariant thunks this imply
   extra control flow that is something Richi don't like.
   Also we now devirtualize implicitly via folding lookups into the
   vtables. Requiring that code to ponder about thunk adjustments don't
   look quite right.
 
   Next problem is that with LTO we can merge direct call to external
   function with thunk and in this case we have to represent the direct
   call to thunk.
 
   To allow direct calls to thunks would mean adding concept of entry
   points into callgraph edges that would mean next pointer to something
   that would describe it.  Most probably chain of thunk structures:
   we do allow and build thunks of thunks.
 
   We discussed this quite few times on IRC and always this was voted
   down as weird. One argument against is that it will be easy to do
   simple wrong code bugs by forgetting about the info hanging on cgraph
   edges, since in most cases there is nothing.
 
   2) As real functions calling the function they are associated with.
 
   Because backend don't handle alternative entry points, we really 
 implement
   thunks as small functions that usually tail call into the associated
   functions after doing adjustments to THIS.
 
   Other natural abstraction seems to be handle thunks as real functions.
   This is what the patch does.  There are several issues with this.
 
   1) Not all thunks have bodies that represent in gimple. The variadic
   thunks currently don't have any gimple representation. While we can
   come with some, there is not that much of value for it because...
   2) We can't expand thunks into RTL.  On many archs we have existing
   ASM output machinery that leads to better code (and only possible code
   for variadic thunks that are not really representable in RTL either).
   3) Thunks are not real functions in C++ ABI sense. They share comdat
   groups implicitly and they must be output in specified order to
   get proper comdat group signatures
 
   This patch takes this route and does the compensation where needed.
   In particular all IPA passes that worries about gimple bodies needs
   to be updated to handle thunks.  This is not that hard to do and as
   first cut I simply disabled inlining, ipa-prop and cloning on thunks.
   We can handle that incrementally.
 
 The problem of thunks is related to problem of proper representation of 
 aliases.
 Again aliases can be transparent that is not having cgraph nodes to them
 and all edges going to the final destination or they can be separate nodes.
 I originally intended to go for the first case that also has problem with
 representing the visibilities of aliases: i.e. depending on alias used, the
 edges may or may not be overwritable by the linker, so the alternative entry
 point info would need to represent this, too.
 
 With thunks as separate nodes, I will turn aliases into separate nodes, too
 that will have link via ipa-ref infrastructure (i.e. in addition to load/store
 and address links we will also have alias links).
 
 Because IPA passes really care about objects themselves, not the aliases
 (i.e. ipa-reference or ipa-pta wants to see the variable and all its aliases
 as one object, so wants the inliner or ipa-propagate), we will need to add
 some accessor functions that will walk to real destination of the edge
 and also walk all real objects referencing the given object skipping the
 aliases.
 
 This approach has the advantage of getting cgraph/varpool closer to symbol
 table and making things bit easier at lto-symtab side.
 
 The patch does basically the following:
 
  1) turns thunks from alias node into function nodes with node->thunk.thunk_p
 flag set
  2) 

Re: [PATCH] Fix PR c++/48574

2011-05-06 Thread Jason Merrill

On 05/06/2011 07:08 AM, Dodji Seketeli wrote:

Jason Merrillja...@redhat.com  writes:


How about type_dependent_expression_p_push instead?


Like this ?  Lightly tested.  A full bootstrap and regression test is
under way.


OK.

Jason


Re: Fix PR48900, powerpc duplicate __tls_get_addr calls

2011-05-06 Thread David Edelsohn
On Thu, May 5, 2011 at 10:17 PM, Alan Modra amo...@gmail.com wrote:
 My fix for PR44266 using the libcall machinery to ensure we had a
 proper stack frame allocated for __tls_get_addr calls sloppily used r3
 as the arg to the dummy libcall.  This made the call seem to depend on
 whatever was in r3 previously, at least until we get to the first
 split pass and the real arg is exposed.  So DCE couldn't merge calls.
 Even for a simple testcase like
        extern __thread int i;
        void foo (void) { i++; }
 we get two __tls_get_addr calls if using global-dynamic tls model.

  Easily fixed by giving the dummy libcall an arg of zero.  An
 alternative giving slightly better -O0 code would be to say that the
 libcall doesn't have any args.  I chose to leave the libcall with one
 arg since this is closest to the real __tls_get_addr call, and the
 whole point of faking up a libcall here is to have the generic code do
 whatever is necessary when making function calls.  It's not totally
 impossible to imagine some future ABI change that treats zero arg
 calls differently from other calls.

 Bootstrapped and regression tested powerpc64-linux.  OK to apply
 mainline, 4.6 and 4.5?

        PR target/48900
        * config/rs6000/rs6000.c (rs6000_legitimize_tls_address): Use
        const0_rtx as the arg to the dummy __tls_get_addr libcall.

Okay.

Thanks, David


[PATCH, ARM] Unaligned accesses for builtin memcpy [2/2]

2011-05-06 Thread Julian Brown
Hi,

This is the second of two patches to add unaligned-access support to
the ARM backend. It builds on the first patch to provide support for
unaligned accesses when expanding block moves (i.e. for builtin memcpy
operations). It makes some effort to use load/store multiple
instructions where appropriate (when accessing sufficiently-aligned
source or destination addresses), and also makes some effort to
generate fast code (for -O1/2/3) or small code (for -Os), though some
of the heuristics may need tweaking still.

Examples:

#include <string.h>

void foo (char *dest, char *src)
{
  memcpy (dest, src, AMOUNT);
}

char known[64];

void dst_aligned (char *src)
{
  memcpy (known, src, AMOUNT);
}

void src_aligned (char *dst)
{
  memcpy (dst, known, AMOUNT);
}

For -mcpu=cortex-m4 -mthumb -O2 -DAMOUNT=15 we get:

foo:
ldr r2, [r1, #4]@ unaligned
ldr r3, [r1, #8]@ unaligned
push{r4}
ldr r4, [r1, #0]@ unaligned
str r2, [r0, #4]@ unaligned
str r4, [r0, #0]@ unaligned
str r3, [r0, #8]@ unaligned
ldrhr2, [r1, #12]   @ unaligned
ldrbr3, [r1, #14]   @ zero_extendqisi2
strhr2, [r0, #12]   @ unaligned
strbr3, [r0, #14]
pop {r4}
bx  lr

dst_aligned:
push{r4}
mov r4, r0
movwr3, #:lower16:known
ldr r1, [r4, #4]@ unaligned
ldr r2, [r4, #8]@ unaligned
ldr r0, [r0, #0]@ unaligned
movtr3, #:upper16:known
stmia   r3!, {r0, r1, r2}
ldrhr1, [r4, #12]   @ unaligned
ldrbr2, [r4, #14]   @ zero_extendqisi2
strhr1, [r3, #0]@ unaligned
strbr2, [r3, #2]
pop {r4}
bx  lr

src_aligned:
push{r4}
movwr3, #:lower16:known
movtr3, #:upper16:known
mov r4, r0
ldmia   r3!, {r0, r1, r2}
str r0, [r4, #0]@ unaligned
str r1, [r4, #4]@ unaligned
str r2, [r4, #8]@ unaligned
ldrhr2, [r3, #0]@ unaligned
ldrbr3, [r3, #2]@ zero_extendqisi2
strhr2, [r4, #12]   @ unaligned
strbr3, [r4, #14]
pop {r4}
bx  lr

Whereas for -mcpu=cortex-m4 -mthumb -Os -DAMOUNT=15, e.g.:

foo:
add r3, r1, #12
.L2:
ldr r2, [r1], #4@ unaligned
cmp r1, r3
str r2, [r0], #4@ unaligned
bne .L2
ldrhr3, [r1, #0]@ unaligned
strhr3, [r0, #0]@ unaligned
ldrbr3, [r1, #2]@ zero_extendqisi2
strbr3, [r0, #2]
bx  lr

Tested (alongside the first patch) with cross to ARM Linux. OK to apply?

Thanks,

Julian

ChangeLog

gcc/
* config/arm/arm.c (arm_block_move_unaligned_straight)
(arm_adjust_block_mem, arm_block_move_unaligned_loop)
(arm_movmemqi_unaligned): New.
(arm_gen_movmemqi): Support unaligned block copies.
commit 16973f69fce37a2b347ea7daffd6f593aba843d5
Author: Julian Brown jul...@henry7.codesourcery.com
Date:   Wed May 4 11:26:01 2011 -0700

Optimize block moves when unaligned accesses are permitted.

diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index a18aea6..b6df0d3 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -10362,6 +10362,335 @@ gen_const_stm_seq (rtx *operands, int nops)
   return true;
 }
 
+/* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
+   unaligned copies on processors which support unaligned semantics for those
+   instructions.  INTERLEAVE_FACTOR can be used to attempt to hide load latency
+   (using more registers) by doing e.g. load/load/store/store for a factor of 2.
+   An interleave factor of 1 (the minimum) will perform no interleaving. 
+   Load/store multiple are used for aligned addresses where possible.  */
+
+static void
+arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
+   HOST_WIDE_INT length,
+   unsigned int interleave_factor)
+{
+  rtx *regs = XALLOCAVEC (rtx, interleave_factor);
+  int *regnos = XALLOCAVEC (int, interleave_factor);
+  HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
+  HOST_WIDE_INT i, j;
+  HOST_WIDE_INT remaining = length, words;
+  rtx halfword_tmp = NULL, byte_tmp = NULL;
+  rtx dst, src;
+  bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
+  bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
+  HOST_WIDE_INT srcoffset, dstoffset;
+  HOST_WIDE_INT src_autoinc, dst_autoinc;
+  rtx mem, addr;
+  
+  gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
+  
+  /* Use hard registers if we have aligned source or destination so we can use
+ load/store multiple with contiguous registers.  */
+  if (dst_aligned || src_aligned)
+for (i = 0; i < interleave_factor; i++)
+  regs[i] = gen_rtx_REG (SImode, i);
+  else
+for (i = 

Re: [google] Patch to support calling multi-versioned functions via new GCC builtin. (issue4440078)

2011-05-06 Thread Diego Novillo
On Fri, May 6, 2011 at 04:55, Richard Guenther
richard.guent...@gmail.com wrote:
 On Thu, May 5, 2011 at 7:02 PM, Xinliang David Li davi...@google.com wrote:

 2) Support of CallInfo for each callsite. This is an annotation, but
 more standardized. The callinfo can be used to record information such
 as call attributes, call side effects, mod-ref information etc ---
 current gimple_call_flags can be folded into this Info structure.

 I don't like generic annotation facilities.  What should passes do with
 annotated stmts that are a) transformed, b) removed?  See RTL notes
 and all the interesting issues they cause.

Likewise.  We kind of tried having them in the early days of gimple
and tree-ssa, but quickly removed them.  Anything that is not a
first-class IL member, makes life difficult.  We have some examples in
PHI nodes and EH regions.  They're a bit to the side, and require
extra code to manage.


Diego.


Re: [PATCH,c++] introduce {class,type}_of_this functions

2011-05-06 Thread Jason Merrill

On 05/06/2011 07:49 AM, Nathan Froyd wrote:

The patch below introduces simple accessors for getting at the class or
the type of the `this' parameter.  It hides a couple of TYPE_ARG_TYPES
usages and makes the code slightly more obvious, I think.


Hmm, when I first read the names I expected them to refer to the 'this' 
in the current function.  I think adding _parm to the end of the names 
would help.


The type_ function should also have an assert that fntype is a METHOD_TYPE.

OK with those changes.

Jason


Re: [Patch,AVR]: Fix PR27663

2011-05-06 Thread Georg-Johann Lay
Denis Chertykov schrieb:
 2011/5/2 Georg-Johann Lay a...@gjlay.de:
 This is a fix for an optimization flaw when a long value is composed
 from byte values.

 For -fsplit-wide-types (which is still default for avr) the code is
 worse than with -fno-split-wide-types. The code for the test case is
 better in either situations, i.e. compared to code without the patch,
 but it is still not optimal.

 Fixing this by some combine patterns is the only thing the BE can do.
 I did not write more complex patterns because things get too complex
 with little performance gain.

 Tested without regressions.

 Johann

 2011-05-02  Georg-Johann Lay  a...@gjlay.de

PR target/27663
* config/avr/predicates.md (const_8_16_24_operand): New predicate.
* config/avr/avr.md (*iormodeqi.byte0,
*iormodeqi.byte1-3): New define_insn_and_split patterns.

 
 I'm sorry, but I dot'n like to have a both combiner related patches in
 port because code improvement isn't much and your patterns are
 difficult to understand and maintain.

You refer to this patch for PR42210?

http://gcc.gnu.org/ml/gcc-patches/2011-04/msg02099.html

  May be somebody else have a different opinion ?
 I'm open to discussion.


The patterns in this patch are similar to *addhi3_zero_extend,
*addhi3_zero_extend1 that handle HI+QI resp. *addhi3_zero_extend
that handle SI+QI.

The difference is that they handle IOR instead of PLUS. It's true that
the user has to use some specific code (addition of QI to HI resp. SI
in the first case and ORing of QI to HI resp. SI in the second).

IMO insn combine is a very powerful pass and I do not see why the avr
BE should not take advantage of it to synthesize new instructions.
Note that other parts like *sbi or *cbi rely on insn combine, too.

If it's hard to understand what their intention is, I can add some
more comments.

As insn combine is capable of generating new instructions that are not
covered by standard patterns, it is only natural that they might be
more complicated than standard patterns. But almost everything in GCC
is complicated, even in the avr BE stuff like, e.g. handling of
rotate, is way much more complicated.

The new patterns are restricted to one single place in the backend.
If they are correct, they are supposed to work in the future without
steadily maintaining them.

I agree that it would be nice if the middleend detected the
expressions as, say, (set (zero_extract:QI (reg:SI ...))), but that's
not the case; not even on 32-bit targets with full insv/extzv support.

And as I already wrote, the -fsplit-wide-types is not a good choice on
avr (except for 64-bit stuff where subreg lowering leads to much
code), see

http://gcc.gnu.org/ml/gcc/2011-03/msg00261.html

So with -fno-split-wide-types and some more elaborate testcase you
will see that the new patterns are a clear improvement.

Johann

 
 Denis.
 



Re: [Patch,AVR]: Fix PR45099

2011-05-06 Thread Georg-Johann Lay
Nathan Froyd schrieb:
 On Mon, May 02, 2011 at 05:23:48PM +0200, Georg-Johann Lay wrote:
 PR45099 is an extension that gives an error when a fixed register is
 needed to pass a parameter to a function.

 Because the program will show malfunction when such code is generated,
 anyway, I think an error is more appropriate than a warning (as
 proposed in the PR).
 
 This seems like something that should be handled by common code.
 
 -Nathan

Yes, I agree. However, common code it too complicated for me to run
tests for, so I restrict myself to avr backend.

Until such a test will find its way into common code, it might still
be useful in avr backend. I think this has quite low priority for
other targets because global registers are not very common in, e.g. i386.

Johann



Re: Cgraph thunk reorg

2011-05-06 Thread Michael Matz
Hi,

On Fri, 6 May 2011, Jan Hubicka wrote:

   
 *** dump_cgraph_node (FILE *f, struct cgraph
 *** 1874,1880 
 if (node-only_called_at_exit)
   fprintf (f,  only_called_at_exit);
   
 !   fprintf (f, \n  called by: );
 for (edge = node-callers; edge; edge = edge-next_caller)
   {
 fprintf (f, %s/%i , cgraph_node_name (edge-caller),
 --- 1884,1907 
 if (node-only_called_at_exit)
   fprintf (f,  only_called_at_exit);
   
 !   fprintf (f, \n);
 ! 
 !   if (node-thunk.thunk_p)
 ! {
 !   if (node-thunk.thunk_p)
 ! {

Doubled conditional.


Ciao,
Michael.


Re: [patch] PR 48837

2011-05-06 Thread Jeff Law
-BEGIN PGP SIGNED MESSAGE-
Hash: SHA1

On 05/06/11 04:44, Zdenek Dvorak wrote:
 Hi,
 
 when accumulator transformation is performed on a function like
 
 foo(a)
 {
    if (a > 0)
 return 1 + foo (a - 1)
 
   return bla();
 }
 
 this becomes
 
 foo(a)
 {
   int tmp = 0;
 
    while (a > 0)
  tmp = 1 + tmp;
 
   return tmp + bla();
 }
 
 Before, bla was a tail-call, but after the optimization, it is not (since an 
 addition
 has to be performed after the result of bla is known).  However, we used to 
 mark bla
 as tail-call, leading to a misscompilation later.  Fixed by not marking 
 tail-calls
 when the transformation is performed.  Bootstrapped and regtested on i686.
 
 Zdenek
 
   PR tree-optimization/48837
   * tree-tailcall.c (tree_optimize_tail_calls_1): Do not mark tailcalls
   when accumulator transformation is performed.
 
   * gcc.dg/pr48837.c: New testcase.
OK.

Thanks,
jeff
-BEGIN PGP SIGNATURE-
Version: GnuPG v1.4.11 (GNU/Linux)
Comment: Using GnuPG with Fedora - http://enigmail.mozdev.org/

iQEcBAEBAgAGBQJNxA66AAoJEBRtltQi2kC7yysIAKZYUpU9JlyH2XwvvVslq8C1
CJ7E/akRDsBoYBS+syNsLMwkbGG0WoaFJzOd7vUmIknAHEusF6OasczsN8PD9aEB
i8xJNTZm2yxhrVZh8m/KBX96r80RwzpAhr9L1WAspiS/xpw12lRoJoh3XeKXYXWw
Z0aBL4ljCgLj6GKEyy7FbGHx0gEqaa1x7EDM1kJGCgZPAFJalJPozBiiriYL9/Th
gHqLXZ0HUhXNGql5M2S+lfZG2d30Rj1KBXDrU8EOXedHRjxb+U9+WLGsUHZtkcTI
j3//n6bjTr/YmyTe43voG3Rn6z6k0g2Eb8gF8UMvDbaSJlH9+xb6SuWLS8+mEhY=
=luE9
-END PGP SIGNATURE-


Re: [PATCH] Canonicalize compares in combine [2/3] Modifications to try_combine()

2011-05-06 Thread Jeff Law
-BEGIN PGP SIGNED MESSAGE-
Hash: SHA1

On 05/06/11 03:57, Paolo Bonzini wrote:
 On 04/22/2011 05:21 PM, Chung-Lin Tang wrote:
 Also, instead of testing for XEXP(SET_SRC(PATTERN(i3)),1) == const0_rtx
 at the top, it now allows CONST_INT_P(XEXP(SET_SRC(PATTERN(i3)),1)),
 tries to adjust it by simplify_compare_const() from the last patch, and
 then tests if op1 == const0_rtx. This is a small improvement in some
 cases.
 
 I'm not sure why it doesn't allow both?
Part of the purpose of the patch is be more general in the constants
allowed; prior to Chung-Lin's patch only const0_rtx was allowed.
Chung-Lin's patch generalizes the code to allow other constants is
specific cases.

Jeff
-BEGIN PGP SIGNATURE-
Version: GnuPG v1.4.11 (GNU/Linux)
Comment: Using GnuPG with Fedora - http://enigmail.mozdev.org/

iQEcBAEBAgAGBQJNxA9+AAoJEBRtltQi2kC79lUH/2s2u2HNJMSedW5RFGPhYghX
zIosctPzZ4EkqrH5uvNJMBRxnxu0sBmDcJM5HcoaA5tz/T1aHlsGk6XvPeh+gSJO
wDnFHCUMdmB7hXSB/BcpAC5496DTrZNoyix5qIwIpxPjlaA9n4LoSA+ZiO6nObPH
dZ6UfyCihF+zCukSSQ0qHywJvSVfsQByBYefspS7uy0yFhzm45LHTcIN/j4hC685
lC2lIsBH7ZtMV01tRbr47PGgoey0pwvVeiHf/FcCWA6+Zo2ctfyjzsaE3exg8ms6
zylDHA/9gf2D1oYFn5FmrnHiYt3WGX/75u7bJCCJK1OUKknq6MnexVnfITsovFo=
=7ZnG
-END PGP SIGNATURE-


Re: Cgraph thunk reorg

2011-05-06 Thread Mike Stump
On May 6, 2011, at 5:12 AM, Jan Hubicka wrote:
  2) As real functions calling the function they are associated with.
 
  Because backend don't handle alternative entry points, we really 
 implement
  thunks as small functions that usually tail call into the associated
  functions after doing adjustments to THIS.

I don't like the word usually here...  that tail call for non-adjusting 
covariant returns and non-covariant returns, and a normal call otherwise 
Since this is just the email description, not a problem, but I did want to 
ensure people don't think of thunks as tail calling.


Re: [PING] Fix PR46399 - missing mode promotion for libcall args

2011-05-06 Thread NightStrike
On Wed, May 4, 2011 at 9:45 AM, Richard Guenther
richard.guent...@gmail.com wrote:
 On Wed, May 4, 2011 at 3:45 PM, Kai Tietz ktiet...@googlemail.com wrote:
 2011/5/4 Richard Guenther richard.guent...@gmail.com:
 On Mon, Apr 18, 2011 at 10:17 AM, Andreas Krebbel
 kreb...@linux.vnet.ibm.com wrote:
 Hi,

 the attached patch uses the existing promote_function_mode hook.  For
 a libcall neither TYPE nor FNTYPE is available so I had to change a
 few related function in order to deal with that.

 The patch also fixes the s390 DFP problems.

 Bye,

 -Andreas-


 2011-04-18  Andreas Krebbel  andreas.kreb...@de.ibm.com

        * calls.c (emit_library_call_value_1): Invoke
        promote_function_mode hook on libcall arguments.
        * explow.c (promote_function_mode, promote_mode): Handle TYPE
        argument being NULL.
        * targhooks.c (default_promote_function_mode): Likewise.
        * config/s390/s390.c (s390_promote_function_mode): Likewise.
        * config/sparc/sparc.c (sparc_promote_function_mode): Likewise.

        * doc/tm.texi: Document that TYPE argument might be NULL.


 Index: gcc/calls.c
 ===
 *** gcc/calls.c.orig
 --- gcc/calls.c
 *** emit_library_call_value_1 (int retval, r
 *** 3484,3489 
 --- 3484,3490 
      {
        rtx val = va_arg (p, rtx);
        enum machine_mode mode = (enum machine_mode) va_arg (p, int);
 +       int unsigned_p = 0;

        /* We cannot convert the arg value to the mode the library wants 
 here;
         must do it earlier where we know the signedness of the arg.  */
 *** emit_library_call_value_1 (int retval, r
 *** 3531,3539 
          val = force_operand (XEXP (slot, 0), NULL_RTX);
        }

 !       argvec[count].value = val;
        argvec[count].mode = mode;
 !
        argvec[count].reg = targetm.calls.function_arg (args_so_far, mode,
                                                      NULL_TREE, true);

 --- 3532,3540 
          val = force_operand (XEXP (slot, 0), NULL_RTX);
        }

 !       mode = promote_function_mode (NULL_TREE, mode, unsigned_p, 
 NULL_TREE, 0);
        argvec[count].mode = mode;
 !       argvec[count].value = convert_modes (mode, GET_MODE (val), val, 0);
        argvec[count].reg = targetm.calls.function_arg (args_so_far, mode,
                                                      NULL_TREE, true);

 Index: gcc/config/s390/s390.c
 ===
 *** gcc/config/s390/s390.c.orig
 --- gcc/config/s390/s390.c
 *** s390_promote_function_mode (const_tree t
 *** 8742,8748 
    if (INTEGRAL_MODE_P (mode)
         && GET_MODE_SIZE (mode) < UNITS_PER_LONG)
      {
 !       if (POINTER_TYPE_P (type))
        *punsignedp = POINTERS_EXTEND_UNSIGNED;
        return Pmode;
      }
 --- 8742,8748 
    if (INTEGRAL_MODE_P (mode)
         && GET_MODE_SIZE (mode) < UNITS_PER_LONG)
      {
  !       if (type != NULL_TREE && POINTER_TYPE_P (type))
        *punsignedp = POINTERS_EXTEND_UNSIGNED;
        return Pmode;
      }
 Index: gcc/explow.c
 ===
 *** gcc/explow.c.orig
 --- gcc/explow.c
 *** enum machine_mode
 *** 771,776 
 --- 771,787 
  promote_function_mode (const_tree type, enum machine_mode mode, int 
 *punsignedp,
                       const_tree funtype, int for_return)
  {
 +   /* Called without a type node for a libcall.  */
 +   if (type == NULL_TREE)
 +     {
 +       if (INTEGRAL_MODE_P (mode))
 +       return targetm.calls.promote_function_mode (NULL_TREE, mode,
 +                                                   punsignedp, funtype,
 +                                                   for_return);
 +       else
 +       return mode;
 +     }
 +
    switch (TREE_CODE (type))
      {
      case INTEGER_TYPE:   case ENUMERAL_TYPE:   case BOOLEAN_TYPE:
 *** enum machine_mode
 *** 791,796 
 --- 802,813 
  promote_mode (const_tree type ATTRIBUTE_UNUSED, enum machine_mode mode,
              int *punsignedp ATTRIBUTE_UNUSED)
  {
 +   /* For libcalls this is invoked without TYPE from the backends
 +      TARGET_PROMOTE_FUNCTION_MODE hooks.  Don't do anything in that
 +      case.  */
 +   if (type == NULL_TREE)
 +     return mode;
 +

 This broke bootstrap

 /space/rguenther/src/svn/trunk/gcc/explow.c: In function 'promote_mode':
 /space/rguenther/src/svn/trunk/gcc/explow.c:815:3: error: ISO C90
  forbids mixed declarations and code [-Werror=pedantic]
 cc1: all warnings being treated as errors


    /* FIXME: this is the same logic that was there until GCC 4.4, but we
       probably want to test POINTERS_EXTEND_UNSIGNED even if PROMOTE_MODE
       is not defined.  The affected targets are M32C, S390, SPARC.  */
 Index: gcc/config/sparc/sparc.c
 ===
 *** gcc/config/sparc/sparc.c.orig
 --- 

Ping: [PATCH] PR 48175, Make CASE_VALUES_THRESHOLD settable via --param

2011-05-06 Thread Michael Meissner
On Thu, Apr 21, 2011 at 03:02:10PM -0400, Michael Meissner wrote:
 In looking at some improvements to the powerpc, we wanted to change the 
 default
 for when a table jump is generated vs. a series of if statements.  Now, we
 could just add a powerpc specific TARGET_CASE_VALUES_THRESHOLD, but I tend to
 think that these should be settable on all/most ports with --param.
 
 At present, there are only two ports (avr and mn10300) that define their own
 TARGET_CASE_VALUES_THRESHOLD hook.  My first patch does not remove the target
 hook and modify the avr/mn10300 ports to use maybe_set_param_value, but that
 can be done if desired.
 
 The patch adds two --param values, one for when the port is using the casesi
 insn, and the other when it uses the more primitive tablejump insn.
 
 I have bootstrapped the compiler with this patch and run the test suite with 
 no
 regressions.  Is it ok to apply as is?  Should I modify the avr and mn10300
 ports to use the parameters and do away with the target hook?  Or should I do
 this just as a powerpc target hook?

I never got a response for this, and my earlier ping didn't seem to go out.
I'll check it in on Monday if there are no objections.

-- 
Michael Meissner, IBM
5 Technology Place Drive, M/S 2757, Westford, MA 01886-3141, USA
meiss...@linux.vnet.ibm.com fax +1 (978) 399-6899


Re: [google][RFA] add extra text to stack frame warnings (issue4479046)

2011-05-06 Thread Andrew Pinski
On Fri, May 6, 2011 at 1:52 AM, Chris Demetriou c...@google.com wrote:
 In theory, a more general warning-text-addition mechanism could be useful.
 e.g. a flag that said when outputting a warning about flag 'foo',
 output this additional text could be useful.
 However, we haven't felt the need to do this for other warnings.

 IMO, a general solution along these lines would be solving a problem
 that ~nobody has.  8-)

We already output the option which enables the warning that seems like
a general solution.

Thanks,
Andrew Pinski


Re: [Patch,Fortran] Minor libcaf cleanup

2011-05-06 Thread Daniel Kraft

On 05/05/11 21:18, Tobias Burnus wrote:

Changes:
- Remove (not working) critical functions; a normal coarray of LOCK type
should be used instead. (Stub left in until it is removed the the front
end.)

- Added prototypes and stub implementations for
registering/deregistering coarray (currently unused).

- Small bug fixes.

OK for the trunk?


Ok.

Yours,
Daniel


--
http://www.pro-vegan.info/
--
Done:  Arc-Bar-Cav-Kni-Ran-Rog-Sam-Tou-Val-Wiz
To go: Hea-Mon-Pri


Re: Ping: [PATCH] PR 48175, Make CASE_VALUES_THRESHOLD settable via --param

2011-05-06 Thread Jakub Jelinek
On Fri, May 06, 2011 at 12:21:24PM -0400, Michael Meissner wrote:
 On Thu, Apr 21, 2011 at 03:02:10PM -0400, Michael Meissner wrote:
  In looking at some improvements to the powerpc, we wanted to change the 
  default
  for when a table jump is generated vs. a series of if statements.  Now, we
  could just add a powerpc specific TARGET_CASE_VALUES_THRESHOLD, but I tend 
  to
  think that these should be settable on all/most ports with --param.
  
  At present, there are only two ports (avr and mn10300) that define their own
  TARGET_CASE_VALUES_THRESHOLD hook.  My first patch does not remove the 
  target
  hook and modify the avr/mn10300 ports to use maybe_set_param_value, but that
  can be done if desired.
  
  The patch adds two --param values, one for when the port is using the casesi
  insn, and the other when it uses the more primitive tablejump insn.
  
  I have bootstrapped the compiler with this patch and run the test suite 
  with no
  regressions.  Is it ok to apply as is?  Should I modify the avr and mn10300
  ports to use the parameters and do away with the target hook?  Or should I 
  do
  this just as a powerpc target hook?
 
 I never got a response for this, and my earlier ping didn't seem to go out.
 I'll check it in on Monday if there are no objections.

I think it is very weird to have two different params, if we need any such
param, there should be just one and its default value should depend on
HAVE_casesi.

Jakub


Re: [google] Patch to support calling multi-versioned functions via new GCC builtin. (issue4440078)

2011-05-06 Thread Xinliang David Li
 I want propose a more general solution.

 1) Generic Annotation Support for gcc IR -- it is used attach to
 application/optimization specific annotation to gimple statements and
 annotations can be passed around across passes. In gcc, I only see
 HISTOGRAM annotation for value profiling, which is not general enough
 2) Support of CallInfo for each callsite. This is an annotation, but
 more standardized. The callinfo can be used to record information such
 as call attributes, call side effects, mod-ref information etc ---
 current gimple_call_flags can be folded into this Info structure.

  I don't like generic annotation facilities.  What should passes do with
 annotated stmts that are a) transformed, b) removed?  See RTL notes
 and all the interesting issues they cause.


Then how do you store information that needs to be passed across
optimization passes -- you can not possibly dump all of them into the
core IR. In fact, anything that is derived from (via analysis) but not
part of the core IR need to worry about update and maintenance. In
current GIMPLE, we can find many such instances -- DU chains, Memory
SSA, control flow information, as well as flags like visited,
no_warning, PLF (?), etc. Have a unified way of representing them is a
good thing so that 1) make the IR lean and mean; 2) avoid too many
different side data structures.  The important thing is to have a good
verifier to catch insanity and inconsistency of the annotation after
each pass.

Thanks,

David



 Similarly (not related to this discussion), LoopInfo structure can be
 introduced to annotate loop back edge jumps to allow FE to pass useful
 information at loop level. For floating pointer operations, things
 like the precision constraint, sensitivity to floating environment etc
 can be recorded in FPInfo.

 Yes, the idea is to keep the loop structures live throughout the whole
 compilation.  Just somebody needs to do the last 1% of work.

 Richard.

 T


 Restricting ourselves to use the existing target attribute at the
 beginning (with a single, compiler-generated selector function)
 is probably good enough to get a prototype up and running.
 Extending it to arbitrary selector-function, value pairs using a
 new attribute is then probably easy (I don't see the exact use-case
 for that yet, but I suppose it exists if you say so).

 For the use cases, CPU model will be looked at instead of just the
 core architecture -- this will give us more information about the
 number of cores, size of caches etc. Intel's runtime library does
 this checking at start up time so that the multi-versioned code can
 look at those and make the appropriate decisions.

 It will be even more complicated for arm processors -- which can have
 the same processor cores but configured differently w.r.t VFP, NEON
 etc.

 Ah, indeed.  I hadn't thought about the tuning for different variants
 as opposed to enabling HW features.  So the interface for overloading
 would be sth like

 enum X { Foo = 0, Bar = 5 };

 enum X select () { return Bar; }

 void foo (void) __attribute__((dispatch(select, Bar)));


 Yes, for overloading -- something like this looks good.

 Thanks,

 David




Re: PR 47793 - Support relative paths using -fprofile-generate

2011-05-06 Thread Xinliang David Li
Honza, what do you think of the patch? It actually fixed a regression.

Thanks,

David

On Wed, May 4, 2011 at 4:40 PM, Xinliang David Li davi...@google.com wrote:
 Is this patch ok for trunk?

 Allowing relative paths in -fprofile-generate= is very useful when
 running the program remotely -- the profile data will be just dumped
 in the dir relative to the working dir on the remote machine. Using
 GCOV_PREFIX_STRIP can work around the problem, but it is not always
 possible to pass the environment around.

 Thanks,

 David

 On Wed, Feb 23, 2011 at 3:37 PM, Martin Thuresson mart...@google.com wrote:
 On Wed, Feb 23, 2011 at 10:21 AM, Martin Thuresson mart...@google.com 
 wrote:
 Change 165596 and 168475 updated the code for handling gcda-paths. As
 part of this change, relative paths stopped working.

 http://gcc.gnu.org/bugzilla/show_bug.cgi?id=47793

 This patch adds a guard so that / is not added when no prefix is
 given.

 The added testcase uses the path ../gcc/. This puts the gcda in the
 same directory, so that the cleanup-coverage-files will find them.

 I have tested the patch using make bootstrap; make -k check with
 target x86_64-unknown-linux-gnu and saw no additional test failures.

 Let me know if there is any other testing I should do.

 ChangeLog
 gcc/

 2011-02-23  Martin Thuresson  mart...@google.com

        PR gcov-profile/47793
        * libgcov.c (gcov_exit): Support relative profile paths.

 gcc/testsuite/

 2011-02-23  Martin Thuresson  mart...@google.com

        PR gcov-profile/47793
        * gcc.dg/pr47793.c: New.


 Thanks,
 Martin





Re: [ping] 3 unreviewed patches

2011-05-06 Thread NightStrike
Ping again.  Still no review.

On Fri, Apr 15, 2011 at 7:08 AM, Eric Botcazou ebotca...@adacore.com wrote:
 Fix annoying gcov filename handling:
  http://gcc.gnu.org/ml/gcc-patches/2011-03/msg01380.html

 (rs6000) Fix thinko in output_profile_hook:
  http://gcc.gnu.org/ml/gcc-patches/2011-03/msg01624.html

 Introduce -Wstack-usage:
  http://gcc.gnu.org/ml/gcc-patches/2011-03/msg01992.html

 Thanks in advance.

 --
 Eric Botcazou



[google] Backport r172837 and r172788 to google/main

2011-05-06 Thread Easwaran Raman
Backported r172788 and r172837  from trunk to google/main.


2011-05-06  Easwaran Raman  era...@google.com

Backport r172837:

* cfgexpand.c (stack_var): Remove OFFSET...
(add_stack_var): ...and its reference here...
(expand_stack_vars): ...and here.
(stack_var_cmp): Sort by descending order of size.
(partition_stack_vars): Change heuristic.
(union_stack_vars): Fix to reflect changes in
partition_stack_vars.
(dump_stack_var_partition): Add newline after each partition.

2011-05-06  Easwaran Raman  era...@google.com

Backport r172788:

* cfgexpand.c (add_alias_set_conflicts): Add conflicts with a variable
containing union type only with -fstrict-aliasing.

testsuite/ChangeLog.google-main:

2011-05-06  Easwaran Raman  era...@google.com

Backport r172837:

* gcc.dg/stack-layout-2.c: New test.

2011-05-06  Easwaran Raman  era...@google.com

Backport r172788:

* gcc.dg/stack-layout-1.c: New test.
Index: gcc/testsuite/gcc.dg/stack-layout-1.c
===
--- gcc/testsuite/gcc.dg/stack-layout-1.c	(revision 0)
+++ gcc/testsuite/gcc.dg/stack-layout-1.c	(revision 173499)
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options -O2 -fno-strict-aliasing -fdump-rtl-expand } */
+union U {
+  int a;
+  float b;
+};
+struct A {
+  union U u1;
+  char a[100];
+};
+void bar (struct A *);
+void foo ()
+  {
+{
+  struct A a;
+  bar (a);
+}
+{
+  struct A a;
+  bar (a);
+}
+  }
+
+/* { dg-final { scan-rtl-dump-times Partition 1 expand } } */
+/* { dg-final { cleanup-rtl-dump expand } } */
Index: gcc/testsuite/gcc.dg/stack-layout-2.c
===
--- gcc/testsuite/gcc.dg/stack-layout-2.c	(revision 0)
+++ gcc/testsuite/gcc.dg/stack-layout-2.c	(revision 173499)
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options -O2 -fdump-rtl-expand } */
+void bar( char *);
+int foo()
+{
+  int i=0;
+  {
+char a[8000];
+bar(a);
+i += a[0];
+  }
+  {
+char a[8192];
+char b[32];
+bar(a);
+i += a[0];
+bar(b);
+i += a[0];
+  }
+  return i;
+}
+/* { dg-final { scan-rtl-dump size 8192 expand } } */
+/* { dg-final { scan-rtl-dump size 32 expand } } */
Index: gcc/cfgexpand.c
===
--- gcc/cfgexpand.c	(revision 173498)
+++ gcc/cfgexpand.c	(revision 173499)
@@ -158,11 +158,6 @@ struct stack_var
   /* The Variable.  */
   tree decl;
 
-  /* The offset of the variable.  During partitioning, this is the
- offset relative to the partition.  After partitioning, this
- is relative to the stack frame.  */
-  HOST_WIDE_INT offset;
-
   /* Initially, the size of the variable.  Later, the size of the partition,
  if this variable becomes it's partition's representative.  */
   HOST_WIDE_INT size;
@@ -267,7 +262,6 @@ add_stack_var (tree decl)
   v = stack_vars[stack_vars_num];
 
   v-decl = decl;
-  v-offset = 0;
   v-size = tree_low_cst (DECL_SIZE_UNIT (SSAVAR (decl)), 1);
   /* Ensure that all variables have size, so that a != b for any two
  variables that are simultaneously live.  */
@@ -372,8 +366,9 @@ add_alias_set_conflicts (void)
 		 to elements will conflict.  In case of unions we have
 		 to be careful as type based aliasing rules may say
 		 access to the same memory does not conflict.  So play
-		 safe and add a conflict in this case.  */
-	  || contains_union)
+		 safe and add a conflict in this case when
+ -fstrict-aliasing is used.  */
+  || (contains_union  flag_strict_aliasing))
 	add_stack_var_conflict (i, j);
 	}
 }
@@ -403,9 +398,9 @@ stack_var_cmp (const void *a, const void *b)
 return (int)largeb - (int)largea;
 
   /* Secondary compare on size, decreasing  */
-  if (sizea  sizeb)
-return -1;
   if (sizea  sizeb)
+return -1;
+  if (sizea  sizeb)
 return 1;
 
   /* Tertiary compare on true alignment, decreasing.  */
@@ -564,28 +559,19 @@ update_alias_info_with_stack_vars (void)
 
 /* A subroutine of partition_stack_vars.  The UNION portion of a UNION/FIND
partitioning algorithm.  Partitions A and B are known to be non-conflicting.
-   Merge them into a single partition A.
+   Merge them into a single partition A.  */
 
-   At the same time, add OFFSET to all variables in partition B.  At the end
-   of the partitioning process we've have a nice block easy to lay out within
-   the stack frame.  */
-
 static void
-union_stack_vars (size_t a, size_t b, HOST_WIDE_INT offset)
+union_stack_vars (size_t a, size_t b)
 {
-  size_t i, last;
   struct stack_var *vb = stack_vars[b];
   bitmap_iterator bi;
   unsigned u;
 
-  /* Update each element of partition B with the given offset,
- and merge them into partition A.  */
-  for (last = i = b; i != EOC; last = i, i = stack_vars[i].next)
-{
-  stack_vars[i].offset 

Re: [google] Backport r172837 and r172788 to google/main

2011-05-06 Thread Diego Novillo
On Fri, May 6, 2011 at 14:22, Easwaran Raman era...@google.com wrote:
 Backported r172788 and r172837  from trunk to google/main.


Minor nit:

 2011-05-06  Easwaran Raman  era...@google.com

 Backport r172837:


This needs to be indented by 1 tab.

        * cfgexpand.c (stack_var): Remove OFFSET...
        (add_stack_var): ...and its reference here...
        (expand_stack_vars): ...and here.


Diego.


Go patch committed: Use backend representation for string type

2011-05-06 Thread Ian Lance Taylor
This small patch to the Go frontend uses the backend representation for
the string type.  Bootstrapped and ran Go testsuite on
x86_64-unknown-linux-gnu.  Committed to mainline.

Ian

diff -r 434e7ba47e8d go/types.cc
--- a/go/types.cc	Thu May 05 23:32:03 2011 -0700
+++ b/go/types.cc	Fri May 06 11:12:12 2011 -0700
@@ -2172,14 +2172,27 @@
 // struct with two fields: a pointer to the characters and a length.
 
 tree
-String_type::do_get_tree(Gogo*)
-{
-  static tree struct_type;
-  return Gogo::builtin_struct(struct_type, __go_string, NULL_TREE, 2,
-			  __data,
-			  build_pointer_type(unsigned_char_type_node),
-			  __length,
-			  integer_type_node);
+String_type::do_get_tree(Gogo* gogo)
+{
+  static Btype* backend_string_type;
+  if (backend_string_type == NULL)
+{
+  std::vectorBackend::Btyped_identifier fields(2);
+
+  Type* b = gogo-lookup_global(byte)-type_value();
+  Type* pb = Type::make_pointer_type(b);
+  fields[0].name = __data;
+  fields[0].btype = tree_to_type(pb-get_tree(gogo));
+  fields[0].location = UNKNOWN_LOCATION;
+
+  Type* int_type = Type::lookup_integer_type(int);
+  fields[1].name = __length;
+  fields[1].btype = tree_to_type(int_type-get_tree(gogo));
+  fields[1].location = UNKNOWN_LOCATION;
+
+  backend_string_type = gogo-backend()-struct_type(fields);
+}
+  return type_to_tree(backend_string_type);
 }
 
 // Return a tree for the length of STRING.


[patch] fix typos and grammar in -fuse-linker-plugin docs

2011-05-06 Thread Jonathan Wakely
2011-05-06  Jonathan Wakely  jwakely@gmail.com

* doc/invoke.texi (-fuse-linker-plugin): Improve grammar.

I was going to commit a smaller version of this patch as obvious (just
the second of the three hunks in the patch) but I spotted a few other
improvements that could be made. I think my changes preserve the
intended meaning, but improve the English slightly and (I hope)
clarify it.

OK for trunk?

Would removing do from and shared libraries that do use hidden
visibility be a further improvement?
Index: doc/invoke.texi
===
--- doc/invoke.texi (revision 173499)
+++ doc/invoke.texi (working copy)
@@ -7701,17 +7701,17 @@
 Disabled by default.
 
 @item -fuse-linker-plugin
-Enables the use of linker plugin during link time optimization.  This option
-relies on the linker plugin support in linker that is available in gold
+Enables the use of a linker plugin during link time optimization.  This option
+relies on plugin support in the linker, which is available in gold
 or in GNU ld 2.21 or newer.
 
 This option enables the extraction of object files with GIMPLE bytecode out of
 library archives. This improves the quality of optimization by exposing more
-code the link time optimizer.  This information specify what symbols
+code to the link time optimizer.  This information specifies what symbols 
 can be accessed externally (by non-LTO object or during dynamic linking).
 Resulting code quality improvements on binaries (and shared libraries that do
-use hidden visibility) is similar to @code{-fwhole-program}.  See
-@option{-flto} for a description on the effect of this flag and how to use it.
+use hidden visibility) are similar to @code{-fwhole-program}.  See
+@option{-flto} for a description of the effect of this flag and how to use it.
 
 Enabled by default when LTO support in GCC is enabled and GCC was compiled
 with a linker supporting plugins (GNU ld 2.21 or newer or gold).


Re: [google] Backport r172837 and r172788 to google/main

2011-05-06 Thread Easwaran Raman
Thanks. Fixed them (as well as the same issue in some earlier entries).

-Easwaran

On Fri, May 6, 2011 at 11:27 AM, Diego Novillo dnovi...@google.com wrote:
 On Fri, May 6, 2011 at 14:22, Easwaran Raman era...@google.com wrote:
 Backported r172788 and r172837  from trunk to google/main.


 Minor nit:

 2011-05-06  Easwaran Raman  era...@google.com

 Backport r172837:


 This needs to be indented by 1 tab.

        * cfgexpand.c (stack_var): Remove OFFSET...
        (add_stack_var): ...and its reference here...
        (expand_stack_vars): ...and here.


 Diego.



Re: [google] Backport r172837 and r172788 to google/main

2011-05-06 Thread Diego Novillo
On Fri, May 6, 2011 at 14:34, Easwaran Raman era...@google.com wrote:
 Thanks. Fixed them (as well as the same issue in some earlier entries).

Great, thanks!


Re: Ping: [PATCH] PR 48175, Make CASE_VALUES_THRESHOLD settable via --param

2011-05-06 Thread Michael Meissner
On Fri, May 06, 2011 at 06:30:07PM +0200, Jakub Jelinek wrote:
 On Fri, May 06, 2011 at 12:21:24PM -0400, Michael Meissner wrote:
  On Thu, Apr 21, 2011 at 03:02:10PM -0400, Michael Meissner wrote:
   In looking at some improvements to the powerpc, we wanted to change the 
   default
   for when a table jump is generated vs. a series of if statements.  Now, we
   could just add a powerpc specific TARGET_CASE_VALUES_THRESHOLD, but I 
   tend to
   think that these should be settable on all/most ports with --param.
   
   At present, there are only two ports (avr and mn10300) that define their 
   own
   TARGET_CASE_VALUES_THRESHOLD hook.  My first patch does not remove the 
   target
   hook and modify the avr/mn10300 ports to use maybe_set_param_value, but 
   that
   can be done if desired.
   
   The patch adds two --param values, one for when the port is using the 
   casesi
   insn, and the other when it uses the more primitive tablejump insn.
   
   I have bootstrapped the compiler with this patch and run the test suite 
   with no
   regressions.  Is it ok to apply as is?  Should I modify the avr and 
   mn10300
   ports to use the parameters and do away with the target hook?  Or should 
   I do
   this just as a powerpc target hook?
  
  I never got a response for this, and my earlier ping didn't seem to go out.
  I'll check it in on Monday if there are no objections.
 
 I think it is very weird to have two different params, if we need any such
 param, there should be just one and its default value should depend on
 HAVE_casesi.

The problem is the values in params.def must be constant, and can't depend on
switches.  I imagine we can have a single param that is normally 0, and if it
is non-zero use that value, otherwise fall back to (HAVE_casesi ? 4 : 5).  Or
we could set it in finish_options in opts.c.  Any preference?

-- 
Michael Meissner, IBM
5 Technology Place Drive, M/S 2757, Westford, MA 01886-3141, USA
meiss...@linux.vnet.ibm.com fax +1 (978) 399-6899


Go patch committed: More uses of backend interface for types

2011-05-06 Thread Ian Lance Taylor
This patch to the Go frontend and to libgo adds more uses of the backend
interface for types.  There were some changes to libgo because the code
now produces a Go type for maps.  Previously the map types were
using size_t, but there is no equivalent to size_t in Go.  Go instead
has uintptr_t, so I changed the libgo code accordingly.  This should not
make any actual difference, of course.  Bootstrapped and ran Go
testsuite on x86_64-unknown-linux-gnu.  Committed to mainline.

Ian

diff -r 0a1edd881eca go/types.cc
--- a/go/types.cc	Fri May 06 11:28:30 2011 -0700
+++ b/go/types.cc	Fri May 06 12:58:15 2011 -0700
@@ -845,7 +845,7 @@
 
   if (this-forward_declaration_type() != NULL
   || this-named_type() != NULL)
-return this-get_tree_without_hash(gogo);
+return type_to_tree(this-get_btype_without_hash(gogo));
 
   if (this-is_error_type())
 return error_mark_node;
@@ -865,7 +865,7 @@
   return ins.first-second;
 }
 
-  tree t = this-get_tree_without_hash(gogo);
+  tree t = type_to_tree(this-get_btype_without_hash(gogo));
 
   if (ins.first-second == NULL_TREE)
 ins.first-second = t;
@@ -884,43 +884,33 @@
   return t;
 }
 
-// Return a tree for a type without looking in the hash table for
-// identical types.  This is used for named types, since there is no
-// point to looking in the hash table for them.
-
-tree
-Type::get_tree_without_hash(Gogo* gogo)
+// Return the backend representation for a type without looking in the
+// hash table for identical types.  This is used for named types,
+// since a named type is never identical to any other type.
+
+Btype*
+Type::get_btype_without_hash(Gogo* gogo)
 {
   if (this-tree_ == NULL_TREE)
 {
-  tree t = this-do_get_tree(gogo);
+  Btype* bt = tree_to_type(this-do_get_tree(gogo));
 
   // For a recursive function or pointer type, we will temporarily
   // return a circular pointer type during the recursion.  We
   // don't want to record that for a forwarding type, as it may
   // confuse us later.
   if (this-forward_declaration_type() != NULL
-	   gogo-backend()-is_circular_pointer_type(tree_to_type(t)))
-	return t;
+	   gogo-backend()-is_circular_pointer_type(bt))
+	return bt;
 
   if (gogo == NULL || !gogo-named_types_are_converted())
-	return t;
-
+	return bt;
+
+  tree t = type_to_tree(bt);
   this-tree_ = t;
-  go_preserve_from_gc(t);
-}
-
-  return this-tree_;
-}
-
-// Return the backend representation for a type without looking in the
-// hash table for identical types.  This is used for named types,
-// since a named type is never identical to any other type.
-
-Btype*
-Type::get_btype_without_hash(Gogo* gogo)
-{
-  return tree_to_type(this-get_tree_without_hash(gogo));
+}
+
+  return tree_to_type(this-tree_);
 }
 
 // Return a tree representing a zero initialization for this type.
@@ -1596,8 +1586,8 @@
 
  protected:
   tree
-  do_get_tree(Gogo*)
-  { return error_mark_node; }
+  do_get_tree(Gogo* gogo)
+  { return type_to_tree(gogo-backend()-error_type()); }
 
   tree
   do_get_init_tree(Gogo*, tree, bool)
@@ -3228,8 +3218,11 @@
 
  protected:
   tree
-  do_get_tree(Gogo*)
-  { return ptr_type_node; }
+  do_get_tree(Gogo* gogo)
+  {
+Btype* bt = gogo-backend()-pointer_type(gogo-backend()-void_type());
+return type_to_tree(bt);
+  }
 
   tree
   do_get_init_tree(Gogo*, tree type_tree, bool is_clear)
@@ -5064,61 +5057,44 @@
   return true;
 }
 
-// Get a tree for a map type.  A map type is represented as a pointer
-// to a struct.  The struct is __go_map in libgo/map.h.
+// Get the backend representation for a map type.  A map type is
+// represented as a pointer to a struct.  The struct is __go_map in
+// libgo/map.h.
 
 tree
 Map_type::do_get_tree(Gogo* gogo)
 {
-  static tree type_tree;
-  if (type_tree == NULL_TREE)
-{
-  tree struct_type = make_node(RECORD_TYPE);
-
-  tree map_descriptor_type = gogo-map_descriptor_type();
-  tree const_map_descriptor_type =
-	build_qualified_type(map_descriptor_type, TYPE_QUAL_CONST);
-  tree name = get_identifier(__descriptor);
-  tree field = build_decl(BUILTINS_LOCATION, FIELD_DECL, name,
-			  build_pointer_type(const_map_descriptor_type));
-  DECL_CONTEXT(field) = struct_type;
-  TYPE_FIELDS(struct_type) = field;
-  tree last_field = field;
-
-  name = get_identifier(__element_count);
-  field = build_decl(BUILTINS_LOCATION, FIELD_DECL, name, sizetype);
-  DECL_CONTEXT(field) = struct_type;
-  DECL_CHAIN(last_field) = field;
-  last_field = field;
-
-  name = get_identifier(__bucket_count);
-  field = build_decl(BUILTINS_LOCATION, FIELD_DECL, name, sizetype);
-  DECL_CONTEXT(field) = struct_type;
-  DECL_CHAIN(last_field) = field;
-  last_field = field;
-
-  name = get_identifier(__buckets);
-  field = build_decl(BUILTINS_LOCATION, FIELD_DECL, name,
-			 build_pointer_type(ptr_type_node));
-  DECL_CONTEXT(field) = struct_type;

Re: [PING] config/mep/mep.c: don't translate syntax description.

2011-05-06 Thread DJ Delorie

 OK to check in?

Ok with me.  Thanks!

 2011-05-06  Philipp Thomas  p...@suse.de
   * config/mep/mep.c (mep_validate_vliw): Syntax description
   should not be translated.


Minor type merging optimization

2011-05-06 Thread Jan Hubicka
Hi,
while looking at type merging code I noticed that type pairs can be managed
to be ordered by their UIDs.  This save some of hashing overhead in one of
most intensively querried hashes.

Also gimple_lookup_type_leader is hot function that is better to be inlined.

I also wonder, why unionfind algorithm is not used here to maintain the
positive answers?

Bootstrapped/regtested x86_64-linux, OK?

Honza

* gimple.c (type_pair_hash, type_pair_eq, lookup_type_pair):
Arrange type pairs to be UID ordered.
(gimple_lookup_type_leader): Make inline.
Index: gimple.c
===
--- gimple.c(revision 173506)
+++ gimple.c(working copy)
@@ -3240,8 +3240,7 @@ type_pair_hash (const void *p)
   const struct type_pair_d *pair = (const struct type_pair_d *) p;
   hashval_t val1 = pair-uid1;
   hashval_t val2 = pair-uid2;
-  return (iterative_hash_hashval_t (val2, val1)
- ^ iterative_hash_hashval_t (val1, val2));
+  return iterative_hash_hashval_t (val1, val2);
 }
 
 /* Compare two type pairs pointed-to by P1 and P2.  */
@@ -3251,8 +3250,7 @@ type_pair_eq (const void *p1, const void
 {
   const struct type_pair_d *pair1 = (const struct type_pair_d *) p1;
   const struct type_pair_d *pair2 = (const struct type_pair_d *) p2;
-  return ((pair1-uid1 == pair2-uid1  pair1-uid2 == pair2-uid2)
- || (pair1-uid1 == pair2-uid2  pair1-uid2 == pair2-uid1));
+  return (pair1-uid1 == pair2-uid1  pair1-uid2 == pair2-uid2);
 }
 
 /* Lookup the pair of types T1 and T2 in *VISITED_P.  Insert a new
@@ -3271,8 +3269,16 @@ lookup_type_pair (tree t1, tree t2, htab
   gcc_obstack_init (ob_p);
 }
 
-  pair.uid1 = TYPE_UID (t1);
-  pair.uid2 = TYPE_UID (t2);
+  if (TYPE_UID (t1)  TYPE_UID (t2))
+{
+  pair.uid1 = TYPE_UID (t1);
+  pair.uid2 = TYPE_UID (t2);
+}
+  else
+{
+  pair.uid1 = TYPE_UID (t2);
+  pair.uid2 = TYPE_UID (t1);
+}
   slot = htab_find_slot (*visited_p, pair, INSERT);
 
   if (*slot)
@@ -3280,8 +3286,8 @@ lookup_type_pair (tree t1, tree t2, htab
   else
 {
   p = XOBNEW (ob_p, struct type_pair_d);
-  p-uid1 = TYPE_UID (t1);
-  p-uid2 = TYPE_UID (t2);
+  p-uid1 = pair.uid1;
+  p-uid2 = pair.uid2;
   p-same_p[0] = -2;
   p-same_p[1] = -2;
   *slot = (void *) p;
@@ -3324,7 +3330,7 @@ static GTY((deletable, length(GIMPLE_TY
 /* Lookup an existing leader for T and return it or NULL_TREE, if
there is none in the cache.  */
 
-static tree
+static inline tree
 gimple_lookup_type_leader (tree t)
 {
   gimple_type_leader_entry *leader;


Re: [PATCH] Canonicalize compares in combine [2/3] Modifications to try_combine()

2011-05-06 Thread Jeff Law
-BEGIN PGP SIGNED MESSAGE-
Hash: SHA1

On 05/06/11 03:38, Chung-Lin Tang wrote:
 Hi Jeff,
 I have verified the patch with a native bootstrap + testsuite run on
 powerpc-linux (32-bit), results were clean.
 
 Attached is a single patch with the 1+2 combine parts together, with
 comments updated. Please check if they feel descriptive enough.
 
 I haven't updated the CANONICALIZE_COMPARISON stuff, as we discussed it
 doesn't look like absolutely needed right now. As for the const0_rtx
 compare, because the entire case is guarded by a CONST_INT_P, I think it
 should be safe.
 
 Is this now okay for trunk?

Yes, please install.

Thanks,
jeff
-BEGIN PGP SIGNATURE-
Version: GnuPG v1.4.11 (GNU/Linux)
Comment: Using GnuPG with Fedora - http://enigmail.mozdev.org/

iQEcBAEBAgAGBQJNxFocAAoJEBRtltQi2kC7IhQH/2P8rOuJloYS4ckDCOhbqBcW
w37R+qlzQztJLKRrI+cxSHl/uUPZ4iJ0NPsZ/WnuMcj2o/eWnU8zERYvky8NGb0g
FnHbhBsRz6cvw0+vEhfBxmZ4i2RKezSZwXquu/Dt4ZZ/Wy4agTMKEQoiimGz2QvR
f8/6JSfkJKLuj/4t/XkoQIzK516ADG1mvvp6CWKR/UoXSnfJKS9eXcmZZ5YMuVpp
NiQ4oXJHGZguH1ecv31l/Eqz6KsJTLsX+3nhriSwfORdlmDGi3IVQZy3vCP02iw8
IFDm5mxH7mUWPrTVaW4wEgMIFdiBIinsC7/mNARO2FLGnkMW++lFLuSWeRc7A9Y=
=darN
-END PGP SIGNATURE-


Use Enum for MIPS -march=, -mtune=, -mips options

2011-05-06 Thread Joseph S. Myers
This patch makes the MIPS options -march=, -mtune= and -mips use the
.opt Enum facility, completing the avoidance of global state in
mips_handle_option.

The list of possible arguments for those options in mips-tables.opt is
generated by awk code in genopt.sh, which reimplements the logic
formerly in mips.c to allow some variants of CPU names to match.  The
generated list uses Canonical markings where appropriate, so specs now
only need to match the particular form of each name that is used in
mips-cpus.def.  There is enough information in mips-cpus.def for it to
be possible to generate MIPS_ISA_LEVEL_SPEC automatically as well, but
I haven't implemented that.

Interpretation of from-abi is now deferred to mips_option_override,
so it uses whatever the final ABI setting from the command line was.
The processing of MIPS_CPU_STRING_DEFAULT is replaced by much simpler
use of strcmp to find a matching entry; the from-abi default
definition of that macro is replaced by code in mips_default_arch if
that macro is not defined.  (Previously it would always have been
defined, so the previous fallback for it being undefined was dead
code.)  MIPS is one of several targets that have code in cc1 to set a
default architecture (etc.) that may or may not be derived from
--with-arch etc. configure settings; ideally there would be some
generic mechanism for the --with-* settings to be used in cc1 as well
as via specs (maybe appending a -march= option to the end of the cc1
command line if none was there originally, though --with-arch-32
etc. complicates that - maybe we actually want cc1 to be able to
process specs) that doesn't need this custom code.  (Then
target-specific defaults would always be defaults in config.gcc for
the relevant configure options.)

Tested building cc1 and xgcc for cross to mips-elf.  Will commit to
trunk in the absence of target maintainer objections.

contrib:
2011-05-06  Joseph Myers  jos...@codesourcery.com

* gcc_update (gcc/config/mips/mips-tables.opt): New dependencies.

gcc:
2011-05-06  Joseph Myers  jos...@codesourcery.com

* config/mips/genopt.sh, config/mips/mips-cpus.def: New files.
* config/mips/mips-tables.opt: New file (generated).
* config.gcc (mips*-*-*): Add mips/mips-tables.opt to
extra_options.
* config/mips/mips-opts.h (MIPS_ARCH_OPTION_FROM_ABI,
MIPS_ARCH_OPTION_NATIVE): Define.
* config/mips/mips.c (mips_cpu_info_table): Move contents to
mips-cpus.def.
(mips_strict_matching_cpu_name_p, mips_matching_cpu_name_p,
mips_parse_cpu): Remove.
(mips_cpu_info_from_opt, mips_default_arch): New.
(mips_handle_option): Don't assert that global structures are in
use.  Don't handle OPT_march_, OPT_mtune_ and OPT_mips here.
(mips_option_override): Use new variables and functions to set
state of these options.  Use strcmp to check for individual CPU
names.
* config/mips/mips.h (MIPS_CPU_STRING_DEFAULT): Remove default
definition.
* config/mips/mips.opt (march=): Use ToLower and Enum.
(mips): Use ToLower, Enum and Var.
(mtune=): Use ToLower and Enum.
* config/mips/t-mips ($(srcdir)/config/mips/mips-tables.opt): New.

Index: contrib/gcc_update
===
--- contrib/gcc_update  (revision 173491)
+++ contrib/gcc_update  (working copy)
@@ -82,6 +82,7 @@ gcc/fixinc/fixincl.x: gcc/fixinc/fixincl
 gcc/config/arm/arm-tune.md: gcc/config/arm/arm-cores.def 
gcc/config/arm/gentune.sh
 gcc/config/arm/arm-tables.opt: gcc/config/arm/arm-arches.def 
gcc/config/arm/arm-cores.def gcc/config/arm/genopt.sh
 gcc/config/m68k/m68k-tables.opt: gcc/config/m68k/m68k-devices.def 
gcc/config/m68k/m68k-isas.def gcc/config/m68k/m68k-microarchs.def 
gcc/config/m68k/genopt.sh
+gcc/config/mips/mips-tables.opt: gcc/config/mips/mips-cpus.def 
gcc/config/mips/genopt.sh
 # And then, language-specific files
 gcc/cp/cfns.h: gcc/cp/cfns.gperf
 gcc/java/keyword.h: gcc/java/keyword.gperf
Index: gcc/config.gcc
===
--- gcc/config.gcc  (revision 173491)
+++ gcc/config.gcc  (working copy)
@@ -371,7 +371,7 @@ mips*-*-*)
cpu_type=mips
need_64bit_hwint=yes
extra_headers=loongson.h
-   extra_options=${extra_options} g.opt
+   extra_options=${extra_options} g.opt mips/mips-tables.opt
;;
 picochip-*-*)
 cpu_type=picochip
Index: gcc/config/mips/mips-tables.opt
===
--- gcc/config/mips/mips-tables.opt (revision 0)
+++ gcc/config/mips/mips-tables.opt (revision 0)
@@ -0,0 +1,605 @@
+; -*- buffer-read-only: t -*-
+; Generated automatically by genopt.sh from mips-cpus.def.
+
+; Copyright (C) 2011 Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it 

[google] revert 173158 (-fstrict-enum-precisions) (issue4503041)

2011-05-06 Thread David Li
The following patch reverted r173158 from google/main -- -fstrict-enums 
provides a better
implementation. The test cases are kept with slight modification.

Bootstrap and tested with related test cases.

Ok for google/main?

2011-05-06  David Li  davi...@google.com

Revert r173158.

Index: tree-vrp.c
===
--- tree-vrp.c  (revision 173415)
+++ tree-vrp.c  (working copy)
@@ -5553,9 +5553,7 @@ stmt_interesting_for_vrp (gimple stmt)
   ((is_gimple_call (stmt)
gimple_call_fndecl (stmt) != NULL_TREE
DECL_IS_BUILTIN (gimple_call_fndecl (stmt)))
- || !gimple_vuse (stmt))
-   (flag_strict_enum_precision
-  || TREE_CODE (TREE_TYPE (lhs)) != ENUMERAL_TYPE))
+ || !gimple_vuse (stmt)))
return true;
 }
   else if (gimple_code (stmt) == GIMPLE_COND
Index: doc/invoke.texi
===
--- doc/invoke.texi (revision 173415)
+++ doc/invoke.texi (working copy)
@@ -395,8 +395,8 @@ Objective-C and Objective-C++ Dialects}.
 -fsel-sched-pipelining -fsel-sched-pipelining-outer-loops @gol
 -fsignaling-nans -fsingle-precision-constant -fsplit-ivs-in-unroller @gol
 -fsplit-wide-types -fstack-protector -fstack-protector-all @gol
--fstrict-aliasing -fstrict-overflow -fno-strict-enum-precision -fthread-jumps
--ftracer -ftree-bit-ccp @gol
+-fstrict-aliasing -fstrict-overflow -fthread-jumps -ftracer @gol
+-ftree-bit-ccp @gol
 -ftree-builtin-call-dce -ftree-ccp -ftree-ch -ftree-copy-prop @gol
 -ftree-copyrename -ftree-dce -ftree-dominator-opts -ftree-dse @gol
 -ftree-forwprop -ftree-fre -ftree-loop-if-convert @gol
@@ -2075,11 +2075,6 @@ represented in the minimum number of bit
 enumerators).  This assumption may not be valid if the program uses a
 cast to convert an arbitrary integer value to the enumeration type.
 
-@item -fno-strict-enum-precision
-@opindex fno-strict-enum-precision
-Do not perform optimizations of switch() statements based on the
-precision of enum types.
-
 @item -ftemplate-depth=@var{n}
 @opindex ftemplate-depth
 Set the maximum instantiation depth for template classes to @var{n}.
Index: testsuite/g++.dg/other/no-strict-enum-precision-3.C
===
--- testsuite/g++.dg/other/no-strict-enum-precision-3.C (revision 173415)
+++ testsuite/g++.dg/other/no-strict-enum-precision-3.C (working copy)
@@ -1,5 +1,5 @@
 /* { dg-do run } */
-/* { dg-options -O2 -fno-strict-enum-precision } */
+/* { dg-options -O2 -fno-strict-enums } */
 
 extern C void abort (void);
 
Index: testsuite/g++.dg/other/no-strict-enum-precision-1.C
===
--- testsuite/g++.dg/other/no-strict-enum-precision-1.C (revision 173415)
+++ testsuite/g++.dg/other/no-strict-enum-precision-1.C (working copy)
@@ -1,5 +1,5 @@
 /* { dg-do run } */
-/* { dg-options -fno-strict-enum-precision } */
+/* { dg-options -fno-strict-enums } */
 
 extern C void abort (void);
 
Index: testsuite/g++.dg/other/no-strict-enum-precision-2.C
===
--- testsuite/g++.dg/other/no-strict-enum-precision-2.C (revision 173415)
+++ testsuite/g++.dg/other/no-strict-enum-precision-2.C (working copy)
@@ -1,5 +1,5 @@
 /* { dg-do run } */
-/* { dg-options -O2 -fno-strict-enum-precision } */
+/* { dg-options -O2 -fno-strict-enums } */
 
 extern C void abort (void);
 
Index: gimplify.c
===
--- gimplify.c  (revision 173415)
+++ gimplify.c  (working copy)
@@ -1602,8 +1602,6 @@ gimplify_switch_expr (tree *expr_p, gimp
type = TREE_TYPE (SWITCH_COND (switch_expr));
  if (len
       && INTEGRAL_TYPE_P (type)
-      && (flag_strict_enum_precision
-	  || TREE_CODE (type) != ENUMERAL_TYPE)
       && TYPE_MIN_VALUE (type)
       && TYPE_MAX_VALUE (type)
       && tree_int_cst_equal (CASE_LOW (VEC_index (tree, labels, 0)),

--
This patch is available for review at http://codereview.appspot.com/4503041


Re: [google] revert 173158 (-fstrict-enum-precisions) (issue4503041)

2011-05-06 Thread Diego Novillo
On Fri, May 6, 2011 at 16:53, David Li davi...@google.com wrote:
 The following patch reverted r173158 from google/main -- -fstrict-enums 
 provides a better
 implementation. The test cases are kept with slight modification.

 Bootstrap and tested with related test cases.

 Ok for google/main?

 2011-05-06  David Li  davi...@google.com

        Revert r173158.

OK.  Minor nit, when reverting a patch, please also include the
ChangeLog entry corresponding to the revision.


Diego.


C++ PATCH for c++/48909 (constexpr ICE)

2011-05-06 Thread Jason Merrill

Here, the problem was that in

(*(first + 1) < *first) != false

integral promotion changes the < expr to have type int.  Then 
fold_binary_op_with_conditional_arg wants to change this to


(*(first + 1) < *first) ? true : false

without changing the type, so the condition of a ?: has the wrong type 
by the time we get to the constexpr expander.  For 4.6, it seems 
simplest to fix this by making the constexpr code more permissive.


Tested x86_64-pc-linux-gnu, applying to 4.6 and trunk.
commit 1be580e74a6e959ffaa041b49be08c895d44eb01
Author: Jason Merrill ja...@redhat.com
Date:   Fri May 6 10:48:52 2011 -0400

PR c++/48909
* semantics.c (cxx_eval_conditional_expression): Check
integer_zerop/onep instead.

diff --git a/gcc/cp/semantics.c b/gcc/cp/semantics.c
index d0c559b..cc8db90 100644
--- a/gcc/cp/semantics.c
+++ b/gcc/cp/semantics.c
@@ -6299,13 +6299,12 @@ cxx_eval_conditional_expression (const constexpr_call 
*call, tree t,
   allow_non_constant, addr,
   non_constant_p);
   VERIFY_CONSTANT (val);
-  if (val == boolean_true_node)
-return cxx_eval_constant_expression (call, TREE_OPERAND (t, 1),
+  /* Don't VERIFY_CONSTANT the other operands.  */
+  if (integer_zerop (val))
+return cxx_eval_constant_expression (call, TREE_OPERAND (t, 2),
 allow_non_constant, addr,
 non_constant_p);
-  gcc_assert (val == boolean_false_node);
-  /* Don't VERIFY_CONSTANT here.  */
-  return cxx_eval_constant_expression (call, TREE_OPERAND (t, 2),
+  return cxx_eval_constant_expression (call, TREE_OPERAND (t, 1),
   allow_non_constant, addr,
   non_constant_p);
 }
@@ -7872,12 +7871,12 @@ potential_constant_expression_1 (tree t, bool 
want_rval, tsubst_flags_t flags)
   tmp = TREE_OPERAND (t, 0);
   if (!potential_constant_expression_1 (tmp, rval, flags))
return false;
-  else if (tmp == boolean_true_node)
-   return potential_constant_expression_1 (TREE_OPERAND (t, 1),
-   want_rval, flags);
-  else if (tmp == boolean_false_node)
+  else if (integer_zerop (tmp))
return potential_constant_expression_1 (TREE_OPERAND (t, 2),
want_rval, flags);
+  else if (TREE_CODE (tmp) == INTEGER_CST)
+   return potential_constant_expression_1 (TREE_OPERAND (t, 1),
+   want_rval, flags);
   for (i = 1; i < 3; ++i)
if (potential_constant_expression_1 (TREE_OPERAND (t, i),
 want_rval, tf_none))
diff --git a/gcc/testsuite/g++.dg/cpp0x/constexpr-condition2.C 
b/gcc/testsuite/g++.dg/cpp0x/constexpr-condition2.C
new file mode 100644
index 000..2434096
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/constexpr-condition2.C
@@ -0,0 +1,18 @@
+// PR c++/48909
+// { dg-options "-std=c++0x" }
+
+#define SA(X) static_assert((X),#X)
+
+constexpr int const * is_sorted_until(int const * first, int const * last)
+{
+ return first == last || first + 1 == last ? last
  : (*(first + 1) < *first) != false ? first + 1
+  : is_sorted_until(first + 1, last);
+}
+
+int main()
+{
+ static constexpr int array[2] = {0, 1};
+ constexpr int const * last = is_sorted_until(array, array + 2);
+ SA(last==array+2);
+}


Re: [PATCH] Cleanup expand_shift

2011-05-06 Thread Eric Botcazou
 I'm going to bootstrap & regtest this on x86_64-unknown-linux-gnu
 (with again zero testing coverage ...).  The patch fixes the
 reported ICE with a cross to cris-elf, more testing is appreciated
 (though I guess autotesters will pick it up).

 Does it look sane?

Yes, I think so, but...

 Index: gcc/expmed.c
 ===
 *** gcc/expmed.c  (revision 173473)
 --- gcc/expmed.c  (working copy)
 *** expand_shift_1 (enum tree_code code, enu
 *** 2141,2151 
 rtx new_amount, other_amount;
 rtx temp1;

 new_amount = op1;
 !   other_amount
 ! = simplify_gen_binary (MINUS, GET_MODE (op1),
 !GEN_INT (GET_MODE_BITSIZE (mode)),
 !op1);

 shifted = force_reg (mode, shifted);

 --- 2141,2156 
 rtx new_amount, other_amount;
 rtx temp1;

 +   op1_mode = GET_MODE (op1);
 new_amount = op1;
 !   if (op1_mode == VOIDmode)
 ! other_amount = GEN_INT (GET_MODE_BITSIZE (mode)
 ! - INTVAL (op1));
 !   else
 ! other_amount
 !   = simplify_gen_binary (MINUS, op1_mode,
 !  GEN_INT (GET_MODE_BITSIZE (mode)),
 !  op1);

 shifted = force_reg (mode, shifted);

... I'd test CONST_INT_P (op1) instead of op1_mode == VOIDmode since you are 
accessing INTVAL in the branch.

-- 
Eric Botcazou


C++ PATCH for c++/48911 (constexpr and implicit aggregate initializers)

2011-05-06 Thread Jason Merrill
In 48911, the constexpr expander wasn't properly dealing with 
aggregate/string constant array initializers with omitted elements.  We 
should build up a value-initialization as needed.


Tested x86_64-pc-linux-gnu, applying to trunk and 4.6.
commit b557b9384f1a6509735c25574f1c1d09703e6252
Author: Jason Merrill ja...@redhat.com
Date:   Fri May 6 10:21:38 2011 -0400

PR c++/48911
* semantics.c (cxx_eval_array_reference): Handle implicit
initializers.

diff --git a/gcc/cp/semantics.c b/gcc/cp/semantics.c
index 8bf5a52..d0c559b 100644
--- a/gcc/cp/semantics.c
+++ b/gcc/cp/semantics.c
@@ -6324,6 +6324,7 @@ cxx_eval_array_reference (const constexpr_call *call, 
tree t,
   non_constant_p);
   tree index, oldidx;
   HOST_WIDE_INT i;
+  tree elem_type;
   unsigned len, elem_nchars = 1;
   if (*non_constant_p)
 return t;
@@ -6336,16 +6337,27 @@ cxx_eval_array_reference (const constexpr_call *call, 
tree t,
 return t;
   else if (addr)
 return build4 (ARRAY_REF, TREE_TYPE (t), ary, index, NULL, NULL);
+  elem_type = TREE_TYPE (TREE_TYPE (ary));
   if (TREE_CODE (ary) == CONSTRUCTOR)
 len = CONSTRUCTOR_NELTS (ary);
   else
 {
-  elem_nchars = (TYPE_PRECISION (TREE_TYPE (TREE_TYPE (ary)))
+  elem_nchars = (TYPE_PRECISION (elem_type)
 / TYPE_PRECISION (char_type_node));
   len = (unsigned) TREE_STRING_LENGTH (ary) / elem_nchars;
 }
   if (compare_tree_int (index, len) >= 0)
 {
+  if (tree_int_cst_lt (index, array_type_nelts_top (TREE_TYPE (ary))))
+   {
+ /* If it's within the array bounds but doesn't have an explicit
+initializer, it's value-initialized.  */
+ tree val = build_value_init (elem_type, tf_warning_or_error);
+ return cxx_eval_constant_expression (call, val,
+  allow_non_constant, addr,
+  non_constant_p);
+   }
+
   if (!allow_non_constant)
   error ("array subscript out of bound");
   *non_constant_p = true;
diff --git a/gcc/testsuite/g++.dg/cpp0x/constexpr-missing.C 
b/gcc/testsuite/g++.dg/cpp0x/constexpr-missing.C
new file mode 100644
index 000..547f552
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/constexpr-missing.C
@@ -0,0 +1,39 @@
+// PR c++/48911
+// { dg-do compile }
+// { dg-options "-std=c++0x" }
+
+#define SA(X) static_assert((X),#X)
+
+struct A
+{
+  constexpr A () : a (6) {}
+  int a;
+};
+
+int
+main ()
+{
+  constexpr int a[2] = { 42 };
+  constexpr int i = a[1];
+  SA(i==0);
+  constexpr int b[1] = { };
+  constexpr int j = b[0];
+  SA(j==0);
+  constexpr char c[2] = "a";
+  constexpr char k = c[1];
+  SA(k==0);
+  constexpr char d[2] = "";
+  constexpr char l = d[1];
+  SA(l==0);
+  constexpr wchar_t e[2] = L"a";
+  constexpr wchar_t m = e[1];
+  SA(m==0);
+  constexpr wchar_t f[2] = L"";
+  constexpr wchar_t n = f[1];
+  SA(n==0);
+  constexpr A g[2] = { A () };
+  constexpr A o = g[0];
+  SA(o.a == 6);
+  constexpr A p = g[1];
+  SA(p.a == 6);
+}


Re: C++ PATCH for c++/48446 (ICE with VLA)

2011-05-06 Thread Jason Merrill
I noticed a minor tweak I could make to speed this up and figure I might 
as well, even though it shouldn't be a significant component of compile 
time.


Tested x86_64-pc-linux-gnu, applying to trunk and 4.6.
commit 4c3e6de3e988799dac490b6eb2b762674b5bb9f8
Author: Jason Merrill ja...@redhat.com
Date:   Thu May 5 17:57:50 2011 -0400

* decl.c (stabilize_save_expr_r): Set *walk_subtrees as
appropriate.

diff --git a/gcc/cp/decl.c b/gcc/cp/decl.c
index c5184e0..b5d4cc2 100644
--- a/gcc/cp/decl.c
+++ b/gcc/cp/decl.c
@@ -7615,8 +7615,9 @@ stabilize_save_expr_r (tree *expr_p, int *walk_subtrees, 
void *data)
   cp_walk_tree (op, stabilize_save_expr_r, data, pset);
   if (TREE_SIDE_EFFECTS (op))
TREE_OPERAND (expr, 0) = get_temp_regvar (TREE_TYPE (op), op);
+  *walk_subtrees = 0;
 }
-  else if (!EXPR_P (expr))
+  else if (!EXPR_P (expr) || !TREE_SIDE_EFFECTS (expr))
 *walk_subtrees = 0;
   return NULL;
 }


Re: [Patch, Fortran] Support scalar coarrays in this_image/ucobound/image_index

2011-05-06 Thread H.J. Lu
On Wed, May 4, 2011 at 11:07 PM, Tobias Burnus bur...@net-b.de wrote:
 Before, scalar coarrays were not supported in the coindex intrinsics as they
 did not have - on tree level - cobounds attached to them. This patch adds
 them.

 Additionally, it fixes the algorithm of this_image, which seemingly only
 worked by chance for the test case; hopefully it now works always correctly.

 Note: Allocatable scalar coarrays remain unsupported for the moment.

 Is the patch OK for the trunk?

This caused:

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=48919


-- 
H.J.


Re: [Patch, Fortran] Support scalar coarrays in this_image/ucobound/image_index

2011-05-06 Thread Tobias Burnus

Am 07.05.2011 00:50, schrieb H.J. Lu:

On Wed, May 4, 2011 at 11:07 PM, Tobias Burnusbur...@net-b.de  wrote:

Is the patch OK for the trunk?

This caused: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=4891


That happens if patches do not get approved in the order in which they 
were written/submitted.


The failure is fixed by my patch at: 
http://gcc.gnu.org/ml/fortran/2011-05/msg00023.html


(The patch did not include a test case (I didn't include the one I had) 
- but seemingly we now have one, which already in the trunk.)


Tobias


Re: Cgraph thunk reorg

2011-05-06 Thread Jan Hubicka
Hi,
given that the patch has received feedback and I have weekend for fixing the
fallout, I decided to commit the following version today.  It contains fix in
visibility handling of thunks that has shown in Mozilla build.


* cgraph.c (cgraph_add_thunk): Create real function node instead
of alias node; finalize it and mark needed/reachale; arrange visibility
to be right and add it into the corresponding same comdat group list.
(dump_cgraph_node): Dump thunks.
* cgraph.h (cgraph_first_defined_function, cgraph_next_defined_function,
cgraph_function_with_gimple_body_p, 
cgraph_first_function_with_gimple_body,
cgraph_next_function_with_gimple_body): New functions.
(FOR_EACH_FUNCTION_WITH_GIMPLE_BODY, FOR_EACH_DEFINED_FUNCTION):
New macros.
* ipa-cp.c (ipcp_need_redirect_p): Thunks can't be redirected.
(ipcp_generate_summary): Use FOR_EACH_FUNCTION_WITH_GIMPLE_BODY.
* cgraphunit.c (cgraph_finalize_function): Only look into possible
devirtualization when optimizing.
(verify_cgraph_node): Verify thunks.
(cgraph_analyze_function): Analyze thunks.
(cgraph_mark_functions_to_output): Output thunks only in combination
with function they are assigned to.
(assemble_thunk): Turn thunk into non-thunk; don't try to turn
alias into normal node.
(assemble_thunks): New functoin.
(cgraph_expand_function): Use it.
* lto-cgraph.c (lto_output_node): Stream thunks.
(input_overwrite_node): Stream in thunks.
* ipa-pure-const.c (analyze_function): Thunks do nothing interesting.
* lto-streamer-out.c (lto_output): Do not try to output thunk's body.
* ipa-inline.c (inline_small_functions): Use FOR_EACH_DEFINED_FUNCTION.
* ipa-inline-analysis.c (compute_inline_parameters): Analyze thunks.
(inline_analyze_function): Do not care about thunk jump functions.
(inline_generate_summary):Use FOR_EACH_DEFINED_FUNCTION.
* ipa-prop.c (ipa_prop_write_jump_functions): Use 
cgraph_function_with_gimple_body_p.
* passes.c (do_per_function_toporder): Use 
cgraph_function_with_gimple_body_p.
(execute_one_pass);Use FOR_EACH_FUNCTION_WITH_GIMPLE_BODY.
(ipa_write_summaries): Use cgraph_function_with_gimple_body_p.
(function_called_by_processed_nodes_p): Likewise.

* lto.c (lto_materialize_function): Use 
cgraph_function_with_gimple_body_p.
(add_cgraph_node_to_partition): Do not re-add items to partition; 
handle thunks.
(add_varpool_node_to_partition): Do not re-add items to partition.

Index: cgraph.c
===
*** cgraph.c(revision 173251)
--- cgraph.c(working copy)
*** cgraph_same_body_alias (struct cgraph_no
*** 595,608 
 See comments in thunk_adjust for detail on the parameters.  */
  
  struct cgraph_node *
! cgraph_add_thunk (struct cgraph_node *decl_node, tree alias, tree decl,
  bool this_adjusting,
  HOST_WIDE_INT fixed_offset, HOST_WIDE_INT virtual_value,
  tree virtual_offset,
  tree real_alias)
  {
!   struct cgraph_node *node = cgraph_get_node (alias);
  
if (node)
  {
gcc_assert (node-local.finalized);
--- 595,610 
 See comments in thunk_adjust for detail on the parameters.  */
  
  struct cgraph_node *
! cgraph_add_thunk (struct cgraph_node *decl_node ATTRIBUTE_UNUSED,
! tree alias, tree decl,
  bool this_adjusting,
  HOST_WIDE_INT fixed_offset, HOST_WIDE_INT virtual_value,
  tree virtual_offset,
  tree real_alias)
  {
!   struct cgraph_node *node;
  
+   node = cgraph_get_node (alias);
if (node)
  {
gcc_assert (node-local.finalized);
*** cgraph_add_thunk (struct cgraph_node *de
*** 610,617 
cgraph_remove_node (node);
  }

!   node = cgraph_same_body_alias_1 (decl_node, alias, decl);
!   gcc_assert (node);
gcc_checking_assert (!virtual_offset
   || tree_int_cst_equal (virtual_offset,
  size_int (virtual_value)));
--- 612,618 
cgraph_remove_node (node);
  }

!   node = cgraph_create_node (alias);
gcc_checking_assert (!virtual_offset
   || tree_int_cst_equal (virtual_offset,
  size_int (virtual_value)));
*** cgraph_add_thunk (struct cgraph_node *de
*** 621,626 
--- 622,636 
node-thunk.virtual_offset_p = virtual_offset != NULL;
node-thunk.alias = real_alias;
node-thunk.thunk_p = true;
+   node-local.finalized = true;
+ 
+   if (cgraph_decide_is_function_needed (node, decl))
+ cgraph_mark_needed_node (node);
+ 
+   if ((TREE_PUBLIC (decl)  !DECL_COMDAT (decl)  

Re: [PATCH] Fix up typed DWARF stack support for POINTERS_EXTEND_UNSIGNED targets (PR debug/48853)

2011-05-06 Thread H.J. Lu
On Thu, May 5, 2011 at 2:20 AM, Jakub Jelinek ja...@redhat.com wrote:
 Hi!

 My typed DWARF stack changes apparently broke ia64-hpux and H.J.'s out of
 tree x32 target.  There are several issues:
 1) for SUBREG mem_loc_descriptor's 3rd argument was wrong, found by code
   inspection
 2) CONST/SYMBOL_REF/LABEL_REF when in MEM addresses on 
 POINTERS_EXTEND_UNSIGNED
   targets are often Pmode, which is unfortunately larger than DWARF2_ADDR_SIZE
   and my conditional would just return NULL in that case instead of
   emitting DW_OP_addr.
 3) and, when mem_loc_descriptor is called from unwind code, Pmodes larger
   than DWARF2_ADDR_SIZE would result in the new DW_OP_GNU_*_type etc. ops
   which are not allowed in .eh_frame/.debug_frame
 The following patch ought to fix that, bootstrapped/regtested on
 x86_64-linux and i686-linux and Steve tested it on ia64-hpux and H.J. on his
 port.  Ok for trunk?

 2011-05-05  Jakub Jelinek  ja...@redhat.com

        PR debug/48853
        * dwarf2out.c (mem_loc_descriptor) case SUBREG: Pass mem_mode
        instead of mode as 3rd argument to recursive call.
        (mem_loc_descriptor) case REG: If POINTERS_EXTEND_UNSIGNED, don't
        emit DW_OP_GNU_regval_type if mode is Pmode and mem_mode is not
        VOIDmode.
        (mem_loc_descriptor) case SYMBOL_REF: If POINTERS_EXTEND_UNSIGNED,
        don't give up if mode is Pmode and mem_mode is not VOIDmode.
        (mem_loc_descriptor) case CONST_INT: If POINTERS_EXTEND_UNSIGNED,
        use int_loc_descriptor if mode is Pmode and mem_mode is not VOIDmode.


Here is the missing patch for case SUBREG.  OK for trunk if there is
no regressions?

Thanks.


H.J.

2011-05-06  H.J. Lu  hongjiu...@intel.com

PR debug/48853
* dwarf2out.c (mem_loc_descriptor) case SUBREG: If
POINTERS_EXTEND_UNSIGNED, don't give up if mode is Pmode and
mem_mode is not VOIDmode.

diff --git a/gcc/dwarf2out.c b/gcc/dwarf2out.c
index 026e4a7..049ca8e 100644
--- a/gcc/dwarf2out.c
+++ b/gcc/dwarf2out.c
@@ -13892,7 +13892,11 @@ mem_loc_descriptor (rtx rtl, enum machine_mode mode,
break;
   if (GET_MODE_CLASS (mode) == MODE_INT
       && GET_MODE_CLASS (GET_MODE (SUBREG_REG (rtl))) == MODE_INT
-      && GET_MODE_SIZE (mode) <= DWARF2_ADDR_SIZE
+      && (GET_MODE_SIZE (mode) <= DWARF2_ADDR_SIZE
+#ifdef POINTERS_EXTEND_UNSIGNED
+	  || (mode == Pmode && mem_mode != VOIDmode)
+#endif
+	 )
       && GET_MODE_SIZE (GET_MODE (SUBREG_REG (rtl))) <= DWARF2_ADDR_SIZE)
{
  mem_loc_result = mem_loc_descriptor (SUBREG_REG (rtl),


[Patch, Fortran] Fixes for scalar coarrays

2011-05-06 Thread Tobias Burnus
The interface.c patch is to avoid a strange error (actual argument must 
be simply contiguous) which is a bit odd if the actual argument is a 
scalar. As the dummy was an array, a rank mismatch would have been the 
proper error. - The patch simply suppresses the error message such that 
the later error check becomes active.


The rest of the patch: For scalar coarray dummy arguments, the cobounds 
were not properly saved - thus calling the one of the coindex intrinsics 
gave an ICE.


Build and regtested on x86-64-linux.
OK for the trunk?

Tobias
2011-05-07  Tobias Burnus  bur...@net-b.de

	PR fortran/18918
	* interface.c (compare_parameter): Skip diagnostic if
	actual argument is not an array; rank mismatch is diagnosted later.
	* trans-decl.c (gfc_get_symbol_decl, gfc_trans_deferred_vars): Handle
	scalar coarrays.
	* trans-types.c (gfc_get_array_type_bounds): Ditto.

2011-05-07  Tobias Burnus  bur...@net-b.de

	PR fortran/18918
	* gfortran.de/coarray_20.f90: New.
	* gfortran.dg/coarray/image_index_2.f90: New.

diff --git a/gcc/fortran/interface.c b/gcc/fortran/interface.c
index 1f75724..732a0c5 100644
--- a/gcc/fortran/interface.c
+++ b/gcc/fortran/interface.c
@@ -1618,6 +1618,7 @@ compare_parameter (gfc_symbol *formal, gfc_expr *actual,
   /* F2008, 12.5.2.8.  */
   if (formal->attr.dimension
 	   && (formal->attr.contiguous || formal->as->type != AS_ASSUMED_SHAPE)
+	   && gfc_expr_attr (actual).dimension
 	   && !gfc_is_simply_contiguous (actual, true))
 	{
 	  if (where)
diff --git a/gcc/fortran/trans-decl.c b/gcc/fortran/trans-decl.c
index 63f03de..a78b5ac 100644
--- a/gcc/fortran/trans-decl.c
+++ b/gcc/fortran/trans-decl.c
@@ -1228,7 +1228,8 @@ gfc_get_symbol_decl (gfc_symbol * sym)
 	}
 
   /* Use a copy of the descriptor for dummy arrays.  */
-  if (sym->attr.dimension && !TREE_USED (sym->backend_decl))
+  if ((sym->attr.dimension || sym->attr.codimension)
+      && !TREE_USED (sym->backend_decl))
 {
 	  decl = gfc_build_dummy_array_decl (sym, sym-backend_decl);
 	  /* Prevent the dummy from being detected as unused if it is copied.  */
@@ -1316,7 +1317,7 @@ gfc_get_symbol_decl (gfc_symbol * sym)
 	DECL_IGNORED_P (decl) = 1;
 }
 
-  if (sym->attr.dimension)
+  if (sym->attr.dimension || sym->attr.codimension)
 {
   /* Create variables to hold the non-constant bits of array info.  */
   gfc_build_qualified_array (decl, sym);
@@ -3435,7 +3436,7 @@ gfc_trans_deferred_vars (gfc_symbol * proc_sym, gfc_wrapped_block * block)
   if (sym-assoc)
 	continue;
 
-  if (sym->attr.dimension)
+  if (sym->attr.dimension || sym->attr.codimension)
 	{
 	  switch (sym-as-type)
 	{
diff --git a/gcc/fortran/trans-types.c b/gcc/fortran/trans-types.c
index 22a2c5b..4dd82ca 100644
--- a/gcc/fortran/trans-types.c
+++ b/gcc/fortran/trans-types.c
@@ -1694,9 +1694,10 @@ gfc_get_array_type_bounds (tree etype, int dimen, int codimen, tree * lbound,
 stride = gfc_index_one_node;
   else
 stride = NULL_TREE;
-  for (n = 0; n < dimen; n++)
+  for (n = 0; n < dimen + codimen; n++)
 {
-  GFC_TYPE_ARRAY_STRIDE (fat_type, n) = stride;
+  if (n < dimen)
+	GFC_TYPE_ARRAY_STRIDE (fat_type, n) = stride;
 
   if (lbound)
 	lower = lbound[n];
@@ -1711,6 +1712,9 @@ gfc_get_array_type_bounds (tree etype, int dimen, int codimen, tree * lbound,
 	lower = NULL_TREE;
 	}
 
+  if (codimen && n == dimen + codimen - 1)
+	break;
+
   upper = ubound[n];
   if (upper != NULL_TREE)
 	{
@@ -1720,6 +1724,9 @@ gfc_get_array_type_bounds (tree etype, int dimen, int codimen, tree * lbound,
 	upper = NULL_TREE;
 	}
 
+  if (n >= dimen)
+	continue;
+
   if (upper != NULL_TREE && lower != NULL_TREE && stride != NULL_TREE)
 	{
 	  tmp = fold_build2_loc (input_location, MINUS_EXPR,
--- /dev/null	2011-05-06 19:43:06.071892303 +0200
+++ gcc/gcc/testsuite/gfortran.dg/coarray_20.f90	2011-05-07 00:40:46.0 +0200
@@ -0,0 +1,15 @@
+! { dg-do compile }
+! { dg-options "-fcoarray=single" }
+!
+! Before a bogus error (argument not simply contiguous)
+! was printed instead of the rank mismatch
+!
+! PR fortran/18918
+!
+integer :: A[*]
call bar(A) ! { dg-error "Rank mismatch in argument" }
+contains
+  subroutine bar(x)
+integer :: x(1)[*]
+  end subroutine bar
+end
--- /dev/null	2011-05-06 19:43:06.071892303 +0200
+++ gcc/gcc/testsuite/gfortran.dg/coarray/image_index_2.f90	2011-05-07 00:28:14.0 +0200
@@ -0,0 +1,76 @@
+! { dg-do run }
+!
+! Scalar coarray
+!
+! Run-time test for IMAGE_INDEX with cobounds only known at
+! the compile time, suitable for any number of NUM_IMAGES()
+! For compile-time cobounds, the -fcoarray=lib version still
+! needs run-time evaluation if image_index returns > 1
+! as image_index is 0 if the index would exceed num_images().
+!
+! Please set num_images() to >= 13, if possible.
+!
+! PR fortran/18918
+!
+
+program test_image_index
+implicit none
+integer :: index1, index2, index3
+logical :: one
+
+integer, save :: d[-1:3, *]
+integer, save ::