Go patch committed: Use backend interface for interface types
This patch to the Go frontend uses the backend interface for interface types. Bootstrapped and ran Go testsuite on x86_64-unknown-linux-gnu. Committed to mainline. Ian diff -r 37dae2a9c21b go/gogo-tree.cc --- a/go/gogo-tree.cc Wed May 04 08:38:47 2011 -0700 +++ b/go/gogo-tree.cc Wed May 04 22:18:22 2011 -0700 @@ -1936,38 +1936,6 @@ return build_fold_addr_expr(decl); } -// Build the type of the struct that holds a slice for the given -// element type. - -tree -Gogo::slice_type_tree(tree element_type_tree) -{ - // We use int for the count and capacity fields in a slice header. - // This matches 6g. The language definition guarantees that we - // can't allocate space of a size which does not fit in int - // anyhow. FIXME: integer_type_node is the the C type int but is - // not necessarily the Go type int. They will differ when the C - // type int has fewer than 32 bits. - return Gogo::builtin_struct(NULL, __go_slice, NULL_TREE, 3, - __values, - build_pointer_type(element_type_tree), - __count, - integer_type_node, - __capacity, - integer_type_node); -} - -// Given the tree for a slice type, return the tree for the type of -// the elements of the slice. - -tree -Gogo::slice_element_type_tree(tree slice_type_tree) -{ - go_assert(TREE_CODE(slice_type_tree) == RECORD_TYPE - POINTER_TYPE_P(TREE_TYPE(TYPE_FIELDS(slice_type_tree; - return TREE_TYPE(TREE_TYPE(TYPE_FIELDS(slice_type_tree))); -} - // Build a constructor for a slice. SLICE_TYPE_TREE is the type of // the slice. VALUES is the value pointer and COUNT is the number of // entries. If CAPACITY is not NULL, it is the capacity; otherwise @@ -2011,21 +1979,6 @@ return build_constructor(slice_type_tree, init); } -// Build a constructor for an empty slice. 
- -tree -Gogo::empty_slice_constructor(tree slice_type_tree) -{ - tree element_field = TYPE_FIELDS(slice_type_tree); - tree ret = Gogo::slice_constructor(slice_type_tree, - fold_convert(TREE_TYPE(element_field), - null_pointer_node), - size_zero_node, - size_zero_node); - TREE_CONSTANT(ret) = 1; - return ret; -} - // Build a map descriptor for a map of type MAPTYPE. tree diff -r 37dae2a9c21b go/gogo.h --- a/go/gogo.h Wed May 04 08:38:47 2011 -0700 +++ b/go/gogo.h Wed May 04 22:18:22 2011 -0700 @@ -465,16 +465,6 @@ static void mark_fndecl_as_builtin_library(tree fndecl); - // Build the type of the struct that holds a slice for the given - // element type. - tree - slice_type_tree(tree element_type_tree); - - // Given a tree for a slice type, return the tree for the element - // type. - static tree - slice_element_type_tree(tree slice_type_tree); - // Build a constructor for a slice. SLICE_TYPE_TREE is the type of // the slice. VALUES points to the values. COUNT is the size, // CAPACITY is the capacity. If CAPACITY is NULL, it is set to @@ -483,11 +473,6 @@ slice_constructor(tree slice_type_tree, tree values, tree count, tree capacity); - // Build a constructor for an empty slice. SLICE_TYPE_TREE is the - // type of the slice. - static tree - empty_slice_constructor(tree slice_type_tree); - // Build a map descriptor. tree map_descriptor(Map_type*); diff -r 37dae2a9c21b go/types.cc --- a/go/types.cc Wed May 04 08:38:47 2011 -0700 +++ b/go/types.cc Wed May 04 22:18:22 2011 -0700 @@ -4399,6 +4399,41 @@ return this-length_tree_; } +// Get the backend representation of the fields of a slice. This is +// not declared in types.h so that types.h doesn't have to #include +// backend.h. +// +// We use int for the count and capacity fields. This matches 6g. +// The language more or less assumes that we can't allocate space of a +// size which does not fit in int. 
+ +static void +get_backend_slice_fields(Gogo* gogo, Array_type* type, + std::vectorBackend::Btyped_identifier* bfields) +{ + bfields-resize(3); + + Type* pet = Type::make_pointer_type(type-element_type()); + Btype* pbet = tree_to_type(pet-get_tree(gogo)); + + Backend::Btyped_identifier* p = (*bfields)[0]; + p-name = __values; + p-btype = pbet; + p-location = UNKNOWN_LOCATION; + + Type* int_type = Type::lookup_integer_type(int); + + p = (*bfields)[1]; + p-name = __count; + p-btype = tree_to_type(int_type-get_tree(gogo)); + p-location = UNKNOWN_LOCATION; + + p = (*bfields)[2]; + p-name = __capacity; + p-btype = tree_to_type(int_type-get_tree(gogo)); + p-location = UNKNOWN_LOCATION; +} + // Get a tree for the type of this array. A fixed array is simply // represented as ARRAY_TYPE with the appropriate index--i.e., it is // just like an array in C. An open array is a struct with three @@ -4409,8 +,9 @@ { if (this-length_ == NULL) { - tree struct_type = gogo-slice_type_tree(void_type_node); - return this-fill_in_slice_tree(gogo, struct_type); + std::vectorBackend::Btyped_identifier bfields; + get_backend_slice_fields(gogo,
Re: RFC: A new MIPS64 ABI
Reviewing some old e-mail... On Feb 21, 2011, David Daney dda...@caviumnetworks.com wrote: Everything identical to n32, except Pmode == DImode and POINTERS_EXTEND_UNSIGNED == true. Here is a patch that allows me to generate plausible looking assembly for trivial programs. Neat! Just one suggestion: instead of NB32 (what does that stand for?) how about naming it u32? It's shorter, clear (to me), and there's the fun factor that the lower-case u looks like an upside-down n. -- Alexandre Oliva, freedom fighter http://FSFLA.org/~lxoliva/ You must be the change you wish to see in the world. -- Gandhi Be Free! -- http://FSFLA.org/ FSF Latin America board member Free Software Evangelist Red Hat Brazil Compiler Engineer
[committed] Fix var-tracking.c compilation on PCC_STATIC_STRUCT_RETURN targets (PR debug/48902)
Hi! Committed as obvious: 2011-05-06 Jakub Jelinek ja...@redhat.com PR debug/48902 * var-tracking.c (prepare_call_arguments): Move else before #endif. --- gcc/var-tracking.c.jj 2011-03-31 08:51:04.0 +0200 +++ gcc/var-tracking.c 2011-05-06 09:18:50.0 +0200 @@ -5646,8 +5646,8 @@ prepare_call_arguments (basic_block bb, } } } -#endif else +#endif INIT_CUMULATIVE_ARGS (args_so_far, type, NULL_RTX, fndecl, nargs); if (obj_type_ref && TYPE_ARG_TYPES (type) != void_list_node) Jakub
Re: [google][RFA] add extra text to stack frame warnings (issue4479046)
On Thu, May 5, 2011 at 12:19, Andrew Pinski pins...@gmail.com wrote: Is there a reason why this cannot be an option that someone passes on the command line of GCC instead of a configure option? I don't think we ever considered that approach. That's actually a great idea, I think better for our purposes than a configuration option. (Previously, it didn't much matter, since in our tree this was a small local patch directly to final.c.) Thank you, I'm going to do over taking the approach you suggested. Also can you show an example of why this message would be changed? We use the stack frame size warning on some of our internal code. (Obvious, I guess -- otherwise, why would I be messing with it. 8-) In summary, -Wframe-larger-than does not always produce obvious results. 8-) There are common questions, e.g.: * why we care about this warning at all (i.e., why does stack frame size matter?!). * how to identify the cause of the warning (since it's not necessarily obvious what's causing stack growth, and because the warning is somewhat ... finicky thanks to inlining and thanks to sometimes-less-than-great reuse of stack space from dead variables in optimized and especially unoptimized code). * how to work around, or if absolutely necessary disable the warning. So, to help, when we output the frame-size warning, we also provide a link to an internal documentation page to help with the stuff mentioned above. Of necessity, the doc link we provide explains our internal circumstances and workarounds. (Generic documentation wouldn't help with a number of the questions.) In theory, a more general warning-text-addition mechanism could be useful. e.g. a flag that said when outputting a warning about flag 'foo', output this additional text could be useful. However, we haven't felt the need to do this for other warnings. IMO, a general solution along these lines would be solving a problem that ~nobody has. 
8-) If one wanted to dive into warning message changes, there are other, more substantial changes IMO that would be generally useful and would enable this type of functionality via external tools. E.g., structured warnings with fixed identifiers (numbers, words, whatever), blah blah blah. If there were support for *that*, then people could write wrapper tools that automatically annotate warnings with additional information as necessary. (it would also make parsing errors/warnings a lot easier. 8-) Anyway, thanks for the suggestion. 8-) chris
Re: RFA: Improve jump threading 5 of N
On Thu, May 5, 2011 at 6:11 PM, Jeff Law l...@redhat.com wrote: -BEGIN PGP SIGNED MESSAGE- Hash: SHA1 I should have included these in the last patch of infrastructure changes. The main change is create_block_for_threading no longer calls remove_ctrl_stmt_and_useless_edges and instead its callers are expected to handle that, when needed. This will allow me to use create_block_for_threading to duplicate the join block in a future patch. Additionally there was another place I should have been using a macro to access the edges stored in the aux field. Bootstrapped and regression tested on x86_64-unknown-linux-gnu. OK for trunk? Ok. Thanks, Richard. Thanks, Jeff -BEGIN PGP SIGNATURE- Version: GnuPG v1.4.11 (GNU/Linux) Comment: Using GnuPG with Fedora - http://enigmail.mozdev.org/ iQEcBAEBAgAGBQJNwswlAAoJEBRtltQi2kC72U4H/Rup77S9Pi2bZgkT8k1wEY7x +teD8FOKAW52dhfFrYmI8pmOBsmC8WTvn3WlOX+a0/+eB+j2aX3OITDYAzxinu45 6w+5jBHw96iJ3IvI1HIg6wsXo0HEJW40z6OeyPR06xz9AUh2xtJCh5Mh5WCC66Qf SPisgr/w5wteuHpDT/URsW/cPfhTS26SeB5x61QAXM7wwXDETBnI5nX+kGtZ7zTG x0qslTTePWvpYj4OqtlYzUSC/a0qKhc724ZRBsRlME+OQ/ClGh0ikAWD1kzjU899 AmtrUWYf/NpYRe1XKLmylcAhN5qwYJ7rGNL5AdgD0lCzkjic63axOb9t3z6d3aY= =yU+L -END PGP SIGNATURE-
Re: [google] Patch to support calling multi-versioned functions via new GCC builtin. (issue4440078)
On Thu, May 5, 2011 at 7:02 PM, Xinliang David Li davi...@google.com wrote: On Thu, May 5, 2011 at 2:16 AM, Richard Guenther richard.guent...@gmail.com wrote: On Thu, May 5, 2011 at 12:19 AM, Xinliang David Li davi...@google.com wrote: I can think of some more-or-less obvious high-level forms, one would for example simply stick a new DISPATCH tree into gimple_call_fn (similar to how we can have OBJ_TYPE_REF there), the DISPATCH tree would be of variable length, first operand the selector function and further operands function addresses. That would keep the actual call visible (instead of a fake __builtin_dispatch call), something I'd really like to see. This sounds like a good long term solution. Thinking about it again maybe, similar to OBJ_TYPE_REF, have the selection itself lowered and only keep the set of functions as additional info. Thus instead of having the selector function as first operand have a pointer to the selected function there (that also avoids too much knowledge about the return value of the selector). Thus, sel = selector (); switch (sel) { case A: fn = bar; case B: fn = foo; } val = (*DISPATCH (fn, bar, foo)) (...); that way regular optimizations can apply to the selection, eventually discard the dispatch if fn becomes a known direct function (similar to devirtualization). At expansion time the call address is simply taken from the first operand and an indirect call is assembled. Does the above still provide enough knowledge for the IPA path isolation? I like your original proposal (extending call) better because related information are tied together and is easier to hoist and clean up. I want propose a more general solution. 1) Generic Annotation Support for gcc IR -- it is used attach to application/optimization specific annotation to gimple statements and annotations can be passed around across passes. In gcc, I only see HISTOGRAM annotation for value profiling, which is not general enough 2) Support of CallInfo for each callsite. 
This is an annotation, but more standardized. The callinfo can be used to record information such as call attributes, call side effects, mod-ref information etc --- current gimple_call_flags can be folded into this Info structure. I don't like generic annotation facilities. What should passes to with annotated stmts that are a) transformed, b) removed? See RTL notes and all the interesting issues they cause. Similarly (not related to this discussion), LoopInfo structure can be introduced to annotate loop back edge jumps to allow FE to pass useful information at loop level. For floating pointer operations, things like the precision constraint, sensitivity to floating environment etc can be recorded in FPInfo. Yes, the idea is to keep the loop structures live throughout the whole compilation. Just somebody needs to do the last 1% of work. Richard. T Restricting ourselves to use the existing target attribute at the beginning (with a single, compiler-generated selector function) is probably good enough to get a prototype up and running. Extending it to arbitrary selector-function, value pairs using a new attribute is then probably easy (I don't see the exact use-case for that yet, but I suppose it exists if you say so). For the use cases, CPU model will be looked at instead of just the core architecture -- this will give use more information about the numbrer of cores, size of caches etc. Intel's runtime library does this checkiing at start up time so that the multi-versioned code can look at those and make the appropriate decisions. It will be even more complicated for arm processors -- which can have the same processor cores but configured differently w.r.t VFP, NEON etc. Ah, indeed. I hadn't thought about the tuning for different variants as opposed to enabling HW features. 
So the interface for overloading would be sth like enum X { Foo = 0, Bar = 5 }; enum X select () { return Bar; } void foo (void) __attribute__((dispatch(select, Bar))); Yes, for overloading -- something like this looks good. Thanks, David
Re: [PATCH, ARM] PR47855 Compute attr length for thumb2 insns, 3/3 (issue4475042)
On Thu, May 5, 2011 at 5:42 PM, Richard Earnshaw rearn...@arm.com wrote: On Thu, 2011-05-05 at 14:51 +0800, Guozhi Wei wrote: Hi This is the third part of the fixing for http://gcc.gnu.org/bugzilla/show_bug.cgi?id=47855 This patch contains the length computation/refinement for insn patterns *thumb2_movsi_insn, *thumb2_cbz and *thumb2_cbnz. At the same time this patch revealed two bugs. The first is the maximum offset of cbz/cbnz, it should be 126, but it is 128 in patterns *thumb2_cbz and *thumb2_cbnz. The second is that only 2-register form of shift instructions can be 16 bit, but 3-register form is allowed in *thumb2_shiftsi3_short and related peephole2. The fix is also contained in this patch. The patch has been tested on arm qemu. thanks Carrot 2011-05-05 Guozhi Wei car...@google.com PR target/47855 * config/arm/thumb2.md (thumb2_movsi_insn): Add length addtribute. (thumb2_shiftsi3_short and peephole2): Remove 3-register case. (thumb2_cbz): Refine length computation. (thumb2_cbnz): Likewise. Hmm, although these changes are all related to length calculations, they are really three patches that are unrelated to each other. It would be easier to review this if they were kept separate. 1) thumb2_shiftsi3_short This appears to be a straight bug. We are putting out a 32-bit instruction when we are claiming it to be only 16 bits. This is OK. 2) thumb2_movsi_insn There are two things here. a) Thumb2 has a 16-bit move instruction for all core register-to-register transfers, so the separation of alternatives 1 and 2 is unnecessary -- just code these as rk. done. b) The ldm form does not support unaligned memory accesses. I'm aware that work is being done to add unaligned support to GCC for ARM, so I need to find out whether this patch will interfere with those changes. I'll try to find out what the situation is here and get back to you. 3) thumb2_cbz and thumb2_cbnz The range calculations look wrong here. 
Remember that the 'pc' as far as GCC is concerned is the address of the start of the insn. So for a backwards branch you need to account for all the bytes in the insn pattern that occur before the branch instruction itself, and secondly you also have to remember that the 'pc' that the CPU uses is the address of the branch instruction plus 4. All these conspire to reduce the backwards range of a short branch to several bytes less than the 256 that you currently have coded. The usage of 'pc' is more complex than I thought. I understood it after reading the comment in file arm.md. And the description at http://gcc.gnu.org/onlinedocs/gccint/Insn-Lengths.html#Insn-Lengths is not right for forward branch cases. Now the ranges are modified accordingly. It has been tested on arm qemu in thumb2 mode. thanks Carrot 2011-05-06 Guozhi Wei car...@google.com PR target/47855 * config/arm/thumb2.md (thumb2_movsi_insn): Add length addtribute. (thumb2_shiftsi3_short and peephole2): Remove 3-register case. (thumb2_cbz): Refine length computation. (thumb2_cbnz): Likewise. Index: config/arm/thumb2.md === --- config/arm/thumb2.md(revision 173350) +++ config/arm/thumb2.md(working copy) @@ -165,23 +165,46 @@ ;; regs. The high register alternatives are not taken into account when ;; choosing register preferences in order to reflect their expense. (define_insn *thumb2_movsi_insn - [(set (match_operand:SI 0 nonimmediate_operand =rk,r,r,r,l ,*hk,m,*m) - (match_operand:SI 1 general_operand rk ,I,K,j,mi,*mi,l,*hk))] + [(set (match_operand:SI 0 nonimmediate_operand =rk,r,r,r,l ,*rk,Uu,*m) + (match_operand:SI 1 general_operand rk ,I,K,j,Uu,*mi,l ,*rk))] TARGET_THUMB2 ! 
TARGET_IWMMXT !(TARGET_HARD_FLOAT TARGET_VFP) ( register_operand (operands[0], SImode) || register_operand (operands[1], SImode)) - @ - mov%?\\t%0, %1 - mov%?\\t%0, %1 - mvn%?\\t%0, #%B1 - movw%?\\t%0, %1 - ldr%?\\t%0, %1 - ldr%?\\t%0, %1 - str%?\\t%1, %0 - str%?\\t%1, %0 + * + switch (which_alternative) +{ +case 0: return \mov%?\\t%0, %1\; +case 1: return \mov%?\\t%0, %1\; +case 2: return \mvn%?\\t%0, #%B1\; +case 3: return \movw%?\\t%0, %1\; + +case 4: + if (GET_CODE (XEXP (operands[1], 0)) == POST_INC) + { + operands[1] = XEXP (XEXP (operands[1], 0), 0); + return \ldm%(ia%)\t%1!, {%0}\; + } + else + return \ldr%?\\t%0, %1\; + +case 5: return \ldr%?\\t%0, %1\; + +case 6: + if (GET_CODE (XEXP (operands[0], 0)) == POST_INC) + { + operands[0] = XEXP (XEXP (operands[0], 0), 0); + return \stm%(ia%)\t%0!, {%1}\; + } + else + return \str%?\\t%1, %0\; + +case 7: return \str%?\\t%1, %0\; +default: gcc_unreachable (); +}
Re: [PATCH] Cleanup expand_shift
On Thu, 5 May 2011, Hans-Peter Nilsson wrote: On Thu, 5 May 2011, Richard Guenther wrote: On Wed, 4 May 2011, Richard Guenther wrote: On Wed, 4 May 2011, Eric Botcazou wrote: Hm. I guess people will scream if something breaks (I can't imagine what though). AAAaaarghh! Building cris-elf is now broken. I have applied the following after re-bootstrapping and testing on x86_64-unknown-linux-gnu and re-checking the mipsel cross testcase. Richard. 2011-05-05 Richard Guenther rguent...@suse.de * expmed.c (expand_variable_shift): Rename to ... (expand_shift_1): ... this. Take an expanded shift amount. For rotates recurse directly not building trees for the shift amount. (expand_variable_shift): Wrap around expand_shift_1. (expand_shift): Adjust. PR 48908. Ok, it seems simplify_gen_binary doesn't like VOIDmode. The following side-steps the issue of choosing an appropriate mode for a constant shift amount and instead computes it in HWI. Similar to the SHIFT_COUNT_TRUNCATED path we don't bother about a CONST_DOUBLE shift amount. I'm going to bootstrap regtest this on x86_64-unknown-linux-gnu (with again zero testing coverage ...). The patch fixes the reported ICE with a cross to cris-elf, more testing is appreciated (though I guess autotesters will pick it up). Does it look sane? Thanks, Richard. 2011-05-06 Richard Guenther rguent...@suse.de PR middle-end/48908 * expmed.c (expand_shift_1): Compute adjusted constant shift amount manually. Index: gcc/expmed.c === *** gcc/expmed.c(revision 173473) --- gcc/expmed.c(working copy) *** expand_shift_1 (enum tree_code code, enu *** 2141,2151 rtx new_amount, other_amount; rtx temp1; new_amount = op1; ! other_amount ! = simplify_gen_binary (MINUS, GET_MODE (op1), ! GEN_INT (GET_MODE_BITSIZE (mode)), ! op1); shifted = force_reg (mode, shifted); --- 2141,2156 rtx new_amount, other_amount; rtx temp1; + op1_mode = GET_MODE (op1); new_amount = op1; ! if (op1_mode == VOIDmode) ! other_amount = GEN_INT (GET_MODE_BITSIZE (mode) ! 
- INTVAL (op1)); ! else ! other_amount ! = simplify_gen_binary (MINUS, op1_mode, !GEN_INT (GET_MODE_BITSIZE (mode)), !op1); shifted = force_reg (mode, shifted);
Re: [patch, ARM] Fix PR target/48252
On Sun, 2011-05-01 at 10:30 +0300, Ira Rosen wrote: Ramana Radhakrishnan ramana.radhakrish...@linaro.org wrote on 07/04/2011 03:16:44 PM: On 07/04/11 08:42, Ira Rosen wrote: Hi, This patch makes both outputs of neon_vzip/vuzp/vtrn_internal explicitly dependent on both inputs, preventing incorrect optimization: for (a,b)- vzip (c,d) and (e,f)- vzip (g,d) CSE decides that b==f, since b and f depend only on d. Tested on arm-linux-gnueabi. OK for trunk? This is OK for trunk. OK for 4.6 after testing? I don't understand how it has happened, but the 4.6 patch that has been committed is corrupt (the patch submitted here looks OK). Please remember that it is essential to test release branches before commits are made. R. I have no objections to this going into 4.5 and 4.6 since it corrects the implementation of the neon intrinsics but please check with the release managers. OK to backport to 4.5 and 4.6 - both tested on arm-linux-gnueabi? Thanks, Ira 4.5 and 4.6 ChangeLog: Backport from mainline: 2011-04-18 Ulrich Weigand ulrich.weig...@linaro.org Ira Rosen ira.ro...@linaro.org PR target/48252 * config/arm/arm.c (neon_emit_pair_result_insn): Swap arguments to match neon_vzip/vuzp/vtrn_internal. * config/arm/neon.md (neon_vtrnmode_internal): Make both outputs explicitly dependent on both inputs. (neon_vzipmode_internal, neon_vuzpmode_internal): Likewise. testsuite/Changelog: Backport from mainline: 2011-04-18 Ulrich Weigand ulrich.weig...@linaro.org Ira Rosen ira.ro...@linaro.org PR target/48252 * gcc.target/arm/pr48252.c: New test. 
4.5 patch: Index: config/arm/arm.c === --- config/arm/arm.c(revision 172714) +++ config/arm/arm.c(working copy) @@ -18237,7 +18237,7 @@ neon_emit_pair_result_insn (enum machine_mode mode rtx tmp1 = gen_reg_rtx (mode); rtx tmp2 = gen_reg_rtx (mode); - emit_insn (intfn (tmp1, op1, tmp2, op2)); + emit_insn (intfn (tmp1, op1, op2, tmp2)); emit_move_insn (mem, tmp1); mem = adjust_address (mem, mode, GET_MODE_SIZE (mode)); Index: config/arm/neon.md === --- config/arm/neon.md (revision 172714) +++ config/arm/neon.md (working copy) @@ -3895,13 +3895,14 @@ (define_insn neon_vtrnmode_internal [(set (match_operand:VDQW 0 s_register_operand =w) - (unspec:VDQW [(match_operand:VDQW 1 s_register_operand 0)] -UNSPEC_VTRN1)) - (set (match_operand:VDQW 2 s_register_operand =w) -(unspec:VDQW [(match_operand:VDQW 3 s_register_operand 2)] -UNSPEC_VTRN2))] +(unspec:VDQW [(match_operand:VDQW 1 s_register_operand 0) + (match_operand:VDQW 2 s_register_operand w)] + UNSPEC_VTRN1)) + (set (match_operand:VDQW 3 s_register_operand =2) + (unspec:VDQW [(match_dup 1) (match_dup 2)] + UNSPEC_VTRN2))] TARGET_NEON - vtrn.V_sz_elem\t%V_reg0, %V_reg2 + vtrn.V_sz_elem\t%V_reg0, %V_reg3 [(set (attr neon_type) (if_then_else (ne (symbol_ref Is_d_reg) (const_int 0)) (const_string neon_bp_simple) @@ -3921,13 +3922,14 @@ (define_insn neon_vzipmode_internal [(set (match_operand:VDQW 0 s_register_operand =w) - (unspec:VDQW [(match_operand:VDQW 1 s_register_operand 0)] -UNSPEC_VZIP1)) - (set (match_operand:VDQW 2 s_register_operand =w) -(unspec:VDQW [(match_operand:VDQW 3 s_register_operand 2)] -UNSPEC_VZIP2))] +(unspec:VDQW [(match_operand:VDQW 1 s_register_operand 0) + (match_operand:VDQW 2 s_register_operand w)] + UNSPEC_VZIP1)) + (set (match_operand:VDQW 3 s_register_operand =2) +(unspec:VDQW [(match_dup 1) (match_dup 2)] + UNSPEC_VZIP2))] TARGET_NEON - vzip.V_sz_elem\t%V_reg0, %V_reg2 + vzip.V_sz_elem\t%V_reg0, %V_reg3 [(set (attr neon_type) (if_then_else (ne (symbol_ref Is_d_reg) (const_int 0)) 
(const_string neon_bp_simple) @@ -3947,13 +3949,14 @@ (define_insn neon_vuzpmode_internal [(set (match_operand:VDQW 0 s_register_operand =w) - (unspec:VDQW [(match_operand:VDQW 1 s_register_operand 0)] +(unspec:VDQW [(match_operand:VDQW 1 s_register_operand 0) + (match_operand:VDQW 2 s_register_operand w)] UNSPEC_VUZP1)) - (set (match_operand:VDQW 2 s_register_operand =w) -(unspec:VDQW [(match_operand:VDQW 3 s_register_operand 2)] -UNSPEC_VUZP2))] + (set (match_operand:VDQW 3 s_register_operand =2) +(unspec:VDQW [(match_dup 1) (match_dup 2)] +
Re: [PATCH] Canonicalize compares in combine [2/3] Modifications to try_combine()
On 04/22/2011 05:21 PM, Chung-Lin Tang wrote: Also, instead of testing for XEXP(SET_SRC(PATTERN(i3)),1) == const0_rtx at the top, it now allows CONST_INT_P(XEXP(SET_SRC(PATTERN(i3)),1)), tries to adjust it by simplify_compare_const() from the last patch, and then tests if op1 == const0_rtx. This is a small improvement in some cases. I'm not sure why it doesn't allow both? Paolo
[committed] Fix -fdefault-integer-8 omp_lib wrappers (PR fortran/pr48894)
Hi! This patch fixes the -fdefault-integer-8 wrappers, tested on x86_64-linux, committed to trunk and 4.[654] branches. 2011-05-06 Jakub Jelinek ja...@redhat.com PR fortran/48894 * fortran.c: Include limits.h. (TO_INT): Define. (omp_set_dynamic_8_, omp_set_num_threads_8_): Use !!*set instead of *set. (omp_set_num_threads_8_, omp_set_schedule_8_, omp_set_max_active_levels_8_, omp_get_ancestor_thread_num_8_, omp_get_team_size_8_): Use TO_INT macro. * testsuite/libgomp.fortran/pr48894.f90: New test. --- libgomp/fortran.c.jj2009-04-14 16:33:07.0 +0200 +++ libgomp/fortran.c 2011-05-06 10:13:46.0 +0200 @@ -1,4 +1,4 @@ -/* Copyright (C) 2005, 2007, 2008, 2009 Free Software Foundation, Inc. +/* Copyright (C) 2005, 2007, 2008, 2009, 2011 Free Software Foundation, Inc. Contributed by Jakub Jelinek ja...@redhat.com. This file is part of the GNU OpenMP Library (libgomp). @@ -27,6 +27,7 @@ #include libgomp.h #include libgomp_f.h #include stdlib.h +#include limits.h #ifdef HAVE_ATTRIBUTE_ALIAS /* Use internal aliases if possible. */ @@ -244,6 +245,8 @@ omp_lock_symver (omp_unset_nest_lock_) omp_lock_symver (omp_test_nest_lock_) #endif +#define TO_INT(x) ((x) INT_MIN ? (x) INT_MAX ? 
(x) : INT_MAX : INT_MIN) + void omp_set_dynamic_ (const int32_t *set) { @@ -253,7 +256,7 @@ omp_set_dynamic_ (const int32_t *set) void omp_set_dynamic_8_ (const int64_t *set) { - omp_set_dynamic (*set); + omp_set_dynamic (!!*set); } void @@ -265,7 +268,7 @@ omp_set_nested_ (const int32_t *set) void omp_set_nested_8_ (const int64_t *set) { - omp_set_nested (*set); + omp_set_nested (!!*set); } void @@ -277,7 +280,7 @@ omp_set_num_threads_ (const int32_t *set void omp_set_num_threads_8_ (const int64_t *set) { - omp_set_num_threads (*set); + omp_set_num_threads (TO_INT (*set)); } int32_t @@ -343,7 +346,7 @@ omp_set_schedule_ (const int32_t *kind, void omp_set_schedule_8_ (const int32_t *kind, const int64_t *modifier) { - omp_set_schedule (*kind, *modifier); + omp_set_schedule (*kind, TO_INT (*modifier)); } void @@ -381,7 +384,7 @@ omp_set_max_active_levels_ (const int32_ void omp_set_max_active_levels_8_ (const int64_t *levels) { - omp_set_max_active_levels (*levels); + omp_set_max_active_levels (TO_INT (*levels)); } int32_t @@ -405,7 +408,7 @@ omp_get_ancestor_thread_num_ (const int3 int32_t omp_get_ancestor_thread_num_8_ (const int64_t *level) { - return omp_get_ancestor_thread_num (*level); + return omp_get_ancestor_thread_num (TO_INT (*level)); } int32_t @@ -417,7 +420,7 @@ omp_get_team_size_ (const int32_t *level int32_t omp_get_team_size_8_ (const int64_t *level) { - return omp_get_team_size (*level); + return omp_get_team_size (TO_INT (*level)); } int32_t --- libgomp/testsuite/libgomp.fortran/pr48894.f90.jj2011-05-06 10:27:01.0 +0200 +++ libgomp/testsuite/libgomp.fortran/pr48894.f90 2011-05-06 10:26:29.0 +0200 @@ -0,0 +1,23 @@ +! PR fortran/48894 +! { dg-do run } +! { dg-options -fdefault-integer-8 } + + use omp_lib + integer, parameter :: zero = 0 + integer :: err + logical :: l + err = 0 + !$omp parallel +!$omp parallel private (l) + l = omp_get_ancestor_thread_num (-HUGE (zero)) .ne. -1 + l = l .or. (omp_get_ancestor_thread_num (HUGE (zero)) .ne. 
-1) + l = l .or. (omp_get_team_size (-HUGE (zero)) .ne. -1) + l = l .or. (omp_get_team_size (HUGE (zero)) .ne. -1) + if (l) then +!$omp atomic + err = err + 1 + endif +!$omp end parallel + !$omp end parallel + if (err .ne. 0) call abort +end Jakub
Re: [PATCH][ARM] Thumb2 replicated constants
On Thu, 2011-04-21 at 12:23 +0100, Andrew Stubbs wrote: This patch is a repost of the one I previously posted here: http://gcc.gnu.org/ml/gcc-patches/2010-12/msg00652.html As requested, I've broken out the other parts of the original patch, and those have already been reposted yesterday (and one committed also). This (final) part is support for using Thumb2's replicated constants and addw/subw instructions as part of split constant loads. Previously the compiler could use these constants, but only where they would be loaded in a single instruction. This patch must be applied on top of the addw/subw patch I posted yesterday. The patch also optimizes the use of inverted or negated constants as a short-cut to the final value. The previous code did this in some cases, but could not be easily adapted to replicated constants. The previous code also had a bug that prevented optimal use of shifted constants in Thumb code by imposing the same restrictions as ARM code. This has been fixed. Example 1: addw as part of a split constant load a + 0xf Before: movwr3, #65535 ; 0x0 movtr3, 15 ; 0xf addsr3, r0, r3 After: add r0, r0, #1044480 ; 0xff000 addwr0, r0, #4095; 0x00fff Example 2: arbitrary shifts bug fix a - 0xfff1 Before: sub r0, r0, #65024 ; 0xfe00 sub r0, r0, #496 ; 0x01f0 sub r0, r0, #1 ; 0x0001 After: sub r0, r0, #65280 ; 0xff00 sub r0, r0, #241 ; 0x00f1 Example 3: 16-bit replicated patterns a + 0x44004401 Before: movwr3, #17409 ; 0x4401 movtr3, 17408 ; 0x4400 addsr3, r0, r3 After: add r0, r0, #1140868096 ; 0x44004400 addsr0, r0, #1 ; 0x0001 Example 4: 32-bit replicated patterns a 0xaa00 Before: mov r3, #43520 ; 0xaa00 movtr3, 43690; 0x and r3, r0, r3 After: and r0, r0, #-1431655766 ; 0x bic r0, r0, #170 ; 0x00aa The constant splitting code was duplicated in two places, and I would have needed to modify both quite heavily, so I have taken the opportunity to unify the two, and hopefully reduce the future maintenance burden. 
Let me respond to a point Richard Earnshaw raised following the original posting: A final note is that you may have missed some cases. Now that we have movw, reg ~(16-bit const) can now be done in at most 2 insns: movw t1, #16-bit const bic Rd, reg, t1 Actually, I think we can do better than that for a 16-bit constant. Given: a ~(0xabcd) Before my changes, GCC gave: bic r0, r0, #43520 bic r0, r0, #460 bic r0, r0, #1 and after applying my patch: bic r0, r0, #43776 bic r0, r0, #205 Two instructions and no temporary register. On thumb-2 you can also use ORN that way as well. It turns out that my previous patch was broken for ORN. I traced the problem to some confusing code already in arm.c that set can_invert for IOR, but then explicitly ignored it later (I had removed the second part, but not the first). I posted, and committed a patch to fix this yesterday. In fact ORN is only of limited use for this kind of thing. Like AND, you can't use multiple ORNs to build a constant. The compiler already does use ORN in some circumstances, and this patch has not changed that. Is the patch OK? Andrew + RETURN_SEQUENCE must be an int[4]. It would be a more robust coding style to define a struct with an int[4] array as its only member. Then it wouldn't be possible to pass an undersized object to these routines. OK with a change to do that. R.
Re: Ping: Make 128 bits the default vector size for NEON
On Thu, 2011-04-21 at 09:02 +0300, Ira Rosen wrote: http://gcc.gnu.org/ml/gcc-patches/2011-03/msg02172.html The last version: ChangeLog: * doc/invoke.texi (preferred-vector-size): Document. * params.h (PREFERRED_VECTOR_SIZE): Define. * config/arm/arm.c (arm_preferred_simd_mode): Use param PREFERRED_VECTOR_SIZE instead of TARGET_NEON_VECTORIZE_QUAD. Make 128 bits the default. (arm_autovectorize_vector_sizes): Likewise. * config/arm/arm.opt (NEON_VECTORIZE_QUAD): Add RejectNegative. * params.def (PARAM_PREFERRED_VECTOR_SIZE): Define. testsuite/ChangeLog: * lib/target-supports.exp (check_effective_target_vect_multiple_sizes): New procedure. (add_options_for_quad_vectors): Replace with ... (add_options_for_double_vectors): ... this. * gfortran.dg/vect/pr19049.f90: Expect more printings on targets that support multiple vector sizes since the vectorizer attempts to vectorize with both vector sizes. * gcc.dg/vect/slp-reduc-6.c, gcc.dg/vect/no-vfa-vect-79.c, gcc.dg/vect/no-vfa-vect-102a.c, gcc.dg/vect/vect-outer-1a.c, gcc.dg/vect/vect-outer-1b.c, gcc.dg/vect/vect-outer-2b.c, gcc.dg/vect/vect-outer-3a.c, gcc.dg/vect/no-vfa-vect-37.c, gcc.dg/vect/vect-outer-3b.c, gcc.dg/vect/no-vfa-vect-101.c, gcc.dg/vect/no-vfa-vect-102.c, gcc.dg/vect/vect-reduc-dot-s8b.c, gcc.dg/vect/vect-outer-1.c, gcc.dg/vect/vect-104.c: Likewise. * gcc.dg/vect/vect-16.c: Rename to... * gcc.dg/vect/no-fast-math-vect-16.c: ... this to ensure that it runs without -ffast-math. * gcc.dg/vect/vect-42.c: Run with 64 bit vectors if applicable. * gcc.dg/vect/vect-multitypes-6.c, gcc.dg/vect/vect-52.c, gcc.dg/vect/vect-54.c, gcc.dg/vect/vect-46.c, gcc.dg/vect/vect-48.c, gcc.dg/vect/vect-96.c, gcc.dg/vect/vect-multitypes-3.c, gcc.dg/vect/vect-40.c: Likewise. * gcc.dg/vect/vect-outer-5.c: Remove quad-vectors option as redundant. 
* gcc.dg/vect/vect-109.c, gcc.dg/vect/vect-peel-1.c, gcc.dg/vect/vect-peel-2.c, gcc.dg/vect/slp-25.c, gcc.dg/vect/vect-multitypes-1.c, gcc.dg/vect/slp-3.c, gcc.dg/vect/no-vfa-pr29145.c, gcc.dg/vect/vect-multitypes-4.c: Likewise. * gcc.dg/vect/vect.exp: Run no-fast-math-vect*.c tests with -fno-fast-math. Thanks, Ira +@item preferred-vector-size +Preferred vector size in bits for targets that support multiple vector sizes. +Invalid values are ignored. The default is 128. + Shouldn't the preferred size be the largest size supported by the target? Setting it to 128 might be OK today, but who knows what might happen in future? R.
[patch] PR 48837
Hi, when accumulator transformation is performed on a function like foo(a) { if (a 0) return 1 + foo (a - 1) return bla(); } this becomes foo(a) { int tmp = 0; while (a 0) tm = 1 + tmp; return tmp + bla(); } Before, bla was a tail-call, but after the optimization, it is not (since an addition has to be performed after the result of bla is known). However, we used to mark bla as tail-call, leading to a misscompilation later. Fixed by not marking tail-calls when the transformation is performed. Bootstrapped and regtested on i686. Zdenek PR tree-optimization/48837 * tree-tailcall.c (tree_optimize_tail_calls_1): Do not mark tailcalls when accumulator transformation is performed. * gcc.dg/pr48837.c: New testcase. Index: tree-tailcall.c === --- tree-tailcall.c (revision 173354) +++ tree-tailcall.c (working copy) @@ -1021,6 +1021,14 @@ tree_optimize_tail_calls_1 (bool opt_tailcalls) integer_one_node); } + if (a_acc || m_acc) +{ + /* When the tail call elimination using accumulators is performed, +statements adding the accumulated value are inserted at all exits. +This turns all other tail calls to non-tail ones. */ + opt_tailcalls = false; +} + for (; tailcalls; tailcalls = next) { next = tailcalls-next; Index: testsuite/gcc.dg/pr48837.c === --- testsuite/gcc.dg/pr48837.c (revision 0) +++ testsuite/gcc.dg/pr48837.c (revision 0) @@ -0,0 +1,30 @@ +/* PR tree-optimization/48837 */ +/* { dg-do run } */ +/* { dg-options -O2 } */ + +void abort (void); + +__attribute__((noinline)) +int baz(void) +{ + return 1; +} + +inline const int *bar(const int *a, const int *b) +{ + return *a ? a : b; +} + +int foo(int a, int b) +{ + return a || b ? baz() : foo(*bar(a, b), 1) + foo(1, 0); +} + +int main(void) +{ + if (foo(0, 0) != 2) + abort(); + + return 0; +} +
Re: Add an array_mode_supported_p target hook
On Fri, 2011-05-06 at 11:35 +0100, Richard Sandiford wrote: Richard Earnshaw rearn...@arm.com writes: +/* Implements target hook array_mode_supported_p. */ + +static bool +arm_array_mode_supported_p (enum machine_mode mode, + unsigned HOST_WIDE_INT nelems) +{ + if (TARGET_NEON + (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)) + (nelems = 2 nelems = 4)) +return true; + + return false; +} I'm not sure I understand why this is limited to 4 or fewer elements. A Q reg of chars would surely be 16 elements. The mode here is the mode of the array element, which for the cases we're interested in would be something like V4HI (D) or V4SI (Q). nelems says how many of those (in our case, vector) elements there are in the array. The element range we want is 1-4 because that matches the number of vectors that can be loaded by the vld1-vld4 instructions. We don't include 1 because arrays of one element are already treated as having the same mode as their element. Richard I understand now... Ok. R.
Re: [PATCH] Canonicalize compares in combine [2/3] Modifications to try_combine()
On 2011/5/6 05:57 PM, Paolo Bonzini wrote: On 04/22/2011 05:21 PM, Chung-Lin Tang wrote: Also, instead of testing for XEXP(SET_SRC(PATTERN(i3)),1) == const0_rtx at the top, it now allows CONST_INT_P(XEXP(SET_SRC(PATTERN(i3)),1)), tries to adjust it by simplify_compare_const() from the last patch, and then tests if op1 == const0_rtx. This is a small improvement in some cases. I'm not sure why it doesn't allow both? Paolo Hi Paolo, I'm not sure I understand your meaning of 'both', but before this patch, it only tested for == const0_rtx, without any attempt of other cases. Now it tests CONST_INT_P(XEXP(SET_SRC(PATTERN(i3)),1)), attempts a simplification which may change a non-zero constant to const0_rtx, then test for const0_rtx. Supposedly, the new code should be strictly more general. Thanks, Chung-Lin
[PATCH] Fix PR target/48807
Hi, The attached patch is a fix for PR/48807, which is a segfault when compiling the bfin compiler. The problem appears to be that we're not checking the return value of cgraph_local_info for null before attempting to dereference it. This wasn't a problem before, but now that cgraph_local_info calls cgraph_get_node (instead of the old cgraph_node), we cannot assume it will always return non-null. Fix is in bfin specific code. Ok to commit to trunk? Stu 2011-05-06 Stuart Henderson shend...@gcc.gnu.org * config/bfin/bfin.c: Check return value of cgraph_local_info for null before attempting to use it. Index: gcc/config/bfin/bfin.c === --- gcc/config/bfin/bfin.c (revision 173363) +++ gcc/config/bfin/bfin.c (working copy) @@ -2077,6 +2077,8 @@ this_func = cgraph_local_info (current_function_decl); called_func = cgraph_local_info (decl); + if (!called_func || !this_func) +return false; return !called_func->local || this_func->local; }
Re: Add an array_mode_supported_p target hook
Richard Earnshaw rearn...@arm.com writes: +/* Implements target hook array_mode_supported_p. */ + +static bool +arm_array_mode_supported_p (enum machine_mode mode, +unsigned HOST_WIDE_INT nelems) +{ + if (TARGET_NEON + (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)) + (nelems >= 2 && nelems <= 4)) +return true; + + return false; +} I'm not sure I understand why this is limited to 4 or fewer elements. A Q reg of chars would surely be 16 elements. The mode here is the mode of the array element, which for the cases we're interested in would be something like V4HI (D) or V4SI (Q). nelems says how many of those (in our case, vector) elements there are in the array. The element range we want is 1-4 because that matches the number of vectors that can be loaded by the vld1-vld4 instructions. We don't include 1 because arrays of one element are already treated as having the same mode as their element. Richard
Re: [PATCH] Fix PR c++/48574
Jason Merrill ja...@redhat.com writes: How about type_dependent_expression_p_push instead? Like this ? Lightly tested. A full bootstrap and regression test is under way. Thanks. gcc/cp/ PR c++/48574 * class.c (fixed_type_or_null): Use type_dependent_p_push to test if the instance has a dependent initializer. gcc/testsuite/ PR c++/48574 * g++.dg/template/dependent-expr8.C: New test case. --- gcc/cp/class.c |2 +- gcc/testsuite/g++.dg/template/dependent-expr8.C | 25 +++ 2 files changed, 26 insertions(+), 1 deletions(-) create mode 100644 gcc/testsuite/g++.dg/template/dependent-expr8.C diff --git a/gcc/cp/class.c b/gcc/cp/class.c index a67b34a..6b08a03 100644 --- a/gcc/cp/class.c +++ b/gcc/cp/class.c @@ -5939,7 +5939,7 @@ fixed_type_or_null (tree instance, int *nonnull, int *cdtorp) itself. */ if (TREE_CODE (instance) == VAR_DECL DECL_INITIAL (instance) - !type_dependent_expression_p (DECL_INITIAL (instance)) + !type_dependent_expression_p_push (DECL_INITIAL (instance)) !htab_find (ht, instance)) { tree type; diff --git a/gcc/testsuite/g++.dg/template/dependent-expr8.C b/gcc/testsuite/g++.dg/template/dependent-expr8.C new file mode 100644 index 000..20014d6 --- /dev/null +++ b/gcc/testsuite/g++.dg/template/dependent-expr8.C @@ -0,0 +1,25 @@ +// Origin PR c++/48574 +// { dg-options -std=c++0x } +// { dg-do compile } + +struct A +{ + virtual int foo(); +}; + +void baz (int); + +template typename T +void +bar(T x) +{ + A b = *x; + baz (b.foo ()); +} + +void +foo() +{ + A a; + bar(a); +} -- Dodji
[Committed] S/390: Fix static chain trampolines with -m31 -mzarch
Hi, the attached patch fixes a problem with trampolines in -m31 -mzarch mode. The elements of the trampoline are pointer size entities so they have to depend on -m31/-m64 (UNITS_PER_LONG) but not on -mesa/-mzarch (UNITS_PER_WORD). Fixed with the attached patch. Committed to mainline and 4.6. Bye, -Andreas- 2011-05-06 Andreas Krebbel andreas.kreb...@de.ibm.com * config/s390/s390.c (s390_asm_trampoline_template): Comment instruction sizes. (s390_trampoline_init): Replace UNITS_PER_WORD with UNITS_PER_LONG. Index: gcc/config/s390/s390.c === *** gcc/config/s390/s390.c.orig --- gcc/config/s390/s390.c *** s390_asm_trampoline_template (FILE *file *** 9281,9296 if (TARGET_64BIT) { ! output_asm_insn (basr\t%1,0, op); ! output_asm_insn (lmg\t%0,%1,14(%1), op); ! output_asm_insn (br\t%1, op); ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 10)); } else { ! output_asm_insn (basr\t%1,0, op); ! output_asm_insn (lm\t%0,%1,6(%1), op); ! output_asm_insn (br\t%1, op); ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 8)); } } --- 9281,9296 if (TARGET_64BIT) { ! output_asm_insn (basr\t%1,0, op); /* 2 byte */ ! output_asm_insn (lmg\t%0,%1,14(%1), op); /* 6 byte */ ! output_asm_insn (br\t%1, op); /* 2 byte */ ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 10)); } else { ! output_asm_insn (basr\t%1,0, op); /* 2 byte */ ! output_asm_insn (lm\t%0,%1,6(%1), op);/* 4 byte */ ! output_asm_insn (br\t%1, op); /* 2 byte */ ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 8)); } } *** s390_trampoline_init (rtx m_tramp, tree *** 9306,9316 rtx mem; emit_block_move (m_tramp, assemble_trampoline_template (), ! GEN_INT (2*UNITS_PER_WORD), BLOCK_OP_NORMAL); ! mem = adjust_address (m_tramp, Pmode, 2*UNITS_PER_WORD); emit_move_insn (mem, cxt); ! mem = adjust_address (m_tramp, Pmode, 3*UNITS_PER_WORD); emit_move_insn (mem, fnaddr); } --- 9306,9316 rtx mem; emit_block_move (m_tramp, assemble_trampoline_template (), ! GEN_INT (2 * UNITS_PER_LONG), BLOCK_OP_NORMAL); ! 
mem = adjust_address (m_tramp, Pmode, 2 * UNITS_PER_LONG); emit_move_insn (mem, cxt); ! mem = adjust_address (m_tramp, Pmode, 3 * UNITS_PER_LONG); emit_move_insn (mem, fnaddr); }
[PATCH, i386]: Apply * constraint modifier to r in *movdf_internal_nointeger
Hello! We should avoid to allocate r in *_nointeger instructions. Constraint modifiers apply to a following character, not to the whole alternative. 2011-05-06 Uros Bizjak ubiz...@gmail.com * config/i386/i386.md (*movdf_internal_nointeger): Apply * constraint modifier to r. Tested on x86_64-pc-linux-gnu {,-m32}, committed to mainline SVN and (soon) 4.6 branch. Uros. Index: config/i386/i386.md === --- config/i386/i386.md (revision 173469) +++ config/i386/i386.md (working copy) @@ -3251,7 +3251,7 @@ [(set (match_operand:DF 0 nonimmediate_operand =f,m,f,*r ,o ,Y2*x,Y2*x,Y2*x ,m ) (match_operand:DF 1 general_operand - fm,f,G,*roF,*Fr,C ,Y2*x,mY2*x,Y2*x))] + fm,f,G,*roF,F*r,C ,Y2*x,mY2*x,Y2*x))] !TARGET_64BIT !(MEM_P (operands[0]) MEM_P (operands[1])) (optimize_function_for_size_p (cfun) || !TARGET_INTEGER_DFMODE_MOVES)
Re: [patch] PR 48837
On Fri, May 6, 2011 at 12:44 PM, Zdenek Dvorak rakd...@kam.mff.cuni.cz wrote: Hi, when accumulator transformation is performed on a function like foo(a) { if (a 0) return 1 + foo (a - 1) return bla(); } this becomes foo(a) { int tmp = 0; while (a 0) tm = 1 + tmp; return tmp + bla(); } Before, bla was a tail-call, but after the optimization, it is not (since an addition has to be performed after the result of bla is known). However, we used to mark bla as tail-call, leading to a misscompilation later. Fixed by not marking tail-calls when the transformation is performed. Bootstrapped and regtested on i686. Ok. Thanks, Richard. Zdenek PR tree-optimization/48837 * tree-tailcall.c (tree_optimize_tail_calls_1): Do not mark tailcalls when accumulator transformation is performed. * gcc.dg/pr48837.c: New testcase. Index: tree-tailcall.c === --- tree-tailcall.c (revision 173354) +++ tree-tailcall.c (working copy) @@ -1021,6 +1021,14 @@ tree_optimize_tail_calls_1 (bool opt_tailcalls) integer_one_node); } + if (a_acc || m_acc) + { + /* When the tail call elimination using accumulators is performed, + statements adding the accumulated value are inserted at all exits. + This turns all other tail calls to non-tail ones. */ + opt_tailcalls = false; + } + for (; tailcalls; tailcalls = next) { next = tailcalls-next; Index: testsuite/gcc.dg/pr48837.c === --- testsuite/gcc.dg/pr48837.c (revision 0) +++ testsuite/gcc.dg/pr48837.c (revision 0) @@ -0,0 +1,30 @@ +/* PR tree-optimization/48837 */ +/* { dg-do run } */ +/* { dg-options -O2 } */ + +void abort (void); + +__attribute__((noinline)) +int baz(void) +{ + return 1; +} + +inline const int *bar(const int *a, const int *b) +{ + return *a ? a : b; +} + +int foo(int a, int b) +{ + return a || b ? baz() : foo(*bar(a, b), 1) + foo(1, 0); +} + +int main(void) +{ + if (foo(0, 0) != 2) + abort(); + + return 0; +} +
[PATCH,c++] introduce {class,type}_of_this functions
The patch below introduces simple accessors for getting at the class or the type of the `this' parameter. It hides a couple of TYPE_ARG_TYPES usages and makes the code slightly more obvious, I think. Tested on x86_64-unknown-linux-gnu. OK to commit? -Nathan gcc/cp/ * cp-tree.h (type_of_this, class_of_this): New functions. * call.c (standard_conversion): Call class_of_this. * cxx-pretty-print.c (pp_cxx_implicit_parameter_type): Likewise. (pp_cxx_direct_abstract_declarator): Likewise. * decl2.c (change_return_type): Likewise. (cp_reconstruct_complex_type): Likewise. * error.c (dump_type_suffix, dump_function_decl): Likewise. * mangle.c (write_function_type): Likewise. * pt.c (unify): Likewise. * typeck.c (merge_types, type_memfn_quals): Likewise. * decl.c (build_this_parm): Call type_of_this. diff --git a/gcc/cp/call.c b/gcc/cp/call.c index f5bd521..7ad9279 100644 --- a/gcc/cp/call.c +++ b/gcc/cp/call.c @@ -1146,8 +1146,8 @@ standard_conversion (tree to, tree from, tree expr, bool c_cast_p, { tree fromfn = TREE_TYPE (TYPE_PTRMEMFUNC_FN_TYPE (from)); tree tofn = TREE_TYPE (TYPE_PTRMEMFUNC_FN_TYPE (to)); - tree fbase = TREE_TYPE (TREE_VALUE (TYPE_ARG_TYPES (fromfn))); - tree tbase = TREE_TYPE (TREE_VALUE (TYPE_ARG_TYPES (tofn))); + tree fbase = class_of_this (fromfn); + tree tbase = class_of_this (tofn); if (!DERIVED_FROM_P (fbase, tbase) || !same_type_p (TREE_TYPE (fromfn), TREE_TYPE (tofn)) diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h index 9d13393..d410e02 100644 --- a/gcc/cp/cp-tree.h +++ b/gcc/cp/cp-tree.h @@ -4616,6 +4616,24 @@ struct GTY(()) tinst_level { bool in_system_header_p; }; +/* Return the type of the `this' parameter of FNTYPE. */ + +static inline tree +type_of_this (const_tree fntype) +{ + function_args_iterator iter; + function_args_iter_init (iter, fntype); + return function_args_iter_cond (iter); +} + +/* Return the class of the `this' parameter of FNTYPE. 
*/ + +static inline tree +class_of_this (const_tree fntype) +{ + return TREE_TYPE (type_of_this (fntype)); +} + /* A parameter list indicating for a function with no parameters, e.g int f(void). */ extern cp_parameter_declarator *no_parameters; diff --git a/gcc/cp/cxx-pretty-print.c b/gcc/cp/cxx-pretty-print.c index bd0381b..eeb6d07 100644 --- a/gcc/cp/cxx-pretty-print.c +++ b/gcc/cp/cxx-pretty-print.c @@ -1363,7 +1363,7 @@ pp_cxx_ptr_operator (cxx_pretty_printer *pp, tree t) static inline tree pp_cxx_implicit_parameter_type (tree mf) { - return TREE_TYPE (TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (mf; + return class_of_this (TREE_TYPE (mf)); } /* @@ -1652,8 +1652,7 @@ pp_cxx_direct_abstract_declarator (cxx_pretty_printer *pp, tree t) if (TREE_CODE (t) == METHOD_TYPE) { pp_base (pp)-padding = pp_before; - pp_cxx_cv_qualifier_seq - (pp, TREE_TYPE (TREE_VALUE (TYPE_ARG_TYPES (t; + pp_cxx_cv_qualifier_seq (pp, class_of_this (t)); } pp_cxx_exception_specification (pp, t); break; diff --git a/gcc/cp/decl.c b/gcc/cp/decl.c index 3622c2c..962dd22 100644 --- a/gcc/cp/decl.c +++ b/gcc/cp/decl.c @@ -6924,7 +6924,7 @@ build_this_parm (tree type, cp_cv_quals quals) tree parm; cp_cv_quals this_quals; - this_type = TREE_VALUE (TYPE_ARG_TYPES (type)); + this_type = type_of_this (type); /* The `this' parameter is implicitly `const'; it cannot be assigned to. */ this_quals = (quals TYPE_QUAL_RESTRICT) | TYPE_QUAL_CONST; diff --git a/gcc/cp/decl2.c b/gcc/cp/decl2.c index ef8de31..02d9fd9 100644 --- a/gcc/cp/decl2.c +++ b/gcc/cp/decl2.c @@ -161,8 +161,7 @@ change_return_type (tree new_ret, tree fntype) } else newtype = build_method_type_directly - (TREE_TYPE (TREE_VALUE (TYPE_ARG_TYPES (fntype))), - new_ret, TREE_CHAIN (args)); + (class_of_this (fntype), new_ret, TREE_CHAIN (args)); if (raises) newtype = build_exception_variant (newtype, raises); if (attrs) @@ -1249,8 +1248,7 @@ cp_reconstruct_complex_type (tree type, tree bottom) so we must compensate by getting rid of it. 
*/ outer = build_method_type_directly - (TREE_TYPE (TREE_VALUE (TYPE_ARG_TYPES (type))), -inner, + (class_of_this (type), inner, TREE_CHAIN (TYPE_ARG_TYPES (type))); } else if (TREE_CODE (type) == OFFSET_TYPE) diff --git a/gcc/cp/error.c b/gcc/cp/error.c index fce7403..b364824 100644 --- a/gcc/cp/error.c +++ b/gcc/cp/error.c @@ -794,8 +794,7 @@ dump_type_suffix (tree t, int flags) dump_parameters (arg, flags ~TFF_FUNCTION_DEFAULT_ARGUMENTS); if (TREE_CODE (t) == METHOD_TYPE) - pp_cxx_cv_qualifier_seq - (cxx_pp, TREE_TYPE (TREE_VALUE (TYPE_ARG_TYPES (t; + pp_cxx_cv_qualifier_seq (cxx_pp, class_of_this (t)); else
Re: [PATCH] Canonicalize compares in combine [2/3] Modifications to try_combine()
On 05/06/2011 12:56 PM, Chung-Lin Tang wrote: I'm not sure why it doesn't allow both? Paolo Hi Paolo, I'm not sure I understand your meaning of 'both', but before this patch, it only tested for == const0_rtx, without any attempt of other cases. Now it tests CONST_INT_P(XEXP(SET_SRC(PATTERN(i3)),1)), attempts a simplification which may change a non-zero constant to const0_rtx, then test for const0_rtx. Supposedly, the new code should be strictly more general. Uff. Stupid question is stupid. Paolo
[PATCH, ARM] Unaligned accesses for packed structures [1/2]
Hi, This is the first of two patches to add unaligned-access support to the ARM backend. This is done somewhat differently to Jie Zhang's earlier patch: http://gcc.gnu.org/ml/gcc-patches/2010-12/msg01890.html In that with Jie's patch, *any* pointer dereference would be allowed to access unaligned data. This has the undesirable side-effect of disallowing instructions which don't support unaligned accesses (LDRD, LDM etc.) when unaligned accesses are enabled. Instead, this patch enables only packed-structure accesses to use ldr/str/ldrh/strh, by taking a hint from the MIPS ldl/ldr implementation. I figured the unaligned-access ARM case is kind of similar to those, except that normal loads/stores are used, and the shifting/merging happens in hardware. The standard names extv/extzv/insv can take a memory operand for the source/destination of the extract/insert operation, so we just expand to unspec'ed versions of the load and store operations when unaligned-access support is enabled: the benefit of doing that rather than, say, expanding using the regular movsi pattern is that we bypass any smartness in the compiler which might replace operations which work for unaligned accesses (ldr/str/ldrh/strh) with operations which don't work (ldrd/strd/ldm/stm/vldr/...). The downside is we might potentially miss out on optimization opportunities (since these things no longer look like plain memory accesses). Doing things this way allows us to leave the settings for STRICT_ALIGNMENT/SLOW_BYTE_ACCESS alone, avoiding the disruption that changing them might cause. The most awkward change in the patch is to generic code (expmed.c, {store,extract}_bit_field_1): in big-endian mode, the existing behaviour (when inserting/extracting a bitfield to a memory location) is definitely bogus: unit is set to BITS_PER_UNIT for memory locations, and if bitsize (the size of the field to insert/extract) is greater than BITS_PER_UNIT (which isn't unusual at all), xbitpos becomes negative. 
That can't possibly be intentional; I can only assume that this code path is not exercised for machines which have memory alternatives for bitfield insert/extract, and BITS_BIG_ENDIAN of 0 in BYTES_BIG_ENDIAN mode. The logic for choosing when to enable the unaligned-access support (and the name of the option to override the default behaviour) is lifted from Jie's patch. Tested with cross to ARM Linux, and (on a branch) in both little big-endian mode cross to ARM EABI, with no regressions. OK to apply? Thanks, Julian ChangeLog gcc/ * config/arm/arm.c (arm_override_options): Add unaligned_access support. * config/arm/arm.md (UNSPEC_UNALIGNED_LOAD) (UNSPEC_UNALIGNED_STORE): Add constants for unspecs. (insv, extzv): Add unaligned-access support. (extv): Change to expander. Likewise. (unaligned_loadsi, unaligned_loadhis, unaligned_loadhiu) (unaligned_storesi, unaligned_storehi): New. (*extv_reg): New (previous extv implementation). * config/arm/arm.opt (munaligned_access): Add option. * expmed.c (store_bit_field_1): Don't tweak bitfield numbering for memory locations if BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN. (extract_bit_field_1): Likewise. commit e76508ff702406fd63bc59465d9c7ab70dcb3266 Author: Julian Brown jul...@henry7.codesourcery.com Date: Wed May 4 10:06:25 2011 -0700 Permit regular ldr/str/ldrh/strh for packed-structure accesses etc. diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 4f9c2aa..a18aea6 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -1833,6 +1833,22 @@ arm_option_override (void) fix_cm3_ldrd = 0; } + /* Enable -munaligned-access by default for + - all ARMv6 architecture-based processors + - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors. + + Disable -munaligned-access by default for + - all pre-ARMv6 architecture-based processors + - ARMv6-M architecture-based processors. 
*/ + + if (unaligned_access == 2) +{ + if (arm_arch6 (arm_arch_notm || arm_arch7)) + unaligned_access = 1; + else + unaligned_access = 0; +} + if (TARGET_THUMB1 flag_schedule_insns) { /* Don't warn since it's on by default in -O2. */ diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md index 40ebf35..7d37445 100644 --- a/gcc/config/arm/arm.md +++ b/gcc/config/arm/arm.md @@ -104,6 +104,10 @@ UNSPEC_SYMBOL_OFFSET ; The offset of the start of the symbol from ; another symbolic address. UNSPEC_MEMORY_BARRIER ; Represent a memory barrier. + UNSPEC_UNALIGNED_LOAD ; Used to represent ldr/ldrh instructions that access + ; unaligned locations, on architectures which support + ; that. + UNSPEC_UNALIGNED_STORE ; Same for str/strh. ]) ;; UNSPEC_VOLATILE Usage: @@ -2393,7 +2397,7 @@ ;;; this insv pattern, so this pattern needs to be reevalutated. (define_expand insv - [(set (zero_extract:SI (match_operand:SI 0 s_register_operand ) + [(set
Re: Cgraph thunk reorg
On Fri, 6 May 2011, Jan Hubicka wrote: Hi, this patch implements thunks as real cgraph nodes instead of alias nodes. I am not entirely happy about it, but I can't come with anything better. The main problem is that thunks can be seen in two ways: 1) As alternative entry points into functions This is how the existing code attempts to be structured: thunks do not appear in callgraph, instead of the calgraph edges points to the functions the thunk are associated with. The problem with current code is that none of IPA code nor rest of compiler is familiar with the concept of alternative entry points. Consequentely the direct calls to thunks appears in the program in equivalent way as direct calls to function they are associated to that consequentely may lead to miscompilations when we decide to inline and ignore thunk or do ipa-prop. As a temporary measure, we declared direct calls to thunk invalid. This lead to need for devirtualization code to inline the thunk when devirtualizing the call or to not devirtualize. For siple thunks this is not big deal to do, but for covariant thunks this imply extra control flow that is something Richi don't like. Also we now devirtualize implicitely via folding lookups into the vtables. Requiring that code to ponder about thunk adjustments don't look quite right. Next problem is that with LTO we can merge direct call to external function with thunk and in this case we have to represent the direct call to thunk. To allow direct calls to thunks would mean adding concept of entry points into callgraph edgess that would mean next pointer to something that would describe it. Most probably chain of thunk structures: we do allow and build thunks of thunks. We discussed this quite few times on IRC and always this was voted down as weird. One argument agains is that it will be easy to do simple wrong code bugs by forgetting about the info hanging on cgraph edges, since in most cases there is nothing. 
2) As real functions calling the function they are associated with. Because backend don't handle alternative entry points, we really implement thunks as small functions that usually tail call into the associated functions after doing adjustments to THIS. Other natural abstraction seems to be handle thunks as real functions. This is what the patch does. There are several issues with this. 1) Not all thunks have bodies that represent in gimple. The variadic thunks currently don't have any gimple representation. While we can come with some, there is not that much of value for it because... 2) We can't expand thunks into RTL. On many archs we have existing ASM output machinery that leads to better code (and only possible code for variadic thunks that are not really representable in RTL either). 3) Thunks are not real functions in C++ ABI sense. They share comdat groups implicitely and they must be output in specified order to get proper comdat group signatures This patch takes this route and does the compensation where needed. In particular all IPA passes that worries about gimple bodies needs to be updated to handle thunks. This is not that hard to do and as first cut I simply disabled inlining, ipa-prop and cloning on thunks. We can handle that incrementally. The problem of thunks is related to problem of proper representation of aliases. Again aliases can be transparent that is not having cgraph nodes to them and all edges going to the final destination or they can be separate nodes. I originally indended to go for the first case that also has problem with representing the visibilities of aliases: i.e. depending on alias used, the edges may or may not be overwritable by the linker, so the alternative entry point info would need to represent this, too. With thunks as separate nodes, I will turn aliases into separate nodes, too that will have link via ipa-ref infrastructure (i.e. in addition to load/store and address links we will also have alias links). 
Because IPA passes really care about objects themselves, not the aliases (i.e. ipa-reference or ipa-pta wants to see the variable and all its aliases as one object, so wants the inliner or ipa-propagate), we will need to add some accessor functions that will walk to real destination of the edge and also walk all real objects referencing the given object skipping the aliases. This approach has the advantage of getting cgraph/varpool closer to symbol table and making things bit easier at lto-symtab side. The patch does basicaly the following: 1) turns thunks from alias node into function nodes with node-thunk.thunk_p flag set 2)
Re: [PATCH] Fix PR c++/48574
On 05/06/2011 07:08 AM, Dodji Seketeli wrote: Jason Merrillja...@redhat.com writes: How about type_dependent_expression_p_push instead? Like this ? Lightly tested. A full bootstrap and regression test is under way. OK. Jason
Re: Fix PR48900, powerpc duplicate __tls_get_addr calls
On Thu, May 5, 2011 at 10:17 PM, Alan Modra amo...@gmail.com wrote: My fix for PR44266 using the libcall machinery to ensure we had a proper stack frame allocated for __tls_get_addr calls sloppily used r3 as the arg to the dummy libcall. This made the call seem to depend on whatever was in r3 previously, at least until we get to the first split pass and the real arg is exposed. So DCE couldn't merge calls. Even for a simple testcase like extern __thread int i; void foo (void) { i++; } we get two __tls_get_addr calls if using global-dynamic tls model. Easliy fixed by giving the dummy libcall an arg of zero. An alternative giving slightly better -O0 code would be to say that the libcall doesn't have any args. I chose to leave the libcall with one arg since this is closest to the real __tls_get_addr call, and the whole point of faking up a libcall here is to have the generic code do whatever is necessary when making function calls. It's not totally impossible to imagine some future ABI change that treats zero arg calls differently from other calls. Bootstrapped and regression tested powerpc64-linux. OK to apply mainline, 4.6 and 4.5? PR target/48900 * config/rs6000/rs6000.c (rs6000_legitimize_tls_address): Use const0_rtx as the arg to the dummy __tls_get_addr libcall. Okay,. Thanks, David
[PATCH, ARM] Unaligned accesses for builtin memcpy [2/2]
Hi, This is the second of two patches to add unaligned-access support to the ARM backend. It builds on the first patch to provide support for unaligned accesses when expanding block moves (i.e. for builtin memcpy operations). It makes some effort to use load/store multiple instructions where appropriate (when accessing sufficiently-aligned source or destination addresses), and also makes some effort to generate fast code (for -O1/2/3) or small code (for -Os), though some of the heuristics may need tweaking still. Examples: #include string.h void foo (char *dest, char *src) { memcpy (dest, src, AMOUNT); } char known[64]; void dst_aligned (char *src) { memcpy (known, src, AMOUNT); } void src_aligned (char *dst) { memcpy (dst, known, AMOUNT); } For -mcpu=cortex-m4 -mthumb -O2 -DAMOUNT=15 we get: foo: ldr r2, [r1, #4]@ unaligned ldr r3, [r1, #8]@ unaligned push{r4} ldr r4, [r1, #0]@ unaligned str r2, [r0, #4]@ unaligned str r4, [r0, #0]@ unaligned str r3, [r0, #8]@ unaligned ldrhr2, [r1, #12] @ unaligned ldrbr3, [r1, #14] @ zero_extendqisi2 strhr2, [r0, #12] @ unaligned strbr3, [r0, #14] pop {r4} bx lr dst_aligned: push{r4} mov r4, r0 movwr3, #:lower16:known ldr r1, [r4, #4]@ unaligned ldr r2, [r4, #8]@ unaligned ldr r0, [r0, #0]@ unaligned movtr3, #:upper16:known stmia r3!, {r0, r1, r2} ldrhr1, [r4, #12] @ unaligned ldrbr2, [r4, #14] @ zero_extendqisi2 strhr1, [r3, #0]@ unaligned strbr2, [r3, #2] pop {r4} bx lr src_aligned: push{r4} movwr3, #:lower16:known movtr3, #:upper16:known mov r4, r0 ldmia r3!, {r0, r1, r2} str r0, [r4, #0]@ unaligned str r1, [r4, #4]@ unaligned str r2, [r4, #8]@ unaligned ldrhr2, [r3, #0]@ unaligned ldrbr3, [r3, #2]@ zero_extendqisi2 strhr2, [r4, #12] @ unaligned strbr3, [r4, #14] pop {r4} bx lr Whereas for -mcpu=cortex-m4 -mthumb -Os -DAMOUNT=15, e.g.: foo: add r3, r1, #12 .L2: ldr r2, [r1], #4@ unaligned cmp r1, r3 str r2, [r0], #4@ unaligned bne .L2 ldrhr3, [r1, #0]@ unaligned strhr3, [r0, #0]@ unaligned ldrbr3, [r1, #2]@ zero_extendqisi2 
strbr3, [r0, #2] bx lr Tested (alongside the first patch) with cross to ARM Linux. OK to apply? Thanks, Julian ChangeLog gcc/ * config/arm/arm.c (arm_block_move_unaligned_straight) (arm_adjust_block_mem, arm_block_move_unaligned_loop) (arm_movmemqi_unaligned): New. (arm_gen_movmemqi): Support unaligned block copies. commit 16973f69fce37a2b347ea7daffd6f593aba843d5 Author: Julian Brown jul...@henry7.codesourcery.com Date: Wed May 4 11:26:01 2011 -0700 Optimize block moves when unaligned accesses are permitted. diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index a18aea6..b6df0d3 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -10362,6 +10362,335 @@ gen_const_stm_seq (rtx *operands, int nops) return true; } +/* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit + unaligned copies on processors which support unaligned semantics for those + instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency + (using more registers) by doing e.g. load/load/store/store for a factor of 2. + An interleave factor of 1 (the minimum) will perform no interleaving. + Load/store multiple are used for aligned addresses where possible. 
*/ + +static void +arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase, + HOST_WIDE_INT length, + unsigned int interleave_factor) +{ + rtx *regs = XALLOCAVEC (rtx, interleave_factor); + int *regnos = XALLOCAVEC (int, interleave_factor); + HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD; + HOST_WIDE_INT i, j; + HOST_WIDE_INT remaining = length, words; + rtx halfword_tmp = NULL, byte_tmp = NULL; + rtx dst, src; + bool src_aligned = MEM_ALIGN (srcbase) = BITS_PER_WORD; + bool dst_aligned = MEM_ALIGN (dstbase) = BITS_PER_WORD; + HOST_WIDE_INT srcoffset, dstoffset; + HOST_WIDE_INT src_autoinc, dst_autoinc; + rtx mem, addr; + + gcc_assert (1 = interleave_factor interleave_factor = 4); + + /* Use hard registers if we have aligned source or destination so we can use + load/store multiple with contiguous registers. */ + if (dst_aligned || src_aligned) +for (i = 0; i interleave_factor; i++) + regs[i] = gen_rtx_REG (SImode, i); + else +for (i =
Re: [google] Patch to support calling multi-versioned functions via new GCC builtin. (issue4440078)
On Fri, May 6, 2011 at 04:55, Richard Guenther richard.guent...@gmail.com wrote: On Thu, May 5, 2011 at 7:02 PM, Xinliang David Li davi...@google.com wrote: 2) Support of CallInfo for each callsite. This is an annotation, but more standardized. The callinfo can be used to record information such as call attributes, call side effects, mod-ref information etc --- current gimple_call_flags can be folded into this Info structure. I don't like generic annotation facilities. What should passes to with annotated stmts that are a) transformed, b) removed? See RTL notes and all the interesting issues they cause. Likewise. We kind of tried having them in the early days of gimple and tree-ssa, but quickly removed them. Anything that is not a first-class IL member, makes life difficult. We have some examples in PHI nodes and EH regions. They're a bit to the side, and require extra code to manage. Diego.
Re: [PATCH,c++] introduce {class,type}_of_this functions
On 05/06/2011 07:49 AM, Nathan Froyd wrote: The patch below introduces simple accessors for getting at the class or the type of the `this' parameter. It hides a couple of TYPE_ARG_TYPES usages and makes the code slightly more obvious, I think. Hmm, when I first read the names I expected them to refer to the 'this' in the current function. I think adding _parm to the end of the names would help. The type_ function should also have an assert that fntype is a METHOD_TYPE. OK with those changes. Jason
Re: [Patch,AVR]: Fix PR27663
Denis Chertykov schrieb: 2011/5/2 Georg-Johann Lay a...@gjlay.de: This is a fix for an optimization flaw when a long value is composed from byte values. For -fsplit-wide-types (which is still default for avr) the code is worse than with -fno-split-wide-types. The code for the test case is better in either situations, i.e. compared to code without the patch, but it is still not optimal. Fixing this by some combine patterns is the only thing the BE can do. I did not write more complex patterns because things get too complex with little performance gain. Tested without regressions. Johann 2011-05-02 Georg-Johann Lay a...@gjlay.de PR target/27663 * config/avr/predicates.md (const_8_16_24_operand): New predicate. * config/avr/avr.md (*iormodeqi.byte0, *iormodeqi.byte1-3): New define_insn_and_split patterns. I'm sorry, but I dot'n like to have a both combiner related patches in port because code improvement isn't much and your patterns are difficult to understand and maintain. You refer to this patch for PR42210? http://gcc.gnu.org/ml/gcc-patches/2011-04/msg02099.html May be somebody else have a different oppinion ? I'm open to discussion. The patterns in this patch are similar to *addhi3_zero_extend, *addhi3_zero_extend1 that handle HI+QI resp. *addhi3_zero_extend that handle SI+QI. The difference is that they handle IOR instead of PLUS. It's true that the user has to use some specific code (addition of QI to HI resp. SI in the first case and ORing of QI to HI resp. SI in the second). IMO insn combine is a very powerful pass and I do not see why the avr BE should not take advantage of it to synthesize new instructions. Note that other parts like *sbi or *cbi rely on insn combine, too. If it's hard to understand what their intention is, I can add some more comments. As insn combine is capable of generating new instructions that are not covered by standard patterns, it is only natural that they might be more complicated than standard patterns. 
But almost everything in GCC is complicated, even in the avr BE stuff like, e.g. handling of rotate, is way much more complicated. The new patterns are restricted to one single place in the backend. If they are correct, they are supposed to work in the future without steadily maintaining them. I agree that it would be nice if the middleend detected the expressions as, say, (set (zero_extract:QI (reg:SI ...))), but that's not the case; not even on 32-bit targets with full insv/extzv support. And as I already wrote, the -fsplit-wide-types is not a good choice on avr (except for 64-bit stuff where subreg lowering leads to much code), see http://gcc.gnu.org/ml/gcc/2011-03/msg00261.html So with -fno-split-wide-types and some more elaborate testcase you will see that the new patterns are a clear improvement. Johann Denis.
Re: [Patch,AVR]: Fix PR45099
Nathan Froyd schrieb: On Mon, May 02, 2011 at 05:23:48PM +0200, Georg-Johann Lay wrote: PR45099 is an extension that gives an error when a fixed register is needed to pass a parameter to a function. Because the program will show malfunction when such code is generated, anyway, I think an error is more appropriate than a warning (as proposed in the PR). This seems like something that should be handled by common code. -Nathan Yes, I agree. However, common code is too complicated for me to run tests for, so I restrict myself to the avr backend. Until such a test finds its way into common code, it might still be useful in the avr backend. I think this has quite low priority for other targets because global registers are not very common in, e.g. i386. Johann
Re: Cgraph thunk reorg
Hi, On Fri, 6 May 2011, Jan Hubicka wrote: *** dump_cgraph_node (FILE *f, struct cgraph *** 1874,1880 if (node->only_called_at_exit) fprintf (f, " only_called_at_exit"); ! fprintf (f, "\n called by: "); for (edge = node->callers; edge; edge = edge->next_caller) { fprintf (f, "%s/%i ", cgraph_node_name (edge->caller), --- 1884,1907 if (node->only_called_at_exit) fprintf (f, " only_called_at_exit"); ! fprintf (f, "\n"); ! ! if (node->thunk.thunk_p) ! { ! if (node->thunk.thunk_p) ! { Doubled conditional. Ciao, Michael.
Re: [patch] PR 48837
-BEGIN PGP SIGNED MESSAGE- Hash: SHA1 On 05/06/11 04:44, Zdenek Dvorak wrote: Hi, when accumulator transformation is performed on a function like foo(a) { if (a > 0) return 1 + foo (a - 1) return bla(); } this becomes foo(a) { int tmp = 0; while (a > 0) tmp = 1 + tmp; return tmp + bla(); } Before, bla was a tail-call, but after the optimization, it is not (since an addition has to be performed after the result of bla is known). However, we used to mark bla as tail-call, leading to a miscompilation later. Fixed by not marking tail-calls when the transformation is performed. Bootstrapped and regtested on i686. Zdenek PR tree-optimization/48837 * tree-tailcall.c (tree_optimize_tail_calls_1): Do not mark tailcalls when accumulator transformation is performed. * gcc.dg/pr48837.c: New testcase. OK. Thanks, jeff -BEGIN PGP SIGNATURE- Version: GnuPG v1.4.11 (GNU/Linux) Comment: Using GnuPG with Fedora - http://enigmail.mozdev.org/ iQEcBAEBAgAGBQJNxA66AAoJEBRtltQi2kC7yysIAKZYUpU9JlyH2XwvvVslq8C1 CJ7E/akRDsBoYBS+syNsLMwkbGG0WoaFJzOd7vUmIknAHEusF6OasczsN8PD9aEB i8xJNTZm2yxhrVZh8m/KBX96r80RwzpAhr9L1WAspiS/xpw12lRoJoh3XeKXYXWw Z0aBL4ljCgLj6GKEyy7FbGHx0gEqaa1x7EDM1kJGCgZPAFJalJPozBiiriYL9/Th gHqLXZ0HUhXNGql5M2S+lfZG2d30Rj1KBXDrU8EOXedHRjxb+U9+WLGsUHZtkcTI j3//n6bjTr/YmyTe43voG3Rn6z6k0g2Eb8gF8UMvDbaSJlH9+xb6SuWLS8+mEhY= =luE9 -END PGP SIGNATURE-
Re: [PATCH] Canonicalize compares in combine [2/3] Modifications to try_combine()
-BEGIN PGP SIGNED MESSAGE- Hash: SHA1 On 05/06/11 03:57, Paolo Bonzini wrote: On 04/22/2011 05:21 PM, Chung-Lin Tang wrote: Also, instead of testing for XEXP(SET_SRC(PATTERN(i3)),1) == const0_rtx at the top, it now allows CONST_INT_P(XEXP(SET_SRC(PATTERN(i3)),1)), tries to adjust it by simplify_compare_const() from the last patch, and then tests if op1 == const0_rtx. This is a small improvement in some cases. I'm not sure why it doesn't allow both? Part of the purpose of the patch is to be more general in the constants allowed; prior to Chung-Lin's patch only const0_rtx was allowed. Chung-Lin's patch generalizes the code to allow other constants in specific cases. Jeff -BEGIN PGP SIGNATURE- Version: GnuPG v1.4.11 (GNU/Linux) Comment: Using GnuPG with Fedora - http://enigmail.mozdev.org/ iQEcBAEBAgAGBQJNxA9+AAoJEBRtltQi2kC79lUH/2s2u2HNJMSedW5RFGPhYghX zIosctPzZ4EkqrH5uvNJMBRxnxu0sBmDcJM5HcoaA5tz/T1aHlsGk6XvPeh+gSJO wDnFHCUMdmB7hXSB/BcpAC5496DTrZNoyix5qIwIpxPjlaA9n4LoSA+ZiO6nObPH dZ6UfyCihF+zCukSSQ0qHywJvSVfsQByBYefspS7uy0yFhzm45LHTcIN/j4hC685 lC2lIsBH7ZtMV01tRbr47PGgoey0pwvVeiHf/FcCWA6+Zo2ctfyjzsaE3exg8ms6 zylDHA/9gf2D1oYFn5FmrnHiYt3WGX/75u7bJCCJK1OUKknq6MnexVnfITsovFo= =7ZnG -END PGP SIGNATURE-
Re: Cgraph thunk reorg
On May 6, 2011, at 5:12 AM, Jan Hubicka wrote: 2) As real functions calling the function they are associated with. Because backend don't handle alternative entry points, we really implement thunks as small functions that usually tail call into the associated functions after doing adjustments to THIS. I don't like the word usually here... that tail call for non-adjusting covariant returns and non-covariant returns, and a normal call otherwise Since this is just the email description, not a problem, but I did want to ensure people don't think of thunks as tail calling.
Re: [PING] Fix PR46399 - missing mode promotion for libcall args
On Wed, May 4, 2011 at 9:45 AM, Richard Guenther richard.guent...@gmail.com wrote: On Wed, May 4, 2011 at 3:45 PM, Kai Tietz ktiet...@googlemail.com wrote: 2011/5/4 Richard Guenther richard.guent...@gmail.com: On Mon, Apr 18, 2011 at 10:17 AM, Andreas Krebbel kreb...@linux.vnet.ibm.com wrote: Hi, the attached patch uses the existing promote_function_mode hook. For a libcall neither TYPE nor FNTYPE is available so I had to change a few related function in order to deal with that. The patch also fixes the s390 DFP problems. Bye, -Andreas- 2011-04-18 Andreas Krebbel andreas.kreb...@de.ibm.com * calls.c (emit_library_call_value_1): Invoke promote_function_mode hook on libcall arguments. * explow.c (promote_function_mode, promote_mode): Handle TYPE argument being NULL. * targhooks.c (default_promote_function_mode): Lisewise. * config/s390/s390.c (s390_promote_function_mode): Likewise. * config/sparc/sparc.c (sparc_promote_function_mode): Likewise. * doc/tm.texi: Document that TYPE argument might be NULL. Index: gcc/calls.c === *** gcc/calls.c.orig --- gcc/calls.c *** emit_library_call_value_1 (int retval, r *** 3484,3489 --- 3484,3490 { rtx val = va_arg (p, rtx); enum machine_mode mode = (enum machine_mode) va_arg (p, int); + int unsigned_p = 0; /* We cannot convert the arg value to the mode the library wants here; must do it earlier where we know the signedness of the arg. */ *** emit_library_call_value_1 (int retval, r *** 3531,3539 val = force_operand (XEXP (slot, 0), NULL_RTX); } ! argvec[count].value = val; argvec[count].mode = mode; ! argvec[count].reg = targetm.calls.function_arg (args_so_far, mode, NULL_TREE, true); --- 3532,3540 val = force_operand (XEXP (slot, 0), NULL_RTX); } ! mode = promote_function_mode (NULL_TREE, mode, unsigned_p, NULL_TREE, 0); argvec[count].mode = mode; ! 
argvec[count].value = convert_modes (mode, GET_MODE (val), val, 0); argvec[count].reg = targetm.calls.function_arg (args_so_far, mode, NULL_TREE, true); Index: gcc/config/s390/s390.c === *** gcc/config/s390/s390.c.orig --- gcc/config/s390/s390.c *** s390_promote_function_mode (const_tree t *** 8742,8748 if (INTEGRAL_MODE_P (mode) GET_MODE_SIZE (mode) UNITS_PER_LONG) { ! if (POINTER_TYPE_P (type)) *punsignedp = POINTERS_EXTEND_UNSIGNED; return Pmode; } --- 8742,8748 if (INTEGRAL_MODE_P (mode) GET_MODE_SIZE (mode) UNITS_PER_LONG) { ! if (type != NULL_TREE POINTER_TYPE_P (type)) *punsignedp = POINTERS_EXTEND_UNSIGNED; return Pmode; } Index: gcc/explow.c === *** gcc/explow.c.orig --- gcc/explow.c *** enum machine_mode *** 771,776 --- 771,787 promote_function_mode (const_tree type, enum machine_mode mode, int *punsignedp, const_tree funtype, int for_return) { + /* Called without a type node for a libcall. */ + if (type == NULL_TREE) + { + if (INTEGRAL_MODE_P (mode)) + return targetm.calls.promote_function_mode (NULL_TREE, mode, + punsignedp, funtype, + for_return); + else + return mode; + } + switch (TREE_CODE (type)) { case INTEGER_TYPE: case ENUMERAL_TYPE: case BOOLEAN_TYPE: *** enum machine_mode *** 791,796 --- 802,813 promote_mode (const_tree type ATTRIBUTE_UNUSED, enum machine_mode mode, int *punsignedp ATTRIBUTE_UNUSED) { + /* For libcalls this is invoked without TYPE from the backends + TARGET_PROMOTE_FUNCTION_MODE hooks. Don't do anything in that + case. */ + if (type == NULL_TREE) + return mode; + This broke bootstrap /space/rguenther/src/svn/trunk/gcc/explow.c: In function 'promote_mode': /space/rguenther/src/svn/trunk/gcc/explow.c:815:3: error: ISO C90 forbids mixed declarations and code [-Werror=edantic] cc1: all warnings being treated as errors /* FIXME: this is the same logic that was there until GCC 4.4, but we probably want to test POINTERS_EXTEND_UNSIGNED even if PROMOTE_MODE is not defined. The affected targets are M32C, S390, SPARC. 
*/ Index: gcc/config/sparc/sparc.c === *** gcc/config/sparc/sparc.c.orig ---
Ping: [PATCH] PR 48175, Make CASE_VALUES_THRESHOLD settable via --param
On Thu, Apr 21, 2011 at 03:02:10PM -0400, Michael Meissner wrote: In looking at some improvements to the powerpc, we wanted to change the default for when a table jump is generated vs. a series of if statements. Now, we could just add a powerpc specific TARGET_CASE_VALUES_THRESHOLD, but I tend to think that these should be settable on all/most ports with --param. At present, there are only two ports (avr and mn10300) that define their own TARGET_CASE_VALUES_THRESHOLD hook. My first patch does not remove the target hook and modify the avr/mn10300 ports to use maybe_set_param_value, but that can be done if desired. The patch adds two --param values, one for when the port is using the casesi insn, and the other when it uses the more primitive tablejump insn. I have bootstrapped the compiler with this patch and run the test suite with no regressions. Is it ok to apply as is? Should I modify the avr and mn10300 ports to use the parameters and do away with the target hook? Or should I do this just as a powerpc target hook? I never got a response for this, and my earlier ping didn't seem to go out. I'll check it in on Monday if there are no objections. -- Michael Meissner, IBM 5 Technology Place Drive, M/S 2757, Westford, MA 01886-3141, USA meiss...@linux.vnet.ibm.com fax +1 (978) 399-6899
Re: [google][RFA] add extra text to stack frame warnings (issue4479046)
On Fri, May 6, 2011 at 1:52 AM, Chris Demetriou c...@google.com wrote: In theory, a more general warning-text-addition mechanism could be useful. e.g. a flag that said when outputting a warning about flag 'foo', output this additional text could be useful. However, we haven't felt the need to do this for other warnings. IMO, a general solution along these lines would be solving a problem that ~nobody has. 8-) We already output the option which enables the warning that seems like a general solution. Thanks, Andrew Pinski
Re: [Patch,Fortran] Minor libcaf cleanup
On 05/05/11 21:18, Tobias Burnus wrote: Changes: - Remove (not working) critical functions; a normal coarray of LOCK type should be used instead. (Stub left in until it is removed the the front end.) - Added prototypes and stub implementations for registering/deregistering coarray (currently unused). - Small bug fixes. OK for the trunk? Ok. Yours, Daniel -- http://www.pro-vegan.info/ -- Done: Arc-Bar-Cav-Kni-Ran-Rog-Sam-Tou-Val-Wiz To go: Hea-Mon-Pri
Re: Ping: [PATCH] PR 48175, Make CASE_VALUES_THRESHOLD settable via --param
On Fri, May 06, 2011 at 12:21:24PM -0400, Michael Meissner wrote: On Thu, Apr 21, 2011 at 03:02:10PM -0400, Michael Meissner wrote: In looking at some improvements to the powerpc, we wanted to change the default for when a table jump is generated vs. a series of if statements. Now, we could just add a powerpc specific TARGET_CASE_VALUES_THRESHOLD, but I tend to think that these should be settable on all/most ports with --param. At present, there are only two ports (avr and mn10300) that define their own TARGET_CASE_VALUES_THRESHOLD hook. My first patch does not remove the target hook and modify the avr/mn10300 ports to use maybe_set_param_value, but that can be done if desired. The patch adds two --param values, one for when the port is using the casesi insn, and the other when it uses the more primitive tablejump insn. I have bootstrapped the compiler with this patch and run the test suite with no regressions. Is it ok to apply as is? Should I modify the avr and mn10300 ports to use the parameters and do away with the target hook? Or should I do this just as a powerpc target hook? I never got a response for this, and my earlier ping didn't seem to go out. I'll check it in on Monday if there are no objections. I think it is very weird to have two different params, if we need any such param, there should be just one and its default value should depend on HAVE_casesi. Jakub
Re: [google] Patch to support calling multi-versioned functions via new GCC builtin. (issue4440078)
I want propose a more general solution. 1) Generic Annotation Support for gcc IR -- it is used attach to application/optimization specific annotation to gimple statements and annotations can be passed around across passes. In gcc, I only see HISTOGRAM annotation for value profiling, which is not general enough 2) Support of CallInfo for each callsite. This is an annotation, but more standardized. The callinfo can be used to record information such as call attributes, call side effects, mod-ref information etc --- current gimple_call_flags can be folded into this Info structure. I don't like generic annotation facilities. What should passes to with annotated stmts that are a) transformed, b) removed? See RTL notes and all the interesting issues they cause. Then how do you store information that needs to be passed across optimization passes -- you can not possibly dump all of them into the core IR. In fact, anything that is derived from (via analysis) but not part of the core IR need to worry about update and maintenance. In current GIMPLE, we can find many such instances -- DU chains, Memory SSA, control flow information, as well as flags like visited, no_warning, PLF (?), etc. Have a unified way of representing them is a good thing so that 1) make the IR lean and mean; 2) avoid too many different side data structures. The important thing is to have a good verifier to catch insanity and inconsistency of the annotation after each pass. Thanks, David Similarly (not related to this discussion), LoopInfo structure can be introduced to annotate loop back edge jumps to allow FE to pass useful information at loop level. For floating pointer operations, things like the precision constraint, sensitivity to floating environment etc can be recorded in FPInfo. Yes, the idea is to keep the loop structures live throughout the whole compilation. Just somebody needs to do the last 1% of work. Richard. 
T Restricting ourselves to use the existing target attribute at the beginning (with a single, compiler-generated selector function) is probably good enough to get a prototype up and running. Extending it to arbitrary selector-function, value pairs using a new attribute is then probably easy (I don't see the exact use-case for that yet, but I suppose it exists if you say so). For the use cases, CPU model will be looked at instead of just the core architecture -- this will give use more information about the numbrer of cores, size of caches etc. Intel's runtime library does this checkiing at start up time so that the multi-versioned code can look at those and make the appropriate decisions. It will be even more complicated for arm processors -- which can have the same processor cores but configured differently w.r.t VFP, NEON etc. Ah, indeed. I hadn't thought about the tuning for different variants as opposed to enabling HW features. So the interface for overloading would be sth like enum X { Foo = 0, Bar = 5 }; enum X select () { return Bar; } void foo (void) __attribute__((dispatch(select, Bar))); Yes, for overloading -- something like this looks good. Thanks, David
Re: PR 47793 - Support relative paths using -fprofile-generate
Honza, what do you think of the patch? It actually fixed a regression. Thanks, David On Wed, May 4, 2011 at 4:40 PM, Xinliang David Li davi...@google.com wrote: Is this patch ok for trunk? Allowing relative path in -fprofile-generate= is very useful when running the program remotely -- the profile data will be just dumped in the dir relative to the working dir in the remote machine. Using GCOV_PREFIX_STRIP can workaround the problem, but it is not always to pass environment around. Thanks, David On Wed, Feb 23, 2011 at 3:37 PM, Martin Thuresson mart...@google.com wrote: On Wed, Feb 23, 2011 at 10:21 AM, Martin Thuresson mart...@google.com wrote: Change 165596 and 168475 updated the code for handling gcda-paths. As part of this change, relative paths stopped working. http://gcc.gnu.org/bugzilla/show_bug.cgi?id=47793 This patch adds a guard so that / is not added when no prefix is given. The added testcase uses the path ../gcc/. This puts the gcda in the same directory, so that the cleanup-coverage-files will find them. I have tested the patch using make bootstrap; make -k check with target x86_64-unknown-linux-gnu and saw no additional test failures. Let me know if there is any other testing I should do. ChangeLog gcc/ 2011-02-23 Martin Thuresson mart...@google.com PR gcov-profile/47793 * libgcov.c (gcov_exit): Support relative profile paths. gcc/testsuite/ 2011-02-23 Martin Thuresson mart...@google.com PR gcov-profile/47793 * gcc.dg/pr47793.c: New. Thanks, Martin
Re: [ping] 3 unreviewed patches
Ping again. Still no review. On Fri, Apr 15, 2011 at 7:08 AM, Eric Botcazou ebotca...@adacore.com wrote: Fix annoying gcov filename handling: http://gcc.gnu.org/ml/gcc-patches/2011-03/msg01380.html (rs6000) Fix thinko in output_profile_hook: http://gcc.gnu.org/ml/gcc-patches/2011-03/msg01624.html Introduce -Wstack-usage: http://gcc.gnu.org/ml/gcc-patches/2011-03/msg01992.html Thanks in advance. -- Eric Botcazou
[google] Backport r172837 and r172788 to google/main
Backported r172788 and r172837 from trunk to google/main. 2011-05-06 Easwaran Raman era...@google.com Backport r172837: * cfgexpand.c (stack_var): Remove OFFSET... (add_stack_var): ...and its reference here... (expand_stack_vars): ...and here. (stack_var_cmp): Sort by descending order of size. (partition_stack_vars): Change heuristic. (union_stack_vars): Fix to reflect changes in partition_stack_vars. (dump_stack_var_partition): Add newline after each partition. 2011-05-06 Easwaran Raman era...@google.com Backport r172788: * cfgexpand.c (add_alias_set_conflicts): Add conflicts with a variable containing union type only with -fstrict-aliasing. testsuite/ChangeLog.google-main: 2011-05-06 Easwaran Raman era...@google.com Backport r172837: * gcc.dg/stack-layout-2.c: New test. 2011-05-06 Easwaran Raman era...@google.com Backport r172788: * gcc.dg/stack-layout-1.c: New test. Index: gcc/testsuite/gcc.dg/stack-layout-1.c === --- gcc/testsuite/gcc.dg/stack-layout-1.c (revision 0) +++ gcc/testsuite/gcc.dg/stack-layout-1.c (revision 173499) @@ -0,0 +1,25 @@ +/* { dg-do compile } */ +/* { dg-options -O2 -fno-strict-aliasing -fdump-rtl-expand } */ +union U { + int a; + float b; +}; +struct A { + union U u1; + char a[100]; +}; +void bar (struct A *); +void foo () + { +{ + struct A a; + bar (a); +} +{ + struct A a; + bar (a); +} + } + +/* { dg-final { scan-rtl-dump-times Partition 1 expand } } */ +/* { dg-final { cleanup-rtl-dump expand } } */ Index: gcc/testsuite/gcc.dg/stack-layout-2.c === --- gcc/testsuite/gcc.dg/stack-layout-2.c (revision 0) +++ gcc/testsuite/gcc.dg/stack-layout-2.c (revision 173499) @@ -0,0 +1,23 @@ +/* { dg-do compile } */ +/* { dg-options -O2 -fdump-rtl-expand } */ +void bar( char *); +int foo() +{ + int i=0; + { +char a[8000]; +bar(a); +i += a[0]; + } + { +char a[8192]; +char b[32]; +bar(a); +i += a[0]; +bar(b); +i += a[0]; + } + return i; +} +/* { dg-final { scan-rtl-dump size 8192 expand } } */ +/* { dg-final { scan-rtl-dump size 32 expand } } */ Index: 
gcc/cfgexpand.c === --- gcc/cfgexpand.c (revision 173498) +++ gcc/cfgexpand.c (revision 173499) @@ -158,11 +158,6 @@ struct stack_var /* The Variable. */ tree decl; - /* The offset of the variable. During partitioning, this is the - offset relative to the partition. After partitioning, this - is relative to the stack frame. */ - HOST_WIDE_INT offset; - /* Initially, the size of the variable. Later, the size of the partition, if this variable becomes it's partition's representative. */ HOST_WIDE_INT size; @@ -267,7 +262,6 @@ add_stack_var (tree decl) v = stack_vars[stack_vars_num]; v-decl = decl; - v-offset = 0; v-size = tree_low_cst (DECL_SIZE_UNIT (SSAVAR (decl)), 1); /* Ensure that all variables have size, so that a != b for any two variables that are simultaneously live. */ @@ -372,8 +366,9 @@ add_alias_set_conflicts (void) to elements will conflict. In case of unions we have to be careful as type based aliasing rules may say access to the same memory does not conflict. So play - safe and add a conflict in this case. */ - || contains_union) + safe and add a conflict in this case when + -fstrict-aliasing is used. */ + || (contains_union flag_strict_aliasing)) add_stack_var_conflict (i, j); } } @@ -403,9 +398,9 @@ stack_var_cmp (const void *a, const void *b) return (int)largeb - (int)largea; /* Secondary compare on size, decreasing */ - if (sizea sizeb) -return -1; if (sizea sizeb) +return -1; + if (sizea sizeb) return 1; /* Tertiary compare on true alignment, decreasing. */ @@ -564,28 +559,19 @@ update_alias_info_with_stack_vars (void) /* A subroutine of partition_stack_vars. The UNION portion of a UNION/FIND partitioning algorithm. Partitions A and B are known to be non-conflicting. - Merge them into a single partition A. + Merge them into a single partition A. */ - At the same time, add OFFSET to all variables in partition B. At the end - of the partitioning process we've have a nice block easy to lay out within - the stack frame. 
*/ - static void -union_stack_vars (size_t a, size_t b, HOST_WIDE_INT offset) +union_stack_vars (size_t a, size_t b) { - size_t i, last; struct stack_var *vb = stack_vars[b]; bitmap_iterator bi; unsigned u; - /* Update each element of partition B with the given offset, - and merge them into partition A. */ - for (last = i = b; i != EOC; last = i, i = stack_vars[i].next) -{ - stack_vars[i].offset
Re: [google] Backport r172837 and r172788 to google/main
On Fri, May 6, 2011 at 14:22, Easwaran Raman era...@google.com wrote: Backported r172788 and r172837 from trunk to google/main. Minor nit: 2011-05-06 Easwaran Raman era...@google.com Backport r172837: This needs to be indented by 1 tab. * cfgexpand.c (stack_var): Remove OFFSET... (add_stack_var): ...and its reference here... (expand_stack_vars): ...and here. Diego.
Go patch committed: Use backend representation for string type
This small patch to the Go frontend uses the backend representation for the string type. Bootstrapped and ran Go testsuite on x86_64-unknown-linux-gnu. Committed to mainline. Ian diff -r 434e7ba47e8d go/types.cc --- a/go/types.cc Thu May 05 23:32:03 2011 -0700 +++ b/go/types.cc Fri May 06 11:12:12 2011 -0700 @@ -2172,14 +2172,27 @@ // struct with two fields: a pointer to the characters and a length. tree -String_type::do_get_tree(Gogo*) -{ - static tree struct_type; - return Gogo::builtin_struct(struct_type, __go_string, NULL_TREE, 2, - __data, - build_pointer_type(unsigned_char_type_node), - __length, - integer_type_node); +String_type::do_get_tree(Gogo* gogo) +{ + static Btype* backend_string_type; + if (backend_string_type == NULL) +{ + std::vectorBackend::Btyped_identifier fields(2); + + Type* b = gogo-lookup_global(byte)-type_value(); + Type* pb = Type::make_pointer_type(b); + fields[0].name = __data; + fields[0].btype = tree_to_type(pb-get_tree(gogo)); + fields[0].location = UNKNOWN_LOCATION; + + Type* int_type = Type::lookup_integer_type(int); + fields[1].name = __length; + fields[1].btype = tree_to_type(int_type-get_tree(gogo)); + fields[1].location = UNKNOWN_LOCATION; + + backend_string_type = gogo-backend()-struct_type(fields); +} + return type_to_tree(backend_string_type); } // Return a tree for the length of STRING.
[patch] fix typos and grammar in -fuse-linker-plugin docs
2011-05-06 Jonathan Wakely jwakely@gmail.com * doc/invoke.texi (-fuse-linker-plugin): Improve grammar. I was going to commit a smaller version of this patch as obvious (just the second of the three hunks in the patch) but I spotted a few other improvements that could be made. I think my changes preserve the intended meaning, but improve the English slightly and (I hope) clarify it. OK for trunk? Would removing do from and shared libraries that do use hidden visibility be a further improvement? Index: doc/invoke.texi === --- doc/invoke.texi (revision 173499) +++ doc/invoke.texi (working copy) @@ -7701,17 +7701,17 @@ Disabled by default. @item -fuse-linker-plugin -Enables the use of linker plugin during link time optimization. This option -relies on the linker plugin support in linker that is available in gold +Enables the use of a linker plugin during link time optimization. This option +relies on plugin support in the linker, which is available in gold or in GNU ld 2.21 or newer. This option enables the extraction of object files with GIMPLE bytecode out of library archives. This improves the quality of optimization by exposing more -code the link time optimizer. This information specify what symbols +code to the link time optimizer. This information specifies what symbols can be accessed externally (by non-LTO object or during dynamic linking). Resulting code quality improvements on binaries (and shared libraries that do -use hidden visibility) is similar to @code{-fwhole-program}. See -@option{-flto} for a description on the effect of this flag and how to use it. +use hidden visibility) are similar to @code{-fwhole-program}. See +@option{-flto} for a description of the effect of this flag and how to use it. Enabled by default when LTO support in GCC is enabled and GCC was compiled with a linker supporting plugins (GNU ld 2.21 or newer or gold).
Re: [google] Backport r172837 and r172788 to google/main
Thanks. Fixed them (as well as the same issue in some earlier entries). -Easwaran On Fri, May 6, 2011 at 11:27 AM, Diego Novillo dnovi...@google.com wrote: On Fri, May 6, 2011 at 14:22, Easwaran Raman era...@google.com wrote: Backported r172788 and r172837 from trunk to google/main. Minor nit: 2011-05-06 Easwaran Raman era...@google.com Backport r172837: This needs to be indented by 1 tab. * cfgexpand.c (stack_var): Remove OFFSET... (add_stack_var): ...and its reference here... (expand_stack_vars): ...and here. Diego.
Re: [google] Backport r172837 and r172788 to google/main
On Fri, May 6, 2011 at 14:34, Easwaran Raman era...@google.com wrote: Thanks. Fixed them (as well as the same issue in some earlier entries). Great, thanks!
Re: Ping: [PATCH] PR 48175, Make CASE_VALUES_THRESHOLD settable via --param
On Fri, May 06, 2011 at 06:30:07PM +0200, Jakub Jelinek wrote: On Fri, May 06, 2011 at 12:21:24PM -0400, Michael Meissner wrote: On Thu, Apr 21, 2011 at 03:02:10PM -0400, Michael Meissner wrote: In looking at some improvements to the powerpc, we wanted to change the default for when a table jump is generated vs. a series of if statements. Now, we could just add a powerpc specific TARGET_CASE_VALUES_THRESHOLD, but I tend to think that these should be settable on all/most ports with --param. At present, there are only two ports (avr and mn10300) that define their own TARGET_CASE_VALUES_THRESHOLD hook. My first patch does not remove the target hook and modify the avr/mn10300 ports to use maybe_set_param_value, but that can be done if desired. The patch adds two --param values, one for when the port is using the casesi insn, and the other when it uses the more primitive tablejump insn. I have bootstrapped the compiler with this patch and run the test suite with no regressions. Is it ok to apply as is? Should I modify the avr and mn10300 ports to use the parameters and do away with the target hook? Or should I do this just as a powerpc target hook? I never got a response for this, and my earlier ping didn't seem to go out. I'll check it in on Monday if there are no objections. I think it is very weird to have two different params, if we need any such param, there should be just one and its default value should depend on HAVE_casesi. The problem is the values in params.def must be constant, and can't depend on switches. I imagine we can have a single param that is normally 0, and if it is non-zero use that value, otherwise fall back to (HAVE_casesi ? 4 : 5). Or we could set it in finish_options in opts.c. Any preference? -- Michael Meissner, IBM 5 Technology Place Drive, M/S 2757, Westford, MA 01886-3141, USA meiss...@linux.vnet.ibm.com fax +1 (978) 399-6899
Go patch committed: More uses of backend interface for types
This patch to the Go frontend and to libgo adds more uses of the backend interface for types. There were some changes to libgo because the code now uses produces a Go type for maps. Previously the map types were using size_t, but there is no equivalent to size_t in Go. Go instead has uintptr_t, so I changed the libgo code accordingly. This should not make any actual difference, of course. Bootstrapped and ran Go testsuite on x86_64-unknown-linux-gnu. Committed to mainline. Ian diff -r 0a1edd881eca go/types.cc --- a/go/types.cc Fri May 06 11:28:30 2011 -0700 +++ b/go/types.cc Fri May 06 12:58:15 2011 -0700 @@ -845,7 +845,7 @@ if (this-forward_declaration_type() != NULL || this-named_type() != NULL) -return this-get_tree_without_hash(gogo); +return type_to_tree(this-get_btype_without_hash(gogo)); if (this-is_error_type()) return error_mark_node; @@ -865,7 +865,7 @@ return ins.first-second; } - tree t = this-get_tree_without_hash(gogo); + tree t = type_to_tree(this-get_btype_without_hash(gogo)); if (ins.first-second == NULL_TREE) ins.first-second = t; @@ -884,43 +884,33 @@ return t; } -// Return a tree for a type without looking in the hash table for -// identical types. This is used for named types, since there is no -// point to looking in the hash table for them. - -tree -Type::get_tree_without_hash(Gogo* gogo) +// Return the backend representation for a type without looking in the +// hash table for identical types. This is used for named types, +// since a named type is never identical to any other type. + +Btype* +Type::get_btype_without_hash(Gogo* gogo) { if (this-tree_ == NULL_TREE) { - tree t = this-do_get_tree(gogo); + Btype* bt = tree_to_type(this-do_get_tree(gogo)); // For a recursive function or pointer type, we will temporarily // return a circular pointer type during the recursion. We // don't want to record that for a forwarding type, as it may // confuse us later. 
if (this-forward_declaration_type() != NULL - gogo-backend()-is_circular_pointer_type(tree_to_type(t))) - return t; + gogo-backend()-is_circular_pointer_type(bt)) + return bt; if (gogo == NULL || !gogo-named_types_are_converted()) - return t; - + return bt; + + tree t = type_to_tree(bt); this-tree_ = t; - go_preserve_from_gc(t); -} - - return this-tree_; -} - -// Return the backend representation for a type without looking in the -// hash table for identical types. This is used for named types, -// since a named type is never identical to any other type. - -Btype* -Type::get_btype_without_hash(Gogo* gogo) -{ - return tree_to_type(this-get_tree_without_hash(gogo)); +} + + return tree_to_type(this-tree_); } // Return a tree representing a zero initialization for this type. @@ -1596,8 +1586,8 @@ protected: tree - do_get_tree(Gogo*) - { return error_mark_node; } + do_get_tree(Gogo* gogo) + { return type_to_tree(gogo-backend()-error_type()); } tree do_get_init_tree(Gogo*, tree, bool) @@ -3228,8 +3218,11 @@ protected: tree - do_get_tree(Gogo*) - { return ptr_type_node; } + do_get_tree(Gogo* gogo) + { +Btype* bt = gogo-backend()-pointer_type(gogo-backend()-void_type()); +return type_to_tree(bt); + } tree do_get_init_tree(Gogo*, tree type_tree, bool is_clear) @@ -5064,61 +5057,44 @@ return true; } -// Get a tree for a map type. A map type is represented as a pointer -// to a struct. The struct is __go_map in libgo/map.h. +// Get the backend representation for a map type. A map type is +// represented as a pointer to a struct. The struct is __go_map in +// libgo/map.h. 
tree Map_type::do_get_tree(Gogo* gogo) { - static tree type_tree; - if (type_tree == NULL_TREE) -{ - tree struct_type = make_node(RECORD_TYPE); - - tree map_descriptor_type = gogo-map_descriptor_type(); - tree const_map_descriptor_type = - build_qualified_type(map_descriptor_type, TYPE_QUAL_CONST); - tree name = get_identifier(__descriptor); - tree field = build_decl(BUILTINS_LOCATION, FIELD_DECL, name, - build_pointer_type(const_map_descriptor_type)); - DECL_CONTEXT(field) = struct_type; - TYPE_FIELDS(struct_type) = field; - tree last_field = field; - - name = get_identifier(__element_count); - field = build_decl(BUILTINS_LOCATION, FIELD_DECL, name, sizetype); - DECL_CONTEXT(field) = struct_type; - DECL_CHAIN(last_field) = field; - last_field = field; - - name = get_identifier(__bucket_count); - field = build_decl(BUILTINS_LOCATION, FIELD_DECL, name, sizetype); - DECL_CONTEXT(field) = struct_type; - DECL_CHAIN(last_field) = field; - last_field = field; - - name = get_identifier(__buckets); - field = build_decl(BUILTINS_LOCATION, FIELD_DECL, name, - build_pointer_type(ptr_type_node)); - DECL_CONTEXT(field) = struct_type;
Re: [PING] config/mep/mep.c: don't translate syntax description.
OK to check in? Ok with me. Thanks! 2011-05-06 Philipp Thomas p...@suse.de * config/mep/mep.c (mep_validate_vliw): Syntax description should not be translated.
Minor type merging optimization
Hi, while looking at type merging code I noticed that type pairs can be managed to be ordered by their UIDs. This save some of hashing overhead in one of most intensively querried hashes. Also gimple_lookup_type_leader is hot function that is better to be inlined. I also wonder, why unionfind algorithm is not used here to maintain the positive answers? Bootstrapped/regtested x86_64-linux, OK? Honza * gimple.c (type_pair_hash, type_pair_eq, lookup_type_pair): Arrange type pairs to be UID ordered. (gimple_lookup_type_leader): Make inline. Index: gimple.c === --- gimple.c(revision 173506) +++ gimple.c(working copy) @@ -3240,8 +3240,7 @@ type_pair_hash (const void *p) const struct type_pair_d *pair = (const struct type_pair_d *) p; hashval_t val1 = pair-uid1; hashval_t val2 = pair-uid2; - return (iterative_hash_hashval_t (val2, val1) - ^ iterative_hash_hashval_t (val1, val2)); + return iterative_hash_hashval_t (val1, val2); } /* Compare two type pairs pointed-to by P1 and P2. */ @@ -3251,8 +3250,7 @@ type_pair_eq (const void *p1, const void { const struct type_pair_d *pair1 = (const struct type_pair_d *) p1; const struct type_pair_d *pair2 = (const struct type_pair_d *) p2; - return ((pair1-uid1 == pair2-uid1 pair1-uid2 == pair2-uid2) - || (pair1-uid1 == pair2-uid2 pair1-uid2 == pair2-uid1)); + return (pair1-uid1 == pair2-uid1 pair1-uid2 == pair2-uid2); } /* Lookup the pair of types T1 and T2 in *VISITED_P. 
Insert a new @@ -3271,8 +3269,16 @@ lookup_type_pair (tree t1, tree t2, htab gcc_obstack_init (ob_p); } - pair.uid1 = TYPE_UID (t1); - pair.uid2 = TYPE_UID (t2); + if (TYPE_UID (t1) TYPE_UID (t2)) +{ + pair.uid1 = TYPE_UID (t1); + pair.uid2 = TYPE_UID (t2); +} + else +{ + pair.uid1 = TYPE_UID (t2); + pair.uid2 = TYPE_UID (t1); +} slot = htab_find_slot (*visited_p, pair, INSERT); if (*slot) @@ -3280,8 +3286,8 @@ lookup_type_pair (tree t1, tree t2, htab else { p = XOBNEW (ob_p, struct type_pair_d); - p-uid1 = TYPE_UID (t1); - p-uid2 = TYPE_UID (t2); + p-uid1 = pair.uid1; + p-uid2 = pair.uid2; p-same_p[0] = -2; p-same_p[1] = -2; *slot = (void *) p; @@ -3324,7 +3330,7 @@ static GTY((deletable, length(GIMPLE_TY /* Lookup an existing leader for T and return it or NULL_TREE, if there is none in the cache. */ -static tree +static inline tree gimple_lookup_type_leader (tree t) { gimple_type_leader_entry *leader;
Re: [PATCH] Canonicalize compares in combine [2/3] Modifications to try_combine()
-BEGIN PGP SIGNED MESSAGE- Hash: SHA1 On 05/06/11 03:38, Chung-Lin Tang wrote: Hi Jeff, I have verified the patch with a native bootstrap + testsuite run on powerpc-linux (32-bit), results were clean. Attached is a single patch with the 1+2 combine parts together, with comments updated. Please check if they feel descriptive enough. I haven't updated the CANONICALIZE_COMPARISON stuff, as we discussed it doesn't look like absolutely needed right now. As for the const0_rtx compare, because the entire case is guarded by a CONST_INT_P, I think it should be safe. Is this now okay for trunk? Yes, please install. Thanks, jeff -BEGIN PGP SIGNATURE- Version: GnuPG v1.4.11 (GNU/Linux) Comment: Using GnuPG with Fedora - http://enigmail.mozdev.org/ iQEcBAEBAgAGBQJNxFocAAoJEBRtltQi2kC7IhQH/2P8rOuJloYS4ckDCOhbqBcW w37R+qlzQztJLKRrI+cxSHl/uUPZ4iJ0NPsZ/WnuMcj2o/eWnU8zERYvky8NGb0g FnHbhBsRz6cvw0+vEhfBxmZ4i2RKezSZwXquu/Dt4ZZ/Wy4agTMKEQoiimGz2QvR f8/6JSfkJKLuj/4t/XkoQIzK516ADG1mvvp6CWKR/UoXSnfJKS9eXcmZZ5YMuVpp NiQ4oXJHGZguH1ecv31l/Eqz6KsJTLsX+3nhriSwfORdlmDGi3IVQZy3vCP02iw8 IFDm5mxH7mUWPrTVaW4wEgMIFdiBIinsC7/mNARO2FLGnkMW++lFLuSWeRc7A9Y= =darN -END PGP SIGNATURE-
Use Enum for MIPS -march=, -mtune=, -mips options
This patch makes the MIPS options -march=, -mtune= and -mips use the .opt Enum facility, completing the avoidance of global state in mips_handle_option. The list of possible arguments for those options in mips-tables.opt is generated by awk code in genopt.sh, which reimplements the logic formerly in mips.c to allow some variants of CPU names to match. The generated list uses Canonical markings where appropriate, so specs now only need to match the particular form of each name that is used in mips-cpus.def. There is enough information in mips-cpus.def for it to be possible to generate MIPS_ISA_LEVEL_SPEC automatically as well, but I haven't implemented that. Interpretation of from-abi is now deferred to mips_option_override, so it uses whatever the final ABI setting from the command line was. The processing of MIPS_CPU_STRING_DEFAULT is replaced by much simpler use of strcmp to find a matching entry; the from-abi default definition of that macro is replaced by code in mips_default_arch if that macro is not defined. (Previously it would always have been defined, so the previous fallback for it being undefined was dead code.) MIPS is one of several targets that have code in cc1 to set a default architecture (etc.) that may or may not be derived from --with-arch etc. configure settings; ideally there would be some generic mechanism for the --with-* settings to be used in cc1 as well as via specs (maybe appending a -march= option to the end of the cc1 command line if none was there originally, though --with-arch-32 etc. complicates that - maybe we actually want cc1 to be able to process specs) that doesn't need this custom code. (Then target-specific defaults would always be defaults in config.gcc for the relevant configure options.) Tested building cc1 and xgcc for cross to mips-elf. Will commit to trunk in the absence of target maintainer objections. 
contrib: 2011-05-06 Joseph Myers jos...@codesourcery.com * gcc_update (gcc/config/mips/mips-tables.opt): New dependencies. gcc: 2011-05-06 Joseph Myers jos...@codesourcery.com * config/mips/genopt.sh, config/mips/mips-cpus.def: New files. * config/mips/mips-tables.opt: New file (generated). * config.gcc (mips*-*-*): Add mips/mips-tables.opt to extra_options. * config/mips/mips-opts.h (MIPS_ARCH_OPTION_FROM_ABI, MIPS_ARCH_OPTION_NATIVE): Define. * config/mips/mips.c (mips_cpu_info_table): Move contents to mips-cpus.def. (mips_strict_matching_cpu_name_p, mips_matching_cpu_name_p, mips_parse_cpu): Remove. (mips_cpu_info_from_opt, mips_default_arch): New. (mips_handle_option): Don't assert that global structures are in use. Don't handle OPT_march_, OPT_mtune_ and OPT_mips here. (mips_option_override): Use new variables and functions to set state of these options. Use strcmp to check for individual CPU names. * config/mips/mips.h (MIPS_CPU_STRING_DEFAULT): Remove default definition. * config/mips/mips.opt (march=): Use ToLower and Enum. (mips): Use ToLower, Enum and Var. (mtune=): Use ToLower and Enum. * config/mips/t-mips ($(srcdir)/config/mips/mips-tables.opt): New. 
Index: contrib/gcc_update === --- contrib/gcc_update (revision 173491) +++ contrib/gcc_update (working copy) @@ -82,6 +82,7 @@ gcc/fixinc/fixincl.x: gcc/fixinc/fixincl gcc/config/arm/arm-tune.md: gcc/config/arm/arm-cores.def gcc/config/arm/gentune.sh gcc/config/arm/arm-tables.opt: gcc/config/arm/arm-arches.def gcc/config/arm/arm-cores.def gcc/config/arm/genopt.sh gcc/config/m68k/m68k-tables.opt: gcc/config/m68k/m68k-devices.def gcc/config/m68k/m68k-isas.def gcc/config/m68k/m68k-microarchs.def gcc/config/m68k/genopt.sh +gcc/config/mips/mips-tables.opt: gcc/config/mips/mips-cpus.def gcc/config/mips/genopt.sh # And then, language-specific files gcc/cp/cfns.h: gcc/cp/cfns.gperf gcc/java/keyword.h: gcc/java/keyword.gperf Index: gcc/config.gcc === --- gcc/config.gcc (revision 173491) +++ gcc/config.gcc (working copy) @@ -371,7 +371,7 @@ mips*-*-*) cpu_type=mips need_64bit_hwint=yes extra_headers=loongson.h - extra_options=${extra_options} g.opt + extra_options=${extra_options} g.opt mips/mips-tables.opt ;; picochip-*-*) cpu_type=picochip Index: gcc/config/mips/mips-tables.opt === --- gcc/config/mips/mips-tables.opt (revision 0) +++ gcc/config/mips/mips-tables.opt (revision 0) @@ -0,0 +1,605 @@ +; -*- buffer-read-only: t -*- +; Generated automatically by genopt.sh from mips-cpus.def. + +; Copyright (C) 2011 Free Software Foundation, Inc. +; +; This file is part of GCC. +; +; GCC is free software; you can redistribute it and/or modify it
[google] revert 173158 (-fstrict-enum-precisions) (issue4503041)
The following patch reverted r173158 from google/main -- -fstrict-enums provides a better implementation. The test cases are kept with slight modification. Bootstrap and tested with related test cases. Ok for google/main? 2011-05-06 David Li davi...@google.com Revert r173158. Index: tree-vrp.c === --- tree-vrp.c (revision 173415) +++ tree-vrp.c (working copy) @@ -5553,9 +5553,7 @@ stmt_interesting_for_vrp (gimple stmt) ((is_gimple_call (stmt) gimple_call_fndecl (stmt) != NULL_TREE DECL_IS_BUILTIN (gimple_call_fndecl (stmt))) - || !gimple_vuse (stmt)) - (flag_strict_enum_precision - || TREE_CODE (TREE_TYPE (lhs)) != ENUMERAL_TYPE)) + || !gimple_vuse (stmt))) return true; } else if (gimple_code (stmt) == GIMPLE_COND Index: doc/invoke.texi === --- doc/invoke.texi (revision 173415) +++ doc/invoke.texi (working copy) @@ -395,8 +395,8 @@ Objective-C and Objective-C++ Dialects}. -fsel-sched-pipelining -fsel-sched-pipelining-outer-loops @gol -fsignaling-nans -fsingle-precision-constant -fsplit-ivs-in-unroller @gol -fsplit-wide-types -fstack-protector -fstack-protector-all @gol --fstrict-aliasing -fstrict-overflow -fno-strict-enum-precision -fthread-jumps --ftracer -ftree-bit-ccp @gol +-fstrict-aliasing -fstrict-overflow -fthread-jumps -ftracer @gol +-ftree-bit-ccp @gol -ftree-builtin-call-dce -ftree-ccp -ftree-ch -ftree-copy-prop @gol -ftree-copyrename -ftree-dce -ftree-dominator-opts -ftree-dse @gol -ftree-forwprop -ftree-fre -ftree-loop-if-convert @gol @@ -2075,11 +2075,6 @@ represented in the minimum number of bit enumerators). This assumption may not be valid if the program uses a cast to convert an arbitrary integer value to the enumeration type. -@item -fno-strict-enum-precision -@opindex fno-strict-enum-precision -Do not perform optimizations of switch() statements based on the -precision of enum types. - @item -ftemplate-depth=@var{n} @opindex ftemplate-depth Set the maximum instantiation depth for template classes to @var{n}. 
Index: testsuite/g++.dg/other/no-strict-enum-precision-3.C === --- testsuite/g++.dg/other/no-strict-enum-precision-3.C (revision 173415) +++ testsuite/g++.dg/other/no-strict-enum-precision-3.C (working copy) @@ -1,5 +1,5 @@ /* { dg-do run } */ -/* { dg-options -O2 -fno-strict-enum-precision } */ +/* { dg-options -O2 -fno-strict-enums } */ extern C void abort (void); Index: testsuite/g++.dg/other/no-strict-enum-precision-1.C === --- testsuite/g++.dg/other/no-strict-enum-precision-1.C (revision 173415) +++ testsuite/g++.dg/other/no-strict-enum-precision-1.C (working copy) @@ -1,5 +1,5 @@ /* { dg-do run } */ -/* { dg-options -fno-strict-enum-precision } */ +/* { dg-options -fno-strict-enums } */ extern C void abort (void); Index: testsuite/g++.dg/other/no-strict-enum-precision-2.C === --- testsuite/g++.dg/other/no-strict-enum-precision-2.C (revision 173415) +++ testsuite/g++.dg/other/no-strict-enum-precision-2.C (working copy) @@ -1,5 +1,5 @@ /* { dg-do run } */ -/* { dg-options -O2 -fno-strict-enum-precision } */ +/* { dg-options -O2 -fno-strict-enums } */ extern C void abort (void); Index: gimplify.c === --- gimplify.c (revision 173415) +++ gimplify.c (working copy) @@ -1602,8 +1602,6 @@ gimplify_switch_expr (tree *expr_p, gimp type = TREE_TYPE (SWITCH_COND (switch_expr)); if (len INTEGRAL_TYPE_P (type) - (flag_strict_enum_precision - || TREE_CODE (type) != ENUMERAL_TYPE) TYPE_MIN_VALUE (type) TYPE_MAX_VALUE (type) tree_int_cst_equal (CASE_LOW (VEC_index (tree, labels, 0)), -- This patch is available for review at http://codereview.appspot.com/4503041
Re: [google] revert 173158 (-fstrict-enum-precisions) (issue4503041)
On Fri, May 6, 2011 at 16:53, David Li davi...@google.com wrote: The following patch reverted r173158 from google/main -- -fstrict-enums provides a better implementation. The test cases are kept with slight modification. Bootstrap and tested with related test cases. Ok for google/main? 2011-05-06 David Li davi...@google.com Revert r173158. OK. Minor nit, when reverting a patch, please also include the ChangeLog entry corresponding to the revision. Diego.
C++ PATCH for c++/48909 (constexpr ICE)
Here, the problem was that in (*(first + 1) *first) != false integral promotion changes the expr to have type int. Then fold_binary_op_with_conditional_arg wants to change this to (*(first + 1) *first) ? true : false without changing the type, so the condition of a ?: has the wrong type by the time we get to the constexpr expander. For 4.6, it seems simplest to fix this by making the constexpr code more permissive. Tested x86_64-pc-linux-gnu, applying to 4.6 and trunk. commit 1be580e74a6e959ffaa041b49be08c895d44eb01 Author: Jason Merrill ja...@redhat.com Date: Fri May 6 10:48:52 2011 -0400 PR c++/48909 * semantics.c (cxx_eval_conditional_expression): Check integer_zerop/onep instead. diff --git a/gcc/cp/semantics.c b/gcc/cp/semantics.c index d0c559b..cc8db90 100644 --- a/gcc/cp/semantics.c +++ b/gcc/cp/semantics.c @@ -6299,13 +6299,12 @@ cxx_eval_conditional_expression (const constexpr_call *call, tree t, allow_non_constant, addr, non_constant_p); VERIFY_CONSTANT (val); - if (val == boolean_true_node) -return cxx_eval_constant_expression (call, TREE_OPERAND (t, 1), + /* Don't VERIFY_CONSTANT the other operands. */ + if (integer_zerop (val)) +return cxx_eval_constant_expression (call, TREE_OPERAND (t, 2), allow_non_constant, addr, non_constant_p); - gcc_assert (val == boolean_false_node); - /* Don't VERIFY_CONSTANT here. 
*/ - return cxx_eval_constant_expression (call, TREE_OPERAND (t, 2), + return cxx_eval_constant_expression (call, TREE_OPERAND (t, 1), allow_non_constant, addr, non_constant_p); } @@ -7872,12 +7871,12 @@ potential_constant_expression_1 (tree t, bool want_rval, tsubst_flags_t flags) tmp = TREE_OPERAND (t, 0); if (!potential_constant_expression_1 (tmp, rval, flags)) return false; - else if (tmp == boolean_true_node) - return potential_constant_expression_1 (TREE_OPERAND (t, 1), - want_rval, flags); - else if (tmp == boolean_false_node) + else if (integer_zerop (tmp)) return potential_constant_expression_1 (TREE_OPERAND (t, 2), want_rval, flags); + else if (TREE_CODE (tmp) == INTEGER_CST) + return potential_constant_expression_1 (TREE_OPERAND (t, 1), + want_rval, flags); for (i = 1; i 3; ++i) if (potential_constant_expression_1 (TREE_OPERAND (t, i), want_rval, tf_none)) diff --git a/gcc/testsuite/g++.dg/cpp0x/constexpr-condition2.C b/gcc/testsuite/g++.dg/cpp0x/constexpr-condition2.C new file mode 100644 index 000..2434096 --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp0x/constexpr-condition2.C @@ -0,0 +1,18 @@ +// PR c++/48909 +// { dg-options -std=c++0x } + +#define SA(X) static_assert((X),#X) + +constexpr int const * is_sorted_until(int const * first, int const * last) +{ + return first == last || first + 1 == last ? last + : (*(first + 1) *first) != false ? first + 1 + : is_sorted_until(first + 1, last); +} + +int main() +{ + static constexpr int array[2] = {0, 1}; + constexpr int const * last = is_sorted_until(array, array + 2); + SA(last==array+2); +}
Re: [PATCH] Cleanup expand_shift
I'm going to bootstrap and regtest this on x86_64-unknown-linux-gnu (with again zero testing coverage ...). The patch fixes the reported ICE with a cross to cris-elf, more testing is appreciated (though I guess autotesters will pick it up). Does it look sane? Yes, I think so, but... Index: gcc/expmed.c === *** gcc/expmed.c (revision 173473) --- gcc/expmed.c (working copy) *** expand_shift_1 (enum tree_code code, enu *** 2141,2151 rtx new_amount, other_amount; rtx temp1; new_amount = op1; ! other_amount ! = simplify_gen_binary (MINUS, GET_MODE (op1), !GEN_INT (GET_MODE_BITSIZE (mode)), !op1); shifted = force_reg (mode, shifted); --- 2141,2156 rtx new_amount, other_amount; rtx temp1; + op1_mode = GET_MODE (op1); new_amount = op1; ! if (op1_mode == VOIDmode) ! other_amount = GEN_INT (GET_MODE_BITSIZE (mode) ! - INTVAL (op1)); ! else ! other_amount ! = simplify_gen_binary (MINUS, op1_mode, ! GEN_INT (GET_MODE_BITSIZE (mode)), ! op1); shifted = force_reg (mode, shifted); ... I'd test CONST_INT_P (op1) instead of op1_mode == VOIDmode since you are accessing INTVAL in the branch. -- Eric Botcazou
C++ PATCH for c++/48911 (constexpr and implicit aggregate initializers)
In 48911, the constexpr expander wasn't properly dealing with aggregate/string constant array initializers with omitted elements. We should build up a value-initialization as needed. Tested x86_64-pc-linux-gnu, applying to trunk and 4.6. commit b557b9384f1a6509735c25574f1c1d09703e6252 Author: Jason Merrill ja...@redhat.com Date: Fri May 6 10:21:38 2011 -0400 PR c++/48911 * semantics.c (cxx_eval_array_reference): Handle implicit initializers. diff --git a/gcc/cp/semantics.c b/gcc/cp/semantics.c index 8bf5a52..d0c559b 100644 --- a/gcc/cp/semantics.c +++ b/gcc/cp/semantics.c @@ -6324,6 +6324,7 @@ cxx_eval_array_reference (const constexpr_call *call, tree t, non_constant_p); tree index, oldidx; HOST_WIDE_INT i; + tree elem_type; unsigned len, elem_nchars = 1; if (*non_constant_p) return t; @@ -6336,16 +6337,27 @@ cxx_eval_array_reference (const constexpr_call *call, tree t, return t; else if (addr) return build4 (ARRAY_REF, TREE_TYPE (t), ary, index, NULL, NULL); + elem_type = TREE_TYPE (TREE_TYPE (ary)); if (TREE_CODE (ary) == CONSTRUCTOR) len = CONSTRUCTOR_NELTS (ary); else { - elem_nchars = (TYPE_PRECISION (TREE_TYPE (TREE_TYPE (ary))) + elem_nchars = (TYPE_PRECISION (elem_type) / TYPE_PRECISION (char_type_node)); len = (unsigned) TREE_STRING_LENGTH (ary) / elem_nchars; } if (compare_tree_int (index, len) = 0) { + if (tree_int_cst_lt (index, array_type_nelts_top (TREE_TYPE (ary + { + /* If it's within the array bounds but doesn't have an explicit +initializer, it's value-initialized. 
*/ + tree val = build_value_init (elem_type, tf_warning_or_error); + return cxx_eval_constant_expression (call, val, + allow_non_constant, addr, + non_constant_p); + } + if (!allow_non_constant) error (array subscript out of bound); *non_constant_p = true; diff --git a/gcc/testsuite/g++.dg/cpp0x/constexpr-missing.C b/gcc/testsuite/g++.dg/cpp0x/constexpr-missing.C new file mode 100644 index 000..547f552 --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp0x/constexpr-missing.C @@ -0,0 +1,39 @@ +// PR c++/48911 +// { dg-do compile } +// { dg-options -std=c++0x } + +#define SA(X) static_assert((X),#X) + +struct A +{ + constexpr A () : a (6) {} + int a; +}; + +int +main () +{ + constexpr int a[2] = { 42 }; + constexpr int i = a[1]; + SA(i==0); + constexpr int b[1] = { }; + constexpr int j = b[0]; + SA(j==0); + constexpr char c[2] = a; + constexpr char k = c[1]; + SA(k==0); + constexpr char d[2] = ; + constexpr char l = d[1]; + SA(l==0); + constexpr wchar_t e[2] = La; + constexpr wchar_t m = e[1]; + SA(m==0); + constexpr wchar_t f[2] = L; + constexpr wchar_t n = f[1]; + SA(n==0); + constexpr A g[2] = { A () }; + constexpr A o = g[0]; + SA(o.a == 6); + constexpr A p = g[1]; + SA(p.a == 6); +}
Re: C++ PATCH for c++/48446 (ICE with VLA)
I noticed a minor tweak I could make to speed this up and figure I might as well, even though it shouldn't be a significant component of compile time. Tested x86_64-pc-linux-gnu, applying to trunk and 4.6. commit 4c3e6de3e988799dac490b6eb2b762674b5bb9f8 Author: Jason Merrill ja...@redhat.com Date: Thu May 5 17:57:50 2011 -0400 * decl.c (stabilize_save_expr_r): Set *walk_subtrees as appropriate. diff --git a/gcc/cp/decl.c b/gcc/cp/decl.c index c5184e0..b5d4cc2 100644 --- a/gcc/cp/decl.c +++ b/gcc/cp/decl.c @@ -7615,8 +7615,9 @@ stabilize_save_expr_r (tree *expr_p, int *walk_subtrees, void *data) cp_walk_tree (op, stabilize_save_expr_r, data, pset); if (TREE_SIDE_EFFECTS (op)) TREE_OPERAND (expr, 0) = get_temp_regvar (TREE_TYPE (op), op); + *walk_subtrees = 0; } - else if (!EXPR_P (expr)) + else if (!EXPR_P (expr) || !TREE_SIDE_EFFECTS (expr)) *walk_subtrees = 0; return NULL; }
Re: [Patch, Fortran] Support scalar coarrays in this_image/ucobound/image_index
On Wed, May 4, 2011 at 11:07 PM, Tobias Burnus bur...@net-b.de wrote: Before, scalar coarrays were not supported in the coindex intrinsics as they did not have - on tree level - cobounds attached to them. This patch adds them. Additionally, it fixes the algorithm of this_image, which seemingly only worked by chance for the test case; hopefully it now works always correctly. Note: Allocatable scalar coarrays remain unsupported for the moment. Is the patch OK for the trunk? This caused: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=48919 -- H.J.
Re: [Patch, Fortran] Support scalar coarrays in this_image/ucobound/image_index
Am 07.05.2011 00:50, schrieb H.J. Lu: On Wed, May 4, 2011 at 11:07 PM, Tobias Burnus bur...@net-b.de wrote: Is the patch OK for the trunk? This caused: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=48919 That happens if patches do not get approved in the order in which they were written/submitted. The failure is fixed by my patch at: http://gcc.gnu.org/ml/fortran/2011-05/msg00023.html (The patch did not include a test case (I didn't include the one I had) - but seemingly we now have one, which is already in the trunk.) Tobias
Re: Cgraph thunk reorg
Hi, given that the patch has received feedback and I have weekend for fixing the fallout, I decided to commit the following version today. It contains fix in visibility handling of thunks that has shown in Mozilla build. * cgraph.c (cgraph_add_thunk): Create real function node instead of alias node; finalize it and mark needed/reachale; arrange visibility to be right and add it into the corresponding same comdat group list. (dump_cgraph_node): Dump thunks. * cgraph.h (cgraph_first_defined_function, cgraph_next_defined_function, cgraph_function_with_gimple_body_p, cgraph_first_function_with_gimple_body, cgraph_next_function_with_gimple_body): New functions. (FOR_EACH_FUNCTION_WITH_GIMPLE_BODY, FOR_EACH_DEFINED_FUNCTION): New macros. * ipa-cp.c (ipcp_need_redirect_p): Thunks can't be redirected. (ipcp_generate_summary): Use FOR_EACH_FUNCTION_WITH_GIMPLE_BODY. * cgraphunit.c (cgraph_finalize_function): Only look into possible devirtualization when optimizing. (verify_cgraph_node): Verify thunks. (cgraph_analyze_function): Analyze thunks. (cgraph_mark_functions_to_output): Output thunks only in combination with function they are assigned to. (assemble_thunk): Turn thunk into non-thunk; don't try to turn alias into normal node. (assemble_thunks): New functoin. (cgraph_expand_function): Use it. * lto-cgraph.c (lto_output_node): Stream thunks. (input_overwrite_node): Stream in thunks. * ipa-pure-const.c (analyze_function): Thunks do nothing interesting. * lto-streamer-out.c (lto_output): Do not try to output thunk's body. * ipa-inline.c (inline_small_functions): Use FOR_EACH_DEFINED_FUNCTION. * ipa-inline-analysis.c (compute_inline_parameters): Analyze thunks. (inline_analyze_function): Do not care about thunk jump functions. (inline_generate_summary):Use FOR_EACH_DEFINED_FUNCTION. * ipa-prop.c (ipa_prop_write_jump_functions): Use cgraph_function_with_gimple_body_p. * passes.c (do_per_function_toporder): Use cgraph_function_with_gimple_body_p. 
(execute_one_pass);Use FOR_EACH_FUNCTION_WITH_GIMPLE_BODY. (ipa_write_summaries): Use cgraph_function_with_gimple_body_p. (function_called_by_processed_nodes_p): Likewise. * lto.c (lto_materialize_function): Use cgraph_function_with_gimple_body_p. (add_cgraph_node_to_partition): Do not re-add items to partition; handle thunks. (add_varpool_node_to_partition): Do not re-add items to partition. Index: cgraph.c === *** cgraph.c(revision 173251) --- cgraph.c(working copy) *** cgraph_same_body_alias (struct cgraph_no *** 595,608 See comments in thunk_adjust for detail on the parameters. */ struct cgraph_node * ! cgraph_add_thunk (struct cgraph_node *decl_node, tree alias, tree decl, bool this_adjusting, HOST_WIDE_INT fixed_offset, HOST_WIDE_INT virtual_value, tree virtual_offset, tree real_alias) { ! struct cgraph_node *node = cgraph_get_node (alias); if (node) { gcc_assert (node-local.finalized); --- 595,610 See comments in thunk_adjust for detail on the parameters. */ struct cgraph_node * ! cgraph_add_thunk (struct cgraph_node *decl_node ATTRIBUTE_UNUSED, ! tree alias, tree decl, bool this_adjusting, HOST_WIDE_INT fixed_offset, HOST_WIDE_INT virtual_value, tree virtual_offset, tree real_alias) { ! struct cgraph_node *node; + node = cgraph_get_node (alias); if (node) { gcc_assert (node-local.finalized); *** cgraph_add_thunk (struct cgraph_node *de *** 610,617 cgraph_remove_node (node); } ! node = cgraph_same_body_alias_1 (decl_node, alias, decl); ! gcc_assert (node); gcc_checking_assert (!virtual_offset || tree_int_cst_equal (virtual_offset, size_int (virtual_value))); --- 612,618 cgraph_remove_node (node); } ! 
node = cgraph_create_node (alias); gcc_checking_assert (!virtual_offset || tree_int_cst_equal (virtual_offset, size_int (virtual_value))); *** cgraph_add_thunk (struct cgraph_node *de *** 621,626 --- 622,636 node-thunk.virtual_offset_p = virtual_offset != NULL; node-thunk.alias = real_alias; node-thunk.thunk_p = true; + node-local.finalized = true; + + if (cgraph_decide_is_function_needed (node, decl)) + cgraph_mark_needed_node (node); + + if ((TREE_PUBLIC (decl) !DECL_COMDAT (decl)
Re: [PATCH] Fix up typed DWARF stack support for POINTERS_EXTEND_UNSIGNED targets (PR debug/48853)
On Thu, May 5, 2011 at 2:20 AM, Jakub Jelinek ja...@redhat.com wrote: Hi! My typed DWARF stack changes apparently broke ia64-hpux and H.J.'s out of tree x32 target. There are several issues: 1) for SUBREG mem_loc_descriptor's 3rd argument was wrong, found by code inspection 2) CONST/SYMBOL_REF/LABEL_REF when in MEM addresses on POINTERS_EXTEND_UNSIGNED targets are often Pmode, which is unfortunately larger than DWARF2_ADDR_SIZE and my conditional would just return NULL in that case instead of emitting DW_OP_addr. 3) and, when mem_loc_descriptor is called from unwind code, Pmodes larger than DWARF2_ADDR_SIZE would result in the new DW_OP_GNU_*_type etc. ops which are not allowed in .eh_frame/.debug_frame The following patch ought to fix that, bootstrapped/regtested on x86_64-linux and i686-linux and Steve tested it on ia64-hpux and H.J. on his port. Ok for trunk? 2011-05-05 Jakub Jelinek ja...@redhat.com PR debug/48853 * dwarf2out.c (mem_loc_descriptor) case SUBREG: Pass mem_mode instead of mode as 3rd argument to recursive call. (mem_loc_descriptor) case REG: If POINTERS_EXTEND_UNSIGNED, don't emit DW_OP_GNU_regval_type if mode is Pmode and mem_mode is not VOIDmode. (mem_loc_descriptor) case SYMBOL_REF: If POINTERS_EXTEND_UNSIGNED, don't give up if mode is Pmode and mem_mode is not VOIDmode. (mem_loc_descriptor) case CONST_INT: If POINTERS_EXTEND_UNSIGNED, use int_loc_descriptor if mode is Pmode and mem_mode is not VOIDmode. Here is the missing patch for case SUBREG. OK for trunk if there is no regressions? Thanks. H.J. 2011-05-06 H.J. Lu hongjiu...@intel.com PR debug/48853 * dwarf2out.c (mem_loc_descriptor) case SUBREG: If POINTERS_EXTEND_UNSIGNED, don't give up if mode is Pmode and mem_mode is not VOIDmode. 
diff --git a/gcc/dwarf2out.c b/gcc/dwarf2out.c index 026e4a7..049ca8e 100644 --- a/gcc/dwarf2out.c +++ b/gcc/dwarf2out.c @@ -13892,7 +13892,11 @@ mem_loc_descriptor (rtx rtl, enum machine_mode mode, break; if (GET_MODE_CLASS (mode) == MODE_INT GET_MODE_CLASS (GET_MODE (SUBREG_REG (rtl))) == MODE_INT - GET_MODE_SIZE (mode) = DWARF2_ADDR_SIZE + (GET_MODE_SIZE (mode) = DWARF2_ADDR_SIZE +#ifdef POINTERS_EXTEND_UNSIGNED + || (mode == Pmode mem_mode != VOIDmode) +#endif +) GET_MODE_SIZE (GET_MODE (SUBREG_REG (rtl))) = DWARF2_ADDR_SIZE) { mem_loc_result = mem_loc_descriptor (SUBREG_REG (rtl),
[Patch, Fortran] Fixes for scalar coarrays
The interface.c patch is to avoid a strange error (actual argument must be simply contiguous) which is a bit odd if the actual argument is a scalar. As the dummy was an array, a rank mismatch would have been the proper error. - The patch simply suppresses the error message such that the later error check becomes active. The rest of the patch: For scalar coarray dummy arguments, the cobounds were not properly saved - thus calling one of the coindex intrinsics gave an ICE. Build and regtested on x86-64-linux. OK for the trunk? Tobias 2011-05-07 Tobias Burnus bur...@net-b.de PR fortran/18918 * interface.c (compare_parameter): Skip diagnostic if actual argument is not an array; rank mismatch is diagnosed later. * trans-decl.c (gfc_get_symbol_decl, gfc_trans_deferred_vars): Handle scalar coarrays. * trans-types.c (gfc_get_array_type_bounds): Ditto. 2011-05-07 Tobias Burnus bur...@net-b.de PR fortran/18918 * gfortran.dg/coarray_20.f90: New. * gfortran.dg/coarray/image_index_2.f90: New. diff --git a/gcc/fortran/interface.c b/gcc/fortran/interface.c index 1f75724..732a0c5 100644 --- a/gcc/fortran/interface.c +++ b/gcc/fortran/interface.c @@ -1618,6 +1618,7 @@ compare_parameter (gfc_symbol *formal, gfc_expr *actual, /* F2008, 12.5.2.8. */ if (formal-attr.dimension (formal-attr.contiguous || formal-as-type != AS_ASSUMED_SHAPE) + gfc_expr_attr (actual).dimension !gfc_is_simply_contiguous (actual, true)) { if (where) diff --git a/gcc/fortran/trans-decl.c b/gcc/fortran/trans-decl.c index 63f03de..a78b5ac 100644 --- a/gcc/fortran/trans-decl.c +++ b/gcc/fortran/trans-decl.c @@ -1228,7 +1228,8 @@ gfc_get_symbol_decl (gfc_symbol * sym) } /* Use a copy of the descriptor for dummy arrays. */ - if (sym-attr.dimension !TREE_USED (sym-backend_decl)) + if ((sym-attr.dimension || sym-attr.codimension) + !TREE_USED (sym-backend_decl)) { decl = gfc_build_dummy_array_decl (sym, sym-backend_decl); /* Prevent the dummy from being detected as unused if it is copied. 
*/ @@ -1316,7 +1317,7 @@ gfc_get_symbol_decl (gfc_symbol * sym) DECL_IGNORED_P (decl) = 1; } - if (sym-attr.dimension) + if (sym-attr.dimension || sym-attr.codimension) { /* Create variables to hold the non-constant bits of array info. */ gfc_build_qualified_array (decl, sym); @@ -3435,7 +3436,7 @@ gfc_trans_deferred_vars (gfc_symbol * proc_sym, gfc_wrapped_block * block) if (sym-assoc) continue; - if (sym-attr.dimension) + if (sym-attr.dimension || sym-attr.codimension) { switch (sym-as-type) { diff --git a/gcc/fortran/trans-types.c b/gcc/fortran/trans-types.c index 22a2c5b..4dd82ca 100644 --- a/gcc/fortran/trans-types.c +++ b/gcc/fortran/trans-types.c @@ -1694,9 +1694,10 @@ gfc_get_array_type_bounds (tree etype, int dimen, int codimen, tree * lbound, stride = gfc_index_one_node; else stride = NULL_TREE; - for (n = 0; n dimen; n++) + for (n = 0; n dimen + codimen; n++) { - GFC_TYPE_ARRAY_STRIDE (fat_type, n) = stride; + if (n dimen) + GFC_TYPE_ARRAY_STRIDE (fat_type, n) = stride; if (lbound) lower = lbound[n]; @@ -1711,6 +1712,9 @@ gfc_get_array_type_bounds (tree etype, int dimen, int codimen, tree * lbound, lower = NULL_TREE; } + if (codimen n == dimen + codimen - 1) + break; + upper = ubound[n]; if (upper != NULL_TREE) { @@ -1720,6 +1724,9 @@ gfc_get_array_type_bounds (tree etype, int dimen, int codimen, tree * lbound, upper = NULL_TREE; } + if (n = dimen) + continue; + if (upper != NULL_TREE lower != NULL_TREE stride != NULL_TREE) { tmp = fold_build2_loc (input_location, MINUS_EXPR, --- /dev/null 2011-05-06 19:43:06.071892303 +0200 +++ gcc/gcc/testsuite/gfortran.dg/coarray_20.f90 2011-05-07 00:40:46.0 +0200 @@ -0,0 +1,15 @@ +! { dg-do compile } +! { dg-options -fcoarray=single } +! +! Before a bogus error (argument not simply contiguous) +! was printed instead of the rank mismatch +! +! PR fortran/18918 +! +integer :: A[*] +call bar(A) ! 
{ dg-error Rank mismatch in argument } +contains + subroutine bar(x) +integer :: x(1)[*] + end subroutine bar +end --- /dev/null 2011-05-06 19:43:06.071892303 +0200 +++ gcc/gcc/testsuite/gfortran.dg/coarray/image_index_2.f90 2011-05-07 00:28:14.0 +0200 @@ -0,0 +1,76 @@ +! { dg-do run } +! +! Scalar coarray +! +! Run-time test for IMAGE_INDEX with cobounds only known at +! the compile time, suitable for any number of NUM_IMAGES() +! For compile-time cobounds, the -fcoarray=lib version still +! needs a run-time evaluation if image_index returns 1 +! as image_index is 0 if the index would exceed num_images(). +! +! Please set num_images() to >= 13, if possible. +! +! PR fortran/18918 +! + +program test_image_index +implicit none +integer :: index1, index2, index3 +logical :: one + +integer, save :: d[-1:3, *] +integer, save ::