Fortran Patches
Hi Janus, could you also patches, which you commit as obvious to the mailing lists? Regarding the last patch, the GNU style puts a line break after the ")" in: + if (!sym) return NULL; + Tobias commit 12c8610481cc199a6019cd41d07dbdf8906032d0 Author: janus Date: Thu Sep 15 17:48:27 2011 + 2011-09-15 Janus Weil PR fortran/50401 * resolve.c (resolve_transfer): Check if component 'ref' is defined. PR fortran/50403 * symbol.c (gfc_use_derived): Check if argument 'sym' is defined. 2011-09-15 Janus Weil PR fortran/50401 PR fortran/50403 * gfortran.dg/function_types_3.f90: New. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@178889 138bc75d-0d04-0410-961f-82ee72b054a4 diff --git a/gcc/fortran/ChangeLog b/gcc/fortran/ChangeLog index eeb462f..a8e0273 100644 --- a/gcc/fortran/ChangeLog +++ b/gcc/fortran/ChangeLog @@ -1,3 +1,11 @@ +2011-09-15 Janus Weil + + PR fortran/50401 + * resolve.c (resolve_transfer): Check if component 'ref' is defined. + + PR fortran/50403 + * symbol.c (gfc_use_derived): Check if argument 'sym' is defined. + 2011-09-14 Tobias Burnus PR fortran/34547 diff --git a/gcc/fortran/resolve.c b/gcc/fortran/resolve.c index 9aab836..62750af 100644 --- a/gcc/fortran/resolve.c +++ b/gcc/fortran/resolve.c @@ -8222,7 +8222,7 @@ resolve_transfer (gfc_code *code) } } - if (sym->as != NULL && sym->as->type == AS_ASSUMED_SIZE + if (sym->as != NULL && sym->as->type == AS_ASSUMED_SIZE && exp->ref && exp->ref->type == REF_ARRAY && exp->ref->u.ar.type == AR_FULL) { gfc_error ("Data transfer element at %L cannot be a full reference to " diff --git a/gcc/fortran/symbol.c b/gcc/fortran/symbol.c index b2f0f2b..e2f13b8 100644 --- a/gcc/fortran/symbol.c +++ b/gcc/fortran/symbol.c @@ -1945,6 +1945,8 @@ gfc_use_derived (gfc_symbol *sym) gfc_symtree *st; int i; + if (!sym) return NULL; + if (sym->components != NULL || sym->attr.zero_comp) return sym; /* Already defined. */ diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 86cdde0..0accd60 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,9 @@ +2011-09-15 Janus Weil + + PR fortran/50401 + PR fortran/50403 + * gfortran.dg/function_types_3.f90: New. + 2011-09-15 Jason Merrill PR c++/50365 diff --git a/gcc/testsuite/gfortran.dg/function_types_3.f90 b/gcc/testsuite/gfortran.dg/function_types_3.f90 new file mode 100644 index 000..8d00f5f --- /dev/null +++ b/gcc/testsuite/gfortran.dg/function_types_3.f90 @@ -0,0 +1,19 @@ +! { dg-do compile } +! +! Contributed by Vittorio Zecca +! +! PR 50401: SIGSEGV in resolve_transfer + + interface +function f() ! { dg-error "must be a dummy argument" } + dimension f(*) +end function + end interface + print *,f() +end + +! PR 50403: SIGSEGV in gfc_use_derived + +type(f) function f() ! { dg-error "conflicts with DERIVED attribute|is not accessible" } + f=110 ! { dg-error "Unclassifiable statement" } +end commit cde48a27b12d2c4d1a5aababb94f6695e9c00469 Author: janus Date: Tue Sep 13 18:37:33 2011 + 2011-09-13 Janus Weil PR fortran/50379 * symbol.c (check_conflict): Check conflict between GENERIC and RESULT attributes. 2011-09-13 Janus Weil PR fortran/50379 * gfortran.dg/result_2.f90: New. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@178829 138bc75d-0d04-0410-961f-82ee72b054a4 diff --git a/gcc/fortran/ChangeLog b/gcc/fortran/ChangeLog index 836967d..6e82538 100644 --- a/gcc/fortran/ChangeLog +++ b/gcc/fortran/ChangeLog @@ -1,3 +1,9 @@ +2011-09-13 Janus Weil + + PR fortran/50379 + * symbol.c (check_conflict): Check conflict between GENERIC and RESULT + attributes. + 2011-09-11 Thomas Koenig PR fortran/50327 diff --git a/gcc/fortran/symbol.c b/gcc/fortran/symbol.c index ce4ab3d..b2f0f2b 100644 --- a/gcc/fortran/symbol.c +++ b/gcc/fortran/symbol.c @@ -373,7 +373,7 @@ check_conflict (symbol_attribute *attr, const char *name, locus *where) *volatile_ = "VOLATILE", *is_protected = "PROTECTED", *is_bind_c = "BIND(C)", *procedure = "PROCEDURE", *asynchronous = "ASYNCHRONOUS", *codimension = "CODIMENSION", -*contiguous = "CONTIGUOUS"; +*contiguous = "CONTIGUOUS", *generic = "GENERIC"; static const char *threadprivate = "THREADPRIVATE"; const char *a1, *a2; @@ -490,8 +490,6 @@ check_conflict (symbol_attribute *attr, const char *name, locus *where) conf (in_common, codimension); conf (in_common, result); - conf (dummy, result); - conf (in_equivalence, use_assoc); conf (in_equivalence, codimension); conf (in_equivalence, dummy); @@ -503,7 +501,9 @@ check_conflict (symbol_attribute *attr, const char *name, locus *where) conf (in_equivalence, allocatable); conf (in_equivalence, threadprivate); + conf (dummy, result); conf (entry, resul
Go patch committed: Fix defer
The Go frontend had a bug when a function with named result parameter called defer on a function which did not call recover. The caller would execute a return statement to return the updated values of the named result parameters. This was wrong when the deferred function did not call recover and when some callee called panic, because in that case the panic should continue propagating up the stack, but executing the return caused it to return instead. This patch fixes the problem by only executing the return in the case where it is appropriate, and otherwise falling out of the TRY_FINALLY_EXPR in order to continue throwing the exception. It would be a bit nicer if we could ask the try/finally handler whether we are going to return since, after all, it does know. However, there is currently no way to do that. Bootstrapped and ran Go testsuite on x86_64-unknown-linux-gnu. Committed to mainline. Ian diff -r a10bd634db30 go/gogo-tree.cc --- a/go/gogo-tree.cc Wed Sep 14 15:25:34 2011 -0700 +++ b/go/gogo-tree.cc Thu Sep 15 22:36:43 2011 -0700 @@ -1592,15 +1592,25 @@ && !this->type_->results()->empty() && !this->type_->results()->front().name().empty()) { - // If the result variables are named, we need to return them - // again, because they might have been changed by a defer - // function. + // If the result variables are named, and we are returning from + // this function rather than panicing through it, we need to + // return them again, because they might have been changed by a + // defer function. The runtime routines set the defer_stack + // variable to true if we are returning from this function. retval = this->return_value(gogo, named_function, end_loc, &stmt_list); set = fold_build2_loc(end_loc, MODIFY_EXPR, void_type_node, DECL_RESULT(this->fndecl_), retval); ret_stmt = fold_build1_loc(end_loc, RETURN_EXPR, void_type_node, set); - append_to_statement_list(ret_stmt, &stmt_list); + + Expression* ref = + Expression::make_temporary_reference(this->defer_stack_, end_loc); + tree tref = ref->get_tree(&context); + tree s = build3_loc(end_loc, COND_EXPR, void_type_node, tref, + ret_stmt, NULL_TREE); + + append_to_statement_list(s, &stmt_list); + } go_assert(*fini == NULL_TREE); diff -r a10bd634db30 go/gogo.cc --- a/go/gogo.cc Wed Sep 14 15:25:34 2011 -0700 +++ b/go/gogo.cc Thu Sep 15 22:36:43 2011 -0700 @@ -2976,27 +2976,27 @@ this->block_->determine_types(); } -// Get a pointer to the variable holding the defer stack for this -// function, making it if necessary. At least at present, the value -// of this variable is not used. However, a pointer to this variable -// is used as a marker for the functions on the defer stack associated -// with this function. Doing things this way permits inlining a +// Get a pointer to the variable representing the defer stack for this +// function, making it if necessary. The value of the variable is set +// by the runtime routines to true if the function is returning, +// rather than panicing through. A pointer to this variable is used +// as a marker for the functions on the defer stack associated with +// this function. A function-specific variable permits inlining a // function which uses defer. Expression* Function::defer_stack(source_location location) { - Type* t = Type::make_pointer_type(Type::make_void_type()); if (this->defer_stack_ == NULL) { - Expression* n = Expression::make_nil(location); + Type* t = Type::lookup_bool_type(); + Expression* n = Expression::make_boolean(false, location); this->defer_stack_ = Statement::make_temporary(t, n, location); this->defer_stack_->set_is_address_taken(); } Expression* ref = Expression::make_temporary_reference(this->defer_stack_, location); - Expression* addr = Expression::make_unary(OPERATOR_AND, ref, location); - return Expression::make_unsafe_cast(t, addr, location); + return Expression::make_unary(OPERATOR_AND, ref, location); } // Export the function. diff -r a10bd634db30 go/runtime.def --- a/go/runtime.def Wed Sep 14 15:25:34 2011 -0700 +++ b/go/runtime.def Thu Sep 15 22:36:43 2011 -0700 @@ -165,10 +165,10 @@ R1(BOOL)) // Check for a deferred function in an exception handler. -DEF_GO_RUNTIME(CHECK_DEFER, "__go_check_defer", P1(POINTER), R0()) +DEF_GO_RUNTIME(CHECK_DEFER, "__go_check_defer", P1(BOOLPTR), R0()) // Run deferred functions. -DEF_GO_RUNTIME(UNDEFER, "__go_undefer", P1(POINTER), R0()) +DEF_GO_RUNTIME(UNDEFER, "__go_undefer", P1(BOOLPTR), R0()) // Panic with a runtime error. DEF_GO_RUNTIME(RUNTIME_ERROR, "__go_runtime_error", P1(INT), R0()) @@ -207,7 +207,7 @@ // Defer a function. -DEF_GO_RUNTIME(DEFER, "__go_defer", P3(POINTER, FUNC_PTR, POINTER), R0()) +DEF_GO_RUNTIME(DEFER, "__go_defer", P3(BOOLPTR, FUNC_PTR, POINTER), R0()) // Run a select statement. diff -r
Re: [PATCH], PR 50341, Fix TOC load scheduling for powerpc on GCC 4.6 and 4.7 (patches included)
On Thu, Sep 15, 2011 at 2:04 PM, Michael Meissner wrote: > It would help if I included the patches: > > [gcc-4.6] > 2011-09-15 Alan Modra > Michael Meissner > > PR target/50341 > * config/rs6000/rs6000.md (call_indirect_aix32): Do not split the > load of the indirect function's TOC from the call to prevent the > compiler from moving the load of the new TOC above code that > references the current function's TOC. > (call_indirect_aix64): Ditto. > (call_value_indirect_aix32): Ditto. > (call_value_indirect_aix64): Ditto. > (call_indirect_nonlocal_aix32_internal): Ditto. > (call_indirect_nonlocal_aix32): Ditto. > (call_indirect_nonlocal_aix64_internal): Ditto. > (call_indirect_nonlocal_aix64): Ditto. > (call_value_indirect_nonlocal_aix32_internal): Ditto. > (call_value_indirect_nonlocal_aix32): Ditto. > (call_value_indirect_nonlocal_aix64_internal): Ditto. > (call_value_indirect_nonlocal_aix64): Ditto. > > [gcc-4.7] > 2011-09-15 Alan Modra > Michael Meissner > > PR target/50341 > * config/rs6000/rs6000.md (call_indirect_aix): Do not > split the load of the indirect function's TOC from the call to > prevent the compiler from moving the load of the new TOC above > code that references the current function's TOC. > (call_indirect_aix_internal): Ditto. > (call_indirect_aix_nor11): Ditto. > (call_indirect_aix_internal2): Ditto. > (call_value_indirect_aix): Ditto. > (call_value_indirect_aix_internal): Ditto. > (call_value_indirect_aix_nor11): Ditto. > (call_value_indirect_aix_internal2): Ditto. Okay. Thanks, David
[v3] Declare std::make_tuple, std::forward_as_tuple constexpr
Hi, tested x86_64-linux, committed to mainline. Paolo. // 2011-09-15 Paolo Carlini * include/std/tuple (make_tuple, forward_as_tuple): Declare constexpr. (_Tuple_impl<>::_Tuple_impl(_Tuple_impl<>&&)): Likewise. * testsuite/20_util/tuple/creation_functions/constexpr.cc: Enable make_tuple test. * testsuite/20_util/weak_ptr/comparison/cmp_neg.cc: Adjust dg-warning line number. Index: include/std/tuple === --- include/std/tuple (revision 178898) +++ include/std/tuple (working copy) @@ -273,6 +273,7 @@ constexpr _Tuple_impl(const _Tuple_impl&) = default; + constexpr _Tuple_impl(_Tuple_impl&& __in) noexcept(__and_, is_nothrow_move_constructible<_Inherited>>::value) @@ -285,9 +286,9 @@ _Base(_Tuple_impl<_Idx, _UElements...>::_M_head(__in)) { } template -_Tuple_impl(_Tuple_impl<_Idx, _UHead, _UTails...>&& __in) - : _Inherited(std::move - (_Tuple_impl<_Idx, _UHead, _UTails...>::_M_tail(__in))), +constexpr _Tuple_impl(_Tuple_impl<_Idx, _UHead, _UTails...>&& __in) + : _Inherited(std::move +(_Tuple_impl<_Idx, _UHead, _UTails...>::_M_tail(__in))), _Base(std::forward<_UHead> (_Tuple_impl<_Idx, _UHead, _UTails...>::_M_head(__in))) { } @@ -580,7 +581,7 @@ template, is_convertible<_U2, _T2>>::value>::type> - constexpr tuple(pair<_U1, _U2>&& __in) +constexpr tuple(pair<_U1, _U2>&& __in) : _Inherited(std::forward<_U1>(__in.first), std::forward<_U2>(__in.second)) { } @@ -872,7 +873,7 @@ // NB: DR 705. template -inline tuple::__type...> +constexpr tuple::__type...> make_tuple(_Elements&&... __args) { typedef tuple::__type...> @@ -881,7 +882,7 @@ } template -inline tuple<_Elements&&...> +constexpr tuple<_Elements&&...> forward_as_tuple(_Elements&&... __args) noexcept { return tuple<_Elements&&...>(std::forward<_Elements>(__args)...); } Index: testsuite/20_util/tuple/creation_functions/constexpr.cc === --- testsuite/20_util/tuple/creation_functions/constexpr.cc (revision 178898) +++ testsuite/20_util/tuple/creation_functions/constexpr.cc (working copy) @@ -29,7 +29,6 @@ // make_tuple -#if 0 void test_make_tuple() { @@ -43,7 +42,6 @@ constexpr tuple_type p1 = std::make_tuple(22, 22.222, 77799); } } -#endif // get void @@ -77,10 +75,7 @@ int main() { -#if 0 test_make_tuple(); -#endif - test_get(); test_tuple_cat(); Index: testsuite/20_util/weak_ptr/comparison/cmp_neg.cc === --- testsuite/20_util/weak_ptr/comparison/cmp_neg.cc(revision 178898) +++ testsuite/20_util/weak_ptr/comparison/cmp_neg.cc(working copy) @@ -51,7 +51,7 @@ // { dg-warning "note" "" { target *-*-* } 485 } // { dg-warning "note" "" { target *-*-* } 479 } // { dg-warning "note" "" { target *-*-* } 468 } -// { dg-warning "note" "" { target *-*-* } 840 } +// { dg-warning "note" "" { target *-*-* } 841 } // { dg-warning "note" "" { target *-*-* } 1056 } // { dg-warning "note" "" { target *-*-* } 1050 } // { dg-warning "note" "" { target *-*-* } 342 }
[Patch, fortran] [06/21] Remove coarray support in the scalarizer: Request coarray for an actual arg associed with a coarray dummy
This one is more controversial and I need input from a coarray-fluent guy, for example Tobias. As I introduced the want_coarray flag (see previous patch), it made sense to me to also set want_coarray for coarray dummies. After some more thought, I decided to restrict further want_coarray to assumed shape coarray dummies. The patch does that. However, what comes up from testing is that neither is needed. I'm not submiting this patch for approval, I'm submitting it to understand why it is not necessary. How are cobounds passed to a coarray dummy? 2011-09-14 Mikael Morin * trans-expr.c (gfc_conv_procedure_call): Set want_coarray flag if the dummy argument is an assumed-shape coarray. diff --git a/trans-expr.c b/trans-expr.c index 131927c..80447fa 100644 --- a/trans-expr.c +++ b/trans-expr.c @@ -2990,8 +2990,12 @@ gfc_conv_procedure_call (gfc_se * se, gfc_symbol * sym, { /* A scalar or transformational function. */ gfc_init_se (&parmse, NULL); - argss = gfc_walk_expr (e); + if (fsym && fsym->attr.codimension + && (fsym->as->cotype == AS_ASSUMED_SHAPE + || fsym->as->cotype == AS_DEFERRED)) + parmse.want_coarray = 1; + argss = gfc_walk_expr (e); if (argss == gfc_ss_terminator) { if (e->expr_type == EXPR_VARIABLE
[Patch, fortran] [04/21] Remove coarray support in the scalarizer: Fix gfc_get_corank
This patch was necessary on a previous version of the patchset (I was calling gfc_get_corank on non-coarrays with e->symtree == NULL, and it was segfaulting on the first e->symtree dereferencing). In the current version, it is optional, but I propose it anyway as (I think) it makes some sense. The patch itself doesn't need extra explanations. We could add an early return dealing with non-coarray cases (this is the variant proposed), an assertion that input is really a coarray, or nothing at all. Either works. Which one is best? 2011-09-14 Morin * expr.c (gfc_get_corank): Return 0 if input expression is not a coarray. diff --git a/expr.c b/expr.c index 3c09a2a..056da71 100644 --- a/expr.c +++ b/expr.c @@ -4293,13 +4293,19 @@ gfc_get_corank (gfc_expr *e) { int corank; gfc_ref *ref; + + if (!gfc_is_coarray (e)) +return 0; + corank = e->symtree->n.sym->as ? e->symtree->n.sym->as->corank : 0; + for (ref = e->ref; ref; ref = ref->next) { if (ref->type == REF_ARRAY) corank = ref->u.ar.as->corank; gcc_assert (ref->type != REF_SUBSTRING); } + return corank; }
[Patch, fortran] [05/21] Remove coarray support in the scalarizer: Calculate codim earlier.
In the next patches, we are going to evaluate cobounds out of the scalarizer. This needs to be done before the call to gfc_get_array_type_bounds (which takes [co]bounds as input). As a result we need to know the corank before that call. Furthermore, as we are going to remove coarray support in the scalarizer, let's calculate the corank without relying on the scalarizer. This patch just does that. A new want_coarray flag is introduced as sometimes we want to treat coarrays as normal arrays, that is we want a descriptor for the local image only. OK? 2011-09-14 Mikael Morin * trans_array.c (gfc_conv_expr_descriptor): Evaluate codimension earlier and without relying on the scalarizer. diff --git a/trans-array.c b/trans-array.c index 88849ef..88998de 100644 --- a/trans-array.c +++ b/trans-array.c @@ -5988,6 +5988,11 @@ gfc_conv_expr_descriptor (gfc_se * se, gfc_expr * expr, gfc_ss * ss) tree to; tree base; + if (se->want_coarray) + codim = gfc_get_corank (expr); + else + codim = 0; + /* Set the string_length for a character array. */ if (expr->ts.type == BT_CHARACTER) se->string_length = gfc_get_expr_charlen (expr); @@ -6036,7 +6041,6 @@ gfc_conv_expr_descriptor (gfc_se * se, gfc_expr * expr, gfc_ss * ss) base = NULL_TREE; ndim = info->ref ? info->ref->u.ar.dimen : info->dimen; - codim = info->codimen; for (n = 0; n < ndim; n++) { stride = gfc_conv_array_stride (desc, n); diff --git a/trans-intrinsic.c b/trans-intrinsic.c index de5a809..c216873 100644 --- a/trans-intrinsic.c +++ b/trans-intrinsic.c @@ -974,6 +974,7 @@ trans_this_image (gfc_se * se, gfc_expr *expr) ss = gfc_walk_expr (expr->value.function.actual->expr); gcc_assert (ss != gfc_ss_terminator); ss->data.info.codimen = corank; + argse.want_coarray = 1; gfc_conv_expr_descriptor (&argse, expr->value.function.actual->expr, ss); gfc_add_block_to_block (&se->pre, &argse.pre); gfc_add_block_to_block (&se->post, &argse.post); @@ -1161,6 +1162,7 @@ trans_image_index (gfc_se * se, gfc_expr *expr) ss = gfc_walk_expr (expr->value.function.actual->expr); gcc_assert (ss != gfc_ss_terminator); ss->data.info.codimen = corank; + argse.want_coarray = 1; gfc_conv_expr_descriptor (&argse, expr->value.function.actual->expr, ss); gfc_add_block_to_block (&se->pre, &argse.pre); gfc_add_block_to_block (&se->post, &argse.post); @@ -1488,6 +1490,7 @@ conv_intrinsic_cobound (gfc_se * se, gfc_expr * expr) gcc_assert (ss != gfc_ss_terminator); ss->data.info.codimen = corank; gfc_init_se (&argse, NULL); + argse.want_coarray = 1; gfc_conv_expr_descriptor (&argse, arg->expr, ss); gfc_add_block_to_block (&se->pre, &argse.pre); diff --git a/trans.h b/trans.h index 0c249a6..6157a88 100644 --- a/trans.h +++ b/trans.h @@ -86,6 +86,8 @@ typedef struct gfc_se args alias. */ unsigned force_tmp:1; + unsigned want_coarray:1; + /* Scalarization parameters. */ struct gfc_se *parent; struct gfc_ss *ss;
[Patch, fortran] [07/21] Remove coarray support in the scalarizer: Use codim as argument gfc_get_array_type_bounds
This uses the just set codim (see patch 5) as argument to gfc_get_array_type_bounds. OK? 2011-09-14 Mikael Morin * trans-array.c (gfc_conv_expr_descriptor): Use codim instead of loop.codimen as argument to gfc_get_array_type_bounds. diff --git a/trans-array.c b/trans-array.c index 88998de..0f5f29c 100644 --- a/trans-array.c +++ b/trans-array.c @@ -6008,9 +6008,8 @@ gfc_conv_expr_descriptor (gfc_se * se, gfc_expr * expr, gfc_ss * ss) { /* Otherwise make a new one. */ parmtype = gfc_get_element_type (TREE_TYPE (desc)); - parmtype = gfc_get_array_type_bounds (parmtype, loop.dimen, - loop.codimen, loop.from, - loop.to, 0, + parmtype = gfc_get_array_type_bounds (parmtype, loop.dimen, codim, + loop.from, loop.to, 0, GFC_ARRAY_UNKNOWN, false); parm = gfc_create_var (parmtype, "parm"); }
[Patch, fortran] [08/21] Remove coarray support in the scalarizer: Factor array ref references
The Changelog says it all. OK? 2011-09-14 Mikael Morin * trans-array.c (gfc_conv_section_startstride): Factor common array ref references. diff --git a/trans-array.c b/trans-array.c index 0f5f29c..7cc86ba 100644 --- a/trans-array.c +++ b/trans-array.c @@ -3187,12 +3187,14 @@ gfc_conv_section_startstride (gfc_loopinfo * loop, gfc_ss * ss, int dim, tree desc; gfc_se se; gfc_ss_info *info; + gfc_array_ref *ar; gcc_assert (ss->type == GFC_SS_SECTION); info = &ss->data.info; + ar = &info->ref->u.ar; - if (info->ref->u.ar.dimen_type[dim] == DIMEN_VECTOR) + if (ar->dimen_type[dim] == DIMEN_VECTOR) { /* We use a zero-based index to access the vector. */ info->start[dim] = gfc_index_zero_node; @@ -3202,12 +3204,12 @@ gfc_conv_section_startstride (gfc_loopinfo * loop, gfc_ss * ss, int dim, return; } - gcc_assert (info->ref->u.ar.dimen_type[dim] == DIMEN_RANGE); + gcc_assert (ar->dimen_type[dim] == DIMEN_RANGE); desc = info->descriptor; - start = info->ref->u.ar.start[dim]; - end = info->ref->u.ar.end[dim]; + start = ar->start[dim]; + end = ar->end[dim]; if (!coarray) -stride = info->ref->u.ar.stride[dim]; +stride = ar->stride[dim]; /* Calculate the start of the range. For vector subscripts this will be the range of the vector. */
[Patch, fortran] [09/21] Remove coarray support in the scalarizer: Accept coarray dimensions in gfc_conv_section_startstride
Currently, gfc_walk_variable_expr changes codimension's type from DIMEN_THIS_IMAGE to DIMEN_RANGE so that it looks like a regular array. We are going to remove that but still want to call gfc_conv_section_startstride on the coarray dimensions to get the cobounds. This patch relaxes an assertion in gfc_conv_section_startstride to accept coarray dimensions. OK? 2011-09-14 Mikael Morin * trans-array.c (gfc_conv_section_startstride): Update assertion to also accept coarrays. diff --git a/trans-array.c b/trans-array.c index 7cc86ba..7f44514 100644 --- a/trans-array.c +++ b/trans-array.c @@ -3204,7 +3204,8 @@ gfc_conv_section_startstride (gfc_loopinfo * loop, gfc_ss * ss, int dim, return; } - gcc_assert (ar->dimen_type[dim] == DIMEN_RANGE); + gcc_assert (ar->dimen_type[dim] == DIMEN_RANGE + || ar->dimen_type[dim] == DIMEN_THIS_IMAGE); desc = info->descriptor; start = ar->start[dim]; end = ar->end[dim];
[Patch, fortran] [12/21] Remove coarray support in the scalarizer: Get cobounds without the scalarizer
This patch sets cobounds in a coarray's gfc_ss struct without relying on the scalarizer doing it automatically. The convention to have the bounds and cobounds stored in the loop::from and loop::to arrays while calling gfc_get_array_type_bounds is not changed, so we need to set the loop.from[n] and loop.to[n] "by hand". OK? 2011-09-14 Mikael Morin * trans-array.c (gfc_conv_expr_descriptor): Add out-of-the-scalarizer cobounds evaluation. diff --git a/trans-array.c b/trans-array.c index 067fe0b..b132bf6 100644 --- a/trans-array.c +++ b/trans-array.c @@ -5994,7 +5994,21 @@ gfc_conv_expr_descriptor (gfc_se * se, gfc_expr * expr, gfc_ss * ss) tree base; if (se->want_coarray) - codim = gfc_get_corank (expr); + { + codim = gfc_get_corank (expr); + for (n = ss->data.info.dimen; n < ss->data.info.dimen + codim - 1; + n++) + { + gfc_conv_section_startstride (&loop, ss, n, true, false); + loop.from[n] = info->start[n]; + loop.to[n] = info->end[n]; + } + + gcc_assert (n == ss->data.info.dimen + codim - 1); + evaluate_bound (&loop.pre, info->start, info->ref->u.ar.start, + info->descriptor, n, true); + loop.from[n] = info->start[n]; + } else codim = 0;
[Patch, fortran] [13/21] Remove coarray support in the scalarizer: Add specific walk_coarray function.
We have two contradictory needs: we want to make gfc_walk_expr free of coarray hacks as much as possible, we want to have a gfc_ss generated for scalar coarrays suitable for gfc_conv_expr_descriptor. This patch creates a wrapper around gfc_walk_expr, so that in the scalar case (i.e. value returned is gfc_ss_terminator) we generate the needed gfc_ss struct. The new function includes at the same time the code (made slightly more assertive) previously present in convert_element_to_coarray_ref. OK? Note: I haven't understood why walk_coarray is needed in the intrinsic case, but not in the general function call case (cf also my comment about patch 6). On the other hand, it was also the case for the convert_element_to_coarray_ref function before the patch. Note2: This patch may need some adjustments to fix PR50420. http://gcc.gnu.org/bugzilla/show_bug.cgi?id=50420 2011-09-14 Mikael Morin * trans-intrinsic.c (walk_coarray): New function. (convert_element_to_coarray_ref): Move code to walk_coarray. Remove. (trans-this_image, trans_image_index, conv_intrinsic_cobound): Use walk_coarray. diff --git a/trans-intrinsic.c b/trans-intrinsic.c index c216873..bc21b02 100644 --- a/trans-intrinsic.c +++ b/trans-intrinsic.c @@ -924,18 +924,32 @@ gfc_conv_intrinsic_exponent (gfc_se *se, gfc_expr *expr) /* Convert the last ref of a scalar coarray from an AR_ELEMENT to an AR_FULL, suitable for the scalarizer. */ -static void -convert_element_to_coarray_ref (gfc_expr *expr) +static gfc_ss * +walk_coarray (gfc_expr *e) { - gfc_ref *ref; + gfc_ss *ss; - for (ref = expr->ref; ref; ref = ref->next) -if (ref->type == REF_ARRAY && ref->next == NULL - && ref->u.ar.codimen) - { - ref->u.ar.type = AR_FULL; - break; - } + gcc_assert (gfc_get_corank (e) > 0); + + ss = gfc_walk_expr (e); + + /* Fix scalar coarray. */ + if (ss == gfc_ss_terminator) +{ + gfc_ref *ref; + + ss = gfc_get_array_ss (gfc_ss_terminator, e, 0, GFC_SS_SECTION); + + ref = e->ref; + while (ref->next) + ref = ref->next; + + gcc_assert (ref->type == REF_ARRAY && ref->u.ar.codimen > 0); + ref->u.ar.type = AR_FULL; + ss->data.info.ref = ref; +} + + return ss; } @@ -969,9 +983,7 @@ trans_this_image (gfc_se * se, gfc_expr *expr) /* Obtain the descriptor of the COARRAY. */ gfc_init_se (&argse, NULL); - if (expr->value.function.actual->expr->rank == 0) -convert_element_to_coarray_ref (expr->value.function.actual->expr); - ss = gfc_walk_expr (expr->value.function.actual->expr); + ss = walk_coarray (expr->value.function.actual->expr); gcc_assert (ss != gfc_ss_terminator); ss->data.info.codimen = corank; argse.want_coarray = 1; @@ -1157,9 +1169,7 @@ trans_image_index (gfc_se * se, gfc_expr *expr) /* Obtain the descriptor of the COARRAY. */ gfc_init_se (&argse, NULL); - if (expr->value.function.actual->expr->rank == 0) -convert_element_to_coarray_ref (expr->value.function.actual->expr); - ss = gfc_walk_expr (expr->value.function.actual->expr); + ss = walk_coarray (expr->value.function.actual->expr); gcc_assert (ss != gfc_ss_terminator); ss->data.info.codimen = corank; argse.want_coarray = 1; @@ -1484,9 +1494,7 @@ conv_intrinsic_cobound (gfc_se * se, gfc_expr * expr) gcc_assert (arg->expr->expr_type == EXPR_VARIABLE); corank = gfc_get_corank (arg->expr); - if (expr->value.function.actual->expr->rank == 0) -convert_element_to_coarray_ref (expr->value.function.actual->expr); - ss = gfc_walk_expr (arg->expr); + ss = walk_coarray (arg->expr); gcc_assert (ss != gfc_ss_terminator); ss->data.info.codimen = corank; gfc_init_se (&argse, NULL);
[Patch, fortran] [16/21] Remove coarray support in the scalarizer: Remove gfc_loopinfo::codimen
This removes the gfc_loopinfo::codimen field and the code associated with it. OK? 2011-09-14 Mikael Morin * trans.h (gfc_loopinfo): Remove codimen field. * trans-array.c (gfc_set_vector_loop_bounds, gfc_trans_scalarizing_loops, gfc_conv_loop_setup): Update loop upper limit. * trans-array.c (gfc_set_loop_bounds_from_array_spec): Ditto. Remove skip on last codimension. (gfc_start_scalarized_body): Update loop lower limit. (gfc_conv_ss_startstride): Don't set loop's codimen field. (gfc_conv_loop_setup): Remove unnecessary condition. (gfc_conv_expr_descriptor): Don't use loop's codimen field as corank. diff --git a/trans-array.c b/trans-array.c index 9d4ef5a..0a9d281 100644 --- a/trans-array.c +++ b/trans-array.c @@ -641,7 +641,7 @@ gfc_set_loop_bounds_from_array_spec (gfc_interface_mapping * mapping, tree tmp; if (as && as->type == AS_EXPLICIT) -for (n = 0; n < se->loop->dimen + se->loop->codimen; n++) +for (n = 0; n < se->loop->dimen; n++) { dim = se->ss->data.info.dim[n]; gcc_assert (dim < as->rank); @@ -655,22 +655,18 @@ gfc_set_loop_bounds_from_array_spec (gfc_interface_mapping * mapping, gfc_add_block_to_block (&se->post, &tmpse.post); lower = fold_convert (gfc_array_index_type, tmpse.expr); - if (se->loop->codimen == 0 - || n < se->loop->dimen + se->loop->codimen - 1) - { - /* ...and the upper bound. */ - gfc_init_se (&tmpse, NULL); - gfc_apply_interface_mapping (mapping, &tmpse, as->upper[dim]); - gfc_add_block_to_block (&se->pre, &tmpse.pre); - gfc_add_block_to_block (&se->post, &tmpse.post); - upper = fold_convert (gfc_array_index_type, tmpse.expr); - - /* Set the upper bound of the loop to UPPER - LOWER. */ - tmp = fold_build2_loc (input_location, MINUS_EXPR, - gfc_array_index_type, upper, lower); - tmp = gfc_evaluate_now (tmp, &se->pre); - se->loop->to[n] = tmp; - } + /* ...and the upper bound. */ + gfc_init_se (&tmpse, NULL); + gfc_apply_interface_mapping (mapping, &tmpse, as->upper[dim]); + gfc_add_block_to_block (&se->pre, &tmpse.pre); + gfc_add_block_to_block (&se->post, &tmpse.post); + upper = fold_convert (gfc_array_index_type, tmpse.expr); + + /* Set the upper bound of the loop to UPPER - LOWER. */ + tmp = fold_build2_loc (input_location, MINUS_EXPR, + gfc_array_index_type, upper, lower); + tmp = gfc_evaluate_now (tmp, &se->pre); + se->loop->to[n] = tmp; } } } @@ -2116,7 +2112,7 @@ gfc_set_vector_loop_bounds (gfc_loopinfo * loop, gfc_ss_info * info) int n; int dim; - for (n = 0; n < loop->dimen + loop->codimen; n++) + for (n = 0; n < loop->dimen; n++) { dim = info->dim[n]; if (info->ref->u.ar.dimen_type[dim] == DIMEN_VECTOR @@ -2948,7 +2944,7 @@ gfc_start_scalarized_body (gfc_loopinfo * loop, stmtblock_t * pbody) gcc_assert (!loop->array_parameter); - for (dim = loop->dimen + loop->codimen - 1; dim >= 0; dim--) + for (dim = loop->dimen - 1; dim >= 0; dim--) { n = loop->order[dim]; @@ -3102,7 +3098,7 @@ gfc_trans_scalarizing_loops (gfc_loopinfo * loop, stmtblock_t * body) pblock = body; /* Generate the loops. */ - for (dim = 0; dim < loop->dimen + loop->codimen; dim++) + for (dim = 0; dim < loop->dimen; dim++) { n = loop->order[dim]; gfc_trans_scalarized_loop_end (loop, n, pblock); @@ -3288,7 +3284,6 @@ gfc_conv_ss_startstride (gfc_loopinfo * loop) case GFC_SS_FUNCTION: case GFC_SS_COMPONENT: loop->dimen = ss->data.info.dimen; - loop->codimen = ss->data.info.codimen; goto done; /* As usual, lbound and ubound are exceptions!. */ @@ -3298,14 +3293,12 @@ gfc_conv_ss_startstride (gfc_loopinfo * loop) case GFC_ISYM_LBOUND: case GFC_ISYM_UBOUND: loop->dimen = ss->data.info.dimen; - loop->codimen = 0; goto done; case GFC_ISYM_LCOBOUND: case GFC_ISYM_UCOBOUND: case GFC_ISYM_THIS_IMAGE: loop->dimen = ss->data.info.dimen; - loop->codimen = ss->data.info.codimen; goto done; default: @@ -3888,7 +3881,7 @@ gfc_conv_loop_setup (gfc_loopinfo * loop, locus * where) mpz_t i; mpz_init (i); - for (n = 0; n < loop->dimen + loop->codimen; n++) + for (n = 0; n < loop->dimen; n++) { loopspec[n] = NULL; dynamic[n] = false; @@ -3997,7 +3990,7 @@ gfc_conv_loop_setup (gfc_loopinfo * loop, locus * where) /* Set the extents of this range. */ cshape = loopspec[n]->shape; - if (n < loop->dimen && cshape && INTEGER_CST_P (info->start[dim]) + if (cshape && INTEGER_CST_P (info->start[dim]) && INTEGER_CST_P (info->stride[dim])) { loop->from[n] = info->start[dim]; diff --git a/trans.h b/trans.h index 3404123..085334c 100644 --- a/trans.h +++ b/trans.h @@ -245,7 +245,7 @@ typedef struct gfc_loopinfo stmtblock_t pre; stmtblock_t post; - int dimen, codi
[Patch, fortran] [20/21] Remove coarray support in the scalarizer: Remove coarray argument
This patch, like the previous one, removes one argument from gfc_conv_section_startstride. We have to be more careful here as the two callers use different values for the flag. gfc_conv_ss_startstride calls gfc_conv_section_startstride with coarray=false. Thus removing every !coarray condition should not be worrying. gfc_conv_expr_descriptor on the other hand calls it with coarray=true; we have to be more careful. The first "if (!coarray)" is under an if (ar->dimen_type[dim] == DIMEN_VECTOR), irrelevant for coarrays. We add an assertion that ar->dimen_type[dim] == DIMEN_THIS_IMAGE in gfc_conv_expr_descriptor and can remove that condition. The second one protects the stride initialization. As stride is a local variable, we can remove that condition safely. The third and four ones protect initialisation of info->stride[dim]. For codimensions, the stride is ignored, so we can remove that one two. To make sure that we don't add unnecessary (and possibly costly) stride evaluation code, that is the (stride == NULL) branch is taken, we add an assertion in gfc_conv_expr_descriptor to check that ar->stride[dim] == NULL. Then we can remove the flag. OK? 2011-09-14 Mikael Morin * trans-array.c (gfc_conv_section_startstride): Remove coarray argument. Remove conditions on coarray. (gfc_conv_ss_startstride): Update call to gfc_conv_section_startstride. (gfc_conv_expr_descriptor): Ditto. Add assertions before the call. diff --git a/trans-array.c b/trans-array.c index 95ebf6c..a034886 100644 --- a/trans-array.c +++ b/trans-array.c @@ -3200,8 +3200,7 @@ evaluate_bound (stmtblock_t *block, tree *bounds, gfc_expr ** values, /* Calculate the lower bound of an array section. */ static void -gfc_conv_section_startstride (gfc_loopinfo * loop, gfc_ss * ss, int dim, - bool coarray) +gfc_conv_section_startstride (gfc_loopinfo * loop, gfc_ss * ss, int dim) { gfc_expr *stride = NULL; tree desc; @@ -3219,16 +3218,14 @@ gfc_conv_section_startstride (gfc_loopinfo * loop, gfc_ss * ss, int dim, /* We use a zero-based index to access the vector. */ info->start[dim] = gfc_index_zero_node; info->end[dim] = NULL; - if (!coarray) - info->stride[dim] = gfc_index_one_node; + info->stride[dim] = gfc_index_one_node; return; } gcc_assert (ar->dimen_type[dim] == DIMEN_RANGE || ar->dimen_type[dim] == DIMEN_THIS_IMAGE); desc = info->descriptor; - if (!coarray) -stride = ar->stride[dim]; + stride = ar->stride[dim]; /* Calculate the start of the range. For vector subscripts this will be the range of the vector. */ @@ -3240,9 +3237,9 @@ gfc_conv_section_startstride (gfc_loopinfo * loop, gfc_ss * ss, int dim, evaluate_bound (&loop->pre, info->end, ar->end, desc, dim, false); /* Calculate the stride. */ - if (!coarray && stride == NULL) + if (stride == NULL) info->stride[dim] = gfc_index_one_node; - else if (!coarray) + else { gfc_init_se (&se, NULL); gfc_conv_expr_type (&se, stride, gfc_array_index_type); @@ -3319,8 +3316,7 @@ done: gfc_conv_ss_descriptor (&loop->pre, ss, !loop->array_parameter); for (n = 0; n < ss->data.info.dimen; n++) - gfc_conv_section_startstride (loop, ss, ss->data.info.dim[n], - false); + gfc_conv_section_startstride (loop, ss, ss->data.info.dim[n]); break; case GFC_SS_INTRINSIC: @@ -5975,7 +5971,14 @@ gfc_conv_expr_descriptor (gfc_se * se, gfc_expr * expr, gfc_ss * ss) for (n = ss->data.info.dimen; n < ss->data.info.dimen + codim - 1; n++) { - gfc_conv_section_startstride (&loop, ss, n, true); + /* Make sure we are not lost somehow. */ + gcc_assert (info->ref->u.ar.dimen_type[n] == DIMEN_THIS_IMAGE); + + /* Make sure the call to gfc_conv_section_startstride won't + generate unnecessary code to calculate stride. */ + gcc_assert (info->ref->u.ar.stride[n] == NULL); + + gfc_conv_section_startstride (&loop, ss, n); loop.from[n] = info->start[n]; loop.to[n] = info->end[n]; }
[Patch, fortran] [17/21] Remove coarray support in the scalarizer: Remove gfc_ss::dimen field
This patch removes gfc_ss::codimen field and any code depending on it. OK? 2011-09-14 Mikael Morin * trans.h (gfc_ss_info): Remove codimen field. * trans-array.c (gfc_get_array_ss): Don't set codimen field. (gfc_trans_create_temp_array): Don't set descriptor's cobounds. (gfc_trans_constant_array_constructor): Update loop upper limit. (gfc_conv_ss_startstride): Don't set codimen field. Don't get descriptor's cobounds. (gfc_walk_variable_expr): Update dimension index. * trans-intrinsic.c (trans_this_image, trans_image_index, conv_intrinsic_cobound): Don't set codimen field diff --git a/trans-array.c b/trans-array.c index 0a9d281..fa05d2b 100644 --- a/trans-array.c +++ b/trans-array.c @@ -526,7 +526,6 @@ gfc_get_array_ss (gfc_ss *next, gfc_expr *expr, int dimen, gfc_ss_type type) ss->expr = expr; info = &ss->data.info; info->dimen = dimen; - info->codimen = 0; for (i = 0; i < info->dimen; i++) info->dim[i] = i; @@ -973,13 +972,6 @@ gfc_trans_create_temp_array (stmtblock_t * pre, stmtblock_t * post, size, tmp); size = gfc_evaluate_now (size, pre); } - for (n = info->dimen; n < info->dimen + info->codimen; n++) -{ - gfc_conv_descriptor_lbound_set (pre, desc, gfc_rank_cst[n], - gfc_index_zero_node); - if (n < info->dimen + info->codimen - 1) - gfc_conv_descriptor_ubound_set (pre, desc, gfc_rank_cst[n], loop->to[n]); -} /* Get the size of the array. */ @@ -1872,7 +1864,7 @@ gfc_trans_constant_array_constructor (gfc_loopinfo * loop, info->data = gfc_build_addr_expr (NULL_TREE, tmp); info->offset = gfc_index_zero_node; - for (i = 0; i < info->dimen + info->codimen; i++) + for (i = 0; i < info->dimen; i++) { info->delta[i] = gfc_index_zero_node; info->start[i] = gfc_index_zero_node; @@ -3330,12 +3322,6 @@ done: for (n = 0; n < ss->data.info.dimen; n++) gfc_conv_section_startstride (loop, ss, ss->data.info.dim[n], false, false); - for (n = ss->data.info.dimen; - n < ss->data.info.dimen + ss->data.info.codimen; n++) - gfc_conv_section_startstride (loop, ss, ss->data.info.dim[n], true, - n == ss->data.info.dimen - + ss->data.info.codimen -1); - break; case GFC_SS_INTRINSIC: @@ -7690,8 +7676,7 @@ gfc_walk_variable_expr (gfc_ss * ss, gfc_expr * expr) case DIMEN_RANGE: /* We don't add anything for sections, just remember this dimension for later. */ - newss->data.info.dim[newss->data.info.dimen - + newss->data.info.codimen] = n; + newss->data.info.dim[newss->data.info.dimen] = n; if (n < ar->dimen) newss->data.info.dimen++; break; @@ -7703,8 +7688,7 @@ gfc_walk_variable_expr (gfc_ss * ss, gfc_expr * expr) 1, GFC_SS_VECTOR); indexss->loop_chain = gfc_ss_terminator; newss->data.info.subscript[n] = indexss; - newss->data.info.dim[newss->data.info.dimen - + newss->data.info.codimen] = n; + newss->data.info.dim[newss->data.info.dimen] = n; if (n < ar->dimen) newss->data.info.dimen++; break; diff --git a/trans-intrinsic.c b/trans-intrinsic.c index bc21b02..c47e678 100644 --- a/trans-intrinsic.c +++ b/trans-intrinsic.c @@ -985,7 +985,6 @@ trans_this_image (gfc_se * se, gfc_expr *expr) gfc_init_se (&argse, NULL); ss = walk_coarray (expr->value.function.actual->expr); gcc_assert (ss != gfc_ss_terminator); - ss->data.info.codimen = corank; argse.want_coarray = 1; gfc_conv_expr_descriptor (&argse, expr->value.function.actual->expr, ss); gfc_add_block_to_block (&se->pre, &argse.pre); @@ -1171,7 +1170,6 @@ trans_image_index (gfc_se * se, gfc_expr *expr) gfc_init_se (&argse, NULL); ss = walk_coarray (expr->value.function.actual->expr); gcc_assert (ss != gfc_ss_terminator); - ss->data.info.codimen = corank; argse.want_coarray = 1; gfc_conv_expr_descriptor (&argse, expr->value.function.actual->expr, ss); gfc_add_block_to_block (&se->pre, &argse.pre); @@ -1496,7 +1494,6 @@ conv_intrinsic_cobound (gfc_se * se, gfc_expr * expr) ss = walk_coarray (arg->expr); gcc_assert (ss != gfc_ss_terminator); - ss->data.info.codimen = corank; gfc_init_se (&argse, NULL); argse.want_coarray = 1; diff --git a/trans.h b/trans.h index 085334c..535c207 100644 --- a/trans.h +++ b/trans.h @@ -118,7 +118,7 @@ gfc_coarray_type; typedef struct gfc_ss_info { - int dimen, codimen; + int dimen; /* The ref that holds information on this section. */ gfc_ref *ref; /* The descriptor of this array. */
[Patch, fortran] [21/21] Remove coarray support in the scalarizer: Final cleanup
This merges two identical switch cases. It didn't fit anywhere else, so here it is, alone. OK? 2011-09-14 Mikael Morin * trans-array.c (gfc_conv_ss_startstride): Merge two switch cases. diff --git a/trans-array.c b/trans-array.c index a034886..86eb6c8 100644 --- a/trans-array.c +++ b/trans-array.c @@ -3280,9 +3280,6 @@ gfc_conv_ss_startstride (gfc_loopinfo * loop) { case GFC_ISYM_LBOUND: case GFC_ISYM_UBOUND: - loop->dimen = ss->data.info.dimen; - goto done; - case GFC_ISYM_LCOBOUND: case GFC_ISYM_UCOBOUND: case GFC_ISYM_THIS_IMAGE:
[Patch, fortran] [19/21] Remove coarray support in the scalarizer: Remove coarray_last argument
At this point, gfc_conv_section_startstride has two callers, and for both of them, the last argument (coarray_last) has the value false. This patch removes the argument. OK? 2011-09-14 Mikael Morin * trans-array.c (gfc_conv_section_startstride): Remove coarray_last argument. Remove condition on coarray_last. (gfc_conv_ss_startstride): Update call to gfc_conv_section_startstride. (gfc_conv_expr_descriptor): Ditto. diff --git a/trans-array.c b/trans-array.c index 87d5200..95ebf6c 100644 --- a/trans-array.c +++ b/trans-array.c @@ -3201,7 +3201,7 @@ evaluate_bound (stmtblock_t *block, tree *bounds, gfc_expr ** values, static void gfc_conv_section_startstride (gfc_loopinfo * loop, gfc_ss * ss, int dim, - bool coarray, bool coarray_last) + bool coarray) { gfc_expr *stride = NULL; tree desc; @@ -3237,8 +3237,7 @@ gfc_conv_section_startstride (gfc_loopinfo * loop, gfc_ss * ss, int dim, /* Similarly calculate the end. Although this is not used in the scalarizer, it is needed when checking bounds and where the end is an expression with side-effects. */ - if (!coarray_last) -evaluate_bound (&loop->pre, info->end, ar->end, desc, dim, false); + evaluate_bound (&loop->pre, info->end, ar->end, desc, dim, false); /* Calculate the stride. */ if (!coarray && stride == NULL) @@ -3321,7 +3320,7 @@ done: for (n = 0; n < ss->data.info.dimen; n++) gfc_conv_section_startstride (loop, ss, ss->data.info.dim[n], - false, false); + false); break; case GFC_SS_INTRINSIC: @@ -5976,7 +5975,7 @@ gfc_conv_expr_descriptor (gfc_se * se, gfc_expr * expr, gfc_ss * ss) for (n = ss->data.info.dimen; n < ss->data.info.dimen + codim - 1; n++) { - gfc_conv_section_startstride (&loop, ss, n, true, false); + gfc_conv_section_startstride (&loop, ss, n, true); loop.from[n] = info->start[n]; loop.to[n] = info->end[n]; }
[Patch, fortran] [10/21] Remove coarray support in the scalarizer: Factor bound evaluation
In the following patches, we are going to use gfc_conv_section_startstride to calculate cobounds. The problem is that we don't want to calculate the last ucobound. As a result, we should have a way to evaluate the lower bound and the upper bound independently. This patch moves bound evaluation to a function of its own, and use that new function in gfc_conv_section_startstride. OK? PS: I could have kept the current flag trick to separate lower vs upper bound evaluation, but besides the fact that I don't like flags in general, having coarray in their name finished to convince me that they deserved to die. ;-) 2011-09-14 Mikael Morin * trans-array.c (gfc_conv_section_startstride): Move code to evaluate_bound. Use evaluate_bound. (evaluate_bound): New function. diff --git a/trans-array.c b/trans-array.c index 7f44514..ee5761b 100644 --- a/trans-array.c +++ b/trans-array.c @@ -3175,14 +3175,46 @@ gfc_trans_scalarized_loop_boundary (gfc_loopinfo * loop, stmtblock_t * body) } +/* Precalculate (either lower or upper) bound of an array section. + BLOCK: Block in which the (pre)calculation code will go. + BOUNDS[DIM]: Where the bound value will be stored once evaluated. + VALUES[DIM]: Specified bound (NULL <=> unspecified). + DESC: Array descriptor from which the bound will be picked if unspecified + (either lower or upper bound according to LBOUND). */ + +static void +evaluate_bound (stmtblock_t *block, tree *bounds, gfc_expr ** values, + tree desc, int dim, bool lbound) +{ + gfc_se se; + gfc_expr * input_val = values[dim]; + tree *output = &bounds[dim]; + + + if (input_val) +{ + /* Specified section bound. */ + gfc_init_se (&se, NULL); + gfc_conv_expr_type (&se, input_val, gfc_array_index_type); + gfc_add_block_to_block (block, &se.pre); + *output = se.expr; +} + else +{ + /* No specific bound specified so use the bound of the array. */ + *output = lbound ? gfc_conv_array_lbound (desc, dim) : + gfc_conv_array_ubound (desc, dim); +} + *output = gfc_evaluate_now (*output, block); +} + + /* Calculate the lower bound of an array section. */ static void gfc_conv_section_startstride (gfc_loopinfo * loop, gfc_ss * ss, int dim, bool coarray, bool coarray_last) { - gfc_expr *start; - gfc_expr *end; gfc_expr *stride = NULL; tree desc; gfc_se se; @@ -3207,48 +3239,18 @@ gfc_conv_section_startstride (gfc_loopinfo * loop, gfc_ss * ss, int dim, gcc_assert (ar->dimen_type[dim] == DIMEN_RANGE || ar->dimen_type[dim] == DIMEN_THIS_IMAGE); desc = info->descriptor; - start = ar->start[dim]; - end = ar->end[dim]; if (!coarray) stride = ar->stride[dim]; /* Calculate the start of the range. For vector subscripts this will be the range of the vector. */ - if (start) -{ - /* Specified section start. */ - gfc_init_se (&se, NULL); - gfc_conv_expr_type (&se, start, gfc_array_index_type); - gfc_add_block_to_block (&loop->pre, &se.pre); - info->start[dim] = se.expr; -} - else -{ - /* No lower bound specified so use the bound of the array. */ - info->start[dim] = gfc_conv_array_lbound (desc, dim); -} - info->start[dim] = gfc_evaluate_now (info->start[dim], &loop->pre); + evaluate_bound (&loop->pre, info->start, ar->start, desc, dim, true); /* Similarly calculate the end. Although this is not used in the scalarizer, it is needed when checking bounds and where the end is an expression with side-effects. */ if (!coarray_last) -{ - if (end) - { - /* Specified section start. */ - gfc_init_se (&se, NULL); - gfc_conv_expr_type (&se, end, gfc_array_index_type); - gfc_add_block_to_block (&loop->pre, &se.pre); - info->end[dim] = se.expr; - } - else - { - /* No upper bound specified so use the bound of the array. */ - info->end[dim] = gfc_conv_array_ubound (desc, dim); - } - info->end[dim] = gfc_evaluate_now (info->end[dim], &loop->pre); -} +evaluate_bound (&loop->pre, info->end, ar->end, desc, dim, false); /* Calculate the stride. */ if (!coarray && stride == NULL)
[Patch, fortran] [18/21] Remove coarray support in the scalarizer: Cleanup gfc_walk_variable_expr
This removes the coarray code in gfc_walk_variable_expr. See the ChangeLog for details. OK? 2011-09-14 Mikael Morin * trans-array.c (gfc_walk_variable_expr): Remove scalar coarray handling. Don't reset array ref's corank and codimensions' types in the full array ref case. Update loop upper limit. Remove DIMEN_THIS_IMAGE case. Remove unnecessary conditions. diff --git a/trans-array.c b/trans-array.c index fa05d2b..87d5200 100644 --- a/trans-array.c +++ b/trans-array.c @@ -7612,12 +7612,6 @@ gfc_walk_variable_expr (gfc_ss * ss, gfc_expr * expr) ar = &ref->u.ar; - if (ar->as->rank == 0 && ref->next != NULL) - { - /* Scalar coarray. */ - continue; - } - switch (ar->type) { case AR_ELEMENT: @@ -7632,7 +7626,6 @@ gfc_walk_variable_expr (gfc_ss * ss, gfc_expr * expr) /* Make sure array is the same as array(:,:), this way we don't need to special case all the time. */ ar->dimen = ar->as->rank; - ar->codimen = 0; for (n = 0; n < ar->dimen; n++) { ar->dimen_type[n] = DIMEN_RANGE; @@ -7641,14 +7634,6 @@ gfc_walk_variable_expr (gfc_ss * ss, gfc_expr * expr) gcc_assert (ar->end[n] == NULL); gcc_assert (ar->stride[n] == NULL); } - for (n = ar->dimen; n < ar->dimen + ar->as->corank; n++) - { - newss->data.info.dim[n] = n; - ar->dimen_type[n] = DIMEN_RANGE; - - gcc_assert (ar->start[n] == NULL); - gcc_assert (ar->end[n] == NULL); - } ss = newss; break; @@ -7657,14 +7642,12 @@ gfc_walk_variable_expr (gfc_ss * ss, gfc_expr * expr) newss->data.info.ref = ref; /* We add SS chains for all the subscripts in the section. */ - for (n = 0; n < ar->dimen + ar->codimen; n++) + for (n = 0; n < ar->dimen; n++) { gfc_ss *indexss; switch (ar->dimen_type[n]) { - case DIMEN_THIS_IMAGE: - continue; case DIMEN_ELEMENT: /* Add SS for elemental (scalar) subscripts. */ gcc_assert (ar->start[n]); @@ -7677,8 +7660,7 @@ gfc_walk_variable_expr (gfc_ss * ss, gfc_expr * expr) /* We don't add anything for sections, just remember this dimension for later. */ newss->data.info.dim[newss->data.info.dimen] = n; - if (n < ar->dimen) - newss->data.info.dimen++; + newss->data.info.dimen++; break; case DIMEN_VECTOR: @@ -7689,8 +7671,7 @@ gfc_walk_variable_expr (gfc_ss * ss, gfc_expr * expr) indexss->loop_chain = gfc_ss_terminator; newss->data.info.subscript[n] = indexss; newss->data.info.dim[newss->data.info.dimen] = n; - if (n < ar->dimen) - newss->data.info.dimen++; + newss->data.info.dimen++; break; default:
[Patch, fortran] [14/21] Remove coarray support in the scalarizer: Fix full array dimension type
This prevents a regression. The problem is in the full array case, ar->dimen is not set at resolution time. As a result, the code seting ar->dimen_type to DIMEN_THIS_IMAGE (see patch 2) sets codimensions' types starting at index 0 (as if it was a scalar coarray). Later, gfc_walk_variable_expr sets ar->dimen and the associated ar->dimen_type, overwriting the DIMEN_THIS_IMAGE from resolve_array_ref. There is code in gfc_walk_variable_expr which sets ar->dimen_type to DIMEN_RANGE for codimensions too, so there is no bug until we remove that code (patch 18). After patch 18, some codimensions can have dimen_type unset, more exactly set to 0, which is not an enum valid value, and everything breaks from there. This patch copies the code present in gfc_walk_variable expr to set dimension types for full array references. Then we can overwrite array dimen_type part as much as we want, the coarray dimen_type part will be left untouched (and properly set). The duplicate code in gfc_walk_variable_expr can't be removed, as it seems that some array references are not passed through resolve_array_ref (I didn't investigate further). OK? 2011-09-14 Mikael Morin * resolve.c (resolve_array_ref): Set array_ref's dimen field (and the associated dimen_type) in the full array ref case. diff --git a/resolve.c b/resolve.c index 4c991c8..c594ebf 100644 --- a/resolve.c +++ b/resolve.c @@ -4637,8 +4637,23 @@ resolve_array_ref (gfc_array_ref *ar) } } - if (ar->type == AR_FULL && ar->as->rank == 0) -ar->type = AR_ELEMENT; + if (ar->type == AR_FULL) +{ + if (ar->as->rank == 0) + ar->type = AR_ELEMENT; + + /* Make sure array is the same as array(:,:), this way + we don't need to special case all the time. */ + ar->dimen = ar->as->rank; + for (i = 0; i < ar->dimen; i++) + { + ar->dimen_type[i] = DIMEN_RANGE; + + gcc_assert (ar->start[i] == NULL); + gcc_assert (ar->end[i] == NULL); + gcc_assert (ar->stride[i] == NULL); + } +} /* If the reference type is unknown, figure out what kind it is. */
[Patch, fortran] [15/21] Remove coarray support in the scalarizer: Remove gfc_ss::data::temp_ss::codimen field
This removes the gfc_ss::data::temp::codimen field and the code depending on it. OK? 2011-09-14 Mikael Morin * trans.h (gfc_ss): Remove data.temp.codimen field. * trans-array.c (gfc_conv_resolve_dependencies, gfc_conv_expr_descriptor): Don't set temp's codimen field. diff --git a/trans-array.c b/trans-array.c index b132bf6..9d4ef5a 100644 --- a/trans-array.c +++ b/trans-array.c @@ -3861,7 +3861,6 @@ temporary: base_type = gfc_get_element_type (base_type); loop->temp_ss = gfc_get_temp_ss (base_type, dest->string_length, loop->dimen); - loop->temp_ss->data.temp.codimen = loop->codimen; gfc_add_ss_to_loop (loop, loop->temp_ss); } else @@ -5920,7 +5919,6 @@ gfc_conv_expr_descriptor (gfc_se * se, gfc_expr * expr, gfc_ss * ss) se->string_length = loop.temp_ss->string_length; gcc_assert (loop.temp_ss->data.temp.dimen == loop.dimen); - loop.temp_ss->data.temp.codimen = loop.codimen; gfc_add_ss_to_loop (&loop, loop.temp_ss); } diff --git a/trans.h b/trans.h index 6157a88..3404123 100644 --- a/trans.h +++ b/trans.h @@ -212,7 +212,7 @@ typedef struct gfc_ss { /* The rank of the temporary. May be less than the rank of the assigned expression. */ - int dimen, codimen; + int dimen; tree type; } temp;
[Patch, fortran] [11/21] Remove coarray support in the scalarizer: Support 0-rank loop in gfc_conv_ss_startstride
We are going to remove the gfc_loopinfo::codimen field. The following assertion in gfc_conv_ss_startstride: gcc_assert (loop->dimen + loop->codimen != 0); is going to fail for scalar coarrays when updated to gcc_assert (loop->dimen != 0); However, gfc_conv_expr_descriptor, requires (if we don't want to rewrite it completely) that the expression's gfc_ss struct passes through the scalarizer (in the scalar coarray case, it will do nothing but get the descriptor). This patch changes the assertion so that zero rank loops are accepted. OK? 2011-09-14 Mikael Morin * trans-array.c (gfc_conv_ss_startstride): Support zero rank loop. diff --git a/trans-array.c b/trans-array.c index ee5761b..067fe0b 100644 --- a/trans-array.c +++ b/trans-array.c @@ -3279,8 +3279,7 @@ gfc_conv_ss_startstride (gfc_loopinfo * loop) loop->dimen = 0; /* Determine the rank of the loop. */ - for (ss = loop->ss; - ss != gfc_ss_terminator && loop->dimen == 0; ss = ss->loop_chain) + for (ss = loop->ss; ss != gfc_ss_terminator; ss = ss->loop_chain) { switch (ss->type) { @@ -3290,7 +3289,7 @@ gfc_conv_ss_startstride (gfc_loopinfo * loop) case GFC_SS_COMPONENT: loop->dimen = ss->data.info.dimen; loop->codimen = ss->data.info.codimen; - break; + goto done; /* As usual, lbound and ubound are exceptions!. */ case GFC_SS_INTRINSIC: @@ -3300,14 +3299,14 @@ gfc_conv_ss_startstride (gfc_loopinfo * loop) case GFC_ISYM_UBOUND: loop->dimen = ss->data.info.dimen; loop->codimen = 0; - break; + goto done; case GFC_ISYM_LCOBOUND: case GFC_ISYM_UCOBOUND: case GFC_ISYM_THIS_IMAGE: loop->dimen = ss->data.info.dimen; loop->codimen = ss->data.info.codimen; - break; + goto done; default: break; @@ -3320,8 +3319,9 @@ gfc_conv_ss_startstride (gfc_loopinfo * loop) /* We should have determined the rank of the expression by now. If not, that's bad news. */ - gcc_assert (loop->dimen + loop->codimen != 0); + gcc_unreachable (); +done: /* Loop over all the SS in the chain. */ for (ss = loop->ss; ss != gfc_ss_terminator; ss = ss->loop_chain) {
[Patch, fortran] [03/21] Remove coarray support in the scalarizer: Simplify coarray descriptor setup.
In gfc_conv_expr_descriptor, the code setting the descriptor cobounds (copied from the non-coarray case) has unnecessary stuff irrelevant to coarrays: as codimensions can't be transposed, the condition dim[n] == n is guaranteed to hold true for the codimensions. This patch removes unnecessary code based on that assumption. OK? 2011-09-14 Mikael Morin * trans-array.c (gfc_conv_expr_descriptor): Simplify coarray descriptor setup code. diff --git a/trans-array.c b/trans-array.c index 37cdeb5..88849ef 100644 --- a/trans-array.c +++ b/trans-array.c @@ -6140,22 +6140,13 @@ gfc_conv_expr_descriptor (gfc_se * se, gfc_expr * expr, gfc_ss * ss) for (n = ndim; n < ndim + codim; n++) { - /* look for the corresponding scalarizer dimension: dim. */ - for (dim = 0; dim < ndim + codim; dim++) - if (info->dim[dim] == n) - break; - - /* loop exited early: the DIM being looked for has been found. */ - gcc_assert (dim < ndim + codim); - - from = loop.from[dim]; - to = loop.to[dim]; + from = loop.from[n]; + to = loop.to[n]; gfc_conv_descriptor_lbound_set (&loop.pre, parm, - gfc_rank_cst[dim], from); + gfc_rank_cst[n], from); if (n < ndim + codim - 1) gfc_conv_descriptor_ubound_set (&loop.pre, parm, - gfc_rank_cst[dim], to); - dim++; + gfc_rank_cst[n], to); } if (se->data_not_needed)
[Patch, fortran] [02/21] Remove coarray support in the scalarizer: Move coarray resolution code around
compare_spec_to_ref, despite its name, has the side effect of writing to the array ref it is supposed to check (in the coarray case). I found this very surprising, hence this patch which moves the relevant code to resolve_array_ref, just after the call to compare_spec_to_ref (so that the code path is exactly the same). OK? 2011-09-14 Mikael Morin * resolve.c (compare_spec_to_ref): Move coarray ref initialization code... (resolve_array_ref): ... here. diff --git a/resolve.c b/resolve.c index b038402..4c991c8 100644 --- a/resolve.c +++ b/resolve.c @@ -4389,14 +4389,6 @@ compare_spec_to_ref (gfc_array_ref *ar) return FAILURE; } - if (as->corank && ar->codimen == 0) -{ - int n; - ar->codimen = as->corank; - for (n = ar->dimen; n < ar->dimen + ar->codimen; n++) - ar->dimen_type[n] = DIMEN_THIS_IMAGE; -} - return SUCCESS; } @@ -4665,6 +4657,14 @@ resolve_array_ref (gfc_array_ref *ar) if (!ar->as->cray_pointee && compare_spec_to_ref (ar) == FAILURE) return FAILURE; + if (ar->as->corank && ar->codimen == 0) +{ + int n; + ar->codimen = ar->as->corank; + for (n = ar->dimen; n < ar->dimen + ar->codimen; n++) + ar->dimen_type[n] = DIMEN_THIS_IMAGE; +} + return SUCCESS; }
[Patch, fortran] [00/21] Remove coarray support in the scalarizer
Hello, the scalarizer is there to generate loops for assignments over more than one element. Tobias extended it at various places to support coarrays, but this should not be necessary as coarrays in assignments either refer to the array present on the local image or to the one on the remote image (in which case a local temporary is created). In either case the coarray is seen as a normal array by the assignment code, which makes the point of having coarray-specific handling in the scalarizer moot. In fact the reason for the presence of coarrays in the scalarizer is that gfc_conv_expr_descriptor uses the scalarizer to setup array (co)bounds. This patch serie removes the coarray-specific code in the scalarizer, and replaces it with some additional cobound setup code in gfc_conv_expr_descriptor. It is supposed to make the code easier to grasp by having a scalarizer free of coarray stuff (it is complicated enough without it), and by having cobound setup code gathered in a single place. The downside is that it makes gfc_conv_expr_descriptor even bigger than it was already. Every patch has been tested by incremental bootstrap and running the fortran testsuite with RUNTESTLAGS=caf.exp and RUNTESTFLAGS="dg.exp=*coarray*". The last one has also passed a full fortran regression test. OK for trunk? Mikael patchset layout: - patches 1..4: Preliminary cleanups. Those are quite independant on the rest. Patch 4 is optional. - patches 5..13: Step by step move from scalarizer-provided cobounds setup code to explicit specific code in gfc_conv_expr_descriptor. Patch 6 is a request for explaination and is not intended for check-in. - patch 14: Fixes a regression. - patches 15..21: This is the point of all the rest: remove coarray-specific code in the scalarizer. check.c | 34 +--- expr.c|6 ++ resolve.c | 35 ++-- trans-array.c | 248 +++-- trans-expr.c |6 +- trans-intrinsic.c | 52 +++- trans.h |8 +- 7 files changed, 180 insertions(+), 209 deletions(-)
[Patch, fortran] [01/21] Remove coarray support in the scalarizer: Remove is_coarray
There are two functions checking that an expression is a coarray. This keeps the public one (which seems to be the more complete) and removes the other one. OK? PS: As this changes a rejects-valid into an ice-on-valid (see PR 50420 http://gcc.gnu.org/bugzilla/show_bug.cgi?id=50420), it can be delayed if preferred; unfortunately, patch 14 fails without this one. 2011-09-14 Mikael Morin * check.c (is_coarray): Remove. (coarray_check): Use gfc_is_coarray. diff --git a/check.c b/check.c index 3d4f4c8..81f7b30 100644 --- a/check.c +++ b/check.c @@ -203,42 +203,10 @@ double_check (gfc_expr *d, int n) } -/* Check whether an expression is a coarray (without array designator). */ - -static bool -is_coarray (gfc_expr *e) -{ - bool coarray = false; - gfc_ref *ref; - - if (e->expr_type != EXPR_VARIABLE) -return false; - - coarray = e->symtree->n.sym->attr.codimension; - - for (ref = e->ref; ref; ref = ref->next) -{ - if (ref->type == REF_COMPONENT) - coarray = ref->u.c.component->attr.codimension; - else if (ref->type != REF_ARRAY || ref->u.ar.dimen != 0) - coarray = false; - else if (ref->type == REF_ARRAY && ref->u.ar.codimen != 0) - { - int n; - for (n = 0; n < ref->u.ar.codimen; n++) - if (ref->u.ar.dimen_type[n] != DIMEN_THIS_IMAGE) - coarray = false; - } -} - - return coarray; -} - - static gfc_try coarray_check (gfc_expr *e, int n) { - if (!is_coarray (e)) + if (!gfc_is_coarray (e)) { gfc_error ("Expected coarray variable as '%s' argument to the %s " "intrinsic at %L", gfc_current_intrinsic_arg[n]->name,
Re: [IA64] Use match_test rather than eq/ne symbol_ref
On Tue, 2011-09-13 at 18:51 +0100, Richard Sandiford wrote: > As per the subject. Tested by making sure that there were no new > warnings building ia64-linux-gnu, and that there were no changes > in the assembly output for the C and C++ testsuite. OK to install? > > Richard Looks OK to me. Steve Ellcey s...@cup.hp.com
Re: [cxx-mem-model] C++ wrappers
On 09/15/2011 05:49 PM, Jason Merrill wrote: On 09/15/2011 05:14 PM, Richard Henderson wrote: On 09/15/2011 02:03 PM, Andrew MacLeod wrote: a template calling that particular method was probably not invoked. The joys of c++ templates eh :-) Isn't there an extension that forces an entire class template to be instantiated? It's standard C++. template struct __atomic_base; etc. Do we want to start exporting these from libstdc++? I dont know the ins and outs of the library... the typedefs and specializations are already there, I can write: atomic_char c; c = 'a' in my program and it will invoke the required template for the class and call the appropriate store routine or whatever. I'm working on the changes now which will enable lock-free versions of the templates for the types which are supported by the __sync routines, and locked versions for everything else. do we need to explicitly export something? Andrew
Re: [cxx-mem-model] C++ wrappers
On 09/15/2011 05:14 PM, Richard Henderson wrote: On 09/15/2011 02:03 PM, Andrew MacLeod wrote: On 09/15/2011 04:47 PM, Richard Henderson wrote: On 09/09/2011 01:29 PM, Andrew MacLeod wrote: ! { return __sync_mem_add__fetch(&_M_i, 1, memory_order_seq_cst); } ^^ typo Which begs the question of how this didn't get caught in the testsuite somewhere? good catch :-) a template calling that particular method was probably not invoked. The joys of c++ templates eh :-) Isn't there an extension that forces an entire class template to be instantiated? It is instantiated, but if someone doesn't actually make a call to operator++() volatile (which I think is where this mistyped call was), you don't end up with an unresolved function call... At least it didn't trip up any tests cases and I did a full bootstrap & make check. Andrew
Re: [cxx-mem-model] C++ wrappers
On 09/15/2011 05:14 PM, Richard Henderson wrote: On 09/15/2011 02:03 PM, Andrew MacLeod wrote: a template calling that particular method was probably not invoked. The joys of c++ templates eh :-) Isn't there an extension that forces an entire class template to be instantiated? It's standard C++. template struct __atomic_base; etc. Do we want to start exporting these from libstdc++? Jason
Re: [cxx-mem-model] C++ wrappers
On 09/15/2011 02:03 PM, Andrew MacLeod wrote: > On 09/15/2011 04:47 PM, Richard Henderson wrote: >> On 09/09/2011 01:29 PM, Andrew MacLeod wrote: >>> ! { return __sync_mem_add__fetch(&_M_i, 1, memory_order_seq_cst); } >> >> ^^ typo >> >> Which begs the question of how this didn't get caught >> in the testsuite somewhere? >> > > good catch :-) > > a template calling that particular method was probably not invoked. The joys > of c++ templates eh :-) Isn't there an extension that forces an entire class template to be instantiated? r~
Re: [cxx-mem-model] Consolidation 2 - the fetch ops.
On 09/15/2011 02:06 PM, Andrew MacLeod wrote: >> Please don't embed = inside IF conditions. >> > sure, i just figured the extra parenthesis covered that. It covers the -Wall warning, but I still find it significantly harder to read. And in this case all it does is make the IF line too long. r~
Re: [cxx-mem-model] Consolidation 2 - the fetch ops.
On 09/15/2011 05:02 PM, Richard Henderson wrote: ! /* Initialize the fields for each supported opcode. */ ! static struct op_functions add_op = { sync_mem_fetch_add_optab, ! sync_mem_add_fetch_optab, ! sync_mem_add_optab, ! sync_old_add_optab, ! sync_new_add_optab, ! sync_add_optab, ! MINUS ! }; Can these be const? yes, and I was thinking about doing just that in fact. ! static struct op_functions xor_op = { sync_mem_fetch_xor_optab, ! sync_mem_xor_fetch_optab, ! sync_mem_xor_optab, ! sync_old_xor_optab, ! sync_new_xor_optab, ! sync_xor_optab, ! UNKNOWN ! }; XOR is its own reverse. ;-) yeah, that would help with compile time... :-) ! if ((result = maybe_emit_op (optab, target, mem, val, true, model, true))) ! return result; Please don't embed = inside IF conditions. sure, i just figured the extra parenthesis covered that. I'll break them out. (They started this way, then I broke them out, and then put them back.) Andrew
Re: [cxx-mem-model] C++ wrappers
On 09/15/2011 04:47 PM, Richard Henderson wrote: On 09/09/2011 01:29 PM, Andrew MacLeod wrote: ! { return __sync_mem_add__fetch(&_M_i, 1, memory_order_seq_cst); } ^^ typo Which begs the question of how this didn't get caught in the testsuite somewhere? good catch :-) a template calling that particular method was probably not invoked. The joys of c++ templates eh :-) I do plan to flush out a set of tests which try every variant, just havent gotten to it yet. Andrew
Re: [cxx-mem-model] Consolidation 2 - the fetch ops.
> ! /* Initialize the fields for each supported opcode. */ > ! static struct op_functions add_op = { sync_mem_fetch_add_optab, > ! sync_mem_add_fetch_optab, > ! sync_mem_add_optab, > ! sync_old_add_optab, > ! sync_new_add_optab, > ! sync_add_optab, > ! MINUS > ! }; Can these be const? > ! static struct op_functions xor_op = { sync_mem_fetch_xor_optab, > ! sync_mem_xor_fetch_optab, > ! sync_mem_xor_optab, > ! sync_old_xor_optab, > ! sync_new_xor_optab, > ! sync_xor_optab, > ! UNKNOWN > ! }; XOR is its own reverse. ;-) > ! if ((result = maybe_emit_op (optab, target, mem, val, true, model, > true))) > ! return result; Please don't embed = inside IF conditions. Otherwise this looks ok. r~
Re: Using sysroot in testsuite. Trunk version. (issue4248059)
On Mon, Mar 7, 2011 at 20:47, Mike Stump wrote: > On Mar 7, 2011, at 5:30 PM, Diego Novillo wrote: >> This is the patch I had in mind for trunk. > >> OK for 4.7? > > Ok. I'll just note here, that this is necessary for canadian, but not always > sufficient. It is monotonically better however. I looked at the > Make/configure issues, and they look fine to me, though, there are others > that specialize in that. Thanks. Took me a while, but I've applied this patch as rev 178897. Diego.
Re: Fwd: [cxx-mem-model] Don't over process __sync_mem parameters.
On 09/15/2011 04:35 PM, Richard Henderson wrote: * c-family/c-common.c (sync_resolve_params, sync_resolve_return): Only tweak parameters that are the same type size. (resolve_overloaded_builtin): Use new param for __sync_mem builtins. * testsuite/gcc.dg/sync-mem-param.c: New testcase to check correct number of parameters on a sample __sync_mem builtin. Ok, except, + /* __sync_mem routines are not variadic. */ + if (!orig_format&& VEC_length (tree, params) != parmnum + 1) + { + error ("too many arguments to function %qE", orig_function); + return false; + } I shouldn't think you need this? Surely because the function type is non-variadic the c front end would diagnose this. It didn't seem to until I added it... The expanders handle issuing the message if there aren't enough parameters already... It doesnt really complain about types either.. maybe it just passes builtins on to be processed by expanders? Andrew
[lra] patch to use equiv for restoring values around calls
The following patch permits to use equivalences to restore pseudo values around the call. As side effect the patch solves SPECFP2000 art degradation on x86-64 and possibly on ppc64 (unfortunately the score results for art is too unstable on power7). The patch was successfully bootstrapped on x86-64 and ppc64. 2011-09-15 Vladimir Makarov * lra.c (remove_scratches): Rescan changed insns. (lra): Call df_analyze after lr_save_restore if necessary. * lra-int.h (lr_save_restore): Change the prototype. * lra-saves.c (struct save_regs): Rename mem_reg to saved_value. Add new member equiv_p. (equiv_for_save): New function. (update_live_info_p): New static variable. (setup_save_regs): Add code to regno equivalence. (insert_save_restore): Ditto. (lra_save_restore): Return true if need to update live info. Index: lra.c === --- lra.c (revision 178734) +++ lra.c (working copy) @@ -1791,6 +1791,7 @@ static void remove_scratches (void) { int i; + bool insn_changed_p; basic_block bb; rtx insn, reg; loc_t loc; @@ -1806,10 +1807,12 @@ remove_scratches (void) { id = lra_get_insn_recog_data (insn); static_id = id->insn_static_data; + insn_changed_p = false; for (i = 0; i < static_id->n_operands; i++) if (GET_CODE (*id->operand_loc[i]) == SCRATCH && GET_MODE (*id->operand_loc[i]) != VOIDmode) { + insn_changed_p = true; *id->operand_loc[i] = reg = lra_create_new_reg (static_id->operand[i].mode, *id->operand_loc[i], ALL_REGS, NULL); @@ -1826,6 +1829,10 @@ remove_scratches (void) fprintf (lra_dump_file, "Removing SCRATCH in insn #%u (nop %d)\n", INSN_UID (insn), i); } + if (insn_changed_p) + /* Because we might use DF right after caller-saves sub-pass +we need to keep DF info up to date. */ + df_insn_rescan (insn); } } @@ -2105,7 +2112,10 @@ lra (FILE *f) df_set_regs_ever_live (i, true); if (flag_caller_saves) -lra_save_restore (); +{ + if (lra_save_restore ()) + df_analyze (); +} /* We don't DF from now and avoid its using because it is to expensive when a lot of RTL changes are made. */ Index: lra-int.h === --- lra-int.h (revision 178734) +++ lra-int.h (working copy) @@ -328,7 +328,7 @@ extern void lra_coalesce (void); /* lra-saves.c: */ -extern void lra_save_restore (void); +extern bool lra_save_restore (void); /* lra-spills.c: */ Index: lra-saves.c === --- lra-saves.c (revision 178734) +++ lra-saves.c (working copy) @@ -161,9 +161,12 @@ mark_pseudo_store (rtx reg, const_rtx se /* Info about save/restore code for a pseudo. */ struct save_regs { - /* A spilled pseudo which hold value of the corresponding - saved/restored pseudo around calls. */ - rtx mem_reg; + /* True if MEM_REG is actually an equivalence of the corresponding + saved pseudo. */ + bool equiv_p; + /* A spilled pseudo or equivalence which hold value of the + corresponding saved/restored pseudo around calls. */ + rtx saved_value; /* Saving/restoring of a pseudo could be done in a mode different from the pseudo mode. The following is the pseudo or a subreg of the pseudo and is used in save/restore code. */ @@ -175,71 +178,125 @@ struct save_regs /* Map: regno -> info about save/restore for REGNO. */ static struct save_regs *save_regs; -/* Set up if necessary saved_reg and mem_reg for REGNO. */ +/* Return equivalence value we can use for restoring. NULL, + otherwise. ??? Should we use and is it worth to invariants with + caller saved hard registers. */ +static rtx +equiv_for_save (int regno) +{ + return ira_reg_equiv[regno].constant; +} + +/* Set to true if changes in live info are too complex to update it + here. */ +static bool update_live_info_p; + +/* Set up if necessary equiv_p, saved_value, and saved_reg for REGNO. */ static void setup_save_regs (int regno) { - if (save_regs[regno].mem_reg == NULL_RTX) + if (save_regs[regno].saved_value == NULL_RTX) { enum machine_mode mode; - rtx saved_reg = regno_reg_rtx[regno]; - int hard_regno = reg_renumber[regno]; + int hard_regno; + rtx equiv, saved_reg = regno_reg_rtx[regno]; - gcc_assert (hard_regno >= 0); - mode = (HARD_REGNO_CALLER_SAVE_MODE - (hard_regno, - hard_regno_nregs[hard_regno][PSEUDO_REGNO_MODE (regno)], - PSEUDO_REGNO_MODE (regno))); - if (mode != PSEUDO_REGNO_MODE (regno)) - saved_reg = gen_rtx_SUBREG (mode, saved_reg, 0); - save_
Re: [cxx-mem-model] C++ wrappers
On 09/09/2011 01:29 PM, Andrew MacLeod wrote: > ! { return __sync_mem_add__fetch(&_M_i, 1, memory_order_seq_cst); } ^^ typo Which begs the question of how this didn't get caught in the testsuite somewhere? Otherwise it looks ok. r~
Re: [cxx-mem-model] Some consolidation.
> * expr.h: Remove prototypes. > * sync-builtins.def (BUILT_IN_SYNC_MEM_FLAG_TEST_AND_SET, > BUILT_IN_SYNC_MEM_FLAG_CLEAR): Remove. > * optabs.h (enum direct_optab_index): Remove DOI_sync_mem_flag_*. > * optabs.c (expand_sync_lock_test_and_set): Remove. > (expand_sync_mem_exchange): Incorporate sync_lock_test_and_set here. > (expand_sync_mem_store): If storing const0_rtx, try using > sync_lock_release. > * builtins.c (expand_builtin_sync_lock_test_and_set): Expand into > sync_mem_exchange instead. > (expand_builtin_sync_lock_release): Expand into sync_mem_store of 0. > (expand_builtin_sync_mem_flag_test_and_set): Remove. > (expand_builtin_sync_mem_flag_clear): Remove. > (expand_builtin): Remove cases for __SYNC_MEM_FLAG_*. > * doc/extend.texi: Update documentation to match. > > * testsuite/gcc.dg/sync-mem-invalid.c: Remove __sync_mem_flag_clear > tests. > * testsuite/gcc.dg/sync-mem-flag.c: Remove. Ok. r~
Re: [google] Add l-ipo.h to PLUGIN_HEADERS in gcc-4_6 branch
On Thu, Sep 15, 2011 at 16:37, Easwaran Raman wrote: > This patch adds l-ipo.h to PLUGIN_HEADERS. Ok for gcc-4_6 branch? > -Easwaran > 2011-09-15 Easwaran Raman > * Makefile.in (PLUGIN_HEADERS): Add l-ipo.h. OK. Also needed in google/main, I think. Diego.
Re: Fwd: [cxx-mem-model] Don't over process __sync_mem parameters.
> * c-family/c-common.c (sync_resolve_params, sync_resolve_return): Only > tweak parameters that are the same type size. > (resolve_overloaded_builtin): Use new param for __sync_mem builtins. > > * testsuite/gcc.dg/sync-mem-param.c: New testcase to check correct > number of parameters on a sample __sync_mem builtin. Ok, except, > + /* __sync_mem routines are not variadic. */ > + if (!orig_format && VEC_length (tree, params) != parmnum + 1) > + { > + error ("too many arguments to function %qE", orig_function); > + return false; > + } I shouldn't think you need this? Surely because the function type is non-variadic the c front end would diagnose this. r~
[wwwdocs] Buildstat update for 4.6
Latest results for 4.6.x -tgc Testresults for 4.6.1: i386-pc-mingw32 mips-sgi-irix6.5 Index: buildstat.html === RCS file: /cvs/gcc/wwwdocs/htdocs/gcc-4.6/buildstat.html,v retrieving revision 1.6 diff -u -r1.6 buildstat.html --- buildstat.html 21 Aug 2011 20:58:32 - 1.6 +++ buildstat.html 15 Sep 2011 19:51:33 - @@ -87,6 +87,7 @@ i386-pc-mingw32 Test results: +http://gcc.gnu.org/ml/gcc-testresults/2011-08/msg02381.html";>4.6.1, http://gcc.gnu.org/ml/gcc-testresults/2011-07/msg00696.html";>4.6.1 @@ -165,6 +166,7 @@ mips-sgi-irix6.5 Test results: +http://gcc.gnu.org/ml/gcc-testresults/2011-09/msg00848.html";>4.6.1, http://gcc.gnu.org/ml/gcc-testresults/2011-04/msg00060.html";>4.6.0, http://gcc.gnu.org/ml/gcc-testresults/2011-04/msg00031.html";>4.6.0, http://gcc.gnu.org/ml/gcc-testresults/2011-03/msg02830.html";>4.6.0,
Re: Handle multi-word regsiters in REG_CFA_RESTORE notes
On 09/14/2011 06:12 PM, Bernd Schmidt wrote: > + unsigned int orig_regno = REGNO (reg); > + int nregs = hard_regno_nregs[orig_regno][GET_MODE (reg)]; > + while (nregs-- > 0) The rest of the file seems to use targetm.dwarf_register_span. This probably ought to do the same. r~
[PATCH, i386]: A couple of cleanups.
Hello! No functional change. 2011-09-15 Uros Bizjak * config/i386/i386.c (output_fp_compare): Return %v prefixed instruction mnemonics for TARGET_AVX. * config/i386/i386.md (*movdf_internal_rex64): use cond RTX in "type" attribute calculation. (*movdf_internal): Ditto. (*movsf_internal): Ditto. Tested on x86_64-pc-linux-gnu {,-m32}, committed to mainline SVN. Uros. Index: i386.md === --- i386.md (revision 178889) +++ i386.md (working copy) @@ -3028,7 +3030,17 @@ gcc_unreachable(); } } - [(set_attr "type" "fmov,fmov,fmov,imov,imov,imov,multi,sselog1,ssemov,ssemov,ssemov,ssemov,ssemov") + [(set (attr "type") + (cond [(eq_attr "alternative" "0,1,2") +(const_string "fmov") + (eq_attr "alternative" "3,4,5") +(const_string "imov") + (eq_attr "alternative" "6") +(const_string "multi") + (eq_attr "alternative" "7") +(const_string "sselog1") + ] + (const_string "ssemov"))) (set (attr "modrm") (if_then_else (and (eq_attr "alternative" "5") (eq_attr "type" "imov")) @@ -3158,7 +3170,15 @@ (if_then_else (eq_attr "alternative" "5,6,7,8") (const_string "sse2") (const_string "*"))) - (set_attr "type" "fmov,fmov,fmov,multi,multi,sselog1,ssemov,ssemov,ssemov,sselog1,ssemov,ssemov,ssemov") + (set (attr "type") + (cond [(eq_attr "alternative" "0,1,2") +(const_string "fmov") + (eq_attr "alternative" "3,4") +(const_string "multi") + (eq_attr "alternative" "5,9") +(const_string "sselog1") + ] + (const_string "ssemov"))) (set (attr "prefix") (if_then_else (eq_attr "alternative" "0,1,2,3,4") (const_string "orig") @@ -3272,7 +3292,17 @@ gcc_unreachable (); } } - [(set_attr "type" "fmov,fmov,fmov,imov,imov,sselog1,ssemov,ssemov,ssemov,mmxmov,mmxmov,mmxmov,ssemov,ssemov,mmxmov,mmxmov") + [(set (attr "type") + (cond [(eq_attr "alternative" "0,1,2") +(const_string "fmov") + (eq_attr "alternative" "3,4") +(const_string "multi") + (eq_attr "alternative" "5") +(const_string "sselog1") + (eq_attr "alternative" "9,10,11,14,15") +(const_string "mmxmov") + ] + (const_string "ssemov"))) (set (attr "prefix") (if_then_else (eq_attr "alternative" "5,6,7,8,12,13") (const_string "maybe_vex") Index: i386.c === --- i386.c (revision 178889) +++ i386.c (working copy) @@ -14900,21 +14900,16 @@ output_fp_compare (rtx insn, rtx *operands, bool e if (is_sse) { - static const char ucomiss[] = "vucomiss\t{%1, %0|%0, %1}"; - static const char ucomisd[] = "vucomisd\t{%1, %0|%0, %1}"; - static const char comiss[] = "vcomiss\t{%1, %0|%0, %1}"; - static const char comisd[] = "vcomisd\t{%1, %0|%0, %1}"; - if (GET_MODE (operands[0]) == SFmode) if (unordered_p) - return &ucomiss[TARGET_AVX ? 0 : 1]; + return "%vucomiss\t{%1, %0|%0, %1}"; else - return &comiss[TARGET_AVX ? 0 : 1]; + return "%vcomiss\t{%1, %0|%0, %1}"; else if (unordered_p) - return &ucomisd[TARGET_AVX ? 0 : 1]; + return "%vucomisd\t{%1, %0|%0, %1}"; else - return &comisd[TARGET_AVX ? 0 : 1]; + return "%vcomisd\t{%1, %0|%0, %1}"; } gcc_assert (STACK_TOP_P (cmp_op0));
Re: Vector shuffling
> +The elements of the input vectors are numbered from left to right across > +one or both of the vectors. Each element in the mask specifies a number > +of element from the input vector(s). Consider the following example. It would be more preferable to talk about the memory ordering of the elements rather than "left" and "right" which are ambiguous at best. > + if (TREE_CODE (mask) == VECTOR_CST) > +{ > + tree m_type, call; > + tree fn = targetm.vectorize.builtin_vec_perm (TREE_TYPE (v0), &m_type); > + /*rtx t;*/ Leftover crap. > + > + if (!fn) > + goto vshuffle; > + > + if (m_type != TREE_TYPE (TREE_TYPE (mask))) > + { > + int units = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask)); > + tree cvt = build_vector_type (m_type, units); > + mask = fold_convert (cvt, mask); > + } > + > + call = fold_build1 (ADDR_EXPR, build_pointer_type (TREE_TYPE (fn)), > fn); > + call = build_call_nary (type /* ? */, call, 3, v0, v1, mask); > + > + return expand_expr_real_1 (call, target, VOIDmode, EXPAND_NORMAL, > NULL); > +} > + > +vshuffle: > + gcc_assert (operand_equal_p (v0, v1, 0)); Why can't a non-constant shuffle have different V0 and V1? That seems like a direct violation of the documentation, and any sort of usefulness. Also, while I'm ok with the use of builtin_vec_perm here in the short term, I think that in the long term we should simply force the named pattern to handle constants. Then the vectorizer can simply use the predicate and the tree code and we can drop the large redundancy of builtins with different argument types. Indeed, once this patch is applied, I think that ought to be the very next task in this domain. > +/* Vector shuffle expression. A = VEC_SHUFFLE_EXPR Typo in "mask". > + foreach i in length (mask): > + A = mask[i] < length (v0) ? v0[mask[i]] : v1[mask[i]] Surely it's v1[mask[i] - length]. > + if (TREE_CODE (vect) == VECTOR_CST) > +{ > +unsigned i; Indentation is off all through this function. > + mask = gen_rtx_AND (maskmode, mask, mm); > + > + /* Convert mask to vector of chars. */ > + mask = simplify_gen_subreg (V16QImode, mask, maskmode, 0); > + mask = force_reg (V16QImode, mask); Why are you using force_reg to do all the dirty work? Seems to me this should be using expand_normal. All throughout this function. That would also avoid the need for all of the extra force_reg stuff that ought not be there for -O0. I also see that you're not even attempting to use xop_pperm. Is ssse3_pshufb why you do the wrong thing in the expander for v0 != v1? And give the vshuffle named pattern the wrong number of arguments? It's certainly possible to handle it, though it takes a few more steps, and might well be more efficient as a libgcc function rather than inline. r~
Re: [PATCH, PR50322] Fix test-case ivopts-lt.c to use int of same size as pointer.
On Sep 15, 2011, at 1:20 AM, Tom de Vries wrote: > On 09/14/2011 06:35 PM, Zdenek Dvorak wrote: >> Hi, >> >>> The attached patch fixes PR50322. >>> >>> The test-case is supposed to succeed if the loop counter data-type has the >>> same >>> size as a pointer. The patch defines TYPE to be an int datatype of the same >>> size >>> as a pointer, and uses it. After this fix, there's no need for the avr >>> xfails >>> anymore. >>> >>> tested with avr, x86_64 and x86_64 -m32. >> >> what about using uintptr_t? >> > > That's shorter indeed. > > Tested with x86_64 and x86_64 -m32. When running on my gcc+binutils avr build, > the test is listed as unsupported because it doesn't contain stdint.h, but I > think that just means that libc is missing in my build setup. > > OK for trunk? Ok.
Re: [PATCH], PR 50341, Fix TOC load scheduling for powerpc on GCC 4.6 and 4.7
On Thu, Sep 15, 2011 at 11:07:43AM -0700, Andrew Pinski wrote: > On Thu, Sep 15, 2011 at 10:52 AM, Michael Meissner > wrote: > > The recent changes for the new code models tickled a latent bug in the GCC > > 4.6 > > and 4.7 compilers with the AIX calling sequence (PR 50341). When you call a > > function indirectly, the compiler has to save the current function's TOC > > pointer that is in r2 on the stack frame, and then load the new function's > > TOC > > pointer before doing the call. If the indirect function that you are > > calling > > has the same TOC value as the current function, the code will run, but if > > the > > indirection function has a different TOC (for example it is in the main > > program, and the indirect call is in a shared library), the wrong address > > will > > be used. > > > > The existing code did this by doing a split after reload has finished. > > Scheduling after reload then moves the load of the new TOC. These patches > > for > > GCC 4.6 and GCC 4.7 disable the splits so that the load of the new TOC is > > always next to the bctrl instruction, preventing the compiler from moving > > the > > load. Ideally, we can eventually tackle the underlying problem that we did > > not > > have the right dependencies to prevent the scheduler from moving the new TOC > > load before the use. > > ENOPATCH. Yes, I realized that after the fact and put it out later. > It was originally splitting before reload and I had changed it to be > splitting after reload assuming the dependencies would be correct. > What is the instruction which is being described as not dependent on > the load of r2? >From the example in the bug, it is the 3rd cpu_fprintf, where it decides to move loading up an address into register 27, and move the load of the address before the call, since register 27 is preserved across calls. void cpu_dump_state (struct CPUPPCState *env, FILE *f, fprintf_function cpu_fprintf, int flags) { int i; cpu_fprintf(f, "NIP " "%016" "l" "x" " LR " "%016" "l" "x" " CTR " "%016" "l" "x" " XER " "%016" "l" "x" "\n", env->nip, env->lr, env->ctr, env->xer); cpu_fprintf(f, "MSR " "%016" "l" "x" " HID0 " "%016" "l" "x" " HF " "%016" "l" "x" " idx %d\n", env->msr, env->spr[(0x3F0)], env->hflags, env->mmu_idx); cpu_fprintf(f, "TB %08" "u" " %08" "l" "u" " DECR %08" "u" "\n", cpu_ppc_load_tbu(env), cpu_ppc_load_tbl(env) , cpu_ppc_load_decr(env) ); These are the insns that are moved before the call: (insn 1105 153 203 2 (set (reg/f:DI 27 27 [603]) (const:DI (plus:DI (reg:DI 2 2) (high:DI (const:DI (unspec:DI [ (symbol_ref/f:DI ("*.LC5") [flags 0x82] ) ] UNSPEC_TOCREL)) 513 {largetoc_high} (expr_list:REG_EQUIV (const:DI (plus:DI (reg:DI 2 2) (high:DI (const:DI (unspec:DI [ (symbol_ref/f:DI ("*.LC5") [flags 0x82] ) ] UNSPEC_TOCREL) (nil))) (insn 1107 161 204 2 (set (reg/f:DI 27 27 [601]) (lo_sum:DI (reg/f:DI 27 27 [603]) (const:DI (unspec:DI [ (symbol_ref/f:DI ("*.LC5") [flags 0x82] ) ] UNSPEC_TOCREL 514 {largetoc_low} (expr_list:REG_EQUIV (symbol_ref/f:DI ("*.LC5") [flags 0x82] ) (nil))) The RTL logic is not looking into the const. This is triggered by Alan's patch in June: 2011-06-20 Alan Modra * config/rs6000/rs6000.c (create_TOC_reference): Wrap high part of toc-relative address in CONST. (rs6000_delegitimize_address): Recognize changed address. (rs6000_legitimize_reload_address): Likewise. (rs6000_emit_move): Don't force these constants to memory. * config/rs6000/rs6000.md (tls_gd, tls_gd_high): Wrap high part of toc-relative address in CONST. (tls_ld, tls_ld_high, tls_got_dtprel, tls_got_dtprel_high): Likewise. (tls_got_tprel, tls_got_tprel_high, largetoc_high): Likewise. Now, I anticipate that the patch in 4.7 will be temporary until we are confident that we have the right solution, but it is better to put a bandaid on the patch to limit the damage. -- Michael Meissner, IBM 5 Technology Place Drive, M/S 2757, Westford, MA 01886-3141, USA meiss...@linux.vnet.ibm.com fax +1 (978) 399-6899
Re: [PATCH], PR 50341, Fix TOC load scheduling for powerpc on GCC 4.6 and 4.7
On Thu, 2011-09-15 at 11:07 -0700, Andrew Pinski wrote: > It was originally splitting before reload and I had changed it to be > splitting after reload assuming the dependencies would be correct. > What is the instruction which is being described as not dependent on > the load of r2? Alan describes the scenario here: http://gcc.gnu.org/ml/gcc-patches/2011-09/msg00304.html Peter
Re: [RFC] Add FMA support to sparc backend
On Wed, Sep 14, 2011 at 04:00:02AM -0400, David Miller wrote: > > Eric, this is a preliminary version of the FMA patch I've been > working on. Just so you can see what I'm doing. > > First, ignore the fact that there are two configure tests for the > presence of support for these instructions. I'm busy normalizing the > -xarch options which binutils supports so that they are the same as > Sun AS and therefore just one test is necessary. > > Second, like rs6000 the sparc negate fused multiply instructions > negate the full result, not the multiply result. So we cannot use > those instructions for the fnmadf4/fnmsdf4/fnmasf4/fnmssf4 patterns. > Since rs6000 provides patterns for such negate operations (presumably > just in case the combiner creates a match) I have done so for sparc > as well. > > I was really surprised that cpu designers haven't settled on a > consistent formula for negated fused multiply add/sub instructions. > Ho hum... On the powerpc, we have an issue with Spec 2006 and calculix when FMAs are generated and -ffast-math is used, where line 307 of rubber.f is: tt=datan2(dsqrt(1.d0-cn*cn),cn)/3.d0 The FNMSUB instruction generates a -0.0 while doing the multiply and subtract generates +0.0. Dsqrt returns a -0.0 when given a -0.0, and datan2 (-0.0, 1.0) returns -0.0. Note, calculix is nothing but nested FMAs, and if you disable FMAs you get about a 10% drop in performance. I suspect that the issue may be a powerpc backend issue where the wrong comparison is generated, but I haven't tracked it down. -- Michael Meissner, IBM 5 Technology Place Drive, M/S 2757, Westford, MA 01886-3141, USA meiss...@linux.vnet.ibm.com fax +1 (978) 399-6899
Re: [PATCH], PR 50341, Fix TOC load scheduling for powerpc on GCC 4.6 and 4.7
On Thu, Sep 15, 2011 at 10:52 AM, Michael Meissner wrote: > The recent changes for the new code models tickled a latent bug in the GCC 4.6 > and 4.7 compilers with the AIX calling sequence (PR 50341). When you call a > function indirectly, the compiler has to save the current function's TOC > pointer that is in r2 on the stack frame, and then load the new function's TOC > pointer before doing the call. If the indirect function that you are calling > has the same TOC value as the current function, the code will run, but if the > indirection function has a different TOC (for example it is in the main > program, and the indirect call is in a shared library), the wrong address will > be used. > > The existing code did this by doing a split after reload has finished. > Scheduling after reload then moves the load of the new TOC. These patches for > GCC 4.6 and GCC 4.7 disable the splits so that the load of the new TOC is > always next to the bctrl instruction, preventing the compiler from moving the > load. Ideally, we can eventually tackle the underlying problem that we did > not > have the right dependencies to prevent the scheduler from moving the new TOC > load before the use. ENOPATCH. It was originally splitting before reload and I had changed it to be splitting after reload assuming the dependencies would be correct. What is the instruction which is being described as not dependent on the load of r2? -- Andrew
[PATCH], PR 50341, Fix TOC load scheduling for powerpc on GCC 4.6 and 4.7 (patches included)
It would help if I included the patches: [gcc-4.6] 2011-09-15 Alan Modra Michael Meissner PR target/50341 * config/rs6000/rs6000.md (call_indirect_aix32): Do not split the load of the indirect function's TOC from the call to prevent the compiler from moving the load of the new TOC above code that references the current function's TOC. (call_indirect_aix64): Ditto. (call_value_indirect_aix32): Ditto. (call_value_indirect_aix64): Ditto. (call_indirect_nonlocal_aix32_internal): Ditto. (call_indirect_nonlocal_aix32): Ditto. (call_indirect_nonlocal_aix64_internal): Ditto. (call_indirect_nonlocal_aix64): Ditto. (call_value_indirect_nonlocal_aix32_internal): Ditto. (call_value_indirect_nonlocal_aix32): Ditto. (call_value_indirect_nonlocal_aix64_internal): Ditto. (call_value_indirect_nonlocal_aix64): Ditto. [gcc-4.7] 2011-09-15 Alan Modra Michael Meissner PR target/50341 * config/rs6000/rs6000.md (call_indirect_aix): Do not split the load of the indirect function's TOC from the call to prevent the compiler from moving the load of the new TOC above code that references the current function's TOC. (call_indirect_aix_internal): Ditto. (call_indirect_aix_nor11): Ditto. (call_indirect_aix_internal2): Ditto. (call_value_indirect_aix): Ditto. (call_value_indirect_aix_internal): Ditto. (call_value_indirect_aix_nor11): Ditto. (call_value_indirect_aix_internal2): Ditto. -- Michael Meissner, IBM 5 Technology Place Drive, M/S 2757, Westford, MA 01886-3141, USA meiss...@linux.vnet.ibm.com fax +1 (978) 399-6899 Index: gcc/config/rs6000/rs6000.md === --- gcc/config/rs6000/rs6000.md (revision 17) +++ gcc/config/rs6000/rs6000.md (working copy) @@ -12165,43 +12165,67 @@ (define_insn "largetoc_low" (define_expand "call_indirect_aix32" [(set (match_dup 2) (mem:SI (match_operand:SI 0 "gpc_reg_operand" ""))) - (set (mem:SI (plus:SI (reg:SI 1) (const_int 20))) + (set (match_dup 3) (reg:SI 2)) (set (reg:SI 11) (mem:SI (plus:SI (match_dup 0) (const_int 8 (parallel [(call (mem:SI (match_dup 2)) (match_operand 1 "" "")) - (use (mem:SI (plus:SI (match_dup 0) (const_int 4 + (use (match_dup 4)) + (set (reg:SI 2) (match_dup 3)) (use (reg:SI 11)) - (use (mem:SI (plus:SI (reg:SI 1) (const_int 20 (clobber (reg:SI LR_REGNO))])] "TARGET_32BIT" " -{ operands[2] = gen_reg_rtx (SImode); }") +{ + operands[2] = gen_reg_rtx (SImode); + operands[3] = gen_rtx_MEM (SImode, +gen_rtx_PLUS (SImode, stack_pointer_rtx, + GEN_INT (20))); + + operands[4] = gen_rtx_MEM (SImode, +gen_rtx_PLUS (SImode, operands[0], + GEN_INT (4))); + + /* Make sure the compiler does not optimize away the store of the TOC. */ + MEM_VOLATILE_P (operands[3]) = 1; +}") (define_expand "call_indirect_aix64" [(set (match_dup 2) (mem:DI (match_operand:DI 0 "gpc_reg_operand" ""))) - (set (mem:DI (plus:DI (reg:DI 1) (const_int 40))) + (set (match_dup 3) (reg:DI 2)) (set (reg:DI 11) (mem:DI (plus:DI (match_dup 0) (const_int 16 (parallel [(call (mem:SI (match_dup 2)) (match_operand 1 "" "")) - (use (mem:DI (plus:DI (match_dup 0) (const_int 8 + (use (match_dup 4)) + (set (reg:DI 2) (match_dup 3)) (use (reg:DI 11)) - (use (mem:DI (plus:DI (reg:DI 1) (const_int 40 - (clobber (reg:SI LR_REGNO))])] + (clobber (reg:DI LR_REGNO))])] "TARGET_64BIT" " -{ operands[2] = gen_reg_rtx (DImode); }") +{ + operands[2] = gen_reg_rtx (DImode); + operands[3] = gen_rtx_MEM (DImode, +gen_rtx_PLUS (DImode, stack_pointer_rtx, + GEN_INT (40))); + + operands[4] = gen_rtx_MEM (DImode, +gen_rtx_PLUS (DImode, operands[0], + GEN_INT (8))); + + /* Make sure the compiler does not optimize away the store of the TOC. */ + MEM_VOLATILE_P (operands[3]) = 1; +}") (define_expand "call_value_indirect_aix32" [(set (match_dup 3) (mem:SI (match_operand:SI 1 "gpc_reg_operand" ""))) - (set (mem:SI (plus:SI (reg:SI 1) (const_int 20))) + (set (match_dup 4) (reg:SI 2)) (set (reg:SI 11) (mem:SI (plus:SI (match_dup 1) @@ -12209,18 +12233,30 @@ (define_expand "call_value_indirect_aix3 (parallel [(set (match_operand 0 "" "")
[PATCH], PR 50341, Fix TOC load scheduling for powerpc on GCC 4.6 and 4.7
The recent changes for the new code models tickled a latent bug in the GCC 4.6 and 4.7 compilers with the AIX calling sequence (PR 50341). When you call a function indirectly, the compiler has to save the current function's TOC pointer that is in r2 on the stack frame, and then load the new function's TOC pointer before doing the call. If the indirect function that you are calling has the same TOC value as the current function, the code will run, but if the indirection function has a different TOC (for example it is in the main program, and the indirect call is in a shared library), the wrong address will be used. The existing code did this by doing a split after reload has finished. Scheduling after reload then moves the load of the new TOC. These patches for GCC 4.6 and GCC 4.7 disable the splits so that the load of the new TOC is always next to the bctrl instruction, preventing the compiler from moving the load. Ideally, we can eventually tackle the underlying problem that we did not have the right dependencies to prevent the scheduler from moving the new TOC load before the use. [gcc-4.6] 2011-09-15 Alan Modra Michael Meissner PR target/50341 * config/rs6000/rs6000.md (call_indirect_aix32): Do not split the load of the indirect function's TOC from the call to prevent the compiler from moving the load of the new TOC above code that references the current function's TOC. (call_indirect_aix64): Ditto. (call_value_indirect_aix32): Ditto. (call_value_indirect_aix64): Ditto. (call_indirect_nonlocal_aix32_internal): Ditto. (call_indirect_nonlocal_aix32): Ditto. (call_indirect_nonlocal_aix64_internal): Ditto. (call_indirect_nonlocal_aix64): Ditto. (call_value_indirect_nonlocal_aix32_internal): Ditto. (call_value_indirect_nonlocal_aix32): Ditto. (call_value_indirect_nonlocal_aix64_internal): Ditto. (call_value_indirect_nonlocal_aix64): Ditto. [gcc-4.7] 2011-09-15 Alan Modra Michael Meissner PR target/50341 * config/rs6000/rs6000.md (call_indirect_aix): Do not split the load of the indirect function's TOC from the call to prevent the compiler from moving the load of the new TOC above code that references the current function's TOC. (call_indirect_aix_internal): Ditto. (call_indirect_aix_nor11): Ditto. (call_indirect_aix_internal2): Ditto. (call_value_indirect_aix): Ditto. (call_value_indirect_aix_internal): Ditto. (call_value_indirect_aix_nor11): Ditto. (call_value_indirect_aix_internal2): Ditto. I have bootstraped both compilers for C, C++, and Fortran. There were no differences in the make check output. -- Michael Meissner, IBM 5 Technology Place Drive, M/S 2757, Westford, MA 01886-3141, USA meiss...@linux.vnet.ibm.com fax +1 (978) 399-6899
[1/2] tree-ssa-strlen optimization pass
Hi! Here is an updated version of the support patch for the tree-ssa-strlen.c optimization. The only change is the propagation of gimple_block to the new call from the old call, otherwise Wobjsize-1.c testcase fails when we optimize that __strcpy_chk into __memcpy_chk, but don't propagate its BLOCK. Perhaps gimplify_and_update_call_from_tree and update_call_from_tree should do it as well, for all the statements without gimple_block it creates (can do that as a follow-up). Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2011-09-15 Jakub Jelinek * gimple-fold.c (gimplify_and_update_call_from_tree): Set gctx.into_ssa after push_gimplify_context. * gimple.c (gimple_build_call_valist): New function. * gimple.h (gimple_build_call_valist): New prototype. * tree-ssa-propagate.c (finish_update_gimple_call): New function. (update_gimple_call): Likewise. (update_call_from_tree): Use finish_update_gimple_call. * tree-ssa-propagate.h (update_gimple_call): New prototype. --- gcc/gimple-fold.c.jj2011-09-15 12:18:54.0 +0200 +++ gcc/gimple-fold.c 2011-09-15 12:23:58.0 +0200 @@ -551,6 +551,7 @@ gimplify_and_update_call_from_tree (gimp reaching_vuse = gimple_vuse (stmt); push_gimplify_context (&gctx); + gctx.into_ssa = gimple_in_ssa_p (cfun); if (lhs == NULL_TREE) { --- gcc/gimple.c.jj 2011-09-15 12:18:37.0 +0200 +++ gcc/gimple.c2011-09-15 12:23:58.0 +0200 @@ -215,9 +215,10 @@ gimple_call_reset_alias_info (gimple s) pt_solution_reset (gimple_call_clobber_set (s)); } -/* Helper for gimple_build_call, gimple_build_call_vec and - gimple_build_call_from_tree. Build the basic components of a - GIMPLE_CALL statement to function FN with NARGS arguments. */ +/* Helper for gimple_build_call, gimple_build_call_valist, + gimple_build_call_vec and gimple_build_call_from_tree. Build the basic + components of a GIMPLE_CALL statement to function FN with NARGS + arguments. */ static inline gimple gimple_build_call_1 (tree fn, unsigned nargs) @@ -272,6 +273,26 @@ gimple_build_call (tree fn, unsigned nar } +/* Build a GIMPLE_CALL statement to function FN. NARGS is the number of + arguments. AP contains the arguments. */ + +gimple +gimple_build_call_valist (tree fn, unsigned nargs, va_list ap) +{ + gimple call; + unsigned i; + + gcc_assert (TREE_CODE (fn) == FUNCTION_DECL || is_gimple_call_addr (fn)); + + call = gimple_build_call_1 (fn, nargs); + + for (i = 0; i < nargs; i++) +gimple_call_set_arg (call, i, va_arg (ap, tree)); + + return call; +} + + /* Helper for gimple_build_call_internal and gimple_build_call_internal_vec. Build the basic components of a GIMPLE_CALL statement to internal function FN with NARGS arguments. */ --- gcc/gimple.h.jj 2011-09-15 12:18:54.0 +0200 +++ gcc/gimple.h2011-09-15 12:23:58.0 +0200 @@ -831,6 +831,7 @@ gimple gimple_build_debug_source_bind_st gimple gimple_build_call_vec (tree, VEC(tree, heap) *); gimple gimple_build_call (tree, unsigned, ...); +gimple gimple_build_call_valist (tree, unsigned, va_list); gimple gimple_build_call_internal (enum internal_fn, unsigned, ...); gimple gimple_build_call_internal_vec (enum internal_fn, VEC(tree, heap) *); gimple gimple_build_call_from_tree (tree); --- gcc/tree-ssa-propagate.c.jj 2011-09-15 12:18:37.0 +0200 +++ gcc/tree-ssa-propagate.c2011-09-15 16:46:54.0 +0200 @@ -1,5 +1,5 @@ /* Generic SSA value propagation engine. - Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010 + Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc. Contributed by Diego Novillo @@ -673,6 +673,40 @@ move_ssa_defining_stmt_for_defs (gimple } } +/* Helper function for update_gimple_call and update_call_from_tree. + A GIMPLE_CALL STMT is being replaced with GIMPLE_CALL NEW_STMT. */ + +static void +finish_update_gimple_call (gimple_stmt_iterator *si_p, gimple new_stmt, + gimple stmt) +{ + gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt)); + move_ssa_defining_stmt_for_defs (new_stmt, stmt); + gimple_set_vuse (new_stmt, gimple_vuse (stmt)); + gimple_set_vdef (new_stmt, gimple_vdef (stmt)); + gimple_set_location (new_stmt, gimple_location (stmt)); + if (gimple_block (new_stmt) == NULL_TREE) +gimple_set_block (new_stmt, gimple_block (stmt)); + gsi_replace (si_p, new_stmt, false); +} + +/* Update a GIMPLE_CALL statement at iterator *SI_P to call to FN + with number of arguments NARGS, where the arguments in GIMPLE form + follow NARGS argument. */ + +bool +update_gimple_call (gimple_stmt_iterator *si_p, tree fn, int nargs, ...) +{ + va_list ap; + gimple new_stmt, stmt = gsi_stmt (*si_p); + + gcc_assert (is_gimple_call (stmt)); + va_start (ap, nargs); + new_stmt = gimple_build_call_valist (fn, nargs, ap); + fini
C++ PATCH for c++/50361 (ICE with initializer_list and nullptr)
Since NULLPTR_TYPE can be a member (though I don't see the point), we need to know how to count it. Tested x86_64-pc-linux-gnu, applying to trunk. commit 435875780fc2c4d6b3bc70ca6fb2d2ff8608dfe0 Author: Jason Merrill Date: Tue Sep 13 06:15:16 2011 -0400 PR c++/50361 * expr.c (count_type_elements): Handle NULLPTR_TYPE. diff --git a/gcc/expr.c b/gcc/expr.c index e4bb633..29bf68b 100644 --- a/gcc/expr.c +++ b/gcc/expr.c @@ -5296,6 +5296,7 @@ count_type_elements (const_tree type, bool for_ctor_p) case POINTER_TYPE: case OFFSET_TYPE: case REFERENCE_TYPE: +case NULLPTR_TYPE: return 1; case ERROR_MARK: diff --git a/gcc/testsuite/g++.dg/cpp0x/nullptr23.C b/gcc/testsuite/g++.dg/cpp0x/nullptr23.C new file mode 100644 index 000..a078269 --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp0x/nullptr23.C @@ -0,0 +1,24 @@ +// PR c++/50361 +// { dg-options -std=c++0x } + +#include + +struct Foo +{ + Foo(std::initializer_list) { }; + + template Foo(T t) { T u(t); }; + +private: + union Data + { +Data() : null(nullptr) {} + +std::nullptr_t null; + } u_; +}; + +int main() +{ + Foo f = { {} }; +}
C++ PATCH for c++/50365 (regression with -> decl/expr ambiguity)
I forgot to fix the other test in my patch for 49691. Oops. Tested x86_64-pc-linux-gnu, applying to trunk. commit 5e0bb3e7f8287cd88f09b8f10c126e5b5f0711b2 Author: Jason Merrill Date: Thu Sep 15 10:06:19 2011 -0400 PR c++/50365 * parser.c (cp_parser_late_return_type_opt): Check quals parameter for clearing current_class_ptr, too. diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c index 03f75fc..2283312 100644 --- a/gcc/cp/parser.c +++ b/gcc/cp/parser.c @@ -15721,7 +15721,7 @@ cp_parser_late_return_type_opt (cp_parser* parser, cp_cv_quals quals) type = cp_parser_trailing_type_id (parser); - if (current_class_type) + if (quals >= 0) current_class_ptr = current_class_ref = NULL_TREE; return type; diff --git a/gcc/testsuite/g++.dg/cpp0x/trailing7.C b/gcc/testsuite/g++.dg/cpp0x/trailing7.C new file mode 100644 index 000..c4db10e --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp0x/trailing7.C @@ -0,0 +1,14 @@ +// PR c++/50365 +// { dg-options -std=c++0x } + +struct A { int i; }; + +struct B { + B(); + A* f(); +}; + +B::B() +{ + int(f()->i); +}
Re: [ARM testsuite] remove option "-march=armv5te" for pr46934.c testcase
On 15/09/11 03:27, Bin Cheng wrote: > Hi, > Bug PR46934 exists on all thumb1 targets, for example cortex-m0. > This patch removes "-march=armv5te" option for testcase pr46934.c, > allowing the test to be run on all thumb1 targets, not just armv5te. > > Is it ok? Thanks. > > gcc/testsuite/ChangeLog: > 2011-09-15 Cheng Bin >* gcc.target/arm/pr46934.c: remove -march=armv5te option. s/remove/Remove/ R.
Re: [build, ada] Allow Solaris bootstrap with C++ (PR bootstrap/49794)
On 07/25/2011 07:00 PM, Rainer Orth wrote: >> * Also, the definition of HAVE_DESIGNATED_INITIALIZERS was wrong for g++ >>on Solaris which again defines __STDC_VERSION__ 199901L. To fix this, >>I never define H_D_I if __cplusplus. > > Is this a valid definition of __STDC_VERSION__ at all? Why wouldn't it be? It's the standard C99 value. Replying to an old message, yes, it's standard C99, but C++ does not support many of the differences between C89 and C99. I would have thought the C89 definition to be more conservatively correct. Paolo
Re: [ARM testsuite] remove option "-march=armv5te" for pr46934.c testcase
On 15 September 2011 03:27, Bin Cheng wrote: > Hi, > Bug PR46934 exists on all thumb1 targets, for example cortex-m0. > This patch removes "-march=armv5te" option for testcase pr46934.c, > allowing the test to be run on all thumb1 targets, not just armv5te. > > Is it ok? Thanks. > > gcc/testsuite/ChangeLog: > 2011-09-15 Cheng Bin > * gcc.target/arm/pr46934.c: remove -march=armv5te option. This is OK. Ramana > > Thanks-chengbin
[patch] Allow read-after-read dependence in basic block SLP
Bootstrapped and tested on powerpc64-suse-linux. Committed to trunk. Ira ChangeLog: * tree-vect-data-refs.c (vect_analyze_data_ref_dependence): Allow read-after-read dependencies in basic block SLP. testsuite/ChangeLog: * gcc.dg/vect/bb-slp-25.c: New. Index: tree-vect-data-refs.c === --- tree-vect-data-refs.c (revision 178879) +++ tree-vect-data-refs.c (working copy) @@ -607,6 +607,11 @@ vect_analyze_data_ref_dependence (struct data_depe if (vect_check_interleaving (dra, drb)) return false; + /* Read-read is OK (we need this check here, after checking for + interleaving). */ + if (DR_IS_READ (dra) && DR_IS_READ (drb)) +return false; + if (vect_print_dump_info (REPORT_DR_DETAILS)) { fprintf (vect_dump, "can't determine dependence between "); Index: testsuite/gcc.dg/vect/bb-slp-25.c === --- testsuite/gcc.dg/vect/bb-slp-25.c (revision 0) +++ testsuite/gcc.dg/vect/bb-slp-25.c (revision 0) @@ -0,0 +1,57 @@ +/* { dg-require-effective-target vect_int } */ + +#include +#include "tree-vect.h" + +#define A 3 +#define B 4 +#define N 256 + +short src[N], dst[N]; + +void foo (short * __restrict dst, short * __restrict src, int h, int stride) +{ + int i; + h /= 16; + for (i = 0; i < h; i++) +{ + dst[0] += A*src[0] + src[stride]; + dst[1] += A*src[1] + src[1+stride]; + dst[2] += A*src[2] + src[2+stride]; + dst[3] += A*src[3] + src[3+stride]; + dst[4] += A*src[4] + src[4+stride]; + dst[5] += A*src[5] + src[5+stride]; + dst[6] += A*src[6] + src[6+stride]; + dst[7] += A*src[7] + src[7+stride]; + dst += 8; + src += 8; + } +} + + +int main (void) +{ + int i; + + check_vect (); + + for (i = 0; i < N; i++) +{ + dst[i] = 0; + src[i] = i; +} + + foo (dst, src, N, 8); + + for (i = 0; i < N/2; i++) +{ + if (dst[i] != A * i + i + 8) +abort (); +} + + return 0; +} + +/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target vect_element_align } } } */ +/* { dg-final { cleanup-tree-dump "slp" } } */ +
Re: [ARM] pass "--be8" to linker when linking for M profile
On 15/09/11 03:41, Bin Cheng wrote: > Hi, > The linker should do endian swizzling at link-time according to "--be8" > option. > This patch modifies BE8_LINK_SPEC by adding cortex-m processors in the specs > string. > > Since R-profile supports configurable big-endian instruction fetch, I didn't > include it here. > > Is it ok? Thanks. > > 2011-09-15 Cheng Bin > * config/arm/bpabi.h (BE8_LINK_SPEC): add cortex-m arch and > processors. > > Thanks-chengbin= > > > gcc-be8-for-m-profile.patch > > +#define BE8_LINK_SPEC \ + " %{mbig-endian:%{march=armv7-a|mcpu=cortex-a5 \ + |mcpu=cortex-a8|mcpu=cortex-a9|mcpu=cortex-a15 \ + |march=armv7-m|march=armv7e-m|mcpu=cortex-m3|mcpu=cortex-m4 \ + |march=armv6-m|mcpu=cortex-m0:%{!r:--be8}}}" Please sort this so that the list is ordered alphabetically by architecture/cpu (with architectures first). It might save some patch churn in the future if each element was put on a line on its own. OK with that change. R.
Re: [Patch ARM] Add predefined macro when unaligned access is available.
On 15/09/11 09:48, James Greenhalgh wrote: > Hi, > > This patch adds a predefined macro __ARM_FEATURE_UNALIGNED > which is set when unaligned access is supported in hardware and > enabled by users. > > Thanks, > James Greenhalgh > > gcc/ > > 2011-09-14 James Greenhalgh > > * config/arm/arm.h (TARGET_CPU_CPP_BUILTINS): > New builtin macro. > OK. R.
inline-analysis improvement
Hi, this patch updates inline heuristics so we could analyse also non-SSA vars. I.e. in: int aaa(int a) { if (a>4) bbb(&a); } We are still able to work out that bbb is called only when a>4 despite the fact that A has address taken. The patch also makes dataflow to simplify a bit the solution, so we don't have (a>4 || a<=4) type predicates. Bootstrapped/regtested x86_64-linux, will commit it later today. Honza * ipa-inline-analysis.c (add_condition): Add conditions parameter; simplify obviously true clauses. (and_predicates, or_predicates): Add conditions parameter. (inline_duplication_hoook): Update. (mark_modified): New function. (unmodified_parm): New function. (eliminated_by_inlining_prob, (set_cond_stmt_execution_predicate, set_switch_stmt_execution_predicate, will_be_nonconstant_predicate): Use unmodified_parm. (estimate_function_body_sizes): Update. (remap_predicate): Update. Index: ipa-inline-analysis.c === --- ipa-inline-analysis.c (revision 178858) +++ ipa-inline-analysis.c (working copy) @@ -232,11 +232,12 @@ add_condition (struct inline_summary *su /* Add clause CLAUSE into the predicate P. */ static inline void -add_clause (struct predicate *p, clause_t clause) +add_clause (conditions conditions, struct predicate *p, clause_t clause) { int i; int i2; int insert_here = -1; + int c1, c2; /* True clause. */ if (!clause) @@ -281,6 +282,28 @@ add_clause (struct predicate *p, clause_ if ((p->clause[i] & clause) != clause) i2++; } + + /* Look for clauses that are obviously true. I.e. + op0 == 5 || op0 != 5. */ + for (c1 = predicate_first_dynamic_condition; c1 < NUM_CONDITIONS; c1++) +for (c2 = c1 + 1; c2 <= NUM_CONDITIONS; c2++) + if ((clause & (1 << c1)) + && (clause & (1 << c2))) + { + condition *cc1 = VEC_index (condition, + conditions, + c1 - predicate_first_dynamic_condition); + condition *cc2 = VEC_index (condition, + conditions, + c2 - predicate_first_dynamic_condition); + if (cc1->operand_num == cc2->operand_num + && cc1->val == cc2->val + && cc1->code == invert_tree_comparison (cc2->code, + HONOR_NANS (TYPE_MODE (TREE_TYPE (cc1->val) + return; + } + + /* We run out of variants. Be conservative in positive direction. */ if (i2 == MAX_CLAUSES) return; @@ -298,7 +321,8 @@ add_clause (struct predicate *p, clause_ /* Return P & P2. */ static struct predicate -and_predicates (struct predicate *p, struct predicate *p2) +and_predicates (conditions conditions, + struct predicate *p, struct predicate *p2) { struct predicate out = *p; int i; @@ -319,7 +343,7 @@ and_predicates (struct predicate *p, str for (; p2->clause[i]; i++) { gcc_checking_assert (i < MAX_CLAUSES); - add_clause (&out, p2->clause[i]); + add_clause (conditions, &out, p2->clause[i]); } return out; } @@ -346,7 +370,7 @@ predicates_equal_p (struct predicate *p, /* Return P | P2. */ static struct predicate -or_predicates (struct predicate *p, struct predicate *p2) +or_predicates (conditions conditions, struct predicate *p, struct predicate *p2) { struct predicate out = true_predicate (); int i,j; @@ -364,7 +388,7 @@ or_predicates (struct predicate *p, stru for (j = 0; p2->clause[j]; j++) { gcc_checking_assert (i < MAX_CLAUSES && j < MAX_CLAUSES); -add_clause (&out, p->clause[i] | p2->clause[j]); +add_clause (conditions, &out, p->clause[i] | p2->clause[j]); } return out; } @@ -753,7 +777,7 @@ inline_node_duplication_hook (struct cgr break; } else - add_clause (&new_predicate, + add_clause (info->conds, &new_predicate, possible_truths & e->predicate.clause[j]); if (false_predicate_p (&new_predicate)) { @@ -781,7 +805,7 @@ inline_node_duplication_hook (struct cgr break; } else - add_clause (&new_predicate, + add_clause (info->conds, &new_predicate, possible_truths & es->predicate->clause[j]); if (false_predicate_p (&new_predicate) && !false_predicate_p (es->predicate)) @@ -812,7 +836,7 @@ inline_node_duplication_hook (struct cgr break; } else - add_clause (&new_predicate, + add_clause (info->conds, &new_predicate, possible_truths & es->predicate->clause[j]); if (false_predic
[Ada] Use CLOCK_MONOTONIC when possible
This patch takes advantage of CLOCK_MONOTONIC (when calling clock_gettime) when supported by the OS (so far, only AIX and FreeBSD. Linux only supports it on recent systems, so this is not enabled for now), and defaults to CLOCK_REALTIME when not available. Tested on x86_64-pc-linux-gnu, committed on trunk 2011-09-15 Arnaud Charlet * s-osinte-aix.ads, s-osinte-darwin.ads, s-osinte-freebsd.ads, s-osinte-hpux.ads, s-osinte-lynxos.ads, s-osinte-solaris-posix.ads, s-taprop-posix.adb (CLOCK_MONOTONIC): New constant. (CLOCK_REALTIME): Fix wrong value on some OSes. * s-taprop-posix.adb (Monotonic_Clock): Use CLOCK_MONOTONIC. Index: s-osinte-aix.ads === --- s-osinte-aix.ads(revision 178876) +++ s-osinte-aix.ads(working copy) @@ -7,7 +7,7 @@ -- S p e c -- -- -- -- Copyright (C) 1991-1994, Florida State University-- --- Copyright (C) 1995-2010, Free Software Foundation, Inc. -- +-- Copyright (C) 1995-2011, Free Software Foundation, Inc. -- -- -- -- GNAT is free software; you can redistribute it and/or modify it under -- -- terms of the GNU General Public License as published by the Free Soft- -- @@ -199,7 +199,8 @@ type clockid_t is private; - CLOCK_REALTIME : constant clockid_t; + CLOCK_REALTIME : constant clockid_t; + CLOCK_MONOTONIC : constant clockid_t; function clock_gettime (clock_id : clockid_t; @@ -539,7 +540,8 @@ pragma Convention (C, timespec); type clockid_t is new int; - CLOCK_REALTIME : constant clockid_t := 0; + CLOCK_REALTIME : constant clockid_t := 9; + CLOCK_MONOTONIC : constant clockid_t := 10; type pthread_attr_t is new System.Address; pragma Convention (C, pthread_attr_t); Index: s-osinte-darwin.ads === --- s-osinte-darwin.ads (revision 178876) +++ s-osinte-darwin.ads (working copy) @@ -7,7 +7,7 @@ -- S p e c -- -- -- -- Copyright (C) 1991-1994, Florida State University-- --- Copyright (C) 1995-2010, Free Software Foundation, Inc. -- +-- Copyright (C) 1995-2011, Free Software Foundation, Inc. -- -- -- -- GNAT is free software; you can redistribute it and/or modify it under -- -- terms of the GNU General Public License as published by the Free Soft- -- @@ -185,7 +185,8 @@ type clockid_t is private; - CLOCK_REALTIME : constant clockid_t; + CLOCK_REALTIME : constant clockid_t; + CLOCK_MONOTONIC : constant clockid_t; function clock_gettime (clock_id : clockid_t; @@ -516,7 +517,8 @@ pragma Convention (C, timespec); type clockid_t is new int; - CLOCK_REALTIME : constant clockid_t := 0; + CLOCK_REALTIME : constant clockid_t := 0; + CLOCK_MONOTONIC : constant clockid_t := CLOCK_REALTIME; -- -- Darwin specific signal implementation Index: s-osinte-freebsd.ads === --- s-osinte-freebsd.ads(revision 178876) +++ s-osinte-freebsd.ads(working copy) @@ -7,7 +7,7 @@ -- S p e c-- -- -- -- Copyright (C) 1991-1994, Florida State University-- --- Copyright (C) 1995-2010, Free Software Foundation, Inc. -- +-- Copyright (C) 1995-2011, Free Software Foundation, Inc. -- -- -- -- GNAT is free software; you can redistribute it and/or modify it under -- -- terms of the GNU General Public License as published by the Free Soft- -- @@ -202,7 +202,8 @@ type clockid_t is private; - CLOCK_REALTIME : constant clockid_t; + CLOCK_REALTIME : constant clockid_t; + CLOCK_MONOTONIC : constant clockid_t; function clock_gettime (clock_id : clockid_t; @@ -635,7 +636,8 @@ pragma Convention (C, timespec); type clockid_t is new int; - CLOCK_REALTIME : constant clockid_t := 0; + CLOCK_REALTIME : constant clockid_t := 0; + CLOCK_MONOTONIC : constant clockid_t := 4; type pthread_t is new System.Address; type pthread_attr_t is new System.Address; Index: s-osinte-hpux.ads === --- s-osinte-hpux.ads (revision 178876) +++ s-osint
[Ada] Warnings on unused formals of expression functions
This patch simplifies the expansion of expression functions, and ensures that proper warnings are generated for unused formals, if present. The following compilation: gcc -c -gnat12 -gnatwa -gnatc ada12.ads must yield: ada12.ads:6:24: warning: formal parameter "Self" is not referenced ada12.ads:9:35: warning: formal parameter "Self" is not referenced -- package Ada12 is type T is tagged record Value : Integer := 17; end record; function Template (Self : T) return String is ("foo.thtml"); type T2 is new T with null record; overriding function Template (Self : T2) return String is ("bar.thtml"); end Ada12; Tested on x86_64-pc-linux-gnu, committed on trunk 2011-09-15 Ed Schonberg * sem_ch6.adb (Analyze_Expression_Function): Code cleanup: if the expression function is not a completion, create a new specification for the generated declaration, and keep the original specification in the generated body. Shorter code also ensures that proper warnings are generated for unused formals in all cases. Index: sem_ch6.adb === --- sem_ch6.adb (revision 178876) +++ sem_ch6.adb (working copy) @@ -268,6 +268,7 @@ Loc : constant Source_Ptr := Sloc (N); LocX : constant Source_Ptr := Sloc (Expression (N)); Def_Id : constant Entity_Id := Defining_Entity (Specification (N)); + Expr : constant Node_Id:= Expression (N); New_Body : Node_Id; New_Decl : Node_Id; @@ -315,31 +316,28 @@ Set_Is_Inlined (Prev); Analyze (N); - -- If this is not a completion, create both a declaration and a body, - -- so that the expression can be inlined whenever possible. + -- If this is not a completion, create both a declaration and a body, so + -- that the expression can be inlined whenever possible. The spec of the + -- new subprogram declaration is a copy of the original specification, + -- which is now part of the subprogram body. else New_Decl := Make_Subprogram_Declaration (Loc, - Specification => Specification (N)); + Specification => Copy_Separate_Tree (Specification (N))); Rewrite (N, New_Decl); Analyze (N); Set_Is_Inlined (Defining_Entity (New_Decl)); - -- Create new set of formals for specification in body. - - Set_Specification (New_Body, - Make_Function_Specification (Loc, - Defining_Unit_Name => - Make_Defining_Identifier (Loc, Chars (Defining_Entity (N))), - Parameter_Specifications => - Copy_Parameter_List (Defining_Entity (New_Decl)), - Result_Definition => - New_Copy_Tree (Result_Definition (Specification (New_Decl); - Insert_After (N, New_Body); Analyze (New_Body); end if; + + -- If the return expression is a static constant, we suppress warning + -- messages on unused formals, which in most cases will be noise. + + Set_Is_Trivial_Subprogram (Defining_Entity (New_Body), +Is_OK_Static_Expression (Expr)); end Analyze_Expression_Function;
[Patch ARM] Add predefined macro when unaligned access is available.
Hi, This patch adds a predefined macro __ARM_FEATURE_UNALIGNED which is set when unaligned access is supported in hardware and enabled by users. Thanks, James Greenhalgh gcc/ 2011-09-14 James Greenhalgh * config/arm/arm.h (TARGET_CPU_CPP_BUILTINS): New builtin macro. diff --git gcc/config/arm/arm.h gcc/config/arm/arm.h index 208ee51..a76988e 100644 --- gcc/config/arm/arm.h +++ gcc/config/arm/arm.h @@ -47,6 +47,8 @@ extern char arm_arch_name[]; { \ if (TARGET_DSP_MULTIPLY) \ builtin_define ("__ARM_FEATURE_DSP"); \ + if (unaligned_access)\ + builtin_define ("__ARM_FEATURE_UNALIGNED"); \ /* Define __arm__ even when in thumb mode, for \ consistency with armcc. */ \ builtin_define ("__arm__"); \
Re: [PATCH, PR50322] Fix test-case ivopts-lt.c to use int of same size as pointer.
On 09/14/2011 06:35 PM, Zdenek Dvorak wrote: > Hi, > >> The attached patch fixes PR50322. >> >> The test-case is supposed to succeed if the loop counter data-type has the >> same >> size as a pointer. The patch defines TYPE to be an int datatype of the same >> size >> as a pointer, and uses it. After this fix, there's no need for the avr xfails >> anymore. >> >> tested with avr, x86_64 and x86_64 -m32. > > what about using uintptr_t? > That's shorter indeed. Tested with x86_64 and x86_64 -m32. When running on my gcc+binutils avr build, the test is listed as unsupported because it doesn't contain stdint.h, but I think that just means that libc is missing in my build setup. OK for trunk? > Zdenek > 2011-09-14 Tom de Vries PR testsuite/50322 * gcc.dg/tree-ssa/ivopts-lt.c: require stdint_types. include stdint.h. Use uintptr_t in f1. Undo avr xfails. Index: gcc/testsuite/gcc.dg/tree-ssa/ivopts-lt.c === --- gcc/testsuite/gcc.dg/tree-ssa/ivopts-lt.c (revision 178804) +++ gcc/testsuite/gcc.dg/tree-ssa/ivopts-lt.c (working copy) @@ -1,8 +1,11 @@ /* { dg-do compile } */ /* { dg-options "-O2 -fdump-tree-ivopts" } */ +/* { dg-require-effective-target stdint_types } */ + +#include "stdint.h" void -f1 (char *p, unsigned long int i, unsigned long int n) +f1 (char *p, uintptr_t i, uintptr_t n) { p += i; do @@ -14,8 +17,7 @@ f1 (char *p, unsigned long int i, unsign while (i < n); } -/* For the fails on avr see PR tree-optimization/50322. */ -/* { dg-final { scan-tree-dump-times "PHI" 1 "ivopts" { xfail { "avr-*-*" } } } } */ +/* { dg-final { scan-tree-dump-times "PHI" 1 "ivopts" } } */ /* { dg-final { scan-tree-dump-times "PHI