Hello world, this patch adds two tweaks to the argument repacking.
First, when the size of an argument is known to be one, as in a(n1:n1), we can directly pass a pointer - the stride may not be one, but it does not matter. Second, the case where the array passed is actually contiguous is more likely in practice, so it should get the fast path. I have done this by defining a new predictor and setting the estimated likelyhood at 75%, which ensured a path without jumps when the arguments passed to bar were contiguous: module y contains subroutine bar(a,b) real, dimension(:) :: a,b call foo(a,b,size(a)) end subroutine bar end module y Test case is only for the first part - making one for the second part would have been a bit too much. Regression-tested. OK for trunk? Regards Thomas 2019-06-02 Thomas Koenig <tkoe...@gcc.gnu.org> PR fortran/90539 * trans-expr.c (gfc_conv_subref_array_arg): If the size of the expression can be determined to be one, treat it as contiguous. Set likelyhood of presence of an actual argument according to PRED_FORTRAN_ABSENT_DUMMY and likelyhood of being contiguous according to PRED_FORTRAN_CONTIGUOUS. 2019-06-02 Thomas Koenig <tkoe...@gcc.gnu.org> PR fortran/90539 * predict.def (PRED_FORTRAN_CONTIGUOUS): New predictor.
Index: fortran/trans-expr.c =================================================================== --- fortran/trans-expr.c (Revision 271751) +++ fortran/trans-expr.c (Arbeitskopie) @@ -4922,16 +4922,36 @@ class_array_fcn: gfc_se cont_se, array_se; stmtblock_t if_block, else_block; tree if_stmt, else_stmt; + mpz_t size; + bool size_set; cont_var = gfc_create_var (boolean_type_node, "contiguous"); - /* cont_var = is_contiguous (expr); . */ - gfc_init_se (&cont_se, parmse); - gfc_conv_is_contiguous_expr (&cont_se, expr); - gfc_add_block_to_block (&se->pre, &(&cont_se)->pre); - gfc_add_modify (&se->pre, cont_var, cont_se.expr); - gfc_add_block_to_block (&se->pre, &(&cont_se)->post); + /* If the size is known to be one at compile-time, set + cont_var to true unconditionally. This may look + inelegant, but we're only doing this during + optimization, so the statements will be optimized away, + and this saves complexity here. */ + size_set = gfc_array_size (expr, &size); + if (size_set && mpz_cmp_ui (size, 1) == 0) + { + gfc_add_modify (&se->pre, cont_var, + build_one_cst (boolean_type_node)); + } + else + { + /* cont_var = is_contiguous (expr); . */ + gfc_init_se (&cont_se, parmse); + gfc_conv_is_contiguous_expr (&cont_se, expr); + gfc_add_block_to_block (&se->pre, &(&cont_se)->pre); + gfc_add_modify (&se->pre, cont_var, cont_se.expr); + gfc_add_block_to_block (&se->pre, &(&cont_se)->post); + } + + if (size_set) + mpz_clear (size); + /* arrayse->expr = descriptor of a. */ gfc_init_se (&array_se, se); gfc_conv_expr_descriptor (&array_se, expr); @@ -4953,7 +4973,9 @@ class_array_fcn: /* And put the above into an if statement. */ pre_stmts = fold_build3_loc (input_location, COND_EXPR, void_type_node, - cont_var, if_stmt, else_stmt); + gfc_likely (cont_var, + PRED_FORTRAN_CONTIGUOUS), + if_stmt, else_stmt); } else { @@ -4976,11 +4998,11 @@ class_array_fcn: gfc_add_modify (&else_block, pointer, build_int_cst (type, 0)); else_stmt = gfc_finish_block (&else_block); - tmp = fold_build3_loc (input_location, COND_EXPR, void_type_node, present_var, + tmp = fold_build3_loc (input_location, COND_EXPR, void_type_node, + gfc_likely (present_var, + PRED_FORTRAN_ABSENT_DUMMY), pre_stmts, else_stmt); gfc_add_expr_to_block (&se->pre, tmp); - - } else gfc_add_expr_to_block (&se->pre, pre_stmts); @@ -4995,9 +5017,16 @@ class_array_fcn: tmp = fold_build2_loc (input_location, EQ_EXPR, boolean_type_node, cont_var, build_zero_cst (boolean_type_node)); + tmp = gfc_unlikely (tmp, PRED_FORTRAN_CONTIGUOUS); + if (pass_optional) - post_cond = fold_build2_loc (input_location, TRUTH_ANDIF_EXPR, - boolean_type_node, present_var, tmp); + { + tree present_likely = gfc_likely (present_var, + PRED_FORTRAN_ABSENT_DUMMY); + post_cond = fold_build2_loc (input_location, TRUTH_ANDIF_EXPR, + boolean_type_node, present_likely, + tmp); + } else post_cond = tmp; } Index: predict.def =================================================================== --- predict.def (Revision 271629) +++ predict.def (Arbeitskopie) @@ -229,3 +229,10 @@ DEF_PREDICTOR (PRED_FORTRAN_ABSENT_DUMMY, "Fortran to be very likely. */ DEF_PREDICTOR (PRED_FORTRAN_LOOP_PREHEADER, "Fortran loop preheader", \ HITRATE (99), 0) + +/* Fortran assumed size arrays can be non-contiguous, so they need + to be repacked. */ + +DEF_PREDICTOR (PRED_FORTRAN_CONTIGUOUS, "Fortran contiguous", \ + HITRATE (75), 0) +
! { dg-do run } ! { dg-additional-options "-O -fdump-tree-optimized" } module y implicit none contains subroutine foo(a,b,c,d,e,f) real, dimension(1), intent(inout) :: a, b, c, d, e, f if (any([a,b,c,d,e,f] /= [1,2,3,4,5,6])) stop 1 a = -a b = -b c = -c d = -d e = -e f = -f end subroutine foo end module y module x use y implicit none contains subroutine bar(a) real, dimension(:) :: a integer :: n1, n3, n5 n1 = 1 n3 = 3 n5 = 5 call foo(a(n1:n1), a(n1+1:n1+1), a(n3:n3), a(n3+1:n3+1), a(n5:n5), a(n5+1:n5+1)) end subroutine bar end module x program main use x real, dimension(6) :: a,b b = [1,2,3,4,5,6] a = b call bar(a) if (any(a /= -b)) stop 2 end program main ! { dg-final { scan-tree-dump-not "contiguous" "optimized" } }