Hello world,

this patch adds two tweaks to the argument repacking.

First, when the size of an argument is known to be one, as in a(n1:n1),
we can directly pass a pointer - the stride may not be one, but it
does not matter.

Second, the case where the array passed is actually contiguous is
more likely in practice, so it should get the fast path. I have
done this by defining a new predictor and setting the estimated
likelyhood at 75%, which ensured a path without jumps when the
arguments passed to bar were contiguous:

module y
contains
  subroutine bar(a,b)
    real, dimension(:) :: a,b
    call foo(a,b,size(a))
  end subroutine bar
end module y

Test case is only for the first part - making one for the second
part would have been a bit too much.

Regression-tested. OK for trunk?

Regards

        Thomas

2019-06-02  Thomas Koenig  <tkoe...@gcc.gnu.org>

        PR fortran/90539
        * trans-expr.c (gfc_conv_subref_array_arg): If the size of the
        expression can be determined to be one, treat it as contiguous.
        Set likelyhood of presence of an actual argument according to
        PRED_FORTRAN_ABSENT_DUMMY and likelyhood of being contiguous
        according to PRED_FORTRAN_CONTIGUOUS.

2019-06-02  Thomas Koenig  <tkoe...@gcc.gnu.org>

        PR fortran/90539
        * predict.def (PRED_FORTRAN_CONTIGUOUS): New predictor.
Index: fortran/trans-expr.c
===================================================================
--- fortran/trans-expr.c	(Revision 271751)
+++ fortran/trans-expr.c	(Arbeitskopie)
@@ -4922,16 +4922,36 @@ class_array_fcn:
 	  gfc_se cont_se, array_se;
 	  stmtblock_t if_block, else_block;
 	  tree if_stmt, else_stmt;
+	  mpz_t size;
+	  bool size_set;
 
 	  cont_var = gfc_create_var (boolean_type_node, "contiguous");
 
-	  /* cont_var = is_contiguous (expr); .  */
-	  gfc_init_se (&cont_se, parmse);
-	  gfc_conv_is_contiguous_expr (&cont_se, expr);
-	  gfc_add_block_to_block (&se->pre, &(&cont_se)->pre);
-	  gfc_add_modify (&se->pre, cont_var, cont_se.expr);
-	  gfc_add_block_to_block (&se->pre, &(&cont_se)->post);
+	  /* If the size is known to be one at compile-time, set
+	     cont_var to true unconditionally.  This may look
+	     inelegant, but we're only doing this during
+	     optimization, so the statements will be optimized away,
+	     and this saves complexity here.  */
 
+	  size_set = gfc_array_size (expr, &size);
+	  if (size_set && mpz_cmp_ui (size, 1) == 0)
+	    {
+	      gfc_add_modify (&se->pre, cont_var,
+			      build_one_cst (boolean_type_node));
+	    }
+	  else
+	    {
+	      /* cont_var = is_contiguous (expr); .  */
+	      gfc_init_se (&cont_se, parmse);
+	      gfc_conv_is_contiguous_expr (&cont_se, expr);
+	      gfc_add_block_to_block (&se->pre, &(&cont_se)->pre);
+	      gfc_add_modify (&se->pre, cont_var, cont_se.expr);
+	      gfc_add_block_to_block (&se->pre, &(&cont_se)->post);
+	    }
+
+	  if (size_set)
+	    mpz_clear (size);
+
 	  /* arrayse->expr = descriptor of a.  */
 	  gfc_init_se (&array_se, se);
 	  gfc_conv_expr_descriptor (&array_se, expr);
@@ -4953,7 +4973,9 @@ class_array_fcn:
 
 	  /* And put the above into an if statement.  */
 	  pre_stmts = fold_build3_loc (input_location, COND_EXPR, void_type_node,
-				      cont_var, if_stmt, else_stmt);
+				       gfc_likely (cont_var,
+						   PRED_FORTRAN_CONTIGUOUS),
+				       if_stmt, else_stmt);
 	}
       else
 	{
@@ -4976,11 +4998,11 @@ class_array_fcn:
 	  gfc_add_modify (&else_block, pointer, build_int_cst (type, 0));
 	  else_stmt = gfc_finish_block (&else_block);
 
-	  tmp = fold_build3_loc (input_location, COND_EXPR, void_type_node, present_var,
+	  tmp = fold_build3_loc (input_location, COND_EXPR, void_type_node,
+				 gfc_likely (present_var,
+					     PRED_FORTRAN_ABSENT_DUMMY),
 				 pre_stmts, else_stmt);
 	  gfc_add_expr_to_block (&se->pre, tmp);
-
-
 	}
       else
 	gfc_add_expr_to_block (&se->pre, pre_stmts);
@@ -4995,9 +5017,16 @@ class_array_fcn:
 	  tmp = fold_build2_loc (input_location, EQ_EXPR, boolean_type_node,
 				 cont_var,
 				 build_zero_cst (boolean_type_node));
+	  tmp = gfc_unlikely (tmp, PRED_FORTRAN_CONTIGUOUS);
+
 	  if (pass_optional)
-	    post_cond = fold_build2_loc (input_location, TRUTH_ANDIF_EXPR,
-					 boolean_type_node, present_var, tmp);
+	    {
+	      tree present_likely = gfc_likely (present_var,
+						PRED_FORTRAN_ABSENT_DUMMY);
+	      post_cond = fold_build2_loc (input_location, TRUTH_ANDIF_EXPR,
+					   boolean_type_node, present_likely,
+					   tmp);
+	    }
 	  else
 	    post_cond = tmp;
 	}
Index: predict.def
===================================================================
--- predict.def	(Revision 271629)
+++ predict.def	(Arbeitskopie)
@@ -229,3 +229,10 @@ DEF_PREDICTOR (PRED_FORTRAN_ABSENT_DUMMY, "Fortran
    to be very likely.  */
 DEF_PREDICTOR (PRED_FORTRAN_LOOP_PREHEADER, "Fortran loop preheader", \
 	       HITRATE (99), 0)
+
+/* Fortran assumed size arrays can be non-contiguous, so they need
+   to be repacked.  */
+
+DEF_PREDICTOR (PRED_FORTRAN_CONTIGUOUS, "Fortran contiguous", \
+	       HITRATE (75), 0)
+	
! { dg-do run }
! { dg-additional-options "-O -fdump-tree-optimized" }
module y
  implicit none
contains
  subroutine foo(a,b,c,d,e,f)
    real, dimension(1), intent(inout) :: a, b, c, d, e, f
    if (any([a,b,c,d,e,f] /= [1,2,3,4,5,6])) stop 1
    a = -a
    b = -b
    c = -c
    d = -d
    e = -e
    f = -f
  end subroutine foo
end module y
module x
  use y
  implicit none
contains
  subroutine bar(a)
    real, dimension(:) :: a
    integer :: n1, n3, n5
    n1 = 1
    n3 = 3
    n5 = 5
    call foo(a(n1:n1), a(n1+1:n1+1), a(n3:n3), a(n3+1:n3+1), a(n5:n5), a(n5+1:n5+1))
  end subroutine bar
end module x

program main
  use x
  real, dimension(6) :: a,b
  b = [1,2,3,4,5,6]
  a = b
  call bar(a)
  if (any(a /= -b)) stop 2
end program main
! { dg-final { scan-tree-dump-not "contiguous" "optimized" } }

Reply via email to