https://gcc.gnu.org/g:081f12a1d9d1bd793e3571daf5ab25db594ff57a

commit 081f12a1d9d1bd793e3571daf5ab25db594ff57a
Author: Mikael Morin <mik...@gcc.gnu.org>
Date:   Thu Nov 16 22:14:41 2023 +0100

    fortran: Inline integral MINLOC/MAXLOC with no DIM and no MASK [PR90608]
    
    Enable generation of inline code for the MINLOC and MAXLOC intrinsic,
    if the ARRAY argument is of integral type and of any rank (only the rank 1
    case was previously inlined), and neither DIM nor MASK arguments are
    present.
    
    This needs a few adjustments in gfc_conv_intrinsic_minmaxloc,
    mainly to replace the single variables POS and OFFSET, with collections
    of variables, one variable per dimension each.
    
    The restriction to integral ARRAY and absent MASK limits the scope of
    the change to the cases where we generate single loop inline code.  The
    code generation for the second loop is only accessible with ARRAY of rank
    1, so it can continue using a single variable.  A later change will extend
    inlining to the double loop cases.
    
    There is some bounds checking code that was previously handled by the
    library, and that needed some changes in the scalarizer to avoid regressing.
    The bounds check code generation was already by the scalarizer, but it was
    only applying to array reference sections, checking both individual array
    bounds and shape conformability between all the array involved.  For MINLOC
    or MAXLOC, enable the conformability check between all the scalarized
    arrays, and disable the check that the array reference is within its bounds.
    
            PR fortran/90608
    
    gcc/fortran/ChangeLog:
    
            * trans-array.cc (gfc_conv_ss_startstride): Set the MINLOC/MAXLOC
            result upper bound using the ARRAY argument rank.  Ajdust the error
            message for intrinsic result arrays.  Only check array bounds for
            array references.  Move bound check decision code...
            (bounds_check_needed): ... here as a new predicate.  Allow bound
            check for MINLOC/MAXLOC intrinsic results.
            * trans-intrinsic.cc (gfc_conv_intrinsic_minmaxloc): Change the
            result array upper bound to the rank of ARRAY.  Update the NONEMPTY
            variable to depend on the non-empty extent of every dimension.  Use
            one variable per dimension instead of a single variable for the
            position and the offset.  Update their declaration, initialization,
            and update to affect the variable of each dimension.  Use the first
            variable only in areas only accessed with rank 1 ARRAY argument.
            Set every element of the result using its corresponding variable.
            (gfc_inline_intrinsic_function_p): Return true for integral ARRAY
            and absent DIM and MASK.
    
    gcc/testsuite/ChangeLog:
    
            * gfortran.dg/maxloc_bounds_4.f90: Additionally accept the error
            message emitted by the scalarizer.

Diff:
---
 gcc/fortran/trans-array.cc                    |  70 +++++++++---
 gcc/fortran/trans-intrinsic.cc                | 150 +++++++++++++++++++-------
 gcc/testsuite/gfortran.dg/maxloc_bounds_4.f90 |   4 +-
 3 files changed, 167 insertions(+), 57 deletions(-)

diff --git a/gcc/fortran/trans-array.cc b/gcc/fortran/trans-array.cc
index 99a603a3afb2..c9d63d13509d 100644
--- a/gcc/fortran/trans-array.cc
+++ b/gcc/fortran/trans-array.cc
@@ -4876,6 +4876,35 @@ add_check_section_in_array_bounds (stmtblock_t *inner, 
gfc_ss_info *ss_info,
 }
 
 
+/* Tells whether we need to generate bounds checking code for the array
+   associated with SS.  */
+
+bool
+bounds_check_needed (gfc_ss *ss)
+{
+  /* Catch allocatable lhs in f2003.  */
+  if (flag_realloc_lhs && ss->no_bounds_check)
+    return false;
+
+  gfc_ss_info *ss_info = ss->info;
+  if (ss_info->type == GFC_SS_SECTION)
+    return true;
+
+  if (!(ss_info->type == GFC_SS_INTRINSIC
+       && ss_info->expr
+       && ss_info->expr->expr_type == EXPR_FUNCTION))
+    return false;
+
+  gfc_intrinsic_sym *isym = ss_info->expr->value.function.isym;
+  if (!(isym
+       && (isym->id == GFC_ISYM_MAXLOC
+           || isym->id == GFC_ISYM_MINLOC)))
+    return false;
+
+  return gfc_inline_intrinsic_function_p (ss_info->expr);
+}
+
+
 /* Calculates the range start and stride for a SS chain.  Also gets the
    descriptor and data pointer.  The range of vector subscripts is the size
    of the vector.  Array bounds are also checked.  */
@@ -4977,10 +5006,19 @@ done:
                info->data = gfc_conv_array_data (info->descriptor);
                info->data = gfc_evaluate_now (info->data, &outer_loop->pre);
 
-               info->offset = gfc_index_zero_node;
+               gfc_expr *array = expr->value.function.actual->expr;
+               tree rank = build_int_cst (gfc_array_index_type, array->rank);
+
+               tree tmp = fold_build2_loc (input_location, MINUS_EXPR,
+                                           gfc_array_index_type,
+                                           fold_convert (gfc_array_index_type,
+                                                         rank),
+                                           gfc_index_one_node);
+
+               info->end[0] = gfc_evaluate_now (tmp, &outer_loop->pre);
                info->start[0] = gfc_index_zero_node;
-               info->end[0] = gfc_index_zero_node;
                info->stride[0] = gfc_index_one_node;
+               info->offset = gfc_index_zero_node;
                continue;
              }
 
@@ -5098,14 +5136,10 @@ done:
          const char *expr_name;
          char *ref_name = NULL;
 
-         ss_info = ss->info;
-         if (ss_info->type != GFC_SS_SECTION)
-           continue;
-
-         /* Catch allocatable lhs in f2003.  */
-         if (flag_realloc_lhs && ss->no_bounds_check)
+         if (!bounds_check_needed (ss))
            continue;
 
+         ss_info = ss->info;
          expr = ss_info->expr;
          expr_loc = &expr->where;
          if (expr->ref)
@@ -5123,10 +5157,13 @@ done:
          for (n = 0; n < loop->dimen; n++)
            {
              dim = ss->dim[n];
-             if (info->ref->u.ar.dimen_type[dim] != DIMEN_RANGE)
-               continue;
+             if (ss_info->type == GFC_SS_SECTION)
+               {
+                 if (info->ref->u.ar.dimen_type[dim] != DIMEN_RANGE)
+                   continue;
 
-             add_check_section_in_array_bounds (&inner, ss_info, dim);
+                 add_check_section_in_array_bounds (&inner, ss_info, dim);
+               }
 
              /* Check the section sizes match.  */
              tmp = fold_build2_loc (input_location, MINUS_EXPR,
@@ -5147,9 +5184,14 @@ done:
                {
                  tmp3 = fold_build2_loc (input_location, NE_EXPR,
                                          logical_type_node, tmp, size[n]);
-                 msg = xasprintf ("Array bound mismatch for dimension %d "
-                                  "of array '%s' (%%ld/%%ld)",
-                                  dim + 1, expr_name);
+                 if (ss_info->type == GFC_SS_INTRINSIC)
+                   msg = xasprintf ("Extent mismatch for dimension %d of the "
+                                    "result of intrinsic '%s' (%%ld/%%ld)",
+                                    dim + 1, expr_name);
+                 else
+                   msg = xasprintf ("Array bound mismatch for dimension %d "
+                                    "of array '%s' (%%ld/%%ld)",
+                                    dim + 1, expr_name);
 
                  gfc_trans_runtime_check (true, false, tmp3, &inner,
                                           expr_loc, msg,
diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc
index a947dd1ba0b2..ac8bd2d48123 100644
--- a/gcc/fortran/trans-intrinsic.cc
+++ b/gcc/fortran/trans-intrinsic.cc
@@ -5332,9 +5332,30 @@ strip_kind_from_actual (gfc_actual_arglist * actual)
           if (a[S] < limit) { limit = a[S]; pos = S + (1 - from); }
           S++;
         }
-   B) ARRAY has rank 1, and DIM is absent.  Use the same code as the scalar
+   B: ARRAY has rank 1, and DIM is absent.  Use the same code as the scalar
       case and wrap the result in an array.
-   C) Otherwise, a call is generated
+   C: ARRAY has rank > 1, NANs are not supported, and DIM and MASK are absent.
+      Generate code similar to the single loop scalar case, but using one
+      variable per dimension, for example if ARRAY has rank 2:
+      4) NAN's aren't supported, no MASK:
+        limit = infinities_supported ? Infinity : huge (limit);
+        pos0 = (from0 <= to0 && from1 <= to1) ? 1 : 0;
+        pos1 = (from0 <= to0 && from1 <= to1) ? 1 : 0;
+        S1 = from1;
+        while (S1 <= to1) {
+          S0 = from0;
+          while (S0 <= to0) {
+            if (a[S1][S0] < limit) {
+              limit = a[S1][S0];
+              pos0 = S + (1 - from0);
+              pos1 = S + (1 - from1);
+            }
+            S0++;
+          }
+          S1++;
+        }
+        result = { pos0, pos1 };
+   D: Otherwise, a call is generated.
    For 2) and 4), if mask is scalar, this all goes into a conditional,
    setting pos = 0; in the else branch.
 
@@ -5348,8 +5369,8 @@ strip_kind_from_actual (gfc_actual_arglist * actual)
    if (cond) {
      ....
 
-     The optimizer is smart enough to move the condition out of the loop.
-     They are now marked as unlikely too for further speedup.  */
+   The optimizer is smart enough to move the condition out of the loop.
+   They are now marked as unlikely too for further speedup.  */
 
 static void
 gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * expr, enum tree_code op)
@@ -5364,7 +5385,7 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
   tree cond;
   tree elsetmp;
   tree ifbody;
-  tree offset;
+  tree offset[GFC_MAX_DIMENSIONS];
   tree nonempty;
   tree lab1, lab2;
   tree b_if, b_else;
@@ -5379,7 +5400,7 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
   gfc_expr *maskexpr;
   gfc_expr *backexpr;
   gfc_se backse;
-  tree pos;
+  tree pos[GFC_MAX_DIMENSIONS];
   tree result_var = NULL_TREE;
   int n;
   bool optional_mask;
@@ -5447,7 +5468,7 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
                                      HOST_WIDE_INT_1);
       as.upper[0] = gfc_get_int_expr (gfc_index_integer_kind,
                                      &arrayexpr->where,
-                                     HOST_WIDE_INT_1);
+                                     arrayexpr->rank);
 
       tree array = gfc_get_nodesc_array_type (type, &as, PACKED_STATIC, true);
 
@@ -5455,8 +5476,13 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
     }
 
   /* Initialize the result.  */
-  pos = gfc_create_var (gfc_array_index_type, "pos");
-  offset = gfc_create_var (gfc_array_index_type, "offset");
+  for (int i = 0; i < arrayexpr->rank; i++)
+    {
+      pos[i] = gfc_create_var (gfc_array_index_type,
+                              gfc_get_string ("pos%d", i));
+      offset[i] = gfc_create_var (gfc_array_index_type,
+                                 gfc_get_string ("offset%d", i));
+    }
 
   /* Walk the arguments.  */
   arrayss = gfc_walk_expr (arrayexpr);
@@ -5573,10 +5599,26 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
   loop.temp_dim = loop.dimen;
   gfc_conv_loop_setup (&loop, &expr->where);
 
-  gcc_assert (loop.dimen == 1);
-  if (nonempty == NULL && maskss == NULL && loop.from[0] && loop.to[0])
-    nonempty = fold_build2_loc (input_location, LE_EXPR, logical_type_node,
-                               loop.from[0], loop.to[0]);
+  if (nonempty == NULL && maskss == NULL)
+    {
+      nonempty = logical_true_node;
+
+      for (int i = 0; i < loop.dimen; i++)
+       {
+         if (!(loop.from[i] && loop.to[i]))
+           {
+             nonempty = NULL;
+             break;
+           }
+
+         tree tmp = fold_build2_loc (input_location, LE_EXPR,
+                                     logical_type_node, loop.from[i],
+                                     loop.to[i]);
+
+         nonempty = fold_build2_loc (input_location, TRUTH_ANDIF_EXPR,
+                                     logical_type_node, nonempty, tmp);
+       }
+    }
 
   lab1 = NULL;
   lab2 = NULL;
@@ -5586,14 +5628,18 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
      is non-empty and no MASK is used, we can initialize to 1 to simplify
      the inner loop.  */
   if (nonempty != NULL && !HONOR_NANS (DECL_MODE (limit)))
-    gfc_add_modify (&loop.pre, pos,
-                   fold_build3_loc (input_location, COND_EXPR,
-                                    gfc_array_index_type,
-                                    nonempty, gfc_index_one_node,
-                                    gfc_index_zero_node));
+    {
+      tree init = fold_build3_loc (input_location, COND_EXPR,
+                                  gfc_array_index_type, nonempty,
+                                  gfc_index_one_node,
+                                  gfc_index_zero_node);
+      for (int i = 0; i < loop.dimen; i++)
+       gfc_add_modify (&loop.pre, pos[i], init);
+    }
   else
     {
-      gfc_add_modify (&loop.pre, pos, gfc_index_zero_node);
+      gcc_assert (loop.dimen == 1);
+      gfc_add_modify (&loop.pre, pos[0], gfc_index_zero_node);
       lab1 = gfc_build_label_decl (NULL_TREE);
       TREE_USED (lab1) = 1;
       lab2 = gfc_build_label_decl (NULL_TREE);
@@ -5602,11 +5648,14 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
 
   /* An offset must be added to the loop
      counter to obtain the required position.  */
-  gcc_assert (loop.from[0]);
+  for (int i = 0; i < loop.dimen; i++)
+    {
+      gcc_assert (loop.from[i]);
 
-  tmp = fold_build2_loc (input_location, MINUS_EXPR, gfc_array_index_type,
-                        gfc_index_one_node, loop.from[0]);
-  gfc_add_modify (&loop.pre, offset, tmp);
+      tmp = fold_build2_loc (input_location, MINUS_EXPR, gfc_array_index_type,
+                            gfc_index_one_node, loop.from[i]);
+      gfc_add_modify (&loop.pre, offset[i], tmp);
+    }
 
   gfc_mark_ss_chain_used (arrayss, lab1 ? 3 : 1);
   if (maskss)
@@ -5647,20 +5696,23 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
       tree ifbody2;
 
       gfc_start_block (&ifblock2);
-      tmp = fold_build2_loc (input_location, PLUS_EXPR, TREE_TYPE (pos),
-                            loop.loopvar[0], offset);
-      gfc_add_modify (&ifblock2, pos, tmp);
+      tmp = fold_build2_loc (input_location, PLUS_EXPR, TREE_TYPE (pos[0]),
+                            loop.loopvar[0], offset[0]);
+      gfc_add_modify (&ifblock2, pos[0], tmp);
       ifbody2 = gfc_finish_block (&ifblock2);
-      cond = fold_build2_loc (input_location, EQ_EXPR, logical_type_node, pos,
-                             gfc_index_zero_node);
+      cond = fold_build2_loc (input_location, EQ_EXPR, logical_type_node,
+                             pos[0], gfc_index_zero_node);
       tmp = build3_v (COND_EXPR, cond, ifbody2,
                      build_empty_stmt (input_location));
       gfc_add_expr_to_block (&block, tmp);
     }
 
-  tmp = fold_build2_loc (input_location, PLUS_EXPR, TREE_TYPE (pos),
-                        loop.loopvar[0], offset);
-  gfc_add_modify (&ifblock, pos, tmp);
+  for (int i = 0; i < loop.dimen; i++)
+    {
+      tmp = fold_build2_loc (input_location, PLUS_EXPR, TREE_TYPE (pos[i]),
+                            loop.loopvar[i], offset[i]);
+      gfc_add_modify (&ifblock, pos[i], tmp);
+    }
 
   if (lab1)
     gfc_add_expr_to_block (&ifblock, build1_v (GOTO_EXPR, lab1));
@@ -5724,13 +5776,15 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
 
   if (lab1)
     {
+      gcc_assert (loop.dimen == 1);
+
       gfc_trans_scalarized_loop_boundary (&loop, &body);
 
       if (HONOR_NANS (DECL_MODE (limit)))
        {
          if (nonempty != NULL)
            {
-             ifbody = build2_v (MODIFY_EXPR, pos, gfc_index_one_node);
+             ifbody = build2_v (MODIFY_EXPR, pos[0], gfc_index_one_node);
              tmp = build3_v (COND_EXPR, nonempty, ifbody,
                              build_empty_stmt (input_location));
              gfc_add_expr_to_block (&loop.code[0], tmp);
@@ -5767,9 +5821,9 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
       /* Assign the value to the limit...  */
       gfc_add_modify (&ifblock, limit, arrayse.expr);
 
-      tmp = fold_build2_loc (input_location, PLUS_EXPR, TREE_TYPE (pos),
-                            loop.loopvar[0], offset);
-      gfc_add_modify (&ifblock, pos, tmp);
+      tmp = fold_build2_loc (input_location, PLUS_EXPR, TREE_TYPE (pos[0]),
+                            loop.loopvar[0], offset[0]);
+      gfc_add_modify (&ifblock, pos[0], tmp);
 
       ifbody = gfc_finish_block (&ifblock);
 
@@ -5832,6 +5886,7 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
   /* For a scalar mask, enclose the loop in an if statement.  */
   if (maskexpr && maskss == NULL)
     {
+      gcc_assert (loop.dimen == 1);
       tree ifmask;
 
       gfc_init_se (&maskse, NULL);
@@ -5846,7 +5901,7 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
         the pos variable the same way as above.  */
 
       gfc_init_block (&elseblock);
-      gfc_add_modify (&elseblock, pos, gfc_index_zero_node);
+      gfc_add_modify (&elseblock, pos[0], gfc_index_zero_node);
       elsetmp = gfc_finish_block (&elseblock);
       ifmask = conv_mask_condition (&maskse, maskexpr, optional_mask);
       tmp = build3_v (COND_EXPR, ifmask, tmp, elsetmp);
@@ -5860,18 +5915,22 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
     }
   gfc_cleanup_loop (&loop);
 
-  tree value = convert (type, pos);
   if (expr->rank > 0)
     {
-      tree res_arr_ref = gfc_build_array_ref (result_var, gfc_index_zero_node,
-                                             NULL_TREE, true);
+      for (int i = 0; i < arrayexpr->rank; i++)
+       {
+         tree res_idx = build_int_cst (gfc_array_index_type, i);
+         tree res_arr_ref = gfc_build_array_ref (result_var, res_idx,
+                                                 NULL_TREE, true);
 
-      gfc_add_modify (&se->pre, res_arr_ref, value);
+         tree value = convert (type, pos[i]);
+         gfc_add_modify (&se->pre, res_arr_ref, value);
+       }
 
       se->expr = result_var;
     }
   else
-    se->expr = value;
+    se->expr = convert (type, pos[0]);
 }
 
 /* Emit code for findloc.  */
@@ -11722,8 +11781,12 @@ gfc_inline_intrinsic_function_p (gfc_expr *expr)
          return false;
 
        gfc_actual_arglist *array_arg = expr->value.function.actual;
+       gfc_actual_arglist *dim_arg = array_arg->next;
+       gfc_actual_arglist *mask_arg = dim_arg->next;
 
        gfc_expr *array = array_arg->expr;
+       gfc_expr *dim = dim_arg->expr;
+       gfc_expr *mask = mask_arg->expr;
 
        if (!(array->ts.type == BT_INTEGER
              || array->ts.type == BT_REAL))
@@ -11732,6 +11795,11 @@ gfc_inline_intrinsic_function_p (gfc_expr *expr)
        if (array->rank == 1)
          return true;
 
+       if (array->ts.type == BT_INTEGER
+           && dim == nullptr
+           && mask == nullptr)
+         return true;
+
        return false;
       }
 
diff --git a/gcc/testsuite/gfortran.dg/maxloc_bounds_4.f90 
b/gcc/testsuite/gfortran.dg/maxloc_bounds_4.f90
index b1c7ca752d04..17f6cd86dc2d 100644
--- a/gcc/testsuite/gfortran.dg/maxloc_bounds_4.f90
+++ b/gcc/testsuite/gfortran.dg/maxloc_bounds_4.f90
@@ -1,6 +1,6 @@
 ! { dg-do run }
 ! { dg-options "-fbounds-check" }
-! { dg-shouldfail "Incorrect extent in return value of MAXLOC intrinsic: is 3, 
should be 2" }
+! { dg-shouldfail "Incorrect extent in return value of MAXLOC intrinsic: is 3, 
should be 2|Array bound mismatch for dimension 1 of array 'res' .3/2." }
 module tst
 contains
   subroutine foo(res)
@@ -18,4 +18,4 @@ program main
   integer :: res(3)
   call foo(res)
 end program main
-! { dg-output "Fortran runtime error: Incorrect extent in return value of 
MAXLOC intrinsic: is 3, should be 2" }
+! { dg-output "Fortran runtime error: Incorrect extent in return value of 
MAXLOC intrinsic: is 3, should be 2|Array bound mismatch for dimension 1 of 
array 'res' .3/2." }

Reply via email to