Hi Mikael,

Am 11.07.24 um 21:55 schrieb Mikael Morin:
From: Mikael Morin <mik...@gcc.gnu.org>

Hello,

I discovered this while testing the inline MINLOC/MAXLOC (aka PR90608) patches.
Regression tested on x86_64-linux.
OK for master?

this is a nice finding!  (NAG seems to fail on the cases with
array size 0, while Intel gets it right.)

The commit message promises to cover all variations ("with/out NANs"?)
but I fail to see these.  Were these removed in the submission?

Otherwise the patch looks pretty simple and is OK for mainline.
But do not forget to s/MINLOCK/MINLOC/ in the summary.

Thanks for the patch!

Harald

-- 8< --

Move the evaluation of the BACK argument out of the loop in the inline code
generated for MINLOC or MAXLOC.  For that, add a new (scalar) element
associated with BACK to the scalarization loop chain, evaluate the argument
with the context of that element, and let the scalarizer do its job.

The problem was not only a missed optimisation, but also a wrong code
one in the cases where the expression associated with BACK is not free of
side-effects, making multiple evaluations observable.

The new tests check the evaluation count of the BACK argument, and try to
cover all the variations (with/out NANs, constant or unknown shape, absent
or scalar or array MASK) supported by the inline implementation of the
functions.  Care has been taken to not check the case of a constant .FALSE.
MASK, for which the evaluation of BACK can be elided.

gcc/fortran/ChangeLog:

        * trans-intrinsic.cc (gfc_conv_intrinsic_minmaxloc): Create a new
        scalar scalarization chain element if BACK is present.  Add it to
        the loop.  Set the scalarization chain before evaluating the
        argument.

gcc/testsuite/ChangeLog:

        * gfortran.dg/maxloc_5.f90: New test.
        * gfortran.dg/minloc_5.f90: New test.
---
  gcc/fortran/trans-intrinsic.cc         |  10 +
  gcc/testsuite/gfortran.dg/maxloc_5.f90 | 257 +++++++++++++++++++++++++
  gcc/testsuite/gfortran.dg/minloc_5.f90 | 257 +++++++++++++++++++++++++
  3 files changed, 524 insertions(+)
  create mode 100644 gcc/testsuite/gfortran.dg/maxloc_5.f90
  create mode 100644 gcc/testsuite/gfortran.dg/minloc_5.f90

diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc
index 5ea10e84060..cadbd177452 100644
--- a/gcc/fortran/trans-intrinsic.cc
+++ b/gcc/fortran/trans-intrinsic.cc
@@ -5325,6 +5325,7 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
    gfc_actual_arglist *actual;
    gfc_ss *arrayss;
    gfc_ss *maskss;
+  gfc_ss *backss;
    gfc_se arrayse;
    gfc_se maskse;
    gfc_expr *arrayexpr;
@@ -5390,6 +5391,11 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
      && maskexpr->symtree->n.sym->attr.dummy
      && maskexpr->symtree->n.sym->attr.optional;
    backexpr = actual->next->next->expr;
+  if (backexpr)
+    backss = gfc_get_scalar_ss (gfc_ss_terminator, backexpr);
+  else
+    backss = nullptr;
+
    nonempty = NULL;
    if (maskexpr && maskexpr->rank != 0)
      {
@@ -5449,6 +5455,9 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
    if (maskss)
      gfc_add_ss_to_loop (&loop, maskss);

+  if (backss)
+    gfc_add_ss_to_loop (&loop, backss);
+
    gfc_add_ss_to_loop (&loop, arrayss);

    /* Initialize the loop.  */
@@ -5535,6 +5544,7 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
    gfc_add_block_to_block (&block, &arrayse.pre);

    gfc_init_se (&backse, NULL);
+  backse.ss = backss;
    gfc_conv_expr_val (&backse, backexpr);
    gfc_add_block_to_block (&block, &backse.pre);

diff --git a/gcc/testsuite/gfortran.dg/maxloc_5.f90 
b/gcc/testsuite/gfortran.dg/maxloc_5.f90
new file mode 100644
index 00000000000..5d722450c8f
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/maxloc_5.f90
@@ -0,0 +1,257 @@
+! { dg-do run }
+!
+! Check that the evaluation of MAXLOC's BACK argument is made only once
+! before the scalarisation loops.
+
+program p
+  implicit none
+  integer, parameter :: data10(*) = (/ 7, 4, 7, 6, 6, 4, 6, 3, 9, 8 /)
+  logical, parameter :: mask10(*) = (/ .false., .true., .false., &
+                                       .false., .true., .true.,  &
+                                       .true. , .true., .false., &
+                                       .false. /)
+  integer :: calls_count = 0
+  call check_int_const_shape
+  call check_int_const_shape_scalar_mask
+  call check_int_const_shape_array_mask
+  call check_int_const_shape_optional_mask_present
+  call check_int_const_shape_optional_mask_absent
+  call check_int_const_shape_empty
+  call check_int_alloc
+  call check_int_alloc_scalar_mask
+  call check_int_alloc_array_mask
+  call check_int_alloc_empty
+  call check_real_const_shape
+  call check_real_const_shape_scalar_mask
+  call check_real_const_shape_array_mask
+  call check_real_const_shape_optional_mask_present
+  call check_real_const_shape_optional_mask_absent
+  call check_real_const_shape_empty
+  call check_real_alloc
+  call check_real_alloc_scalar_mask
+  call check_real_alloc_array_mask
+  call check_real_alloc_empty
+contains
+  function get_scalar_false()
+    logical :: get_scalar_false
+    calls_count = calls_count + 1
+    get_scalar_false = .false.
+  end function
+  subroutine check_int_const_shape()
+    integer :: a(10)
+    logical :: m(10)
+    integer :: r
+    a = data10
+    calls_count = 0
+    r = maxloc(a, dim = 1, back = get_scalar_false())
+    if (calls_count /= 1) stop 11
+  end subroutine
+  subroutine check_int_const_shape_scalar_mask()
+    integer :: a(10)
+    integer :: r
+    a = data10
+    calls_count = 0
+    ! We only check the case of a .true. mask.
+    ! If the mask is .false., the back argument is not necessary to deduce
+    ! the value returned by maxloc, so the compiler is free to elide it,
+    ! and the value of calls_count is undefined in that case.
+    r = maxloc(a, dim = 1, mask = .true., back = get_scalar_false())
+    if (calls_count /= 1) stop 18
+  end subroutine
+  subroutine check_int_const_shape_array_mask()
+    integer :: a(10)
+    logical :: m(10)
+    integer :: r
+    a = data10
+    m = mask10
+    calls_count = 0
+    r = maxloc(a, dim = 1, mask = m, back = get_scalar_false())
+    if (calls_count /= 1) stop 32
+  end subroutine
+  subroutine call_maxloc_int(r, a, m, b)
+    integer :: a(:)
+    logical, optional :: m(:)
+    logical, optional :: b
+    integer :: r
+    r = maxloc(a, dim = 1, mask = m, back = b)
+  end subroutine
+  subroutine check_int_const_shape_optional_mask_present()
+    integer :: a(10)
+    logical :: m(10)
+    integer :: r
+    a = data10
+    m = mask10
+    calls_count = 0
+    call call_maxloc_int(r, a, m, get_scalar_false())
+    if (calls_count /= 1) stop 39
+  end subroutine
+  subroutine check_int_const_shape_optional_mask_absent()
+    integer :: a(10)
+    integer :: r
+    a = data10
+    calls_count = 0
+    call call_maxloc_int(r, a, b = get_scalar_false())
+    if (calls_count /= 1) stop 46
+  end subroutine
+  subroutine check_int_const_shape_empty()
+    integer :: a(0)
+    logical :: m(0)
+    integer :: r
+    a = (/ integer:: /)
+    m = (/ logical:: /)
+    calls_count = 0
+    r = maxloc(a, dim = 1, mask = m, back = get_scalar_false())
+    if (calls_count /= 1) stop 53
+  end subroutine
+  subroutine check_int_alloc()
+    integer, allocatable :: a(:)
+    integer :: r
+    allocate(a(10))
+    a(:) = data10
+    calls_count = 0
+    r = maxloc(a, dim = 1, back = get_scalar_false())
+    if (calls_count /= 1) stop 60
+  end subroutine
+  subroutine check_int_alloc_scalar_mask()
+    integer, allocatable :: a(:)
+    integer :: r
+    allocate(a(10))
+    a(:) = data10
+    calls_count = 0
+    ! We only check the case of a .true. mask.
+    ! If the mask is .false., the back argument is not necessary to deduce
+    ! the value returned by maxloc, so the compiler is free to elide it,
+    ! and the value of calls_count is undefined in that case.
+    r = maxloc(a, dim = 1, mask = .true., back = get_scalar_false())
+    if (calls_count /= 1) stop 67
+  end subroutine
+  subroutine check_int_alloc_array_mask()
+    integer, allocatable :: a(:)
+    logical, allocatable :: m(:)
+    integer :: r
+    allocate(a(10), m(10))
+    a(:) = data10
+    m(:) = mask10
+    calls_count = 0
+    r = maxloc(a, dim = 1, mask = m, back = get_scalar_false())
+    if (calls_count /= 1) stop 81
+  end subroutine
+  subroutine check_int_alloc_empty()
+    integer, allocatable :: a(:)
+    logical, allocatable :: m(:)
+    integer :: r
+    allocate(a(0), m(0))
+    calls_count = 0
+    r = maxloc(a, dim = 1, mask = m, back = get_scalar_false())
+    if (calls_count /= 1) stop 88
+  end subroutine
+  subroutine check_real_const_shape()
+    real :: a(10)
+    integer :: r
+    a = (/ real:: data10 /)
+    calls_count = 0
+    r = maxloc(a, dim = 1, back = get_scalar_false())
+    if (calls_count /= 1) stop 95
+  end subroutine
+  subroutine check_real_const_shape_scalar_mask()
+    real :: a(10)
+    integer :: r
+    a = (/ real:: data10 /)
+    calls_count = 0
+    ! We only check the case of a .true. mask.
+    ! If the mask is .false., the back argument is not necessary to deduce
+    ! the value returned by maxloc, so the compiler is free to elide it,
+    ! and the value of calls_count is undefined in that case.
+    r = maxloc(a, dim = 1, mask = .true., back = get_scalar_false())
+    if (calls_count /= 1) stop 102
+  end subroutine
+  subroutine check_real_const_shape_array_mask()
+    real :: a(10)
+    logical :: m(10)
+    integer :: r
+    a = (/ real:: data10 /)
+    m = mask10
+    calls_count = 0
+    r = maxloc(a, dim = 1, mask = m, back = get_scalar_false())
+    if (calls_count /= 1) stop 116
+  end subroutine
+  subroutine call_maxloc_real(r, a, m, b)
+    real :: a(:)
+    logical, optional :: m(:)
+    logical, optional :: b
+    integer :: r
+    r = maxloc(a, dim = 1, mask = m, back = b)
+  end subroutine
+  subroutine check_real_const_shape_optional_mask_present()
+    real :: a(10)
+    logical :: m(10)
+    integer :: r
+    a = (/ real:: data10 /)
+    m = mask10
+    calls_count = 0
+    call call_maxloc_real(r, a, m, b = get_scalar_false())
+    if (calls_count /= 1) stop 123
+  end subroutine
+  subroutine check_real_const_shape_optional_mask_absent()
+    real :: a(10)
+    integer :: r
+    a = (/ real:: data10 /)
+    calls_count = 0
+    call call_maxloc_real(r, a, b = get_scalar_false())
+    if (calls_count /= 1) stop 130
+  end subroutine
+  subroutine check_real_const_shape_empty()
+    real :: a(0)
+    logical :: m(0)
+    integer :: r
+    a = (/ real:: /)
+    m = (/ logical:: /)
+    calls_count = 0
+    r = maxloc(a, dim = 1, mask = m, back = get_scalar_false())
+    if (calls_count /= 1) stop 137
+  end subroutine
+  subroutine check_real_alloc()
+    real, allocatable :: a(:)
+    integer :: r
+    allocate(a(10))
+    a(:) = (/ real:: data10 /)
+    calls_count = 0
+    r = maxloc(a, dim = 1, back = get_scalar_false())
+    if (calls_count /= 1) stop 144
+  end subroutine
+  subroutine check_real_alloc_scalar_mask()
+    real, allocatable :: a(:)
+    integer :: r
+    allocate(a(10))
+    a(:) = (/ real:: data10 /)
+    calls_count = 0
+    ! We only check the case of a .true. mask.
+    ! If the mask is .false., the back argument is not necessary to deduce
+    ! the value returned by maxloc, so the compiler is free to elide it,
+    ! and the value of calls_count is undefined in that case.
+    r = maxloc(a, dim = 1, mask = .true., back = get_scalar_false())
+    if (calls_count /= 1) stop 151
+  end subroutine
+  subroutine check_real_alloc_array_mask()
+    real, allocatable :: a(:)
+    logical, allocatable :: m(:)
+    integer :: r
+    allocate(a(10), m(10))
+    a(:) = (/ real:: data10 /)
+    m(:) = mask10
+    calls_count = 0
+    r = maxloc(a, dim = 1, mask = m, back = get_scalar_false())
+    if (calls_count /= 1) stop 165
+  end subroutine
+  subroutine check_real_alloc_empty()
+    real, allocatable :: a(:)
+    logical, allocatable :: m(:)
+    integer :: r
+    allocate(a(0), m(0))
+    a(:) = (/ real:: /)
+    m(:) = (/ logical :: /)
+    calls_count = 0
+    r = maxloc(a, dim = 1, mask = m, back = get_scalar_false())
+    if (calls_count /= 1) stop 172
+  end subroutine
+end program p
diff --git a/gcc/testsuite/gfortran.dg/minloc_5.f90 
b/gcc/testsuite/gfortran.dg/minloc_5.f90
new file mode 100644
index 00000000000..cb2cd008344
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/minloc_5.f90
@@ -0,0 +1,257 @@
+! { dg-do run }
+!
+! Check that the evaluation of MINLOC's BACK argument is made only once
+! before the scalarisation loops.
+
+program p
+  implicit none
+  integer, parameter :: data10(*) = (/ 2, 5, 2, 3, 3, 5, 3, 6, 0, 1 /)
+  logical, parameter :: mask10(*) = (/ .false., .true., .false., &
+                                       .false., .true., .true.,  &
+                                       .true. , .true., .false., &
+                                       .false. /)
+  integer :: calls_count = 0
+  call check_int_const_shape
+  call check_int_const_shape_scalar_mask
+  call check_int_const_shape_array_mask
+  call check_int_const_shape_optional_mask_present
+  call check_int_const_shape_optional_mask_absent
+  call check_int_const_shape_empty
+  call check_int_alloc
+  call check_int_alloc_scalar_mask
+  call check_int_alloc_array_mask
+  call check_int_alloc_empty
+  call check_real_const_shape
+  call check_real_const_shape_scalar_mask
+  call check_real_const_shape_array_mask
+  call check_real_const_shape_optional_mask_present
+  call check_real_const_shape_optional_mask_absent
+  call check_real_const_shape_empty
+  call check_real_alloc
+  call check_real_alloc_scalar_mask
+  call check_real_alloc_array_mask
+  call check_real_alloc_empty
+contains
+  function get_scalar_false()
+    logical :: get_scalar_false
+    calls_count = calls_count + 1
+    get_scalar_false = .false.
+  end function
+  subroutine check_int_const_shape()
+    integer :: a(10)
+    logical :: m(10)
+    integer :: r
+    a = data10
+    calls_count = 0
+    r = minloc(a, dim = 1, back = get_scalar_false())
+    if (calls_count /= 1) stop 11
+  end subroutine
+  subroutine check_int_const_shape_scalar_mask()
+    integer :: a(10)
+    integer :: r
+    a = data10
+    calls_count = 0
+    ! We only check the case of a .true. mask.
+    ! If the mask is .false., the back argument is not necessary to deduce
+    ! the value returned by minloc, so the compiler is free to elide it,
+    ! and the value of calls_count is undefined in that case.
+    r = minloc(a, dim = 1, mask = .true., back = get_scalar_false())
+    if (calls_count /= 1) stop 18
+  end subroutine
+  subroutine check_int_const_shape_array_mask()
+    integer :: a(10)
+    logical :: m(10)
+    integer :: r
+    a = data10
+    m = mask10
+    calls_count = 0
+    r = minloc(a, dim = 1, mask = m, back = get_scalar_false())
+    if (calls_count /= 1) stop 32
+  end subroutine
+  subroutine call_minloc_int(r, a, m, b)
+    integer :: a(:)
+    logical, optional :: m(:)
+    logical, optional :: b
+    integer :: r
+    r = minloc(a, dim = 1, mask = m, back = b)
+  end subroutine
+  subroutine check_int_const_shape_optional_mask_present()
+    integer :: a(10)
+    logical :: m(10)
+    integer :: r
+    a = data10
+    m = mask10
+    calls_count = 0
+    call call_minloc_int(r, a, m, get_scalar_false())
+    if (calls_count /= 1) stop 39
+  end subroutine
+  subroutine check_int_const_shape_optional_mask_absent()
+    integer :: a(10)
+    integer :: r
+    a = data10
+    calls_count = 0
+    call call_minloc_int(r, a, b = get_scalar_false())
+    if (calls_count /= 1) stop 46
+  end subroutine
+  subroutine check_int_const_shape_empty()
+    integer :: a(0)
+    logical :: m(0)
+    integer :: r
+    a = (/ integer:: /)
+    m = (/ logical:: /)
+    calls_count = 0
+    r = minloc(a, dim = 1, mask = m, back = get_scalar_false())
+    if (calls_count /= 1) stop 53
+  end subroutine
+  subroutine check_int_alloc()
+    integer, allocatable :: a(:)
+    integer :: r
+    allocate(a(10))
+    a(:) = data10
+    calls_count = 0
+    r = minloc(a, dim = 1, back = get_scalar_false())
+    if (calls_count /= 1) stop 60
+  end subroutine
+  subroutine check_int_alloc_scalar_mask()
+    integer, allocatable :: a(:)
+    integer :: r
+    allocate(a(10))
+    a(:) = data10
+    calls_count = 0
+    ! We only check the case of a .true. mask.
+    ! If the mask is .false., the back argument is not necessary to deduce
+    ! the value returned by minloc, so the compiler is free to elide it,
+    ! and the value of calls_count is undefined in that case.
+    r = minloc(a, dim = 1, mask = .true., back = get_scalar_false())
+    if (calls_count /= 1) stop 67
+  end subroutine
+  subroutine check_int_alloc_array_mask()
+    integer, allocatable :: a(:)
+    logical, allocatable :: m(:)
+    integer :: r
+    allocate(a(10), m(10))
+    a(:) = data10
+    m(:) = mask10
+    calls_count = 0
+    r = minloc(a, dim = 1, mask = m, back = get_scalar_false())
+    if (calls_count /= 1) stop 81
+  end subroutine
+  subroutine check_int_alloc_empty()
+    integer, allocatable :: a(:)
+    logical, allocatable :: m(:)
+    integer :: r
+    allocate(a(0), m(0))
+    calls_count = 0
+    r = minloc(a, dim = 1, mask = m, back = get_scalar_false())
+    if (calls_count /= 1) stop 88
+  end subroutine
+  subroutine check_real_const_shape()
+    real :: a(10)
+    integer :: r
+    a = (/ real:: data10 /)
+    calls_count = 0
+    r = minloc(a, dim = 1, back = get_scalar_false())
+    if (calls_count /= 1) stop 95
+  end subroutine
+  subroutine check_real_const_shape_scalar_mask()
+    real :: a(10)
+    integer :: r
+    a = (/ real:: data10 /)
+    calls_count = 0
+    ! We only check the case of a .true. mask.
+    ! If the mask is .false., the back argument is not necessary to deduce
+    ! the value returned by minloc, so the compiler is free to elide it,
+    ! and the value of calls_count is undefined in that case.
+    r = minloc(a, dim = 1, mask = .true., back = get_scalar_false())
+    if (calls_count /= 1) stop 102
+  end subroutine
+  subroutine check_real_const_shape_array_mask()
+    real :: a(10)
+    logical :: m(10)
+    integer :: r
+    a = (/ real:: data10 /)
+    m = mask10
+    calls_count = 0
+    r = minloc(a, dim = 1, mask = m, back = get_scalar_false())
+    if (calls_count /= 1) stop 116
+  end subroutine
+  subroutine call_minloc_real(r, a, m, b)
+    real :: a(:)
+    logical, optional :: m(:)
+    logical, optional :: b
+    integer :: r
+    r = minloc(a, dim = 1, mask = m, back = b)
+  end subroutine
+  subroutine check_real_const_shape_optional_mask_present()
+    real :: a(10)
+    logical :: m(10)
+    integer :: r
+    a = (/ real:: data10 /)
+    m = mask10
+    calls_count = 0
+    call call_minloc_real(r, a, m, b = get_scalar_false())
+    if (calls_count /= 1) stop 123
+  end subroutine
+  subroutine check_real_const_shape_optional_mask_absent()
+    real :: a(10)
+    integer :: r
+    a = (/ real:: data10 /)
+    calls_count = 0
+    call call_minloc_real(r, a, b = get_scalar_false())
+    if (calls_count /= 1) stop 130
+  end subroutine
+  subroutine check_real_const_shape_empty()
+    real :: a(0)
+    logical :: m(0)
+    integer :: r
+    a = (/ real:: /)
+    m = (/ logical:: /)
+    calls_count = 0
+    r = minloc(a, dim = 1, mask = m, back = get_scalar_false())
+    if (calls_count /= 1) stop 137
+  end subroutine
+  subroutine check_real_alloc()
+    real, allocatable :: a(:)
+    integer :: r
+    allocate(a(10))
+    a(:) = (/ real:: data10 /)
+    calls_count = 0
+    r = minloc(a, dim = 1, back = get_scalar_false())
+    if (calls_count /= 1) stop 144
+  end subroutine
+  subroutine check_real_alloc_scalar_mask()
+    real, allocatable :: a(:)
+    integer :: r
+    allocate(a(10))
+    a(:) = (/ real:: data10 /)
+    calls_count = 0
+    ! We only check the case of a .true. mask.
+    ! If the mask is .false., the back argument is not necessary to deduce
+    ! the value returned by minloc, so the compiler is free to elide it,
+    ! and the value of calls_count is undefined in that case.
+    r = minloc(a, dim = 1, mask = .true., back = get_scalar_false())
+    if (calls_count /= 1) stop 151
+  end subroutine
+  subroutine check_real_alloc_array_mask()
+    real, allocatable :: a(:)
+    logical, allocatable :: m(:)
+    integer :: r
+    allocate(a(10), m(10))
+    a(:) = (/ real:: data10 /)
+    m(:) = mask10
+    calls_count = 0
+    r = minloc(a, dim = 1, mask = m, back = get_scalar_false())
+    if (calls_count /= 1) stop 165
+  end subroutine
+  subroutine check_real_alloc_empty()
+    real, allocatable :: a(:)
+    logical, allocatable :: m(:)
+    integer :: r
+    allocate(a(0), m(0))
+    a(:) = (/ real:: /)
+    m(:) = (/ logical :: /)
+    calls_count = 0
+    r = minloc(a, dim = 1, mask = m, back = get_scalar_false())
+    if (calls_count /= 1) stop 172
+  end subroutine
+end program p

Reply via email to