> -----Original Message-----
> From: Harald Anlauf <anl...@gmx.de>
> Sent: Saturday, July 13, 2024 1:15 AM
> To: Prathamesh Kulkarni <prathame...@nvidia.com>; gcc-
> patc...@gcc.gnu.org; fort...@gcc.gnu.org
> Subject: Re: Lower zeroing array assignment to memset for allocatable
> arrays
> 
> External email: Use caution opening links or attachments
> 
> 
> Hi Prathamesh,
Hi Harald,
> 
> Am 12.07.24 um 15:31 schrieb Prathamesh Kulkarni:
> > It seems that component references are not currently handled even
> for static size arrays ?
> > For eg:
> > subroutine test_dt (dt, y)
> >     implicit none
> >     real :: y (10, 20, 30)
> >     type t
> >        real :: x(10, 20, 30)
> >     end type t
> >     type(t) :: dt
> >     y = 0
> >     dt% x = 0
> > end subroutine
> >
> > With trunk, it generates memset for 'y' but not for dt%x.
> > That happens because copyable_array_p returns false for dt%x,
> because
> > expr->ref->next is non NULL:
> >
> >    /* First check it's an array.  */
> >    if (expr->rank < 1 || !expr->ref || expr->ref->next)
> >      return false;
> >
> > and gfc_full_array_ref_p(expr) bails out if expr->ref->type !=
> REF_ARRAY.
> 
> Indeed that check (as is) prevents the use of component refs.
> (I just tried to modify the this part to cycle thru the refs, but then
> I get regressions in the testsuite for some of the coarray tests.
> Furthermore, gfc_trans_zero_assign would need further changes to
> handle even the constant shapes from above.)
> 
> > Looking thru git history, it seems both the checks were added in
> 18eaa2c0cd20 to fix PR33370.
> > (Even after removing these checks, the previous patch bails out from
> > gfc_trans_zero_assign because GFC_DESCRIPTOR_TYPE_P (type) returns
> > false for component ref and ends up returning NULL_TREE) I am
> working on extending the patch to handle component refs for statically
> sized as well as allocatable arrays.
> >
> > Since it looks like a bigger change and an extension to current
> > functionality, will it be OK to commit the previous patch as-is (if
> it looks correct) and address component refs in follow up one ?
> 
> I agree that it is reasonable to defer the handling of arrays as
> components of derived types, and recommend to do the following:
> 
> - replace "&& gfc_is_simply_contiguous (expr, true, false))" in your
>    last patch by "&& gfc_is_simply_contiguous (expr, false, false))",
>    as that would also allow to treat
> 
>    z(:,::1,:) = 0
> 
>    as contiguous if z is allocatable or a contiguous pointer.
> 
> - open a PR in bugzilla to track the missed-optimization for
>    the cases we discussed here, and link the discussion in the ML.
Done: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=115935
> 
> Your patch then will be OK for mainline.
Thanks, does the attached version look OK ?
Bootstrapped+tested on aarch64-linux-gnu, x86_64-linux-gnu.

Thanks,
Prathamesh
> 
> Thanks,
> Harald
> 
> > Thanks,
> > Prathamesh
> >>
> >> Thanks,
> >> Harald
> >>
> >>> Bootstrapped+tested on aarch64-linux-gnu.
> >>> Does the attached patch look OK ?
> >>>
> >>> Signed-off-by: Prathamesh Kulkarni <prathame...@nvidia.com>
> >>>
> >>> Thanks,
> >>> Prathamesh
> >>>>
> >>>> Thanks,
> >>>> Harald
> >>>>
> >>>>
> >>>>> Signed-off-by: Prathamesh Kulkarni <prathame...@nvidia.com>
> >>>>>
> >>>>> Thanks,
> >>>>> Prathamesh
> >>>
> >

Lower zeroing array assignment to memset for allocatable arrays.

gcc/fortran/ChangeLog:
        * trans-expr.cc (gfc_trans_zero_assign): Handle allocatable arrays.

gcc/testsuite/ChangeLog:
        * gfortran.dg/array_memset_3.f90: New test.

Signed-off-by: Prathamesh Kulkarni <prathame...@nvidia.com>

diff --git a/gcc/fortran/trans-expr.cc b/gcc/fortran/trans-expr.cc
index 477c2720187..a85b41bf815 100644
--- a/gcc/fortran/trans-expr.cc
+++ b/gcc/fortran/trans-expr.cc
@@ -11515,18 +11515,24 @@ gfc_trans_zero_assign (gfc_expr * expr)
   type = TREE_TYPE (dest);
   if (POINTER_TYPE_P (type))
     type = TREE_TYPE (type);
-  if (!GFC_ARRAY_TYPE_P (type))
-    return NULL_TREE;
-
-  /* Determine the length of the array.  */
-  len = GFC_TYPE_ARRAY_SIZE (type);
-  if (!len || TREE_CODE (len) != INTEGER_CST)
+  if (GFC_ARRAY_TYPE_P (type))
+    {
+      /* Determine the length of the array.  */
+      len = GFC_TYPE_ARRAY_SIZE (type);
+      if (!len || TREE_CODE (len) != INTEGER_CST)
+       return NULL_TREE;
+    }
+  else if (GFC_DESCRIPTOR_TYPE_P (type)
+         && gfc_is_simply_contiguous (expr, false, false))
+    {
+      if (POINTER_TYPE_P (TREE_TYPE (dest)))
+       dest = build_fold_indirect_ref_loc (input_location, dest);
+      len = gfc_conv_descriptor_size (dest, GFC_TYPE_ARRAY_RANK (type));
+      dest = gfc_conv_descriptor_data_get (dest);
+    }
+  else
     return NULL_TREE;
 
-  tmp = TYPE_SIZE_UNIT (gfc_get_element_type (type));
-  len = fold_build2_loc (input_location, MULT_EXPR, gfc_array_index_type, len,
-                        fold_convert (gfc_array_index_type, tmp));
-
   /* If we are zeroing a local array avoid taking its address by emitting
      a = {} instead.  */
   if (!POINTER_TYPE_P (TREE_TYPE (dest)))
@@ -11534,6 +11540,11 @@ gfc_trans_zero_assign (gfc_expr * expr)
                       dest, build_constructor (TREE_TYPE (dest),
                                              NULL));
 
+  /* Multiply len by element size.  */
+  tmp = TYPE_SIZE_UNIT (gfc_get_element_type (type));
+  len = fold_build2_loc (input_location, MULT_EXPR, gfc_array_index_type,
+                        len, fold_convert (gfc_array_index_type, tmp));
+
   /* Convert arguments to the correct types.  */
   dest = fold_convert (pvoid_type_node, dest);
   len = fold_convert (size_type_node, len);
diff --git a/gcc/testsuite/gfortran.dg/array_memset_3.f90 
b/gcc/testsuite/gfortran.dg/array_memset_3.f90
new file mode 100644
index 00000000000..f3945aacbbd
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/array_memset_3.f90
@@ -0,0 +1,52 @@
+! { dg-do compile }
+! { dg-options "-O2 -fdump-tree-original" }
+
+subroutine test1(n)
+  implicit none
+    integer(8) :: n
+    real(4), allocatable :: z(:,:,:)
+
+    allocate(z(n, 100, 200))
+    z = 0
+end subroutine
+
+subroutine test2(n)
+  implicit none
+    integer(8) :: n
+    integer, allocatable :: z(:,:,:)
+
+    allocate(z(n, 100, 200))
+    z = 0
+end subroutine
+
+subroutine test3(n)
+  implicit none
+    integer(8) :: n
+    logical, allocatable :: z(:,:,:)
+
+    allocate(z(n, 100, 200))
+    z = .false. 
+end subroutine
+
+subroutine test4(n, z)
+   implicit none
+   integer :: n
+   real, pointer :: z(:,:,:)     ! need not be contiguous!
+   z = 0
+end subroutine
+
+subroutine test5(n, z)
+   implicit none
+   integer :: n
+   real, contiguous, pointer :: z(:,:,:)
+   z = 0
+end subroutine
+
+subroutine test6 (n, z)
+   implicit none
+   integer :: n
+   real, contiguous, pointer :: z(:,:,:)
+   z(:,::1,:) = 0
+end subroutine
+
+! { dg-final { scan-tree-dump-times "__builtin_memset" 5 "original" } }

Reply via email to