> -----Original Message-----
> From: Harald Anlauf <[email protected]>
> Sent: Thursday, July 11, 2024 12:53 AM
> To: Prathamesh Kulkarni <[email protected]>; gcc-
> [email protected]; [email protected]
> Subject: Re: Lower zeroing array assignment to memset for allocatable
> arrays
>
> External email: Use caution opening links or attachments
>
>
> Hi Prathamesh,
>
> Am 10.07.24 um 13:22 schrieb Prathamesh Kulkarni:
> > Hi,
> > The attached patch lowers zeroing array assignment to memset for
> allocatable arrays.
> >
> > For example:
> > subroutine test(z, n)
> > implicit none
> > integer :: n
> > real(4), allocatable :: z(:,:,:)
> >
> > allocate(z(n, 8192, 2048))
> > z = 0
> > end subroutine
> >
> > results in following call to memset instead of 3 nested loops for z
> = 0:
> > (void) __builtin_memset ((void *) z->data, 0, (unsigned long)
> > ((((MAX_EXPR <z->dim[0].ubound - z->dim[0].lbound, -1> + 1) *
> > (MAX_EXPR <z->dim[1].ubound - z->dim[1].lbound, -1> + 1)) *
> (MAX_EXPR
> > <z->dim[2].ubound - z->dim[2].lbound, -1> + 1)) * 4));
> >
> > The patch significantly improves speedup for an internal Fortran
> application on AArch64 -mcpu=grace (and potentially on other AArch64
> cores too).
> > Bootstrapped+tested on aarch64-linux-gnu.
> > Does the patch look OK to commit ?
>
> no, it is NOT ok.
>
> Consider:
>
> subroutine test0 (n, z)
> implicit none
> integer :: n
> real, pointer :: z(:,:,:) ! need not be contiguous!
> z = 0
> end subroutine
>
> After your patch this also generates a memset, but this cannot be true
> in general. One would need to have a test on contiguity of the array
> before memset can be used.
>
> In principle this is a nice idea, and IIRC there exists a very old PR
> on this (by Thomas König?). So it might be worth pursuing.
Hi Harald,
Thanks for the suggestions!
The attached patch checks gfc_is_simply_contiguous(expr, true, false) before
lowering to memset,
which avoids generating memset for your example above.
Bootstrapped+tested on aarch64-linux-gnu.
Does the attached patch look OK ?
Signed-off-by: Prathamesh Kulkarni <[email protected]>
Thanks,
Prathamesh
>
> Thanks,
> Harald
>
>
> > Signed-off-by: Prathamesh Kulkarni <[email protected]>
> >
> > Thanks,
> > Prathamesh
Lower zeroing array assignment to memset for allocatable arrays.
gcc/fortran/ChangeLog:
* trans-expr.cc (gfc_trans_zero_assign): Handle allocatable arrays.
gcc/testsuite/ChangeLog:
* gfortran.dg/array_memset_3.f90: New test.
Signed-off-by: Prathamesh Kulkarni <[email protected]>
diff --git a/gcc/fortran/trans-expr.cc b/gcc/fortran/trans-expr.cc
index 477c2720187..f9a7f70b2a3 100644
--- a/gcc/fortran/trans-expr.cc
+++ b/gcc/fortran/trans-expr.cc
@@ -11515,18 +11515,24 @@ gfc_trans_zero_assign (gfc_expr * expr)
type = TREE_TYPE (dest);
if (POINTER_TYPE_P (type))
type = TREE_TYPE (type);
- if (!GFC_ARRAY_TYPE_P (type))
- return NULL_TREE;
-
- /* Determine the length of the array. */
- len = GFC_TYPE_ARRAY_SIZE (type);
- if (!len || TREE_CODE (len) != INTEGER_CST)
+ if (GFC_ARRAY_TYPE_P (type))
+ {
+ /* Determine the length of the array. */
+ len = GFC_TYPE_ARRAY_SIZE (type);
+ if (!len || TREE_CODE (len) != INTEGER_CST)
+ return NULL_TREE;
+ }
+ else if (GFC_DESCRIPTOR_TYPE_P (type)
+ && gfc_is_simply_contiguous (expr, true, false))
+ {
+ if (POINTER_TYPE_P (TREE_TYPE (dest)))
+ dest = build_fold_indirect_ref_loc (input_location, dest);
+ len = gfc_conv_descriptor_size (dest, GFC_TYPE_ARRAY_RANK (type));
+ dest = gfc_conv_descriptor_data_get (dest);
+ }
+ else
return NULL_TREE;
- tmp = TYPE_SIZE_UNIT (gfc_get_element_type (type));
- len = fold_build2_loc (input_location, MULT_EXPR, gfc_array_index_type, len,
- fold_convert (gfc_array_index_type, tmp));
-
/* If we are zeroing a local array avoid taking its address by emitting
a = {} instead. */
if (!POINTER_TYPE_P (TREE_TYPE (dest)))
@@ -11534,6 +11540,11 @@ gfc_trans_zero_assign (gfc_expr * expr)
dest, build_constructor (TREE_TYPE (dest),
NULL));
+ /* Multiply len by element size. */
+ tmp = TYPE_SIZE_UNIT (gfc_get_element_type (type));
+ len = fold_build2_loc (input_location, MULT_EXPR, gfc_array_index_type,
+ len, fold_convert (gfc_array_index_type, tmp));
+
/* Convert arguments to the correct types. */
dest = fold_convert (pvoid_type_node, dest);
len = fold_convert (size_type_node, len);
diff --git a/gcc/testsuite/gfortran.dg/array_memset_3.f90
b/gcc/testsuite/gfortran.dg/array_memset_3.f90
new file mode 100644
index 00000000000..753006f7a91
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/array_memset_3.f90
@@ -0,0 +1,45 @@
+! { dg-do compile }
+! { dg-options "-O2 -fdump-tree-original" }
+
+subroutine test1(n)
+ implicit none
+ integer(8) :: n
+ real(4), allocatable :: z(:,:,:)
+
+ allocate(z(n, 100, 200))
+ z = 0
+end subroutine
+
+subroutine test2(n)
+ implicit none
+ integer(8) :: n
+ integer, allocatable :: z(:,:,:)
+
+ allocate(z(n, 100, 200))
+ z = 0
+end subroutine
+
+subroutine test3(n)
+ implicit none
+ integer(8) :: n
+ logical, allocatable :: z(:,:,:)
+
+ allocate(z(n, 100, 200))
+ z = .false.
+end subroutine
+
+subroutine test4(n, z)
+ implicit none
+ integer :: n
+ real, pointer :: z(:,:,:) ! need not be contiguous!
+ z = 0
+end subroutine
+
+subroutine test5(n, z)
+ implicit none
+ integer :: n
+ real, contiguous, pointer :: z(:,:,:)
+ z = 0
+end subroutine
+
+! { dg-final { scan-tree-dump-times "__builtin_memset" 4 "original" } }