Hello world, the attached patch implements a blocked algorithm for improving the speed of cshift for dim > 1.
It uses the fact that integer, dimension (n1,n2,n3) :: a, b b = cshift(a,shift,3) is identical, as far as the memory locations is concerned. integer, dimension (n1*n2*n3) :: c, d d = cshift(c, shift*n1*n2, 1) The speedup is quite large; from being really slow for dim > 1, this patch makes it go even faster. Below there are some comparisons for the attached benchmark, do-1.f90. gfortran-7 uses the old library version. Interestingly, the library version is also much faster than an implementation of straight DO loops. Regression-tested. OK for trunk? Regards Thomas $ gfortran-7 -static-libgfortran -O3 do-1.f90 && ./a.out Testing explicit DO loops Dim = 1 Elapsed CPU time = 5.71363878 Dim = 2 Elapsed CPU time = 5.40494061 Dim = 3 Elapsed CPU time = 5.40769291 Testing built-in cshift Dim = 1 Elapsed CPU time = 3.43479729 Dim = 2 Elapsed CPU time = 11.7110386 Dim = 3 Elapsed CPU time = 31.0966301 $ gfortran -static-libgfortran -O3 do-1.f90 && ./a.out Testing explicit DO loops Dim = 1 Elapsed CPU time = 5.73881340 Dim = 2 Elapsed CPU time = 5.38435745 Dim = 3 Elapsed CPU time = 5.38971329 Testing built-in cshift Dim = 1 Elapsed CPU time = 3.42018127 Dim = 2 Elapsed CPU time = 2.24075317 Dim = 3 Elapsed CPU time = 2.23136330 2017-06-14 Thomas Koenig <tkoe...@gcc.gnu.org> PR fortran/52473 * m4/cshift0.m4: For arrays that are contiguous up to shift, implement blocked algorighm for cshift. * generated/cshift0_c10.c: Regenerated. * generated/cshift0_c16.c: Regenerated. * generated/cshift0_c4.c: Regenerated. * generated/cshift0_c8.c: Regenerated. * generated/cshift0_i1.c: Regenerated. * generated/cshift0_i16.c: Regenerated. * generated/cshift0_i2.c: Regenerated. * generated/cshift0_i4.c: Regenerated. * generated/cshift0_i8.c: Regenerated. * generated/cshift0_r10.c: Regenerated. * generated/cshift0_r16.c: Regenerated. * generated/cshift0_r4.c: Regenerated. * generated/cshift0_r8.c: Regenerated. 2017-06-14 Thomas Koenig <tkoe...@gcc.gnu.org> PR fortran/52473 * gfortran.dg/cshift_1.f90: New test.
Index: m4/cshift0.m4 =================================================================== --- m4/cshift0.m4 (Revision 249104) +++ m4/cshift0.m4 (Arbeitskopie) @@ -52,6 +52,9 @@ index_type len; index_type n; + bool do_blocked; + index_type r_ex, a_ex; + which = which - 1; sstride[0] = 0; rstride[0] = 0; @@ -64,33 +67,99 @@ soffset = 1; len = 0; - for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++) + r_ex = 1; + a_ex = 1; + + if (which > 0) { - if (dim == which) - { - roffset = GFC_DESCRIPTOR_STRIDE(ret,dim); - if (roffset == 0) - roffset = 1; - soffset = GFC_DESCRIPTOR_STRIDE(array,dim); - if (soffset == 0) - soffset = 1; - len = GFC_DESCRIPTOR_EXTENT(array,dim); - } - else - { - count[n] = 0; - extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); - rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); - sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); - n++; - } + /* Test if both ret and array are contiguous. */ + do_blocked = true; + dim = GFC_DESCRIPTOR_RANK (array); + for (n = 0; n < dim; n ++) + { + index_type rs, as; + rs = GFC_DESCRIPTOR_STRIDE (ret, n); + if (rs != r_ex) + { + do_blocked = false; + break; + } + as = GFC_DESCRIPTOR_STRIDE (array, n); + if (as != a_ex) + { + do_blocked = false; + break; + } + r_ex *= GFC_DESCRIPTOR_EXTENT (ret, n); + a_ex *= GFC_DESCRIPTOR_EXTENT (array, n); + } } - if (sstride[0] == 0) - sstride[0] = 1; - if (rstride[0] == 0) - rstride[0] = 1; + else + do_blocked = false; - dim = GFC_DESCRIPTOR_RANK (array); + n = 0; + + if (do_blocked) + { + /* For contiguous arrays, use the relationship that + + dimension(n1,n2,n3) :: a, b + b = cshift(a,sh,3) + + can be dealt with as if + + dimension(n1*n2*n3) :: an, bn + bn = cshift(a,sh*n1*n2,1) + + we can used a more blocked algorithm for dim>1. */ + sstride[0] = 1; + rstride[0] = 1; + roffset = 1; + soffset = 1; + len = GFC_DESCRIPTOR_STRIDE(array, which) + * GFC_DESCRIPTOR_EXTENT(array, which); + shift *= GFC_DESCRIPTOR_STRIDE(array, which); + for (dim = which + 1; dim < GFC_DESCRIPTOR_RANK (array); dim++) + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); + n++; + } + dim = GFC_DESCRIPTOR_RANK (array) - which; + } + else + { + for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++) + { + if (dim == which) + { + roffset = GFC_DESCRIPTOR_STRIDE(ret,dim); + if (roffset == 0) + roffset = 1; + soffset = GFC_DESCRIPTOR_STRIDE(array,dim); + if (soffset == 0) + soffset = 1; + len = GFC_DESCRIPTOR_EXTENT(array,dim); + } + else + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); + n++; + } + } + if (sstride[0] == 0) + sstride[0] = 1; + if (rstride[0] == 0) + rstride[0] = 1; + + dim = GFC_DESCRIPTOR_RANK (array); + } + rstride0 = rstride[0]; sstride0 = sstride[0]; rptr = ret->base_addr; Index: generated/cshift0_c10.c =================================================================== --- generated/cshift0_c10.c (Revision 249104) +++ generated/cshift0_c10.c (Arbeitskopie) @@ -51,6 +51,9 @@ index_type len; index_type n; + bool do_blocked; + index_type r_ex, a_ex; + which = which - 1; sstride[0] = 0; rstride[0] = 0; @@ -63,33 +66,99 @@ soffset = 1; len = 0; - for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++) + r_ex = 1; + a_ex = 1; + + if (which > 0) { - if (dim == which) - { - roffset = GFC_DESCRIPTOR_STRIDE(ret,dim); - if (roffset == 0) - roffset = 1; - soffset = GFC_DESCRIPTOR_STRIDE(array,dim); - if (soffset == 0) - soffset = 1; - len = GFC_DESCRIPTOR_EXTENT(array,dim); - } - else - { - count[n] = 0; - extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); - rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); - sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); - n++; - } + /* Test if both ret and array are contiguous. */ + do_blocked = true; + dim = GFC_DESCRIPTOR_RANK (array); + for (n = 0; n < dim; n ++) + { + index_type rs, as; + rs = GFC_DESCRIPTOR_STRIDE (ret, n); + if (rs != r_ex) + { + do_blocked = false; + break; + } + as = GFC_DESCRIPTOR_STRIDE (array, n); + if (as != a_ex) + { + do_blocked = false; + break; + } + r_ex *= GFC_DESCRIPTOR_EXTENT (ret, n); + a_ex *= GFC_DESCRIPTOR_EXTENT (array, n); + } } - if (sstride[0] == 0) - sstride[0] = 1; - if (rstride[0] == 0) - rstride[0] = 1; + else + do_blocked = false; - dim = GFC_DESCRIPTOR_RANK (array); + n = 0; + + if (do_blocked) + { + /* For contiguous arrays, use the relationship that + + dimension(n1,n2,n3) :: a, b + b = cshift(a,sh,3) + + can be dealt with as if + + dimension(n1*n2*n3) :: an, bn + bn = cshift(a,sh*n1*n2,1) + + we can used a more blocked algorithm for dim>1. */ + sstride[0] = 1; + rstride[0] = 1; + roffset = 1; + soffset = 1; + len = GFC_DESCRIPTOR_STRIDE(array, which) + * GFC_DESCRIPTOR_EXTENT(array, which); + shift *= GFC_DESCRIPTOR_STRIDE(array, which); + for (dim = which + 1; dim < GFC_DESCRIPTOR_RANK (array); dim++) + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); + n++; + } + dim = GFC_DESCRIPTOR_RANK (array) - which; + } + else + { + for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++) + { + if (dim == which) + { + roffset = GFC_DESCRIPTOR_STRIDE(ret,dim); + if (roffset == 0) + roffset = 1; + soffset = GFC_DESCRIPTOR_STRIDE(array,dim); + if (soffset == 0) + soffset = 1; + len = GFC_DESCRIPTOR_EXTENT(array,dim); + } + else + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); + n++; + } + } + if (sstride[0] == 0) + sstride[0] = 1; + if (rstride[0] == 0) + rstride[0] = 1; + + dim = GFC_DESCRIPTOR_RANK (array); + } + rstride0 = rstride[0]; sstride0 = sstride[0]; rptr = ret->base_addr; Index: generated/cshift0_c16.c =================================================================== --- generated/cshift0_c16.c (Revision 249104) +++ generated/cshift0_c16.c (Arbeitskopie) @@ -51,6 +51,9 @@ index_type len; index_type n; + bool do_blocked; + index_type r_ex, a_ex; + which = which - 1; sstride[0] = 0; rstride[0] = 0; @@ -63,33 +66,99 @@ soffset = 1; len = 0; - for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++) + r_ex = 1; + a_ex = 1; + + if (which > 0) { - if (dim == which) - { - roffset = GFC_DESCRIPTOR_STRIDE(ret,dim); - if (roffset == 0) - roffset = 1; - soffset = GFC_DESCRIPTOR_STRIDE(array,dim); - if (soffset == 0) - soffset = 1; - len = GFC_DESCRIPTOR_EXTENT(array,dim); - } - else - { - count[n] = 0; - extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); - rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); - sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); - n++; - } + /* Test if both ret and array are contiguous. */ + do_blocked = true; + dim = GFC_DESCRIPTOR_RANK (array); + for (n = 0; n < dim; n ++) + { + index_type rs, as; + rs = GFC_DESCRIPTOR_STRIDE (ret, n); + if (rs != r_ex) + { + do_blocked = false; + break; + } + as = GFC_DESCRIPTOR_STRIDE (array, n); + if (as != a_ex) + { + do_blocked = false; + break; + } + r_ex *= GFC_DESCRIPTOR_EXTENT (ret, n); + a_ex *= GFC_DESCRIPTOR_EXTENT (array, n); + } } - if (sstride[0] == 0) - sstride[0] = 1; - if (rstride[0] == 0) - rstride[0] = 1; + else + do_blocked = false; - dim = GFC_DESCRIPTOR_RANK (array); + n = 0; + + if (do_blocked) + { + /* For contiguous arrays, use the relationship that + + dimension(n1,n2,n3) :: a, b + b = cshift(a,sh,3) + + can be dealt with as if + + dimension(n1*n2*n3) :: an, bn + bn = cshift(a,sh*n1*n2,1) + + we can used a more blocked algorithm for dim>1. */ + sstride[0] = 1; + rstride[0] = 1; + roffset = 1; + soffset = 1; + len = GFC_DESCRIPTOR_STRIDE(array, which) + * GFC_DESCRIPTOR_EXTENT(array, which); + shift *= GFC_DESCRIPTOR_STRIDE(array, which); + for (dim = which + 1; dim < GFC_DESCRIPTOR_RANK (array); dim++) + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); + n++; + } + dim = GFC_DESCRIPTOR_RANK (array) - which; + } + else + { + for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++) + { + if (dim == which) + { + roffset = GFC_DESCRIPTOR_STRIDE(ret,dim); + if (roffset == 0) + roffset = 1; + soffset = GFC_DESCRIPTOR_STRIDE(array,dim); + if (soffset == 0) + soffset = 1; + len = GFC_DESCRIPTOR_EXTENT(array,dim); + } + else + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); + n++; + } + } + if (sstride[0] == 0) + sstride[0] = 1; + if (rstride[0] == 0) + rstride[0] = 1; + + dim = GFC_DESCRIPTOR_RANK (array); + } + rstride0 = rstride[0]; sstride0 = sstride[0]; rptr = ret->base_addr; Index: generated/cshift0_c4.c =================================================================== --- generated/cshift0_c4.c (Revision 249104) +++ generated/cshift0_c4.c (Arbeitskopie) @@ -51,6 +51,9 @@ index_type len; index_type n; + bool do_blocked; + index_type r_ex, a_ex; + which = which - 1; sstride[0] = 0; rstride[0] = 0; @@ -63,33 +66,99 @@ soffset = 1; len = 0; - for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++) + r_ex = 1; + a_ex = 1; + + if (which > 0) { - if (dim == which) - { - roffset = GFC_DESCRIPTOR_STRIDE(ret,dim); - if (roffset == 0) - roffset = 1; - soffset = GFC_DESCRIPTOR_STRIDE(array,dim); - if (soffset == 0) - soffset = 1; - len = GFC_DESCRIPTOR_EXTENT(array,dim); - } - else - { - count[n] = 0; - extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); - rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); - sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); - n++; - } + /* Test if both ret and array are contiguous. */ + do_blocked = true; + dim = GFC_DESCRIPTOR_RANK (array); + for (n = 0; n < dim; n ++) + { + index_type rs, as; + rs = GFC_DESCRIPTOR_STRIDE (ret, n); + if (rs != r_ex) + { + do_blocked = false; + break; + } + as = GFC_DESCRIPTOR_STRIDE (array, n); + if (as != a_ex) + { + do_blocked = false; + break; + } + r_ex *= GFC_DESCRIPTOR_EXTENT (ret, n); + a_ex *= GFC_DESCRIPTOR_EXTENT (array, n); + } } - if (sstride[0] == 0) - sstride[0] = 1; - if (rstride[0] == 0) - rstride[0] = 1; + else + do_blocked = false; - dim = GFC_DESCRIPTOR_RANK (array); + n = 0; + + if (do_blocked) + { + /* For contiguous arrays, use the relationship that + + dimension(n1,n2,n3) :: a, b + b = cshift(a,sh,3) + + can be dealt with as if + + dimension(n1*n2*n3) :: an, bn + bn = cshift(a,sh*n1*n2,1) + + we can used a more blocked algorithm for dim>1. */ + sstride[0] = 1; + rstride[0] = 1; + roffset = 1; + soffset = 1; + len = GFC_DESCRIPTOR_STRIDE(array, which) + * GFC_DESCRIPTOR_EXTENT(array, which); + shift *= GFC_DESCRIPTOR_STRIDE(array, which); + for (dim = which + 1; dim < GFC_DESCRIPTOR_RANK (array); dim++) + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); + n++; + } + dim = GFC_DESCRIPTOR_RANK (array) - which; + } + else + { + for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++) + { + if (dim == which) + { + roffset = GFC_DESCRIPTOR_STRIDE(ret,dim); + if (roffset == 0) + roffset = 1; + soffset = GFC_DESCRIPTOR_STRIDE(array,dim); + if (soffset == 0) + soffset = 1; + len = GFC_DESCRIPTOR_EXTENT(array,dim); + } + else + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); + n++; + } + } + if (sstride[0] == 0) + sstride[0] = 1; + if (rstride[0] == 0) + rstride[0] = 1; + + dim = GFC_DESCRIPTOR_RANK (array); + } + rstride0 = rstride[0]; sstride0 = sstride[0]; rptr = ret->base_addr; Index: generated/cshift0_c8.c =================================================================== --- generated/cshift0_c8.c (Revision 249104) +++ generated/cshift0_c8.c (Arbeitskopie) @@ -51,6 +51,9 @@ index_type len; index_type n; + bool do_blocked; + index_type r_ex, a_ex; + which = which - 1; sstride[0] = 0; rstride[0] = 0; @@ -63,33 +66,99 @@ soffset = 1; len = 0; - for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++) + r_ex = 1; + a_ex = 1; + + if (which > 0) { - if (dim == which) - { - roffset = GFC_DESCRIPTOR_STRIDE(ret,dim); - if (roffset == 0) - roffset = 1; - soffset = GFC_DESCRIPTOR_STRIDE(array,dim); - if (soffset == 0) - soffset = 1; - len = GFC_DESCRIPTOR_EXTENT(array,dim); - } - else - { - count[n] = 0; - extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); - rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); - sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); - n++; - } + /* Test if both ret and array are contiguous. */ + do_blocked = true; + dim = GFC_DESCRIPTOR_RANK (array); + for (n = 0; n < dim; n ++) + { + index_type rs, as; + rs = GFC_DESCRIPTOR_STRIDE (ret, n); + if (rs != r_ex) + { + do_blocked = false; + break; + } + as = GFC_DESCRIPTOR_STRIDE (array, n); + if (as != a_ex) + { + do_blocked = false; + break; + } + r_ex *= GFC_DESCRIPTOR_EXTENT (ret, n); + a_ex *= GFC_DESCRIPTOR_EXTENT (array, n); + } } - if (sstride[0] == 0) - sstride[0] = 1; - if (rstride[0] == 0) - rstride[0] = 1; + else + do_blocked = false; - dim = GFC_DESCRIPTOR_RANK (array); + n = 0; + + if (do_blocked) + { + /* For contiguous arrays, use the relationship that + + dimension(n1,n2,n3) :: a, b + b = cshift(a,sh,3) + + can be dealt with as if + + dimension(n1*n2*n3) :: an, bn + bn = cshift(a,sh*n1*n2,1) + + we can used a more blocked algorithm for dim>1. */ + sstride[0] = 1; + rstride[0] = 1; + roffset = 1; + soffset = 1; + len = GFC_DESCRIPTOR_STRIDE(array, which) + * GFC_DESCRIPTOR_EXTENT(array, which); + shift *= GFC_DESCRIPTOR_STRIDE(array, which); + for (dim = which + 1; dim < GFC_DESCRIPTOR_RANK (array); dim++) + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); + n++; + } + dim = GFC_DESCRIPTOR_RANK (array) - which; + } + else + { + for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++) + { + if (dim == which) + { + roffset = GFC_DESCRIPTOR_STRIDE(ret,dim); + if (roffset == 0) + roffset = 1; + soffset = GFC_DESCRIPTOR_STRIDE(array,dim); + if (soffset == 0) + soffset = 1; + len = GFC_DESCRIPTOR_EXTENT(array,dim); + } + else + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); + n++; + } + } + if (sstride[0] == 0) + sstride[0] = 1; + if (rstride[0] == 0) + rstride[0] = 1; + + dim = GFC_DESCRIPTOR_RANK (array); + } + rstride0 = rstride[0]; sstride0 = sstride[0]; rptr = ret->base_addr; Index: generated/cshift0_i1.c =================================================================== --- generated/cshift0_i1.c (Revision 249104) +++ generated/cshift0_i1.c (Arbeitskopie) @@ -51,6 +51,9 @@ index_type len; index_type n; + bool do_blocked; + index_type r_ex, a_ex; + which = which - 1; sstride[0] = 0; rstride[0] = 0; @@ -63,33 +66,99 @@ soffset = 1; len = 0; - for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++) + r_ex = 1; + a_ex = 1; + + if (which > 0) { - if (dim == which) - { - roffset = GFC_DESCRIPTOR_STRIDE(ret,dim); - if (roffset == 0) - roffset = 1; - soffset = GFC_DESCRIPTOR_STRIDE(array,dim); - if (soffset == 0) - soffset = 1; - len = GFC_DESCRIPTOR_EXTENT(array,dim); - } - else - { - count[n] = 0; - extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); - rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); - sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); - n++; - } + /* Test if both ret and array are contiguous. */ + do_blocked = true; + dim = GFC_DESCRIPTOR_RANK (array); + for (n = 0; n < dim; n ++) + { + index_type rs, as; + rs = GFC_DESCRIPTOR_STRIDE (ret, n); + if (rs != r_ex) + { + do_blocked = false; + break; + } + as = GFC_DESCRIPTOR_STRIDE (array, n); + if (as != a_ex) + { + do_blocked = false; + break; + } + r_ex *= GFC_DESCRIPTOR_EXTENT (ret, n); + a_ex *= GFC_DESCRIPTOR_EXTENT (array, n); + } } - if (sstride[0] == 0) - sstride[0] = 1; - if (rstride[0] == 0) - rstride[0] = 1; + else + do_blocked = false; - dim = GFC_DESCRIPTOR_RANK (array); + n = 0; + + if (do_blocked) + { + /* For contiguous arrays, use the relationship that + + dimension(n1,n2,n3) :: a, b + b = cshift(a,sh,3) + + can be dealt with as if + + dimension(n1*n2*n3) :: an, bn + bn = cshift(a,sh*n1*n2,1) + + we can used a more blocked algorithm for dim>1. */ + sstride[0] = 1; + rstride[0] = 1; + roffset = 1; + soffset = 1; + len = GFC_DESCRIPTOR_STRIDE(array, which) + * GFC_DESCRIPTOR_EXTENT(array, which); + shift *= GFC_DESCRIPTOR_STRIDE(array, which); + for (dim = which + 1; dim < GFC_DESCRIPTOR_RANK (array); dim++) + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); + n++; + } + dim = GFC_DESCRIPTOR_RANK (array) - which; + } + else + { + for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++) + { + if (dim == which) + { + roffset = GFC_DESCRIPTOR_STRIDE(ret,dim); + if (roffset == 0) + roffset = 1; + soffset = GFC_DESCRIPTOR_STRIDE(array,dim); + if (soffset == 0) + soffset = 1; + len = GFC_DESCRIPTOR_EXTENT(array,dim); + } + else + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); + n++; + } + } + if (sstride[0] == 0) + sstride[0] = 1; + if (rstride[0] == 0) + rstride[0] = 1; + + dim = GFC_DESCRIPTOR_RANK (array); + } + rstride0 = rstride[0]; sstride0 = sstride[0]; rptr = ret->base_addr; Index: generated/cshift0_i16.c =================================================================== --- generated/cshift0_i16.c (Revision 249104) +++ generated/cshift0_i16.c (Arbeitskopie) @@ -51,6 +51,9 @@ index_type len; index_type n; + bool do_blocked; + index_type r_ex, a_ex; + which = which - 1; sstride[0] = 0; rstride[0] = 0; @@ -63,33 +66,99 @@ soffset = 1; len = 0; - for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++) + r_ex = 1; + a_ex = 1; + + if (which > 0) { - if (dim == which) - { - roffset = GFC_DESCRIPTOR_STRIDE(ret,dim); - if (roffset == 0) - roffset = 1; - soffset = GFC_DESCRIPTOR_STRIDE(array,dim); - if (soffset == 0) - soffset = 1; - len = GFC_DESCRIPTOR_EXTENT(array,dim); - } - else - { - count[n] = 0; - extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); - rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); - sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); - n++; - } + /* Test if both ret and array are contiguous. */ + do_blocked = true; + dim = GFC_DESCRIPTOR_RANK (array); + for (n = 0; n < dim; n ++) + { + index_type rs, as; + rs = GFC_DESCRIPTOR_STRIDE (ret, n); + if (rs != r_ex) + { + do_blocked = false; + break; + } + as = GFC_DESCRIPTOR_STRIDE (array, n); + if (as != a_ex) + { + do_blocked = false; + break; + } + r_ex *= GFC_DESCRIPTOR_EXTENT (ret, n); + a_ex *= GFC_DESCRIPTOR_EXTENT (array, n); + } } - if (sstride[0] == 0) - sstride[0] = 1; - if (rstride[0] == 0) - rstride[0] = 1; + else + do_blocked = false; - dim = GFC_DESCRIPTOR_RANK (array); + n = 0; + + if (do_blocked) + { + /* For contiguous arrays, use the relationship that + + dimension(n1,n2,n3) :: a, b + b = cshift(a,sh,3) + + can be dealt with as if + + dimension(n1*n2*n3) :: an, bn + bn = cshift(a,sh*n1*n2,1) + + we can used a more blocked algorithm for dim>1. */ + sstride[0] = 1; + rstride[0] = 1; + roffset = 1; + soffset = 1; + len = GFC_DESCRIPTOR_STRIDE(array, which) + * GFC_DESCRIPTOR_EXTENT(array, which); + shift *= GFC_DESCRIPTOR_STRIDE(array, which); + for (dim = which + 1; dim < GFC_DESCRIPTOR_RANK (array); dim++) + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); + n++; + } + dim = GFC_DESCRIPTOR_RANK (array) - which; + } + else + { + for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++) + { + if (dim == which) + { + roffset = GFC_DESCRIPTOR_STRIDE(ret,dim); + if (roffset == 0) + roffset = 1; + soffset = GFC_DESCRIPTOR_STRIDE(array,dim); + if (soffset == 0) + soffset = 1; + len = GFC_DESCRIPTOR_EXTENT(array,dim); + } + else + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); + n++; + } + } + if (sstride[0] == 0) + sstride[0] = 1; + if (rstride[0] == 0) + rstride[0] = 1; + + dim = GFC_DESCRIPTOR_RANK (array); + } + rstride0 = rstride[0]; sstride0 = sstride[0]; rptr = ret->base_addr; Index: generated/cshift0_i2.c =================================================================== --- generated/cshift0_i2.c (Revision 249104) +++ generated/cshift0_i2.c (Arbeitskopie) @@ -51,6 +51,9 @@ index_type len; index_type n; + bool do_blocked; + index_type r_ex, a_ex; + which = which - 1; sstride[0] = 0; rstride[0] = 0; @@ -63,33 +66,99 @@ soffset = 1; len = 0; - for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++) + r_ex = 1; + a_ex = 1; + + if (which > 0) { - if (dim == which) - { - roffset = GFC_DESCRIPTOR_STRIDE(ret,dim); - if (roffset == 0) - roffset = 1; - soffset = GFC_DESCRIPTOR_STRIDE(array,dim); - if (soffset == 0) - soffset = 1; - len = GFC_DESCRIPTOR_EXTENT(array,dim); - } - else - { - count[n] = 0; - extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); - rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); - sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); - n++; - } + /* Test if both ret and array are contiguous. */ + do_blocked = true; + dim = GFC_DESCRIPTOR_RANK (array); + for (n = 0; n < dim; n ++) + { + index_type rs, as; + rs = GFC_DESCRIPTOR_STRIDE (ret, n); + if (rs != r_ex) + { + do_blocked = false; + break; + } + as = GFC_DESCRIPTOR_STRIDE (array, n); + if (as != a_ex) + { + do_blocked = false; + break; + } + r_ex *= GFC_DESCRIPTOR_EXTENT (ret, n); + a_ex *= GFC_DESCRIPTOR_EXTENT (array, n); + } } - if (sstride[0] == 0) - sstride[0] = 1; - if (rstride[0] == 0) - rstride[0] = 1; + else + do_blocked = false; - dim = GFC_DESCRIPTOR_RANK (array); + n = 0; + + if (do_blocked) + { + /* For contiguous arrays, use the relationship that + + dimension(n1,n2,n3) :: a, b + b = cshift(a,sh,3) + + can be dealt with as if + + dimension(n1*n2*n3) :: an, bn + bn = cshift(a,sh*n1*n2,1) + + we can used a more blocked algorithm for dim>1. */ + sstride[0] = 1; + rstride[0] = 1; + roffset = 1; + soffset = 1; + len = GFC_DESCRIPTOR_STRIDE(array, which) + * GFC_DESCRIPTOR_EXTENT(array, which); + shift *= GFC_DESCRIPTOR_STRIDE(array, which); + for (dim = which + 1; dim < GFC_DESCRIPTOR_RANK (array); dim++) + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); + n++; + } + dim = GFC_DESCRIPTOR_RANK (array) - which; + } + else + { + for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++) + { + if (dim == which) + { + roffset = GFC_DESCRIPTOR_STRIDE(ret,dim); + if (roffset == 0) + roffset = 1; + soffset = GFC_DESCRIPTOR_STRIDE(array,dim); + if (soffset == 0) + soffset = 1; + len = GFC_DESCRIPTOR_EXTENT(array,dim); + } + else + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); + n++; + } + } + if (sstride[0] == 0) + sstride[0] = 1; + if (rstride[0] == 0) + rstride[0] = 1; + + dim = GFC_DESCRIPTOR_RANK (array); + } + rstride0 = rstride[0]; sstride0 = sstride[0]; rptr = ret->base_addr; Index: generated/cshift0_i4.c =================================================================== --- generated/cshift0_i4.c (Revision 249104) +++ generated/cshift0_i4.c (Arbeitskopie) @@ -51,6 +51,9 @@ index_type len; index_type n; + bool do_blocked; + index_type r_ex, a_ex; + which = which - 1; sstride[0] = 0; rstride[0] = 0; @@ -63,33 +66,99 @@ soffset = 1; len = 0; - for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++) + r_ex = 1; + a_ex = 1; + + if (which > 0) { - if (dim == which) - { - roffset = GFC_DESCRIPTOR_STRIDE(ret,dim); - if (roffset == 0) - roffset = 1; - soffset = GFC_DESCRIPTOR_STRIDE(array,dim); - if (soffset == 0) - soffset = 1; - len = GFC_DESCRIPTOR_EXTENT(array,dim); - } - else - { - count[n] = 0; - extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); - rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); - sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); - n++; - } + /* Test if both ret and array are contiguous. */ + do_blocked = true; + dim = GFC_DESCRIPTOR_RANK (array); + for (n = 0; n < dim; n ++) + { + index_type rs, as; + rs = GFC_DESCRIPTOR_STRIDE (ret, n); + if (rs != r_ex) + { + do_blocked = false; + break; + } + as = GFC_DESCRIPTOR_STRIDE (array, n); + if (as != a_ex) + { + do_blocked = false; + break; + } + r_ex *= GFC_DESCRIPTOR_EXTENT (ret, n); + a_ex *= GFC_DESCRIPTOR_EXTENT (array, n); + } } - if (sstride[0] == 0) - sstride[0] = 1; - if (rstride[0] == 0) - rstride[0] = 1; + else + do_blocked = false; - dim = GFC_DESCRIPTOR_RANK (array); + n = 0; + + if (do_blocked) + { + /* For contiguous arrays, use the relationship that + + dimension(n1,n2,n3) :: a, b + b = cshift(a,sh,3) + + can be dealt with as if + + dimension(n1*n2*n3) :: an, bn + bn = cshift(a,sh*n1*n2,1) + + we can used a more blocked algorithm for dim>1. */ + sstride[0] = 1; + rstride[0] = 1; + roffset = 1; + soffset = 1; + len = GFC_DESCRIPTOR_STRIDE(array, which) + * GFC_DESCRIPTOR_EXTENT(array, which); + shift *= GFC_DESCRIPTOR_STRIDE(array, which); + for (dim = which + 1; dim < GFC_DESCRIPTOR_RANK (array); dim++) + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); + n++; + } + dim = GFC_DESCRIPTOR_RANK (array) - which; + } + else + { + for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++) + { + if (dim == which) + { + roffset = GFC_DESCRIPTOR_STRIDE(ret,dim); + if (roffset == 0) + roffset = 1; + soffset = GFC_DESCRIPTOR_STRIDE(array,dim); + if (soffset == 0) + soffset = 1; + len = GFC_DESCRIPTOR_EXTENT(array,dim); + } + else + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); + n++; + } + } + if (sstride[0] == 0) + sstride[0] = 1; + if (rstride[0] == 0) + rstride[0] = 1; + + dim = GFC_DESCRIPTOR_RANK (array); + } + rstride0 = rstride[0]; sstride0 = sstride[0]; rptr = ret->base_addr; Index: generated/cshift0_i8.c =================================================================== --- generated/cshift0_i8.c (Revision 249104) +++ generated/cshift0_i8.c (Arbeitskopie) @@ -51,6 +51,9 @@ index_type len; index_type n; + bool do_blocked; + index_type r_ex, a_ex; + which = which - 1; sstride[0] = 0; rstride[0] = 0; @@ -63,33 +66,99 @@ soffset = 1; len = 0; - for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++) + r_ex = 1; + a_ex = 1; + + if (which > 0) { - if (dim == which) - { - roffset = GFC_DESCRIPTOR_STRIDE(ret,dim); - if (roffset == 0) - roffset = 1; - soffset = GFC_DESCRIPTOR_STRIDE(array,dim); - if (soffset == 0) - soffset = 1; - len = GFC_DESCRIPTOR_EXTENT(array,dim); - } - else - { - count[n] = 0; - extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); - rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); - sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); - n++; - } + /* Test if both ret and array are contiguous. */ + do_blocked = true; + dim = GFC_DESCRIPTOR_RANK (array); + for (n = 0; n < dim; n ++) + { + index_type rs, as; + rs = GFC_DESCRIPTOR_STRIDE (ret, n); + if (rs != r_ex) + { + do_blocked = false; + break; + } + as = GFC_DESCRIPTOR_STRIDE (array, n); + if (as != a_ex) + { + do_blocked = false; + break; + } + r_ex *= GFC_DESCRIPTOR_EXTENT (ret, n); + a_ex *= GFC_DESCRIPTOR_EXTENT (array, n); + } } - if (sstride[0] == 0) - sstride[0] = 1; - if (rstride[0] == 0) - rstride[0] = 1; + else + do_blocked = false; - dim = GFC_DESCRIPTOR_RANK (array); + n = 0; + + if (do_blocked) + { + /* For contiguous arrays, use the relationship that + + dimension(n1,n2,n3) :: a, b + b = cshift(a,sh,3) + + can be dealt with as if + + dimension(n1*n2*n3) :: an, bn + bn = cshift(a,sh*n1*n2,1) + + we can used a more blocked algorithm for dim>1. */ + sstride[0] = 1; + rstride[0] = 1; + roffset = 1; + soffset = 1; + len = GFC_DESCRIPTOR_STRIDE(array, which) + * GFC_DESCRIPTOR_EXTENT(array, which); + shift *= GFC_DESCRIPTOR_STRIDE(array, which); + for (dim = which + 1; dim < GFC_DESCRIPTOR_RANK (array); dim++) + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); + n++; + } + dim = GFC_DESCRIPTOR_RANK (array) - which; + } + else + { + for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++) + { + if (dim == which) + { + roffset = GFC_DESCRIPTOR_STRIDE(ret,dim); + if (roffset == 0) + roffset = 1; + soffset = GFC_DESCRIPTOR_STRIDE(array,dim); + if (soffset == 0) + soffset = 1; + len = GFC_DESCRIPTOR_EXTENT(array,dim); + } + else + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); + n++; + } + } + if (sstride[0] == 0) + sstride[0] = 1; + if (rstride[0] == 0) + rstride[0] = 1; + + dim = GFC_DESCRIPTOR_RANK (array); + } + rstride0 = rstride[0]; sstride0 = sstride[0]; rptr = ret->base_addr; Index: generated/cshift0_r10.c =================================================================== --- generated/cshift0_r10.c (Revision 249104) +++ generated/cshift0_r10.c (Arbeitskopie) @@ -51,6 +51,9 @@ index_type len; index_type n; + bool do_blocked; + index_type r_ex, a_ex; + which = which - 1; sstride[0] = 0; rstride[0] = 0; @@ -63,33 +66,99 @@ soffset = 1; len = 0; - for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++) + r_ex = 1; + a_ex = 1; + + if (which > 0) { - if (dim == which) - { - roffset = GFC_DESCRIPTOR_STRIDE(ret,dim); - if (roffset == 0) - roffset = 1; - soffset = GFC_DESCRIPTOR_STRIDE(array,dim); - if (soffset == 0) - soffset = 1; - len = GFC_DESCRIPTOR_EXTENT(array,dim); - } - else - { - count[n] = 0; - extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); - rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); - sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); - n++; - } + /* Test if both ret and array are contiguous. */ + do_blocked = true; + dim = GFC_DESCRIPTOR_RANK (array); + for (n = 0; n < dim; n ++) + { + index_type rs, as; + rs = GFC_DESCRIPTOR_STRIDE (ret, n); + if (rs != r_ex) + { + do_blocked = false; + break; + } + as = GFC_DESCRIPTOR_STRIDE (array, n); + if (as != a_ex) + { + do_blocked = false; + break; + } + r_ex *= GFC_DESCRIPTOR_EXTENT (ret, n); + a_ex *= GFC_DESCRIPTOR_EXTENT (array, n); + } } - if (sstride[0] == 0) - sstride[0] = 1; - if (rstride[0] == 0) - rstride[0] = 1; + else + do_blocked = false; - dim = GFC_DESCRIPTOR_RANK (array); + n = 0; + + if (do_blocked) + { + /* For contiguous arrays, use the relationship that + + dimension(n1,n2,n3) :: a, b + b = cshift(a,sh,3) + + can be dealt with as if + + dimension(n1*n2*n3) :: an, bn + bn = cshift(a,sh*n1*n2,1) + + we can used a more blocked algorithm for dim>1. */ + sstride[0] = 1; + rstride[0] = 1; + roffset = 1; + soffset = 1; + len = GFC_DESCRIPTOR_STRIDE(array, which) + * GFC_DESCRIPTOR_EXTENT(array, which); + shift *= GFC_DESCRIPTOR_STRIDE(array, which); + for (dim = which + 1; dim < GFC_DESCRIPTOR_RANK (array); dim++) + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); + n++; + } + dim = GFC_DESCRIPTOR_RANK (array) - which; + } + else + { + for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++) + { + if (dim == which) + { + roffset = GFC_DESCRIPTOR_STRIDE(ret,dim); + if (roffset == 0) + roffset = 1; + soffset = GFC_DESCRIPTOR_STRIDE(array,dim); + if (soffset == 0) + soffset = 1; + len = GFC_DESCRIPTOR_EXTENT(array,dim); + } + else + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); + n++; + } + } + if (sstride[0] == 0) + sstride[0] = 1; + if (rstride[0] == 0) + rstride[0] = 1; + + dim = GFC_DESCRIPTOR_RANK (array); + } + rstride0 = rstride[0]; sstride0 = sstride[0]; rptr = ret->base_addr; Index: generated/cshift0_r16.c =================================================================== --- generated/cshift0_r16.c (Revision 249104) +++ generated/cshift0_r16.c (Arbeitskopie) @@ -51,6 +51,9 @@ index_type len; index_type n; + bool do_blocked; + index_type r_ex, a_ex; + which = which - 1; sstride[0] = 0; rstride[0] = 0; @@ -63,33 +66,99 @@ soffset = 1; len = 0; - for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++) + r_ex = 1; + a_ex = 1; + + if (which > 0) { - if (dim == which) - { - roffset = GFC_DESCRIPTOR_STRIDE(ret,dim); - if (roffset == 0) - roffset = 1; - soffset = GFC_DESCRIPTOR_STRIDE(array,dim); - if (soffset == 0) - soffset = 1; - len = GFC_DESCRIPTOR_EXTENT(array,dim); - } - else - { - count[n] = 0; - extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); - rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); - sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); - n++; - } + /* Test if both ret and array are contiguous. */ + do_blocked = true; + dim = GFC_DESCRIPTOR_RANK (array); + for (n = 0; n < dim; n ++) + { + index_type rs, as; + rs = GFC_DESCRIPTOR_STRIDE (ret, n); + if (rs != r_ex) + { + do_blocked = false; + break; + } + as = GFC_DESCRIPTOR_STRIDE (array, n); + if (as != a_ex) + { + do_blocked = false; + break; + } + r_ex *= GFC_DESCRIPTOR_EXTENT (ret, n); + a_ex *= GFC_DESCRIPTOR_EXTENT (array, n); + } } - if (sstride[0] == 0) - sstride[0] = 1; - if (rstride[0] == 0) - rstride[0] = 1; + else + do_blocked = false; - dim = GFC_DESCRIPTOR_RANK (array); + n = 0; + + if (do_blocked) + { + /* For contiguous arrays, use the relationship that + + dimension(n1,n2,n3) :: a, b + b = cshift(a,sh,3) + + can be dealt with as if + + dimension(n1*n2*n3) :: an, bn + bn = cshift(a,sh*n1*n2,1) + + we can used a more blocked algorithm for dim>1. */ + sstride[0] = 1; + rstride[0] = 1; + roffset = 1; + soffset = 1; + len = GFC_DESCRIPTOR_STRIDE(array, which) + * GFC_DESCRIPTOR_EXTENT(array, which); + shift *= GFC_DESCRIPTOR_STRIDE(array, which); + for (dim = which + 1; dim < GFC_DESCRIPTOR_RANK (array); dim++) + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); + n++; + } + dim = GFC_DESCRIPTOR_RANK (array) - which; + } + else + { + for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++) + { + if (dim == which) + { + roffset = GFC_DESCRIPTOR_STRIDE(ret,dim); + if (roffset == 0) + roffset = 1; + soffset = GFC_DESCRIPTOR_STRIDE(array,dim); + if (soffset == 0) + soffset = 1; + len = GFC_DESCRIPTOR_EXTENT(array,dim); + } + else + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); + n++; + } + } + if (sstride[0] == 0) + sstride[0] = 1; + if (rstride[0] == 0) + rstride[0] = 1; + + dim = GFC_DESCRIPTOR_RANK (array); + } + rstride0 = rstride[0]; sstride0 = sstride[0]; rptr = ret->base_addr; Index: generated/cshift0_r4.c =================================================================== --- generated/cshift0_r4.c (Revision 249104) +++ generated/cshift0_r4.c (Arbeitskopie) @@ -51,6 +51,9 @@ index_type len; index_type n; + bool do_blocked; + index_type r_ex, a_ex; + which = which - 1; sstride[0] = 0; rstride[0] = 0; @@ -63,33 +66,99 @@ soffset = 1; len = 0; - for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++) + r_ex = 1; + a_ex = 1; + + if (which > 0) { - if (dim == which) - { - roffset = GFC_DESCRIPTOR_STRIDE(ret,dim); - if (roffset == 0) - roffset = 1; - soffset = GFC_DESCRIPTOR_STRIDE(array,dim); - if (soffset == 0) - soffset = 1; - len = GFC_DESCRIPTOR_EXTENT(array,dim); - } - else - { - count[n] = 0; - extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); - rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); - sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); - n++; - } + /* Test if both ret and array are contiguous. */ + do_blocked = true; + dim = GFC_DESCRIPTOR_RANK (array); + for (n = 0; n < dim; n ++) + { + index_type rs, as; + rs = GFC_DESCRIPTOR_STRIDE (ret, n); + if (rs != r_ex) + { + do_blocked = false; + break; + } + as = GFC_DESCRIPTOR_STRIDE (array, n); + if (as != a_ex) + { + do_blocked = false; + break; + } + r_ex *= GFC_DESCRIPTOR_EXTENT (ret, n); + a_ex *= GFC_DESCRIPTOR_EXTENT (array, n); + } } - if (sstride[0] == 0) - sstride[0] = 1; - if (rstride[0] == 0) - rstride[0] = 1; + else + do_blocked = false; - dim = GFC_DESCRIPTOR_RANK (array); + n = 0; + + if (do_blocked) + { + /* For contiguous arrays, use the relationship that + + dimension(n1,n2,n3) :: a, b + b = cshift(a,sh,3) + + can be dealt with as if + + dimension(n1*n2*n3) :: an, bn + bn = cshift(a,sh*n1*n2,1) + + we can used a more blocked algorithm for dim>1. */ + sstride[0] = 1; + rstride[0] = 1; + roffset = 1; + soffset = 1; + len = GFC_DESCRIPTOR_STRIDE(array, which) + * GFC_DESCRIPTOR_EXTENT(array, which); + shift *= GFC_DESCRIPTOR_STRIDE(array, which); + for (dim = which + 1; dim < GFC_DESCRIPTOR_RANK (array); dim++) + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); + n++; + } + dim = GFC_DESCRIPTOR_RANK (array) - which; + } + else + { + for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++) + { + if (dim == which) + { + roffset = GFC_DESCRIPTOR_STRIDE(ret,dim); + if (roffset == 0) + roffset = 1; + soffset = GFC_DESCRIPTOR_STRIDE(array,dim); + if (soffset == 0) + soffset = 1; + len = GFC_DESCRIPTOR_EXTENT(array,dim); + } + else + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); + n++; + } + } + if (sstride[0] == 0) + sstride[0] = 1; + if (rstride[0] == 0) + rstride[0] = 1; + + dim = GFC_DESCRIPTOR_RANK (array); + } + rstride0 = rstride[0]; sstride0 = sstride[0]; rptr = ret->base_addr; Index: generated/cshift0_r8.c =================================================================== --- generated/cshift0_r8.c (Revision 249104) +++ generated/cshift0_r8.c (Arbeitskopie) @@ -51,6 +51,9 @@ index_type len; index_type n; + bool do_blocked; + index_type r_ex, a_ex; + which = which - 1; sstride[0] = 0; rstride[0] = 0; @@ -63,33 +66,99 @@ soffset = 1; len = 0; - for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++) + r_ex = 1; + a_ex = 1; + + if (which > 0) { - if (dim == which) - { - roffset = GFC_DESCRIPTOR_STRIDE(ret,dim); - if (roffset == 0) - roffset = 1; - soffset = GFC_DESCRIPTOR_STRIDE(array,dim); - if (soffset == 0) - soffset = 1; - len = GFC_DESCRIPTOR_EXTENT(array,dim); - } - else - { - count[n] = 0; - extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); - rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); - sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); - n++; - } + /* Test if both ret and array are contiguous. */ + do_blocked = true; + dim = GFC_DESCRIPTOR_RANK (array); + for (n = 0; n < dim; n ++) + { + index_type rs, as; + rs = GFC_DESCRIPTOR_STRIDE (ret, n); + if (rs != r_ex) + { + do_blocked = false; + break; + } + as = GFC_DESCRIPTOR_STRIDE (array, n); + if (as != a_ex) + { + do_blocked = false; + break; + } + r_ex *= GFC_DESCRIPTOR_EXTENT (ret, n); + a_ex *= GFC_DESCRIPTOR_EXTENT (array, n); + } } - if (sstride[0] == 0) - sstride[0] = 1; - if (rstride[0] == 0) - rstride[0] = 1; + else + do_blocked = false; - dim = GFC_DESCRIPTOR_RANK (array); + n = 0; + + if (do_blocked) + { + /* For contiguous arrays, use the relationship that + + dimension(n1,n2,n3) :: a, b + b = cshift(a,sh,3) + + can be dealt with as if + + dimension(n1*n2*n3) :: an, bn + bn = cshift(a,sh*n1*n2,1) + + we can used a more blocked algorithm for dim>1. */ + sstride[0] = 1; + rstride[0] = 1; + roffset = 1; + soffset = 1; + len = GFC_DESCRIPTOR_STRIDE(array, which) + * GFC_DESCRIPTOR_EXTENT(array, which); + shift *= GFC_DESCRIPTOR_STRIDE(array, which); + for (dim = which + 1; dim < GFC_DESCRIPTOR_RANK (array); dim++) + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); + n++; + } + dim = GFC_DESCRIPTOR_RANK (array) - which; + } + else + { + for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++) + { + if (dim == which) + { + roffset = GFC_DESCRIPTOR_STRIDE(ret,dim); + if (roffset == 0) + roffset = 1; + soffset = GFC_DESCRIPTOR_STRIDE(array,dim); + if (soffset == 0) + soffset = 1; + len = GFC_DESCRIPTOR_EXTENT(array,dim); + } + else + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); + n++; + } + } + if (sstride[0] == 0) + sstride[0] = 1; + if (rstride[0] == 0) + rstride[0] = 1; + + dim = GFC_DESCRIPTOR_RANK (array); + } + rstride0 = rstride[0]; sstride0 = sstride[0]; rptr = ret->base_addr;
! { dg-do run } ! Take cshift through its paces to make sure no boundary ! cases are wrong with the optimized cshift. module kinds integer, parameter :: sp = selected_real_kind(6) ! Single precision end module kinds module replacements use kinds contains subroutine cshift_sp_3_v1 (array, shift, dim, res) integer, parameter :: wp = sp real(kind=wp), dimension(:,:,:), intent(in) :: array integer, intent(in) :: shift, dim real(kind=wp), dimension(:,:,:), intent(out) :: res integer :: i,j,k integer :: sh, rsh integer :: n integer :: n2, n3 res = 0 n3 = size(array,3) n2 = size(array,2) n1 = size(array,1) if (dim == 1) then n = n1 sh = modulo(shift, n) rsh = n - sh do k=1, n3 do j=1, n2 do i=1, rsh res(i,j,k) = array(i+sh,j,k) end do do i=rsh+1,n res(i,j,k) = array(i-rsh,j,k) end do end do end do else if (dim == 2) then n = n2 sh = modulo(shift,n) rsh = n - sh do k=1, n3 do j=1, rsh do i=1, n1 res(i,j,k) = array(i,j+sh, k) end do end do do j=rsh+1, n do i=1, n1 res(i,j,k) = array(i,j-rsh, k) end do end do end do else if (dim == 3) then n = n3 sh = modulo(shift, n) rsh = n - sh do k=1, rsh do j=1, n2 do i=1, n1 res(i,j,k) = array(i, j, k+sh) end do end do end do do k=rsh+1, n do j=1, n2 do i=1, n1 res(i,j, k) = array(i, j, k-rsh) end do end do end do else stop "Wrong argument to dim" end if end subroutine cshift_sp_3_v1 end module replacements program testme use kinds use replacements implicit none integer, parameter :: wp = sp ! Working precision INTEGER, PARAMETER :: n = 7 real(kind=wp), dimension(:,:,:), allocatable :: a,b,c integer i, j, k real:: t1, t2 integer, parameter :: nrep = 20 allocate (a(n,n,n), b(n,n,n),c(n,n,n)) call random_number(a) do k = 1,3 do i=-3,3,2 call cshift_sp_3_v1 (a, i, k, b) c = cshift(a,i,k) if (any (c /= b)) call abort end do end do deallocate (b,c) allocate (b(n-1,n-1,n-1),c(n-1,n-1,n-1)) do k=1,3 do i=-3,3,2 call cshift_sp_3_v1 (a(1:n-1,1:n-1,1:n-1), i, k, b) c = cshift(a(1:n-1,1:n-1,1:n-1), i, k) if (any (c /= b)) call abort end do end do deallocate (b,c) allocate (b(n,n-1,n-1), c(n,n-1,n-1)) do k=1,3 do i=-3,3,2 call cshift_sp_3_v1 (a(:,2:n,2:n), i, k, b) c = cshift (a(:,2:n,2:n), i, k) if (any (c /= b)) call abort end do end do deallocate (b,c) allocate (b(n,n,n-1), c(n,n,n-1)) do k=1,3 do i=-3,3,2 call cshift_sp_3_v1 (a(:,:,2:n), i, k, b) c = cshift (a(:,:,2:n), i, k) if (any (c /= b)) call abort end do end do end program testme
module kinds integer, parameter :: sp = selected_real_kind(6) ! Single precision integer, parameter :: dp = selected_real_kind(15) ! Double precision end module kinds module replacements use kinds contains subroutine cshift_sp_3_v1 (array, shift, dim, res) integer, parameter :: wp = sp real(kind=wp), dimension(:,:,:), intent(in), contiguous :: array integer, intent(in) :: shift, dim real(kind=wp), dimension(:,:,:), intent(out), contiguous :: res integer :: i,j,k integer :: sh, rsh integer :: n res = 0 if (dim == 1) then n = size(array,1) sh = modulo(shift, n) rsh = n - sh do k=1, size(array,3) do j=1, size(array,2) do i=1, rsh res(i,j,k) = array(i+sh,j,k) end do do i=rsh+1,n res(i,j,k) = array(i-rsh,j,k) end do end do end do else if (dim == 2) then n = size(array,2) sh = modulo(shift,n) rsh = n - sh do k=1, size(array,3) do j=1, rsh do i=1, size(array,1) res(i,j,k) = array(i,j+sh, k) end do end do do j=rsh+1, n do i=1, size(array,1) res(i,j,k) = array(i,j-rsh, k) end do end do end do else if (dim == 3) then n = size(array,3) sh = modulo(shift, n) rsh = n - sh do k=1, rsh do j=1, size(array,2) do i=1, size(array,1) res(i,j,k) = array(i, j, k+sh) end do end do end do do k=rsh+1, n do j=1, size(array,2) do i=1, size(array,1) res(i,j, k) = array(i, j, k-rsh) end do end do end do else stop "Wrong argument to dim" end if end subroutine cshift_sp_3_v1 end module replacements program testme use kinds use replacements implicit none integer, parameter :: wp = sp ! Working precision INTEGER, PARAMETER :: n = 500 real(kind=wp) :: a(n,n,n), b(n,n,n) integer i, j, k real t1, t2 print *,"Testing explicit DO loops" call random_number(a) do k = 1,3 call cpu_time ( t1 ) do j = 1, 10 call cshift_sp_3_v1 (a, 1, k, b) end do call cpu_time ( t2 ) write ( *, * ) 'Dim = ', k, ' Elapsed CPU time = ', t2-t1 end do print *,"Testing built-in cshift" do k = 1,3 call cpu_time ( t1 ) do j = 1, 10 b = cshift(a,1,k) end do call cpu_time ( t2 ) write ( *, * ) 'Dim = ', k, ' Elapsed CPU time = ', t2-t1 end do end program testme