Hello world,

the attached patch implements a blocked algorithm for
improving the speed of cshift for dim > 1.

It uses the fact that

  integer, dimension (n1,n2,n3) :: a, b

  b = cshift(a,shift,3)

is identical, as far as the memory locations is concerned.

  integer, dimension (n1*n2*n3) :: c, d
  d = cshift(c, shift*n1*n2, 1)

The speedup is quite large; from being really slow for
dim > 1, this patch makes it go even faster.

Below there are some comparisons for the attached benchmark,
do-1.f90. gfortran-7 uses the old library version.

Interestingly, the library version is also much faster
than an implementation of straight DO loops.

Regression-tested.  OK for trunk?

Regards

        Thomas


$ gfortran-7 -static-libgfortran -O3 do-1.f90 && ./a.out
 Testing explicit DO loops
 Dim =            1  Elapsed CPU time =    5.71363878
 Dim =            2  Elapsed CPU time =    5.40494061
 Dim =            3  Elapsed CPU time =    5.40769291
 Testing built-in cshift
 Dim =            1  Elapsed CPU time =    3.43479729
 Dim =            2  Elapsed CPU time =    11.7110386
 Dim =            3  Elapsed CPU time =    31.0966301
$ gfortran -static-libgfortran -O3 do-1.f90 && ./a.out
 Testing explicit DO loops
 Dim =            1  Elapsed CPU time =    5.73881340
 Dim =            2  Elapsed CPU time =    5.38435745
 Dim =            3  Elapsed CPU time =    5.38971329
 Testing built-in cshift
 Dim =            1  Elapsed CPU time =    3.42018127
 Dim =            2  Elapsed CPU time =    2.24075317
 Dim =            3  Elapsed CPU time =    2.23136330


2017-06-14  Thomas Koenig  <tkoe...@gcc.gnu.org>

        PR fortran/52473
        * m4/cshift0.m4:  For arrays that are contiguous up to
        shift, implement blocked algorighm for cshift.
        * generated/cshift0_c10.c:  Regenerated.
        * generated/cshift0_c16.c:  Regenerated.
        * generated/cshift0_c4.c:  Regenerated.
        * generated/cshift0_c8.c:  Regenerated.
        * generated/cshift0_i1.c:  Regenerated.
        * generated/cshift0_i16.c:  Regenerated.
        * generated/cshift0_i2.c:  Regenerated.
        * generated/cshift0_i4.c:  Regenerated.
        * generated/cshift0_i8.c:  Regenerated.
        * generated/cshift0_r10.c:  Regenerated.
        * generated/cshift0_r16.c:  Regenerated.
        * generated/cshift0_r4.c:  Regenerated.
        * generated/cshift0_r8.c:  Regenerated.

2017-06-14  Thomas Koenig  <tkoe...@gcc.gnu.org>

        PR fortran/52473
        * gfortran.dg/cshift_1.f90:  New test.
Index: m4/cshift0.m4
===================================================================
--- m4/cshift0.m4	(Revision 249104)
+++ m4/cshift0.m4	(Arbeitskopie)
@@ -52,6 +52,9 @@
   index_type len;
   index_type n;
 
+  bool do_blocked;
+  index_type r_ex, a_ex;
+
   which = which - 1;
   sstride[0] = 0;
   rstride[0] = 0;
@@ -64,33 +67,99 @@
   soffset = 1;
   len = 0;
 
-  for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+  r_ex = 1;
+  a_ex = 1;
+
+  if (which > 0)
     {
-      if (dim == which)
-        {
-          roffset = GFC_DESCRIPTOR_STRIDE(ret,dim);
-          if (roffset == 0)
-            roffset = 1;
-          soffset = GFC_DESCRIPTOR_STRIDE(array,dim);
-          if (soffset == 0)
-            soffset = 1;
-          len = GFC_DESCRIPTOR_EXTENT(array,dim);
-        }
-      else
-        {
-          count[n] = 0;
-          extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
-          rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim);
-          sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim);
-          n++;
-        }
+      /* Test if both ret and array are contiguous.  */
+      do_blocked = true;
+      dim = GFC_DESCRIPTOR_RANK (array);
+      for (n = 0; n < dim; n ++)
+	{
+	  index_type rs, as;
+	  rs = GFC_DESCRIPTOR_STRIDE (ret, n);
+	  if (rs != r_ex)
+	    {
+	      do_blocked = false;
+	      break;
+	    }
+	  as = GFC_DESCRIPTOR_STRIDE (array, n);
+	  if (as != a_ex)
+	    {
+	      do_blocked = false;
+	      break;
+	    }
+	  r_ex *= GFC_DESCRIPTOR_EXTENT (ret, n);
+	  a_ex *= GFC_DESCRIPTOR_EXTENT (array, n);
+	}
     }
-  if (sstride[0] == 0)
-    sstride[0] = 1;
-  if (rstride[0] == 0)
-    rstride[0] = 1;
+  else
+    do_blocked = false;
 
-  dim = GFC_DESCRIPTOR_RANK (array);
+  n = 0;
+
+  if (do_blocked)
+    {
+      /* For contiguous arrays, use the relationship that
+
+         dimension(n1,n2,n3) :: a, b
+	 b = cshift(a,sh,3)
+
+         can be dealt with as if
+
+	 dimension(n1*n2*n3) :: an, bn
+	 bn = cshift(a,sh*n1*n2,1)
+
+	 we can used a more blocked algorithm for dim>1.  */
+      sstride[0] = 1;
+      rstride[0] = 1;
+      roffset = 1;
+      soffset = 1;
+      len = GFC_DESCRIPTOR_STRIDE(array, which)
+	* GFC_DESCRIPTOR_EXTENT(array, which);      
+      shift *= GFC_DESCRIPTOR_STRIDE(array, which);
+      for (dim = which + 1; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+	{
+	  count[n] = 0;
+	  extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
+	  rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim);
+	  sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim);
+	  n++;
+	}
+      dim = GFC_DESCRIPTOR_RANK (array) - which;
+    }
+  else
+    {
+      for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+	{
+	  if (dim == which)
+	    {
+	      roffset = GFC_DESCRIPTOR_STRIDE(ret,dim);
+	      if (roffset == 0)
+		roffset = 1;
+	      soffset = GFC_DESCRIPTOR_STRIDE(array,dim);
+	      if (soffset == 0)
+		soffset = 1;
+	      len = GFC_DESCRIPTOR_EXTENT(array,dim);
+	    }
+	  else
+	    {
+	      count[n] = 0;
+	      extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
+	      rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim);
+	      sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim);
+	      n++;
+	    }
+	}
+      if (sstride[0] == 0)
+	sstride[0] = 1;
+      if (rstride[0] == 0)
+	rstride[0] = 1;
+
+      dim = GFC_DESCRIPTOR_RANK (array);
+    }
+
   rstride0 = rstride[0];
   sstride0 = sstride[0];
   rptr = ret->base_addr;
Index: generated/cshift0_c10.c
===================================================================
--- generated/cshift0_c10.c	(Revision 249104)
+++ generated/cshift0_c10.c	(Arbeitskopie)
@@ -51,6 +51,9 @@
   index_type len;
   index_type n;
 
+  bool do_blocked;
+  index_type r_ex, a_ex;
+
   which = which - 1;
   sstride[0] = 0;
   rstride[0] = 0;
@@ -63,33 +66,99 @@
   soffset = 1;
   len = 0;
 
-  for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+  r_ex = 1;
+  a_ex = 1;
+
+  if (which > 0)
     {
-      if (dim == which)
-        {
-          roffset = GFC_DESCRIPTOR_STRIDE(ret,dim);
-          if (roffset == 0)
-            roffset = 1;
-          soffset = GFC_DESCRIPTOR_STRIDE(array,dim);
-          if (soffset == 0)
-            soffset = 1;
-          len = GFC_DESCRIPTOR_EXTENT(array,dim);
-        }
-      else
-        {
-          count[n] = 0;
-          extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
-          rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim);
-          sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim);
-          n++;
-        }
+      /* Test if both ret and array are contiguous.  */
+      do_blocked = true;
+      dim = GFC_DESCRIPTOR_RANK (array);
+      for (n = 0; n < dim; n ++)
+	{
+	  index_type rs, as;
+	  rs = GFC_DESCRIPTOR_STRIDE (ret, n);
+	  if (rs != r_ex)
+	    {
+	      do_blocked = false;
+	      break;
+	    }
+	  as = GFC_DESCRIPTOR_STRIDE (array, n);
+	  if (as != a_ex)
+	    {
+	      do_blocked = false;
+	      break;
+	    }
+	  r_ex *= GFC_DESCRIPTOR_EXTENT (ret, n);
+	  a_ex *= GFC_DESCRIPTOR_EXTENT (array, n);
+	}
     }
-  if (sstride[0] == 0)
-    sstride[0] = 1;
-  if (rstride[0] == 0)
-    rstride[0] = 1;
+  else
+    do_blocked = false;
 
-  dim = GFC_DESCRIPTOR_RANK (array);
+  n = 0;
+
+  if (do_blocked)
+    {
+      /* For contiguous arrays, use the relationship that
+
+         dimension(n1,n2,n3) :: a, b
+	 b = cshift(a,sh,3)
+
+         can be dealt with as if
+
+	 dimension(n1*n2*n3) :: an, bn
+	 bn = cshift(a,sh*n1*n2,1)
+
+	 we can used a more blocked algorithm for dim>1.  */
+      sstride[0] = 1;
+      rstride[0] = 1;
+      roffset = 1;
+      soffset = 1;
+      len = GFC_DESCRIPTOR_STRIDE(array, which)
+	* GFC_DESCRIPTOR_EXTENT(array, which);      
+      shift *= GFC_DESCRIPTOR_STRIDE(array, which);
+      for (dim = which + 1; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+	{
+	  count[n] = 0;
+	  extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
+	  rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim);
+	  sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim);
+	  n++;
+	}
+      dim = GFC_DESCRIPTOR_RANK (array) - which;
+    }
+  else
+    {
+      for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+	{
+	  if (dim == which)
+	    {
+	      roffset = GFC_DESCRIPTOR_STRIDE(ret,dim);
+	      if (roffset == 0)
+		roffset = 1;
+	      soffset = GFC_DESCRIPTOR_STRIDE(array,dim);
+	      if (soffset == 0)
+		soffset = 1;
+	      len = GFC_DESCRIPTOR_EXTENT(array,dim);
+	    }
+	  else
+	    {
+	      count[n] = 0;
+	      extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
+	      rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim);
+	      sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim);
+	      n++;
+	    }
+	}
+      if (sstride[0] == 0)
+	sstride[0] = 1;
+      if (rstride[0] == 0)
+	rstride[0] = 1;
+
+      dim = GFC_DESCRIPTOR_RANK (array);
+    }
+
   rstride0 = rstride[0];
   sstride0 = sstride[0];
   rptr = ret->base_addr;
Index: generated/cshift0_c16.c
===================================================================
--- generated/cshift0_c16.c	(Revision 249104)
+++ generated/cshift0_c16.c	(Arbeitskopie)
@@ -51,6 +51,9 @@
   index_type len;
   index_type n;
 
+  bool do_blocked;
+  index_type r_ex, a_ex;
+
   which = which - 1;
   sstride[0] = 0;
   rstride[0] = 0;
@@ -63,33 +66,99 @@
   soffset = 1;
   len = 0;
 
-  for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+  r_ex = 1;
+  a_ex = 1;
+
+  if (which > 0)
     {
-      if (dim == which)
-        {
-          roffset = GFC_DESCRIPTOR_STRIDE(ret,dim);
-          if (roffset == 0)
-            roffset = 1;
-          soffset = GFC_DESCRIPTOR_STRIDE(array,dim);
-          if (soffset == 0)
-            soffset = 1;
-          len = GFC_DESCRIPTOR_EXTENT(array,dim);
-        }
-      else
-        {
-          count[n] = 0;
-          extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
-          rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim);
-          sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim);
-          n++;
-        }
+      /* Test if both ret and array are contiguous.  */
+      do_blocked = true;
+      dim = GFC_DESCRIPTOR_RANK (array);
+      for (n = 0; n < dim; n ++)
+	{
+	  index_type rs, as;
+	  rs = GFC_DESCRIPTOR_STRIDE (ret, n);
+	  if (rs != r_ex)
+	    {
+	      do_blocked = false;
+	      break;
+	    }
+	  as = GFC_DESCRIPTOR_STRIDE (array, n);
+	  if (as != a_ex)
+	    {
+	      do_blocked = false;
+	      break;
+	    }
+	  r_ex *= GFC_DESCRIPTOR_EXTENT (ret, n);
+	  a_ex *= GFC_DESCRIPTOR_EXTENT (array, n);
+	}
     }
-  if (sstride[0] == 0)
-    sstride[0] = 1;
-  if (rstride[0] == 0)
-    rstride[0] = 1;
+  else
+    do_blocked = false;
 
-  dim = GFC_DESCRIPTOR_RANK (array);
+  n = 0;
+
+  if (do_blocked)
+    {
+      /* For contiguous arrays, use the relationship that
+
+         dimension(n1,n2,n3) :: a, b
+	 b = cshift(a,sh,3)
+
+         can be dealt with as if
+
+	 dimension(n1*n2*n3) :: an, bn
+	 bn = cshift(a,sh*n1*n2,1)
+
+	 we can used a more blocked algorithm for dim>1.  */
+      sstride[0] = 1;
+      rstride[0] = 1;
+      roffset = 1;
+      soffset = 1;
+      len = GFC_DESCRIPTOR_STRIDE(array, which)
+	* GFC_DESCRIPTOR_EXTENT(array, which);      
+      shift *= GFC_DESCRIPTOR_STRIDE(array, which);
+      for (dim = which + 1; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+	{
+	  count[n] = 0;
+	  extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
+	  rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim);
+	  sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim);
+	  n++;
+	}
+      dim = GFC_DESCRIPTOR_RANK (array) - which;
+    }
+  else
+    {
+      for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+	{
+	  if (dim == which)
+	    {
+	      roffset = GFC_DESCRIPTOR_STRIDE(ret,dim);
+	      if (roffset == 0)
+		roffset = 1;
+	      soffset = GFC_DESCRIPTOR_STRIDE(array,dim);
+	      if (soffset == 0)
+		soffset = 1;
+	      len = GFC_DESCRIPTOR_EXTENT(array,dim);
+	    }
+	  else
+	    {
+	      count[n] = 0;
+	      extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
+	      rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim);
+	      sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim);
+	      n++;
+	    }
+	}
+      if (sstride[0] == 0)
+	sstride[0] = 1;
+      if (rstride[0] == 0)
+	rstride[0] = 1;
+
+      dim = GFC_DESCRIPTOR_RANK (array);
+    }
+
   rstride0 = rstride[0];
   sstride0 = sstride[0];
   rptr = ret->base_addr;
Index: generated/cshift0_c4.c
===================================================================
--- generated/cshift0_c4.c	(Revision 249104)
+++ generated/cshift0_c4.c	(Arbeitskopie)
@@ -51,6 +51,9 @@
   index_type len;
   index_type n;
 
+  bool do_blocked;
+  index_type r_ex, a_ex;
+
   which = which - 1;
   sstride[0] = 0;
   rstride[0] = 0;
@@ -63,33 +66,99 @@
   soffset = 1;
   len = 0;
 
-  for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+  r_ex = 1;
+  a_ex = 1;
+
+  if (which > 0)
     {
-      if (dim == which)
-        {
-          roffset = GFC_DESCRIPTOR_STRIDE(ret,dim);
-          if (roffset == 0)
-            roffset = 1;
-          soffset = GFC_DESCRIPTOR_STRIDE(array,dim);
-          if (soffset == 0)
-            soffset = 1;
-          len = GFC_DESCRIPTOR_EXTENT(array,dim);
-        }
-      else
-        {
-          count[n] = 0;
-          extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
-          rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim);
-          sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim);
-          n++;
-        }
+      /* Test if both ret and array are contiguous.  */
+      do_blocked = true;
+      dim = GFC_DESCRIPTOR_RANK (array);
+      for (n = 0; n < dim; n ++)
+	{
+	  index_type rs, as;
+	  rs = GFC_DESCRIPTOR_STRIDE (ret, n);
+	  if (rs != r_ex)
+	    {
+	      do_blocked = false;
+	      break;
+	    }
+	  as = GFC_DESCRIPTOR_STRIDE (array, n);
+	  if (as != a_ex)
+	    {
+	      do_blocked = false;
+	      break;
+	    }
+	  r_ex *= GFC_DESCRIPTOR_EXTENT (ret, n);
+	  a_ex *= GFC_DESCRIPTOR_EXTENT (array, n);
+	}
     }
-  if (sstride[0] == 0)
-    sstride[0] = 1;
-  if (rstride[0] == 0)
-    rstride[0] = 1;
+  else
+    do_blocked = false;
 
-  dim = GFC_DESCRIPTOR_RANK (array);
+  n = 0;
+
+  if (do_blocked)
+    {
+      /* For contiguous arrays, use the relationship that
+
+         dimension(n1,n2,n3) :: a, b
+	 b = cshift(a,sh,3)
+
+         can be dealt with as if
+
+	 dimension(n1*n2*n3) :: an, bn
+	 bn = cshift(a,sh*n1*n2,1)
+
+	 we can used a more blocked algorithm for dim>1.  */
+      sstride[0] = 1;
+      rstride[0] = 1;
+      roffset = 1;
+      soffset = 1;
+      len = GFC_DESCRIPTOR_STRIDE(array, which)
+	* GFC_DESCRIPTOR_EXTENT(array, which);      
+      shift *= GFC_DESCRIPTOR_STRIDE(array, which);
+      for (dim = which + 1; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+	{
+	  count[n] = 0;
+	  extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
+	  rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim);
+	  sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim);
+	  n++;
+	}
+      dim = GFC_DESCRIPTOR_RANK (array) - which;
+    }
+  else
+    {
+      for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+	{
+	  if (dim == which)
+	    {
+	      roffset = GFC_DESCRIPTOR_STRIDE(ret,dim);
+	      if (roffset == 0)
+		roffset = 1;
+	      soffset = GFC_DESCRIPTOR_STRIDE(array,dim);
+	      if (soffset == 0)
+		soffset = 1;
+	      len = GFC_DESCRIPTOR_EXTENT(array,dim);
+	    }
+	  else
+	    {
+	      count[n] = 0;
+	      extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
+	      rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim);
+	      sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim);
+	      n++;
+	    }
+	}
+      if (sstride[0] == 0)
+	sstride[0] = 1;
+      if (rstride[0] == 0)
+	rstride[0] = 1;
+
+      dim = GFC_DESCRIPTOR_RANK (array);
+    }
+
   rstride0 = rstride[0];
   sstride0 = sstride[0];
   rptr = ret->base_addr;
Index: generated/cshift0_c8.c
===================================================================
--- generated/cshift0_c8.c	(Revision 249104)
+++ generated/cshift0_c8.c	(Arbeitskopie)
@@ -51,6 +51,9 @@
   index_type len;
   index_type n;
 
+  bool do_blocked;
+  index_type r_ex, a_ex;
+
   which = which - 1;
   sstride[0] = 0;
   rstride[0] = 0;
@@ -63,33 +66,99 @@
   soffset = 1;
   len = 0;
 
-  for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+  r_ex = 1;
+  a_ex = 1;
+
+  if (which > 0)
     {
-      if (dim == which)
-        {
-          roffset = GFC_DESCRIPTOR_STRIDE(ret,dim);
-          if (roffset == 0)
-            roffset = 1;
-          soffset = GFC_DESCRIPTOR_STRIDE(array,dim);
-          if (soffset == 0)
-            soffset = 1;
-          len = GFC_DESCRIPTOR_EXTENT(array,dim);
-        }
-      else
-        {
-          count[n] = 0;
-          extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
-          rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim);
-          sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim);
-          n++;
-        }
+      /* Test if both ret and array are contiguous.  */
+      do_blocked = true;
+      dim = GFC_DESCRIPTOR_RANK (array);
+      for (n = 0; n < dim; n ++)
+	{
+	  index_type rs, as;
+	  rs = GFC_DESCRIPTOR_STRIDE (ret, n);
+	  if (rs != r_ex)
+	    {
+	      do_blocked = false;
+	      break;
+	    }
+	  as = GFC_DESCRIPTOR_STRIDE (array, n);
+	  if (as != a_ex)
+	    {
+	      do_blocked = false;
+	      break;
+	    }
+	  r_ex *= GFC_DESCRIPTOR_EXTENT (ret, n);
+	  a_ex *= GFC_DESCRIPTOR_EXTENT (array, n);
+	}
     }
-  if (sstride[0] == 0)
-    sstride[0] = 1;
-  if (rstride[0] == 0)
-    rstride[0] = 1;
+  else
+    do_blocked = false;
 
-  dim = GFC_DESCRIPTOR_RANK (array);
+  n = 0;
+
+  if (do_blocked)
+    {
+      /* For contiguous arrays, use the relationship that
+
+         dimension(n1,n2,n3) :: a, b
+	 b = cshift(a,sh,3)
+
+         can be dealt with as if
+
+	 dimension(n1*n2*n3) :: an, bn
+	 bn = cshift(a,sh*n1*n2,1)
+
+	 we can used a more blocked algorithm for dim>1.  */
+      sstride[0] = 1;
+      rstride[0] = 1;
+      roffset = 1;
+      soffset = 1;
+      len = GFC_DESCRIPTOR_STRIDE(array, which)
+	* GFC_DESCRIPTOR_EXTENT(array, which);      
+      shift *= GFC_DESCRIPTOR_STRIDE(array, which);
+      for (dim = which + 1; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+	{
+	  count[n] = 0;
+	  extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
+	  rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim);
+	  sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim);
+	  n++;
+	}
+      dim = GFC_DESCRIPTOR_RANK (array) - which;
+    }
+  else
+    {
+      for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+	{
+	  if (dim == which)
+	    {
+	      roffset = GFC_DESCRIPTOR_STRIDE(ret,dim);
+	      if (roffset == 0)
+		roffset = 1;
+	      soffset = GFC_DESCRIPTOR_STRIDE(array,dim);
+	      if (soffset == 0)
+		soffset = 1;
+	      len = GFC_DESCRIPTOR_EXTENT(array,dim);
+	    }
+	  else
+	    {
+	      count[n] = 0;
+	      extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
+	      rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim);
+	      sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim);
+	      n++;
+	    }
+	}
+      if (sstride[0] == 0)
+	sstride[0] = 1;
+      if (rstride[0] == 0)
+	rstride[0] = 1;
+
+      dim = GFC_DESCRIPTOR_RANK (array);
+    }
+
   rstride0 = rstride[0];
   sstride0 = sstride[0];
   rptr = ret->base_addr;
Index: generated/cshift0_i1.c
===================================================================
--- generated/cshift0_i1.c	(Revision 249104)
+++ generated/cshift0_i1.c	(Arbeitskopie)
@@ -51,6 +51,9 @@
   index_type len;
   index_type n;
 
+  bool do_blocked;
+  index_type r_ex, a_ex;
+
   which = which - 1;
   sstride[0] = 0;
   rstride[0] = 0;
@@ -63,33 +66,99 @@
   soffset = 1;
   len = 0;
 
-  for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+  r_ex = 1;
+  a_ex = 1;
+
+  if (which > 0)
     {
-      if (dim == which)
-        {
-          roffset = GFC_DESCRIPTOR_STRIDE(ret,dim);
-          if (roffset == 0)
-            roffset = 1;
-          soffset = GFC_DESCRIPTOR_STRIDE(array,dim);
-          if (soffset == 0)
-            soffset = 1;
-          len = GFC_DESCRIPTOR_EXTENT(array,dim);
-        }
-      else
-        {
-          count[n] = 0;
-          extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
-          rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim);
-          sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim);
-          n++;
-        }
+      /* Test if both ret and array are contiguous.  */
+      do_blocked = true;
+      dim = GFC_DESCRIPTOR_RANK (array);
+      for (n = 0; n < dim; n ++)
+	{
+	  index_type rs, as;
+	  rs = GFC_DESCRIPTOR_STRIDE (ret, n);
+	  if (rs != r_ex)
+	    {
+	      do_blocked = false;
+	      break;
+	    }
+	  as = GFC_DESCRIPTOR_STRIDE (array, n);
+	  if (as != a_ex)
+	    {
+	      do_blocked = false;
+	      break;
+	    }
+	  r_ex *= GFC_DESCRIPTOR_EXTENT (ret, n);
+	  a_ex *= GFC_DESCRIPTOR_EXTENT (array, n);
+	}
     }
-  if (sstride[0] == 0)
-    sstride[0] = 1;
-  if (rstride[0] == 0)
-    rstride[0] = 1;
+  else
+    do_blocked = false;
 
-  dim = GFC_DESCRIPTOR_RANK (array);
+  n = 0;
+
+  if (do_blocked)
+    {
+      /* For contiguous arrays, use the relationship that
+
+         dimension(n1,n2,n3) :: a, b
+	 b = cshift(a,sh,3)
+
+         can be dealt with as if
+
+	 dimension(n1*n2*n3) :: an, bn
+	 bn = cshift(a,sh*n1*n2,1)
+
+	 we can used a more blocked algorithm for dim>1.  */
+      sstride[0] = 1;
+      rstride[0] = 1;
+      roffset = 1;
+      soffset = 1;
+      len = GFC_DESCRIPTOR_STRIDE(array, which)
+	* GFC_DESCRIPTOR_EXTENT(array, which);      
+      shift *= GFC_DESCRIPTOR_STRIDE(array, which);
+      for (dim = which + 1; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+	{
+	  count[n] = 0;
+	  extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
+	  rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim);
+	  sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim);
+	  n++;
+	}
+      dim = GFC_DESCRIPTOR_RANK (array) - which;
+    }
+  else
+    {
+      for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+	{
+	  if (dim == which)
+	    {
+	      roffset = GFC_DESCRIPTOR_STRIDE(ret,dim);
+	      if (roffset == 0)
+		roffset = 1;
+	      soffset = GFC_DESCRIPTOR_STRIDE(array,dim);
+	      if (soffset == 0)
+		soffset = 1;
+	      len = GFC_DESCRIPTOR_EXTENT(array,dim);
+	    }
+	  else
+	    {
+	      count[n] = 0;
+	      extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
+	      rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim);
+	      sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim);
+	      n++;
+	    }
+	}
+      if (sstride[0] == 0)
+	sstride[0] = 1;
+      if (rstride[0] == 0)
+	rstride[0] = 1;
+
+      dim = GFC_DESCRIPTOR_RANK (array);
+    }
+
   rstride0 = rstride[0];
   sstride0 = sstride[0];
   rptr = ret->base_addr;
Index: generated/cshift0_i16.c
===================================================================
--- generated/cshift0_i16.c	(Revision 249104)
+++ generated/cshift0_i16.c	(Arbeitskopie)
@@ -51,6 +51,9 @@
   index_type len;
   index_type n;
 
+  bool do_blocked;
+  index_type r_ex, a_ex;
+
   which = which - 1;
   sstride[0] = 0;
   rstride[0] = 0;
@@ -63,33 +66,99 @@
   soffset = 1;
   len = 0;
 
-  for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+  r_ex = 1;
+  a_ex = 1;
+
+  if (which > 0)
     {
-      if (dim == which)
-        {
-          roffset = GFC_DESCRIPTOR_STRIDE(ret,dim);
-          if (roffset == 0)
-            roffset = 1;
-          soffset = GFC_DESCRIPTOR_STRIDE(array,dim);
-          if (soffset == 0)
-            soffset = 1;
-          len = GFC_DESCRIPTOR_EXTENT(array,dim);
-        }
-      else
-        {
-          count[n] = 0;
-          extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
-          rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim);
-          sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim);
-          n++;
-        }
+      /* Test if both ret and array are contiguous.  */
+      do_blocked = true;
+      dim = GFC_DESCRIPTOR_RANK (array);
+      for (n = 0; n < dim; n ++)
+	{
+	  index_type rs, as;
+	  rs = GFC_DESCRIPTOR_STRIDE (ret, n);
+	  if (rs != r_ex)
+	    {
+	      do_blocked = false;
+	      break;
+	    }
+	  as = GFC_DESCRIPTOR_STRIDE (array, n);
+	  if (as != a_ex)
+	    {
+	      do_blocked = false;
+	      break;
+	    }
+	  r_ex *= GFC_DESCRIPTOR_EXTENT (ret, n);
+	  a_ex *= GFC_DESCRIPTOR_EXTENT (array, n);
+	}
     }
-  if (sstride[0] == 0)
-    sstride[0] = 1;
-  if (rstride[0] == 0)
-    rstride[0] = 1;
+  else
+    do_blocked = false;
 
-  dim = GFC_DESCRIPTOR_RANK (array);
+  n = 0;
+
+  if (do_blocked)
+    {
+      /* For contiguous arrays, use the relationship that
+
+         dimension(n1,n2,n3) :: a, b
+	 b = cshift(a,sh,3)
+
+         can be dealt with as if
+
+	 dimension(n1*n2*n3) :: an, bn
+	 bn = cshift(a,sh*n1*n2,1)
+
+	 we can used a more blocked algorithm for dim>1.  */
+      sstride[0] = 1;
+      rstride[0] = 1;
+      roffset = 1;
+      soffset = 1;
+      len = GFC_DESCRIPTOR_STRIDE(array, which)
+	* GFC_DESCRIPTOR_EXTENT(array, which);      
+      shift *= GFC_DESCRIPTOR_STRIDE(array, which);
+      for (dim = which + 1; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+	{
+	  count[n] = 0;
+	  extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
+	  rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim);
+	  sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim);
+	  n++;
+	}
+      dim = GFC_DESCRIPTOR_RANK (array) - which;
+    }
+  else
+    {
+      for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+	{
+	  if (dim == which)
+	    {
+	      roffset = GFC_DESCRIPTOR_STRIDE(ret,dim);
+	      if (roffset == 0)
+		roffset = 1;
+	      soffset = GFC_DESCRIPTOR_STRIDE(array,dim);
+	      if (soffset == 0)
+		soffset = 1;
+	      len = GFC_DESCRIPTOR_EXTENT(array,dim);
+	    }
+	  else
+	    {
+	      count[n] = 0;
+	      extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
+	      rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim);
+	      sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim);
+	      n++;
+	    }
+	}
+      if (sstride[0] == 0)
+	sstride[0] = 1;
+      if (rstride[0] == 0)
+	rstride[0] = 1;
+
+      dim = GFC_DESCRIPTOR_RANK (array);
+    }
+
   rstride0 = rstride[0];
   sstride0 = sstride[0];
   rptr = ret->base_addr;
Index: generated/cshift0_i2.c
===================================================================
--- generated/cshift0_i2.c	(Revision 249104)
+++ generated/cshift0_i2.c	(Arbeitskopie)
@@ -51,6 +51,9 @@
   index_type len;
   index_type n;
 
+  bool do_blocked;
+  index_type r_ex, a_ex;
+
   which = which - 1;
   sstride[0] = 0;
   rstride[0] = 0;
@@ -63,33 +66,99 @@
   soffset = 1;
   len = 0;
 
-  for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+  r_ex = 1;
+  a_ex = 1;
+
+  if (which > 0)
     {
-      if (dim == which)
-        {
-          roffset = GFC_DESCRIPTOR_STRIDE(ret,dim);
-          if (roffset == 0)
-            roffset = 1;
-          soffset = GFC_DESCRIPTOR_STRIDE(array,dim);
-          if (soffset == 0)
-            soffset = 1;
-          len = GFC_DESCRIPTOR_EXTENT(array,dim);
-        }
-      else
-        {
-          count[n] = 0;
-          extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
-          rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim);
-          sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim);
-          n++;
-        }
+      /* Test if both ret and array are contiguous.  */
+      do_blocked = true;
+      dim = GFC_DESCRIPTOR_RANK (array);
+      for (n = 0; n < dim; n ++)
+	{
+	  index_type rs, as;
+	  rs = GFC_DESCRIPTOR_STRIDE (ret, n);
+	  if (rs != r_ex)
+	    {
+	      do_blocked = false;
+	      break;
+	    }
+	  as = GFC_DESCRIPTOR_STRIDE (array, n);
+	  if (as != a_ex)
+	    {
+	      do_blocked = false;
+	      break;
+	    }
+	  r_ex *= GFC_DESCRIPTOR_EXTENT (ret, n);
+	  a_ex *= GFC_DESCRIPTOR_EXTENT (array, n);
+	}
     }
-  if (sstride[0] == 0)
-    sstride[0] = 1;
-  if (rstride[0] == 0)
-    rstride[0] = 1;
+  else
+    do_blocked = false;
 
-  dim = GFC_DESCRIPTOR_RANK (array);
+  n = 0;
+
+  if (do_blocked)
+    {
+      /* For contiguous arrays, use the relationship that
+
+         dimension(n1,n2,n3) :: a, b
+	 b = cshift(a,sh,3)
+
+         can be dealt with as if
+
+	 dimension(n1*n2*n3) :: an, bn
+	 bn = cshift(a,sh*n1*n2,1)
+
+	 we can used a more blocked algorithm for dim>1.  */
+      sstride[0] = 1;
+      rstride[0] = 1;
+      roffset = 1;
+      soffset = 1;
+      len = GFC_DESCRIPTOR_STRIDE(array, which)
+	* GFC_DESCRIPTOR_EXTENT(array, which);      
+      shift *= GFC_DESCRIPTOR_STRIDE(array, which);
+      for (dim = which + 1; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+	{
+	  count[n] = 0;
+	  extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
+	  rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim);
+	  sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim);
+	  n++;
+	}
+      dim = GFC_DESCRIPTOR_RANK (array) - which;
+    }
+  else
+    {
+      for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+	{
+	  if (dim == which)
+	    {
+	      roffset = GFC_DESCRIPTOR_STRIDE(ret,dim);
+	      if (roffset == 0)
+		roffset = 1;
+	      soffset = GFC_DESCRIPTOR_STRIDE(array,dim);
+	      if (soffset == 0)
+		soffset = 1;
+	      len = GFC_DESCRIPTOR_EXTENT(array,dim);
+	    }
+	  else
+	    {
+	      count[n] = 0;
+	      extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
+	      rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim);
+	      sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim);
+	      n++;
+	    }
+	}
+      if (sstride[0] == 0)
+	sstride[0] = 1;
+      if (rstride[0] == 0)
+	rstride[0] = 1;
+
+      dim = GFC_DESCRIPTOR_RANK (array);
+    }
+
   rstride0 = rstride[0];
   sstride0 = sstride[0];
   rptr = ret->base_addr;
Index: generated/cshift0_i4.c
===================================================================
--- generated/cshift0_i4.c	(Revision 249104)
+++ generated/cshift0_i4.c	(Arbeitskopie)
@@ -51,6 +51,9 @@
   index_type len;
   index_type n;
 
+  bool do_blocked;
+  index_type r_ex, a_ex;
+
   which = which - 1;
   sstride[0] = 0;
   rstride[0] = 0;
@@ -63,33 +66,99 @@
   soffset = 1;
   len = 0;
 
-  for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+  r_ex = 1;
+  a_ex = 1;
+
+  if (which > 0)
     {
-      if (dim == which)
-        {
-          roffset = GFC_DESCRIPTOR_STRIDE(ret,dim);
-          if (roffset == 0)
-            roffset = 1;
-          soffset = GFC_DESCRIPTOR_STRIDE(array,dim);
-          if (soffset == 0)
-            soffset = 1;
-          len = GFC_DESCRIPTOR_EXTENT(array,dim);
-        }
-      else
-        {
-          count[n] = 0;
-          extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
-          rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim);
-          sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim);
-          n++;
-        }
+      /* Test if both ret and array are contiguous.  */
+      do_blocked = true;
+      dim = GFC_DESCRIPTOR_RANK (array);
+      for (n = 0; n < dim; n ++)
+	{
+	  index_type rs, as;
+	  rs = GFC_DESCRIPTOR_STRIDE (ret, n);
+	  if (rs != r_ex)
+	    {
+	      do_blocked = false;
+	      break;
+	    }
+	  as = GFC_DESCRIPTOR_STRIDE (array, n);
+	  if (as != a_ex)
+	    {
+	      do_blocked = false;
+	      break;
+	    }
+	  r_ex *= GFC_DESCRIPTOR_EXTENT (ret, n);
+	  a_ex *= GFC_DESCRIPTOR_EXTENT (array, n);
+	}
     }
-  if (sstride[0] == 0)
-    sstride[0] = 1;
-  if (rstride[0] == 0)
-    rstride[0] = 1;
+  else
+    do_blocked = false;
 
-  dim = GFC_DESCRIPTOR_RANK (array);
+  n = 0;
+
+  if (do_blocked)
+    {
+      /* For contiguous arrays, use the relationship that
+
+         dimension(n1,n2,n3) :: a, b
+	 b = cshift(a,sh,3)
+
+         can be dealt with as if
+
+	 dimension(n1*n2*n3) :: an, bn
+	 bn = cshift(a,sh*n1*n2,1)
+
+	 we can used a more blocked algorithm for dim>1.  */
+      sstride[0] = 1;
+      rstride[0] = 1;
+      roffset = 1;
+      soffset = 1;
+      len = GFC_DESCRIPTOR_STRIDE(array, which)
+	* GFC_DESCRIPTOR_EXTENT(array, which);      
+      shift *= GFC_DESCRIPTOR_STRIDE(array, which);
+      for (dim = which + 1; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+	{
+	  count[n] = 0;
+	  extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
+	  rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim);
+	  sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim);
+	  n++;
+	}
+      dim = GFC_DESCRIPTOR_RANK (array) - which;
+    }
+  else
+    {
+      for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+	{
+	  if (dim == which)
+	    {
+	      roffset = GFC_DESCRIPTOR_STRIDE(ret,dim);
+	      if (roffset == 0)
+		roffset = 1;
+	      soffset = GFC_DESCRIPTOR_STRIDE(array,dim);
+	      if (soffset == 0)
+		soffset = 1;
+	      len = GFC_DESCRIPTOR_EXTENT(array,dim);
+	    }
+	  else
+	    {
+	      count[n] = 0;
+	      extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
+	      rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim);
+	      sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim);
+	      n++;
+	    }
+	}
+      if (sstride[0] == 0)
+	sstride[0] = 1;
+      if (rstride[0] == 0)
+	rstride[0] = 1;
+
+      dim = GFC_DESCRIPTOR_RANK (array);
+    }
+
   rstride0 = rstride[0];
   sstride0 = sstride[0];
   rptr = ret->base_addr;
Index: generated/cshift0_i8.c
===================================================================
--- generated/cshift0_i8.c	(Revision 249104)
+++ generated/cshift0_i8.c	(Arbeitskopie)
@@ -51,6 +51,9 @@
   index_type len;
   index_type n;
 
+  bool do_blocked;
+  index_type r_ex, a_ex;
+
   which = which - 1;
   sstride[0] = 0;
   rstride[0] = 0;
@@ -63,33 +66,99 @@
   soffset = 1;
   len = 0;
 
-  for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+  r_ex = 1;
+  a_ex = 1;
+
+  if (which > 0)
     {
-      if (dim == which)
-        {
-          roffset = GFC_DESCRIPTOR_STRIDE(ret,dim);
-          if (roffset == 0)
-            roffset = 1;
-          soffset = GFC_DESCRIPTOR_STRIDE(array,dim);
-          if (soffset == 0)
-            soffset = 1;
-          len = GFC_DESCRIPTOR_EXTENT(array,dim);
-        }
-      else
-        {
-          count[n] = 0;
-          extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
-          rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim);
-          sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim);
-          n++;
-        }
+      /* Test if both ret and array are contiguous.  */
+      do_blocked = true;
+      dim = GFC_DESCRIPTOR_RANK (array);
+      for (n = 0; n < dim; n ++)
+	{
+	  index_type rs, as;
+	  rs = GFC_DESCRIPTOR_STRIDE (ret, n);
+	  if (rs != r_ex)
+	    {
+	      do_blocked = false;
+	      break;
+	    }
+	  as = GFC_DESCRIPTOR_STRIDE (array, n);
+	  if (as != a_ex)
+	    {
+	      do_blocked = false;
+	      break;
+	    }
+	  r_ex *= GFC_DESCRIPTOR_EXTENT (ret, n);
+	  a_ex *= GFC_DESCRIPTOR_EXTENT (array, n);
+	}
     }
-  if (sstride[0] == 0)
-    sstride[0] = 1;
-  if (rstride[0] == 0)
-    rstride[0] = 1;
+  else
+    do_blocked = false;
 
-  dim = GFC_DESCRIPTOR_RANK (array);
+  n = 0;
+
+  if (do_blocked)
+    {
+      /* For contiguous arrays, use the relationship that
+
+         dimension(n1,n2,n3) :: a, b
+	 b = cshift(a,sh,3)
+
+         can be dealt with as if
+
+	 dimension(n1*n2*n3) :: an, bn
+	 bn = cshift(a,sh*n1*n2,1)
+
+	 we can used a more blocked algorithm for dim>1.  */
+      sstride[0] = 1;
+      rstride[0] = 1;
+      roffset = 1;
+      soffset = 1;
+      len = GFC_DESCRIPTOR_STRIDE(array, which)
+	* GFC_DESCRIPTOR_EXTENT(array, which);      
+      shift *= GFC_DESCRIPTOR_STRIDE(array, which);
+      for (dim = which + 1; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+	{
+	  count[n] = 0;
+	  extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
+	  rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim);
+	  sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim);
+	  n++;
+	}
+      dim = GFC_DESCRIPTOR_RANK (array) - which;
+    }
+  else
+    {
+      for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+	{
+	  if (dim == which)
+	    {
+	      roffset = GFC_DESCRIPTOR_STRIDE(ret,dim);
+	      if (roffset == 0)
+		roffset = 1;
+	      soffset = GFC_DESCRIPTOR_STRIDE(array,dim);
+	      if (soffset == 0)
+		soffset = 1;
+	      len = GFC_DESCRIPTOR_EXTENT(array,dim);
+	    }
+	  else
+	    {
+	      count[n] = 0;
+	      extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
+	      rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim);
+	      sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim);
+	      n++;
+	    }
+	}
+      if (sstride[0] == 0)
+	sstride[0] = 1;
+      if (rstride[0] == 0)
+	rstride[0] = 1;
+
+      dim = GFC_DESCRIPTOR_RANK (array);
+    }
+
   rstride0 = rstride[0];
   sstride0 = sstride[0];
   rptr = ret->base_addr;
Index: generated/cshift0_r10.c
===================================================================
--- generated/cshift0_r10.c	(Revision 249104)
+++ generated/cshift0_r10.c	(Arbeitskopie)
@@ -51,6 +51,9 @@
   index_type len;
   index_type n;
 
+  bool do_blocked;
+  index_type r_ex, a_ex;
+
   which = which - 1;
   sstride[0] = 0;
   rstride[0] = 0;
@@ -63,33 +66,99 @@
   soffset = 1;
   len = 0;
 
-  for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+  r_ex = 1;
+  a_ex = 1;
+
+  if (which > 0)
     {
-      if (dim == which)
-        {
-          roffset = GFC_DESCRIPTOR_STRIDE(ret,dim);
-          if (roffset == 0)
-            roffset = 1;
-          soffset = GFC_DESCRIPTOR_STRIDE(array,dim);
-          if (soffset == 0)
-            soffset = 1;
-          len = GFC_DESCRIPTOR_EXTENT(array,dim);
-        }
-      else
-        {
-          count[n] = 0;
-          extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
-          rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim);
-          sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim);
-          n++;
-        }
+      /* Test if both ret and array are contiguous.  */
+      do_blocked = true;
+      dim = GFC_DESCRIPTOR_RANK (array);
+      for (n = 0; n < dim; n ++)
+	{
+	  index_type rs, as;
+	  rs = GFC_DESCRIPTOR_STRIDE (ret, n);
+	  if (rs != r_ex)
+	    {
+	      do_blocked = false;
+	      break;
+	    }
+	  as = GFC_DESCRIPTOR_STRIDE (array, n);
+	  if (as != a_ex)
+	    {
+	      do_blocked = false;
+	      break;
+	    }
+	  r_ex *= GFC_DESCRIPTOR_EXTENT (ret, n);
+	  a_ex *= GFC_DESCRIPTOR_EXTENT (array, n);
+	}
     }
-  if (sstride[0] == 0)
-    sstride[0] = 1;
-  if (rstride[0] == 0)
-    rstride[0] = 1;
+  else
+    do_blocked = false;
 
-  dim = GFC_DESCRIPTOR_RANK (array);
+  n = 0;
+
+  if (do_blocked)
+    {
+      /* For contiguous arrays, use the relationship that
+
+         dimension(n1,n2,n3) :: a, b
+	 b = cshift(a,sh,3)
+
+         can be dealt with as if
+
+	 dimension(n1*n2*n3) :: an, bn
+	 bn = cshift(a,sh*n1*n2,1)
+
+	 we can used a more blocked algorithm for dim>1.  */
+      sstride[0] = 1;
+      rstride[0] = 1;
+      roffset = 1;
+      soffset = 1;
+      len = GFC_DESCRIPTOR_STRIDE(array, which)
+	* GFC_DESCRIPTOR_EXTENT(array, which);      
+      shift *= GFC_DESCRIPTOR_STRIDE(array, which);
+      for (dim = which + 1; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+	{
+	  count[n] = 0;
+	  extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
+	  rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim);
+	  sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim);
+	  n++;
+	}
+      dim = GFC_DESCRIPTOR_RANK (array) - which;
+    }
+  else
+    {
+      for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+	{
+	  if (dim == which)
+	    {
+	      roffset = GFC_DESCRIPTOR_STRIDE(ret,dim);
+	      if (roffset == 0)
+		roffset = 1;
+	      soffset = GFC_DESCRIPTOR_STRIDE(array,dim);
+	      if (soffset == 0)
+		soffset = 1;
+	      len = GFC_DESCRIPTOR_EXTENT(array,dim);
+	    }
+	  else
+	    {
+	      count[n] = 0;
+	      extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
+	      rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim);
+	      sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim);
+	      n++;
+	    }
+	}
+      if (sstride[0] == 0)
+	sstride[0] = 1;
+      if (rstride[0] == 0)
+	rstride[0] = 1;
+
+      dim = GFC_DESCRIPTOR_RANK (array);
+    }
+
   rstride0 = rstride[0];
   sstride0 = sstride[0];
   rptr = ret->base_addr;
Index: generated/cshift0_r16.c
===================================================================
--- generated/cshift0_r16.c	(Revision 249104)
+++ generated/cshift0_r16.c	(Arbeitskopie)
@@ -51,6 +51,9 @@
   index_type len;
   index_type n;
 
+  bool do_blocked;
+  index_type r_ex, a_ex;
+
   which = which - 1;
   sstride[0] = 0;
   rstride[0] = 0;
@@ -63,33 +66,99 @@
   soffset = 1;
   len = 0;
 
-  for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+  r_ex = 1;
+  a_ex = 1;
+
+  if (which > 0)
     {
-      if (dim == which)
-        {
-          roffset = GFC_DESCRIPTOR_STRIDE(ret,dim);
-          if (roffset == 0)
-            roffset = 1;
-          soffset = GFC_DESCRIPTOR_STRIDE(array,dim);
-          if (soffset == 0)
-            soffset = 1;
-          len = GFC_DESCRIPTOR_EXTENT(array,dim);
-        }
-      else
-        {
-          count[n] = 0;
-          extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
-          rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim);
-          sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim);
-          n++;
-        }
+      /* Test if both ret and array are contiguous.  */
+      do_blocked = true;
+      dim = GFC_DESCRIPTOR_RANK (array);
+      for (n = 0; n < dim; n ++)
+	{
+	  index_type rs, as;
+	  rs = GFC_DESCRIPTOR_STRIDE (ret, n);
+	  if (rs != r_ex)
+	    {
+	      do_blocked = false;
+	      break;
+	    }
+	  as = GFC_DESCRIPTOR_STRIDE (array, n);
+	  if (as != a_ex)
+	    {
+	      do_blocked = false;
+	      break;
+	    }
+	  r_ex *= GFC_DESCRIPTOR_EXTENT (ret, n);
+	  a_ex *= GFC_DESCRIPTOR_EXTENT (array, n);
+	}
     }
-  if (sstride[0] == 0)
-    sstride[0] = 1;
-  if (rstride[0] == 0)
-    rstride[0] = 1;
+  else
+    do_blocked = false;
 
-  dim = GFC_DESCRIPTOR_RANK (array);
+  n = 0;
+
+  if (do_blocked)
+    {
+      /* For contiguous arrays, use the relationship that
+
+         dimension(n1,n2,n3) :: a, b
+	 b = cshift(a,sh,3)
+
+         can be dealt with as if
+
+	 dimension(n1*n2*n3) :: an, bn
+	 bn = cshift(a,sh*n1*n2,1)
+
+	 we can used a more blocked algorithm for dim>1.  */
+      sstride[0] = 1;
+      rstride[0] = 1;
+      roffset = 1;
+      soffset = 1;
+      len = GFC_DESCRIPTOR_STRIDE(array, which)
+	* GFC_DESCRIPTOR_EXTENT(array, which);      
+      shift *= GFC_DESCRIPTOR_STRIDE(array, which);
+      for (dim = which + 1; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+	{
+	  count[n] = 0;
+	  extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
+	  rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim);
+	  sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim);
+	  n++;
+	}
+      dim = GFC_DESCRIPTOR_RANK (array) - which;
+    }
+  else
+    {
+      for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+	{
+	  if (dim == which)
+	    {
+	      roffset = GFC_DESCRIPTOR_STRIDE(ret,dim);
+	      if (roffset == 0)
+		roffset = 1;
+	      soffset = GFC_DESCRIPTOR_STRIDE(array,dim);
+	      if (soffset == 0)
+		soffset = 1;
+	      len = GFC_DESCRIPTOR_EXTENT(array,dim);
+	    }
+	  else
+	    {
+	      count[n] = 0;
+	      extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
+	      rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim);
+	      sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim);
+	      n++;
+	    }
+	}
+      if (sstride[0] == 0)
+	sstride[0] = 1;
+      if (rstride[0] == 0)
+	rstride[0] = 1;
+
+      dim = GFC_DESCRIPTOR_RANK (array);
+    }
+
   rstride0 = rstride[0];
   sstride0 = sstride[0];
   rptr = ret->base_addr;
Index: generated/cshift0_r4.c
===================================================================
--- generated/cshift0_r4.c	(Revision 249104)
+++ generated/cshift0_r4.c	(Arbeitskopie)
@@ -51,6 +51,9 @@
   index_type len;
   index_type n;
 
+  bool do_blocked;
+  index_type r_ex, a_ex;
+
   which = which - 1;
   sstride[0] = 0;
   rstride[0] = 0;
@@ -63,33 +66,99 @@
   soffset = 1;
   len = 0;
 
-  for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+  r_ex = 1;
+  a_ex = 1;
+
+  if (which > 0)
     {
-      if (dim == which)
-        {
-          roffset = GFC_DESCRIPTOR_STRIDE(ret,dim);
-          if (roffset == 0)
-            roffset = 1;
-          soffset = GFC_DESCRIPTOR_STRIDE(array,dim);
-          if (soffset == 0)
-            soffset = 1;
-          len = GFC_DESCRIPTOR_EXTENT(array,dim);
-        }
-      else
-        {
-          count[n] = 0;
-          extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
-          rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim);
-          sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim);
-          n++;
-        }
+      /* Test if both ret and array are contiguous.  */
+      do_blocked = true;
+      dim = GFC_DESCRIPTOR_RANK (array);
+      for (n = 0; n < dim; n ++)
+	{
+	  index_type rs, as;
+	  rs = GFC_DESCRIPTOR_STRIDE (ret, n);
+	  if (rs != r_ex)
+	    {
+	      do_blocked = false;
+	      break;
+	    }
+	  as = GFC_DESCRIPTOR_STRIDE (array, n);
+	  if (as != a_ex)
+	    {
+	      do_blocked = false;
+	      break;
+	    }
+	  r_ex *= GFC_DESCRIPTOR_EXTENT (ret, n);
+	  a_ex *= GFC_DESCRIPTOR_EXTENT (array, n);
+	}
     }
-  if (sstride[0] == 0)
-    sstride[0] = 1;
-  if (rstride[0] == 0)
-    rstride[0] = 1;
+  else
+    do_blocked = false;
 
-  dim = GFC_DESCRIPTOR_RANK (array);
+  n = 0;
+
+  if (do_blocked)
+    {
+      /* For contiguous arrays, use the relationship that
+
+         dimension(n1,n2,n3) :: a, b
+	 b = cshift(a,sh,3)
+
+         can be dealt with as if
+
+	 dimension(n1*n2*n3) :: an, bn
+	 bn = cshift(a,sh*n1*n2,1)
+
+	 we can used a more blocked algorithm for dim>1.  */
+      sstride[0] = 1;
+      rstride[0] = 1;
+      roffset = 1;
+      soffset = 1;
+      len = GFC_DESCRIPTOR_STRIDE(array, which)
+	* GFC_DESCRIPTOR_EXTENT(array, which);      
+      shift *= GFC_DESCRIPTOR_STRIDE(array, which);
+      for (dim = which + 1; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+	{
+	  count[n] = 0;
+	  extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
+	  rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim);
+	  sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim);
+	  n++;
+	}
+      dim = GFC_DESCRIPTOR_RANK (array) - which;
+    }
+  else
+    {
+      for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+	{
+	  if (dim == which)
+	    {
+	      roffset = GFC_DESCRIPTOR_STRIDE(ret,dim);
+	      if (roffset == 0)
+		roffset = 1;
+	      soffset = GFC_DESCRIPTOR_STRIDE(array,dim);
+	      if (soffset == 0)
+		soffset = 1;
+	      len = GFC_DESCRIPTOR_EXTENT(array,dim);
+	    }
+	  else
+	    {
+	      count[n] = 0;
+	      extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
+	      rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim);
+	      sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim);
+	      n++;
+	    }
+	}
+      if (sstride[0] == 0)
+	sstride[0] = 1;
+      if (rstride[0] == 0)
+	rstride[0] = 1;
+
+      dim = GFC_DESCRIPTOR_RANK (array);
+    }
+
   rstride0 = rstride[0];
   sstride0 = sstride[0];
   rptr = ret->base_addr;
Index: generated/cshift0_r8.c
===================================================================
--- generated/cshift0_r8.c	(Revision 249104)
+++ generated/cshift0_r8.c	(Arbeitskopie)
@@ -51,6 +51,9 @@
   index_type len;
   index_type n;
 
+  bool do_blocked;
+  index_type r_ex, a_ex;
+
   which = which - 1;
   sstride[0] = 0;
   rstride[0] = 0;
@@ -63,33 +66,99 @@
   soffset = 1;
   len = 0;
 
-  for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+  r_ex = 1;
+  a_ex = 1;
+
+  if (which > 0)
     {
-      if (dim == which)
-        {
-          roffset = GFC_DESCRIPTOR_STRIDE(ret,dim);
-          if (roffset == 0)
-            roffset = 1;
-          soffset = GFC_DESCRIPTOR_STRIDE(array,dim);
-          if (soffset == 0)
-            soffset = 1;
-          len = GFC_DESCRIPTOR_EXTENT(array,dim);
-        }
-      else
-        {
-          count[n] = 0;
-          extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
-          rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim);
-          sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim);
-          n++;
-        }
+      /* Test if both ret and array are contiguous.  */
+      do_blocked = true;
+      dim = GFC_DESCRIPTOR_RANK (array);
+      for (n = 0; n < dim; n ++)
+	{
+	  index_type rs, as;
+	  rs = GFC_DESCRIPTOR_STRIDE (ret, n);
+	  if (rs != r_ex)
+	    {
+	      do_blocked = false;
+	      break;
+	    }
+	  as = GFC_DESCRIPTOR_STRIDE (array, n);
+	  if (as != a_ex)
+	    {
+	      do_blocked = false;
+	      break;
+	    }
+	  r_ex *= GFC_DESCRIPTOR_EXTENT (ret, n);
+	  a_ex *= GFC_DESCRIPTOR_EXTENT (array, n);
+	}
     }
-  if (sstride[0] == 0)
-    sstride[0] = 1;
-  if (rstride[0] == 0)
-    rstride[0] = 1;
+  else
+    do_blocked = false;
 
-  dim = GFC_DESCRIPTOR_RANK (array);
+  n = 0;
+
+  if (do_blocked)
+    {
+      /* For contiguous arrays, use the relationship that
+
+         dimension(n1,n2,n3) :: a, b
+	 b = cshift(a,sh,3)
+
+         can be dealt with as if
+
+	 dimension(n1*n2*n3) :: an, bn
+	 bn = cshift(a,sh*n1*n2,1)
+
+	 we can used a more blocked algorithm for dim>1.  */
+      sstride[0] = 1;
+      rstride[0] = 1;
+      roffset = 1;
+      soffset = 1;
+      len = GFC_DESCRIPTOR_STRIDE(array, which)
+	* GFC_DESCRIPTOR_EXTENT(array, which);      
+      shift *= GFC_DESCRIPTOR_STRIDE(array, which);
+      for (dim = which + 1; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+	{
+	  count[n] = 0;
+	  extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
+	  rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim);
+	  sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim);
+	  n++;
+	}
+      dim = GFC_DESCRIPTOR_RANK (array) - which;
+    }
+  else
+    {
+      for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+	{
+	  if (dim == which)
+	    {
+	      roffset = GFC_DESCRIPTOR_STRIDE(ret,dim);
+	      if (roffset == 0)
+		roffset = 1;
+	      soffset = GFC_DESCRIPTOR_STRIDE(array,dim);
+	      if (soffset == 0)
+		soffset = 1;
+	      len = GFC_DESCRIPTOR_EXTENT(array,dim);
+	    }
+	  else
+	    {
+	      count[n] = 0;
+	      extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
+	      rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim);
+	      sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim);
+	      n++;
+	    }
+	}
+      if (sstride[0] == 0)
+	sstride[0] = 1;
+      if (rstride[0] == 0)
+	rstride[0] = 1;
+
+      dim = GFC_DESCRIPTOR_RANK (array);
+    }
+
   rstride0 = rstride[0];
   sstride0 = sstride[0];
   rptr = ret->base_addr;
! { dg-do  run }
! Take cshift through its paces to make sure no boundary
! cases are wrong with the optimized cshift.

module kinds
  integer, parameter :: sp = selected_real_kind(6) ! Single precision
end module kinds

module replacements
  use kinds
contains
  subroutine cshift_sp_3_v1 (array, shift, dim, res)
    integer, parameter :: wp = sp
    real(kind=wp), dimension(:,:,:), intent(in) :: array
    integer, intent(in) :: shift, dim
    real(kind=wp), dimension(:,:,:), intent(out) :: res
    integer :: i,j,k
    integer :: sh, rsh
    integer :: n
    integer :: n2, n3
    res = 0
    n3 = size(array,3)
    n2 = size(array,2)
    n1 = size(array,1)
    if (dim == 1) then
       n = n1
       sh = modulo(shift, n)
       rsh = n - sh
       do k=1, n3
          do j=1, n2
             do i=1, rsh
                res(i,j,k) = array(i+sh,j,k)
             end do
             do i=rsh+1,n
                res(i,j,k) = array(i-rsh,j,k)
             end do
          end do
       end do
    else if (dim == 2) then
       n = n2
       sh = modulo(shift,n)
       rsh = n - sh
       do k=1, n3
          do j=1, rsh
             do i=1, n1
                res(i,j,k) = array(i,j+sh, k)
             end do
          end do
          do j=rsh+1, n
             do i=1, n1
                res(i,j,k) = array(i,j-rsh, k)
             end do
          end do
       end do
    else if (dim == 3) then
       n = n3
       sh = modulo(shift, n)
       rsh = n - sh
       do k=1, rsh
          do j=1, n2
             do i=1, n1
                res(i,j,k) = array(i, j, k+sh)
             end do
          end do
       end do
       do k=rsh+1, n
          do j=1, n2
             do i=1, n1
                res(i,j, k) = array(i, j, k-rsh)
             end do
          end do          
       end do
    else
       stop "Wrong argument to dim"
    end if
  end subroutine cshift_sp_3_v1
end module replacements

program testme
  use kinds
  use replacements
  implicit none
  integer, parameter :: wp = sp  ! Working precision
  INTEGER, PARAMETER :: n = 7
  real(kind=wp), dimension(:,:,:), allocatable :: a,b,c
  integer i, j, k
  real:: t1, t2
  integer, parameter :: nrep = 20

  allocate (a(n,n,n), b(n,n,n),c(n,n,n))
  call random_number(a)
  do k = 1,3
   do i=-3,3,2
     call cshift_sp_3_v1 (a, i, k, b)
     c = cshift(a,i,k)
     if (any (c /= b)) call abort
   end do
  end do
  deallocate (b,c)
  allocate (b(n-1,n-1,n-1),c(n-1,n-1,n-1))
  do k=1,3
     do i=-3,3,2
        call cshift_sp_3_v1 (a(1:n-1,1:n-1,1:n-1), i, k, b)
        c = cshift(a(1:n-1,1:n-1,1:n-1), i, k)
        if (any (c /= b)) call abort
     end do
  end do
  deallocate (b,c)
  allocate (b(n,n-1,n-1), c(n,n-1,n-1))
  do k=1,3
     do i=-3,3,2
        call cshift_sp_3_v1 (a(:,2:n,2:n), i, k, b)
        c = cshift (a(:,2:n,2:n), i, k)
        if (any (c /= b)) call abort
     end do
  end do
  deallocate (b,c)
  allocate (b(n,n,n-1), c(n,n,n-1))

  do k=1,3
     do i=-3,3,2
        call cshift_sp_3_v1 (a(:,:,2:n), i, k, b)
        c = cshift (a(:,:,2:n), i, k)
        if (any (c /= b)) call abort
     end do
  end do
  
end program testme
module kinds
  integer, parameter :: sp = selected_real_kind(6) ! Single precision
  integer, parameter :: dp = selected_real_kind(15) ! Double precision
end module kinds

module replacements
  use kinds
contains
  subroutine cshift_sp_3_v1 (array, shift, dim, res)
    integer, parameter :: wp = sp
    real(kind=wp), dimension(:,:,:), intent(in), contiguous :: array
    integer, intent(in) :: shift, dim
    real(kind=wp), dimension(:,:,:), intent(out), contiguous :: res
    integer :: i,j,k
    integer :: sh, rsh
    integer :: n
    res = 0
    if (dim == 1) then
       n = size(array,1)
       sh = modulo(shift, n)
       rsh = n - sh
       do k=1, size(array,3)
          do j=1, size(array,2)
             do i=1, rsh
                res(i,j,k) = array(i+sh,j,k)
             end do
             do i=rsh+1,n
                res(i,j,k) = array(i-rsh,j,k)
             end do
          end do
       end do
    else if (dim == 2) then
       n = size(array,2)
       sh = modulo(shift,n)
       rsh = n - sh
       do k=1, size(array,3)
          do j=1, rsh
             do i=1, size(array,1)
                res(i,j,k) = array(i,j+sh, k)
             end do
          end do
          do j=rsh+1, n
             do i=1, size(array,1)
                res(i,j,k) = array(i,j-rsh, k)
             end do
          end do
       end do
    else if (dim == 3) then
       n = size(array,3)
       sh = modulo(shift, n)
       rsh = n - sh
       do k=1, rsh
          do j=1, size(array,2)
             do i=1, size(array,1)
                res(i,j,k) = array(i, j, k+sh)
             end do
          end do
       end do
       do k=rsh+1, n
          do j=1, size(array,2)
             do i=1, size(array,1)
                res(i,j, k) = array(i, j, k-rsh)
             end do
          end do          
       end do
    else
       stop "Wrong argument to dim"
    end if
  end subroutine cshift_sp_3_v1
end module replacements

program testme
  use kinds
  use replacements
  implicit none
  integer, parameter :: wp = sp  ! Working precision
  INTEGER, PARAMETER :: n = 500
  real(kind=wp) :: a(n,n,n), b(n,n,n)
  integer i, j, k
  real t1, t2

  print *,"Testing explicit DO loops"
  call random_number(a)
  do k = 1,3
     call cpu_time ( t1 )
     do j = 1, 10
        call cshift_sp_3_v1 (a, 1, k, b)
     end do
     call cpu_time ( t2 )
     write ( *, * ) 'Dim = ', k, ' Elapsed CPU time = ', t2-t1
  end do

  print *,"Testing built-in cshift"
  do k = 1,3
     call cpu_time ( t1 )
     do j = 1, 10
        b = cshift(a,1,k)
     end do
     call cpu_time ( t2 )
     write ( *, * ) 'Dim = ', k, ' Elapsed CPU time = ', t2-t1
  end do

end program testme

Reply via email to