The attached patch addresses PR 31640. It reduces the default function alignment when not optimizing for size from cache line size (32 bytes) to 4 bytes and sets the loop alignment to 4 bytes when not optimizing for size. Moreover, it brings back the -falign-loops option, which was always overridden and disabled for -m4.
Tested against rev 182734 with make -k check RUNTESTFLAGS="--target_board=sh-sim \{-m2/-ml, -m2/-mb, -m2a-single/-mb, -m4-single/-ml, -m4-single/-mb, -m4a-single/-ml, -m4a-single/-mb}" and no new failures. OK for trunk? 2012-01-02 Oleg Endo <olege...@gcc.gnu.org> PR target/31640 * config/sh/sh.h (LOOP_ALIGN): Move logic to ... * config/sh/sh.c (sh_loop_align): ... here. Don't disable loop alignment for TARGET_HARD_SH4. (sh_option_override): Reduce default function alignment. Set loop alignment to 4 bytes when not optimizing for size.
Index: gcc/config/sh/sh.c =================================================================== --- gcc/config/sh/sh.c (revision 182734) +++ gcc/config/sh/sh.c (working copy) @@ -816,20 +816,42 @@ } } + /* Adjust loop, jump and function alignment values (in bytes), if those + were not specified by the user using -falign-loops, -falign-jumps + and -falign-functions options. + 32 bit alignment is better for speed, because instructions can be + fetched as a pair from a longword boundary. For size use 16 bit + alignment to get more compact code. + Aligning all jumps increases the code size, even if it might + result in slightly faster code. Thus, it is set to the smallest + alignment possible if not specified by the user. */ if (align_loops == 0) - align_loops = 1 << (TARGET_SH5 ? 3 : 2); + { + if (TARGET_SH5) + align_loops = 8; + else + align_loops = optimize_size ? 2 : 4; + } + if (align_jumps == 0) - align_jumps = 1 << CACHE_LOG; + { + if (TARGET_SHMEDIA) + align_jumps = 1 << CACHE_LOG; + else + align_jumps = 2; + } else if (align_jumps < (TARGET_SHMEDIA ? 4 : 2)) align_jumps = TARGET_SHMEDIA ? 4 : 2; - /* Allocation boundary (in *bytes*) for the code of a function. - SH1: 32 bit alignment is faster, because instructions are always - fetched as a pair from a longword boundary. - SH2 .. SH5 : align to cache line start. */ if (align_functions == 0) - align_functions - = optimize_size ? FUNCTION_BOUNDARY/8 : (1 << CACHE_LOG); + { + if (TARGET_SHMEDIA) + align_functions = optimize_size + ? FUNCTION_BOUNDARY/8 : (1 << CACHE_LOG); + else + align_functions = optimize_size ? 2 : 4; + } + /* The linker relaxation code breaks when a function contains alignments that are larger than that at the start of a compilation unit. */ @@ -5342,6 +5364,9 @@ { rtx next = label; + if (! optimize || optimize_size) + return 0; + do next = next_nonnote_insn (next); while (next && LABEL_P (next)); Index: gcc/config/sh/sh.h =================================================================== --- gcc/config/sh/sh.h (revision 182734) +++ gcc/config/sh/sh.h (working copy) @@ -579,9 +579,7 @@ #define LABEL_ALIGN_AFTER_BARRIER(LABEL_AFTER_BARRIER) \ barrier_align (LABEL_AFTER_BARRIER) -#define LOOP_ALIGN(A_LABEL) \ - ((! optimize || TARGET_HARD_SH4 || optimize_size) \ - ? 0 : sh_loop_align (A_LABEL)) +#define LOOP_ALIGN(A_LABEL) sh_loop_align (A_LABEL) #define LABEL_ALIGN(A_LABEL) \ ( \