The attached patch addresses PR 31640. It reduces the default function alignment when not optimizing for size from cache line size (32 bytes) to 4 bytes and sets the loop alignment to 4 bytes when not optimizing for size. Moreover, it brings back the -falign-loops option, which was always overridden and disabled for -m4.
Tested against rev 182734 with make -k check RUNTESTFLAGS="--target_board=sh-sim \{-m2/-ml, -m2/-mb, -m2a-single/-mb, -m4-single/-ml, -m4-single/-mb, -m4a-single/-ml, -m4a-single/-mb}" and no new failures. OK for trunk? 2012-01-02 Oleg Endo <olege...@gcc.gnu.org> PR target/31640 * config/sh/sh.h (LOOP_ALIGN): Move logic to ... * config/sh/sh.c (sh_loop_align): ... here. Don't disable loop alignment for TARGET_HARD_SH4. (sh_option_override): Reduce default function alignment. Set loop alignment to 4 bytes when not optimizing for size.
Index: gcc/config/sh/sh.c =================================================================== --- gcc/config/sh/sh.c (revision 182734) +++ gcc/config/sh/sh.c (working copy) @@ -816,20 +816,42 @@ } } + /* Adjust loop, jump and function alignment values (in bytes), if those + were not specified by the user using -falign-loops, -falign-jumps + and -falign-functions options. + 32 bit alignment is better for speed, because instructions can be + fetched as a pair from a longword boundary. For size use 16 bit + alignment to get more compact code. + Aligning all jumps increases the code size, even if it might + result in slightly faster code. Thus, it is set to the smallest + alignment possible if not specified by the user. */ if (align_loops == 0) - align_loops = 1 << (TARGET_SH5 ? 3 : 2); + { + if (TARGET_SH5) + align_loops = 8; + else + align_loops = optimize_size ? 2 : 4; + } + if (align_jumps == 0) - align_jumps = 1 << CACHE_LOG; + { + if (TARGET_SHMEDIA) + align_jumps = 1 << CACHE_LOG; + else + align_jumps = 2; + } else if (align_jumps < (TARGET_SHMEDIA ? 4 : 2)) align_jumps = TARGET_SHMEDIA ? 4 : 2; - /* Allocation boundary (in *bytes*) for the code of a function. - SH1: 32 bit alignment is faster, because instructions are always - fetched as a pair from a longword boundary. - SH2 .. SH5 : align to cache line start. */ if (align_functions == 0) - align_functions - = optimize_size ? FUNCTION_BOUNDARY/8 : (1 << CACHE_LOG); + { + if (TARGET_SHMEDIA) + align_functions = optimize_size + ? FUNCTION_BOUNDARY/8 : (1 << CACHE_LOG); + else + align_functions = optimize_size ? 2 : 4; + } + /* The linker relaxation code breaks when a function contains alignments that are larger than that at the start of a compilation unit. */ @@ -5342,6 +5364,9 @@ { rtx next = label; + if (! optimize || optimize_size) + return 0; + do next = next_nonnote_insn (next); while (next && LABEL_P (next)); Index: gcc/config/sh/sh.h =================================================================== --- gcc/config/sh/sh.h (revision 182734) +++ gcc/config/sh/sh.h (working copy) @@ -579,9 +579,7 @@ #define LABEL_ALIGN_AFTER_BARRIER(LABEL_AFTER_BARRIER) \ barrier_align (LABEL_AFTER_BARRIER) -#define LOOP_ALIGN(A_LABEL) \ - ((! optimize || TARGET_HARD_SH4 || optimize_size) \ - ? 0 : sh_loop_align (A_LABEL)) +#define LOOP_ALIGN(A_LABEL) sh_loop_align (A_LABEL) #define LABEL_ALIGN(A_LABEL) \ ( \