Hi! Linus complained about padding inserted in between asm goto. In 4.8 even -mtune=generic performs ix86_avoid_jump_mispredicts, in 4.9 only e.g. -mtune=atom and others, but not generic. The issue is that assuming every asm goto must contain a jump is too pessimistic, often asm goto (may) transfer control to the label(s) through other means (e.g. on the side data structure with labels etc.), plus we conservatively assume asm goto is 0 bytes long, so we were adding the paddings pretty much always when multiple asm goto stmts appeared consecutively.
This patch just ignores asm goto from the optimization. Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk and 4.8 after a while? 2014-01-03 Jakub Jelinek <ja...@redhat.com> PR target/59625 * config/i386/i386.c (ix86_avoid_jump_mispredicts): Don't consider asm goto as jump. * gcc.target/i386/pr59625.c: New test. --- gcc/config/i386/i386.c.jj 2014-01-01 00:45:15.000000000 +0100 +++ gcc/config/i386/i386.c 2014-01-02 14:44:07.980841529 +0100 @@ -38852,7 +38852,8 @@ ix86_avoid_jump_mispredicts (void) while (nbytes + max_skip >= 16) { start = NEXT_INSN (start); - if (JUMP_P (start) || CALL_P (start)) + if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0) + || CALL_P (start)) njumps--, isjump = 1; else isjump = 0; @@ -38867,7 +38868,8 @@ ix86_avoid_jump_mispredicts (void) if (dump_file) fprintf (dump_file, "Insn %i estimated to %i bytes\n", INSN_UID (insn), min_size); - if (JUMP_P (insn) || CALL_P (insn)) + if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0) + || CALL_P (insn)) njumps++; else continue; @@ -38875,7 +38877,8 @@ ix86_avoid_jump_mispredicts (void) while (njumps > 3) { start = NEXT_INSN (start); - if (JUMP_P (start) || CALL_P (start)) + if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0) + || CALL_P (start)) njumps--, isjump = 1; else isjump = 0; --- gcc/testsuite/gcc.target/i386/pr59625.c.jj 2014-01-02 17:17:45.611680569 +0100 +++ gcc/testsuite/gcc.target/i386/pr59625.c 2014-01-02 17:17:37.000000000 +0100 @@ -0,0 +1,36 @@ +/* PR target/59625 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -mtune=atom" } */ + +int +foo (void) +{ + asm goto ("" : : : : lab); + asm goto ("" : : : : lab); + asm goto ("" : : : : lab); + asm goto ("" : : : : lab); + asm goto ("" : : : : lab); + asm goto ("" : : : : lab); + asm goto ("" : : : : lab); + asm goto ("" : : : : lab); + asm goto ("" : : : : lab); + asm goto ("" : : : : lab); + asm goto ("" : : : : lab); + asm goto ("" : : : : lab); + asm goto ("" : : : : lab); + asm goto ("" : : : : lab); + asm goto ("" : : : : lab); + asm goto ("" : : : : lab); + asm goto ("" : : : : lab); + asm goto ("" : : : : lab); + asm goto ("" : : : : lab); + asm goto ("" : : : : lab); + return 0; +lab: + return 1; +} + +/* Verify we don't consider asm goto as a jump for four jumps limit + optimization. asm goto doesn't have to contain a jump at all, + the branching to labels can happen through different means. */ +/* { dg-final { scan-assembler-not "p2align\[^\n\r\]*\[\n\r]*\[^\n\r\]*p2align\[^\n\r\]*\[\n\r]*\[^\n\r\]*p2align\[^\n\r\]*\[\n\r]*\[^\n\r\]*p2align\[^\n\r\]*\[\n\r]*\[^\n\r\]*p2align\[^\n\r\]*\[\n\r]*\[^\n\r\]*p2align" } } */ Jakub