Hello, This patch inlines strlen when optimizing for speed.
A strlen body is now inlined as: mov r4,r0 tst #3,r0 bf/s .L6 mov r4,r1 mov #0,r3 .L4: mov.l @r1+,r2 cmp/str r3,r2 bf .L4 add #-4,r1 .L6: mov.b @r1+,r2 tst r2,r2 bf/s .L6 sett mov r1,r0 rts subc r4,r0 This gives a few percent performance improvement here and there for regexp-based benchmarks, but worth highlighting is a 70% speedup for eembc networking/qos, which now nicely combines sequences like !strncmp(*av, "any", strlen(*av)). No regressions for sh-none-elf. OK for trunk? Many thanks, Christian
2013-11-05 Christian Bruel <christian.br...@st.com> * gcc/config/sh/sh-mem.cc (sh_expand_strlen): New function. * gcc/config/sh/sh-protos.h (sh_expand_strlen): Declare. * gcc/config/sh/sh.md (strlensi): New pattern. (UNSPEC_BUILTIN_STRLEN): Define. 2013-11-05 Christian Bruel <christian.br...@st.com> * gcc.target/sh/strlen.c: New test. diff --exclude='*~' --exclude=.svn -ruN gcc/config/sh/sh.md ../../gnu_trunk.test/gcc/gcc/config/sh/sh.md --- gcc/config/sh/sh.md 2013-11-05 12:28:38.000000000 +0100 +++ ../../gnu_trunk.test/gcc/gcc/config/sh/sh.md 2013-11-05 11:16:00.000000000 +0100 @@ -161,6 +161,9 @@ ;; (unspec [OFFSET ANCHOR] UNSPEC_PCREL_SYMOFF) == OFFSET - (ANCHOR - .). (UNSPEC_PCREL_SYMOFF 46) + ;; Misc builtins + (UNSPEC_BUILTIN_STRLEN 47) + ;; These are used with unspec_volatile. (UNSPECV_BLOCKAGE 0) (UNSPECV_ALIGN 1) @@ -12081,6 +12084,20 @@ FAIL; }) +(define_expand "strlensi" + [(set (match_operand:SI 0 "register_operand") + (unspec:SI [(match_operand:BLK 1 "memory_operand") + (match_operand:SI 2 "immediate_operand") + (match_operand:SI 3 "immediate_operand")] + UNSPEC_BUILTIN_STRLEN))] + "TARGET_SH1 && optimize" +{ + if (! optimize_insn_for_size_p () && sh_expand_strlen (operands)) + DONE; + else + FAIL; +}) + ;; ------------------------------------------------------------------------- ;; Floating point instructions. diff --exclude='*~' --exclude=.svn -ruN gcc/config/sh/sh-mem.cc ../../gnu_trunk.test/gcc/gcc/config/sh/sh-mem.cc --- gcc/config/sh/sh-mem.cc 2013-11-05 12:30:33.000000000 +0100 +++ ../../gnu_trunk.test/gcc/gcc/config/sh/sh-mem.cc 2013-11-04 15:34:05.000000000 +0100 @@ -469,3 +469,83 @@ return true; } + +/* Emit code to perform a strlen + + OPERANDS[0] is the destination. + OPERANDS[1] is the string. + OPERANDS[2] is the char to search. + OPERANDS[3] is the alignment. 
*/ +bool +sh_expand_strlen (rtx *operands) +{ + rtx addr1 = operands[1]; + rtx start_addr = copy_addr_to_reg (XEXP (addr1, 0)); + rtx end_addr = gen_reg_rtx (Pmode); + rtx tmp0 = gen_reg_rtx (SImode); + rtx tmp1 = gen_reg_rtx (SImode); + rtx L_return = gen_label_rtx (); + rtx L_loop_byte = gen_label_rtx (); + + rtx jump; + rtx L_loop_long = gen_label_rtx (); + rtx L_end_loop_long = gen_label_rtx (); + + int align = INTVAL (operands[3]); + + emit_move_insn (operands[0], GEN_INT (-1)); + + /* remember start of string. */ + emit_move_insn (end_addr, start_addr); + + if (align < 4) + { + emit_insn (gen_tstsi_t (GEN_INT (3), start_addr)); + jump = emit_jump_insn (gen_branch_false (L_loop_byte)); + add_int_reg_note (jump, REG_BR_PROB, prob_likely); + } + + emit_move_insn (tmp0, operands[2]); + + addr1 = adjust_automodify_address (addr1, SImode, start_addr, 0); + + /*start long loop. */ + emit_label (L_loop_long); + + /* tmp1 is aligned, OK to load. */ + emit_move_insn (tmp1, addr1); + emit_move_insn (start_addr, plus_constant (Pmode, start_addr, 4)); + + /* Is there a 0 byte ? */ + emit_insn (gen_cmpstr_t (tmp0, tmp1)); + + jump = emit_jump_insn (gen_branch_false (L_loop_long)); + add_int_reg_note (jump, REG_BR_PROB, prob_likely); + /* end loop. */ + + emit_label (L_end_loop_long); + + emit_move_insn (start_addr, plus_constant (Pmode, start_addr, -4)); + + /* start byte loop. */ + addr1 = adjust_address (addr1, QImode, 0); + + emit_label (L_loop_byte); + + emit_insn (gen_extendqisi2 (tmp1, addr1)); + emit_move_insn (start_addr, plus_constant (Pmode, start_addr, 1)); + + emit_insn (gen_cmpeqsi_t (tmp1, const0_rtx)); + jump = emit_jump_insn (gen_branch_false (L_loop_byte)); + add_int_reg_note (jump, REG_BR_PROB, prob_likely); + + /* end loop. 
*/ + + emit_label (L_return); + + emit_insn (gen_addsi3 (end_addr, end_addr, GEN_INT (1))); + + emit_insn (gen_subsi3 (operands[0], start_addr, end_addr)); + + return true; +} diff --exclude='*~' --exclude=.svn -ruN gcc/config/sh/sh-protos.h ../../gnu_trunk.test/gcc/gcc/config/sh/sh-protos.h --- gcc/config/sh/sh-protos.h 2013-11-05 12:47:44.000000000 +0100 +++ ../../gnu_trunk.test/gcc/gcc/config/sh/sh-protos.h 2013-11-05 10:14:48.000000000 +0100 @@ -118,6 +118,7 @@ extern void prepare_move_operands (rtx[], enum machine_mode mode); extern bool sh_expand_cmpstr (rtx *); extern bool sh_expand_cmpnstr (rtx *); +extern bool sh_expand_strlen (rtx *); extern enum rtx_code prepare_cbranch_operands (rtx *, enum machine_mode mode, enum rtx_code comparison); extern void expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int); diff --exclude='*~' --exclude=.svn -ruN gcc/testsuite/ChangeLog ../../gnu_trunk.test/gcc/gcc/testsuite/ChangeLog diff --exclude='*~' --exclude=.svn -ruN gcc/testsuite/gcc.target/sh/strlen.c ../../gnu_trunk.test/gcc/gcc/testsuite/gcc.target/sh/strlen.c --- gcc/testsuite/gcc.target/sh/strlen.c 1970-01-01 01:00:00.000000000 +0100 +++ ../../gnu_trunk.test/gcc/gcc/testsuite/gcc.target/sh/strlen.c 2013-11-05 09:45:43.000000000 +0100 @@ -0,0 +1,19 @@ +/* Check that the __builtin_strlen function is inlined with cmp/str + when optimizing for speed. */ +/* { dg-do compile { target "sh*-*-*" } } */ +/* { dg-options "-O2" } */ +/* { dg-skip-if "" { "sh*-*-*" } { "-m5*" } { "" } } */ +/* { dg-final { scan-assembler-not "jmp" } } */ +/* { dg-final { scan-assembler-times "cmp/str" 2 } } */ +/* { dg-final { scan-assembler-times "tst\t#3" 1 } } */ + +test00 (const char *s1) +{ + return __builtin_strlen (s1); +} + +/* Check that no test for alignment is needed. */ +test03(const char *s1) +{ + return __builtin_strlen (__builtin_assume_aligned (s1, 4)); +}