Hello,

This patch inlines strlen when optimizing for speed.

A strlen body is now inlined as:

        mov     r4,r0
        tst     #3,r0
        bf/s    .L6
        mov     r4,r1
        mov     #0,r3
.L4:
        mov.l   @r1+,r2
        cmp/str r3,r2
        bf      .L4
        add     #-4,r1
.L6:
        mov.b   @r1+,r2
        tst     r2,r2
        bf/s    .L6
        sett
        mov     r1,r0
        rts
        subc    r4,r0

This gives a few percent performance improvement here and there on
regexp-based benchmarks, but worth highlighting is a 70% speedup for EEMBC
networking/qos, which now nicely combines sequences like !strncmp(*av,
"any", strlen(*av)).

No regressions for sh-none-elf. OK for trunk?

Many thanks

Christian



2013-11-05  Christian Bruel  <christian.br...@st.com>

	* gcc/config/sh/sh-mem.cc (sh_expand_strlen): New function.
	* gcc/config/sh/sh-protos.h (sh_expand_strlen): Declare.
	* gcc/config/sh/sh.md (strlensi): New pattern.
	(UNSPEC_BUILTIN_STRLEN): Define.

2013-11-05  Christian Bruel  <christian.br...@st.com>

	* gcc.target/sh/strlen.c: New test.

diff --exclude='*~' --exclude=.svn -ruN gcc/config/sh/sh.md ../../gnu_trunk.test/gcc/gcc/config/sh/sh.md
--- gcc/config/sh/sh.md	2013-11-05 12:28:38.000000000 +0100
+++ ../../gnu_trunk.test/gcc/gcc/config/sh/sh.md	2013-11-05 11:16:00.000000000 +0100
@@ -161,6 +161,9 @@
   ;; (unspec [OFFSET ANCHOR] UNSPEC_PCREL_SYMOFF) == OFFSET - (ANCHOR - .).
   (UNSPEC_PCREL_SYMOFF	46)
 
+  ;; Misc builtins
+  (UNSPEC_BUILTIN_STRLEN 47)
+
   ;; These are used with unspec_volatile.
   (UNSPECV_BLOCKAGE	0)
   (UNSPECV_ALIGN	1)
@@ -12081,6 +12084,20 @@
     FAIL;
 })
 
+(define_expand "strlensi"
+  [(set (match_operand:SI 0 "register_operand")
+	(unspec:SI [(match_operand:BLK 1 "memory_operand")
+		   (match_operand:SI 2 "immediate_operand")
+		   (match_operand:SI 3 "immediate_operand")]
+		  UNSPEC_BUILTIN_STRLEN))]
+  "TARGET_SH1 && optimize"
+{
+ if (! optimize_insn_for_size_p () && sh_expand_strlen (operands))
+   DONE;
+ else
+   FAIL;
+})
+
 
 ;; -------------------------------------------------------------------------
 ;; Floating point instructions.
diff --exclude='*~' --exclude=.svn -ruN gcc/config/sh/sh-mem.cc ../../gnu_trunk.test/gcc/gcc/config/sh/sh-mem.cc
--- gcc/config/sh/sh-mem.cc	2013-11-05 12:30:33.000000000 +0100
+++ ../../gnu_trunk.test/gcc/gcc/config/sh/sh-mem.cc	2013-11-04 15:34:05.000000000 +0100
@@ -469,3 +469,83 @@
 
   return true;
 }
+
+/* Emit code to perform a strlen
+
+   OPERANDS[0] is the destination.
+   OPERANDS[1] is the string.
+   OPERANDS[2] is the char to search.
+   OPERANDS[3] is the alignment.  */
+bool
+sh_expand_strlen (rtx *operands)
+{
+  rtx addr1 = operands[1];
+  rtx start_addr = copy_addr_to_reg (XEXP (addr1, 0));
+  rtx end_addr = gen_reg_rtx (Pmode);
+  rtx tmp0 = gen_reg_rtx (SImode);
+  rtx tmp1 = gen_reg_rtx (SImode);
+  rtx L_return = gen_label_rtx ();
+  rtx L_loop_byte = gen_label_rtx ();
+
+  rtx jump;
+  rtx L_loop_long = gen_label_rtx ();
+  rtx L_end_loop_long = gen_label_rtx ();
+
+  int align = INTVAL (operands[3]);
+
+  emit_move_insn (operands[0], GEN_INT (-1));
+
+  /* remember start of string.  */
+  emit_move_insn (end_addr, start_addr);
+
+  if (align < 4)
+    {
+      emit_insn (gen_tstsi_t (GEN_INT (3), start_addr));
+      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
+      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
+    }
+
+  emit_move_insn (tmp0, operands[2]);
+
+  addr1 = adjust_automodify_address (addr1, SImode, start_addr, 0);
+
+  /*start long loop.  */
+  emit_label (L_loop_long);
+
+  /* tmp1 is aligned, OK to load.  */
+  emit_move_insn (tmp1, addr1);
+  emit_move_insn (start_addr, plus_constant (Pmode, start_addr, 4));
+
+  /* Is there a 0 byte ?  */
+  emit_insn (gen_cmpstr_t (tmp0, tmp1));
+
+  jump = emit_jump_insn (gen_branch_false (L_loop_long));
+  add_int_reg_note (jump, REG_BR_PROB, prob_likely);
+  /* end loop.  */
+
+  emit_label (L_end_loop_long);
+
+  emit_move_insn (start_addr, plus_constant (Pmode, start_addr, -4));
+
+  /* start byte loop.  */
+  addr1 = adjust_address (addr1, QImode, 0);
+
+  emit_label (L_loop_byte);
+
+  emit_insn (gen_extendqisi2 (tmp1, addr1));
+  emit_move_insn (start_addr, plus_constant (Pmode, start_addr, 1));
+
+  emit_insn (gen_cmpeqsi_t (tmp1, const0_rtx));
+  jump = emit_jump_insn (gen_branch_false (L_loop_byte));
+  add_int_reg_note (jump, REG_BR_PROB, prob_likely);
+
+  /* end loop.  */
+
+  emit_label (L_return);
+
+  emit_insn (gen_addsi3 (end_addr, end_addr, GEN_INT (1)));
+
+  emit_insn (gen_subsi3 (operands[0], start_addr, end_addr));
+
+  return true;
+}
diff --exclude='*~' --exclude=.svn -ruN gcc/config/sh/sh-protos.h ../../gnu_trunk.test/gcc/gcc/config/sh/sh-protos.h
--- gcc/config/sh/sh-protos.h	2013-11-05 12:47:44.000000000 +0100
+++ ../../gnu_trunk.test/gcc/gcc/config/sh/sh-protos.h	2013-11-05 10:14:48.000000000 +0100
@@ -118,6 +118,7 @@
 extern void prepare_move_operands (rtx[], enum machine_mode mode);
 extern bool sh_expand_cmpstr (rtx *);
 extern bool sh_expand_cmpnstr (rtx *);
+extern bool sh_expand_strlen  (rtx *);
 extern enum rtx_code prepare_cbranch_operands (rtx *, enum machine_mode mode,
 					       enum rtx_code comparison);
 extern void expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int);
diff --exclude='*~' --exclude=.svn -ruN gcc/testsuite/ChangeLog ../../gnu_trunk.test/gcc/gcc/testsuite/ChangeLog
diff --exclude='*~' --exclude=.svn -ruN gcc/testsuite/gcc.target/sh/strlen.c ../../gnu_trunk.test/gcc/gcc/testsuite/gcc.target/sh/strlen.c
--- gcc/testsuite/gcc.target/sh/strlen.c	1970-01-01 01:00:00.000000000 +0100
+++ ../../gnu_trunk.test/gcc/gcc/testsuite/gcc.target/sh/strlen.c	2013-11-05 09:45:43.000000000 +0100
@@ -0,0 +1,19 @@
+/* Check that the __builtin_strlen function is inlined with cmp/str
+   when optimizing for speed.  */
+/* { dg-do compile { target "sh*-*-*" } } */
+/* { dg-options "-O2" } */
+/* { dg-skip-if "" { "sh*-*-*" } { "-m5*" } { "" } } */
+/* { dg-final { scan-assembler-not "jmp" } } */
+/* { dg-final { scan-assembler-times "cmp/str" 2 } } */
+/* { dg-final { scan-assembler-times "tst\t#3" 1 } } */
+
+test00 (const char *s1)
+{
+  return __builtin_strlen (s1);
+}
+
+/* Check that no test for alignment is needed.  */
+test03(const char *s1)
+{
+  return __builtin_strlen (__builtin_assume_aligned (s1, 4));
+}

Reply via email to