[PATCH, SH] Add support for inlined builtin_strncmp

2013-10-25 Thread Christian Bruel
Hello,

This patch implements the cmpstrnsi pattern to support the strncmp
builtin for constant lengths. The cmp/str instruction is used for sizes
>= 8 bytes; otherwise we fall back to the byte-at-a-time loop to favor
small strings.
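
As a hypothetical illustration (not part of the patch or its tests), a
call with a constant length such as the one below is the kind of code
that becomes eligible for the new cmpstrnsi expansion at -O2 instead of
a library call to strncmp:

/* Hypothetical example, not taken from the patch: the length 6 is a
   compile-time constant, so the middle end tries the target's
   cmpstrnsi expander before emitting a call to strncmp.  */
int
has_prefix (const char *s)
{
  return __builtin_strncmp (s, "prefix", 6) == 0;
}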

I now also handle the cases where the alignment is known, for both
cmpstr and cmpstrn, so the runtime pointer check can be avoided. I also
added a scheduling improvement that speculates the extu.b r1,r1
instruction into the delay slot, saving an additional instruction when
the end of the string is reached (we know that r1 is 0 there). The
byte-at-a-time loop becomes:

mov.b   @r4+,r1
tst r1,r1
bt/s.L4
mov.b   @r3+,r0
cmp/eq  r1,r0
bt/s.L9
extu.b  r1,r1
.L4:
extu.b  r0,r0
rts
sub r1,r0
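
As a hedged sketch of the known-alignment case (my own example, not
from the patch): when both operands are known to be word-aligned, the
expander can omit the runtime (s1 | s2) & 3 test and enter the cmp/str
word loop directly.

/* Hypothetical example: both arrays are forced to 4-byte alignment,
   so the expander knows the operands are word-aligned and can skip
   the runtime pointer-alignment check.  */
char a[32] __attribute__ ((aligned (4)));
char b[32] __attribute__ ((aligned (4)));

int
cmp_aligned (void)
{
  return __builtin_strcmp (a, b);
}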

Enabled the existing execute/builtins/strncmp-2.c test as a functional
check and added two new target-specific tests.
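
The new target tests themselves are not visible in the truncated patch;
the following is only a guess at their shape (the directives and scan
pattern are my assumption, not the committed test):

/* Hypothetical sketch in the style of gcc.target/sh/cmpstrn.c; the
   directives below are assumptions, not the actual test.  */
/* { dg-do compile } */
/* { dg-options "-O2" } */

int
test (const char *s1, const char *s2)
{
  return __builtin_strncmp (s1, s2, 16);
}

/* Check that the comparison is expanded inline using cmp/str.  */
/* { dg-final { scan-assembler "cmp/str" } } */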

No regressions for -m2 and -m4 for sh-elf.
OK for trunk ?

Many thanks,

Christian





2013-10-27  Christian Bruel  christian.br...@st.com

	* gcc/config/sh/sh-mem.cc (sh_expand_cmpnstr): Moved here.
	(sh_expand_cmpstr): Handle known align and schedule improvements.
	* gcc/config/sh/sh-protos.h (sh_expand_cmpnstr): Declare.
	* gcc/config/sh/sh.md (cmpstrnsi): New pattern.

	* gcc.c-torture/execute/builtins/strncmp-2.c: Enable for SH.
	* gcc.target/sh/cmpstr.c: New test.
	* gcc.target/sh/cmpstrn.c: New test.

Index: config/sh/sh-mem.cc
===
--- config/sh/sh-mem.cc	(revision 204013)
+++ config/sh/sh-mem.cc	(working copy)
@@ -200,22 +200,25 @@ sh_expand_cmpstr (rtx *operands)
   rtx L_return = gen_label_rtx ();
   rtx L_loop_byte = gen_label_rtx ();
   rtx L_end_loop_byte = gen_label_rtx ();
-  rtx L_loop_long = gen_label_rtx ();
-  rtx L_end_loop_long = gen_label_rtx ();
 
   rtx jump, addr1, addr2;
   int prob_unlikely = REG_BR_PROB_BASE / 10;
   int prob_likely = REG_BR_PROB_BASE / 4;
 
-  emit_insn (gen_iorsi3 (tmp1, s1_addr, s2_addr));
-  emit_move_insn (tmp0, GEN_INT (3));
+  rtx L_loop_long = gen_label_rtx ();
+  rtx L_end_loop_long = gen_label_rtx ();
 
-  emit_insn (gen_tstsi_t (tmp0, tmp1));
+  int align = INTVAL (operands[3]);
 
   emit_move_insn (tmp0, const0_rtx);
 
-  jump = emit_jump_insn (gen_branch_false (L_loop_byte));
-  add_int_reg_note (jump, REG_BR_PROB, prob_likely);
+  if (align < 4)
+    {
+      emit_insn (gen_iorsi3 (tmp1, s1_addr, s2_addr));
+      emit_insn (gen_tstsi_t (GEN_INT (3), tmp1));
+      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
+      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
+    }
 
   addr1 = adjust_automodify_address (s1, SImode, s1_addr, 0);
   addr2 = adjust_automodify_address (s2, SImode, s2_addr, 0);
@@ -250,7 +253,7 @@ sh_expand_cmpstr (rtx *operands)
   add_int_reg_note (jump, REG_BR_PROB, prob_likely);
   /* end loop.  */
 
-  /* Fallthu, check if one of the word is greater.  */
+  /* Fallthu, diff results r.  */
   if (TARGET_LITTLE_ENDIAN)
 {
   rtx low_1 = gen_lowpart (HImode, tmp1);
@@ -267,15 +270,15 @@ sh_expand_cmpstr (rtx *operands)
   jump = emit_jump_insn (gen_jump_compact (L_return));
   emit_barrier_after (jump);
 
-  /* start byte loop.  */
-  addr1 = adjust_automodify_address (s1, QImode, s1_addr, 0);
-  addr2 = adjust_automodify_address (s2, QImode, s2_addr, 0);
-
   emit_label (L_end_loop_long);
 
   emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, -4));
   emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, -4));
 
+  /* start byte loop.  */
+  addr1 = adjust_automodify_address (s1, QImode, s1_addr, 0);
+  addr2 = adjust_automodify_address (s2, QImode, s2_addr, 0);
+
   emit_label (L_loop_byte);
 
   emit_insn (gen_extendqisi2 (tmp2, addr2));
@@ -289,13 +292,16 @@ sh_expand_cmpstr (rtx *operands)
   add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
 
   emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
-  emit_jump_insn (gen_branch_true (L_loop_byte));
+  if (flag_delayed_branch)
+    emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
+  jump = emit_jump_insn (gen_branch_true (L_loop_byte));
   add_int_reg_note (jump, REG_BR_PROB, prob_likely);
   /* end loop.  */
 
   emit_label (L_end_loop_byte);
 
-  emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
+  if (! flag_delayed_branch)
+    emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
   emit_insn (gen_zero_extendqisi2 (tmp1, gen_lowpart (QImode, tmp1)));
 
   emit_label (L_return);
@@ -305,3 +311,166 @@ sh_expand_cmpstr (rtx *operands)
   return true;
 }
 
+/* Emit code to perform a strncmp.
+
+   OPERANDS[0] is the destination.
+   OPERANDS[1] is the first string.
+   OPERANDS[2] is the second string.
+   OPERANDS[3] is the length.
+   OPERANDS[4] is the align.  */
+bool
+sh_expand_cmpnstr (rtx *operands)
+{
+  rtx s1 = copy_rtx (operands[1]);
+  rtx s2 = copy_rtx (operands[2]);
+
+  rtx s1_addr = copy_addr_to_reg (XEXP (s1, 0));
+  rtx s2_addr = copy_addr_to_reg 

Re: [PATCH, SH] Add support for inlined builtin_strncmp

2013-10-25 Thread Christian Bruel
In the ChangeLog, the entry

	* gcc/config/sh/sh-mem.cc (sh_expand_cmpnstr): Moved here.

should instead read

	* gcc/config/sh/sh-mem.cc (sh_expand_cmpnstr): New function.

Sorry for this,

Christian



Re: [PATCH, SH] Add support for inlined builtin_strncmp

2013-10-25 Thread Kaz Kojima
Christian Bruel christian.br...@st.com wrote:
> No regressions for -m2 and -m4 for sh-elf.
> OK for trunk ?

OK with the change to the ChangeLog entry suggested in your other mail.
Thanks!

Regards,
kaz