Hello,
This patch implements the cmpstrnsi pattern to support the strncmp
builtin for constant lengths. The cmp/str instruction is used for sizes
>= 8 bytes; otherwise we fall back to the byte-at-a-time check to favor
small strings.
I now also handle the cases where the alignment is known, for both cmpstr
and cmpstrn, so that we can avoid the pointer alignment check. I also added
a scheduling improvement that speculates the extu.b r1,r1 instruction into
the delay slot, saving an additional instruction (we know that r1 is 0) when
the end of string is reached. The byte-at-a-time loop becomes:
mov.b @r4+,r1
tst r1,r1
bt/s .L4
mov.b @r3+,r0
cmp/eq r1,r0
bt/s .L9
extu.b r1,r1
.L4:
extu.b r0,r0
rts
sub r1,r0
Enabled the existing execute/builtins/strncmp-2.c for functional check
and added 2 new target specific tests.
No regressions for -m2 and -m4 for sh-elf.
OK for trunk ?
Many thanks,
Christian
2013-10-27 Christian Bruel christian.br...@st.com
* gcc/config/sh/sh-mem.cc (sh_expand_cmpnstr): Moved here.
(sh_expand_cmpstr): Handle known align and schedule improvements.
* gcc/config/sh/sh-protos.h (sh_expand_cmpnstr): Declare.
* gcc/config/sh/sh.md (cmpstrnsi): New pattern.
* gcc.c-torture/execute/builtins/strncmp-2.c: Enable for SH.
* gcc.target/sh/cmpstr.c: New test.
* gcc.target/sh/cmpstrn.c: New test.
Index: config/sh/sh-mem.cc
===
--- config/sh/sh-mem.cc (revision 204013)
+++ config/sh/sh-mem.cc (working copy)
@@ -200,22 +200,25 @@ sh_expand_cmpstr (rtx *operands)
rtx L_return = gen_label_rtx ();
rtx L_loop_byte = gen_label_rtx ();
rtx L_end_loop_byte = gen_label_rtx ();
- rtx L_loop_long = gen_label_rtx ();
- rtx L_end_loop_long = gen_label_rtx ();
rtx jump, addr1, addr2;
int prob_unlikely = REG_BR_PROB_BASE / 10;
int prob_likely = REG_BR_PROB_BASE / 4;
- emit_insn (gen_iorsi3 (tmp1, s1_addr, s2_addr));
- emit_move_insn (tmp0, GEN_INT (3));
+ rtx L_loop_long = gen_label_rtx ();
+ rtx L_end_loop_long = gen_label_rtx ();
- emit_insn (gen_tstsi_t (tmp0, tmp1));
+ int align = INTVAL (operands[3]);
emit_move_insn (tmp0, const0_rtx);
- jump = emit_jump_insn (gen_branch_false (L_loop_byte));
- add_int_reg_note (jump, REG_BR_PROB, prob_likely);
+ if (align < 4)
+{
+ emit_insn (gen_iorsi3 (tmp1, s1_addr, s2_addr));
+ emit_insn (gen_tstsi_t (GEN_INT (3), tmp1));
+ jump = emit_jump_insn (gen_branch_false (L_loop_byte));
+ add_int_reg_note (jump, REG_BR_PROB, prob_likely);
+}
addr1 = adjust_automodify_address (s1, SImode, s1_addr, 0);
addr2 = adjust_automodify_address (s2, SImode, s2_addr, 0);
@@ -250,7 +253,7 @@ sh_expand_cmpstr (rtx *operands)
add_int_reg_note (jump, REG_BR_PROB, prob_likely);
/* end loop. */
- /* Fallthu, check if one of the word is greater. */
+ /* Fallthu, diff results r. */
if (TARGET_LITTLE_ENDIAN)
{
rtx low_1 = gen_lowpart (HImode, tmp1);
@@ -267,15 +270,15 @@ sh_expand_cmpstr (rtx *operands)
jump = emit_jump_insn (gen_jump_compact (L_return));
emit_barrier_after (jump);
- /* start byte loop. */
- addr1 = adjust_automodify_address (s1, QImode, s1_addr, 0);
- addr2 = adjust_automodify_address (s2, QImode, s2_addr, 0);
-
emit_label (L_end_loop_long);
emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, -4));
emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, -4));
+ /* start byte loop. */
+ addr1 = adjust_automodify_address (s1, QImode, s1_addr, 0);
+ addr2 = adjust_automodify_address (s2, QImode, s2_addr, 0);
+
emit_label (L_loop_byte);
emit_insn (gen_extendqisi2 (tmp2, addr2));
@@ -289,13 +292,16 @@ sh_expand_cmpstr (rtx *operands)
add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
- emit_jump_insn (gen_branch_true (L_loop_byte));
+ if (flag_delayed_branch)
+emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
+ jump = emit_jump_insn (gen_branch_true (L_loop_byte));
add_int_reg_note (jump, REG_BR_PROB, prob_likely);
/* end loop. */
emit_label (L_end_loop_byte);
- emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
+ if (! flag_delayed_branch)
+emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
emit_insn (gen_zero_extendqisi2 (tmp1, gen_lowpart (QImode, tmp1)));
emit_label (L_return);
@@ -305,3 +311,166 @@ sh_expand_cmpstr (rtx *operands)
return true;
}
+/* Emit code to perform a strncmp.
+
+ OPERANDS[0] is the destination.
+ OPERANDS[1] is the first string.
+ OPERANDS[2] is the second string.
+ OPERANDS[3] is the length.
+ OPERANDS[4] is the align. */
+bool
+sh_expand_cmpnstr (rtx *operands)
+{
+ rtx s1 = copy_rtx (operands[1]);
+ rtx s2 = copy_rtx (operands[2]);
+
+ rtx s1_addr = copy_addr_to_reg (XEXP (s1, 0));
+ rtx s2_addr = copy_addr_to_reg