From: Christoph Müllner <christoph.muell...@vrull.eu>

This patch implements expansions for the cmpstrsi and the cmpstrnsi
builtins using Zbb instructions (if available).
This allows to inline calls to strcmp() and strncmp().

The expansion basically emits a peeled comparison sequence (i.e. a peeled
comparison loop) which compares XLEN bits per step if possible.

The emitted sequence can be controlled, by setting the maximum number
of compared bytes (-mstring-compare-inline-limit).

gcc/ChangeLog:

        * config/riscv/riscv-protos.h (riscv_expand_strn_compare): New
          prototype.
        * config/riscv/riscv-string.cc (GEN_EMIT_HELPER3): New helper
          macros.
        (GEN_EMIT_HELPER2): New helper macros.
        (expand_strncmp_zbb_sequence): New function.
        (riscv_emit_str_compare_zbb): New function.
        (riscv_expand_strn_compare): New function.
        * config/riscv/riscv.md (cmpstrnsi): Invoke expansion functions
          for strn_compare.
        (cmpstrsi): Invoke expansion functions for strn_compare.
        * config/riscv/riscv.opt: Add new parameter
          '-mstring-compare-inline-limit'.

Signed-off-by: Christoph Müllner <christoph.muell...@vrull.eu>
---
 gcc/config/riscv/riscv-protos.h               |   1 +
 gcc/config/riscv/riscv-string.cc              | 344 ++++++++++++++++++
 gcc/config/riscv/riscv.md                     |  46 +++
 gcc/config/riscv/riscv.opt                    |   5 +
 .../gcc.target/riscv/zbb-strcmp-unaligned.c   |  36 ++
 gcc/testsuite/gcc.target/riscv/zbb-strcmp.c   |  55 +++
 6 files changed, 487 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/zbb-strcmp-unaligned.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/zbb-strcmp.c

diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 18187e3bd78..7f334be333c 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -97,6 +97,7 @@ rtl_opt_pass * make_pass_shorten_memrefs (gcc::context *ctxt);
 /* Routines implemented in riscv-string.c.  */
 extern bool riscv_expand_block_move (rtx, rtx, rtx);
 extern bool riscv_expand_strlen (rtx[]);
+extern bool riscv_expand_strn_compare (rtx[], int);
 
 /* Information about one CPU we know about.  */
 struct riscv_cpu_info {
diff --git a/gcc/config/riscv/riscv-string.cc b/gcc/config/riscv/riscv-string.cc
index bf96522b608..f157e04ac0c 100644
--- a/gcc/config/riscv/riscv-string.cc
+++ b/gcc/config/riscv/riscv-string.cc
@@ -84,6 +84,11 @@ GEN_EMIT_HELPER2(one_cmpl) /* do_one_cmpl2  */
 GEN_EMIT_HELPER2(clz) /* do_clz2  */
 GEN_EMIT_HELPER2(ctz) /* do_ctz2  */
 GEN_EMIT_HELPER2(zero_extendqi) /* do_zero_extendqi2  */
+GEN_EMIT_HELPER3(xor) /* do_xor3  */
+GEN_EMIT_HELPER3(ashl) /* do_ashl3  */
+GEN_EMIT_HELPER2(bswap) /* do_bswap2  */
+GEN_EMIT_HELPER3(riscv_ior_not) /* do_riscv_ior_not3  */
+GEN_EMIT_HELPER3(riscv_and_not) /* do_riscv_and_not3  */
 
 /* Helper function to load a byte or a Pmode register.
 
@@ -268,6 +273,345 @@ riscv_expand_block_move (rtx dest, rtx src, rtx length)
   return false;
 }
 
+/* Generate the sequence of compares for strcmp/strncmp using zbb instructions.
+   BYTES_TO_COMPARE is the number of bytes to be compared.
+   BASE_ALIGN is the smaller of the alignment of the two strings.
+   ORIG_SRC1 is the unmodified rtx for the first string.
+   ORIG_SRC2 is the unmodified rtx for the second string.
+   DATA1 is the register for loading the first string.
+   DATA2 is the register for loading the second string.
+   HAS_NUL is the register holding non-NUL bytes for NUL-bytes in the string.
+   TARGET is the rtx for the result register (SImode)
+   EQUALITY_COMPARE_REST if set, then we hand over to libc if string matches.
+   END_LABEL is the location before the calculation of the result value.
+   FINAL_LABEL is the location after the calculation of the result value.  */
+
+static void
+expand_strncmp_zbb_sequence (unsigned HOST_WIDE_INT bytes_to_compare,
+                            rtx src1, rtx src2, rtx data1, rtx data2,
+                            rtx target, rtx orc, bool equality_compare_rest,
+                            rtx end_label, rtx final_label)
+{
+  const unsigned HOST_WIDE_INT p_mode_size = GET_MODE_SIZE (Pmode);
+  rtx src1_addr = force_reg (Pmode, XEXP (src1, 0));
+  rtx src2_addr = force_reg (Pmode, XEXP (src2, 0));
+  unsigned HOST_WIDE_INT offset = 0;
+
+  rtx m1 = gen_reg_rtx (Pmode);
+  emit_insn (gen_rtx_SET (m1, constm1_rtx));
+
+  /* Generate a compare sequence.  */
+  while (bytes_to_compare > 0)
+    {
+      machine_mode load_mode = QImode;
+      unsigned HOST_WIDE_INT load_mode_size = 1;
+      if (bytes_to_compare > 1)
+       {
+         load_mode = Pmode;
+         load_mode_size = p_mode_size;
+       }
+      unsigned HOST_WIDE_INT cmp_bytes = 0;
+
+      if (bytes_to_compare >= load_mode_size)
+       cmp_bytes = load_mode_size;
+      else
+       cmp_bytes = bytes_to_compare;
+
+      unsigned HOST_WIDE_INT remain = bytes_to_compare - cmp_bytes;
+
+      /* load_mode_size...bytes we will read
+        cmp_bytes...bytes we will compare (might be less than load_mode_size)
+        bytes_to_compare...bytes we will compare (incl. cmp_bytes)
+        remain...bytes left to compare (excl. cmp_bytes)  */
+
+      rtx addr1 = gen_rtx_PLUS (Pmode, src1_addr, GEN_INT (offset));
+      rtx addr2 = gen_rtx_PLUS (Pmode, src2_addr, GEN_INT (offset));
+
+      do_load_from_addr (load_mode, data1, addr1, src1);
+      do_load_from_addr (load_mode, data2, addr2, src2);
+
+      if (load_mode_size == 1)
+       {
+         /* Special case for comparing just single (last) byte.  */
+         gcc_assert (remain == 0);
+
+         if (!equality_compare_rest)
+           {
+             /* Calculate difference and jump to final_label.  */
+             rtx result = gen_reg_rtx (Pmode);
+             do_sub3 (result, data1, data2);
+             emit_insn (gen_movsi (target, gen_lowpart (SImode, result)));
+             emit_jump_insn (gen_jump (final_label));
+           }
+         else
+           {
+             /* Compare both bytes and jump to final_label if not equal.  */
+             rtx result = gen_reg_rtx (Pmode);
+             do_sub3 (result, data1, data2);
+             emit_insn (gen_movsi (target, gen_lowpart (SImode, result)));
+             /* Check if str1[i] is NULL.  */
+             rtx cond1 = gen_rtx_EQ (VOIDmode, data1, const0_rtx);
+             riscv_emit_unlikely_jump (gen_cbranch4 (Pmode, cond1,
+                                       data1, const0_rtx, final_label));
+             /* Check if str1[i] == str2[i].  */
+             rtx cond2 = gen_rtx_NE (VOIDmode, data1, data2);
+             riscv_emit_unlikely_jump (gen_cbranch4 (Pmode, cond2,
+                                       data1, data2, final_label));
+             /* Processing will fall through to libc calls.  */
+           }
+       }
+      else
+       {
+         /* Eliminate irrelevant data (behind the N-th character).  */
+         if (bytes_to_compare < p_mode_size)
+           {
+             gcc_assert (remain == 0);
+            /* Set a NUL-byte after the relevant data (behind the string).  */
+             unsigned long im = 0xffUL;
+             rtx imask = gen_rtx_CONST_INT (Pmode, im);
+             rtx m_reg = gen_reg_rtx (Pmode);
+             emit_insn (gen_rtx_SET (m_reg, imask));
+             do_ashl3 (m_reg, m_reg, GEN_INT (cmp_bytes * BITS_PER_UNIT));
+             do_riscv_and_not3 (data1, m_reg, data1);
+             do_riscv_and_not3 (data2, m_reg, data2);
+             do_orcb2 (orc, data1);
+             emit_jump_insn (gen_jump (end_label));
+           }
+         else
+           {
+             /* Check if data1 contains a NUL character.  */
+             do_orcb2 (orc, data1);
+             rtx cond1 = gen_rtx_NE (VOIDmode, orc, m1);
+             riscv_emit_unlikely_jump (gen_cbranch4 (Pmode, cond1, orc, m1,
+                                                     end_label));
+
+             /* Break out if u1 != u2 */
+             rtx cond2 = gen_rtx_NE (VOIDmode, data1, data2);
+             riscv_emit_unlikely_jump (gen_cbranch4 (Pmode, cond2, data1,
+                                                     data2, end_label));
+
+             /* Fast-exit for complete and equal strings.  */
+             if (remain == 0 && !equality_compare_rest)
+               {
+                 /* All compared and everything was equal.  */
+                 emit_insn (gen_rtx_SET (target, gen_rtx_CONST_INT (SImode, 
0)));
+                 emit_jump_insn (gen_jump (final_label));
+               }
+           }
+       }
+
+      offset += cmp_bytes;
+      bytes_to_compare -= cmp_bytes;
+    }
+  /* Processing will fall through to libc calls.  */
+}
+
+/* Emit a string comparison sequence using Zbb instruction.
+
+   OPERANDS[0] is the target (result).
+   OPERANDS[1] is the first source.
+   OPERANDS[2] is the second source.
+   If NO_LENGTH is zero, then:
+   OPERANDS[3] is the length.
+   OPERANDS[4] is the alignment in bytes.
+   If NO_LENGTH is nonzero, then:
+   OPERANDS[3] is the alignment in bytes.
+   BYTES_TO_COMPARE is the maximum number of bytes to compare.
+   EQUALITY_COMPARE_REST defines if str(n)cmp should be called on equality.
+ */
+
+static bool
+riscv_emit_str_compare_zbb (rtx operands[], int no_length,
+                           unsigned HOST_WIDE_INT bytes_to_compare,
+                           bool equality_compare_rest)
+{
+  const unsigned HOST_WIDE_INT p_mode_size = GET_MODE_SIZE (Pmode);
+  rtx target = operands[0];
+  rtx src1 = operands[1];
+  rtx src2 = operands[2];
+  rtx bytes_rtx = NULL;
+  rtx align_rtx = operands[3];
+
+  if (!no_length)
+    {
+      bytes_rtx = operands[3];
+      align_rtx = operands[4];
+    }
+
+  gcc_assert (TARGET_ZBB);
+
+  /* Enable only if we can access at least one XLEN-register.  */
+  if (bytes_to_compare < p_mode_size)
+    return false;
+
+  /* Limit to 12-bits (maximum load-offset).  */
+  if (bytes_to_compare > IMM_REACH)
+    return false;
+
+  /* We don't support big endian.  */
+  if (BYTES_BIG_ENDIAN)
+    return false;
+
+  /* We need to know the alignment.  */
+  if (!CONST_INT_P (align_rtx))
+    return false;
+
+  unsigned HOST_WIDE_INT base_align = UINTVAL (align_rtx);
+  unsigned HOST_WIDE_INT required_align = p_mode_size;
+  if (base_align < required_align)
+    return false;
+
+  rtx data1 = gen_reg_rtx (Pmode);
+  rtx data2 = gen_reg_rtx (Pmode);
+  rtx orc = gen_reg_rtx (Pmode);
+  rtx end_label = gen_label_rtx ();
+  rtx final_label = gen_label_rtx ();
+
+  /* Generate a sequence of zbb instructions to compare out
+     to the length specified.  */
+  expand_strncmp_zbb_sequence (bytes_to_compare, src1, src2, data1, data2,
+                              target, orc, equality_compare_rest,
+                              end_label, final_label);
+
+  if (equality_compare_rest)
+    {
+      /* Update pointers past what has been compared already.  */
+      rtx src1_addr = force_reg (Pmode, XEXP (src1, 0));
+      rtx src2_addr = force_reg (Pmode, XEXP (src2, 0));
+      unsigned HOST_WIDE_INT offset = bytes_to_compare;
+      rtx src1 = force_reg (Pmode,
+                           gen_rtx_PLUS (Pmode, src1_addr, GEN_INT (offset)));
+      rtx src2 = force_reg (Pmode,
+                           gen_rtx_PLUS (Pmode, src2_addr, GEN_INT (offset)));
+
+      /* Construct call to strcmp/strncmp to compare the rest of the string.  
*/
+      if (no_length)
+       {
+         tree fun = builtin_decl_explicit (BUILT_IN_STRCMP);
+         emit_library_call_value (XEXP (DECL_RTL (fun), 0),
+                                  target, LCT_NORMAL, GET_MODE (target),
+                                  src1, Pmode, src2, Pmode);
+       }
+      else
+       {
+         unsigned HOST_WIDE_INT bytes = UINTVAL (bytes_rtx);
+         unsigned HOST_WIDE_INT delta = bytes - bytes_to_compare;
+         gcc_assert (delta > 0);
+         rtx len_rtx = gen_reg_rtx (Pmode);
+         emit_move_insn (len_rtx, gen_int_mode (delta, Pmode));
+         tree fun = builtin_decl_explicit (BUILT_IN_STRNCMP);
+         emit_library_call_value (XEXP (DECL_RTL (fun), 0),
+                                  target, LCT_NORMAL, GET_MODE (target),
+                                  src1, Pmode, src2, Pmode, len_rtx, Pmode);
+       }
+
+      emit_jump_insn (gen_jump (final_label));
+    }
+
+  emit_barrier (); /* No fall-through.  */
+
+  emit_label (end_label);
+
+  /* Convert non-equal bytes into non-NUL bytes.  */
+  rtx diff = gen_reg_rtx (Pmode);
+  do_xor3 (diff, data1, data2);
+  do_orcb2 (diff, diff);
+
+  /* Convert non-equal or NUL-bytes into non-NUL bytes.  */
+  rtx syndrome = gen_reg_rtx (Pmode);
+  do_riscv_ior_not3 (syndrome, orc, diff);
+
+  /* Count the number of equal bits from the beginning of the word.  */
+  rtx shift = gen_reg_rtx (Pmode);
+  do_ctz2 (shift, syndrome);
+
+  do_bswap2 (data1, data1);
+  do_bswap2 (data2, data2);
+
+  /* The most-significant-non-zero bit of the syndrome marks either the
+     first bit that is different, or the top bit of the first zero byte.
+     Shifting left now will bring the critical information into the
+     top bits.  */
+  do_ashl3 (data1, data1, gen_lowpart (QImode, shift));
+  do_ashl3 (data2, data2, gen_lowpart (QImode, shift));
+
+  /* But we need to zero-extend (char is unsigned) the value and then
+     perform a signed 32-bit subtraction.  */
+  unsigned int shiftr = p_mode_size * BITS_PER_UNIT - 8;
+  do_lshr3 (data1, data1, GEN_INT (shiftr));
+  do_lshr3 (data2, data2, GEN_INT (shiftr));
+
+  rtx result = gen_reg_rtx (Pmode);
+  do_sub3 (result, data1, data2);
+  emit_insn (gen_movsi (target, gen_lowpart (SImode, result)));
+
+  /* And we are done.  */
+  emit_label (final_label);
+  return true;
+}
+
+/* Expand a string compare operation with length, and return
+   true if successful.  Return false if we should let the
+   compiler generate normal code, probably a strncmp call.
+   If NO_LENGTH is set, there is no upper bound of the strings.
+
+   OPERANDS[0] is the target (result).
+   OPERANDS[1] is the first source.
+   OPERANDS[2] is the second source.
+   If NO_LENGTH is zero, then:
+   OPERANDS[3] is the length.
+   OPERANDS[4] is the alignment in bytes.
+   If NO_LENGTH is nonzero, then:
+   OPERANDS[3] is the alignment in bytes.  */
+
+bool
+riscv_expand_strn_compare (rtx operands[], int no_length)
+{
+  rtx bytes_rtx = NULL;
+  const unsigned HOST_WIDE_INT compare_max = riscv_string_compare_inline_limit;
+  unsigned HOST_WIDE_INT compare_length; /* How much to compare inline.  */
+  bool equality_compare_rest = false; /* Call libc to compare remainder.  */
+
+  if (riscv_string_compare_inline_limit == 0)
+    return false;
+
+  /* Decide how many bytes to compare inline and what to do if there is
+     no difference detected at the end of the compared bytes.
+     We might call libc to continue the comparison.  */
+  if (no_length)
+    {
+      compare_length = compare_max;
+      equality_compare_rest = true;
+    }
+  else
+    {
+      /* If we have a length, it must be constant.  */
+      bytes_rtx = operands[3];
+      if (!CONST_INT_P (bytes_rtx))
+       return false;
+
+      unsigned HOST_WIDE_INT bytes = UINTVAL (bytes_rtx);
+      if (bytes <= compare_max)
+       {
+         compare_length = bytes;
+         equality_compare_rest = false;
+       }
+      else
+       {
+         compare_length = compare_max;
+         equality_compare_rest = true;
+       }
+    }
+
+  if (TARGET_ZBB)
+    {
+      return riscv_emit_str_compare_zbb (operands, no_length, compare_length,
+                                        equality_compare_rest);
+    }
+
+  return false;
+}
+
 /* If the provided string is aligned, then read XLEN bytes
    in a loop and use orc.b to find NUL-bytes.  */
 
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index f05c764c3d4..dce33a4b638 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -3010,6 +3010,52 @@ (define_expand "cpymemsi"
     FAIL;
 })
 
+;; String compare N insn.
+;; Argument 0 is the target (result)
+;; Argument 1 is the source1
+;; Argument 2 is the source2
+;; Argument 3 is the length
+;; Argument 4 is the alignment
+
+(define_expand "cmpstrnsi"
+  [(parallel [(set (match_operand:SI 0)
+             (compare:SI (match_operand:BLK 1)
+                         (match_operand:BLK 2)))
+             (use (match_operand:SI 3))
+             (use (match_operand:SI 4))])]
+  ""
+{
+  if (optimize_insn_for_size_p ())
+    FAIL;
+
+  if (riscv_expand_strn_compare (operands, 0))
+    DONE;
+  else
+    FAIL;
+})
+
+;; String compare insn.
+;; Argument 0 is the target (result)
+;; Argument 1 is the destination
+;; Argument 2 is the source
+;; Argument 3 is the alignment
+
+(define_expand "cmpstrsi"
+  [(parallel [(set (match_operand:SI 0)
+             (compare:SI (match_operand:BLK 1)
+                         (match_operand:BLK 2)))
+             (use (match_operand:SI 3))])]
+  ""
+{
+  if (optimize_insn_for_size_p ())
+    FAIL;
+
+  if (riscv_expand_strn_compare (operands, 1))
+    DONE;
+  else
+    FAIL;
+})
+
 ;; Search character in string (generalization of strlen).
 ;; Argument 0 is the resulting offset
 ;; Argument 1 is the string
diff --git a/gcc/config/riscv/riscv.opt b/gcc/config/riscv/riscv.opt
index 7c3ca48d1cc..fdf768ae9a7 100644
--- a/gcc/config/riscv/riscv.opt
+++ b/gcc/config/riscv/riscv.opt
@@ -249,3 +249,8 @@ Enum(isa_spec_class) String(20191213) 
Value(ISA_SPEC_CLASS_20191213)
 misa-spec=
 Target RejectNegative Joined Enum(isa_spec_class) Var(riscv_isa_spec) 
Init(TARGET_DEFAULT_ISA_SPEC)
 Set the version of RISC-V ISA spec.
+
+mstring-compare-inline-limit=
+Target Var(riscv_string_compare_inline_limit) Init(64) RejectNegative Joined 
UInteger Save
+Max number of bytes to compare.
+
diff --git a/gcc/testsuite/gcc.target/riscv/zbb-strcmp-unaligned.c 
b/gcc/testsuite/gcc.target/riscv/zbb-strcmp-unaligned.c
new file mode 100644
index 00000000000..2126c849e0a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/zbb-strcmp-unaligned.c
@@ -0,0 +1,36 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc_zbb -mabi=lp64 
-mstring-compare-inline-limit=64" } */
+
+typedef long unsigned int size_t;
+
+int
+my_str_cmp (const char *s1, const char *s2)
+{
+  return __builtin_strcmp (s1, s2);
+}
+
+int
+my_str_cmp_const (const char *s1)
+{
+  return __builtin_strcmp (s1, "foo");
+}
+
+int
+my_strn_cmp (const char *s1, const char *s2, size_t n)
+{
+  return __builtin_strncmp (s1, s2, n);
+}
+
+int
+my_strn_cmp_const (const char *s1, size_t n)
+{
+  return __builtin_strncmp (s1, "foo", n);
+}
+
+int
+my_strn_cmp_bounded (const char *s1, const char *s2)
+{
+  return __builtin_strncmp (s1, s2, 42);
+}
+
+/* { dg-final { scan-assembler-not "orc.b\t" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/zbb-strcmp.c 
b/gcc/testsuite/gcc.target/riscv/zbb-strcmp.c
new file mode 100644
index 00000000000..3465e7ffee3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/zbb-strcmp.c
@@ -0,0 +1,55 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc_zbb -mabi=lp64 
-mstring-compare-inline-limit=64" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Os" "-Oz" "-Og" } } */
+
+typedef long unsigned int size_t;
+
+/* Emits 8+1 orc.b instructions.  */
+
+int
+my_str_cmp (const char *s1, const char *s2)
+{
+  s1 = __builtin_assume_aligned (s1, 4096);
+  s2 = __builtin_assume_aligned (s2, 4096);
+  return __builtin_strcmp (s1, s2);
+}
+
+/* 8+1 because the backend does not know the size of "foo".  */
+
+int
+my_str_cmp_const (const char *s1)
+{
+  s1 = __builtin_assume_aligned (s1, 4096);
+  return __builtin_strcmp (s1, "foo");
+}
+
+/* Emits 6+1 orc.b instructions.  */
+
+int
+my_strn_cmp (const char *s1, const char *s2)
+{
+  s1 = __builtin_assume_aligned (s1, 4096);
+  s2 = __builtin_assume_aligned (s2, 4096);
+  return __builtin_strncmp (s1, s2, 42);
+}
+
+/* Note expanded because the backend does not know the size of "foo".  */
+
+int
+my_strn_cmp_const (const char *s1, size_t n)
+{
+  s1 = __builtin_assume_aligned (s1, 4096);
+  return __builtin_strncmp (s1, "foo", n);
+}
+
+/* Emits 6+1 orc.b instructions.  */
+
+int
+my_strn_cmp_bounded (const char *s1, const char *s2)
+{
+  s1 = __builtin_assume_aligned (s1, 4096);
+  s2 = __builtin_assume_aligned (s2, 4096);
+  return __builtin_strncmp (s1, s2, 42);
+}
+
+/* { dg-final { scan-assembler-times "orc.b\t" 32 } } */
-- 
2.38.1

Reply via email to