From: Christoph Müllner <christoph.muell...@vrull.eu> This patch implements expansions for the cmpstrsi and the cmpstrnsi builtins using Zbb instructions (if available). This allows to inline calls to strcmp() and strncmp().
The expansion basically emits a peeled comparison sequence (i.e. a peeled comparison loop) which compares XLEN bits per step if possible. The emitted sequence can be controlled, by setting the maximum number of compared bytes (-mstring-compare-inline-limit). gcc/ChangeLog: * config/riscv/riscv-protos.h (riscv_expand_strn_compare): New prototype. * config/riscv/riscv-string.cc (GEN_EMIT_HELPER3): New helper macros. (GEN_EMIT_HELPER2): New helper macros. (expand_strncmp_zbb_sequence): New function. (riscv_emit_str_compare_zbb): New function. (riscv_expand_strn_compare): New function. * config/riscv/riscv.md (cmpstrnsi): Invoke expansion functions for strn_compare. (cmpstrsi): Invoke expansion functions for strn_compare. * config/riscv/riscv.opt: Add new parameter '-mstring-compare-inline-limit'. Signed-off-by: Christoph Müllner <christoph.muell...@vrull.eu> --- gcc/config/riscv/riscv-protos.h | 1 + gcc/config/riscv/riscv-string.cc | 344 ++++++++++++++++++ gcc/config/riscv/riscv.md | 46 +++ gcc/config/riscv/riscv.opt | 5 + .../gcc.target/riscv/zbb-strcmp-unaligned.c | 36 ++ gcc/testsuite/gcc.target/riscv/zbb-strcmp.c | 55 +++ 6 files changed, 487 insertions(+) create mode 100644 gcc/testsuite/gcc.target/riscv/zbb-strcmp-unaligned.c create mode 100644 gcc/testsuite/gcc.target/riscv/zbb-strcmp.c diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h index 18187e3bd78..7f334be333c 100644 --- a/gcc/config/riscv/riscv-protos.h +++ b/gcc/config/riscv/riscv-protos.h @@ -97,6 +97,7 @@ rtl_opt_pass * make_pass_shorten_memrefs (gcc::context *ctxt); /* Routines implemented in riscv-string.c. */ extern bool riscv_expand_block_move (rtx, rtx, rtx); extern bool riscv_expand_strlen (rtx[]); +extern bool riscv_expand_strn_compare (rtx[], int); /* Information about one CPU we know about. */ struct riscv_cpu_info { diff --git a/gcc/config/riscv/riscv-string.cc b/gcc/config/riscv/riscv-string.cc index bf96522b608..f157e04ac0c 100644 --- a/gcc/config/riscv/riscv-string.cc +++ b/gcc/config/riscv/riscv-string.cc @@ -84,6 +84,11 @@ GEN_EMIT_HELPER2(one_cmpl) /* do_one_cmpl2 */ GEN_EMIT_HELPER2(clz) /* do_clz2 */ GEN_EMIT_HELPER2(ctz) /* do_ctz2 */ GEN_EMIT_HELPER2(zero_extendqi) /* do_zero_extendqi2 */ +GEN_EMIT_HELPER3(xor) /* do_xor3 */ +GEN_EMIT_HELPER3(ashl) /* do_ashl3 */ +GEN_EMIT_HELPER2(bswap) /* do_bswap2 */ +GEN_EMIT_HELPER3(riscv_ior_not) /* do_riscv_ior_not3 */ +GEN_EMIT_HELPER3(riscv_and_not) /* do_riscv_and_not3 */ /* Helper function to load a byte or a Pmode register. @@ -268,6 +273,345 @@ riscv_expand_block_move (rtx dest, rtx src, rtx length) return false; } +/* Generate the sequence of compares for strcmp/strncmp using zbb instructions. + BYTES_TO_COMPARE is the number of bytes to be compared. + BASE_ALIGN is the smaller of the alignment of the two strings. + ORIG_SRC1 is the unmodified rtx for the first string. + ORIG_SRC2 is the unmodified rtx for the second string. + DATA1 is the register for loading the first string. + DATA2 is the register for loading the second string. + HAS_NUL is the register holding non-NUL bytes for NUL-bytes in the string. + TARGET is the rtx for the result register (SImode) + EQUALITY_COMPARE_REST if set, then we hand over to libc if string matches. + END_LABEL is the location before the calculation of the result value. + FINAL_LABEL is the location after the calculation of the result value. */ + +static void +expand_strncmp_zbb_sequence (unsigned HOST_WIDE_INT bytes_to_compare, + rtx src1, rtx src2, rtx data1, rtx data2, + rtx target, rtx orc, bool equality_compare_rest, + rtx end_label, rtx final_label) +{ + const unsigned HOST_WIDE_INT p_mode_size = GET_MODE_SIZE (Pmode); + rtx src1_addr = force_reg (Pmode, XEXP (src1, 0)); + rtx src2_addr = force_reg (Pmode, XEXP (src2, 0)); + unsigned HOST_WIDE_INT offset = 0; + + rtx m1 = gen_reg_rtx (Pmode); + emit_insn (gen_rtx_SET (m1, constm1_rtx)); + + /* Generate a compare sequence. */ + while (bytes_to_compare > 0) + { + machine_mode load_mode = QImode; + unsigned HOST_WIDE_INT load_mode_size = 1; + if (bytes_to_compare > 1) + { + load_mode = Pmode; + load_mode_size = p_mode_size; + } + unsigned HOST_WIDE_INT cmp_bytes = 0; + + if (bytes_to_compare >= load_mode_size) + cmp_bytes = load_mode_size; + else + cmp_bytes = bytes_to_compare; + + unsigned HOST_WIDE_INT remain = bytes_to_compare - cmp_bytes; + + /* load_mode_size...bytes we will read + cmp_bytes...bytes we will compare (might be less than load_mode_size) + bytes_to_compare...bytes we will compare (incl. cmp_bytes) + remain...bytes left to compare (excl. cmp_bytes) */ + + rtx addr1 = gen_rtx_PLUS (Pmode, src1_addr, GEN_INT (offset)); + rtx addr2 = gen_rtx_PLUS (Pmode, src2_addr, GEN_INT (offset)); + + do_load_from_addr (load_mode, data1, addr1, src1); + do_load_from_addr (load_mode, data2, addr2, src2); + + if (load_mode_size == 1) + { + /* Special case for comparing just single (last) byte. */ + gcc_assert (remain == 0); + + if (!equality_compare_rest) + { + /* Calculate difference and jump to final_label. */ + rtx result = gen_reg_rtx (Pmode); + do_sub3 (result, data1, data2); + emit_insn (gen_movsi (target, gen_lowpart (SImode, result))); + emit_jump_insn (gen_jump (final_label)); + } + else + { + /* Compare both bytes and jump to final_label if not equal. */ + rtx result = gen_reg_rtx (Pmode); + do_sub3 (result, data1, data2); + emit_insn (gen_movsi (target, gen_lowpart (SImode, result))); + /* Check if str1[i] is NULL. */ + rtx cond1 = gen_rtx_EQ (VOIDmode, data1, const0_rtx); + riscv_emit_unlikely_jump (gen_cbranch4 (Pmode, cond1, + data1, const0_rtx, final_label)); + /* Check if str1[i] == str2[i]. */ + rtx cond2 = gen_rtx_NE (VOIDmode, data1, data2); + riscv_emit_unlikely_jump (gen_cbranch4 (Pmode, cond2, + data1, data2, final_label)); + /* Processing will fall through to libc calls. */ + } + } + else + { + /* Eliminate irrelevant data (behind the N-th character). */ + if (bytes_to_compare < p_mode_size) + { + gcc_assert (remain == 0); + /* Set a NUL-byte after the relevant data (behind the string). */ + unsigned long im = 0xffUL; + rtx imask = gen_rtx_CONST_INT (Pmode, im); + rtx m_reg = gen_reg_rtx (Pmode); + emit_insn (gen_rtx_SET (m_reg, imask)); + do_ashl3 (m_reg, m_reg, GEN_INT (cmp_bytes * BITS_PER_UNIT)); + do_riscv_and_not3 (data1, m_reg, data1); + do_riscv_and_not3 (data2, m_reg, data2); + do_orcb2 (orc, data1); + emit_jump_insn (gen_jump (end_label)); + } + else + { + /* Check if data1 contains a NUL character. */ + do_orcb2 (orc, data1); + rtx cond1 = gen_rtx_NE (VOIDmode, orc, m1); + riscv_emit_unlikely_jump (gen_cbranch4 (Pmode, cond1, orc, m1, + end_label)); + + /* Break out if u1 != u2 */ + rtx cond2 = gen_rtx_NE (VOIDmode, data1, data2); + riscv_emit_unlikely_jump (gen_cbranch4 (Pmode, cond2, data1, + data2, end_label)); + + /* Fast-exit for complete and equal strings. */ + if (remain == 0 && !equality_compare_rest) + { + /* All compared and everything was equal. */ + emit_insn (gen_rtx_SET (target, gen_rtx_CONST_INT (SImode, 0))); + emit_jump_insn (gen_jump (final_label)); + } + } + } + + offset += cmp_bytes; + bytes_to_compare -= cmp_bytes; + } + /* Processing will fall through to libc calls. */ +} + +/* Emit a string comparison sequence using Zbb instruction. + + OPERANDS[0] is the target (result). + OPERANDS[1] is the first source. + OPERANDS[2] is the second source. + If NO_LENGTH is zero, then: + OPERANDS[3] is the length. + OPERANDS[4] is the alignment in bytes. + If NO_LENGTH is nonzero, then: + OPERANDS[3] is the alignment in bytes. + BYTES_TO_COMPARE is the maximum number of bytes to compare. + EQUALITY_COMPARE_REST defines if str(n)cmp should be called on equality. + */ + +static bool +riscv_emit_str_compare_zbb (rtx operands[], int no_length, + unsigned HOST_WIDE_INT bytes_to_compare, + bool equality_compare_rest) +{ + const unsigned HOST_WIDE_INT p_mode_size = GET_MODE_SIZE (Pmode); + rtx target = operands[0]; + rtx src1 = operands[1]; + rtx src2 = operands[2]; + rtx bytes_rtx = NULL; + rtx align_rtx = operands[3]; + + if (!no_length) + { + bytes_rtx = operands[3]; + align_rtx = operands[4]; + } + + gcc_assert (TARGET_ZBB); + + /* Enable only if we can access at least one XLEN-register. */ + if (bytes_to_compare < p_mode_size) + return false; + + /* Limit to 12-bits (maximum load-offset). */ + if (bytes_to_compare > IMM_REACH) + return false; + + /* We don't support big endian. */ + if (BYTES_BIG_ENDIAN) + return false; + + /* We need to know the alignment. */ + if (!CONST_INT_P (align_rtx)) + return false; + + unsigned HOST_WIDE_INT base_align = UINTVAL (align_rtx); + unsigned HOST_WIDE_INT required_align = p_mode_size; + if (base_align < required_align) + return false; + + rtx data1 = gen_reg_rtx (Pmode); + rtx data2 = gen_reg_rtx (Pmode); + rtx orc = gen_reg_rtx (Pmode); + rtx end_label = gen_label_rtx (); + rtx final_label = gen_label_rtx (); + + /* Generate a sequence of zbb instructions to compare out + to the length specified. */ + expand_strncmp_zbb_sequence (bytes_to_compare, src1, src2, data1, data2, + target, orc, equality_compare_rest, + end_label, final_label); + + if (equality_compare_rest) + { + /* Update pointers past what has been compared already. */ + rtx src1_addr = force_reg (Pmode, XEXP (src1, 0)); + rtx src2_addr = force_reg (Pmode, XEXP (src2, 0)); + unsigned HOST_WIDE_INT offset = bytes_to_compare; + rtx src1 = force_reg (Pmode, + gen_rtx_PLUS (Pmode, src1_addr, GEN_INT (offset))); + rtx src2 = force_reg (Pmode, + gen_rtx_PLUS (Pmode, src2_addr, GEN_INT (offset))); + + /* Construct call to strcmp/strncmp to compare the rest of the string. */ + if (no_length) + { + tree fun = builtin_decl_explicit (BUILT_IN_STRCMP); + emit_library_call_value (XEXP (DECL_RTL (fun), 0), + target, LCT_NORMAL, GET_MODE (target), + src1, Pmode, src2, Pmode); + } + else + { + unsigned HOST_WIDE_INT bytes = UINTVAL (bytes_rtx); + unsigned HOST_WIDE_INT delta = bytes - bytes_to_compare; + gcc_assert (delta > 0); + rtx len_rtx = gen_reg_rtx (Pmode); + emit_move_insn (len_rtx, gen_int_mode (delta, Pmode)); + tree fun = builtin_decl_explicit (BUILT_IN_STRNCMP); + emit_library_call_value (XEXP (DECL_RTL (fun), 0), + target, LCT_NORMAL, GET_MODE (target), + src1, Pmode, src2, Pmode, len_rtx, Pmode); + } + + emit_jump_insn (gen_jump (final_label)); + } + + emit_barrier (); /* No fall-through. */ + + emit_label (end_label); + + /* Convert non-equal bytes into non-NUL bytes. */ + rtx diff = gen_reg_rtx (Pmode); + do_xor3 (diff, data1, data2); + do_orcb2 (diff, diff); + + /* Convert non-equal or NUL-bytes into non-NUL bytes. */ + rtx syndrome = gen_reg_rtx (Pmode); + do_riscv_ior_not3 (syndrome, orc, diff); + + /* Count the number of equal bits from the beginning of the word. */ + rtx shift = gen_reg_rtx (Pmode); + do_ctz2 (shift, syndrome); + + do_bswap2 (data1, data1); + do_bswap2 (data2, data2); + + /* The most-significant-non-zero bit of the syndrome marks either the + first bit that is different, or the top bit of the first zero byte. + Shifting left now will bring the critical information into the + top bits. */ + do_ashl3 (data1, data1, gen_lowpart (QImode, shift)); + do_ashl3 (data2, data2, gen_lowpart (QImode, shift)); + + /* But we need to zero-extend (char is unsigned) the value and then + perform a signed 32-bit subtraction. */ + unsigned int shiftr = p_mode_size * BITS_PER_UNIT - 8; + do_lshr3 (data1, data1, GEN_INT (shiftr)); + do_lshr3 (data2, data2, GEN_INT (shiftr)); + + rtx result = gen_reg_rtx (Pmode); + do_sub3 (result, data1, data2); + emit_insn (gen_movsi (target, gen_lowpart (SImode, result))); + + /* And we are done. */ + emit_label (final_label); + return true; +} + +/* Expand a string compare operation with length, and return + true if successful. Return false if we should let the + compiler generate normal code, probably a strncmp call. + If NO_LENGTH is set, there is no upper bound of the strings. + + OPERANDS[0] is the target (result). + OPERANDS[1] is the first source. + OPERANDS[2] is the second source. + If NO_LENGTH is zero, then: + OPERANDS[3] is the length. + OPERANDS[4] is the alignment in bytes. + If NO_LENGTH is nonzero, then: + OPERANDS[3] is the alignment in bytes. */ + +bool +riscv_expand_strn_compare (rtx operands[], int no_length) +{ + rtx bytes_rtx = NULL; + const unsigned HOST_WIDE_INT compare_max = riscv_string_compare_inline_limit; + unsigned HOST_WIDE_INT compare_length; /* How much to compare inline. */ + bool equality_compare_rest = false; /* Call libc to compare remainder. */ + + if (riscv_string_compare_inline_limit == 0) + return false; + + /* Decide how many bytes to compare inline and what to do if there is + no difference detected at the end of the compared bytes. + We might call libc to continue the comparison. */ + if (no_length) + { + compare_length = compare_max; + equality_compare_rest = true; + } + else + { + /* If we have a length, it must be constant. */ + bytes_rtx = operands[3]; + if (!CONST_INT_P (bytes_rtx)) + return false; + + unsigned HOST_WIDE_INT bytes = UINTVAL (bytes_rtx); + if (bytes <= compare_max) + { + compare_length = bytes; + equality_compare_rest = false; + } + else + { + compare_length = compare_max; + equality_compare_rest = true; + } + } + + if (TARGET_ZBB) + { + return riscv_emit_str_compare_zbb (operands, no_length, compare_length, + equality_compare_rest); + } + + return false; +} + /* If the provided string is aligned, then read XLEN bytes in a loop and use orc.b to find NUL-bytes. */ diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md index f05c764c3d4..dce33a4b638 100644 --- a/gcc/config/riscv/riscv.md +++ b/gcc/config/riscv/riscv.md @@ -3010,6 +3010,52 @@ (define_expand "cpymemsi" FAIL; }) +;; String compare N insn. +;; Argument 0 is the target (result) +;; Argument 1 is the source1 +;; Argument 2 is the source2 +;; Argument 3 is the length +;; Argument 4 is the alignment + +(define_expand "cmpstrnsi" + [(parallel [(set (match_operand:SI 0) + (compare:SI (match_operand:BLK 1) + (match_operand:BLK 2))) + (use (match_operand:SI 3)) + (use (match_operand:SI 4))])] + "" +{ + if (optimize_insn_for_size_p ()) + FAIL; + + if (riscv_expand_strn_compare (operands, 0)) + DONE; + else + FAIL; +}) + +;; String compare insn. +;; Argument 0 is the target (result) +;; Argument 1 is the destination +;; Argument 2 is the source +;; Argument 3 is the alignment + +(define_expand "cmpstrsi" + [(parallel [(set (match_operand:SI 0) + (compare:SI (match_operand:BLK 1) + (match_operand:BLK 2))) + (use (match_operand:SI 3))])] + "" +{ + if (optimize_insn_for_size_p ()) + FAIL; + + if (riscv_expand_strn_compare (operands, 1)) + DONE; + else + FAIL; +}) + ;; Search character in string (generalization of strlen). ;; Argument 0 is the resulting offset ;; Argument 1 is the string diff --git a/gcc/config/riscv/riscv.opt b/gcc/config/riscv/riscv.opt index 7c3ca48d1cc..fdf768ae9a7 100644 --- a/gcc/config/riscv/riscv.opt +++ b/gcc/config/riscv/riscv.opt @@ -249,3 +249,8 @@ Enum(isa_spec_class) String(20191213) Value(ISA_SPEC_CLASS_20191213) misa-spec= Target RejectNegative Joined Enum(isa_spec_class) Var(riscv_isa_spec) Init(TARGET_DEFAULT_ISA_SPEC) Set the version of RISC-V ISA spec. + +mstring-compare-inline-limit= +Target Var(riscv_string_compare_inline_limit) Init(64) RejectNegative Joined UInteger Save +Max number of bytes to compare. + diff --git a/gcc/testsuite/gcc.target/riscv/zbb-strcmp-unaligned.c b/gcc/testsuite/gcc.target/riscv/zbb-strcmp-unaligned.c new file mode 100644 index 00000000000..2126c849e0a --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/zbb-strcmp-unaligned.c @@ -0,0 +1,36 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc_zbb -mabi=lp64 -mstring-compare-inline-limit=64" } */ + +typedef long unsigned int size_t; + +int +my_str_cmp (const char *s1, const char *s2) +{ + return __builtin_strcmp (s1, s2); +} + +int +my_str_cmp_const (const char *s1) +{ + return __builtin_strcmp (s1, "foo"); +} + +int +my_strn_cmp (const char *s1, const char *s2, size_t n) +{ + return __builtin_strncmp (s1, s2, n); +} + +int +my_strn_cmp_const (const char *s1, size_t n) +{ + return __builtin_strncmp (s1, "foo", n); +} + +int +my_strn_cmp_bounded (const char *s1, const char *s2) +{ + return __builtin_strncmp (s1, s2, 42); +} + +/* { dg-final { scan-assembler-not "orc.b\t" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/zbb-strcmp.c b/gcc/testsuite/gcc.target/riscv/zbb-strcmp.c new file mode 100644 index 00000000000..3465e7ffee3 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/zbb-strcmp.c @@ -0,0 +1,55 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc_zbb -mabi=lp64 -mstring-compare-inline-limit=64" } */ +/* { dg-skip-if "" { *-*-* } { "-O0" "-Os" "-Oz" "-Og" } } */ + +typedef long unsigned int size_t; + +/* Emits 8+1 orc.b instructions. */ + +int +my_str_cmp (const char *s1, const char *s2) +{ + s1 = __builtin_assume_aligned (s1, 4096); + s2 = __builtin_assume_aligned (s2, 4096); + return __builtin_strcmp (s1, s2); +} + +/* 8+1 because the backend does not know the size of "foo". */ + +int +my_str_cmp_const (const char *s1) +{ + s1 = __builtin_assume_aligned (s1, 4096); + return __builtin_strcmp (s1, "foo"); +} + +/* Emits 6+1 orc.b instructions. */ + +int +my_strn_cmp (const char *s1, const char *s2) +{ + s1 = __builtin_assume_aligned (s1, 4096); + s2 = __builtin_assume_aligned (s2, 4096); + return __builtin_strncmp (s1, s2, 42); +} + +/* Note expanded because the backend does not know the size of "foo". */ + +int +my_strn_cmp_const (const char *s1, size_t n) +{ + s1 = __builtin_assume_aligned (s1, 4096); + return __builtin_strncmp (s1, "foo", n); +} + +/* Emits 6+1 orc.b instructions. */ + +int +my_strn_cmp_bounded (const char *s1, const char *s2) +{ + s1 = __builtin_assume_aligned (s1, 4096); + s2 = __builtin_assume_aligned (s2, 4096); + return __builtin_strncmp (s1, s2, 42); +} + +/* { dg-final { scan-assembler-times "orc.b\t" 32 } } */ -- 2.38.1