Hi, When a TImode AND uses a special constant whose high part or low part is all ones, it can be converted to a DImode AND with a 64-bit constant plus a simple DImode move. When that DImode AND can be implemented by a rotate-and-mask instruction or "andi.", this avoids loading the 128-bit constant and so saves its cost.
The patch adds three define_insn_and_split patterns that match these cases in the combine pass and are split later. The new predicate "double_wide_cint_operand" identifies whether a constant is a double-wide constant, i.e. a CONST_WIDE_INT with two elements. Bootstrapped and tested on powerpc64-linux BE and LE with no regressions. Is this okay for trunk? Any recommendations? Thanks a lot. Gui Haochen ChangeLog 2023-01-18 Haochen Gui <guih...@linux.ibm.com> gcc/ PR target/93123 * config/rs6000/predicates.md (double_wide_cint_operand): New. * config/rs6000/rs6000.md (*andti3_128bit_imm_highpart): New. (*andti3_128bit_imm_lowpart): New. (*andti3_64bit_imm): New. gcc/testsuite/ PR target/93123 * gcc.target/powerpc/pr93123.c: New. patch.diff diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md index a1764018545..bacb87c3fb2 100644 --- a/gcc/config/rs6000/predicates.md +++ b/gcc/config/rs6000/predicates.md @@ -255,6 +255,19 @@ (define_predicate "u10bit_cint_operand" (and (match_code "const_int") (match_test "INTVAL (op) >= 0 && INTVAL (op) <= 1023"))) +;; Return 1 if op is a 65-128 bits constant integer. +(define_predicate "double_wide_cint_operand" + (match_operand 0 "const_scalar_int_operand") +{ + if (CONST_INT_P (op)) + return 0; + + if (CONST_WIDE_INT_NUNITS (op) == 2) + return 1; + + return 0; +}) + ;; Return 1 if op is a constant integer that can fit in a D field. 
(define_predicate "short_cint_operand" (and (match_code "const_int") diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 6011f5bf76a..1fecb2d734e 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -7199,6 +7199,128 @@ (define_expand "orc<mode>3" "<MODE>mode == TImode || <MODE>mode == PTImode || TARGET_P8_VECTOR" "") +(define_insn_and_split "*andti3_128bit_imm_highpart" + [(set (match_operand:TI 0 "gpc_reg_operand" "=r") + (and:TI + (match_operand:TI 1 "gpc_reg_operand" "r") + (match_operand:TI 2 "double_wide_cint_operand" "n")))] + "CONST_WIDE_INT_ELT (operands[2], 0) == -1 + && (rs6000_is_valid_and_mask (GEN_INT (CONST_WIDE_INT_ELT (operands[2], 1)), + E_DImode) + || logical_const_operand (GEN_INT (CONST_WIDE_INT_ELT (operands[2], 1)), + E_DImode))" + "#" + "&& 1" + [(const_int 0)] +{ + rtx in_lo, in_hi, out_lo, out_hi; + rtx imm = GEN_INT (CONST_WIDE_INT_ELT (operands[2], 1)); + int hi_off, lo_off; + + if (BYTES_BIG_ENDIAN) + { + hi_off = 0; + lo_off = 8; + } + else + { + hi_off = 8; + lo_off = 0; + } + + in_lo = simplify_gen_subreg (DImode, operands[1], TImode, lo_off); + out_lo = simplify_gen_subreg (DImode, operands[0], TImode, lo_off); + in_hi = simplify_gen_subreg (DImode, operands[1], TImode, hi_off); + out_hi = simplify_gen_subreg (DImode, operands[0], TImode, hi_off); + + if (rs6000_is_valid_and_mask (imm, E_DImode)) + emit_insn (gen_anddi3_mask (out_hi, in_hi, imm)); + else + emit_insn (gen_anddi3_imm (out_hi, in_hi, imm)); + + emit_move_insn (out_lo, in_lo); +} + [(set_attr "length" "8")]) + +(define_insn_and_split "*andti3_128bit_imm_lowpart" + [(set (match_operand:TI 0 "gpc_reg_operand" "=r") + (and:TI + (match_operand:TI 1 "gpc_reg_operand" "r") + (match_operand:TI 2 "double_wide_cint_operand" "n")))] + "CONST_WIDE_INT_ELT (operands[2], 1) == -1 + && (rs6000_is_valid_and_mask (GEN_INT (CONST_WIDE_INT_ELT (operands[2], 0)), + E_DImode) + || logical_const_operand (GEN_INT (CONST_WIDE_INT_ELT 
(operands[2], 0)), + E_DImode))" + "#" + "&& 1" + [(const_int 0)] +{ + rtx in_lo, in_hi, out_lo, out_hi; + rtx imm = GEN_INT (CONST_WIDE_INT_ELT (operands[2], 0)); + int hi_off, lo_off; + + if (BYTES_BIG_ENDIAN) + { + hi_off = 0; + lo_off = 8; + } + else + { + hi_off = 8; + lo_off = 0; + } + + in_lo = simplify_gen_subreg (DImode, operands[1], TImode, lo_off); + out_lo = simplify_gen_subreg (DImode, operands[0], TImode, lo_off); + in_hi = simplify_gen_subreg (DImode, operands[1], TImode, hi_off); + out_hi = simplify_gen_subreg (DImode, operands[0], TImode, hi_off); + + if (rs6000_is_valid_and_mask (imm, E_DImode)) + emit_insn (gen_anddi3_mask (out_lo, in_lo, imm)); + else + emit_insn (gen_anddi3_imm (out_lo, in_lo, imm)); + + emit_move_insn (out_hi, in_hi); +} + [(set_attr "length" "8")]) + + +(define_insn_and_split "*andti3_64bit_imm" + [(set (match_operand:TI 0 "gpc_reg_operand" "=r") + (and:TI + (match_operand:TI 1 "gpc_reg_operand" "r") + (match_operand:TI 2 "const_int_operand" "n")))] + "INTVAL (operands[2]) < 0 + && rs6000_is_valid_and_mask (operands[2], E_DImode)" + "#" + "&& 1" + [(const_int 0)] +{ + rtx in_lo, in_hi, out_lo, out_hi; + int hi_off, lo_off; + + if (BYTES_BIG_ENDIAN) + { + hi_off = 0; + lo_off = 8; + } + else + { + hi_off = 8; + lo_off = 0; + } + + in_lo = simplify_gen_subreg (DImode, operands[1], TImode, lo_off); + out_lo = simplify_gen_subreg (DImode, operands[0], TImode, lo_off); + in_hi = simplify_gen_subreg (DImode, operands[1], TImode, hi_off); + out_hi = simplify_gen_subreg (DImode, operands[0], TImode, hi_off); + + emit_insn (gen_anddi3_mask (out_lo, in_lo, operands[2])); + emit_move_insn (out_hi, in_hi); +} + [(set_attr "length" "8")]) + ;; 128-bit logical operations insns and split operations (define_insn_and_split "*and<mode>3_internal" [(set (match_operand:BOOL_128 0 "vlogical_operand" "=<BOOL_REGS_OUTPUT>") diff --git a/gcc/testsuite/gcc.target/powerpc/pr93123.c b/gcc/testsuite/gcc.target/powerpc/pr93123.c new file mode 100644 
index 00000000000..7dcaa59be1d --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr93123.c @@ -0,0 +1,46 @@ +/* { dg-options "-O2" } */ +/* { dg-require-effective-target int128 } */ + +unsigned __int128 +and128WithConst1 (unsigned __int128 a) +{ + unsigned __int128 c128 = (((unsigned __int128)(~0ULL)) << 64) + | ((unsigned __int128)(~0xFULL)); + return a & c128; +} + +unsigned __int128 +and128WithConst2 (unsigned __int128 a) +{ + unsigned __int128 c128 = (((unsigned __int128)(~0ULL)) << 64) + | ((unsigned __int128)(0xFFULL)); + return a & c128; +} + +unsigned __int128 +and128WithConst3 (unsigned __int128 a) +{ + unsigned __int128 c128 = (((unsigned __int128)(~0ULL)) << 64) + | ((unsigned __int128)(0xF1ULL)); + return a & c128; +} + +unsigned __int128 +and128WithConst4 (unsigned __int128 a) +{ + unsigned __int128 c128 = (((unsigned __int128) (~0xFULL)) << 64) + | ((unsigned __int128) (~0ULL)); + return a & c128; +} + +unsigned __int128 +and128WithConst5 (unsigned __int128 a) +{ + unsigned __int128 c128 = (((unsigned __int128) (0xF1ULL)) << 64) + | ((unsigned __int128) (~0ULL)); + return a & c128; +} + +/* { dg-final { scan-assembler-times {\mrldicr\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mrldicl\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mandi\M} 2 } } */