https://gcc.gnu.org/g:94aade062a4ab689abc4c3422c1b901ab0733c19
commit r15-1674-g94aade062a4ab689abc4c3422c1b901ab0733c19 Author: Xi Ruoyao <xry...@xry111.site> Date: Sat Jun 15 18:29:43 2024 +0800 LoongArch: Tweak IOR rtx_cost for bstrins Consider c &= 0xfff; a &= ~0xfff; b &= ~0xfff; a |= c; b |= c; This can be done with 2 bstrins instructions. But we need to recognize it in loongarch_rtx_costs or the compiler will not propagate "c & 0xfff" forward. gcc/ChangeLog: * config/loongarch/loongarch.cc: (loongarch_use_bstrins_for_ior_with_mask): Split the main logic into ... (loongarch_use_bstrins_for_ior_with_mask_1): ... here. (loongarch_rtx_costs): Special case for IOR that can be implemented with bstrins. gcc/testsuite/ChangeLog: * gcc.target/loongarch/bstrins-3.c: New test. Diff: --- gcc/config/loongarch/loongarch.cc | 73 ++++++++++++++++++++------ gcc/testsuite/gcc.target/loongarch/bstrins-3.c | 16 ++++++ 2 files changed, 72 insertions(+), 17 deletions(-) diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc index e2ff2af89e2..5119d878731 100644 --- a/gcc/config/loongarch/loongarch.cc +++ b/gcc/config/loongarch/loongarch.cc @@ -3681,6 +3681,27 @@ loongarch_set_reg_reg_piece_cost (machine_mode mode, unsigned int units) return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units); } +static int +loongarch_use_bstrins_for_ior_with_mask_1 (machine_mode mode, + unsigned HOST_WIDE_INT mask1, + unsigned HOST_WIDE_INT mask2) +{ + if (mask1 != ~mask2 || !mask1 || !mask2) + return 0; + + /* Try to avoid a right-shift. */ + if (low_bitmask_len (mode, mask1) != -1) + return -1; + + if (low_bitmask_len (mode, mask2 >> (ffs_hwi (mask2) - 1)) != -1) + return 1; + + if (low_bitmask_len (mode, mask1 >> (ffs_hwi (mask1) - 1)) != -1) + return -1; + + return 0; +} + /* Return the cost of moving between two registers of mode MODE. */ static int @@ -3812,6 +3833,38 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code, /* Fall through. 
*/ case IOR: + { + rtx op[2] = {XEXP (x, 0), XEXP (x, 1)}; + if (GET_CODE (op[0]) == AND && GET_CODE (op[1]) == AND + && (mode == SImode || (TARGET_64BIT && mode == DImode))) + { + rtx rtx_mask0 = XEXP (op[0], 1), rtx_mask1 = XEXP (op[1], 1); + if (CONST_INT_P (rtx_mask0) && CONST_INT_P (rtx_mask1)) + { + unsigned HOST_WIDE_INT mask0 = UINTVAL (rtx_mask0); + unsigned HOST_WIDE_INT mask1 = UINTVAL (rtx_mask1); + if (loongarch_use_bstrins_for_ior_with_mask_1 (mode, + mask0, + mask1)) + { + /* A bstrins instruction */ + *total = COSTS_N_INSNS (1); + + /* A srai instruction */ + if (low_bitmask_len (mode, mask0) == -1 + && low_bitmask_len (mode, mask1) == -1) + *total += COSTS_N_INSNS (1); + + for (int i = 0; i < 2; i++) + *total += set_src_cost (XEXP (op[i], 0), mode, speed); + + return true; + } + } + } + } + + /* Fall through. */ case XOR: /* Double-word operations use two single-word operations. */ *total = loongarch_binary_cost (x, COSTS_N_INSNS (1), COSTS_N_INSNS (2), @@ -5796,23 +5849,9 @@ bool loongarch_pre_reload_split (void) int loongarch_use_bstrins_for_ior_with_mask (machine_mode mode, rtx *op) { - unsigned HOST_WIDE_INT mask1 = UINTVAL (op[2]); - unsigned HOST_WIDE_INT mask2 = UINTVAL (op[4]); - - if (mask1 != ~mask2 || !mask1 || !mask2) - return 0; - - /* Try to avoid a right-shift. 
*/ - if (low_bitmask_len (mode, mask1) != -1) - return -1; - - if (low_bitmask_len (mode, mask2 >> (ffs_hwi (mask2) - 1)) != -1) - return 1; - - if (low_bitmask_len (mode, mask1 >> (ffs_hwi (mask1) - 1)) != -1) - return -1; - - return 0; + return loongarch_use_bstrins_for_ior_with_mask_1 (mode, + UINTVAL (op[2]), + UINTVAL (op[4])); } /* Rewrite a MEM for simple load/store under -mexplicit-relocs=auto diff --git a/gcc/testsuite/gcc.target/loongarch/bstrins-3.c b/gcc/testsuite/gcc.target/loongarch/bstrins-3.c new file mode 100644 index 00000000000..13762bdef42 --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/bstrins-3.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-rtl-final" } */ +/* { dg-final { scan-rtl-dump-times "insv\[sd\]i" 2 "final" } } */ + +struct X { + long a, b; +}; + +struct X +test (long a, long b, long c) +{ + c &= 0xfff; + a &= ~0xfff; + b &= ~0xfff; + return (struct X){.a = a | c, .b = b | c}; +}