https://gcc.gnu.org/g:ffdee87ac4cf576c59825a0a7a615ff212e51d96
commit r16-4619-gffdee87ac4cf576c59825a0a7a615ff212e51d96 Author: Jiahao Xu <[email protected]> Date: Thu Oct 23 14:29:06 2025 +0800 LoongArch: Implement vector reduction from 256-bit to 128-bit gcc/ChangeLog: * config/loongarch/lasx.md (vec_extract<mode><lasxhalf>): New define_expand. (vec_extract_lo_<mode>): New define_insn_and_split. (vec_extract_hi_<mode>): New define_insn. * config/loongarch/loongarch-protos.h (loongarch_check_vect_par_cnst_half): New function prototype. * config/loongarch/loongarch.cc (loongarch_split_reduction): Implement TARGET_VECTORIZE_SPLIT_REDUCTION. (loongarch_check_vect_par_cnst_half): New function. * config/loongarch/predicates.md (vect_par_cnst_low_half): New predicate. (vect_par_cnst_high_half): New predicate. gcc/testsuite/ChangeLog: * gcc.target/loongarch/lasx-reduc-1.c: New test. Diff: --- gcc/config/loongarch/lasx.md | 42 ++++++++++++++++++++ gcc/config/loongarch/loongarch-protos.h | 1 + gcc/config/loongarch/loongarch.cc | 48 +++++++++++++++++++++++ gcc/config/loongarch/predicates.md | 16 ++++++++ gcc/testsuite/gcc.target/loongarch/lasx-reduc-1.c | 17 ++++++++ 5 files changed, 124 insertions(+) diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md index 3d71f30a54be..eed4d2b186ba 100644 --- a/gcc/config/loongarch/lasx.md +++ b/gcc/config/loongarch/lasx.md @@ -633,6 +633,48 @@ [(set_attr "move_type" "fmove") (set_attr "mode" "<UNITMODE>")]) +(define_expand "vec_extract<mode><lasxhalf>" + [(match_operand:<VHMODE256_ALL> 0 "register_operand") + (match_operand:LASX 1 "register_operand") + (match_operand 2 "const_0_or_1_operand")] + "ISA_HAS_LASX" +{ + if (INTVAL (operands[2])) + { + operands[2] = loongarch_lsx_vec_parallel_const_half (<MODE>mode, true); + emit_insn (gen_vec_extract_hi_<mode> (operands[0], operands[1], + operands[2])); + } + else + { + operands[2] = loongarch_lsx_vec_parallel_const_half (<MODE>mode, false); + emit_insn (gen_vec_extract_lo_<mode> (operands[0], operands[1], + operands[2])); + } + 
DONE; +}) + +(define_insn_and_split "vec_extract_lo_<mode>" + [(set (match_operand:<VHMODE256_ALL> 0 "register_operand" "=f") + (vec_select:<VHMODE256_ALL> + (match_operand:LASX 1 "register_operand" "f") + (match_operand:LASX 2 "vect_par_cnst_low_half")))] + "ISA_HAS_LASX" + "#" + "&& reload_completed" + [(set (match_dup 0) (match_dup 1))] + "operands[1] = gen_lowpart (<VHMODE256_ALL>mode, operands[1]);") + +(define_insn "vec_extract_hi_<mode>" + [(set (match_operand:<VHMODE256_ALL> 0 "register_operand" "=f") + (vec_select:<VHMODE256_ALL> + (match_operand:LASX 1 "register_operand" "f") + (match_operand:LASX 2 "vect_par_cnst_high_half")))] + "ISA_HAS_LASX" + "xvpermi.d\t%u0,%u1,0xe" + [(set_attr "move_type" "fmove") + (set_attr "mode" "<MODE>")]) + (define_expand "vec_perm<mode>" [(match_operand:LASX 0 "register_operand") (match_operand:LASX 1 "register_operand") diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h index 6139af48d7a6..6ecbe27218ca 100644 --- a/gcc/config/loongarch/loongarch-protos.h +++ b/gcc/config/loongarch/loongarch-protos.h @@ -121,6 +121,7 @@ extern bool loongarch_const_vector_same_int_p (rtx, machine_mode, extern bool loongarch_const_vector_shuffle_set_p (rtx, machine_mode); extern bool loongarch_const_vector_bitimm_set_p (rtx, machine_mode); extern bool loongarch_const_vector_bitimm_clr_p (rtx, machine_mode); +extern bool loongarch_check_vect_par_cnst_half (rtx, machine_mode, bool); extern rtx loongarch_const_vector_vrepli (rtx, machine_mode); extern rtx loongarch_lsx_vec_parallel_const_half (machine_mode, bool); extern rtx loongarch_gen_const_int_vector (machine_mode, HOST_WIDE_INT); diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc index 3fe8c766cc77..c782cac0ff9e 100644 --- a/gcc/config/loongarch/loongarch.cc +++ b/gcc/config/loongarch/loongarch.cc @@ -1846,6 +1846,37 @@ loongarch_const_vector_shuffle_set_p (rtx op, machine_mode mode) return true; } +/* Check if OP 
is a PARALLEL RTX with CONST_INT elements representing + the HIGH (high_p == TRUE) or LOW (high_p == FALSE) half of a vector + for mode MODE. Returns true if the pattern matches, false otherwise. */ + +bool +loongarch_check_vect_par_cnst_half (rtx op, machine_mode mode, bool high_p) +{ + int nunits = XVECLEN (op, 0); + int nelts = GET_MODE_NUNITS (mode); + + if (!known_eq (nelts, nunits * 2)) + return false; + + rtx first = XVECEXP (op, 0, 0); + if (!CONST_INT_P (first)) + return false; + + int base = high_p ? nelts / 2 : 0; + if (INTVAL (first) != base) + return false; + + for (int i = 1; i < nunits; i++) + { + rtx elem = XVECEXP (op, 0, i); + if (!CONST_INT_P (elem) || INTVAL (elem) != INTVAL (first) + i) + return false; + } + + return true; +} + rtx loongarch_const_vector_vrepli (rtx x, machine_mode mode) { @@ -4143,6 +4174,19 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code, } } +/* All CPUs prefer to avoid cross-lane operations so perform reductions + upper against lower halves up to LSX reg size. */ + +machine_mode +loongarch_split_reduction (machine_mode mode) +{ + if (LSX_SUPPORTED_MODE_P (mode)) + return mode; + + return mode_for_vector (as_a <scalar_mode> (GET_MODE_INNER (mode)), + GET_MODE_NUNITS (mode) / 2).require (); +} + /* Implement targetm.vectorize.builtin_vectorization_cost. 
*/ static int @@ -11397,6 +11441,10 @@ loongarch_can_inline_p (tree caller, tree callee) #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \ loongarch_autovectorize_vector_modes +#undef TARGET_VECTORIZE_SPLIT_REDUCTION +#define TARGET_VECTORIZE_SPLIT_REDUCTION \ + loongarch_split_reduction + #undef TARGET_OPTAB_SUPPORTED_P #define TARGET_OPTAB_SUPPORTED_P loongarch_optab_supported_p diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md index fd2d7b9ab551..34cf74d5d66e 100644 --- a/gcc/config/loongarch/predicates.md +++ b/gcc/config/loongarch/predicates.md @@ -699,3 +699,19 @@ return true; }) + +;; PARALLEL for a vec_select that selects the low half +;; elements of a vector of MODE. +(define_special_predicate "vect_par_cnst_low_half" + (match_code "parallel") +{ + return loongarch_check_vect_par_cnst_half (op, mode, false); +}) + +;; PARALLEL for a vec_select that selects the high half +;; elements of a vector of MODE. +(define_special_predicate "vect_par_cnst_high_half" + (match_code "parallel") +{ + return loongarch_check_vect_par_cnst_half (op, mode, true); +}) diff --git a/gcc/testsuite/gcc.target/loongarch/lasx-reduc-1.c b/gcc/testsuite/gcc.target/loongarch/lasx-reduc-1.c new file mode 100644 index 000000000000..e4492593aa9c --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/lasx-reduc-1.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -funsafe-math-optimizations -mlasx -fno-unroll-loops -fdump-tree-optimized" } */ +/* { dg-final { scan-tree-dump-times "\.REDUC_PLUS" 4 "optimized" } } */ + +#define DEFINE_SUM_FUNCTION(T, FUNC_NAME, SIZE) \ +T FUNC_NAME(const T arr[]) { \ + arr = __builtin_assume_aligned(arr, 64); \ + T sum = 0; \ + for (int i = 0; i < SIZE; i++) \ + sum += arr[i]; \ + return sum; \ +} + +DEFINE_SUM_FUNCTION (int, sum_int_1040, 1028) +DEFINE_SUM_FUNCTION (float, sum_float_1040, 1028) +DEFINE_SUM_FUNCTION (long, sum_long_1040, 1026) +DEFINE_SUM_FUNCTION (double, sum_double_1040, 1026)
