The loop vectorizer uses vec_perm to select the lower part of a vector. Using subreg in reduc_<code>_scal_m can therefore leave redundant code, because RTL CSE cannot figure out that a vec_select of the lower part is just a subreg.
I tried to canonicalize vec_select to subreg as aarch64 does, but that caused many regressions; some are easy to fix, while others require middle-end adjustments. So for simplicity, this patch uses vec_select instead of subreg in reduc_<code>_scal_m. Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}. Ready to push to trunk. gcc/ChangeLog: * config/i386/sse.md: (reduc_plus_scal_<mode>): Use vec_extract_lo instead of subreg. (reduc_<code>_scal_<mode>): Ditto. (reduc_<code>_scal_<mode>): Ditto. (reduc_<code>_scal_<mode>): Ditto. (reduc_<code>_scal_<mode>): Ditto. --- gcc/config/i386/sse.md | 47 +++++++++++++++++++++++------------------- 1 file changed, 26 insertions(+), 21 deletions(-) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 4f511693e3f..5e0e0e9e51f 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -3480,11 +3480,12 @@ (define_expand "reduc_plus_scal_<mode>" "" { rtx tmp = gen_reg_rtx (<ssehalfvecmode>mode); - emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1])); rtx tmp2 = gen_reg_rtx (<ssehalfvecmode>mode); - rtx tmp3 = gen_lowpart (<ssehalfvecmode>mode, operands[1]); - emit_insn (gen_add<ssehalfvecmodelower>3 (tmp2, tmp, tmp3)); - emit_insn (gen_reduc_plus_scal_<ssehalfvecmodelower> (operands[0], tmp2)); + rtx tmp3 = gen_reg_rtx (<ssehalfvecmode>mode); + emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1])); + emit_insn (gen_vec_extract_lo_<mode> (tmp2, operands[1])); + emit_insn (gen_add<ssehalfvecmodelower>3 (tmp3, tmp, tmp2)); + emit_insn (gen_reduc_plus_scal_<ssehalfvecmodelower> (operands[0], tmp3)); DONE; }) @@ -3528,11 +3529,12 @@ (define_expand "reduc_<code>_scal_<mode>" "" { rtx tmp = gen_reg_rtx (<ssehalfvecmode>mode); - emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1])); rtx tmp2 = gen_reg_rtx (<ssehalfvecmode>mode); - emit_insn (gen_<code><ssehalfvecmodelower>3 - (tmp2, tmp, gen_lowpart (<ssehalfvecmode>mode, operands[1]))); - emit_insn (gen_reduc_<code>_scal_<ssehalfvecmodelower> (operands[0], 
tmp2)); + rtx tmp3 = gen_reg_rtx (<ssehalfvecmode>mode); + emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1])); + emit_insn (gen_vec_extract_lo_<mode> (tmp2, operands[1])); + emit_insn (gen_<code><ssehalfvecmodelower>3 (tmp3, tmp, tmp2)); + emit_insn (gen_reduc_<code>_scal_<ssehalfvecmodelower> (operands[0], tmp3)); DONE; }) @@ -3543,11 +3545,12 @@ (define_expand "reduc_<code>_scal_<mode>" "TARGET_AVX512F" { rtx tmp = gen_reg_rtx (<ssehalfvecmode>mode); - emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1])); rtx tmp2 = gen_reg_rtx (<ssehalfvecmode>mode); - emit_insn (gen_<code><ssehalfvecmodelower>3 - (tmp2, tmp, gen_lowpart (<ssehalfvecmode>mode, operands[1]))); - emit_insn (gen_reduc_<code>_scal_<ssehalfvecmodelower> (operands[0], tmp2)); + rtx tmp3 = gen_reg_rtx (<ssehalfvecmode>mode); + emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1])); + emit_insn (gen_vec_extract_lo_<mode> (tmp2, operands[1])); + emit_insn (gen_<code><ssehalfvecmodelower>3 (tmp3, tmp, tmp2)); + emit_insn (gen_reduc_<code>_scal_<ssehalfvecmodelower> (operands[0], tmp3)); DONE; }) @@ -3558,14 +3561,15 @@ (define_expand "reduc_<code>_scal_<mode>" "TARGET_AVX2" { rtx tmp = gen_reg_rtx (<ssehalfvecmode>mode); - emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1])); rtx tmp2 = gen_reg_rtx (<ssehalfvecmode>mode); - emit_insn (gen_<code><ssehalfvecmodelower>3 - (tmp2, tmp, gen_lowpart (<ssehalfvecmode>mode, operands[1]))); rtx tmp3 = gen_reg_rtx (<ssehalfvecmode>mode); - ix86_expand_reduc (gen_<code><ssehalfvecmodelower>3, tmp3, tmp2); + emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1])); + emit_insn (gen_vec_extract_lo_<mode> (tmp2, operands[1])); + emit_insn (gen_<code><ssehalfvecmodelower>3 (tmp3, tmp, tmp2)); + rtx tmp4 = gen_reg_rtx (<ssehalfvecmode>mode); + ix86_expand_reduc (gen_<code><ssehalfvecmodelower>3, tmp4, tmp3); emit_insn (gen_vec_extract<ssehalfvecmodelower><ssescalarmodelower> - (operands[0], tmp3, const0_rtx)); + (operands[0], tmp4, const0_rtx)); DONE; }) 
@@ -3637,11 +3641,12 @@ (define_expand "reduc_<code>_scal_<mode>" "" { rtx tmp = gen_reg_rtx (<ssehalfvecmode>mode); - emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1])); rtx tmp2 = gen_reg_rtx (<ssehalfvecmode>mode); - rtx tmp3 = gen_lowpart (<ssehalfvecmode>mode, operands[1]); - emit_insn (gen_<code><ssehalfvecmodelower>3 (tmp2, tmp, tmp3)); - emit_insn (gen_reduc_<code>_scal_<ssehalfvecmodelower> (operands[0], tmp2)); + rtx tmp3 = gen_reg_rtx (<ssehalfvecmode>mode); + emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1])); + emit_insn (gen_vec_extract_lo_<mode> (tmp2, operands[1])); + emit_insn (gen_<code><ssehalfvecmodelower>3 (tmp3, tmp, tmp2)); + emit_insn (gen_reduc_<code>_scal_<ssehalfvecmodelower> (operands[0], tmp3)); DONE; }) -- 2.31.1