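------- Comment #6 from hjl dot tools at gmail dot com 2008-09-09 04:39 ------- This patch disables the SSE4.1 (V16QImode) and SSE2 (V8HImode) interleave-based vector initialization when inter-unit moves are disabled (!TARGET_INTER_UNIT_MOVES) or when the upper half of the vector's elements duplicates the lower half; in those cases the code falls back to the generic path. It also forces the element passed to gen_load_even into a register of the vector's inner mode: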
--- i386.c.sse2 2008-09-08 21:17:15.000000000 -0700 +++ i386.c 2008-09-08 21:36:38.000000000 -0700 @@ -26886,7 +26886,7 @@ static void ix86_expand_vector_init_interleave (enum machine_mode mode, rtx target, rtx *ops, int n) { - enum machine_mode first_imode, second_imode, third_imode; + enum machine_mode first_imode, second_imode, third_imode, inner_mode; int i, j; rtx op0, op1; rtx (*gen_load_even) (rtx, rtx, rtx); @@ -26899,6 +26899,7 @@ ix86_expand_vector_init_interleave (enum gen_load_even = gen_vec_setv8hi; gen_interleave_first_low = gen_vec_interleave_lowv4si; gen_interleave_second_low = gen_vec_interleave_lowv2di; + inner_mode = HImode; first_imode = V4SImode; second_imode = V2DImode; third_imode = VOIDmode; @@ -26907,6 +26908,7 @@ ix86_expand_vector_init_interleave (enum gen_load_even = gen_vec_setv16qi; gen_interleave_first_low = gen_vec_interleave_lowv8hi; gen_interleave_second_low = gen_vec_interleave_lowv4si; + inner_mode = QImode; first_imode = V8HImode; second_imode = V4SImode; third_imode = V2DImode; @@ -26935,7 +26937,9 @@ ix86_expand_vector_init_interleave (enum emit_move_insn (op0, gen_lowpart (mode, op1)); /* Load even elements into the second positon. */ - emit_insn ((*gen_load_even) (op0, ops [i + i + 1], + emit_insn ((*gen_load_even) (op0, + force_reg (inner_mode, + ops [i + i + 1]), const1_rtx)); /* Cast vector to FIRST_IMODE vector. 
*/ @@ -26998,7 +27002,8 @@ ix86_expand_vector_init_general (bool mm { rtx ops[32], op0, op1; enum machine_mode half_mode = VOIDmode; - int n, i; + int n, i, h; + bool duplicated; switch (mode) { @@ -27045,18 +27050,27 @@ half: return; case V16QImode: - if (!TARGET_SSE4_1) + if (!TARGET_SSE4_1 || !TARGET_INTER_UNIT_MOVES) break; /* FALLTHRU */ case V8HImode: - if (!TARGET_SSE2) + if (!TARGET_SSE2 || !TARGET_INTER_UNIT_MOVES) break; n = GET_MODE_NUNITS (mode); + h = n >> 1; + duplicated = true; for (i = 0; i < n; i++) - ops[i] = XVECEXP (vals, 0, i); - ix86_expand_vector_init_interleave (mode, target, ops, n >> 1); + { + ops[i] = XVECEXP (vals, 0, i); + if (i >= h && !rtx_equal_p (ops[i], ops[i - h])) + duplicated = false; + } + if (duplicated) + break; + + ix86_expand_vector_init_interleave (mode, target, ops, h); return; case V4HImode: -- http://gcc.gnu.org/bugzilla/show_bug.cgi?id=37434