Hi! I made two mistakes in the *sse4_1_zero_extend* define_insn_and_split patterns. First, when a pattern uses vector_operand, it should use the Bm constraint rather than m. Second, because these are post-reload splitters, they need an isa attribute to select which alternatives are valid for which ISAs. Sorry for messing this up.
Ok for trunk if it passes bootstrap/regtest?

2021-01-14  Jakub Jelinek  <ja...@redhat.com>

    PR target/98670
    * config/i386/sse.md (*sse4_1_zero_extendv8qiv8hi2_3,
    *sse4_1_zero_extendv4hiv4si2_3, *sse4_1_zero_extendv2siv2di2_3):
    Use Bm instead of m for non-avx.  Add isa attribute.

    * gcc.target/i386/pr98670.c: New test.

--- gcc/config/i386/sse.md.jj 2021-01-13 11:36:27.078888291 +0100
+++ gcc/config/i386/sse.md 2021-01-14 10:30:26.952146198 +0100
@@ -17721,7 +17721,7 @@ (define_insn_and_split "*sse4_1_zero_ext
   [(set (match_operand:V16QI 0 "register_operand" "=Yr,*x,v")
         (vec_select:V16QI
           (vec_concat:V32QI
-            (match_operand:V16QI 1 "vector_operand" "Yrm,*xm,vm")
+            (match_operand:V16QI 1 "vector_operand" "YrBm,*xBm,vm")
             (match_operand:V16QI 2 "const0_operand" "C,C,C"))
           (match_parallel 3 "pmovzx_parallel"
             [(match_operand 4 "const_int_operand" "n,n,n")])))]
@@ -17745,7 +17745,8 @@ (define_insn_and_split "*sse4_1_zero_ext
       emit_insn (gen_rtx_SET (operands[0], operands[1]));
       DONE;
     }
-})
+}
+  [(set_attr "isa" "noavx,noavx,avx")])
 
 (define_expand "<insn>v8qiv8hi2"
   [(set (match_operand:V8HI 0 "register_operand")
@@ -18031,7 +18032,7 @@ (define_insn_and_split "*sse4_1_zero_ext
   [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,v")
         (vec_select:V8HI
           (vec_concat:V16HI
-            (match_operand:V8HI 1 "vector_operand" "Yrm,*xm,vm")
+            (match_operand:V8HI 1 "vector_operand" "YrBm,*xBm,vm")
             (match_operand:V8HI 2 "const0_operand" "C,C,C"))
           (match_parallel 3 "pmovzx_parallel"
             [(match_operand 4 "const_int_operand" "n,n,n")])))]
@@ -18053,7 +18054,8 @@ (define_insn_and_split "*sse4_1_zero_ext
       emit_insn (gen_rtx_SET (operands[0], operands[1]));
       DONE;
     }
-})
+}
+  [(set_attr "isa" "noavx,noavx,avx")])
 
 (define_insn "avx512f_<code>v8qiv8di2<mask_name>"
   [(set (match_operand:V8DI 0 "register_operand" "=v")
@@ -18447,7 +18449,7 @@ (define_insn_and_split "*sse4_1_zero_ext
   [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
         (vec_select:V4SI
           (vec_concat:V8SI
-            (match_operand:V4SI 1 "vector_operand" "Yrm,*xm,vm")
+            (match_operand:V4SI 1 "vector_operand" "YrBm,*xBm,vm")
             (match_operand:V4SI 2 "const0_operand" "C,C,C"))
           (match_parallel 3 "pmovzx_parallel"
             [(match_operand 4 "const_int_operand" "n,n,n")])))]
@@ -18467,7 +18469,8 @@ (define_insn_and_split "*sse4_1_zero_ext
       emit_insn (gen_rtx_SET (operands[0], operands[1]));
       DONE;
     }
-})
+}
+  [(set_attr "isa" "noavx,noavx,avx")])
 
 (define_expand "<insn>v2siv2di2"
   [(set (match_operand:V2DI 0 "register_operand")
--- gcc/testsuite/gcc.target/i386/pr98670.c.jj 2021-01-14 10:40:37.208180135 +0100
+++ gcc/testsuite/gcc.target/i386/pr98670.c 2021-01-14 10:40:07.340521064 +0100
@@ -0,0 +1,16 @@
+/* PR target/98670 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse4.1" } */
+
+#include <x86intrin.h>
+
+void foo (__m128i);
+int a[6];
+
+void
+bar (void)
+{
+  __m128i d = *(__m128i *) (a + 2);
+  __m128i e = _mm_unpacklo_epi16 (d, (__m128i) {});
+  foo (e);
+}

	Jakub