As suggested in PR 91201, this patch adds a new insn pattern that avoids a separate zero-extension to HImode on SSE4.1 targets.
2019-08-02 Uroš Bizjak <[email protected]>
PR target/91201
* config/i386/sse.md (*vec_extractv16qi_zext): New insn pattern.
testsuite/ChangeLog:
2019-08-02 Uroš Bizjak <[email protected]>
PR target/91201
* gcc.target/i386/sse4_1-pr91201.c: New test.
Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.
Committed to mainline SVN.
Uros.
Index: config/i386/sse.md
===================================================================
--- config/i386/sse.md (revision 274008)
+++ config/i386/sse.md (working copy)
@@ -14970,6 +14970,25 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
+(define_insn "*vec_extractv16qi_zext" ; zero-extend one V16QI byte to HImode using only the extract insn
+ [(set (match_operand:HI 0 "register_operand" "=r,r")
+ (zero_extend:HI
+ (vec_select:QI
+ (match_operand:V16QI 1 "register_operand" "x,v")
+ (parallel
+ [(match_operand:SI 2 "const_0_to_15_operand")]))))] ; operand 2: constant element index, 0..15
+ "TARGET_SSE4_1" ; insn condition; each alternative below emits a single pextrb/vpextrb
+ "@
+ %vpextrb\t{%2, %1, %k0|%k0, %1, %2}
+ vpextrb\t{%2, %1, %k0|%k0, %1, %2}"
+ [(set_attr "isa" "*,avx512bw") ; second alternative ("v" source constraint) is restricted to avx512bw
+ (set_attr "type" "sselog1")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "TI")])
+
(define_insn "*vec_extract<mode>_mem"
[(set (match_operand:<ssescalarmode> 0 "register_operand" "=r")
(vec_select:<ssescalarmode>
Index: testsuite/gcc.target/i386/sse4_1-pr91201.c
===================================================================
--- testsuite/gcc.target/i386/sse4_1-pr91201.c (nonexistent)
+++ testsuite/gcc.target/i386/sse4_1-pr91201.c (working copy)
@@ -0,0 +1,12 @@
+/* PR tree-optimization/91201: returning a vector byte as unsigned short must not emit a separate movzbw/movzbl.  */
+/* { dg-do compile } */
+/* { dg-options "-Os -msse4.1 -masm=att" } */
+/* { dg-final { scan-assembler-not "\tmovzb(w|l)" } } */
+
+typedef unsigned char V __attribute__((vector_size (16)));
+
+unsigned short
+foo (V x)
+{
+ return x[0]; /* element 0 (unsigned char) is widened to the unsigned short return type */
+}