Hello! The attached patch merges *sse4_1_pextrq and *sse4_1_pextrd with the base vec_select patterns. The patch splits instructions with a zero selector into plain movdi and movsi patterns. Please note that pextr{d,q} $0,... should only be generated for !TARGET_INTER_UNIT_MOVES_FROM_VEC targets when -msse4 is used. For TARGET_INTER_UNIT_MOVES_FROM_VEC targets, we should always generate the corresponding plain movq or movd inter-unit move.
2013-05-06 Uros Bizjak <ubiz...@gmail.com> * config/i386/i386.md (isa): Add x64_sse4 member. (enabled): Handle x64_sse4. (*movdi_internal): Add *x->?r alternative to emit pextrq $0,%xmm,%reg instruction for 64bit SSE4_1 targets. Update insn attributes. (*movsi_internal): Add *x->?r alternative to emit pextrd $0,%xmm,%reg instruction for SSE4_1 targets. Update insn attributes. * config/i386/sse.md (*vec_extract<ssevecmodelower>_0): Merge with *sse4_1_pextrd and *sse4_1_pextrq having const_0 selector. (*vec_extractv2di_1): Merge with *sse4_1_pextrq having const_1 selector. (*vec_extractv4si): Rename from *sse4_1_pextrd. (*vec_extractv4si_zext): Rename from *sse4_1_pextrd_zext. (*vec_extract<ssevecmodelower>_0 splitters): Merge splitters together. Patch was tested on x86_64-pc-linux-gnu {,-m32} and committed to mainline SVN. Uros.
Index: i386.md =================================================================== --- i386.md (revision 198611) +++ i386.md (working copy) @@ -658,12 +658,15 @@ (define_attr "movu" "0,1" (const_string "0")) ;; Used to control the "enabled" attribute on a per-instruction basis. -(define_attr "isa" "base,x64,x64_sse4_noavx,x64_avx,nox64,sse2,sse2_noavx, - sse3,sse4,sse4_noavx,avx,noavx,avx2,noavx2,bmi2,fma4,fma" +(define_attr "isa" "base,x64,x64_sse4,x64_sse4_noavx,x64_avx,nox64, + sse2,sse2_noavx,sse3,sse4,sse4_noavx,avx,noavx, + avx2,noavx2,bmi2,fma4,fma" (const_string "base")) (define_attr "enabled" "" (cond [(eq_attr "isa" "x64") (symbol_ref "TARGET_64BIT") + (eq_attr "isa" "x64_sse4") + (symbol_ref "TARGET_64BIT && TARGET_SSE4_1") (eq_attr "isa" "x64_sse4_noavx") (symbol_ref "TARGET_64BIT && TARGET_SSE4_1 && !TARGET_AVX") (eq_attr "isa" "x64_avx") @@ -1850,9 +1853,9 @@ (define_insn "*movdi_internal" [(set (match_operand:DI 0 "nonimmediate_operand" - "=r ,o ,r,r ,r,m ,*y,*y,?*y,?m,?r ,?*Ym,*x,*x,*x,m ,?r ,?*Yi,?*Ym,?*Yi") + "=r ,o ,r,r ,r,m ,*y,*y,?*y,?m,?r ,?*Ym,*x,*x,*x,m ,?r ,?r,?*Yi,?*Ym,?*Yi") (match_operand:DI 1 "general_operand" - "riFo,riF,Z,rem,i,re,C ,*y,m ,*y,*Yn,r ,C ,*x,m ,*x,*Yj,r ,*Yj ,*Yn"))] + "riFo,riF,Z,rem,i,re,C ,*y,m ,*y,*Yn,r ,C ,*x,m ,*x,*Yj,*x,r ,*Yj ,*Yn"))] "!(MEM_P (operands[0]) && MEM_P (operands[1]))" { switch (get_attr_type (insn)) @@ -1872,6 +1875,9 @@ return "movq\t{%1, %0|%0, %1}"; case TYPE_SSELOG1: + if (GENERAL_REG_P (operands[0])) + return "%vpextrq\t{$0, %1, %0|%0, %1, 0}"; + return standard_sse_constant_opcode (insn, operands[1]); case TYPE_SSEMOV: @@ -1924,8 +1930,10 @@ [(set (attr "isa") (cond [(eq_attr "alternative" "0,1") (const_string "nox64") - (eq_attr "alternative" "2,3,4,5,10,11,16,17") + (eq_attr "alternative" "2,3,4,5,10,11,16,18") (const_string "x64") + (eq_attr "alternative" "17") + (const_string "x64_sse4") ] (const_string "*"))) (set (attr "type") @@ -1935,13 +1943,13 @@ (const_string "mmx") (eq_attr 
"alternative" "7,8,9,10,11") (const_string "mmxmov") - (eq_attr "alternative" "12") + (eq_attr "alternative" "12,17") (const_string "sselog1") - (eq_attr "alternative" "13,14,15,16,17") + (eq_attr "alternative" "13,14,15,16,18") (const_string "ssemov") - (eq_attr "alternative" "18,19") + (eq_attr "alternative" "19,20") (const_string "ssecvt") - (match_operand 1 "pic_32bit_operand") + (match_operand 1 "pic_32bit_operand") (const_string "lea") ] (const_string "imov"))) @@ -1951,14 +1959,20 @@ (const_string "0") (const_string "*"))) (set (attr "length_immediate") - (if_then_else - (and (eq_attr "alternative" "4") (eq_attr "type" "imov")) - (const_string "8") - (const_string "*"))) + (cond [(and (eq_attr "alternative" "4") (eq_attr "type" "imov")) + (const_string "8") + (eq_attr "alternative" "17") + (const_string "1") + ] + (const_string "*"))) (set (attr "prefix_rex") - (if_then_else (eq_attr "alternative" "10,11,16,17") + (if_then_else (eq_attr "alternative" "10,11,16,17,18") (const_string "1") (const_string "*"))) + (set (attr "prefix_extra") + (if_then_else (eq_attr "alternative" "17") + (const_string "1") + (const_string "*"))) (set (attr "prefix") (if_then_else (eq_attr "type" "sselog1,ssemov") (const_string "maybe_vex") @@ -1984,6 +1998,8 @@ (and (eq_attr "alternative" "14,15") (not (match_test "TARGET_SSE2"))) (const_string "V2SF") + (eq_attr "alternative" "17") + (const_string "TI") ] (const_string "DI")))]) @@ -1998,14 +2014,17 @@ (define_insn "*movsi_internal" [(set (match_operand:SI 0 "nonimmediate_operand" - "=r,m ,*y,*y,?rm,?*y,*x,*x,*x,m ,?r ,?*Yi") + "=r,m ,*y,*y,?rm,?*y,*x,*x,*x,m ,?r ,?r,?*Yi") (match_operand:SI 1 "general_operand" - "g ,re,C ,*y,*y ,rm ,C ,*x,m ,*x,*Yj,r"))] + "g ,re,C ,*y,*y ,rm ,C ,*x,m ,*x,*Yj,*x,r"))] "!(MEM_P (operands[0]) && MEM_P (operands[1]))" { switch (get_attr_type (insn)) { case TYPE_SSELOG1: + if (GENERAL_REG_P (operands[0])) + return "%vpextrd\t{$0, %1, %0|%0, %1, 0}"; + return standard_sse_constant_opcode (insn, 
operands[1]); case TYPE_SSEMOV: @@ -2056,19 +2075,31 @@ gcc_unreachable (); } } - [(set (attr "type") + [(set (attr "isa") + (if_then_else (eq_attr "alternative" "11") + (const_string "sse4") + (const_string "*"))) + (set (attr "type") (cond [(eq_attr "alternative" "2") (const_string "mmx") (eq_attr "alternative" "3,4,5") (const_string "mmxmov") - (eq_attr "alternative" "6") + (eq_attr "alternative" "6,11") (const_string "sselog1") - (eq_attr "alternative" "7,8,9,10,11") + (eq_attr "alternative" "7,8,9,10,12") (const_string "ssemov") (match_operand 1 "pic_32bit_operand") (const_string "lea") ] (const_string "imov"))) + (set (attr "length_immediate") + (if_then_else (eq_attr "alternative" "11") + (const_string "1") + (const_string "*"))) + (set (attr "prefix_extra") + (if_then_else (eq_attr "alternative" "11") + (const_string "1") + (const_string "*"))) (set (attr "prefix") (if_then_else (eq_attr "type" "sselog1,ssemov") (const_string "maybe_vex") @@ -2094,6 +2125,8 @@ (and (eq_attr "alternative" "8,9") (not (match_test "TARGET_SSE2"))) (const_string "SF") + (eq_attr "alternative" "11") + (const_string "TI") ] (const_string "SI")))]) Index: sse.md =================================================================== --- sse.md (revision 198611) +++ sse.md (working copy) @@ -6987,48 +6987,6 @@ (set_attr "prefix" "maybe_vex") (set_attr "mode" "TI")]) -(define_insn "*sse4_1_pextrd" - [(set (match_operand:SI 0 "nonimmediate_operand" "=rm") - (vec_select:SI - (match_operand:V4SI 1 "register_operand" "x") - (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))] - "TARGET_SSE4_1" - "%vpextrd\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "sselog") - (set_attr "prefix_extra" "1") - (set_attr "length_immediate" "1") - (set_attr "prefix" "maybe_vex") - (set_attr "mode" "TI")]) - -(define_insn "*sse4_1_pextrd_zext" - [(set (match_operand:DI 0 "register_operand" "=r") - (zero_extend:DI - (vec_select:SI - (match_operand:V4SI 1 "register_operand" "x") - (parallel 
[(match_operand:SI 2 "const_0_to_3_operand" "n")]))))] - "TARGET_64BIT && TARGET_SSE4_1" - "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}" - [(set_attr "type" "sselog") - (set_attr "prefix_extra" "1") - (set_attr "length_immediate" "1") - (set_attr "prefix" "maybe_vex") - (set_attr "mode" "TI")]) - -;; It must come before *vec_extractv2di_1 since it is preferred. -(define_insn "*sse4_1_pextrq" - [(set (match_operand:DI 0 "nonimmediate_operand" "=rm") - (vec_select:DI - (match_operand:V2DI 1 "register_operand" "x") - (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))] - "TARGET_SSE4_1 && TARGET_64BIT" - "%vpextrq\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "sselog") - (set_attr "prefix_rex" "1") - (set_attr "prefix_extra" "1") - (set_attr "length_immediate" "1") - (set_attr "prefix" "maybe_vex") - (set_attr "mode" "TI")]) - (define_expand "avx2_pshufdv3" [(match_operand:V8SI 0 "register_operand") (match_operand:V8SI 1 "nonimmediate_operand") @@ -7358,12 +7316,13 @@ (set_attr "mode" "TI,TI,V4SF,SF,SF")]) (define_insn "*vec_extract<ssevecmodelower>_0" - [(set (match_operand:SWI48 0 "nonimmediate_operand" "=x,m,r ,r") + [(set (match_operand:SWI48 0 "nonimmediate_operand" "=r,r,x ,m,r") (vec_select:SWI48 - (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm,x,Yj,m") + (match_operand:<ssevecmode> 1 "nonimmediate_operand" "Yj,x,xm,x,m") (parallel [(const_int 0)])))] "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))" - "#") + "#" + [(set_attr "isa" "*,sse4,*,*,*")]) (define_insn "*vec_extractv2di_0_sse" [(set (match_operand:DI 0 "nonimmediate_operand" "=x,m") @@ -7375,25 +7334,46 @@ "#") (define_split - [(set (match_operand:SWI48x 0 "register_operand") + [(set (match_operand:SWI48x 0 "nonimmediate_operand") (vec_select:SWI48x - (match_operand:<ssevecmode> 1 "memory_operand") + (match_operand:<ssevecmode> 1 "nonimmediate_operand") (parallel [(const_int 0)])))] "TARGET_SSE && reload_completed" [(set (match_dup 0) (match_dup 1))] - "operands[1] = 
adjust_address (operands[1], <MODE>mode, 0);") +{ + if (REG_P (operands[1])) + operands[1] = gen_rtx_REG (<MODE>mode, REGNO (operands[1])); + else + operands[1] = adjust_address (operands[1], <MODE>mode, 0); +}) -(define_split - [(set (match_operand:SWI48x 0 "nonimmediate_operand") - (vec_select:SWI48x - (match_operand:<ssevecmode> 1 "register_operand") - (parallel [(const_int 0)])))] - "TARGET_SSE && reload_completed - && (TARGET_INTER_UNIT_MOVES_FROM_VEC - || !GENERAL_REG_P (operands [0]))" - [(set (match_dup 0) (match_dup 1))] - "operands[1] = gen_rtx_REG (<MODE>mode, REGNO (operands[1]));") +(define_insn "*vec_extractv4si" + [(set (match_operand:SI 0 "nonimmediate_operand" "=rm") + (vec_select:SI + (match_operand:V4SI 1 "register_operand" "x") + (parallel [(match_operand:SI 2 "const_0_to_3_operand")])))] + "TARGET_SSE4_1" + "%vpextrd\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sselog1") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") + (set_attr "prefix" "maybe_vex") + (set_attr "mode" "TI")]) +(define_insn "*vec_extractv4si_zext" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (vec_select:SI + (match_operand:V4SI 1 "register_operand" "x") + (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))] + "TARGET_64BIT && TARGET_SSE4_1" + "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}" + [(set_attr "type" "sselog1") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") + (set_attr "prefix" "maybe_vex") + (set_attr "mode" "TI")]) + (define_insn_and_split "*vec_extractv4si_mem" [(set (match_operand:SI 0 "register_operand" "=x,r") (vec_select:SI @@ -7408,24 +7388,27 @@ }) (define_insn "*vec_extractv2di_1" - [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,x,x,r") + [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,m,x,x,x,x,r") (vec_select:DI - (match_operand:V2DI 1 "nonimmediate_operand" " x,0,x,x,o,o") + (match_operand:V2DI 1 "nonimmediate_operand" "x ,x,0,x,x,o,o") (parallel [(const_int 1)])))] 
"TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))" "@ + %vpextrq\t{$1, %1, %0|%0, %1, 1} %vmovhps\t{%1, %0|%0, %1} psrldq\t{$8, %0|%0, 8} vpsrldq\t{$8, %1, %0|%0, %1, 8} movhlps\t{%1, %0|%0, %1} # #" - [(set_attr "isa" "*,sse2_noavx,avx,noavx,*,x64") - (set_attr "type" "ssemov,sseishft1,sseishft1,ssemov,ssemov,imov") - (set_attr "length_immediate" "*,1,1,*,*,*") - (set_attr "memory" "*,none,none,*,*,*") - (set_attr "prefix" "maybe_vex,orig,vex,orig,*,*") - (set_attr "mode" "V2SF,TI,TI,V4SF,DI,DI")]) + [(set_attr "isa" "x64_sse4,*,sse2_noavx,avx,noavx,*,x64") + (set_attr "type" "sselog1,ssemov,sseishft1,sseishft1,ssemov,ssemov,imov") + (set_attr "length_immediate" "1,*,1,1,*,*,*") + (set_attr "memory" "*,*,none,none,*,*,*") + (set_attr "prefix_rex" "1,*,*,*,*,*,*") + (set_attr "prefix_extra" "1,*,*,*,*,*,*") + (set_attr "prefix" "maybe_vex,maybe_vex,orig,vex,orig,*,*") + (set_attr "mode" "TI,V2SF,TI,TI,V4SF,DI,DI")]) (define_split [(set (match_operand:DI 0 "register_operand")