gcc/ChangeLog:
* config/i386/i386.md (x86_64_shld_nf): New define_insn.
(x86_64_shld_ndd_nf): Ditto.
(x86_64_shld_1_nf): Ditto.
(x86_64_shld_ndd_1_nf): Ditto.
(*x86_64_shld_shrd_1_nozext_nf): Ditto.
(x86_shld_nf): Ditto.
(x86_shld_ndd_nf): Ditto.
(x86_shld_1_nf): Ditto.
(x86_shld_ndd_1_nf): Ditto.
(*x86_shld_shrd_1_nozext_nf): Ditto.
(<insn><dwi>3_doubleword_lowpart_nf): Ditto.
(x86_64_shrd_nf): Ditto.
(x86_64_shrd_ndd_nf): Ditto.
(x86_64_shrd_1_nf): Ditto.
(x86_64_shrd_ndd_1_nf): Ditto.
(*x86_64_shrd_shld_1_nozext_nf): Ditto.
(x86_shrd_nf): Ditto.
(x86_shrd_ndd_nf): Ditto.
(x86_shrd_1_nf): Ditto.
(x86_shrd_ndd_1_nf): Ditto.
(*x86_shrd_shld_1_nozext_nf): Ditto.
---
gcc/config/i386/i386.md | 518 ++++++++++++++++++++++++++++++++++++++++
1 file changed, 518 insertions(+)
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index
ff44154b26b..f9a62fba0c4 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -14666,6 +14666,26 @@
DONE;
})
+(define_insn "x86_64_shld_nf"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
+ (ior:DI (ashift:DI (match_dup 0)
+ (and:QI (match_operand:QI 2 "nonmemory_operand" "Jc")
+ (const_int 63)))
+ (subreg:DI
+ (lshiftrt:TI
+ (zero_extend:TI
+ (match_operand:DI 1 "register_operand" "r"))
+ (minus:QI (const_int 64)
+ (and:QI (match_dup 2) (const_int 63)))) 0)))]
+ "TARGET_APX_NF"
+ "%{nf%} shld{q}\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ishift")
+ (set_attr "prefix_0f" "1")
+ (set_attr "mode" "DI")
+ (set_attr "athlon_decode" "vector")
+ (set_attr "amdfam10_decode" "vector")
+ (set_attr "bdver1_decode" "vector")])
+
(define_insn "x86_64_shld"
[(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
(ior:DI (ashift:DI (match_dup 0) @@ -14687,6 +14707,22 @@
(set_attr "amdfam10_decode" "vector")
(set_attr "bdver1_decode" "vector")])
+(define_insn "x86_64_shld_ndd_nf"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (ior:DI (ashift:DI (match_operand:DI 1 "nonimmediate_operand" "rm")
+ (and:QI (match_operand:QI 3 "nonmemory_operand" "Jc")
+ (const_int 63)))
+ (subreg:DI
+ (lshiftrt:TI
+ (zero_extend:TI
+ (match_operand:DI 2 "register_operand" "r"))
+ (minus:QI (const_int 64)
+ (and:QI (match_dup 3) (const_int 63)))) 0)))]
+ "TARGET_APX_NDD && TARGET_APX_NF"
+ "%{nf%} shld{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ishift")
+ (set_attr "mode" "DI")])
+
(define_insn "x86_64_shld_ndd"
[(set (match_operand:DI 0 "register_operand" "=r")
(ior:DI (ashift:DI (match_operand:DI 1 "nonimmediate_operand" "rm") @@
-14704,6 +14740,43 @@
[(set_attr "type" "ishift")
(set_attr "mode" "DI")])
+(define_insn "x86_64_shld_1_nf"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
+ (ior:DI (ashift:DI (match_dup 0)
+ (match_operand:QI 2 "const_0_to_63_operand"))
+ (subreg:DI
+ (lshiftrt:TI
+ (zero_extend:TI
+ (match_operand:DI 1 "register_operand" "r"))
+ (match_operand:QI 3 "const_0_to_255_operand")) 0)))]
+ "TARGET_64BIT
+ && INTVAL (operands[3]) == 64 - INTVAL (operands[2])
+ && TARGET_APX_NF"
+ "%{nf%} shld{q}\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ishift")
+ (set_attr "prefix_0f" "1")
+ (set_attr "mode" "DI")
+ (set_attr "length_immediate" "1")
+ (set_attr "athlon_decode" "vector")
+ (set_attr "amdfam10_decode" "vector")
+ (set_attr "bdver1_decode" "vector")])
+
+(define_insn "x86_64_shld_ndd_1_nf"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (ior:DI (ashift:DI (match_operand:DI 1 "nonimmediate_operand" "rm")
+ (match_operand:QI 3 "const_0_to_63_operand"))
+ (subreg:DI
+ (lshiftrt:TI
+ (zero_extend:TI
+ (match_operand:DI 2 "register_operand" "r"))
+ (match_operand:QI 4 "const_0_to_255_operand")) 0)))]
+ "TARGET_APX_NDD && TARGET_APX_NF
+ && INTVAL (operands[4]) == 64 - INTVAL (operands[3])"
+ "%{nf%} shld{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ishift")
+ (set_attr "mode" "DI")
+ (set_attr "length_immediate" "1")])
+
(define_insn "x86_64_shld_1"
[(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
(ior:DI (ashift:DI (match_dup 0) @@ -14742,6 +14815,58 @@
(set_attr "mode" "DI")
(set_attr "length_immediate" "1")])
+(define_insn_and_split "*x86_64_shld_shrd_1_nozext_nf"
+ [(set (match_operand:DI 0 "nonimmediate_operand")
+ (ior:DI (ashift:DI (match_operand:DI 4 "nonimmediate_operand")
+ (match_operand:QI 2 "const_0_to_63_operand"))
+ (lshiftrt:DI
+ (match_operand:DI 1 "nonimmediate_operand")
+ (match_operand:QI 3 "const_0_to_63_operand"))))]
+ "TARGET_64BIT && TARGET_APX_NF
+ && INTVAL (operands[3]) == 64 - INTVAL (operands[2])
+ && ix86_pre_reload_split ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
+ if (rtx_equal_p (operands[4], operands[0]))
+ {
+ operands[1] = force_reg (DImode, operands[1]);
+ emit_insn (gen_x86_64_shld_1_nf (operands[0], operands[1], operands[2],
operands[3]));
+ }
+ else if (rtx_equal_p (operands[1], operands[0]))
+ {
+ operands[4] = force_reg (DImode, operands[4]);
+ emit_insn (gen_x86_64_shrd_1_nf (operands[0], operands[4], operands[3],
operands[2]));
+ }
+ else if (TARGET_APX_NDD)
+ {
+ rtx tmp = gen_reg_rtx (DImode);
+ if (MEM_P (operands[4]))
+ {
+ operands[1] = force_reg (DImode, operands[1]);
+ emit_insn (gen_x86_64_shld_ndd_1_nf (tmp, operands[4], operands[1],
+ operands[2], operands[3]));
+ }
+ else if (MEM_P (operands[1]))
+ emit_insn (gen_x86_64_shrd_ndd_1_nf (tmp, operands[1], operands[4],
+ operands[3], operands[2]));
+ else
+ emit_insn (gen_x86_64_shld_ndd_1_nf (tmp, operands[4], operands[1],
+ operands[2], operands[3]));
+ emit_move_insn (operands[0], tmp);
+ }
+ else
+ {
+ operands[1] = force_reg (DImode, operands[1]);
+ rtx tmp = gen_reg_rtx (DImode);
+ emit_move_insn (tmp, operands[4]);
+ emit_insn (gen_x86_64_shld_1_nf (tmp, operands[1], operands[2],
operands[3]));
+ emit_move_insn (operands[0], tmp);
+ }
+ DONE;
+})
+
(define_insn_and_split "*x86_64_shld_shrd_1_nozext"
[(set (match_operand:DI 0 "nonimmediate_operand") @@ -14844,6 +14969,81 @@
emit_move_insn (operands[4], operands[0]);
})
+(define_insn "x86_shld_nf"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
+ (ior:SI (ashift:SI (match_dup 0)
+ (and:QI (match_operand:QI 2 "nonmemory_operand" "Ic")
+ (const_int 31)))
+ (subreg:SI
+ (lshiftrt:DI
+ (zero_extend:DI
+ (match_operand:SI 1 "register_operand" "r"))
+ (minus:QI (const_int 32)
+ (and:QI (match_dup 2) (const_int 31)))) 0)))]
+ "TARGET_APX_NF"
+ "%{nf%} shld{l}\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ishift")
+ (set_attr "prefix_0f" "1")
+ (set_attr "mode" "SI")
+ (set_attr "pent_pair" "np")
+ (set_attr "athlon_decode" "vector")
+ (set_attr "amdfam10_decode" "vector")
+ (set_attr "bdver1_decode" "vector")])
+
+(define_insn "x86_shld_ndd_nf"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (ior:SI (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
+ (and:QI (match_operand:QI 3 "nonmemory_operand" "Ic")
+ (const_int 31)))
+ (subreg:SI
+ (lshiftrt:DI
+ (zero_extend:DI
+ (match_operand:SI 2 "register_operand" "r"))
+ (minus:QI (const_int 32)
+ (and:QI (match_dup 3) (const_int 31)))) 0)))]
+ "TARGET_APX_NDD && TARGET_APX_NF"
+ "%{nf%} shld{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ishift")
+ (set_attr "mode" "SI")])
+
+
+(define_insn "x86_shld_1_nf"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
+ (ior:SI (ashift:SI (match_dup 0)
+ (match_operand:QI 2 "const_0_to_31_operand"))
+ (subreg:SI
+ (lshiftrt:DI
+ (zero_extend:DI
+ (match_operand:SI 1 "register_operand" "r"))
+ (match_operand:QI 3 "const_0_to_63_operand")) 0)))]
+ "TARGET_APX_NF
+ && INTVAL (operands[3]) == 32 - INTVAL (operands[2])"
+ "%{nf%} shld{l}\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ishift")
+ (set_attr "prefix_0f" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "SI")
+ (set_attr "pent_pair" "np")
+ (set_attr "athlon_decode" "vector")
+ (set_attr "amdfam10_decode" "vector")
+ (set_attr "bdver1_decode" "vector")])
+
+(define_insn "x86_shld_ndd_1_nf"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (ior:SI (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
+ (match_operand:QI 3 "const_0_to_31_operand"))
+ (subreg:SI
+ (lshiftrt:DI
+ (zero_extend:DI
+ (match_operand:SI 2 "register_operand" "r"))
+ (match_operand:QI 4 "const_0_to_63_operand")) 0)))]
+ "TARGET_APX_NDD && TARGET_APX_NF
+ && INTVAL (operands[4]) == 32 - INTVAL (operands[3])"
+ "%{nf%} shld{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ishift")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "SI")])
+
(define_insn "x86_shld"
[(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
(ior:SI (ashift:SI (match_dup 0) @@ -14922,6 +15122,57 @@
(set_attr "length_immediate" "1")
(set_attr "mode" "SI")])
+(define_insn_and_split "*x86_shld_shrd_1_nozext_nf"
+ [(set (match_operand:SI 0 "nonimmediate_operand")
+ (ior:SI (ashift:SI (match_operand:SI 4 "nonimmediate_operand")
+ (match_operand:QI 2 "const_0_to_31_operand"))
+ (lshiftrt:SI
+ (match_operand:SI 1 "nonimmediate_operand")
+ (match_operand:QI 3 "const_0_to_31_operand"))))]
+ "TARGET_APX_NF &&
+ INTVAL (operands[3]) == 32 - INTVAL (operands[2])
+ && ix86_pre_reload_split ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
+ if (rtx_equal_p (operands[4], operands[0]))
+ {
+ operands[1] = force_reg (SImode, operands[1]);
+ emit_insn (gen_x86_shld_1_nf (operands[0], operands[1], operands[2],
operands[3]));
+ }
+ else if (rtx_equal_p (operands[1], operands[0]))
+ {
+ operands[4] = force_reg (SImode, operands[4]);
+ emit_insn (gen_x86_shrd_1_nf (operands[0], operands[4], operands[3],
operands[2]));
+ }
+ else if (TARGET_APX_NDD)
+ {
+ rtx tmp = gen_reg_rtx (SImode);
+ if (MEM_P (operands[4]))
+ {
+ operands[1] = force_reg (SImode, operands[1]);
+ emit_insn (gen_x86_shld_ndd_1_nf (tmp, operands[4], operands[1],
+ operands[2], operands[3]));
+ }
+ else if (MEM_P (operands[1]))
+ emit_insn (gen_x86_shrd_ndd_1_nf (tmp, operands[1], operands[4],
+ operands[3], operands[2]));
+ else
+ emit_insn (gen_x86_shld_ndd_1_nf (tmp, operands[4], operands[1],
+ operands[2], operands[3]));
+ emit_move_insn (operands[0], tmp);
+ }
+ else
+ {
+ operands[1] = force_reg (SImode, operands[1]);
+ rtx tmp = gen_reg_rtx (SImode);
+ emit_move_insn (tmp, operands[4]);
+ emit_insn (gen_x86_shld_1_nf (tmp, operands[1], operands[2],
operands[3]));
+ emit_move_insn (operands[0], tmp);
+ }
+ DONE;
+})
(define_insn_and_split "*x86_shld_shrd_1_nozext"
[(set (match_operand:SI 0 "nonimmediate_operand") @@ -16107,6 +16358,26 @@
})
;; Split truncations of double word right shifts into x86_shrd_1.
+(define_insn_and_split "<insn><dwi>3_doubleword_lowpart_nf"
+ [(set (match_operand:DWIH 0 "register_operand" "=&r")
+ (subreg:DWIH
+ (any_shiftrt:<DWI> (match_operand:<DWI> 1 "register_operand" "r")
+ (match_operand:QI 2 "const_int_operand")) 0))]
+ "TARGET_APX_NF && UINTVAL (operands[2]) < <MODE_SIZE> * BITS_PER_UNIT"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0)
+ (ior:DWIH (lshiftrt:DWIH (match_dup 0) (match_dup 2))
+ (subreg:DWIH
+ (ashift:<DWI> (zero_extend:<DWI> (match_dup 3))
+ (match_dup 4)) 0)))]
+{
+ split_double_mode (<DWI>mode, &operands[1], 1, &operands[1],
+&operands[3]);
+ operands[4] = GEN_INT ((<MODE_SIZE> * BITS_PER_UNIT) - INTVAL
+(operands[2]));
+ if (!rtx_equal_p (operands[0], operands[1]))
+ emit_move_insn (operands[0], operands[1]);
+})
+
(define_insn_and_split "<insn><dwi>3_doubleword_lowpart"
[(set (match_operand:DWIH 0 "register_operand" "=&r")
(subreg:DWIH
@@ -16130,6 +16401,26 @@
emit_move_insn (operands[0], operands[1]);
})
+(define_insn "x86_64_shrd_nf"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
+ (ior:DI (lshiftrt:DI (match_dup 0)
+ (and:QI (match_operand:QI 2 "nonmemory_operand" "Jc")
+ (const_int 63)))
+ (subreg:DI
+ (ashift:TI
+ (zero_extend:TI
+ (match_operand:DI 1 "register_operand" "r"))
+ (minus:QI (const_int 64)
+ (and:QI (match_dup 2) (const_int 63)))) 0)))]
+ "TARGET_APX_NF"
+ "%{nf%} shrd{q}\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ishift")
+ (set_attr "prefix_0f" "1")
+ (set_attr "mode" "DI")
+ (set_attr "athlon_decode" "vector")
+ (set_attr "amdfam10_decode" "vector")
+ (set_attr "bdver1_decode" "vector")])
+
(define_insn "x86_64_shrd"
[(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
(ior:DI (lshiftrt:DI (match_dup 0) @@ -16151,6 +16442,22 @@
(set_attr "amdfam10_decode" "vector")
(set_attr "bdver1_decode" "vector")])
+(define_insn "x86_64_shrd_ndd_nf"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (ior:DI (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "rm")
+ (and:QI (match_operand:QI 3 "nonmemory_operand" "Jc")
+ (const_int 63)))
+ (subreg:DI
+ (ashift:TI
+ (zero_extend:TI
+ (match_operand:DI 2 "register_operand" "r"))
+ (minus:QI (const_int 64)
+ (and:QI (match_dup 3) (const_int 63)))) 0)))]
+ "TARGET_APX_NDD && TARGET_APX_NF"
+ "%{nf%} shrd{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ishift")
+ (set_attr "mode" "DI")])
+
(define_insn "x86_64_shrd_ndd"
[(set (match_operand:DI 0 "register_operand" "=r")
(ior:DI (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "rm")
@@ -16168,6 +16475,25 @@
[(set_attr "type" "ishift")
(set_attr "mode" "DI")])
+(define_insn "x86_64_shrd_1_nf"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
+ (ior:DI (lshiftrt:DI (match_dup 0)
+ (match_operand:QI 2 "const_0_to_63_operand"))
+ (subreg:DI
+ (ashift:TI
+ (zero_extend:TI
+ (match_operand:DI 1 "register_operand" "r"))
+ (match_operand:QI 3 "const_0_to_255_operand")) 0)))]
+ "TARGET_APX_NF
+ && INTVAL (operands[3]) == 64 - INTVAL (operands[2])"
+ "%{nf%} shrd{q}\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ishift")
+ (set_attr "prefix_0f" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "DI")
+ (set_attr "athlon_decode" "vector")
+ (set_attr "amdfam10_decode" "vector")
+ (set_attr "bdver1_decode" "vector")])
(define_insn "x86_64_shrd_1"
[(set (match_operand:DI 0 "nonimmediate_operand" "+r*m") @@ -16190,6
+16516,22 @@
(set_attr "amdfam10_decode" "vector")
(set_attr "bdver1_decode" "vector")])
+(define_insn "x86_64_shrd_ndd_1_nf"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (ior:DI (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "rm")
+ (match_operand:QI 3 "const_0_to_63_operand"))
+ (subreg:DI
+ (ashift:TI
+ (zero_extend:TI
+ (match_operand:DI 2 "register_operand" "r"))
+ (match_operand:QI 4 "const_0_to_255_operand")) 0)))]
+ "TARGET_APX_NDD && TARGET_APX_NF
+ && INTVAL (operands[4]) == 64 - INTVAL (operands[3])"
+ "%{nf%} shrd{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ishift")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "DI")])
+
(define_insn "x86_64_shrd_ndd_1"
[(set (match_operand:DI 0 "register_operand" "=r")
(ior:DI (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "rm")
@@ -16207,6 +16549,57 @@
(set_attr "length_immediate" "1")
(set_attr "mode" "DI")])
+(define_insn_and_split "*x86_64_shrd_shld_1_nozext_nf"
+ [(set (match_operand:DI 0 "nonimmediate_operand")
+ (ior:DI (lshiftrt:DI (match_operand:DI 4 "nonimmediate_operand")
+ (match_operand:QI 2 "const_0_to_63_operand"))
+ (ashift:DI
+ (match_operand:DI 1 "nonimmediate_operand")
+ (match_operand:QI 3 "const_0_to_63_operand"))))]
+ "TARGET_64BIT && TARGET_APX_NF
+ && INTVAL (operands[3]) == 64 - INTVAL (operands[2])
+ && ix86_pre_reload_split ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
+ if (rtx_equal_p (operands[4], operands[0]))
+ {
+ operands[1] = force_reg (DImode, operands[1]);
+ emit_insn (gen_x86_64_shrd_1_nf (operands[0], operands[1], operands[2],
operands[3]));
+ }
+ else if (rtx_equal_p (operands[1], operands[0]))
+ {
+ operands[4] = force_reg (DImode, operands[4]);
+ emit_insn (gen_x86_64_shld_1_nf (operands[0], operands[4], operands[3],
operands[2]));
+ }
+ else if (TARGET_APX_NDD)
+ {
+ rtx tmp = gen_reg_rtx (DImode);
+ if (MEM_P (operands[4]))
+ {
+ operands[1] = force_reg (DImode, operands[1]);
+ emit_insn (gen_x86_64_shrd_ndd_1_nf (tmp, operands[4], operands[1],
+ operands[2], operands[3]));
+ }
+ else if (MEM_P (operands[1]))
+ emit_insn (gen_x86_64_shld_ndd_1_nf (tmp, operands[1], operands[4],
+ operands[3], operands[2]));
+ else
+ emit_insn (gen_x86_64_shrd_ndd_1_nf (tmp, operands[4], operands[1],
+ operands[2], operands[3]));
+ emit_move_insn (operands[0], tmp);
+ }
+ else
+ {
+ operands[1] = force_reg (DImode, operands[1]);
+ rtx tmp = gen_reg_rtx (DImode);
+ emit_move_insn (tmp, operands[4]);
+ emit_insn (gen_x86_64_shrd_1_nf (tmp, operands[1], operands[2],
operands[3]));
+ emit_move_insn (operands[0], tmp);
+ }
+ DONE;
+})
(define_insn_and_split "*x86_64_shrd_shld_1_nozext"
[(set (match_operand:DI 0 "nonimmediate_operand") @@ -16309,6 +16702,27 @@
emit_move_insn (operands[4], operands[0]);
})
+(define_insn "x86_shrd_nf"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
+ (ior:SI (lshiftrt:SI (match_dup 0)
+ (and:QI (match_operand:QI 2 "nonmemory_operand" "Ic")
+ (const_int 31)))
+ (subreg:SI
+ (ashift:DI
+ (zero_extend:DI
+ (match_operand:SI 1 "register_operand" "r"))
+ (minus:QI (const_int 32)
+ (and:QI (match_dup 2) (const_int 31)))) 0)))]
+ "TARGET_APX_NF"
+ "%{nf%} shrd{l}\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ishift")
+ (set_attr "prefix_0f" "1")
+ (set_attr "mode" "SI")
+ (set_attr "pent_pair" "np")
+ (set_attr "athlon_decode" "vector")
+ (set_attr "amdfam10_decode" "vector")
+ (set_attr "bdver1_decode" "vector")])
+
(define_insn "x86_shrd"
[(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
(ior:SI (lshiftrt:SI (match_dup 0) @@ -16331,6 +16745,22 @@
(set_attr "amdfam10_decode" "vector")
(set_attr "bdver1_decode" "vector")])
+(define_insn "x86_shrd_ndd_nf"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (ior:SI (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
+ (and:QI (match_operand:QI 3 "nonmemory_operand" "Ic")
+ (const_int 31)))
+ (subreg:SI
+ (ashift:DI
+ (zero_extend:DI
+ (match_operand:SI 2 "register_operand" "r"))
+ (minus:QI (const_int 32)
+ (and:QI (match_dup 3) (const_int 31)))) 0)))]
+ "TARGET_APX_NDD && TARGET_APX_NF"
+ "%{nf%} shrd{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ishift")
+ (set_attr "mode" "SI")])
+
(define_insn "x86_shrd_ndd"
[(set (match_operand:SI 0 "register_operand" "=r")
(ior:SI (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
@@ -16348,6 +16778,27 @@
[(set_attr "type" "ishift")
(set_attr "mode" "SI")])
+(define_insn "x86_shrd_1_nf"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
+ (ior:SI (lshiftrt:SI (match_dup 0)
+ (match_operand:QI 2 "const_0_to_31_operand"))
+ (subreg:SI
+ (ashift:DI
+ (zero_extend:DI
+ (match_operand:SI 1 "register_operand" "r"))
+ (match_operand:QI 3 "const_0_to_63_operand")) 0)))]
+ "TARGET_APX_NF
+ && INTVAL (operands[3]) == 32 - INTVAL (operands[2])"
+ "%{nf%} shrd{l}\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ishift")
+ (set_attr "prefix_0f" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "SI")
+ (set_attr "pent_pair" "np")
+ (set_attr "athlon_decode" "vector")
+ (set_attr "amdfam10_decode" "vector")
+ (set_attr "bdver1_decode" "vector")])
+
(define_insn "x86_shrd_1"
[(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
(ior:SI (lshiftrt:SI (match_dup 0) @@ -16369,6 +16820,22 @@
(set_attr "amdfam10_decode" "vector")
(set_attr "bdver1_decode" "vector")])
+(define_insn "x86_shrd_ndd_1_nf"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (ior:SI (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
+ (match_operand:QI 3 "const_0_to_31_operand"))
+ (subreg:SI
+ (ashift:DI
+ (zero_extend:DI
+ (match_operand:SI 2 "register_operand" "r"))
+ (match_operand:QI 4 "const_0_to_63_operand")) 0)))]
+ "TARGET_APX_NDD && TARGET_APX_NF
+ && (INTVAL (operands[4]) == 32 - INTVAL (operands[3]))"
+ "%{nf%} shrd{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ishift")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "SI")])
+
(define_insn "x86_shrd_ndd_1"
[(set (match_operand:SI 0 "register_operand" "=r")
(ior:SI (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
@@ -16386,6 +16853,57 @@
(set_attr "length_immediate" "1")
(set_attr "mode" "SI")])
+(define_insn_and_split "*x86_shrd_shld_1_nozext_nf"
+ [(set (match_operand:SI 0 "nonimmediate_operand")
+ (ior:SI (lshiftrt:SI (match_operand:SI 4 "nonimmediate_operand")
+ (match_operand:QI 2 "const_0_to_31_operand"))
+ (ashift:SI
+ (match_operand:SI 1 "nonimmediate_operand")
+ (match_operand:QI 3 "const_0_to_31_operand"))))]
+ "TARGET_APX_NF &&
+ INTVAL (operands[3]) == 32 - INTVAL (operands[2])
+ && ix86_pre_reload_split ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
+ if (rtx_equal_p (operands[4], operands[0]))
+ {
+ operands[1] = force_reg (SImode, operands[1]);
+ emit_insn (gen_x86_shrd_1_nf (operands[0], operands[1], operands[2],
operands[3]));
+ }
+ else if (rtx_equal_p (operands[1], operands[0]))
+ {
+ operands[4] = force_reg (SImode, operands[4]);
+ emit_insn (gen_x86_shld_1_nf (operands[0], operands[4], operands[3],
operands[2]));
+ }
+ else if (TARGET_APX_NDD)
+ {
+ rtx tmp = gen_reg_rtx (SImode);
+ if (MEM_P (operands[4]))
+ {
+ operands[1] = force_reg (SImode, operands[1]);
+ emit_insn (gen_x86_shrd_ndd_1_nf (tmp, operands[4], operands[1],
+ operands[2], operands[3]));
+ }
+ else if (MEM_P (operands[1]))
+ emit_insn (gen_x86_shld_ndd_1_nf (tmp, operands[1], operands[4],
+ operands[3], operands[2]));
+ else
+ emit_insn (gen_x86_shrd_ndd_1_nf (tmp, operands[4], operands[1],
+ operands[2], operands[3]));
+ emit_move_insn (operands[0], tmp);
+ }
+ else
+ {
+ operands[1] = force_reg (SImode, operands[1]);
+ rtx tmp = gen_reg_rtx (SImode);
+ emit_move_insn (tmp, operands[4]);
+ emit_insn (gen_x86_shrd_1_nf (tmp, operands[1], operands[2],
operands[3]));
+ emit_move_insn (operands[0], tmp);
+ }
+ DONE;
+})
(define_insn_and_split "*x86_shrd_shld_1_nozext"
[(set (match_operand:SI 0 "nonimmediate_operand")
--
2.31.1