gcc/ChangeLog: * config/i386/i386.md (x86_64_shld_nf): New define_insn. (x86_64_shld_ndd_nf): Ditto. (x86_64_shld_1_nf): Ditto. (x86_64_shld_ndd_1_nf): Ditto. (*x86_64_shld_shrd_1_nozext_nf): New define_insn_and_split. (x86_shld_nf): New define_insn. (x86_shld_ndd_nf): Ditto. (x86_shld_1_nf): Ditto. (x86_shld_ndd_1_nf): Ditto. (*x86_shld_shrd_1_nozext_nf): New define_insn_and_split. (<insn><dwi>3_doubleword_lowpart_nf): Ditto. (x86_64_shrd_nf): New define_insn. (x86_64_shrd_ndd_nf): Ditto. (x86_64_shrd_1_nf): Ditto. (x86_64_shrd_ndd_1_nf): Ditto. (*x86_64_shrd_shld_1_nozext_nf): New define_insn_and_split. (x86_shrd_nf): New define_insn. (x86_shrd_ndd_nf): Ditto. (x86_shrd_1_nf): Ditto. (x86_shrd_ndd_1_nf): Ditto. (*x86_shrd_shld_1_nozext_nf): New define_insn_and_split. --- gcc/config/i386/i386.md | 377 +++++++++++++++++++++++++++++++--------- 1 file changed, 296 insertions(+), 81 deletions(-)
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 731eb12d13a..4d684e8d919 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -14552,7 +14552,7 @@ DONE; }) -(define_insn "x86_64_shld" +(define_insn "x86_64_shld<nf_name>" [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m") (ior:DI (ashift:DI (match_dup 0) (and:QI (match_operand:QI 2 "nonmemory_operand" "Jc") @@ -14562,10 +14562,9 @@ (zero_extend:TI (match_operand:DI 1 "register_operand" "r")) (minus:QI (const_int 64) - (and:QI (match_dup 2) (const_int 63)))) 0))) - (clobber (reg:CC FLAGS_REG))] - "TARGET_64BIT" - "shld{q}\t{%2, %1, %0|%0, %1, %2}" + (and:QI (match_dup 2) (const_int 63)))) 0)))] + "TARGET_64BIT && <nf_condition>" + "<nf_prefix>shld{q}\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "ishift") (set_attr "prefix_0f" "1") (set_attr "mode" "DI") @@ -14573,7 +14572,7 @@ (set_attr "amdfam10_decode" "vector") (set_attr "bdver1_decode" "vector")]) -(define_insn "x86_64_shld_ndd" +(define_insn "x86_64_shld_ndd<nf_name>" [(set (match_operand:DI 0 "register_operand" "=r") (ior:DI (ashift:DI (match_operand:DI 1 "nonimmediate_operand" "rm") (and:QI (match_operand:QI 3 "nonmemory_operand" "Jc") @@ -14583,14 +14582,13 @@ (zero_extend:TI (match_operand:DI 2 "register_operand" "r")) (minus:QI (const_int 64) - (and:QI (match_dup 3) (const_int 63)))) 0))) - (clobber (reg:CC FLAGS_REG))] - "TARGET_APX_NDD" - "shld{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}" + (and:QI (match_dup 3) (const_int 63)))) 0)))] + "TARGET_APX_NDD && <nf_condition>" + "<nf_prefix>shld{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ishift") (set_attr "mode" "DI")]) -(define_insn "x86_64_shld_1" +(define_insn "x86_64_shld_1<nf_name>" [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m") (ior:DI (ashift:DI (match_dup 0) (match_operand:QI 2 "const_0_to_63_operand")) @@ -14598,11 +14596,11 @@ (lshiftrt:TI (zero_extend:TI (match_operand:DI 1 "register_operand" "r")) - (match_operand:QI 3 
"const_0_to_255_operand")) 0))) - (clobber (reg:CC FLAGS_REG))] + (match_operand:QI 3 "const_0_to_255_operand")) 0)))] "TARGET_64BIT - && INTVAL (operands[3]) == 64 - INTVAL (operands[2])" - "shld{q}\t{%2, %1, %0|%0, %1, %2}" + && INTVAL (operands[3]) == 64 - INTVAL (operands[2]) + && <nf_condition>" + "<nf_prefix>shld{q}\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "ishift") (set_attr "prefix_0f" "1") (set_attr "mode" "DI") @@ -14611,7 +14609,7 @@ (set_attr "amdfam10_decode" "vector") (set_attr "bdver1_decode" "vector")]) -(define_insn "x86_64_shld_ndd_1" +(define_insn "x86_64_shld_ndd_1<nf_name>" [(set (match_operand:DI 0 "register_operand" "=r") (ior:DI (ashift:DI (match_operand:DI 1 "nonimmediate_operand" "rm") (match_operand:QI 3 "const_0_to_63_operand")) @@ -14619,15 +14617,66 @@ (lshiftrt:TI (zero_extend:TI (match_operand:DI 2 "register_operand" "r")) - (match_operand:QI 4 "const_0_to_255_operand")) 0))) - (clobber (reg:CC FLAGS_REG))] + (match_operand:QI 4 "const_0_to_255_operand")) 0)))] "TARGET_APX_NDD - && INTVAL (operands[4]) == 64 - INTVAL (operands[3])" - "shld{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}" + && INTVAL (operands[4]) == 64 - INTVAL (operands[3]) + && <nf_condition>" + "<nf_prefix>shld{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ishift") (set_attr "mode" "DI") (set_attr "length_immediate" "1")]) +(define_insn_and_split "*x86_64_shld_shrd_1_nozext_nf" + [(set (match_operand:DI 0 "nonimmediate_operand") + (ior:DI (ashift:DI (match_operand:DI 4 "nonimmediate_operand") + (match_operand:QI 2 "const_0_to_63_operand")) + (lshiftrt:DI + (match_operand:DI 1 "nonimmediate_operand") + (match_operand:QI 3 "const_0_to_63_operand"))))] + "TARGET_64BIT && TARGET_APX_NF + && INTVAL (operands[3]) == 64 - INTVAL (operands[2]) + && ix86_pre_reload_split ()" + "#" + "&& 1" + [(const_int 0)] +{ + if (rtx_equal_p (operands[4], operands[0])) + { + operands[1] = force_reg (DImode, operands[1]); + emit_insn (gen_x86_64_shld_1_nf (operands[0], operands[1], 
operands[2], operands[3])); + } + else if (rtx_equal_p (operands[1], operands[0])) + { + operands[4] = force_reg (DImode, operands[4]); + emit_insn (gen_x86_64_shrd_1_nf (operands[0], operands[4], operands[3], operands[2])); + } + else if (TARGET_APX_NDD) + { + rtx tmp = gen_reg_rtx (DImode); + if (MEM_P (operands[4])) + { + operands[1] = force_reg (DImode, operands[1]); + emit_insn (gen_x86_64_shld_ndd_1_nf (tmp, operands[4], operands[1], + operands[2], operands[3])); + } + else if (MEM_P (operands[1])) + emit_insn (gen_x86_64_shrd_ndd_1_nf (tmp, operands[1], operands[4], + operands[3], operands[2])); + else + emit_insn (gen_x86_64_shld_ndd_1_nf (tmp, operands[4], operands[1], + operands[2], operands[3])); + emit_move_insn (operands[0], tmp); + } + else + { + operands[1] = force_reg (DImode, operands[1]); + rtx tmp = gen_reg_rtx (DImode); + emit_move_insn (tmp, operands[4]); + emit_insn (gen_x86_64_shld_1_nf (tmp, operands[1], operands[2], operands[3])); + emit_move_insn (operands[0], tmp); + } + DONE; +}) (define_insn_and_split "*x86_64_shld_shrd_1_nozext" [(set (match_operand:DI 0 "nonimmediate_operand") @@ -14730,7 +14779,7 @@ emit_move_insn (operands[4], operands[0]); }) -(define_insn "x86_shld" +(define_insn "x86_shld<nf_name>" [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m") (ior:SI (ashift:SI (match_dup 0) (and:QI (match_operand:QI 2 "nonmemory_operand" "Ic") @@ -14740,10 +14789,9 @@ (zero_extend:DI (match_operand:SI 1 "register_operand" "r")) (minus:QI (const_int 32) - (and:QI (match_dup 2) (const_int 31)))) 0))) - (clobber (reg:CC FLAGS_REG))] - "" - "shld{l}\t{%2, %1, %0|%0, %1, %2}" + (and:QI (match_dup 2) (const_int 31)))) 0)))] + "<nf_condition>" + "<nf_prefix>shld{l}\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "ishift") (set_attr "prefix_0f" "1") (set_attr "mode" "SI") @@ -14752,7 +14800,7 @@ (set_attr "amdfam10_decode" "vector") (set_attr "bdver1_decode" "vector")]) -(define_insn "x86_shld_ndd" +(define_insn "x86_shld_ndd<nf_name>" [(set 
(match_operand:SI 0 "nonimmediate_operand" "=r") (ior:SI (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "rm") (and:QI (match_operand:QI 3 "nonmemory_operand" "Ic") @@ -14762,15 +14810,14 @@ (zero_extend:DI (match_operand:SI 2 "register_operand" "r")) (minus:QI (const_int 32) - (and:QI (match_dup 3) (const_int 31)))) 0))) - (clobber (reg:CC FLAGS_REG))] - "TARGET_APX_NDD" - "shld{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}" + (and:QI (match_dup 3) (const_int 31)))) 0)))] + "TARGET_APX_NDD && <nf_condition>" + "<nf_prefix>shld{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ishift") (set_attr "mode" "SI")]) -(define_insn "x86_shld_1" +(define_insn "x86_shld_1<nf_name>" [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m") (ior:SI (ashift:SI (match_dup 0) (match_operand:QI 2 "const_0_to_31_operand")) @@ -14778,10 +14825,10 @@ (lshiftrt:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r")) - (match_operand:QI 3 "const_0_to_63_operand")) 0))) - (clobber (reg:CC FLAGS_REG))] - "INTVAL (operands[3]) == 32 - INTVAL (operands[2])" - "shld{l}\t{%2, %1, %0|%0, %1, %2}" + (match_operand:QI 3 "const_0_to_63_operand")) 0)))] + "INTVAL (operands[3]) == 32 - INTVAL (operands[2]) + && <nf_condition>" + "<nf_prefix>shld{l}\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "ishift") (set_attr "prefix_0f" "1") (set_attr "length_immediate" "1") @@ -14791,7 +14838,7 @@ (set_attr "amdfam10_decode" "vector") (set_attr "bdver1_decode" "vector")]) -(define_insn "x86_shld_ndd_1" +(define_insn "x86_shld_ndd_1<nf_name>" [(set (match_operand:SI 0 "register_operand" "=r") (ior:SI (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "rm") (match_operand:QI 3 "const_0_to_31_operand")) @@ -14799,15 +14846,66 @@ (lshiftrt:DI (zero_extend:DI (match_operand:SI 2 "register_operand" "r")) - (match_operand:QI 4 "const_0_to_63_operand")) 0))) - (clobber (reg:CC FLAGS_REG))] + (match_operand:QI 4 "const_0_to_63_operand")) 0)))] "TARGET_APX_NDD - && INTVAL (operands[4]) == 32 - INTVAL 
(operands[3])" - "shld{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}" + && INTVAL (operands[4]) == 32 - INTVAL (operands[3]) + && <nf_condition>" + "<nf_prefix>shld{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ishift") (set_attr "length_immediate" "1") (set_attr "mode" "SI")]) +(define_insn_and_split "*x86_shld_shrd_1_nozext_nf" + [(set (match_operand:SI 0 "nonimmediate_operand") + (ior:SI (ashift:SI (match_operand:SI 4 "nonimmediate_operand") + (match_operand:QI 2 "const_0_to_31_operand")) + (lshiftrt:SI + (match_operand:SI 1 "nonimmediate_operand") + (match_operand:QI 3 "const_0_to_31_operand"))))] + "TARGET_APX_NF && + INTVAL (operands[3]) == 32 - INTVAL (operands[2]) + && ix86_pre_reload_split ()" + "#" + "&& 1" + [(const_int 0)] +{ + if (rtx_equal_p (operands[4], operands[0])) + { + operands[1] = force_reg (SImode, operands[1]); + emit_insn (gen_x86_shld_1_nf (operands[0], operands[1], operands[2], operands[3])); + } + else if (rtx_equal_p (operands[1], operands[0])) + { + operands[4] = force_reg (SImode, operands[4]); + emit_insn (gen_x86_shrd_1_nf (operands[0], operands[4], operands[3], operands[2])); + } + else if (TARGET_APX_NDD) + { + rtx tmp = gen_reg_rtx (SImode); + if (MEM_P (operands[4])) + { + operands[1] = force_reg (SImode, operands[1]); + emit_insn (gen_x86_shld_ndd_1_nf (tmp, operands[4], operands[1], + operands[2], operands[3])); + } + else if (MEM_P (operands[1])) + emit_insn (gen_x86_shrd_ndd_1_nf (tmp, operands[1], operands[4], + operands[3], operands[2])); + else + emit_insn (gen_x86_shld_ndd_1_nf (tmp, operands[4], operands[1], + operands[2], operands[3])); + emit_move_insn (operands[0], tmp); + } + else + { + operands[1] = force_reg (SImode, operands[1]); + rtx tmp = gen_reg_rtx (SImode); + emit_move_insn (tmp, operands[4]); + emit_insn (gen_x86_shld_1_nf (tmp, operands[1], operands[2], operands[3])); + emit_move_insn (operands[0], tmp); + } + DONE; +}) (define_insn_and_split "*x86_shld_shrd_1_nozext" [(set (match_operand:SI 0 
"nonimmediate_operand") @@ -15846,6 +15944,26 @@ }) ;; Split truncations of double word right shifts into x86_shrd_1. +(define_insn_and_split "<insn><dwi>3_doubleword_lowpart_nf" + [(set (match_operand:DWIH 0 "register_operand" "=&r") + (subreg:DWIH + (any_shiftrt:<DWI> (match_operand:<DWI> 1 "register_operand" "r") + (match_operand:QI 2 "const_int_operand")) 0))] + "TARGET_APX_NF && UINTVAL (operands[2]) < <MODE_SIZE> * BITS_PER_UNIT" + "#" + "&& reload_completed" + [(set (match_dup 0) + (ior:DWIH (lshiftrt:DWIH (match_dup 0) (match_dup 2)) + (subreg:DWIH + (ashift:<DWI> (zero_extend:<DWI> (match_dup 3)) + (match_dup 4)) 0)))] +{ + split_double_mode (<DWI>mode, &operands[1], 1, &operands[1], +&operands[3]); + operands[4] = GEN_INT ((<MODE_SIZE> * BITS_PER_UNIT) - INTVAL +(operands[2])); + if (!rtx_equal_p (operands[0], operands[1])) + emit_move_insn (operands[0], operands[1]); +}) + (define_insn_and_split "<insn><dwi>3_doubleword_lowpart" [(set (match_operand:DWIH 0 "register_operand" "=&r") (subreg:DWIH @@ -15869,7 +15987,7 @@ emit_move_insn (operands[0], operands[1]); }) -(define_insn "x86_64_shrd" +(define_insn "x86_64_shrd<nf_name>" [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m") (ior:DI (lshiftrt:DI (match_dup 0) (and:QI (match_operand:QI 2 "nonmemory_operand" "Jc") @@ -15879,10 +15997,9 @@ (zero_extend:TI (match_operand:DI 1 "register_operand" "r")) (minus:QI (const_int 64) - (and:QI (match_dup 2) (const_int 63)))) 0))) - (clobber (reg:CC FLAGS_REG))] - "TARGET_64BIT" - "shrd{q}\t{%2, %1, %0|%0, %1, %2}" + (and:QI (match_dup 2) (const_int 63)))) 0)))] + "TARGET_64BIT && <nf_condition>" + "<nf_prefix>shrd{q}\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "ishift") (set_attr "prefix_0f" "1") (set_attr "mode" "DI") @@ -15890,7 +16007,7 @@ (set_attr "amdfam10_decode" "vector") (set_attr "bdver1_decode" "vector")]) -(define_insn "x86_64_shrd_ndd" +(define_insn "x86_64_shrd_ndd<nf_name>" [(set (match_operand:DI 0 "register_operand" "=r") (ior:DI (lshiftrt:DI 
(match_operand:DI 1 "nonimmediate_operand" "rm") (and:QI (match_operand:QI 3 "nonmemory_operand" "Jc") @@ -15900,15 +16017,13 @@ (zero_extend:TI (match_operand:DI 2 "register_operand" "r")) (minus:QI (const_int 64) - (and:QI (match_dup 3) (const_int 63)))) 0))) - (clobber (reg:CC FLAGS_REG))] - "TARGET_APX_NDD" - "shrd{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}" + (and:QI (match_dup 3) (const_int 63)))) 0)))] + "TARGET_APX_NDD && <nf_condition>" + "<nf_prefix>shrd{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ishift") (set_attr "mode" "DI")]) - -(define_insn "x86_64_shrd_1" +(define_insn "x86_64_shrd_1<nf_name>" [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m") (ior:DI (lshiftrt:DI (match_dup 0) (match_operand:QI 2 "const_0_to_63_operand")) @@ -15916,11 +16031,11 @@ (ashift:TI (zero_extend:TI (match_operand:DI 1 "register_operand" "r")) - (match_operand:QI 3 "const_0_to_255_operand")) 0))) - (clobber (reg:CC FLAGS_REG))] + (match_operand:QI 3 "const_0_to_255_operand")) 0)))] "TARGET_64BIT - && INTVAL (operands[3]) == 64 - INTVAL (operands[2])" - "shrd{q}\t{%2, %1, %0|%0, %1, %2}" + && INTVAL (operands[3]) == 64 - INTVAL (operands[2]) + && <nf_condition>" + "<nf_prefix>shrd{q}\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "ishift") (set_attr "prefix_0f" "1") (set_attr "length_immediate" "1") @@ -15929,7 +16044,7 @@ (set_attr "amdfam10_decode" "vector") (set_attr "bdver1_decode" "vector")]) -(define_insn "x86_64_shrd_ndd_1" +(define_insn "x86_64_shrd_ndd_1<nf_name>" [(set (match_operand:DI 0 "register_operand" "=r") (ior:DI (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "rm") (match_operand:QI 3 "const_0_to_63_operand")) @@ -15937,15 +16052,66 @@ (ashift:TI (zero_extend:TI (match_operand:DI 2 "register_operand" "r")) - (match_operand:QI 4 "const_0_to_255_operand")) 0))) - (clobber (reg:CC FLAGS_REG))] + (match_operand:QI 4 "const_0_to_255_operand")) 0)))] "TARGET_APX_NDD - && INTVAL (operands[4]) == 64 - INTVAL (operands[3])" - "shrd{q}\t{%3, %2, %1, 
%0|%0, %1, %2, %3}" + && INTVAL (operands[4]) == 64 - INTVAL (operands[3]) + && <nf_condition>" + "<nf_prefix>shrd{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ishift") (set_attr "length_immediate" "1") (set_attr "mode" "DI")]) +(define_insn_and_split "*x86_64_shrd_shld_1_nozext_nf" + [(set (match_operand:DI 0 "nonimmediate_operand") + (ior:DI (lshiftrt:DI (match_operand:DI 4 "nonimmediate_operand") + (match_operand:QI 2 "const_0_to_63_operand")) + (ashift:DI + (match_operand:DI 1 "nonimmediate_operand") + (match_operand:QI 3 "const_0_to_63_operand"))))] + "TARGET_64BIT && TARGET_APX_NF + && INTVAL (operands[3]) == 64 - INTVAL (operands[2]) + && ix86_pre_reload_split ()" + "#" + "&& 1" + [(const_int 0)] +{ + if (rtx_equal_p (operands[4], operands[0])) + { + operands[1] = force_reg (DImode, operands[1]); + emit_insn (gen_x86_64_shrd_1_nf (operands[0], operands[1], operands[2], operands[3])); + } + else if (rtx_equal_p (operands[1], operands[0])) + { + operands[4] = force_reg (DImode, operands[4]); + emit_insn (gen_x86_64_shld_1_nf (operands[0], operands[4], operands[3], operands[2])); + } + else if (TARGET_APX_NDD) + { + rtx tmp = gen_reg_rtx (DImode); + if (MEM_P (operands[4])) + { + operands[1] = force_reg (DImode, operands[1]); + emit_insn (gen_x86_64_shrd_ndd_1_nf (tmp, operands[4], operands[1], + operands[2], operands[3])); + } + else if (MEM_P (operands[1])) + emit_insn (gen_x86_64_shld_ndd_1_nf (tmp, operands[1], operands[4], + operands[3], operands[2])); + else + emit_insn (gen_x86_64_shrd_ndd_1_nf (tmp, operands[4], operands[1], + operands[2], operands[3])); + emit_move_insn (operands[0], tmp); + } + else + { + operands[1] = force_reg (DImode, operands[1]); + rtx tmp = gen_reg_rtx (DImode); + emit_move_insn (tmp, operands[4]); + emit_insn (gen_x86_64_shrd_1_nf (tmp, operands[1], operands[2], operands[3])); + emit_move_insn (operands[0], tmp); + } + DONE; +}) (define_insn_and_split "*x86_64_shrd_shld_1_nozext" [(set (match_operand:DI 0 
"nonimmediate_operand") @@ -16048,7 +16214,7 @@ emit_move_insn (operands[4], operands[0]); }) -(define_insn "x86_shrd" +(define_insn "x86_shrd<nf_name>" [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m") (ior:SI (lshiftrt:SI (match_dup 0) (and:QI (match_operand:QI 2 "nonmemory_operand" "Ic") @@ -16058,10 +16224,9 @@ (zero_extend:DI (match_operand:SI 1 "register_operand" "r")) (minus:QI (const_int 32) - (and:QI (match_dup 2) (const_int 31)))) 0))) - (clobber (reg:CC FLAGS_REG))] - "" - "shrd{l}\t{%2, %1, %0|%0, %1, %2}" + (and:QI (match_dup 2) (const_int 31)))) 0)))] + "<nf_condition>" + "<nf_prefix>shrd{l}\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "ishift") (set_attr "prefix_0f" "1") (set_attr "mode" "SI") @@ -16070,7 +16235,7 @@ (set_attr "amdfam10_decode" "vector") (set_attr "bdver1_decode" "vector")]) -(define_insn "x86_shrd_ndd" +(define_insn "x86_shrd_ndd<nf_name>" [(set (match_operand:SI 0 "register_operand" "=r") (ior:SI (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "rm") (and:QI (match_operand:QI 3 "nonmemory_operand" "Ic") @@ -16080,14 +16245,13 @@ (zero_extend:DI (match_operand:SI 2 "register_operand" "r")) (minus:QI (const_int 32) - (and:QI (match_dup 3) (const_int 31)))) 0))) - (clobber (reg:CC FLAGS_REG))] - "TARGET_APX_NDD" - "shrd{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}" + (and:QI (match_dup 3) (const_int 31)))) 0)))] + "TARGET_APX_NDD && <nf_condition>" + "<nf_prefix>shrd{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ishift") (set_attr "mode" "SI")]) -(define_insn "x86_shrd_1" +(define_insn "x86_shrd_1<nf_name>" [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m") (ior:SI (lshiftrt:SI (match_dup 0) (match_operand:QI 2 "const_0_to_31_operand")) @@ -16095,10 +16259,10 @@ (ashift:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r")) - (match_operand:QI 3 "const_0_to_63_operand")) 0))) - (clobber (reg:CC FLAGS_REG))] - "INTVAL (operands[3]) == 32 - INTVAL (operands[2])" - "shrd{l}\t{%2, %1, %0|%0, %1, %2}" + 
(match_operand:QI 3 "const_0_to_63_operand")) 0)))] + "INTVAL (operands[3]) == 32 - INTVAL (operands[2]) + && <nf_condition>" + "<nf_prefix>shrd{l}\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "ishift") (set_attr "prefix_0f" "1") (set_attr "length_immediate" "1") @@ -16108,7 +16272,7 @@ (set_attr "amdfam10_decode" "vector") (set_attr "bdver1_decode" "vector")]) -(define_insn "x86_shrd_ndd_1" +(define_insn "x86_shrd_ndd_1<nf_name>" [(set (match_operand:SI 0 "register_operand" "=r") (ior:SI (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "rm") (match_operand:QI 3 "const_0_to_31_operand")) @@ -16116,15 +16280,66 @@ (ashift:DI (zero_extend:DI (match_operand:SI 2 "register_operand" "r")) - (match_operand:QI 4 "const_0_to_63_operand")) 0))) - (clobber (reg:CC FLAGS_REG))] + (match_operand:QI 4 "const_0_to_63_operand")) 0)))] "TARGET_APX_NDD - && (INTVAL (operands[4]) == 32 - INTVAL (operands[3]))" - "shrd{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}" + && (INTVAL (operands[4]) == 32 - INTVAL (operands[3])) + && <nf_condition>" + "<nf_prefix>shrd{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ishift") (set_attr "length_immediate" "1") (set_attr "mode" "SI")]) +(define_insn_and_split "*x86_shrd_shld_1_nozext_nf" + [(set (match_operand:SI 0 "nonimmediate_operand") + (ior:SI (lshiftrt:SI (match_operand:SI 4 "nonimmediate_operand") + (match_operand:QI 2 "const_0_to_31_operand")) + (ashift:SI + (match_operand:SI 1 "nonimmediate_operand") + (match_operand:QI 3 "const_0_to_31_operand"))))] + "TARGET_APX_NF && + INTVAL (operands[3]) == 32 - INTVAL (operands[2]) + && ix86_pre_reload_split ()" + "#" + "&& 1" + [(const_int 0)] +{ + if (rtx_equal_p (operands[4], operands[0])) + { + operands[1] = force_reg (SImode, operands[1]); + emit_insn (gen_x86_shrd_1_nf (operands[0], operands[1], operands[2], operands[3])); + } + else if (rtx_equal_p (operands[1], operands[0])) + { + operands[4] = force_reg (SImode, operands[4]); + emit_insn (gen_x86_shld_1_nf (operands[0], operands[4], 
operands[3], operands[2])); + } + else if (TARGET_APX_NDD) + { + rtx tmp = gen_reg_rtx (SImode); + if (MEM_P (operands[4])) + { + operands[1] = force_reg (SImode, operands[1]); + emit_insn (gen_x86_shrd_ndd_1_nf (tmp, operands[4], operands[1], + operands[2], operands[3])); + } + else if (MEM_P (operands[1])) + emit_insn (gen_x86_shld_ndd_1_nf (tmp, operands[1], operands[4], + operands[3], operands[2])); + else + emit_insn (gen_x86_shrd_ndd_1_nf (tmp, operands[4], operands[1], + operands[2], operands[3])); + emit_move_insn (operands[0], tmp); + } + else + { + operands[1] = force_reg (SImode, operands[1]); + rtx tmp = gen_reg_rtx (SImode); + emit_move_insn (tmp, operands[4]); + emit_insn (gen_x86_shrd_1_nf (tmp, operands[1], operands[2], operands[3])); + emit_move_insn (operands[0], tmp); + } + DONE; +}) (define_insn_and_split "*x86_shrd_shld_1_nozext" [(set (match_operand:SI 0 "nonimmediate_operand") -- 2.31.1