https://gcc.gnu.org/g:4371ac5292da9eadc3daccbdce2a15c029a04892
commit r15-982-g4371ac5292da9eadc3daccbdce2a15c029a04892 Author: Lingling Kong <lingling.k...@intel.com> Date: Mon Jun 3 14:23:57 2024 +0800 [APX NF] Support APX NF for lzcnt/tzcnt/popcnt gcc/ChangeLog: * config/i386/i386.md (clz<mode>2_lzcnt_nf): New define_insn. (*clz<mode>2_lzcnt_falsedep_nf): Ditto. (<lt_zcnt>_<mode>_nf): Ditto. (*<lt_zcnt>_<mode>_falsedep_nf): Ditto. (<lt_zcnt>_hi<nf_name>): Ditto. (popcount<mode>2_nf): Ditto. (*popcount<mode>2_falsedep_nf): Ditto. (popcounthi2<nf_name>): Ditto. Diff: --- gcc/config/i386/i386.md | 124 +++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 113 insertions(+), 11 deletions(-) diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 48ca19cb8df..2c95395b7be 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -20283,6 +20283,24 @@ operands[3] = gen_reg_rtx (<MODE>mode); }) +(define_insn_and_split "clz<mode>2_lzcnt_nf" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (clz:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "rm")))] + "TARGET_APX_NF && TARGET_LZCNT" + "%{nf%} lzcnt{<imodesuffix>}\t{%1, %0|%0, %1}" + "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed + && optimize_function_for_speed_p (cfun) + && !reg_mentioned_p (operands[0], operands[1])" + [(parallel + [(set (match_dup 0) + (clz:SWI48 (match_dup 1))) + (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)])] + "ix86_expand_clear (operands[0]);" + [(set_attr "prefix_rep" "1") + (set_attr "type" "bitmanip") + (set_attr "mode" "<MODE>")]) + (define_insn_and_split "clz<mode>2_lzcnt" [(set (match_operand:SWI48 0 "register_operand" "=r") (clz:SWI48 @@ -20306,6 +20324,18 @@ ; False dependency happens when destination is only updated by tzcnt, ; lzcnt or popcnt. There is no false dependency when destination is ; also used in source. +(define_insn "*clz<mode>2_lzcnt_falsedep_nf" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (clz:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "rm"))) + (unspec [(match_operand:SWI48 2 "register_operand" "0")] + UNSPEC_INSN_FALSE_DEP)] + "TARGET_APX_NF && TARGET_LZCNT" + "%{nf%} lzcnt{<imodesuffix>}\t{%1, %0|%0, %1}" + [(set_attr "prefix_rep" "1") + (set_attr "type" "bitmanip") + (set_attr "mode" "<MODE>")]) + (define_insn "*clz<mode>2_lzcnt_falsedep" [(set (match_operand:SWI48 0 "register_operand" "=r") (clz:SWI48 @@ -20412,6 +20442,25 @@ ;; Version of lzcnt/tzcnt that is expanded from intrinsics. This version ;; provides operand size as output when source operand is zero. +(define_insn_and_split "<lt_zcnt>_<mode>_nf" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (unspec:SWI48 + [(match_operand:SWI48 1 "nonimmediate_operand" "rm")] LT_ZCNT))] + "TARGET_APX_NF" + "%{nf%} <lt_zcnt>{<imodesuffix>}\t{%1, %0|%0, %1}" + "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed + && optimize_function_for_speed_p (cfun) + && !reg_mentioned_p (operands[0], operands[1])" + [(parallel + [(set (match_dup 0) + (unspec:SWI48 [(match_dup 1)] LT_ZCNT)) + (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)])] + "ix86_expand_clear (operands[0]);" + [(set_attr "type" "<lt_zcnt_type>") + (set_attr "prefix_0f" "1") + (set_attr "prefix_rep" "1") + (set_attr "mode" "<MODE>")]) + (define_insn_and_split "<lt_zcnt>_<mode>" [(set (match_operand:SWI48 0 "register_operand" "=r") (unspec:SWI48 @@ -20436,6 +20485,19 @@ ; False dependency happens when destination is only updated by tzcnt, ; lzcnt or popcnt. There is no false dependency when destination is ; also used in source. +(define_insn "*<lt_zcnt>_<mode>_falsedep_nf" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (unspec:SWI48 + [(match_operand:SWI48 1 "nonimmediate_operand" "rm")] LT_ZCNT)) + (unspec [(match_operand:SWI48 2 "register_operand" "0")] + UNSPEC_INSN_FALSE_DEP)] + "TARGET_APX_NF" + "%{nf%} <lt_zcnt>{<imodesuffix>}\t{%1, %0|%0, %1}" + [(set_attr "type" "<lt_zcnt_type>") + (set_attr "prefix_0f" "1") + (set_attr "prefix_rep" "1") + (set_attr "mode" "<MODE>")]) + (define_insn "*<lt_zcnt>_<mode>_falsedep" [(set (match_operand:SWI48 0 "register_operand" "=r") (unspec:SWI48 @@ -20450,13 +20512,12 @@ (set_attr "prefix_rep" "1") (set_attr "mode" "<MODE>")]) -(define_insn "<lt_zcnt>_hi" +(define_insn "<lt_zcnt>_hi<nf_name>" [(set (match_operand:HI 0 "register_operand" "=r") (unspec:HI - [(match_operand:HI 1 "nonimmediate_operand" "rm")] LT_ZCNT)) - (clobber (reg:CC FLAGS_REG))] - "" - "<lt_zcnt>{w}\t{%1, %0|%0, %1}" + [(match_operand:HI 1 "nonimmediate_operand" "rm")] LT_ZCNT))] + "<nf_condition>" + "<nf_prefix><lt_zcnt>{w}\t{%1, %0|%0, %1}" [(set_attr "type" "<lt_zcnt_type>") (set_attr "prefix_0f" "1") (set_attr "prefix_rep" "1") @@ -20874,6 +20935,30 @@ [(set_attr "type" "bitmanip") (set_attr "mode" "<MODE>")]) +(define_insn_and_split "popcount<mode>2_nf" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (popcount:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "rm")))] + "TARGET_APX_NF && TARGET_POPCNT" +{ +#if TARGET_MACHO + return "%{nf%} popcnt\t{%1, %0|%0, %1}"; +#else + return "%{nf%} popcnt{<imodesuffix>}\t{%1, %0|%0, %1}"; +#endif +} + "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed + && optimize_function_for_speed_p (cfun) + && !reg_mentioned_p (operands[0], operands[1])" + [(parallel + [(set (match_dup 0) + (popcount:SWI48 (match_dup 1))) + (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)])] + "ix86_expand_clear (operands[0]);" + [(set_attr "prefix_rep" "1") + (set_attr "type" "bitmanip") + (set_attr "mode" "<MODE>")]) + (define_insn_and_split "popcount<mode>2" [(set (match_operand:SWI48 0 "register_operand" "=r") (popcount:SWI48 @@ -20903,6 +20988,24 @@ ; False dependency happens when destination is only updated by tzcnt, ; lzcnt or popcnt. There is no false dependency when destination is ; also used in source. +(define_insn "*popcount<mode>2_falsedep_nf" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (popcount:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "rm"))) + (unspec [(match_operand:SWI48 2 "register_operand" "0")] + UNSPEC_INSN_FALSE_DEP)] + "TARGET_APX_NF && TARGET_POPCNT" +{ +#if TARGET_MACHO + return "%{nf%} popcnt\t{%1, %0|%0, %1}"; +#else + return "%{nf%} popcnt{<imodesuffix>}\t{%1, %0|%0, %1}"; +#endif +} + [(set_attr "prefix_rep" "1") + (set_attr "type" "bitmanip") + (set_attr "mode" "<MODE>")]) + (define_insn "*popcount<mode>2_falsedep" [(set (match_operand:SWI48 0 "register_operand" "=r") (popcount:SWI48 @@ -21060,17 +21163,16 @@ DONE; }) -(define_insn "popcounthi2" +(define_insn "popcounthi2<nf_name>" [(set (match_operand:HI 0 "register_operand" "=r") (popcount:HI - (match_operand:HI 1 "nonimmediate_operand" "rm"))) - (clobber (reg:CC FLAGS_REG))] - "TARGET_POPCNT" + (match_operand:HI 1 "nonimmediate_operand" "rm")))] + "TARGET_POPCNT && <nf_condition>" { #if TARGET_MACHO - return "popcnt\t{%1, %0|%0, %1}"; + return "<nf_prefix>popcnt\t{%1, %0|%0, %1}"; #else - return "popcnt{w}\t{%1, %0|%0, %1}"; + return "<nf_prefix>popcnt{w}\t{%1, %0|%0, %1}"; #endif } [(set_attr "prefix_rep" "1")