On Fri, Aug 15, 2014 at 2:26 PM, Yuri Rumyantsev <[email protected]> wrote:
> Is it important to have correct value for length attribute for Big Cores?
> As I new this attribute is used for code layout alignment.
>
> 2014-08-15 15:54 GMT+04:00 Jakub Jelinek <[email protected]>:
>> On Fri, Aug 15, 2014 at 03:45:33PM +0400, Yuri Rumyantsev wrote:
>>> gcc/ChangeLog
>>> 2014-08-15 Yuri Rumyantsev <[email protected]>
>>>
>>> PR target/62011
>>> * config/i386/i386-protos.h (ix86_avoid_false_dep_for_bm): New function
>>> prototype.
>>> * config/i386/i386.c (ix86_avoid_false_dep_for_bm): New function.
>>> * config/i386/i386.h (TARGET_AVOID_FALSE_DEP_FOR_BM) New macros.
>>> * config/i386/i386.md (ctz<mode>2, clz<mode>2_lzcnt, popcount<mode>2,
>>> *popcount<mode>2_cmp, *popcountsi2_cmp_zext): Output zeroing
>>> destination register for unary bit-manipulation instructions
>>> if required.
>>> * config/i386/x86-tune.def (X86_TUNE_AVOID_FALSE_DEP_FOR_BM): New.
I am testing a different approach, outlined in the attached patch. In
the patch, insn is split after reload to separate insns.
As far as popcnt is concerned, we don't need _cmp pattern, the generic
code is clever enough to substuitute "if (popcnt (a))" with "if (a)".
Uros.
Index: i386.h
===================================================================
--- i386.h (revision 214000)
+++ i386.h (working copy)
@@ -473,6 +473,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_L
ix86_tune_features[X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS]
#define TARGET_ADJUST_UNROLL \
ix86_tune_features[X86_TUNE_ADJUST_UNROLL]
+#define TARGET_AVOID_FALSE_DEP_FOR_BMI \
+ ix86_tune_features[X86_TUNE_AVOID_FALSE_DEP_FOR_BMI]
/* Feature tests against the various architecture variations. */
enum ix86_arch_indices {
Index: i386.md
===================================================================
--- i386.md (revision 214000)
+++ i386.md (working copy)
@@ -112,6 +112,7 @@
UNSPEC_XBEGIN_ABORT
UNSPEC_STOS
UNSPEC_PEEPSIB
+ UNSPEC_INSN_FALSE_DEP
;; For SSE/MMX support:
UNSPEC_FIX_NOTRUNC
@@ -12569,11 +12570,37 @@
(set_attr "prefix_0f" "1")
(set_attr "mode" "HI")])
-(define_insn "popcount<mode>2"
- [(set (match_operand:SWI248 0 "register_operand" "=r")
- (popcount:SWI248
- (match_operand:SWI248 1 "nonimmediate_operand" "rm")))
+(define_expand "popcount<mode>2"
+ [(parallel
+ [(set (match_operand:SWI248 0 "register_operand")
+ (popcount:SWI248
+ (match_operand:SWI248 1 "nonimmediate_operand")))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_POPCNT")
+
+(define_insn_and_split "*popcount<mode>2_falsedep_1"
+ [(set (match_operand:SWI48 0 "register_operand" "=&r")
+ (popcount:SWI48
+ (match_operand:SWI48 1 "nonimmediate_operand" "rm")))
(clobber (reg:CC FLAGS_REG))]
+ "TARGET_POPCNT
+ && TARGET_AVOID_FALSE_DEP_FOR_BMI && optimize_insn_for_speed_p ()"
+ "#"
+ "&& reload_completed"
+ [(parallel
+ [(set (match_dup 0)
+ (popcount:SWI48 (match_dup 1)))
+ (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)
+ (clobber (reg:CC FLAGS_REG))])]
+ "ix86_expand_clear (operands[0]);")
+
+(define_insn "*popcount<mode>2_falsedep"
+ [(set (match_operand:SWI48 0 "register_operand" "=r")
+ (popcount:SWI48
+ (match_operand:SWI48 1 "nonimmediate_operand" "rm")))
+ (unspec [(match_operand:SWI48 2 "register_operand" "0")]
+ UNSPEC_INSN_FALSE_DEP)
+ (clobber (reg:CC FLAGS_REG))]
"TARGET_POPCNT"
{
#if TARGET_MACHO
@@ -12586,15 +12613,12 @@
(set_attr "type" "bitmanip")
(set_attr "mode" "<MODE>")])
-(define_insn "*popcount<mode>2_cmp"
- [(set (reg FLAGS_REG)
- (compare
- (popcount:SWI248
- (match_operand:SWI248 1 "nonimmediate_operand" "rm"))
- (const_int 0)))
- (set (match_operand:SWI248 0 "register_operand" "=r")
- (popcount:SWI248 (match_dup 1)))]
- "TARGET_POPCNT && ix86_match_ccmode (insn, CCZmode)"
+(define_insn "*popcount<mode>2"
+ [(set (match_operand:SWI248 0 "register_operand" "=r")
+ (popcount:SWI248
+ (match_operand:SWI248 1 "nonimmediate_operand" "rm")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_POPCNT"
{
#if TARGET_MACHO
return "popcnt\t{%1, %0|%0, %1}";
@@ -12606,25 +12630,6 @@
(set_attr "type" "bitmanip")
(set_attr "mode" "<MODE>")])
-(define_insn "*popcountsi2_cmp_zext"
- [(set (reg FLAGS_REG)
- (compare
- (popcount:SI (match_operand:SI 1 "nonimmediate_operand" "rm"))
- (const_int 0)))
- (set (match_operand:DI 0 "register_operand" "=r")
- (zero_extend:DI(popcount:SI (match_dup 1))))]
- "TARGET_64BIT && TARGET_POPCNT && ix86_match_ccmode (insn, CCZmode)"
-{
-#if TARGET_MACHO
- return "popcnt\t{%1, %0|%0, %1}";
-#else
- return "popcnt{l}\t{%1, %0|%0, %1}";
-#endif
-}
- [(set_attr "prefix_rep" "1")
- (set_attr "type" "bitmanip")
- (set_attr "mode" "SI")])
-
(define_expand "bswapdi2"
[(set (match_operand:DI 0 "register_operand")
(bswap:DI (match_operand:DI 1 "nonimmediate_operand")))]
Index: x86-tune.def
===================================================================
--- x86-tune.def (revision 214000)
+++ x86-tune.def (working copy)
@@ -509,6 +509,11 @@ DEF_TUNE (X86_TUNE_NOT_VECTORMODE, "not_vectormode
DEF_TUNE (X86_TUNE_AVOID_VECTOR_DECODE, "avoid_vector_decode",
m_K8)
+/* X86_TUNE_AVOID_FALSE_DEP_FOR_BMI: Avoid false dependency
+ for bit-manipulation instructions. */
+DEF_TUNE (X86_TUNE_AVOID_FALSE_DEP_FOR_BMI, "avoid_false_dep_for_bmi",
+ m_SANDYBRIDGE | m_HASWELL | m_INTEL | m_GENERIC)
+
/*****************************************************************************/
/* This never worked well before. */
/*****************************************************************************/