Hello! As stated in the comment above inline_secondary_memory_needed in i386.c:
--cut here--
The function can't work reliably when one of the CLASSES is a class
containing registers from multiple sets. We avoid this by never combining
different sets in a single alternative in the machine description.
Ensure that this constraint holds to avoid unexpected surprises.
--cut here--

The patch also enforces this constraint for mask registers and fixes an oversight in *movsi_internal.

2017-01-17  Uros Bizjak  <ubiz...@gmail.com>

	* config/i386/i386.h (MASK_CLASS_P): New define.
	* config/i386/i386.c (inline_secondary_memory_needed): Ensure that
	there are no registers from different register sets also when
	mask registers are used.  Update function comment.
	* config/i386/i386.md (*movsi_internal): Split (*k/*krm) alternative
	to (*k/*r) and (*k/*km) alternatives.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

Committed to mainline SVN.

Uros.
Index: config/i386/i386.c =================================================================== --- config/i386/i386.c (revision 244540) +++ config/i386/i386.c (working copy) @@ -39868,19 +39868,19 @@ ix86_class_likely_spilled_p (reg_class_t rclass) return false; } -/* If we are copying between general and FP registers, we need a memory - location. The same is true for SSE and MMX registers. +/* If we are copying between registers from different register sets + (e.g. FP and integer), we may need a memory location. - To optimize register_move_cost performance, allow inline variant. - - The macro can't work reliably when one of the CLASSES is class containing - registers from multiple units (SSE, MMX, integer). We avoid this by never - combining those units in single alternative in the machine description. + The function can't work reliably when one of the CLASSES is a class + containing registers from multiple sets. We avoid this by never combining + different sets in a single alternative in the machine description. Ensure that this constraint holds to avoid unexpected surprises. - When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not - enforce these sanity checks. */ + When STRICT is false, we are being called from REGISTER_MOVE_COST, + so do not enforce these sanity checks. + To optimize register_move_cost performance, define inline variant. 
*/ + static inline bool inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2, machine_mode mode, int strict) @@ -39887,12 +39887,15 @@ inline_secondary_memory_needed (enum reg_class cla { if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS)) return false; + if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1) || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2) || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1) || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2) || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1) - || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2)) + || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2) + || MAYBE_MASK_CLASS_P (class1) != MASK_CLASS_P (class1) + || MAYBE_MASK_CLASS_P (class2) != MASK_CLASS_P (class2)) { gcc_assert (!strict || lra_in_progress); return true; @@ -39902,7 +39905,7 @@ inline_secondary_memory_needed (enum reg_class cla return true; /* Between mask and general, we have moves no larger than word size. 
*/ - if ((MAYBE_MASK_CLASS_P (class1) != MAYBE_MASK_CLASS_P (class2)) + if ((MASK_CLASS_P (class1) != MASK_CLASS_P (class2)) && (GET_MODE_SIZE (mode) > UNITS_PER_WORD)) return true; Index: config/i386/i386.h =================================================================== --- config/i386/i386.h (revision 244540) +++ config/i386/i386.h (working copy) @@ -1378,6 +1378,8 @@ enum reg_class reg_class_subset_p ((CLASS), ALL_SSE_REGS) #define MMX_CLASS_P(CLASS) \ ((CLASS) == MMX_REGS) +#define MASK_CLASS_P(CLASS) \ + reg_class_subset_p ((CLASS), MASK_REGS) #define MAYBE_INTEGER_CLASS_P(CLASS) \ reg_classes_intersect_p ((CLASS), GENERAL_REGS) #define MAYBE_FLOAT_CLASS_P(CLASS) \ Index: config/i386/i386.md =================================================================== --- config/i386/i386.md (revision 244540) +++ config/i386/i386.md (working copy) @@ -2324,9 +2324,9 @@ (define_insn "*movsi_internal" [(set (match_operand:SI 0 "nonimmediate_operand" - "=r,m ,*y,*y,?rm,?*y,*v,*v,*v,m ,?r ,?r,?*Yi,*k ,*rm") + "=r,m ,*y,*y,?rm,?*y,*v,*v,*v,m ,?r ,?r,?*Yi,*k,*k ,*rm") (match_operand:SI 1 "general_operand" - "g ,re,C ,*y,*y ,rm ,C ,*v,m ,*v,*Yj,*v,r ,*krm,*k"))] + "g ,re,C ,*y,*y ,rm ,C ,*v,m ,*v,*Yj,*v,r ,*r,*km,*k"))] "!(MEM_P (operands[0]) && MEM_P (operands[1]))" { switch (get_attr_type (insn)) @@ -2403,7 +2403,7 @@ (const_string "sselog1") (eq_attr "alternative" "7,8,9,10,12") (const_string "ssemov") - (eq_attr "alternative" "13,14") + (eq_attr "alternative" "13,14,15") (const_string "mskmov") (and (match_operand 0 "register_operand") (match_operand 1 "pic_32bit_operand"))