I'm considering the following. Does anyone believe this i386/i486 decision regarding DImode is a mistake? Should I restrict it to Pentium and later by checking for cmpxchg?
r~
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 7ce57d8..7d28e43 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -248,6 +248,9 @@
   ;; For BMI2 support
   UNSPEC_PDEP
   UNSPEC_PEXT
+
+  ;; For __atomic support
+  UNSPEC_MOVA
 ])
 
 (define_c_enum "unspecv" [
diff --git a/gcc/config/i386/sync.md b/gcc/config/i386/sync.md
index e5579b1..da08e92 100644
--- a/gcc/config/i386/sync.md
+++ b/gcc/config/i386/sync.md
@@ -46,6 +46,88 @@
   "lock{%;} or{l}\t{$0, (%%esp)|DWORD PTR [esp], 0}"
   [(set_attr "memory" "unknown")])
 
+;; ??? From volume 3 section 7.1.1 Guaranteed Atomic Operations,
+;; Only beginning at Pentium family processors do we get any guarantee of
+;; atomicity in aligned 64-bit quantities.  Beginning at P6, we get a
+;; guarantee for 64-bit accesses that do not cross a cacheline boundary.
+;; This distinction is ignored below, since I *suspect* that the x87
+;; fildll/fistpll pair will appear atomic from the point of view of
+;; user-level threads even back on the 80386; I suspect that the
+;; non-atomicity can only be seen from other bus-level devices.
+;;
+;; Importantly, *no* processor makes atomicity guarantees for larger
+;; accesses.  In particular, there's no way to perform an atomic TImode
+;; move, despite the apparent applicability of MOVDQA et al.
+
+;; On 32-bit, DImode is only usable as an atomic quantity when we can
+;; move it in one piece through an FPU or SSE register.
+(define_mode_iterator ATOMIC
+  [QI HI SI (DI "TARGET_64BIT || TARGET_80387 || TARGET_SSE")])
+
+(define_expand "atomic_load<mode>"
+  [(set (match_operand:ATOMIC 0 "register_operand" "")
+	(unspec:ATOMIC [(match_operand:ATOMIC 1 "memory_operand" "")
+			(match_operand:SI 2 "const_int_operand" "")]
+		       UNSPEC_MOVA))]
+  ""
+{
+  /* For DImode on 32-bit, we can use the FPU to perform the load.
+     The FPU pattern's operands are (dest reg, src mem); the memory
+     model in operands[2] needs no code here, since plain x86 loads
+     already have acquire semantics.  */
+  if (<MODE>mode == DImode && !TARGET_64BIT)
+    emit_insn (gen_atomic_loaddi_fpu (operands[0], operands[1]));
+  else
+    emit_move_insn (operands[0], operands[1]);
+  DONE;
+})
+
+;; Single-insn DImode load via an x87 or SSE register; kept as one
+;; pattern until after reload so the 64-bit access is not split into
+;; two 32-bit moves.
+(define_insn_and_split "atomic_loaddi_fpu"
+  [(set (match_operand:DI 0 "register_operand" "=fx")
+	(unspec:DI [(match_operand:DI 1 "memory_operand" "m")]
+		   UNSPEC_MOVA))]
+  "!TARGET_64BIT && (TARGET_80387 || TARGET_SSE)"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 0) (match_dup 1))])
+
+(define_expand "atomic_store<mode>"
+  [(set (match_operand:ATOMIC 0 "memory_operand" "")
+	(unspec:ATOMIC [(match_operand:ATOMIC 1 "register_operand" "")
+			(match_operand:SI 2 "const_int_operand" "")]
+		       UNSPEC_MOVA))]
+  ""
+{
+  enum memmodel model = (enum memmodel) INTVAL (operands[2]);
+
+  if (<MODE>mode == DImode && !TARGET_64BIT)
+    {
+      /* For DImode on 32-bit, we can use the FPU to perform the store.
+	 The FPU pattern's operands are (dest mem, src reg).  A plain
+	 store is only a release, so seq-cst still needs a trailing
+	 full barrier.  */
+      emit_insn (gen_atomic_storedi_fpu (operands[0], operands[1]));
+      if (model == MEMMODEL_SEQ_CST)
+	emit_insn (gen_mem_thread_fence (operands[2]));
+    }
+  else
+    {
+      /* For non-seq-cst stores, we can simply just perform the store.  */
+      if (model != MEMMODEL_SEQ_CST)
+	{
+	  emit_move_insn (operands[0], operands[1]);
+	  DONE;
+	}
+
+      /* For sequentially-consistent stores, use xchg, whose implicit
+	 lock acts as a full barrier; the old value is discarded.  */
+      emit_insn (gen_atomic_exchange<mode> (gen_reg_rtx (<MODE>mode),
+					    operands[0], operands[1],
+					    operands[2]));
+    }
+  DONE;
+})
+
+;; Single-insn DImode store via an x87 or SSE register; see
+;; atomic_loaddi_fpu for why the split is deferred until after reload.
+(define_insn_and_split "atomic_storedi_fpu"
+  [(set (match_operand:DI 0 "memory_operand" "=m")
+	(unspec:DI [(match_operand:DI 1 "register_operand" "fx")]
+		   UNSPEC_MOVA))]
+  "!TARGET_64BIT && (TARGET_80387 || TARGET_SSE)"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 0) (match_dup 1))])
+
 (define_expand "atomic_compare_and_swap<mode>"
   [(match_operand:QI 0 "register_operand" "")		;; bool success output
    (match_operand:SWI124 1 "register_operand" "")	;; oldval output