On Thu, 2024-03-07 at 09:12 +0800, Lulu Cheng wrote:

> +  output_asm_insn ("1:", operands);
> +  output_asm_insn ("ll.<amo>\t%0,%1", operands);
> +
> +  /* As shown by the test case atomic-cas-int.C, on loongarch64 at -O1
> +     and higher the return value of val_without_const_folding is not
> +     truncated and is passed directly to compare_exchange_strong.
> +     However, the 'bne' instruction does not distinguish between 32-bit
> +     and 64-bit operands, so if the upper 32 bits of the register are
> +     not a sign extension of bit 31, the comparison here may be invalid
> +     and the result of the operation will be wrong.  */
> +
> +  if (TARGET_64BIT && REG_P (operands[2])
> +      && GET_MODE (operands[2]) == SImode)
> +    {
> +      output_asm_insn ("addi.w\t%5,%2,0", operands);
> +      output_asm_insn ("bne\t%0,%5,2f", operands);

It would be better to extend the expected value once, before the ll/sc
loop (like LLVM does), instead of repeating the extension in each
iteration.  Something like:

diff --git a/gcc/config/loongarch/sync.md b/gcc/config/loongarch/sync.md
index 8f35a5b48d2..c21781947fd 100644
--- a/gcc/config/loongarch/sync.md
+++ b/gcc/config/loongarch/sync.md
@@ -234,11 +234,11 @@ (define_insn "atomic_exchange<mode>_short"
   "amswap%A3.<amo>\t%0,%z2,%1"
   [(set (attr "length") (const_int 4))])
 
-(define_insn "atomic_cas_value_strong<mode>"
+(define_insn "atomic_cas_value_strong<GPR:mode><X:mode>"
   [(set (match_operand:GPR 0 "register_operand" "=&r")
        (match_operand:GPR 1 "memory_operand" "+ZC"))
    (set (match_dup 1)
-       (unspec_volatile:GPR [(match_operand:GPR 2 "reg_or_0_operand" "rJ")
+       (unspec_volatile:GPR [(match_operand:X 2 "reg_or_0_operand" "rJ")
                              (match_operand:GPR 3 "reg_or_0_operand" "rJ")
                              (match_operand:SI 4 "const_int_operand")]  ;; mod_s
         UNSPEC_COMPARE_AND_SWAP))
@@ -246,10 +246,10 @@ (define_insn "atomic_cas_value_strong<mode>"
   ""
 {
   return "1:\\n\\t"
-        "ll.<amo>\\t%0,%1\\n\\t"
+        "ll.<GPR:amo>\\t%0,%1\\n\\t"
         "bne\\t%0,%z2,2f\\n\\t"
         "or%i3\\t%5,$zero,%3\\n\\t"
-        "sc.<amo>\\t%5,%1\\n\\t"
+        "sc.<GPR:amo>\\t%5,%1\\n\\t"
         "beqz\\t%5,1b\\n\\t"
         "b\\t3f\\n\\t"
         "2:\\n\\t"
@@ -301,9 +301,23 @@ (define_expand "atomic_compare_and_swap<mode>"
                                                         operands[3], operands[4],
                                                         operands[6]));
   else
-    emit_insn (gen_atomic_cas_value_strong<mode> (operands[1], operands[2],
-                                                 operands[3], operands[4],
-                                                 operands[6]));
+    {
+      rtx (*cas)(rtx, rtx, rtx, rtx, rtx) =
+       TARGET_64BIT ? gen_atomic_cas_value_strong<mode>di
+                    : gen_atomic_cas_value_strong<mode>si;
+      rtx expect = operands[3];
+
+      if (<MODE>mode == SImode
+         && TARGET_64BIT
+         && operands[3] != const0_rtx)
+       {
+         expect = gen_reg_rtx (DImode);
+         emit_insn (gen_extendsidi2 (expect, operands[3]));
+       }
+
+      emit_insn (cas (operands[1], operands[2], expect, operands[4],
+                     operands[6]));
+    }
 
   rtx compare = operands[1];
   if (operands[3] != const0_rtx)

It produces:

        slli.w  $r4,$r4,0
        1:
        ll.w    $r14,$r3,0
        bne     $r14,$r4,2f
        or      $r15,$zero,$r12
        sc.w    $r15,$r3,0
        beqz    $r15,1b
        b       3f
        2:
        dbar    0b10100
        3:

for the test case, and the compiled test case runs successfully.  I've
not done a full bootstrap yet, though.
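
(In case it helps with reproducing this: the asm above is roughly what the
cross compiler emits with something along the lines of the command below;
the target triplet is an assumption, substitute whatever your toolchain is
called.)

	loongarch64-unknown-linux-gnu-g++ -O2 -S atomic-cas-int.C -o -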

-- 
Xi Ruoyao <xry...@xry111.site>
School of Aerospace Science and Technology, Xidian University
