https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112697

--- Comment #8 from Alexander Monakov <amonakov at gcc dot gnu.org> ---
Thanks, I can reproduce it. It is pretty tricky, though. For instance, just
swapping the order of the movapd and the cmpb in .L58 is enough to make it fast:

--- d.out.ltrans0.ltrans.slow.s 2023-12-01 18:32:54.255841611 +0300
+++ d.out.ltrans0.ltrans.fast.s 2023-12-01 18:32:20.318668991 +0300
@@ -743,8 +743,8 @@ add_force_to_mom:
        .p2align 4,,10
        .p2align 3
 .L58:
-       cmpb    $1, -680(%r11,%r12)
        movapd  %xmm5, %xmm7
+       cmpb    $1, -680(%r11,%r12)
        jne     .L54
        xorpd   %xmm6, %xmm7
 .L54:

Reply via email to