https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80808
--- Comment #2 from Jakub Jelinek <jakub at gcc dot gnu.org> --- Smaller testcase that still works at -O1 and fails at -O2 (+ -march=armv7-a -mfpu=vfpv3-d16 -mfloat-abi=hard in both cases):

/* Reduced reproducer: multi-word division helper.

   QP receives quotient words, NP holds the numerator words and is
   overwritten with the two remainder words, DP holds the divisor words.
   Only DSIZE == 2 does real work; DSIZE == 0 evaluates 1 / dsize and any
   other value aborts.

   Returns most_significant_q_limb: 1 if the initial compare-and-subtract
   of the top two numerator words against the divisor was taken, else 0.

   noinline/noclone keep the function as a separate, unspecialized body.
   NOTE(review): this is a compiler test case; statement order, asm
   templates and constraints should be kept exactly as they are.  */
static __attribute__ ((noinline, noclone)) unsigned
mpihelp_divrem (unsigned *qp, int qextra_limbs, unsigned *np, int nsize,
		unsigned *dp, int dsize)
{
  unsigned most_significant_q_limb = 0;
  switch (dsize)
    {
    case 0:
      /* dsize is 0 on this path, so this is a division by zero.
	 NOTE(review): presumably deliberate in the reduced test; it is
	 not reached by the call in main.  */
      return 1 / dsize;
    case 2:
      {
	int i;
	unsigned n1, n0, n2, d1, d0;
	/* Point NP at the two most significant numerator words.  */
	np += nsize - 2;
	d1 = dp[1];
	d0 = dp[0];
	n1 = np[1];
	n0 = np[0];
	/* If (n1:n0) >= (d1:d0), subtract the divisor once up front.  */
	if (n1 >= d1 && (n1 > d1 || n0 >= d0))
	  {
	    /* Two-word subtract with borrow:
	       %1 (n0) = %4 (n0) - %5 (d0), then
	       %0 (n1) = %2 (n1) - %3 (d1) - borrow.  */
	    __asm__ ("subs %1, %4, %5\n" "sbc %0, %2, %3"
		     : "=r" (n1), "=&r" (n0)
		     : "r" (n1), "rI" (d1), "r" (n0), "rI" (d0));
	    most_significant_q_limb = 1;
	  }
	/* One quotient word per iteration, highest index first.  */
	for (i = qextra_limbs + nsize - 2 - 1; i >= 0; i--)
	  {
	    unsigned q;
	    unsigned r;
	    /* While i >= qextra_limbs step NP down to the next lower word;
	       otherwise store 0 into np[0] in place.  */
	    if (i >= qextra_limbs)
	      np--;
	    else
	      np[0] = 0;
	    if (n1 == d1)
	      {
		/* Top words are equal: use the all-ones quotient word.  */
		q = ~(unsigned) 0;
		r = n0 + d1;
		/* r < d1 means the unsigned addition above wrapped.  */
		if (r < d1)
		  {
		    /* Two-word add with carry:
		       %1 (n0) = %4 (np[0]) + %5 (d0), then
		       %0 (n1) = %2 (r - d0) + %3 (0) + carry.  */
		    __asm__ ("adds %1, %4, %5\n" "adc %0, %2, %3"
			     : "=r" (n1), "=&r" (n0)
			     : "%r" (r - d0), "rI" (0), "%r" (np[0]), "rI" (d0));
		    qp[i] = q;
		    continue;
		  }
		n1 = d0 - (d0 != 0 ? 
1 : 0);
		n0 = -d0;
	      }
	    else
	      {
		/* Divide (n1:n0) by d1 in two half-word steps;
		   (8 * (4)) / 2 is 16, so d1 is split into 16-bit halves.
		   Leaves the quotient in q and the remainder in r.
		   NOTE(review): looks like an expanded macro (do/while (0),
		   double-underscore temporaries) -- confirm origin.  */
		do
		  {
		    unsigned __d1, __d0, __q1, __q0, __r1, __r0, __m;
		    __d1 = (d1 >> ((8 * (4)) / 2));
		    __d0 = (d1 & ((1U << ((8 * (4)) / 2)) - 1));
		    /* High quotient half, with up to two corrections.  */
		    __r1 = (n1) % __d1;
		    __q1 = (n1) / __d1;
		    __m = (unsigned) __q1 *__d0;
		    __r1 = __r1 * (1U << ((8 * (4)) / 2)) | ((unsigned) (n0) >> ((8 * (4)) / 2));
		    if (__r1 < __m)
		      {
			__q1--, __r1 += (d1);
			if (__r1 >= (d1))
			  if (__r1 < __m)
			    __q1--, __r1 += (d1);
		      }
		    __r1 -= __m;
		    /* Low quotient half, same scheme on the low half of n0.  */
		    __r0 = __r1 % __d1;
		    __q0 = __r1 / __d1;
		    __m = (unsigned) __q0 *__d0;
		    __r0 = __r0 * (1U << ((8 * (4)) / 2)) | (n0 & (((unsigned) 1 << ((8 * (4)) / 2)) - 1));
		    if (__r0 < __m)
		      {
			__q0--, __r0 += (d1);
			if (__r0 >= (d1))
			  if (__r0 < __m)
			    __q0--, __r0 += (d1);
		      }
		    __r0 -= __m;
		    q = (unsigned) __q1 * (1U << ((8 * (4)) / 2)) | __q0;
		    r = __r0;
		  }
		while (0);
		/* Outputs are n1 (%0, early-clobber) and n0 (%1); inputs are
		   d0 (%2) and q (%3).  Presumably (n1:n0) = d0 * q as a
		   two-word product -- confirm against the umull operand
		   order.
		   NOTE(review): the "%r1"-style operand spelling and the
		   "r0", "r1" clobbers look unusual; left untouched because
		   this is a reproducer.  */
		__asm__ ("umull %r1, %r0, %r2, %r3"
			 : "=&r" (n1), "=r" (n0)
			 : "r" (d0), "r" (q):"r0", "r1");
	      }
	    n2 = np[0];
	  q_test:
	    /* While (n1:n0) > (r:n2), lower q by one and adjust.  */
	    if (n1 > r || (n1 == r && n0 > n2))
	      {
		q--;
		/* (n1:n0) -= (0:d0):
		   %1 (n0) = %4 (n0) - %5 (d0), then
		   %0 (n1) = %2 (n1) - %3 (0) - borrow.  */
		__asm__ ("subs %1, %4, %5\n" "sbc %0, %2, %3"
			 : "=r" (n1), "=&r" (n0)
			 : "r" (n1), "rI" (0), "r" (n0), "rI" (d0));
		r += d1;
		/* Re-test only if r += d1 did not wrap.  */
		if (r >= d1)
		  goto q_test;
	      }
	    qp[i] = q;
	    /* (n1:n0) = (r:n2) - (n1:n0):
	       %1 (n0) = %4 (n2) - %5 (n0), then
	       %0 (n1) = %2 (r) - %3 (n1) - borrow.  */
	    __asm__ ("subs %1, %4, %5\n" "sbc %0, %2, %3"
		     : "=r" (n1), "=&r" (n0)
		     : "r" (r), "rI" (n1), "r" (n2), "rI" (n0));
	  }
	/* Store the final two words back through NP.  */
	np[1] = n1;
	np[0] = n0;
      }
      break;
    default:
      __builtin_abort ();
    }
  return most_significant_q_limb;
}

/* Driver: runs mpihelp_divrem on a 3-word numerator and a 2-word divisor
   and aborts unless the return value, qp[0], np[0] and np[1] match the
   expected constants below.  */
int
main ()
{
  unsigned qp[1];
  unsigned np[3] = { 0xdaafeaa6, 0x0e77816a, 1 };
  unsigned dp[2] = { 0x6816ec64, 0xb9d5666d };
  /* l is a volatile zero added to every argument -- presumably so the
     compiler cannot treat the arguments as compile-time constants.  */
  volatile int l = 0;
  unsigned ret = mpihelp_divrem (qp + l, 0 + l, np + l, 3 + l, dp + l, 2 + l);
  if (ret != 0 || qp[0] != 1 || np[0] != 0x7298fe42 || np[1] != 0x54a21afd)
    __builtin_abort ();
  return 0;
}