https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80808
Bug ID: 80808 Summary: [7/8 Regression] gnupg miscompilation on arm starting with r241660 Product: gcc Version: 7.0 Status: UNCONFIRMED Severity: normal Priority: P3 Component: target Assignee: unassigned at gcc dot gnu.org Reporter: jakub at gcc dot gnu.org Target Milestone: --- The following testcase distilled from gnupg mpih-div.c is miscompiled on arm with -O2 -march=armv7-a -mfpu=vfpv3-d16 -mfloat-abi=hard options: /* Type aliases used by the testcase: limbs are unsigned int, sizes and indices are int. */ typedef unsigned int mpi_limb_t; typedef mpi_limb_t *mpi_ptr_t; typedef int mpi_size_t; typedef mpi_limb_t UWtype; typedef unsigned int USItype __attribute__ ((mode (SI))); /* mpihelp_add_n: adds the limbs of s2_ptr to the limbs of s1_ptr with carry propagation, stores each sum limb in res_ptr and returns the final carry. The index j counts up from -size to 0 and the body runs before the first check, so size is expected to be positive. Every helper starts with an empty asm that has a "memory" clobber. */ static __attribute__ ((noinline, noclone)) mpi_limb_t mpihelp_add_n (mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_ptr_t s2_ptr, mpi_size_t size) { mpi_limb_t x, y, cy; mpi_size_t j; asm volatile ("":::"memory"); j = -size; s1_ptr -= j; s2_ptr -= j; res_ptr -= j; cy = 0; do { y = s2_ptr[j]; x = s1_ptr[j]; y += cy; cy = y < cy; y += x; cy += y < x; res_ptr[j] = y; } while (++j); return cy; } /* mpihelp_submul_1: for each of the s1_size limbs, multiplies s1_ptr[j] by s2_limb, subtracts the low product limb (plus the running carry) from res_ptr[j] in place, and returns the final carry limb. As in mpihelp_add_n, the body runs before the first loop check. */ static __attribute__ ((noinline, noclone)) mpi_limb_t mpihelp_submul_1 (mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size, mpi_limb_t s2_limb) { mpi_limb_t cy_limb; mpi_size_t j; mpi_limb_t prod_high, prod_low; mpi_limb_t x; asm volatile ("":::"memory"); j = -s1_size; res_ptr -= j; s1_ptr -= j; cy_limb = 0; do { /* umull: full product of s1_ptr[j] and s2_limb, low half in prod_low (operand 1), high half in prod_high (operand 0). */ __asm__ ("umull %r1, %r0, %r2, %r3": "=&r" ((USItype) (prod_high)), "=r" ((USItype) (prod_low)): "r" ((USItype) (s1_ptr[j])), "r" ((USItype) (s2_limb)):"r0", "r1"); prod_low += cy_limb; cy_limb = (prod_low < cy_limb ? 1 : 0) + prod_high; x = res_ptr[j]; prod_low = x - prod_low; cy_limb += prod_low > x ? 
1 : 0; res_ptr[j] = prod_low; } while (++j); return cy_limb; } /* mpihelp_sub_n: subtracts the limbs of s2_ptr from the limbs of s1_ptr with borrow propagation, stores each difference limb in res_ptr and returns the final borrow. Same loop shape as mpihelp_add_n. */ static __attribute__ ((noinline, noclone)) mpi_limb_t mpihelp_sub_n (mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_ptr_t s2_ptr, mpi_size_t size) { mpi_limb_t x, y, cy; mpi_size_t j; asm volatile ("":::"memory"); j = -size; s1_ptr -= j; s2_ptr -= j; res_ptr -= j; cy = 0; do { y = s2_ptr[j]; x = s1_ptr[j]; y += cy; cy = y < cy; y = x - y; cy += y > x; res_ptr[j] = y; } while (++j); return cy; } /* mpihelp_cmp: compares two size-limb numbers starting at index size - 1 and walking down to 0. Returns 0 when all limbs match, otherwise 1 or -1 according to the first differing limb pair. */ static __attribute__ ((noinline, noclone)) int mpihelp_cmp (mpi_ptr_t op1_ptr, mpi_ptr_t op2_ptr, mpi_size_t size) { mpi_size_t i; mpi_limb_t op1_word, op2_word; asm volatile ("":::"memory"); for (i = size - 1; i >= 0; i--) { op1_word = op1_ptr[i]; op2_word = op2_ptr[i]; if (op1_word != op2_word) goto diff; } return 0; diff: return (op1_word > op2_word) ? 1 : -1; } /* mpihelp_divrem: divides the nsize-limb number at np by the dsize-limb number at dp. Quotient limbs are stored through qp (qextra_limbs additional low quotient limbs are produced by treating the numerator as extended with zero limbs), np is overwritten and ends up holding the remaining limbs, and the return value is the most significant quotient limb (0 or 1). The algorithm is selected by dsize. */ static __attribute__ ((noinline, noclone)) mpi_limb_t mpihelp_divrem (mpi_ptr_t qp, mpi_size_t qextra_limbs, mpi_ptr_t np, mpi_size_t nsize, mpi_ptr_t dp, mpi_size_t dsize) { mpi_limb_t most_significant_q_limb = 0; switch (dsize) { /* dsize == 0: evaluates 1 / dsize, i.e. a division by zero. */ case 0: return 1 / dsize; /* dsize == 1: single-limb divisor d. */ case 1: { mpi_size_t i; mpi_limb_t n1; mpi_limb_t d; d = dp[0]; n1 = np[nsize - 1]; if (n1 >= d) { n1 -= d; most_significant_q_limb = 1; } qp += qextra_limbs; /* Each iteration divides the two-limb value n1:np[i] by d, working on 16-bit halves of d ((8 * 4) / 2 bit shifts); the quotient limb goes to qp[i] and the remainder is carried forward in n1. */ for (i = nsize - 2; i >= 0; i--) do { UWtype __d1, __d0, __q1, __q0, __r1, __r0, __m; __d1 = ((UWtype) (d) >> ((8 * (4)) / 2)); __d0 = ((UWtype) (d) & (((UWtype) 1 << ((8 * (4)) / 2)) - 1)); __r1 = (n1) % __d1; __q1 = (n1) / __d1; __m = (UWtype) __q1 *__d0; __r1 = __r1 * ((UWtype) 1 << ((8 * (4)) / 2)) | ((UWtype) (np[i]) >> ((8 * (4)) / 2)); if (__r1 < __m) { __q1--, __r1 += (d); if (__r1 >= (d)) if (__r1 < __m) __q1--, __r1 += (d); } __r1 -= __m; __r0 = __r1 % __d1; __q0 = __r1 / __d1; __m = (UWtype) __q0 *__d0; __r0 = __r0 * ((UWtype) 1 << ((8 * (4)) / 2)) | ((UWtype) (np[i]) & (((UWtype) 1 << ((8 * (4)) / 2)) - 1)); if (__r0 < __m) { __q0--, __r0 += (d); if (__r0 >= (d)) if (__r0 < __m) __q0--, __r0 += (d); } __r0 -= __m; (qp[i]) = (UWtype) __q1 
*((UWtype) 1 << ((8 * (4)) / 2)) | __q0; (n1) = __r0; } while (0); qp -= qextra_limbs; /* Same division step with a constant 0 as the low numerator limb, producing the qextra_limbs extra quotient limbs. */ for (i = qextra_limbs - 1; i >= 0; i--) do { UWtype __d1, __d0, __q1, __q0, __r1, __r0, __m; __d1 = ((UWtype) (d) >> ((8 * (4)) / 2)); __d0 = ((UWtype) (d) & (((UWtype) 1 << ((8 * (4)) / 2)) - 1)); __r1 = (n1) % __d1; __q1 = (n1) / __d1; __m = (UWtype) __q1 *__d0; __r1 = __r1 * ((UWtype) 1 << ((8 * (4)) / 2)) | ((UWtype) (0) >> ((8 * (4)) / 2)); if (__r1 < __m) { __q1--, __r1 += (d); if (__r1 >= (d)) if (__r1 < __m) __q1--, __r1 += (d); } __r1 -= __m; __r0 = __r1 % __d1; __q0 = __r1 / __d1; __m = (UWtype) __q0 *__d0; __r0 = __r0 * ((UWtype) 1 << ((8 * (4)) / 2)) | ((UWtype) (0) & (((UWtype) 1 << ((8 * (4)) / 2)) - 1)); if (__r0 < __m) { __q0--, __r0 += (d); if (__r0 >= (d)) if (__r0 < __m) __q0--, __r0 += (d); } __r0 -= __m; (qp[i]) = (UWtype) __q1 *((UWtype) 1 << ((8 * (4)) / 2)) | __q0; (n1) = __r0; } while (0); np[0] = n1; } break; /* dsize == 2: two-limb divisor d1:d0. This is the path main() exercises. */ case 2: { mpi_size_t i; mpi_limb_t n1, n0, n2; mpi_limb_t d1, d0; np += nsize - 2; d1 = dp[1]; d0 = dp[0]; n1 = np[1]; n0 = np[0]; if (n1 >= d1 && (n1 > d1 || n0 >= d0)) { /* Two-limb subtract with borrow: n1:n0 -= d1:d0. */ __asm__ ("subs %1, %4, %5\n" "sbc %0, %2, %3": "=r" ((USItype) (n1)), "=&r" ((USItype) (n0)):"r" ((USItype) (n1)), "rI" ((USItype) (d1)), "r" ((USItype) (n0)), "rI" ((USItype) (d0))); most_significant_q_limb = 1; } for (i = qextra_limbs + nsize - 2 - 1; i >= 0; i--) { mpi_limb_t q; mpi_limb_t r; /* Step down to the next numerator limb, or feed in a zero limb once only the extra quotient limbs remain. */ if (i >= qextra_limbs) np--; else np[0] = 0; if (n1 == d1) { /* High limbs equal: start from the largest possible quotient limb. */ q = ~(mpi_limb_t) 0; r = n0 + d1; if (r < d1) { /* r wrapped around: two-limb add with carry, n1:n0 = (r - d0):np[0] + 0:d0, then store q and move on. */ __asm__ ("adds %1, %4, %5\n" "adc %0, %2, %3": "=r" ((USItype) (n1)), "=&r" ((USItype) (n0)):"%r" ((USItype) (r - d0)), "rI" ((USItype) (0)), "%r" ((USItype) (np[0])), "rI" ((USItype) (d0))); qp[i] = q; continue; } n1 = d0 - (d0 != 0 ? 
1 : 0); n0 = -d0; } else { /* Estimate q and r by dividing n1:n0 by d1 with the same 16-bit-halves division step used in case 1. */ do { UWtype __d1, __d0, __q1, __q0, __r1, __r0, __m; __d1 = ((UWtype) (d1) >> ((8 * (4)) / 2)); __d0 = ((UWtype) (d1) & (((UWtype) 1 << ((8 * (4)) / 2)) - 1)); __r1 = (n1) % __d1; __q1 = (n1) / __d1; __m = (UWtype) __q1 *__d0; __r1 = __r1 * ((UWtype) 1 << ((8 * (4)) / 2)) | ((UWtype) (n0) >> ((8 * (4)) / 2)); if (__r1 < __m) { __q1--, __r1 += (d1); if (__r1 >= (d1)) if (__r1 < __m) __q1--, __r1 += (d1); } __r1 -= __m; __r0 = __r1 % __d1; __q0 = __r1 / __d1; __m = (UWtype) __q0 *__d0; __r0 = __r0 * ((UWtype) 1 << ((8 * (4)) / 2)) | ((UWtype) (n0) & (((UWtype) 1 << ((8 * (4)) / 2)) - 1)); if (__r0 < __m) { __q0--, __r0 += (d1); if (__r0 >= (d1)) if (__r0 < __m) __q0--, __r0 += (d1); } __r0 -= __m; (q) = (UWtype) __q1 *((UWtype) 1 << ((8 * (4)) / 2)) | __q0; (r) = __r0; } while (0); /* umull: n1:n0 = d0 * q (high half in n1, low half in n0). */ __asm__ ("umull %r1, %r0, %r2, %r3": "=&r" ((USItype) (n1)), "=r" ((USItype) (n0)): "r" ((USItype) (d0)), "r" ((USItype) (q)):"r0", "r1"); } n2 = np[0]; /* While n1:n0 is greater than r:n2, decrement q, subtract d0 from n1:n0 and add d1 to r; re-test only if r did not wrap around. */ q_test: if (n1 > r || (n1 == r && n0 > n2)) { q--; __asm__ ("subs %1, %4, %5\n" "sbc %0, %2, %3": "=r" ((USItype) (n1)), "=&r" ((USItype) (n0)):"r" ((USItype) (n1)), "rI" ((USItype) (0)), "r" ((USItype) (n0)), "rI" ((USItype) (d0))); r += d1; if (r >= d1) goto q_test; } qp[i] = q; /* Two-limb subtract with borrow: n1:n0 = r:n2 - n1:n0, the running remainder for the next iteration. */ __asm__ ("subs %1, %4, %5\n" "sbc %0, %2, %3": "=r" ((USItype) (n1)), "=&r" ((USItype) (n0)):"r" ((USItype) (r)), "rI" ((USItype) (n1)), "r" ((USItype) (n2)), "rI" ((USItype) (n0))); } /* Store the final two limbs back into np. */ np[1] = n1; np[0] = n0; } break; /* All other dsize values: multi-limb divisor; dX and d1 are the two highest divisor limbs. */ default: { mpi_size_t i; mpi_limb_t dX, d1, n0; np += nsize - dsize; dX = dp[dsize - 1]; d1 = dp[dsize - 2]; n0 = np[dsize - 1]; if (n0 >= dX) { if (n0 > dX || mpihelp_cmp (np, dp, dsize - 1) >= 0) { mpihelp_sub_n (np, np, dp, dsize); n0 = np[dsize - 1]; most_significant_q_limb = 1; } } for (i = qextra_limbs + nsize - dsize - 1; i >= 0; i--) { mpi_limb_t q; mpi_limb_t n1, n2; mpi_limb_t cy_limb; if (i >= qextra_limbs) { np--; n2 = np[dsize]; } else { /* No numerator limbs left: shift the dsize - 1 low limbs of np up by one position and insert a zero limb at the bottom. */ n2 = np[dsize - 1]; do { mpi_size_t _i; for (_i = (dsize - 1) - 1; _i >= 
0; _i--) (np + 1)[_i] = (np)[_i]; } while (0); np[0] = 0; } if (n0 == dX) { q = ~(mpi_limb_t) 0; } else { mpi_limb_t r; /* Estimate q and r by dividing n0:np[dsize - 1] by dX using 16-bit halves of dX. */ do { UWtype __d1, __d0, __q1, __q0, __r1, __r0, __m; __d1 = ((UWtype) (dX) >> ((8 * (4)) / 2)); __d0 = ((UWtype) (dX) & (((UWtype) 1 << ((8 * (4)) / 2)) - 1)); __r1 = (n0) % __d1; __q1 = (n0) / __d1; __m = (UWtype) __q1 *__d0; __r1 = __r1 * ((UWtype) 1 << ((8 * (4)) / 2)) | ((UWtype) (np[dsize - 1]) >> ((8 * (4)) / 2)); if (__r1 < __m) { __q1--, __r1 += (dX); if (__r1 >= (dX)) if (__r1 < __m) __q1--, __r1 += (dX); } __r1 -= __m; __r0 = __r1 % __d1; __q0 = __r1 / __d1; __m = (UWtype) __q0 *__d0; __r0 = __r0 * ((UWtype) 1 << ((8 * (4)) / 2)) | ((UWtype) (np[dsize - 1]) & (((UWtype) 1 << ((8 * (4)) / 2)) - 1)); if (__r0 < __m) { __q0--, __r0 += (dX); if (__r0 >= (dX)) if (__r0 < __m) __q0--, __r0 += (dX); } __r0 -= __m; (q) = (UWtype) __q1 *((UWtype) 1 << ((8 * (4)) / 2)) | __q0; (r) = __r0; } while (0); /* umull: n1:n0 = d1 * q (high half in n1, low half in n0). */ __asm__ ("umull %r1, %r0, %r2, %r3": "=&r" ((USItype) (n1)), "=r" ((USItype) (n0)): "r" ((USItype) (d1)), "r" ((USItype) (q)):"r0", "r1"); /* While n1:n0 is greater than r:np[dsize - 2], decrement q and adjust r and n1:n0; stop as soon as r wraps around. */ while (n1 > r || (n1 == r && n0 > np[dsize - 2])) { q--; r += dX; if (r < dX) break; n1 -= n0 < d1; n0 -= d1; } } /* Subtract q * dp from np; when the returned carry limb differs from n2, add dp back once and decrement q. */ cy_limb = mpihelp_submul_1 (np, dp, dsize, q); if (n2 != cy_limb) { mpihelp_add_n (np, np, dp, dsize); q--; } qp[i] = q; n0 = np[dsize - 1]; } } } return most_significant_q_limb; } /* main: runs mpihelp_divrem on a fixed 3-limb numerator and 2-limb divisor and aborts unless the return value is 0, qp[0] is 1 and np[0] / np[1] hold the expected limbs. The volatile l (always 0) is added to every argument, presumably so the compiler cannot treat the arguments as compile-time constants. */ int main () { mpi_limb_t qp[1]; mpi_limb_t np[3] = { 0xdaafeaa6, 0x0e77816a, 1 }; mpi_limb_t dp[2] = { 0x6816ec64, 0xb9d5666d }; volatile int l = 0; mpi_limb_t ret = mpihelp_divrem (qp + l, 0 + l, np + l, 3 + l, dp + l, 2 + l); if (ret != 0 || qp[0] != 1 || np[0] != 0x7298fe42 || np[1] != 0x54a21afd) __builtin_abort (); return 0; }