https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80808

            Bug ID: 80808
           Summary: [7/8 Regression] gnupg miscompilation on arm starting
                    with r241660
           Product: gcc
           Version: 7.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: target
          Assignee: unassigned at gcc dot gnu.org
          Reporter: jakub at gcc dot gnu.org
  Target Milestone: ---

The following testcase, distilled from gnupg's mpih-div.c, is miscompiled on arm
with the options -O2 -march=armv7-a -mfpu=vfpv3-d16 -mfloat-abi=hard:

/* One word ("limb") of the multi-word numbers handled by the mpihelp_*
   routines below.  */
typedef unsigned int mpi_limb_t;
/* Pointer to an array of limbs.  */
typedef mpi_limb_t *mpi_ptr_t;
/* Signed limb count / index; the helpers below negate it and count up
   towards zero, so it has to be a signed type.  */
typedef int mpi_size_t;
/* Alias of the limb type used for the temporaries of the open-coded
   division steps in mpihelp_divrem.  */
typedef mpi_limb_t UWtype;
/* unsigned int with GCC machine mode SI requested explicitly; used to
   cast the operands of the inline asm statements.  */
typedef unsigned int USItype __attribute__ ((mode (SI)));

/* Add the SIZE-limb vectors at S1_PTR and S2_PTR limb by limb, lowest
   index first, storing each sum limb at the same index of RES_PTR.
   Returns the carry out of the last limb.  The loop body runs before
   the termination test, so at least one limb is always processed.
   RES_PTR is allowed to be the same array as an input (mpihelp_divrem
   calls this with RES_PTR == S1_PTR); each limb is read before the
   result limb is written.  */
static __attribute__ ((noinline, noclone)) mpi_limb_t
mpihelp_add_n (mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
               mpi_ptr_t s2_ptr, mpi_size_t size)
{
  mpi_limb_t carry = 0;
  mpi_size_t idx;
  mpi_limb_t *sum_end;
  const mpi_limb_t *a_end;
  const mpi_limb_t *b_end;

  /* Empty asm with a "memory" clobber: a compiler-level barrier.  */
  asm volatile ("":::"memory");

  /* Index with a negative offset from one past the end of each vector
     so the loop can simply count up until the index reaches zero.  */
  idx = -size;
  a_end = s1_ptr - idx;
  b_end = s2_ptr - idx;
  sum_end = res_ptr - idx;

  for (;;)
    {
      mpi_limb_t b = b_end[idx];
      mpi_limb_t a = a_end[idx];
      mpi_limb_t carry_out;

      /* Fold in the incoming carry; a wrap-around here is a carry.  */
      b += carry;
      carry_out = b < carry;

      /* Add the other operand; a wrap-around here is a carry too.  */
      b += a;
      if (b < a)
        carry_out++;

      sum_end[idx] = b;
      carry = carry_out;

      if (++idx == 0)
        break;
    }

  return carry;
}

/* Multiply the S1_SIZE-limb vector at S1_PTR by the single limb
   S2_LIMB and subtract the product, limb by limb from the lowest index
   up, from the vector at RES_PTR (updated in place).  Returns the limb
   that is left over above the last processed position (high product
   word plus accumulated borrows).  The loop body runs before the
   termination test, so at least one limb is always processed.  */
static __attribute__ ((noinline, noclone)) mpi_limb_t
mpihelp_submul_1 (mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
                  mpi_size_t s1_size, mpi_limb_t s2_limb)
{
  mpi_limb_t borrow = 0;
  mpi_size_t idx;
  mpi_limb_t *dst_end;
  const mpi_limb_t *src_end;

  /* Empty asm with a "memory" clobber: a compiler-level barrier.  */
  asm volatile ("":::"memory");

  /* Negative index relative to one past the end of each vector.  */
  idx = -s1_size;
  dst_end = res_ptr - idx;
  src_end = s1_ptr - idx;

  for (;;)
    {
      mpi_limb_t hi, lo;
      mpi_limb_t old;

      /* ARM unsigned long multiply of src_end[idx] by s2_limb; operand 1
         (lo) is the first destination register and operand 0 (hi) the
         second.  r0 and r1 are listed as clobbered.  */
      __asm__ ("umull %r1, %r0, %r2, %r3"
               : "=&r" ((USItype) (hi)), "=r" ((USItype) (lo))
               : "r" ((USItype) (src_end[idx])), "r" ((USItype) (s2_limb))
               : "r0", "r1");

      /* Add the running borrow to the low word; a wrap-around is
         carried into the next borrow together with the high word.  */
      lo += borrow;
      borrow = hi + (lo < borrow);

      /* Subtract from the destination limb; a wrap-around is one more
         unit to borrow from the next limb.  */
      old = dst_end[idx];
      lo = old - lo;
      if (lo > old)
        borrow++;
      dst_end[idx] = lo;

      if (++idx == 0)
        break;
    }

  return borrow;
}

/* Subtract the SIZE-limb vector at S2_PTR from the one at S1_PTR limb
   by limb, lowest index first, storing each difference limb at the
   same index of RES_PTR.  Returns the borrow out of the last limb.
   The loop body runs before the termination test, so at least one limb
   is always processed.  RES_PTR is allowed to be the same array as an
   input (mpihelp_divrem calls this with RES_PTR == S1_PTR); each limb
   is read before the result limb is written.  */
static __attribute__ ((noinline, noclone)) mpi_limb_t
mpihelp_sub_n (mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_ptr_t s2_ptr,
               mpi_size_t size)
{
  mpi_limb_t borrow = 0;
  mpi_size_t idx;
  mpi_limb_t *diff_end;
  const mpi_limb_t *minuend_end;
  const mpi_limb_t *subtrahend_end;

  /* Empty asm with a "memory" clobber: a compiler-level barrier.  */
  asm volatile ("":::"memory");

  /* Index with a negative offset from one past the end of each vector
     so the loop can simply count up until the index reaches zero.  */
  idx = -size;
  minuend_end = s1_ptr - idx;
  subtrahend_end = s2_ptr - idx;
  diff_end = res_ptr - idx;

  for (;;)
    {
      mpi_limb_t sub = subtrahend_end[idx];
      mpi_limb_t min = minuend_end[idx];
      mpi_limb_t borrow_out;

      /* Fold the incoming borrow into the subtrahend; a wrap-around
         here already produces a borrow.  */
      sub += borrow;
      borrow_out = sub < borrow;

      /* Do the subtraction; a result larger than the minuend means
         the subtraction wrapped around.  */
      sub = min - sub;
      if (sub > min)
        borrow_out++;

      diff_end[idx] = sub;
      borrow = borrow_out;

      if (++idx == 0)
        break;
    }

  return borrow;
}

/* Compare the SIZE-limb vectors at OP1_PTR and OP2_PTR, scanning from
   the highest index down to index 0.  Returns 0 when all limbs are
   equal (or SIZE is not positive); otherwise returns 1 or -1 according
   to whether the first differing limb of OP1_PTR is the larger or the
   smaller one.  */
static __attribute__ ((noinline, noclone)) int
mpihelp_cmp (mpi_ptr_t op1_ptr, mpi_ptr_t op2_ptr, mpi_size_t size)
{
  mpi_size_t pos = size;

  /* Empty asm with a "memory" clobber: a compiler-level barrier.  */
  asm volatile ("":::"memory");

  while (pos > 0)
    {
      mpi_limb_t lhs, rhs;

      pos--;
      lhs = op1_ptr[pos];
      rhs = op2_ptr[pos];
      if (lhs == rhs)
        continue;

      /* The first difference from the top decides the result.  */
      if (lhs > rhs)
        return 1;
      return -1;
    }

  return 0;
}

/* Divide the NSIZE-limb number at NP by the DSIZE-limb number at DP.

   Quotient limbs are stored through QP, one per loop iteration.  The
   remainder is left in NP: np[0] for DSIZE == 1, the two limbs NP
   points at after its final adjustment for DSIZE == 2, and for larger
   DSIZE whatever mpihelp_submul_1 / mpihelp_add_n leave in the DSIZE
   limbs at the adjusted NP.  The return value is 1 when the top part
   of the numerator was initially >= the divisor (the divisor is then
   subtracted once before the main loop), otherwise 0.

   QEXTRA_LIMBS asks for that many additional low quotient limbs, which
   are computed with zero limbs fed in where numerator limbs would
   otherwise be read.

   The do { ... } while (0) blocks are open-coded divisions of a
   two-limb value by one limb that work on 16-bit halves
   ((8 * (4)) / 2 == 16).  They divide by the upper half of the divisor
   limb (__d1), so that half must be nonzero.
   NOTE(review): they look like an expanded udiv_qrnnd-style macro and
   presumably expect a normalized divisor (top bit set) -- confirm
   against the gnupg sources.

   main in this testcase calls the function with dsize == 2 and
   qextra_limbs == 0, so only the "case 2" path is executed there.  */
static __attribute__ ((noinline, noclone)) mpi_limb_t
mpihelp_divrem (mpi_ptr_t qp, mpi_size_t qextra_limbs,
                mpi_ptr_t np, mpi_size_t nsize,
                mpi_ptr_t dp, mpi_size_t dsize)
{
  mpi_limb_t most_significant_q_limb = 0;
  switch (dsize)
    {
    case 0:
      /* dsize is 0 here, so this evaluates 1 / 0.  NOTE(review):
         presumably a deliberate trap for an empty divisor.  */
      return 1 / dsize;
    case 1:
      {
        /* Single-limb divisor: n1 carries the running remainder.  */
        mpi_size_t i;
        mpi_limb_t n1;
        mpi_limb_t d;
        d = dp[0];
        n1 = np[nsize - 1];
        if (n1 >= d)
          {
            n1 -= d;
            most_significant_q_limb = 1;
          }
        /* Quotient limbs for the real numerator limbs go above the
           qextra_limbs extra ones.  */
        qp += qextra_limbs;
        for (i = nsize - 2; i >= 0; i--)
          /* Divide the two-limb value (n1, np[i]) by d: quotient limb
             to qp[i], remainder back into n1.  */
          do
            {
              UWtype __d1, __d0, __q1, __q0, __r1, __r0, __m;
              /* Split d into its upper and lower 16-bit halves.  */
              __d1 = ((UWtype) (d) >> ((8 * (4)) / 2));
              __d0 = ((UWtype) (d) & (((UWtype) 1 << ((8 * (4)) / 2)) - 1));
              /* Upper quotient half: estimate from __d1, then correct
                 the estimate at most twice.  */
              __r1 = (n1) % __d1;
              __q1 = (n1) / __d1;
              __m = (UWtype) __q1 *__d0;
              __r1 =
                __r1 *
                ((UWtype) 1 << ((8 * (4)) / 2)) | ((UWtype) (np[i]) >>
                                                   ((8 * (4)) / 2));
              if (__r1 < __m)
                {
                  __q1--, __r1 += (d);
                  if (__r1 >= (d))
                    if (__r1 < __m)
                      __q1--, __r1 += (d);
                }
              __r1 -= __m;
              /* Lower quotient half, same scheme.  */
              __r0 = __r1 % __d1;
              __q0 = __r1 / __d1;
              __m = (UWtype) __q0 *__d0;
              __r0 =
                __r0 *
                ((UWtype) 1 << ((8 * (4)) / 2)) | ((UWtype) (np[i]) &
                                                   (((UWtype) 1 <<
                                                     ((8 * (4)) / 2)) - 1));
              if (__r0 < __m)
                {
                  __q0--, __r0 += (d);
                  if (__r0 >= (d))
                    if (__r0 < __m)
                      __q0--, __r0 += (d);
                }
              __r0 -= __m;
              (qp[i]) = (UWtype) __q1 *((UWtype) 1 << ((8 * (4)) / 2)) | __q0;
              (n1) = __r0;
            }
          while (0);
        qp -= qextra_limbs;
        for (i = qextra_limbs - 1; i >= 0; i--)
          /* Same division step as above, but with the constant 0 in
             place of a numerator limb (extra quotient limbs).  */
          do
            {
              UWtype __d1, __d0, __q1, __q0, __r1, __r0, __m;
              __d1 = ((UWtype) (d) >> ((8 * (4)) / 2));
              __d0 = ((UWtype) (d) & (((UWtype) 1 << ((8 * (4)) / 2)) - 1));
              __r1 = (n1) % __d1;
              __q1 = (n1) / __d1;
              __m = (UWtype) __q1 *__d0;
              __r1 =
                __r1 *
                ((UWtype) 1 << ((8 * (4)) / 2)) | ((UWtype) (0) >>
                                                   ((8 * (4)) / 2));
              if (__r1 < __m)
                {
                  __q1--, __r1 += (d);
                  if (__r1 >= (d))
                    if (__r1 < __m)
                      __q1--, __r1 += (d);
                }
              __r1 -= __m;
              __r0 = __r1 % __d1;
              __q0 = __r1 / __d1;
              __m = (UWtype) __q0 *__d0;
              __r0 =
                __r0 *
                ((UWtype) 1 << ((8 * (4)) / 2)) | ((UWtype) (0) &
                                                   (((UWtype) 1 <<
                                                     ((8 * (4)) / 2)) - 1));
              if (__r0 < __m)
                {
                  __q0--, __r0 += (d);
                  if (__r0 >= (d))
                    if (__r0 < __m)
                      __q0--, __r0 += (d);
                }
              __r0 -= __m;
              (qp[i]) = (UWtype) __q1 *((UWtype) 1 << ((8 * (4)) / 2)) | __q0;
              (n1) = __r0;
            }
          while (0);
        /* What is left in n1 is the remainder.  */
        np[0] = n1;
      }
      break;
    case 2:
      {
        /* Two-limb divisor (d1, d0); (n1, n0) is the running two-limb
           partial remainder.  This is the path main exercises.  */
        mpi_size_t i;
        mpi_limb_t n1, n0, n2;
        mpi_limb_t d1, d0;
        np += nsize - 2;
        d1 = dp[1];
        d0 = dp[0];
        n1 = np[1];
        n0 = np[0];
        /* If the top two numerator limbs are >= the divisor, subtract
           the divisor once and report a leading quotient limb of 1.  */
        if (n1 >= d1 && (n1 > d1 || n0 >= d0))
          {
            /* Two-limb subtract with borrow (ARM subs/sbc pair):
               (n1, n0) = (n1, n0) - (d1, d0).  */
            __asm__ ("subs %1, %4, %5\n" "sbc  %0, %2, %3": "=r" ((USItype)
(n1)), "=&r" ((USItype) (n0)):"r" ((USItype) (n1)), "rI" ((USItype) (d1)), "r"
((USItype) (n0)), "rI" ((USItype) (d0)));
            most_significant_q_limb = 1;
          }
        for (i = qextra_limbs + nsize - 2 - 1; i >= 0; i--)
          {
            mpi_limb_t q;
            mpi_limb_t r;
            /* Step down to the next numerator limb, or feed in a zero
               limb once the real numerator limbs are used up.  */
            if (i >= qextra_limbs)
              np--;
            else
              np[0] = 0;
            if (n1 == d1)
              {
                /* Top limbs equal: start from the largest possible
                   quotient limb.  */
                q = ~(mpi_limb_t) 0;
                r = n0 + d1;
                if (r < d1)
                  {
                    /* r wrapped around.  Two-limb add with carry (ARM
                       adds/adc pair):
                       (n1, n0) = (r - d0, np[0]) + (0, d0).  */
                    __asm__ ("adds %1, %4, %5\n" "adc  %0, %2, %3": "=r"
((USItype) (n1)), "=&r" ((USItype) (n0)):"%r" ((USItype) (r - d0)), "rI"
((USItype) (0)), "%r" ((USItype) (np[0])), "rI" ((USItype) (d0)));
                    qp[i] = q;
                    continue;
                  }
                n1 = d0 - (d0 != 0 ? 1 : 0);
                n0 = -d0;
              }
            else
              {
                /* Divide (n1, n0) by d1 using 16-bit halves: quotient
                   estimate to q, remainder to r.  */
                do
                  {
                    UWtype __d1, __d0, __q1, __q0, __r1, __r0, __m;
                    __d1 = ((UWtype) (d1) >> ((8 * (4)) / 2));
                    __d0 =
                      ((UWtype) (d1) & (((UWtype) 1 << ((8 * (4)) / 2)) - 1));
                    __r1 = (n1) % __d1;
                    __q1 = (n1) / __d1;
                    __m = (UWtype) __q1 *__d0;
                    __r1 =
                      __r1 *
                      ((UWtype) 1 << ((8 * (4)) / 2)) | ((UWtype) (n0) >>
                                                         ((8 * (4)) / 2));
                    if (__r1 < __m)
                      {
                        __q1--, __r1 += (d1);
                        if (__r1 >= (d1))
                          if (__r1 < __m)
                            __q1--, __r1 += (d1);
                      }
                    __r1 -= __m;
                    __r0 = __r1 % __d1;
                    __q0 = __r1 / __d1;
                    __m = (UWtype) __q0 *__d0;
                    __r0 =
                      __r0 *
                      ((UWtype) 1 << ((8 * (4)) / 2)) | ((UWtype) (n0) &
                                                         (((UWtype) 1 <<
                                                           ((8 * (4)) / 2)) -
                                                          1));
                    if (__r0 < __m)
                      {
                        __q0--, __r0 += (d1);
                        if (__r0 >= (d1))
                          if (__r0 < __m)
                            __q0--, __r0 += (d1);
                      }
                    __r0 -= __m;
                    (q) =
                      (UWtype) __q1 *((UWtype) 1 << ((8 * (4)) / 2)) | __q0;
                    (r) = __r0;
                  }
                while (0);
                /* ARM unsigned long multiply of d0 by q; operand 1 (n0)
                   is the first destination register, operand 0 (n1) the
                   second.  r0 and r1 are listed as clobbered.  */
                __asm__ ("umull %r1, %r0, %r2, %r3": "=&r" ((USItype) (n1)),
"=r" ((USItype) (n0)): "r" ((USItype) (d0)), "r" ((USItype) (q)):"r0", "r1");
              }
            n2 = np[0];
          q_test:
            /* While (n1, n0) exceeds (r, n2) the quotient estimate is
               too large: decrement q and adjust, re-testing as long as
               r += d1 does not wrap around.  */
            if (n1 > r || (n1 == r && n0 > n2))
              {
                q--;
                /* Two-limb subtract: (n1, n0) = (n1, n0) - (0, d0).  */
                __asm__ ("subs %1, %4, %5\n" "sbc  %0, %2, %3": "=r" ((USItype)
(n1)), "=&r" ((USItype) (n0)):"r" ((USItype) (n1)), "rI" ((USItype) (0)), "r"
((USItype) (n0)), "rI" ((USItype) (d0)));
                r += d1;
                if (r >= d1)
                  goto q_test;
              }
            qp[i] = q;
            /* New partial remainder, two-limb subtract:
               (n1, n0) = (r, n2) - (n1, n0).  */
            __asm__ ("subs %1, %4, %5\n" "sbc  %0, %2, %3": "=r" ((USItype)
(n1)), "=&r" ((USItype) (n0)):"r" ((USItype) (r)), "rI" ((USItype) (n1)), "r"
((USItype) (n2)), "rI" ((USItype) (n0)));
          }
        /* Store the final two-limb remainder.  */
        np[1] = n1;
        np[0] = n0;
      }
      break;
    default:
      {
        /* Divisor of three or more limbs (or negative dsize): dX is its
           top limb, d1 the limb below it; n0 tracks the top limb of the
           current numerator window.  */
        mpi_size_t i;
        mpi_limb_t dX, d1, n0;
        np += nsize - dsize;
        dX = dp[dsize - 1];
        d1 = dp[dsize - 2];
        n0 = np[dsize - 1];
        /* If the top dsize numerator limbs are >= the divisor, subtract
           the divisor once and report a leading quotient limb of 1.  */
        if (n0 >= dX)
          {
            if (n0 > dX || mpihelp_cmp (np, dp, dsize - 1) >= 0)
              {
                mpihelp_sub_n (np, np, dp, dsize);
                n0 = np[dsize - 1];
                most_significant_q_limb = 1;
              }
          }
        for (i = qextra_limbs + nsize - dsize - 1; i >= 0; i--)
          {
            mpi_limb_t q;
            mpi_limb_t n1, n2;
            mpi_limb_t cy_limb;
            if (i >= qextra_limbs)
              {
                np--;
                n2 = np[dsize];
              }
            else
              {
                /* No numerator limbs left: move the window up by one
                   limb in place and feed in a zero limb at the bottom.  */
                n2 = np[dsize - 1];
                do
                  {
                    mpi_size_t _i;
                    for (_i = (dsize - 1) - 1; _i >= 0; _i--)
                      (np + 1)[_i] = (np)[_i];
                  }
                while (0);
                np[0] = 0;
              }
            if (n0 == dX)
              {
                /* Top limbs equal: use the largest possible quotient
                   limb; the add-back below corrects it if needed.  */
                q = ~(mpi_limb_t) 0;
              }
            else
              {
                mpi_limb_t r;
                /* Divide (n0, np[dsize - 1]) by dX using 16-bit halves:
                   quotient estimate to q, remainder to r.  */
                do
                  {
                    UWtype __d1, __d0, __q1, __q0, __r1, __r0, __m;
                    __d1 = ((UWtype) (dX) >> ((8 * (4)) / 2));
                    __d0 =
                      ((UWtype) (dX) & (((UWtype) 1 << ((8 * (4)) / 2)) - 1));
                    __r1 = (n0) % __d1;
                    __q1 = (n0) / __d1;
                    __m = (UWtype) __q1 *__d0;
                    __r1 =
                      __r1 *
                      ((UWtype) 1 << ((8 * (4)) / 2)) |
                      ((UWtype) (np[dsize - 1]) >> ((8 * (4)) / 2));
                    if (__r1 < __m)
                      {
                        __q1--, __r1 += (dX);
                        if (__r1 >= (dX))
                          if (__r1 < __m)
                            __q1--, __r1 += (dX);
                      }
                    __r1 -= __m;
                    __r0 = __r1 % __d1;
                    __q0 = __r1 / __d1;
                    __m = (UWtype) __q0 *__d0;
                    __r0 =
                      __r0 *
                      ((UWtype) 1 << ((8 * (4)) / 2)) |
                      ((UWtype) (np[dsize - 1]) &
                       (((UWtype) 1 << ((8 * (4)) / 2)) - 1));
                    if (__r0 < __m)
                      {
                        __q0--, __r0 += (dX);
                        if (__r0 >= (dX))
                          if (__r0 < __m)
                            __q0--, __r0 += (dX);
                      }
                    __r0 -= __m;
                    (q) =
                      (UWtype) __q1 *((UWtype) 1 << ((8 * (4)) / 2)) | __q0;
                    (r) = __r0;
                  }
                while (0);
                /* ARM unsigned long multiply of d1 by q; operand 1 (n0)
                   is the first destination register, operand 0 (n1) the
                   second.  r0 and r1 are listed as clobbered.  */
                __asm__ ("umull %r1, %r0, %r2, %r3": "=&r" ((USItype) (n1)),
"=r" ((USItype) (n0)): "r" ((USItype) (d1)), "r" ((USItype) (q)):"r0", "r1");
                /* Lower the estimate while (n1, n0) exceeds
                   (r, np[dsize - 2]); stop once r += dX wraps around.  */
                while (n1 > r || (n1 == r && n0 > np[dsize - 2]))
                  {
                    q--;
                    r += dX;
                    if (r < dX)
                      break;
                    n1 -= n0 < d1;
                    n0 -= d1;
                  }
              }
            /* Subtract q times the divisor from the window; if the limb
               that comes back does not match n2, q was one too large,
               so add the divisor back and decrement q.  */
            cy_limb = mpihelp_submul_1 (np, dp, dsize, q);
            if (n2 != cy_limb)
              {
                mpihelp_add_n (np, np, dp, dsize);
                q--;
              }
            qp[i] = q;
            n0 = np[dsize - 1];
          }
      }
    }
  return most_significant_q_limb;
}

/* Driver: divide the three-limb number { 0xdaafeaa6, 0x0e77816a, 1 }
   (index 0 first) by the two-limb number { 0x6816ec64, 0xb9d5666d }
   and abort unless mpihelp_divrem returns 0, stores 1 as the single
   quotient limb and leaves 0x7298fe42 / 0x54a21afd in the two low
   limbs of the numerator array.  */
int
main ()
{
  mpi_limb_t quotient[1];
  mpi_limb_t numerator[3] = { 0xdaafeaa6, 0x0e77816a, 1 };
  mpi_limb_t divisor[2] = { 0x6816ec64, 0xb9d5666d };
  /* Volatile zero mixed into every argument; presumably there so the
     compiler cannot treat the arguments as compile-time constants.  */
  volatile int zero = 0;
  mpi_limb_t top_limb;

  top_limb = mpihelp_divrem (quotient + zero, 0 + zero, numerator + zero,
                             3 + zero, divisor + zero, 2 + zero);

  if (top_limb != 0)
    __builtin_abort ();
  if (quotient[0] != 1)
    __builtin_abort ();
  if (numerator[0] != 0x7298fe42)
    __builtin_abort ();
  if (numerator[1] != 0x54a21afd)
    __builtin_abort ();

  return 0;
}

Reply via email to