https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102054

--- Comment #2 from Kewen Lin <linkw at gcc dot gnu.org> ---
Yet another reduced test case from 526.blender_r.

#include <math.h>

/* Sampler state consumed by QMC_initPixel(). */
typedef struct QMCSampler {
  /* Pointers to other samplers; not dereferenced in this snippet
   * (presumably list links -- confirm against the full source). */
  struct QMCSampler *next;
  struct QMCSampler *prev;
  /* Sampler kind; QMC_initPixel() treats the value 2 specially. */
  int type;
  /* Number of 2-D samples QMC_initPixel() writes into samp2d. */
  int tot;
  /* Not referenced by the code in this snippet. */
  int used;
  /* Output buffer, written as interleaved pairs: [2*i] and [2*i + 1]. */
  double *samp2d;
  /* Pair of offsets, indexed by thread in QMC_initPixel().
   * NOTE(review): only one row is declared here, so any thread != 0 would
   * index past the array -- presumably an artifact of test-case reduction. */
  double offs[1][2];
} QMCSampler;

/* External helper, defined outside this snippet; QMC_initPixel() calls it
 * with its thread argument and uses the returned float as a seed/offset.
 * NOTE(review): presumably a per-thread pseudo-random generator -- the
 * value range is not visible here. */
float BLI_thread_frand(int thread);

/*
 * Advance a two-component running sequence by one step.
 *
 * ht_invprimes: per-component multiplier (read only), 2 elements.
 * ht_nums:      per-component running value, updated in place, 2 elements.
 * v:            receives the updated running values after rounding through
 *               float, 2 elements.
 */
static void halton_sample(double *ht_invprimes, double *ht_nums, double *v) {
  for (unsigned int dim = 0; dim < 2; dim++) {
    const double inv_prime = ht_invprimes[dim];
    /* Remaining distance to 1.0, nudged by a small epsilon. */
    const double threshold = fabs((1.0 - ht_nums[dim]) - 1e-10);
    double step = inv_prime;

    if (inv_prime >= threshold) {
      /* Multiply by inv_prime until the product drops below the threshold,
       * remembering the product from the previous round. */
      double prev = inv_prime;
      double cur = inv_prime * inv_prime;

      while (cur >= threshold) {
        prev = cur;
        cur *= inv_prime;
      }

      step = (prev + cur) - 1.0;
    }

    ht_nums[dim] += step;

    /* The output is deliberately narrowed to float precision before being
     * stored back as a double. */
    v[dim] = (float)ht_nums[dim];
  }
}

/*
 * Prepare the sampler for a new pixel.
 *
 * When qsa->type is 2, only the two per-thread offsets are refreshed from
 * BLI_thread_frand().  For every other type, qsa->tot sample pairs are
 * generated with halton_sample(), starting from two values obtained from
 * BLI_thread_frand(), and stored interleaved in qsa->samp2d.
 *
 * qsa:    sampler to initialise.
 * thread: passed to BLI_thread_frand() and used to index qsa->offs.
 */
void QMC_initPixel(QMCSampler *qsa, int thread) {
  if (qsa->type == 2) {
    double *offset = qsa->offs[thread];

    /* The product is computed in float and then widened to double. */
    offset[0] = 0.5f * BLI_thread_frand(thread);
    offset[1] = 0.5f * BLI_thread_frand(thread);
    return;
  }

  double inv_primes[2] = {0.5, 1.0 / 3.0};
  double state[2];
  double sample[2];

  /* Two separate statements keep the order of the two calls fixed. */
  state[0] = BLI_thread_frand(thread);
  state[1] = BLI_thread_frand(thread);

  for (int n = 0; n < qsa->tot; n++) {
    const int base = 2 * n;

    halton_sample(inv_primes, state, sample);
    qsa->samp2d[base + 0] = sample[0];
    qsa->samp2d[base + 1] = sample[1];
  }
}

Without loop vectorization, unrestricted PRE makes the loop eligible for
cunroll, and the loop is completely unrolled. The affected percentage is also
small, about 0.7%.

Reply via email to