https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108757

--- Comment #12 from Chip Kerchner <chip.kerchner at ibm dot com> ---
Here is an example of the original problem:

// Expands to GCC's always_inline attribute plus `inline`; applied to the
// small load/store helpers so they are inlined into their callers.
#define EIGEN_ALWAYS_INLINE __attribute__((always_inline)) inline

// Vector-of-float type used for the loads and stores
// (presumably four 32-bit floats, matching the stride of 4 used in convert()).
typedef __vector float Packet4f;
// Index/count type: an alias of size_t, hence unsigned.
typedef size_t Index;

// Loads one Packet4f from `from` using vec_xl with a zero first argument
// (the byte offset, per the vec_xl signature) and returns it.
EIGEN_ALWAYS_INLINE Packet4f ploadu(const float* from)
{
  // const is cast away only to match the pointer type passed to vec_xl;
  // nothing is written through the pointer here.
  return vec_xl(0, const_cast<float*>(from));
}

// Stores the packet `from` to memory at `to` using vec_xst with a zero
// middle argument (the byte offset, per the vec_xst signature).
EIGEN_ALWAYS_INLINE void pstoreu(float* to, const Packet4f &from)
{
  vec_xst(from, 0, to);
}

// Copies floats from `src` to `result` one packet at a time, advancing the
// index by 4 per iteration. Only whole groups of 4 are processed: the loop
// stops once fewer than 4 elements remain, so a tail of rows % 4 elements
// is not copied by this function.
void convert(Index rows, float*src, float *result)
{
  // The `i + 4 <= rows` condition on an unsigned Index is the loop form whose
  // trip-count computation is the subject of this report; in the annotated
  // output it appears as ((rows - 4) >> 2) + 1.
  for(Index i = 0; i + 4 <= rows; i+=4) {
    Packet4f r32_0 = ploadu(src + i +  0);
    pstoreu(result + i +  0, r32_0);
  }
}

And here is the output (with annotations on the lines in question):

    cmpldi 0,3,3
    blelr 0
    addi 3,3,-4  <- i = rows - 4
    li 9,0
    srdi 3,3,2   <- i >>= 2
    addi 8,3,1   <- i = i + 1
    andi. 7,8,0x3
    mr 10,8
    beq 0,.L10
    cmpdi 0,7,1
    beq 0,.L14
    cmpdi 0,7,2
    beq 0,.L15
    lxv 0,0(4)
    mr 8,3
    li 9,16
    stxv 0,0(5)
.L15:
    lxvx 0,4,9
    addi 8,8,-1
    stxvx 0,5,9
    addi 9,9,16
.L14:
    lxvx 0,4,9
    cmpdi 0,8,1
    stxvx 0,5,9
    addi 9,9,16
    beqlr 0
.L10:
    srdi 10,10,2
    mtctr 10
.L3:
    lxvx 0,4,9
    addi 10,9,16
    addi 7,9,32
    addi 8,9,48
    stxvx 0,5,9
    lxvx 0,4,10
    addi 9,9,64
    stxvx 0,5,10
    lxvx 0,4,7
    stxvx 0,5,7
    lxvx 0,4,8
    stxvx 0,5,8
    bdnz .L3
    blr

In this case the 3 annotated lines can be replaced by a simple `srdi 8,3,2`

Reply via email to