https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80695

            Bug ID: 80695
           Summary: gratuitous use of stxvx to store multiple pointers
           Product: gcc
           Version: 7.1.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: target
          Assignee: unassigned at gcc dot gnu.org
          Reporter: acsawdey at gcc dot gnu.org
                CC: meissner at gcc dot gnu.org, wschmidt at gcc dot gnu.org
  Target Milestone: ---
            Target: powerpc64*-*-* power9

Testing GCC 7.1.0 on power9, compiling the following code with -O3 -mcpu=power9:

/* Stream descriptor used by the test case below: one int flag word followed
   by eight char* fields describing the get, put and reserve areas. */
typedef struct _IO_FILE {
  int _flags;           /* High-order word is _IO_MAGIC; rest is flags. */
  char* _IO_read_ptr;   /* Current read pointer */
  char* _IO_read_end;   /* End of get area. */
  char* _IO_read_base;  /* Start of putback+get area. */
  char* _IO_write_base; /* Start of put area. */
  char* _IO_write_ptr;  /* Current put pointer. */
  char* _IO_write_end;  /* End of put area. */
  char* _IO_buf_base;   /* Start of reserve area. */
  char* _IO_buf_end;    /* End of reserve area. */
} _IO_FILE;
/* Rewrite the get-area and put-area pointers of F from its current read
   pointer and reserve-area bounds, then return CH converted to
   unsigned char.  Only pointer fields of *F are stored to; _flags is not
   touched. */
int
_IO_new_file_overflow (_IO_FILE *f, int ch)
{
      /* If the read pointer has reached the end of the reserve area, point
         both the read pointer and the read end at the reserve-area base. */
      if (f->_IO_read_ptr == f->_IO_buf_end)
        f->_IO_read_end = f->_IO_read_ptr = f->_IO_buf_base;
      /* Write pointer and write base both take the (possibly just updated)
         read pointer; write end takes the reserve-area end. */
      f->_IO_write_ptr = f->_IO_read_ptr;
      f->_IO_write_base = f->_IO_write_ptr;
      f->_IO_write_end = f->_IO_buf_end;
      /* Read base and read pointer are both set to the read end. */
      f->_IO_read_base = f->_IO_read_ptr = f->_IO_read_end;
  /* CH is returned after conversion to unsigned char. */
  return (unsigned char) ch;
}

results in this generated code:

_IO_new_file_overflow:
        ld 10,8(3)
        ld 8,64(3)
        mr 9,3
        cmpld 7,10,8
        beq 7,.L2
        ld 7,16(3)
        mtvsrdd 0,8,10
        li 8,24
        rlwinm 3,4,0,0xff
        mtvsrdd 12,10,7
        li 10,40
        std 7,8(9)
        stxvx 12,9,8
        stxvx 0,9,10
        blr
        .p2align 4,,15
.L2:
        ld 10,56(3)
        mr 7,10
        mtvsrdd 0,8,10
        std 10,16(3)
        li 8,24
        rlwinm 3,4,0,0xff
        mtvsrdd 12,10,7
        li 10,40
        std 7,8(9)
        stxvx 12,9,8
        stxvx 0,9,10
        blr

This is an improvement over gcc6, which used multiple mtvsrd plus xxpermdi to
accomplish the same thing. However, it seems to me it would still be better just
to generate 4 std instructions in place of the two vector stores. That would get
rid of 2 mtvsrdd, 2 stxvx, and 2 li instructions.

Reply via email to