https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99718

--- Comment #13 from luoxhu at gcc dot gnu.org ---
The performance data in #c11 is for the int variable-index vec_insert in
32-bit mode. The float variable-index vec_insert in 32-bit mode is a bit
slower, but still much better than the original (the extra stfs+lwz comes from
insn #17 and insn #18 in expand, which move the SF register to an SI register
by hex value):

46.677s -> 8.723s

test.c

#include <altivec.h>
#define TYPE float

/* Reproducer for the variable-index vec_insert case: returns the result of
   vec_insert (i, u, n), where the element index N is a run-time value
   rather than a compile-time constant.  TYPE is float in this test (see
   the #define above), so U is a vector float and I is a float scalar.  */
vector TYPE
test (vector TYPE u, TYPE i, signed int n){
return vec_insert (i, u, n);
}

Expand:
    1: NOTE_INSN_DELETED
    6: NOTE_INSN_BASIC_BLOCK 2
    2: r122:V4SF=%2:V4SF
    3: r123:SF=%1:SF
    4: r124:SI=%3:SI
    5: NOTE_INSN_FUNCTION_BEG
    8: r120:V4SF=r122:V4SF
    9: r125:SI=r124:SI&0x3
   10: r126:V4SF=r120:V4SF
   11: r128:SI=r125:SI<<0x2
   12: {r128:SI=0x14-r128:SI;clobber ca:SI;}
   13: r132:SI=high(`*.LC0')
   14: r131:SI=r132:SI+low(`*.LC0')
      REG_EQUAL `*.LC0'
   15: r130:V2DI=[r131:SI]
      REG_EQUAL const_vector
   16: r129:V16QI=r130:V2DI#0
   17: [r112:SI]=r123:SF
   18: r133:SI=[r112:SI]
   19: r136:DI#4=r133:SI
   22: {r137:SI=r133:SI>>0x1f;clobber ca:SI;}
   23: r136:DI#0=r137:SI
   24: r138:DI=0
   25: r135:V2DI=vec_concat(r136:DI,r138:DI)
   26: r134:V16QI=r135:V2DI#0
   27: r139:V16QI=unspec[r128:SI] 151
   28: r140:V16QI=unspec[r134:V16QI,r134:V16QI,r139:V16QI] 236
   29: r141:V16QI=unspec[r129:V16QI,r129:V16QI,r139:V16QI] 236
   30: r126:V4SF#0={(r141:V16QI!=const_vector)?r140:V16QI:r126:V4SF#0}
   31: r119:V4SF=r126:V4SF
   32: r120:V4SF=r119:V4SF

ASM:

.LFB0:
        .cfi_startproc
        stwu 1,-16(1)
        .cfi_def_cfa_offset 16
        lis 9,.LC0@ha
        rlwinm 3,3,2,28,29
        xxlxor 0,0,0
        la 9,.LC0@l(9)
        subfic 3,3,20
        lxvd2x 33,0,9
        lvsl 13,0,3
        stfs 1,8(1)
        vperm 1,1,1,13
        ori 2,2,0
        lwz 9,8(1)
        addi 1,1,16
        .cfi_def_cfa_offset 0
        srawi 10,9,31
        mtvsrwz 13,9
        mtvsrwz 12,10
        fmrgow 11,12,13
        xxpermdi 32,11,0,0
        vperm 0,0,0,13
        xxsel 34,34,32,33
        blr

Reply via email to