Nikunj A Dadhania <nik...@linux.vnet.ibm.com> writes:
> This series contains 7 new instructions for POWER9 ISA3.0.
> Use newer qemu load/store tcg helpers and optimize stxvw4x and lxvw4x.
>
> GCC was adding an epilogue for every VSX instruction, causing a change in
> behaviour. For testing the load vector instructions, mfvsrld/mfvsrd were used
> to move the loaded VSR into general-purpose registers. And for testing the
> store vector instructions, the mtvsrdd instruction was used to fill the VSR.
> This helped in getting rid of the epilogue added by gcc. Tried
> adding the test cases to kvm-unit-tests, but executing VSX instructions there
> results in a cpu exception. Will debug that later. I will send the test code
> and steps to execute as a reply to this email.
Source code for stxv_x.c and lxv_x.c is attached, and the following are the steps to use them:

Compile using the IBM Advance Toolchain[1]:
=======================================
/opt/at10.0/bin/powerpc64-linux-gnu-gcc -static -O3 lxv_x.c -o be_lxv_x
/opt/at10.0/bin/powerpc64-linux-gnu-gcc -static -O3 stxv_x.c -o be_stxv_x
/opt/at10.0/bin/powerpc64le-linux-gnu-gcc -static -O3 lxv_x.c -o le_lxv_x
/opt/at10.0/bin/powerpc64le-linux-gnu-gcc -static -O3 stxv_x.c -o le_stxv_x

Run the following to test the instructions:
===========================================
for i in lxv_x stxv_x
do
    echo "Running ... $i"
    echo ">>>>>>>>>>>>>>>> LE LE LE >>>>>>>>>>>>>>"
    ../qemu/ppc64le-linux-user/qemu-ppc64le -cpu POWER9 le_${i}
    echo ">>>>>>>>>>>>>>>> BE BE BE >>>>>>>>>>>>>>"
    ../qemu/ppc64-linux-user/qemu-ppc64 -cpu POWER9 be_${i}
    echo ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>"
done

Regards
Nikunj

1. ftp://ftp.unicamp.br/pub/linuxpatch/toolchain/at/redhat/Fedora22
#include <stdio.h> #include <stdlib.h> #include <inttypes.h> static void print16x1(uint8_t *p) { int i; for(i = 0; i < 16; i++) printf(" %02X ", p[i]); printf("\n"); } int main(void) { __vector uint8_t vrt8; uint8_t rb8[16]; unsigned long hi = 0x0001020310111213; unsigned long lo = 0x2021222330313233; asm volatile("mtvsrdd %x0, %2, %3;" "stxvw4x %x0, 0, %1;" : "=ws"(vrt8): "r"(&rb8), "r"(hi), "r"(lo)); print16x1(rb8); asm volatile("mtvsrdd %x0, %2, %3;" "stxvh8x %x0, 0, %1;" : "=ws"(vrt8) : "r"(&rb8), "r"(hi), "r"(lo)); print16x1(rb8); asm volatile("mtvsrdd %x0, %2, %3;" "stxvb16x %x0, 0, %1;" : "=ws"(vrt8) : "r"(&rb8), "r"(hi), "r"(lo)); print16x1(rb8); return EXIT_SUCCESS; }
#include <stdio.h> #include <stdlib.h> #include <inttypes.h> int main(void) { __vector uint8_t vrt8; unsigned long lo, hi; #if __BYTE_ORDER == __LITTLE_ENDIAN uint8_t rb32[16] = {0x03, 0x02, 0x01, 0x00, 0x13, 0x12, 0x11, 0x10, 0x23, 0x22, 0x21, 0x20, 0x33, 0x32, 0x31, 0x30}; uint8_t rb16[16] = {0x01, 0x00, 0x11, 0x10, 0x21, 0x20, 0x31, 0x30, 0x41, 0x40, 0x51, 0x50, 0x61, 0x60, 0x71, 0x70}; #else uint8_t rb32[16] = {0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13, 0x20, 0x21, 0x22, 0x23, 0x30, 0x31, 0x32, 0x33}; uint8_t rb16[16] = {0x00, 0x01, 0x10, 0x11, 0x20, 0x21, 0x30, 0x31, 0x40, 0x41, 0x50, 0x51, 0x60, 0x61, 0x70, 0x71}; #endif uint8_t rb8[16] = {0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7}; asm volatile("lxvw4x %x0, 0, %1;" "mfvsrd %2, %x0;" "mfvsrld %3, %x0;" : "=ws"(vrt8): "r"(&rb32), "r"(hi), "r"(lo)); printf("lxvw4x: hi %016lx lo %016lx \n", hi, lo); asm volatile("lxvh8x %x0, 0, %1;" "mfvsrd %2, %x0;" "mfvsrld %3, %x0;" : "=ws"(vrt8): "r"(&rb16), "r"(hi), "r"(lo)); printf("lxvh8x: hi %016lx lo %016lx \n", hi, lo); asm volatile("lxvb16x %x0, 0, %1;" "mfvsrd %2, %x0;" "mfvsrld %3, %x0;" : "=ws"(vrt8): "r"(&rb8), "r"(hi), "r"(lo)); printf("lxvb16x: hi %016lx lo %016lx \n", hi, lo); return EXIT_SUCCESS; }