Nikunj A Dadhania <nik...@linux.vnet.ibm.com> writes:

> This series contains 7 new instructions for POWER9 ISA3.0
> Use newer qemu load/store tcg helpers and optimize stxvw4x and lxvw4x.
>
> GCC was adding an epilogue after every VSX instruction, which changed the
> behaviour. To test the load vector instructions, I used mfvsrld/mfvsrd
> to move the VSR contents to registers. To test the store vector
> instructions, I used mtvsrdd. This got rid of the epilogue added by gcc.
> I tried adding the test cases to kvm-unit-tests, but executing VSX
> instructions results in a CPU exception. Will debug that later. I will send
> the test code and the steps to execute it as a reply to this email.

The source code for stxv_x.c and lxv_x.c is attached; the steps to use
them are as follows:

Compile using the IBM Advance Toolchain[1]:
===========================================
/opt/at10.0/bin/powerpc64-linux-gnu-gcc -static -O3 lxv_x.c -o be_lxv_x
/opt/at10.0/bin/powerpc64-linux-gnu-gcc -static -O3 stxv_x.c -o be_stxv_x
/opt/at10.0/bin/powerpc64le-linux-gnu-gcc -static -O3 lxv_x.c -o le_lxv_x
/opt/at10.0/bin/powerpc64le-linux-gnu-gcc -static -O3 stxv_x.c -o le_stxv_x

Run the following to test the instructions:
===========================================

# Run each test program under qemu user-mode emulation with a POWER9 CPU:
# first the little-endian build (le_*), then the big-endian build (be_*).
for prog in lxv_x stxv_x; do
    echo "Running ... $prog"

    echo ">>>>>>>>>>>>>>>> LE LE LE >>>>>>>>>>>>>>"
    ../qemu/ppc64le-linux-user/qemu-ppc64le -cpu POWER9 "le_${prog}"

    echo ">>>>>>>>>>>>>>>> BE BE BE >>>>>>>>>>>>>>"
    ../qemu/ppc64-linux-user/qemu-ppc64 -cpu POWER9 "be_${prog}"

    echo ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>"
done

Regards
Nikunj

1. ftp://ftp.unicamp.br/pub/linuxpatch/toolchain/at/redhat/Fedora22

#include <stdio.h>
#include <stdlib.h>
#include <inttypes.h>

/*
 * Print the 16 bytes starting at p on a single line, each as a two-digit
 * uppercase hexadecimal value surrounded by spaces, followed by a newline.
 *
 * p must point to at least 16 readable bytes; the buffer is only read,
 * hence the const qualifier.
 */
static void print16x1(const uint8_t *p)
{
  int i;

  for (i = 0; i < 16; i++) {
    printf(" %02X ", p[i]);
  }
  printf("\n");
}

/*
 * Exercise the stxvw4x, stxvh8x and stxvb16x vector store instructions.
 *
 * For each instruction, mtvsrdd fills a VSR from the two 64-bit values hi
 * and lo, the instruction under test stores that VSR to rb8, and the 16
 * bytes of rb8 are then printed in memory order.
 *
 * Every asm statement writes rb8 through the pointer passed in operand %1.
 * The "memory" clobber tells the compiler about that store, so the reads
 * done by print16x1() cannot be moved before the asm or satisfied from a
 * stale copy of rb8.
 */
int main(void) {
  __vector uint8_t vrt8;
  uint8_t rb8[16];
  unsigned long hi = 0x0001020310111213;
  unsigned long lo = 0x2021222330313233;

  /* Store as four 32-bit words. */
  asm volatile("mtvsrdd %x0, %2, %3;"
               "stxvw4x %x0, 0, %1;"
               : "=ws"(vrt8)
               : "r"(&rb8), "r"(hi), "r"(lo)
               : "memory");
  print16x1(rb8);

  /* Store as eight 16-bit halfwords. */
  asm volatile("mtvsrdd %x0, %2, %3;"
               "stxvh8x %x0, 0, %1;"
               : "=ws"(vrt8)
               : "r"(&rb8), "r"(hi), "r"(lo)
               : "memory");
  print16x1(rb8);

  /* Store as sixteen bytes. */
  asm volatile("mtvsrdd %x0, %2, %3;"
               "stxvb16x %x0, 0, %1;"
               : "=ws"(vrt8)
               : "r"(&rb8), "r"(hi), "r"(lo)
               : "memory");
  print16x1(rb8);

  return EXIT_SUCCESS;
}
#include <stdio.h>
#include <stdlib.h>
#include <inttypes.h>

/*
 * Exercise the lxvw4x, lxvh8x and lxvb16x vector load instructions.
 *
 * For each instruction, a 16-byte array is loaded into a VSR, the two
 * 64-bit halves of that VSR are moved to general-purpose registers with
 * mfvsrd / mfvsrld, and both values are printed.
 *
 * Operand layout of every asm statement:
 *   %0 = VSR used for the load (output)
 *   %1 = hi, written by mfvsrd  (output)
 *   %2 = lo, written by mfvsrld (output)
 *   %3 = address of the source array (input)
 * hi and lo are declared as outputs because the asm writes them; the
 * "memory" clobber tells the compiler that the asm reads the source array
 * through the pointer, so the array initialisation must really be stored
 * before the asm runs.
 */
int main(void) {
  __vector uint8_t vrt8;
  unsigned long lo, hi;

  /*
   * Use the compiler-predefined byte-order macros: they are always defined
   * by GCC, whereas __BYTE_ORDER / __LITTLE_ENDIAN need <endian.h> and, if
   * missing, would both evaluate to 0 and silently select this branch.
   */
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  uint8_t rb32[16] = {0x03, 0x02, 0x01, 0x00, 0x13, 0x12, 0x11, 0x10,
                      0x23, 0x22, 0x21, 0x20, 0x33, 0x32, 0x31, 0x30};
  uint8_t rb16[16] = {0x01, 0x00, 0x11, 0x10, 0x21, 0x20, 0x31, 0x30,
                      0x41, 0x40, 0x51, 0x50, 0x61, 0x60, 0x71, 0x70};
#else
  uint8_t rb32[16] = {0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13,
                      0x20, 0x21, 0x22, 0x23, 0x30, 0x31, 0x32, 0x33};
  uint8_t rb16[16] = {0x00, 0x01, 0x10, 0x11, 0x20, 0x21, 0x30, 0x31,
                      0x40, 0x41, 0x50, 0x51, 0x60, 0x61, 0x70, 0x71};
#endif

  uint8_t rb8[16] = {0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
                     0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7};

  /* Load as four 32-bit words. */
  asm volatile("lxvw4x %x0, 0, %3;"
               "mfvsrd %1, %x0;"
               "mfvsrld %2, %x0;"
               : "=ws"(vrt8), "=r"(hi), "=r"(lo)
               : "r"(&rb32)
               : "memory");
  printf("lxvw4x:  hi %016lx lo %016lx \n", hi, lo);

  /* Load as eight 16-bit halfwords. */
  asm volatile("lxvh8x %x0, 0, %3;"
               "mfvsrd %1, %x0;"
               "mfvsrld %2, %x0;"
               : "=ws"(vrt8), "=r"(hi), "=r"(lo)
               : "r"(&rb16)
               : "memory");
  printf("lxvh8x:  hi %016lx lo %016lx \n", hi, lo);

  /* Load as sixteen bytes. */
  asm volatile("lxvb16x %x0, 0, %3;"
               "mfvsrd %1, %x0;"
               "mfvsrld %2, %x0;"
               : "=ws"(vrt8), "=r"(hi), "=r"(lo)
               : "r"(&rb8)
               : "memory");
  printf("lxvb16x: hi %016lx lo %016lx \n", hi, lo);

  return EXIT_SUCCESS;
}

Reply via email to