The SSE2 instructions cvttps2dq, movdq2q and movq2dq do not behave
correctly under QEMU, as shown by the test program below.  It should print

  cvttps2dq_1 ... ok
  cvttps2dq_2 ... ok
  movdq2q_1 ... ok
  movq2dq_1 ... ok

but instead produces

  cvttps2dq_1 ... ok
  cvttps2dq_2 ... not ok
    result0.sd[0] = 12 (expected 12)
    result0.sd[1] = 3 (expected 56)
    result0.sd[2] = -2147483648 (expected 43)
    result0.sd[3] = 3 (expected 87)
  movdq2q_1 ... not ok
    result0.uq[0] = 1302123111658042420 (expected 5124095577148911)
  movq2dq_1 ... not ok
    result0.uq[0] = 1302123111658042420 (expected 5124095577148911)
    result0.uq[1] = 6221254864647256184 (expected 0)

I looked at QEMU's instruction decoders for these, and compared them
to Valgrind's, but could not see what the problem was.  The decode
logic looks OK.  Maybe the problem is elsewhere.

J

-------------------------------------------------------------------

#include <math.h>
#include <setjmp.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>

/*
 * Overlay for a single byte, viewable as char (sb) or unsigned char (ub).
 * Note that sb is plain char, whose signedness is implementation-defined.
 */
typedef union {
  char sb[1];
  unsigned char ub[1];
} reg8_t;

/*
 * Overlay for a two-byte value: two bytes (sb/ub) or one short (sw/uw).
 * All members share the same storage.
 */
typedef union {
  char sb[2];
  unsigned char ub[2];
  short sw[1];
  unsigned short uw[1];
} reg16_t;

/*
 * Overlay for a four-byte value: bytes (sb/ub), shorts (sw/uw), one
 * long (sd/ud) or one float (ps).  All members share the same storage.
 *
 * NOTE(review): sd/ud are declared as long, so they only match the other
 * four-byte views where long is 32 bits wide - presumably the intended
 * target is 32-bit x86; confirm before building for an LP64 platform.
 */
typedef union {
  char sb[4];
  unsigned char ub[4];
  short sw[2];
  unsigned short uw[2];
  long int sd[1];
  unsigned long int ud[1];
  float ps[1];
} reg32_t;

/*
 * Overlay for an eight-byte (MMX-sized) value, aligned to 8 bytes:
 * bytes (sb/ub), shorts (sw/uw), longs (sd/ud), one long long (sq/uq),
 * two floats (ps) or one double (pd).  All members share the same storage.
 *
 * NOTE(review): sd/ud hold two longs, which only fit in eight bytes where
 * long is 32 bits wide - presumably the intended target is 32-bit x86;
 * confirm before building for an LP64 platform.
 */
typedef union {
  char sb[8];
  unsigned char ub[8];
  short sw[4];
  unsigned short uw[4];
  long int sd[2];
  unsigned long int ud[2];
  long long int sq[1];
  unsigned long long int uq[1];
  float ps[2];
  double pd[1];
} reg64_t __attribute__ ((aligned (8)));

/*
 * Overlay for a sixteen-byte (XMM-sized) value, aligned to 16 bytes:
 * bytes (sb/ub), shorts (sw/uw), longs (sd/ud), long longs (sq/uq),
 * four floats (ps) or two doubles (pd).  All members share the same storage.
 *
 * NOTE(review): sd/ud hold four longs, which only fit in sixteen bytes
 * where long is 32 bits wide - presumably the intended target is 32-bit
 * x86; confirm before building for an LP64 platform.
 */
typedef union {
  char sb[16];
  unsigned char ub[16];
  short sw[8];
  unsigned short uw[8];
  long int sd[4];
  unsigned long int ud[4];
  long long int sq[2];
  unsigned long long int uq[2];
  float ps[4];
  double pd[2];
} reg128_t __attribute__ ((aligned (16)));

/* Saved context that handle_sigill() jumps back to. */
static sigjmp_buf catchpoint;

/*
 * Signal handler: abandons whatever was executing and resumes at the most
 * recent sigsetjmp(catchpoint, ...), which then returns 1.
 */
static void handle_sigill(int signum)
{
   (void)signum;   /* the signal number itself is not needed */

   siglongjmp(catchpoint, 1);
}

/*
 * Approximate comparison of two floats.
 *
 * Returns 1 when f1 and f2 compare equal, or when their absolute difference
 * is below |f1| * 1.5 * 2^-12; returns 0 otherwise.  The tolerance is scaled
 * by f1 only, so the comparison is not symmetric in its arguments.
 */
__attribute__((unused))
static int eq_float(float f1, float f2)
{
   if (f1 == f2)
   {
      return 1;
   }

   return fabsf(f1 - f2) < fabsf(f1) * 1.5 * pow(2,-12);
}

/*
 * Approximate comparison of two doubles.
 *
 * Returns 1 when d1 and d2 compare equal, or when their absolute difference
 * is below |d1| * 1.5 * 2^-12; returns 0 otherwise.  The tolerance is scaled
 * by d1 only, so the comparison is not symmetric in its arguments.
 */
__attribute__((unused))
static int eq_double(double d1, double d2)
{
   if (d1 == d2)
   {
      return 1;
   }

   return fabs(d1 - d2) < fabs(d1) * 1.5 * pow(2,-12);
}

static void cvttps2dq_1(void)
{
   reg128_t arg0 = { .ps = { 12.34F, 56.78F, 43.21F, 87.65F } };
   reg128_t arg1 = { .sd = { 1L, 2L, 3L, 4L } };
   reg128_t result0;
   char state[108];

   if (sigsetjmp(catchpoint, 1) == 0)
   {
      asm(
         "fsave %3\n"
         "movlps 0%0, %%xmm4\n"
         "movhps 8%0, %%xmm4\n"
         "movlps 0%1, %%xmm5\n"
         "movhps 8%1, %%xmm5\n"
         "cvttps2dq %%xmm4, %%xmm5\n"
         "movlps %%xmm5, 0%2\n"
         "movhps %%xmm5, 8%2\n"
         "frstor %3\n"
         :
         : "m" (arg0), "m" (arg1), "m" (result0), "m" (state[0])
         : "xmm4", "xmm5"
      );

      if (result0.sd[0] == 12L && result0.sd[1] == 56L && result0.sd[2] == 43L 
&& result0.sd[3] == 87L )
      {
         printf("cvttps2dq_1 ... ok\n");
      }
      else
      {
         printf("cvttps2dq_1 ... not ok\n");
         printf("  result0.sd[0] = %ld (expected %ld)\n", result0.sd[0], 12L);
         printf("  result0.sd[1] = %ld (expected %ld)\n", result0.sd[1], 56L);
         printf("  result0.sd[2] = %ld (expected %ld)\n", result0.sd[2], 43L);
         printf("  result0.sd[3] = %ld (expected %ld)\n", result0.sd[3], 87L);
      }
   }
   else
   {
      printf("cvttps2dq_1 ... failed\n");
   }

   return;
}

static void cvttps2dq_2(void)
{
   reg128_t arg0 = { .ps = { 12.34F, 56.78F, 43.21F, 87.65F } };
   reg128_t arg1 = { .sd = { 1L, 2L, 3L, 4L } };
   reg128_t result0;
   char state[108];

   if (sigsetjmp(catchpoint, 1) == 0)
   {
      asm(
         "fsave %3\n"
         "movlps 0%1, %%xmm5\n"
         "movhps 8%1, %%xmm5\n"
         "cvttps2dq %0, %%xmm5\n"
         "movlps %%xmm5, 0%2\n"
         "movhps %%xmm5, 8%2\n"
         "frstor %3\n"
         :
         : "m" (arg0), "m" (arg1), "m" (result0), "m" (state[0])
         : "xmm4", "xmm5"
      );

      if (result0.sd[0] == 12L && result0.sd[1] == 56L && result0.sd[2] == 43L 
&& result0.sd[3] == 87L )
      {
         printf("cvttps2dq_2 ... ok\n");
      }
      else
      {
         printf("cvttps2dq_2 ... not ok\n");
         printf("  result0.sd[0] = %ld (expected %ld)\n", result0.sd[0], 12L);
         printf("  result0.sd[1] = %ld (expected %ld)\n", result0.sd[1], 56L);
         printf("  result0.sd[2] = %ld (expected %ld)\n", result0.sd[2], 43L);
         printf("  result0.sd[3] = %ld (expected %ld)\n", result0.sd[3], 87L);
      }
   }
   else
   {
      printf("cvttps2dq_2 ... failed\n");
   }

   return;
}

/*
 * Test MOVDQ2Q (low quadword of xmm4 -> mm6).
 *
 * Loads arg0 into xmm4 and a placeholder value into mm6, executes movdq2q,
 * stores mm6 to result0 and compares it with the low quadword of arg0.
 * Prints "ok", "not ok" (followed by the value) or, when
 * sigsetjmp(catchpoint) returns non-zero because a handler jumped back
 * here, "failed".
 */
static void movdq2q_1(void)
{
   reg128_t arg0 = { .uq = { 0x012345678abcdefULL, 0xfedcba9876543210ULL } };
   reg64_t arg1 = { .uq = { 0x1212121234343434ULL } };
   reg64_t result0;
   char state[108];   /* save area for the fsave/frstor pair */

   if (sigsetjmp(catchpoint, 1) == 0)
   {
      /*
       * result0 and state are written by the asm, so they are declared as
       * outputs; the "memory" clobber and volatile keep the compiler from
       * caching or reordering around the block.  Each 64-bit half of arg0
       * is its own operand instead of pasting "0"/"8" in front of the
       * operand text, which produced wrong displacements (e.g.
       * "816(%esp)") whenever the compiler chose an address with a
       * positive offset.
       */
      __asm__ __volatile__(
         "fsave %[state]\n"
         "movlps %[src_lo], %%xmm4\n"
         "movhps %[src_hi], %%xmm4\n"
         "movq %[dst], %%mm6\n"
         "movdq2q %%xmm4, %%mm6\n"
         "movq %%mm6, %[res]\n"
         "frstor %[state]\n"
         : [res] "=m" (result0.uq[0]), [state] "=m" (state[0])
         : [src_lo] "m" (arg0.uq[0]), [src_hi] "m" (arg0.uq[1]),
           [dst] "m" (arg1.uq[0])
         : "xmm4", "mm6", "memory"
      );

      if (result0.uq[0] == 0x012345678abcdefULL)
      {
         printf("movdq2q_1 ... ok\n");
      }
      else
      {
         printf("movdq2q_1 ... not ok\n");
         printf("  result0.uq[0] = %llu (expected %llu)\n", result0.uq[0],
                0x012345678abcdefULL);
      }
   }
   else
   {
      printf("movdq2q_1 ... failed\n");
   }

   return;
}

/*
 * Test MOVQ2DQ (mm6 -> low quadword of xmm4, high quadword zeroed).
 *
 * Loads arg0 into mm6 and a placeholder value into xmm4, executes movq2dq,
 * stores xmm4 to result0 and checks that the low quadword equals arg0 and
 * the high quadword is zero.  Prints "ok", "not ok" (followed by both
 * values) or, when sigsetjmp(catchpoint) returns non-zero because a handler
 * jumped back here, "failed".
 */
static void movq2dq_1(void)
{
   reg64_t arg0 = { .uq = { 0x012345678abcdefULL } };
   reg128_t arg1 = { .uq = { 0x1212121234343434ULL, 0x5656565678787878ULL } };
   reg128_t result0;
   char state[108];   /* save area for the fsave/frstor pair */

   if (sigsetjmp(catchpoint, 1) == 0)
   {
      /*
       * result0 and state are written by the asm, so they are declared as
       * outputs; the "memory" clobber and volatile keep the compiler from
       * caching or reordering around the block.  Each 64-bit half is its own
       * operand instead of pasting "0"/"8" in front of the operand text,
       * which produced wrong displacements (e.g. "816(%esp)") whenever the
       * compiler chose an address with a positive offset.
       */
      __asm__ __volatile__(
         "fsave %[state]\n"
         "movq %[src], %%mm6\n"
         "movlps %[dst_lo], %%xmm4\n"
         "movhps %[dst_hi], %%xmm4\n"
         "movq2dq %%mm6, %%xmm4\n"
         "movlps %%xmm4, %[res_lo]\n"
         "movhps %%xmm4, %[res_hi]\n"
         "frstor %[state]\n"
         : [res_lo] "=m" (result0.uq[0]), [res_hi] "=m" (result0.uq[1]),
           [state] "=m" (state[0])
         : [src] "m" (arg0.uq[0]),
           [dst_lo] "m" (arg1.uq[0]), [dst_hi] "m" (arg1.uq[1])
         : "mm6", "xmm4", "memory"
      );

      if (result0.uq[0] == 0x012345678abcdefULL && result0.uq[1] == 0ULL)
      {
         printf("movq2dq_1 ... ok\n");
      }
      else
      {
         printf("movq2dq_1 ... not ok\n");
         printf("  result0.uq[0] = %llu (expected %llu)\n", result0.uq[0],
                0x012345678abcdefULL);
         printf("  result0.uq[1] = %llu (expected %llu)\n", result0.uq[1],
                0ULL);
      }
   }
   else
   {
      printf("movq2dq_1 ... failed\n");
   }

   return;
}

int main(int argc, char **argv)
{
   signal(SIGILL, handle_sigill);

   cvttps2dq_1();
   cvttps2dq_2();
   movdq2q_1();
   movq2dq_1();

   exit(0);
}


_______________________________________________
Qemu-devel mailing list
Qemu-devel@nongnu.org
http://lists.nongnu.org/mailman/listinfo/qemu-devel

Reply via email to