On 9/7/23 01:31, Song Gao wrote:
  void HELPER(vshuf_b)(void *vd, void *vj, void *vk, void *va, uint32_t desc)
  {
      int i, m;
-    VReg temp;
+    VReg temp = {};
      VReg *Vd = (VReg *)vd;
      VReg *Vj = (VReg *)vj;
      VReg *Vk = (VReg *)vk;
      VReg *Va = (VReg *)va;
+    int oprsz = simd_oprsz(desc);
- m = LSX_LEN/8;
-    for (i = 0; i < m ; i++) {
+    m = LSX_LEN / 8;
+    for (i = 0; i < m; i++) {
          uint64_t k = (uint8_t)Va->B(i) % (2 * m);
          temp.B(i) = k < m ? Vk->B(k) : Vj->B(k - m);
      }
+    if (oprsz == 32) {
+        for(i = m; i < 2 * m; i++) {
+            uint64_t j = (uint8_t)Va->B(i) % (2 * m);
+            temp.B(i) = j < m ? Vk->B(j + m) : Vj->B(j);
+        }
+    }

This should be a loop, not a compare against oprsz.  There are several instances of this.

+void HELPER(vpermi_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
 {
     VReg temp;
     VReg *Vd = (VReg *)vd;
     VReg *Vj = (VReg *)vj;
+ temp.Q(0) = (imm & 0x3) > 1 ? Vd->Q((imm & 0x3) - 2) : Vj->Q(imm & 0x3);
+    temp.Q(1) = ((imm >> 4) & 0x3) > 1 ? Vd->Q(((imm >> 4) & 0x3) - 2) :
+                                         Vj->Q((imm >> 4) & 0x3);


    for (i = 0; i < 2; i++, imm >>= 4) {
       temp.Q(i) = (imm & 2 ? Vd : Vj)->Q(imm & 1);
    }


r~

Reply via email to