================
@@ -4718,6 +4726,26 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
           return APInt(8, 0);
         });
 
+  case X86::BI__builtin_ia32_palignr128:
+  case X86::BI__builtin_ia32_palignr256:
+  case X86::BI__builtin_ia32_palignr512:
+    return interp__builtin_ia32_shuffle_generic(
+        S, OpPC, Call, [](unsigned DstIdx, unsigned Shift, unsigned NumElems) {
+          // Default to -1 → zero-fill this destination element
+          unsigned VecIdx = 0;
+          int ElemIdx = -1;
+
+          // Elements come from VecB first, then VecA after the shift boundary
+          unsigned ShiftedIdx = DstIdx + Shift;
+          if (ShiftedIdx < NumElems) { // from VecB
----------------
RKSimon wrote:

These should be NumElemsPerLane rather than NumElems. Given that the palignr
builtins are hardcoded to `<X x i8>` types, I think you can just assume
NumElemsPerLane == 16 (and remove the NumElems arg again from
interp__builtin_ia32_shuffle_generic - sorry about that!).

https://github.com/llvm/llvm-project/pull/162005
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to