[PATCH] D140773: [WebAssembly] Use `shufflevector` for shuffle

Petr Penzin via Phabricator via cfe-commits Thu, 29 Dec 2022 23:12:44 -0800

penzn created this revision.
penzn added a reviewer: tlively.
Herald added subscribers: pmatos, asb, ecnelises, sunfish, hiraditya, 
jgravelle-google, sbc100, dschuff.
Herald added a project: All.
penzn requested review of this revision.
Herald added subscribers: llvm-commits, cfe-commits, aheejin.
Herald added projects: clang, LLVM.


Back out D66983 <https://reviews.llvm.org/D66983>, "[WebAssembly] Add 
wasm-specific vector shuffle builtin and
intrinsic".

      

Fix shuffle intrinsic tests. Since they are compiled with -O2, the masks need
to be updated as well; otherwise the optimizer would replace the second operand
with `poison`.

      

Fix shuffle codegen tests. They require some mask updates to avoid invalid
uses of `shufflevector` and to make sure both operands are actually referenced.

      

This reverts commit 8e3e56f2a36701480eeb65e426701d5a9025cc59 
<https://reviews.llvm.org/rG8e3e56f2a36701480eeb65e426701d5a9025cc59>.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D140773

Files:
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/lib/Headers/wasm_simd128.h
  clang/test/CodeGen/builtins-wasm.c
  clang/test/Headers/wasm.c
  llvm/include/llvm/IR/IntrinsicsWebAssembly.td
  llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
  llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll

Index: llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll
===================================================================
--- llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll
+++ llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll
@@ -154,15 +154,10 @@
 ; NO-CHECK-NOT: i8x16
 ; CHECK-NEXT: .functype shuffle_v16i8 (v128, v128) -> (v128){{$}}
 ; CHECK-NEXT: i8x16.shuffle $push[[R:[0-9]+]]=, $0, $1,
-; CHECK-SAME: 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 0{{$}}
+; CHECK-SAME: 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23{{$}}
 ; CHECK-NEXT: return $pop[[R]]{{$}}
-declare <16 x i8> @llvm.wasm.shuffle(
-  <16 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32,
-  i32, i32, i32, i32, i32)
 define <16 x i8> @shuffle_v16i8(<16 x i8> %x, <16 x i8> %y) {
-  %res = call <16 x i8> @llvm.wasm.shuffle(<16 x i8> %x, <16 x i8> %y,
-      i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
-      i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 35)
+  %res = shufflevector <16 x i8> %x, <16 x i8> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
   ret <16 x i8> %res
 }
 
@@ -170,13 +165,13 @@
 ; NO-CHECK-NOT: i8x16
 ; CHECK-NEXT: .functype shuffle_undef_v16i8 (v128, v128) -> (v128){{$}}
 ; CHECK-NEXT: i8x16.shuffle $push[[R:[0-9]+]]=, $0, $1,
-; CHECK-SAME: 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2{{$}}
+; CHECK-SAME: 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25{{$}}
 ; CHECK-NEXT: return $pop[[R]]{{$}}
 define <16 x i8> @shuffle_undef_v16i8(<16 x i8> %x, <16 x i8> %y) {
-  %res = call <16 x i8> @llvm.wasm.shuffle(<16 x i8> %x, <16 x i8> %y,
+  %res = shufflevector <16 x i8> %x, <16 x i8> %y, <16 x i32> <
       i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
       i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
-      i32 undef, i32 undef, i32 undef, i32 2)
+      i32 undef, i32 undef, i32 undef, i32 25>
   ret <16 x i8> %res
 }
 
Index: llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
===================================================================
--- llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -1806,24 +1806,6 @@
     SDValue Node = DAG.getTargetExternalSymbol(SymName, PtrVT);
     return DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT, Node);
   }
-
-  case Intrinsic::wasm_shuffle: {
-    // Drop in-chain and replace undefs, but otherwise pass through unchanged
-    SDValue Ops[18];
-    size_t OpIdx = 0;
-    Ops[OpIdx++] = Op.getOperand(1);
-    Ops[OpIdx++] = Op.getOperand(2);
-    while (OpIdx < 18) {
-      const SDValue &MaskIdx = Op.getOperand(OpIdx + 1);
-      if (MaskIdx.isUndef() ||
-          cast<ConstantSDNode>(MaskIdx.getNode())->getZExtValue() >= 32) {
-        Ops[OpIdx++] = DAG.getConstant(0, DL, MVT::i32);
-      } else {
-        Ops[OpIdx++] = MaskIdx;
-      }
-    }
-    return DAG.getNode(WebAssemblyISD::SHUFFLE, DL, Op.getValueType(), Ops);
-  }
   }
 }
 
Index: llvm/include/llvm/IR/IntrinsicsWebAssembly.td
===================================================================
--- llvm/include/llvm/IR/IntrinsicsWebAssembly.td
+++ llvm/include/llvm/IR/IntrinsicsWebAssembly.td
@@ -168,14 +168,6 @@
   DefaultAttrsIntrinsic<[llvm_v16i8_ty],
                         [llvm_v16i8_ty, llvm_v16i8_ty],
                         [IntrNoMem, IntrSpeculatable]>;
-def int_wasm_shuffle :
-  DefaultAttrsIntrinsic<[llvm_v16i8_ty],
-                        [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty,
-                         llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
-                         llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
-                         llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
-                         llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
-                        [IntrNoMem, IntrSpeculatable]>;
 def int_wasm_sub_sat_signed :
   DefaultAttrsIntrinsic<[llvm_anyvector_ty],
                         [LLVMMatchType<0>, LLVMMatchType<0>],
Index: clang/test/Headers/wasm.c
===================================================================
--- clang/test/Headers/wasm.c
+++ clang/test/Headers/wasm.c
@@ -2706,48 +2706,48 @@
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.shuffle(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0)
+// CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i32> <i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
 // CHECK-NEXT:    ret <4 x i32> [[TMP3]]
 //
 v128_t test_i8x16_shuffle(v128_t a, v128_t b) {
-  return wasm_i8x16_shuffle(a, b, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+  return wasm_i8x16_shuffle(a, b, 23, 22, 21, 20, 19, 18, 17, 16, 7, 6, 5, 4, 3, 2, 1, 0);
 }
 
 // CHECK-LABEL: @test_i16x8_shuffle(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.shuffle(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 14, i32 15, i32 12, i32 13, i32 10, i32 11, i32 8, i32 9, i32 6, i32 7, i32 4, i32 5, i32 2, i32 3, i32 0, i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i32> <i32 22, i32 23, i32 20, i32 21, i32 18, i32 19, i32 16, i32 17, i32 6, i32 7, i32 4, i32 5, i32 2, i32 3, i32 0, i32 1>
 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
 // CHECK-NEXT:    ret <4 x i32> [[TMP3]]
 //
 v128_t test_i16x8_shuffle(v128_t a, v128_t b) {
-  return wasm_i16x8_shuffle(a, b, 7, 6, 5, 4, 3, 2, 1, 0);
+  return wasm_i16x8_shuffle(a, b, 11, 10, 9, 8, 3, 2, 1, 0);
 }
 
 // CHECK-LABEL: @test_i32x4_shuffle(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.shuffle(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3)
+// CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i32> <i32 20, i32 21, i32 22, i32 23, i32 16, i32 17, i32 18, i32 19, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
 // CHECK-NEXT:    ret <4 x i32> [[TMP3]]
 //
 v128_t test_i32x4_shuffle(v128_t a, v128_t b) {
-  return wasm_i32x4_shuffle(a, b, 3, 2, 1, 0);
+  return wasm_i32x4_shuffle(a, b, 5, 4, 1, 0);
 }
 
 // CHECK-LABEL: @test_i64x2_shuffle(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.shuffle(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7)
+// CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
 // CHECK-NEXT:    ret <4 x i32> [[TMP3]]
 //
 v128_t test_i64x2_shuffle(v128_t a, v128_t b) {
-  return wasm_i64x2_shuffle(a, b, 1, 0);
+  return wasm_i64x2_shuffle(a, b, 2, 0);
 }
 
 // CHECK-LABEL: @test_i8x16_swizzle(
Index: clang/test/CodeGen/builtins-wasm.c
===================================================================
--- clang/test/CodeGen/builtins-wasm.c
+++ clang/test/CodeGen/builtins-wasm.c
@@ -649,15 +649,6 @@
 i8x16 swizzle_i8x16(i8x16 x, i8x16 y) {
   return __builtin_wasm_swizzle_i8x16(x, y);
   // WEBASSEMBLY: call <16 x i8> @llvm.wasm.swizzle(<16 x i8> %x, <16 x i8> %y)
-}
-
-i8x16 shuffle(i8x16 x, i8x16 y) {
-  return __builtin_wasm_shuffle_i8x16(x, y, 0, 1, 2, 3, 4, 5, 6, 7,
-                                      8, 9, 10, 11, 12, 13, 14, 15);
-  // WEBASSEMBLY: call <16 x i8> @llvm.wasm.shuffle(<16 x i8> %x, <16 x i8> %y,
-  // WEBASSEMBLY-SAME: i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
-  // WEBASSEMBLY-SAME: i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14,
-  // WEBASSEMBLY-SAME: i32 15
   // WEBASSEMBLY-NEXT: ret
 }
 
Index: clang/lib/Headers/wasm_simd128.h
===================================================================
--- clang/lib/Headers/wasm_simd128.h
+++ clang/lib/Headers/wasm_simd128.h
@@ -1429,31 +1429,31 @@
 #define wasm_i8x16_shuffle(__a, __b, __c0, __c1, __c2, __c3, __c4, __c5, __c6, \
                            __c7, __c8, __c9, __c10, __c11, __c12, __c13,       \
                            __c14, __c15)                                       \
-  ((v128_t)__builtin_wasm_shuffle_i8x16(                                       \
-      (__i8x16)(__a), (__i8x16)(__b), __c0, __c1, __c2, __c3, __c4, __c5,      \
-      __c6, __c7, __c8, __c9, __c10, __c11, __c12, __c13, __c14, __c15))
+  ((v128_t)(__builtin_shufflevector(                                           \
+      (__u8x16)(__a), (__u8x16)(__b), __c0, __c1, __c2, __c3, __c4, __c5,      \
+      __c6, __c7, __c8, __c9, __c10, __c11, __c12, __c13, __c14, __c15)))
 
 #define wasm_i16x8_shuffle(__a, __b, __c0, __c1, __c2, __c3, __c4, __c5, __c6, \
                            __c7)                                               \
-  ((v128_t)__builtin_wasm_shuffle_i8x16(                                       \
+  ((v128_t)(__builtin_shufflevector(                                           \
       (__i8x16)(__a), (__i8x16)(__b), (__c0)*2, (__c0)*2 + 1, (__c1)*2,        \
       (__c1)*2 + 1, (__c2)*2, (__c2)*2 + 1, (__c3)*2, (__c3)*2 + 1, (__c4)*2,  \
       (__c4)*2 + 1, (__c5)*2, (__c5)*2 + 1, (__c6)*2, (__c6)*2 + 1, (__c7)*2,  \
-      (__c7)*2 + 1))
+      (__c7)*2 + 1)))
 
 #define wasm_i32x4_shuffle(__a, __b, __c0, __c1, __c2, __c3)                   \
-  ((v128_t)__builtin_wasm_shuffle_i8x16(                                       \
+  ((v128_t)(__builtin_shufflevector(                                           \
       (__i8x16)(__a), (__i8x16)(__b), (__c0)*4, (__c0)*4 + 1, (__c0)*4 + 2,    \
       (__c0)*4 + 3, (__c1)*4, (__c1)*4 + 1, (__c1)*4 + 2, (__c1)*4 + 3,        \
       (__c2)*4, (__c2)*4 + 1, (__c2)*4 + 2, (__c2)*4 + 3, (__c3)*4,            \
-      (__c3)*4 + 1, (__c3)*4 + 2, (__c3)*4 + 3))
+      (__c3)*4 + 1, (__c3)*4 + 2, (__c3)*4 + 3)))
 
 #define wasm_i64x2_shuffle(__a, __b, __c0, __c1)                               \
-  ((v128_t)__builtin_wasm_shuffle_i8x16(                                       \
+  ((v128_t)(__builtin_shufflevector(                                           \
       (__i8x16)(__a), (__i8x16)(__b), (__c0)*8, (__c0)*8 + 1, (__c0)*8 + 2,    \
       (__c0)*8 + 3, (__c0)*8 + 4, (__c0)*8 + 5, (__c0)*8 + 6, (__c0)*8 + 7,    \
       (__c1)*8, (__c1)*8 + 1, (__c1)*8 + 2, (__c1)*8 + 3, (__c1)*8 + 4,        \
-      (__c1)*8 + 5, (__c1)*8 + 6, (__c1)*8 + 7))
+      (__c1)*8 + 5, (__c1)*8 + 6, (__c1)*8 + 7)))
 
 static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_swizzle(v128_t __a,
                                                                v128_t __b) {
Index: clang/lib/CodeGen/CGBuiltin.cpp
===================================================================
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -19012,20 +19012,6 @@
     Value *Splat = Constant::getNullValue(TruncT);
     return Builder.CreateShuffleVector(Trunc, Splat, ArrayRef<int>{0, 1, 2, 3});
   }
-  case WebAssembly::BI__builtin_wasm_shuffle_i8x16: {
-    Value *Ops[18];
-    size_t OpIdx = 0;
-    Ops[OpIdx++] = EmitScalarExpr(E->getArg(0));
-    Ops[OpIdx++] = EmitScalarExpr(E->getArg(1));
-    while (OpIdx < 18) {
-      Optional<llvm::APSInt> LaneConst =
-          E->getArg(OpIdx)->getIntegerConstantExpr(getContext());
-      assert(LaneConst && "Constant arg isn't actually constant?");
-      Ops[OpIdx++] = llvm::ConstantInt::get(getLLVMContext(), *LaneConst);
-    }
-    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_shuffle);
-    return Builder.CreateCall(Callee, Ops);
-  }
   case WebAssembly::BI__builtin_wasm_relaxed_madd_f32x4:
   case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f32x4:
   case WebAssembly::BI__builtin_wasm_relaxed_madd_f64x2:

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D140773: [WebAssembly] Use `shufflevector` for shuffle

Reply via email to