[PATCH] D67020: [WebAssembly] Add SIMD QFMA/QFMS

Thomas Lively via Phabricator via cfe-commits Fri, 30 Aug 2019 14:26:08 -0700

tlively created this revision.
tlively added reviewers: aheejin, dschuff.
Herald added subscribers: llvm-commits, cfe-commits, sunfish, hiraditya, 
jgravelle-google, sbc100.
Herald added projects: clang, LLVM.


Adds clang builtins and LLVM intrinsics for these experimental
instructions. They are not implemented in engines yet, but that is ok
because the user must opt into using them by calling the builtins.

Depends on D66983 <https://reviews.llvm.org/D66983>.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D67020

Files:
  clang/include/clang/Basic/BuiltinsWebAssembly.def
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/builtins-wasm.c
  llvm/include/llvm/IR/IntrinsicsWebAssembly.td
  llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
  llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll
  llvm/test/MC/WebAssembly/simd-encodings.s

Index: llvm/test/MC/WebAssembly/simd-encodings.s
===================================================================
--- llvm/test/MC/WebAssembly/simd-encodings.s
+++ llvm/test/MC/WebAssembly/simd-encodings.s
@@ -382,6 +382,12 @@
     # CHECK: f32x4.sqrt # encoding: [0xfd,0x97,0x01]
     f32x4.sqrt
 
+    # CHECK: f32x4.qfma # encoding: [0xfd,0x98,0x01]
+    f32x4.qfma
+
+    # CHECK: f32x4.qfms # encoding: [0xfd,0x99,0x01]
+    f32x4.qfms
+
     # CHECK: f32x4.add # encoding: [0xfd,0x9a,0x01]
     f32x4.add
 
@@ -409,6 +415,12 @@
     # CHECK: f64x2.sqrt # encoding: [0xfd,0xa2,0x01]
     f64x2.sqrt
 
+    # CHECK: f64x2.qfma # encoding: [0xfd,0xa3,0x01]
+    f64x2.qfma
+
+    # CHECK: f64x2.qfms # encoding: [0xfd,0xa4,0x01]
+    f64x2.qfms
+
     # CHECK: f64x2.add # encoding: [0xfd,0xa5,0x01]
     f64x2.add
 
Index: llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll
===================================================================
--- llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll
+++ llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll
@@ -320,11 +320,35 @@
 declare <4 x float> @llvm.wasm.bitselect.v4f32(<4 x float>, <4 x float>, <4 x float>)
 define <4 x float> @bitselect_v4f32(<4 x float> %v1, <4 x float> %v2, <4 x float> %c) {
   %a = call <4 x float> @llvm.wasm.bitselect.v4f32(
-     <4 x float> %v1, <4 x float> %v2, <4 x float> %c
+    <4 x float> %v1, <4 x float> %v2, <4 x float> %c
   )
   ret <4 x float> %a
 }
 
+; CHECK-LABEL: qfma_v4f32:
+; SIMD128-NEXT: .functype qfma_v4f32 (v128, v128, v128) -> (v128){{$}}
+; SIMD128-NEXT: f32x4.qfma $push[[R:[0-9]+]]=, $0, $1, $2{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+declare <4 x float> @llvm.wasm.qfma.v4f32(<4 x float>, <4 x float>, <4 x float>)
+define <4 x float> @qfma_v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
+  %v = call <4 x float> @llvm.wasm.qfma.v4f32(
+    <4 x float> %a, <4 x float> %b, <4 x float> %c
+  )
+  ret <4 x float> %v
+}
+
+; CHECK-LABEL: qfms_v4f32:
+; SIMD128-NEXT: .functype qfms_v4f32 (v128, v128, v128) -> (v128){{$}}
+; SIMD128-NEXT: f32x4.qfms $push[[R:[0-9]+]]=, $0, $1, $2{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+declare <4 x float> @llvm.wasm.qfms.v4f32(<4 x float>, <4 x float>, <4 x float>)
+define <4 x float> @qfms_v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
+  %v = call <4 x float> @llvm.wasm.qfms.v4f32(
+    <4 x float> %a, <4 x float> %b, <4 x float> %c
+  )
+  ret <4 x float> %v
+}
+
 ; ==============================================================================
 ; 2 x f64
 ; ==============================================================================
@@ -339,3 +363,27 @@
   )
   ret <2 x double> %a
 }
+
+; CHECK-LABEL: qfma_v2f64:
+; SIMD128-NEXT: .functype qfma_v2f64 (v128, v128, v128) -> (v128){{$}}
+; SIMD128-NEXT: f64x2.qfma $push[[R:[0-9]+]]=, $0, $1, $2{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+declare <2 x double> @llvm.wasm.qfma.v2f64(<2 x double>, <2 x double>, <2 x double>)
+define <2 x double> @qfma_v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
+  %v = call <2 x double> @llvm.wasm.qfma.v2f64(
+    <2 x double> %a, <2 x double> %b, <2 x double> %c
+  )
+  ret <2 x double> %v
+}
+
+; CHECK-LABEL: qfms_v2f64:
+; SIMD128-NEXT: .functype qfms_v2f64 (v128, v128, v128) -> (v128){{$}}
+; SIMD128-NEXT: f64x2.qfms $push[[R:[0-9]+]]=, $0, $1, $2{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+declare <2 x double> @llvm.wasm.qfms.v2f64(<2 x double>, <2 x double>, <2 x double>)
+define <2 x double> @qfms_v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
+  %v = call <2 x double> @llvm.wasm.qfms.v2f64(
+    <2 x double> %a, <2 x double> %b, <2 x double> %c
+  )
+  ret <2 x double> %v
+}
Index: llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
===================================================================
--- llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -732,3 +732,24 @@
   )
 ) in
 def : Pat<(t1 (bitconvert (t2 V128:$v))), (t1 V128:$v)>;
+
+//===----------------------------------------------------------------------===//
+// Quasi-Fused Multiply- Add and Subtract (QFMA/QFMS)
+//===----------------------------------------------------------------------===//
+multiclass SIMDQFM<ValueType vec_t, string vec, bits<32> baseInst> {
+  defm QFMA_#vec_t :
+    SIMD_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c),
+           (outs), (ins),
+           [(set (vec_t V128:$dst),
+             (int_wasm_qfma (vec_t V128:$a), (vec_t V128:$b), (vec_t V128:$c)))],
+           vec#".qfma\t$dst, $a, $b, $c", vec#".qfma", baseInst>;
+  defm QFMS_#vec_t :
+    SIMD_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c),
+           (outs), (ins),
+           [(set (vec_t V128:$dst),
+             (int_wasm_qfms (vec_t V128:$a), (vec_t V128:$b), (vec_t V128:$c)))],
+           vec#".qfms\t$dst, $a, $b, $c", vec#".qfms", !add(baseInst, 1)>;
+}
+
+defm "" : SIMDQFM<v4f32, "f32x4", 0x98>;
+defm "" : SIMDQFM<v2f64, "f64x2", 0xa3>;
Index: llvm/include/llvm/IR/IntrinsicsWebAssembly.td
===================================================================
--- llvm/include/llvm/IR/IntrinsicsWebAssembly.td
+++ llvm/include/llvm/IR/IntrinsicsWebAssembly.td
@@ -109,6 +109,14 @@
   Intrinsic<[llvm_i32_ty],
             [llvm_anyvector_ty],
             [IntrNoMem, IntrSpeculatable]>;
+def int_wasm_qfma :
+  Intrinsic<[llvm_anyvector_ty],
+            [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
+            [IntrNoMem, IntrSpeculatable]>;
+def int_wasm_qfms :
+  Intrinsic<[llvm_anyvector_ty],
+            [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
+            [IntrNoMem, IntrSpeculatable]>;
 def int_wasm_shuffle :
   Intrinsic<[llvm_v16i8_ty],
             [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty, llvm_i32_ty,
Index: clang/test/CodeGen/builtins-wasm.c
===================================================================
--- clang/test/CodeGen/builtins-wasm.c
+++ clang/test/CodeGen/builtins-wasm.c
@@ -412,6 +412,34 @@
   // WEBASSEMBLY: ret
 }
 
+f32x4 qfma_f32x4(f32x4 a, f32x4 b, f32x4 c) {
+  return __builtin_wasm_qfma_f32x4(a, b, c);
+  // WEBASSEMBLY: call <4 x float> @llvm.wasm.qfma.v4f32(
+  // WEBASSEMBLY-SAME: <4 x float> %a, <4 x float> %b, <4 x float> %c)
+  // WEBASSEMBLY-NEXT: ret
+}
+
+f32x4 qfms_f32x4(f32x4 a, f32x4 b, f32x4 c) {
+  return __builtin_wasm_qfms_f32x4(a, b, c);
+  // WEBASSEMBLY: call <4 x float> @llvm.wasm.qfms.v4f32(
+  // WEBASSEMBLY-SAME: <4 x float> %a, <4 x float> %b, <4 x float> %c)
+  // WEBASSEMBLY-NEXT: ret
+}
+
+f64x2 qfma_f64x2(f64x2 a, f64x2 b, f64x2 c) {
+  return __builtin_wasm_qfma_f64x2(a, b, c);
+  // WEBASSEMBLY: call <2 x double> @llvm.wasm.qfma.v2f64(
+  // WEBASSEMBLY-SAME: <2 x double> %a, <2 x double> %b, <2 x double> %c)
+  // WEBASSEMBLY-NEXT: ret
+}
+
+f64x2 qfms_f64x2(f64x2 a, f64x2 b, f64x2 c) {
+  return __builtin_wasm_qfms_f64x2(a, b, c);
+  // WEBASSEMBLY: call <2 x double> @llvm.wasm.qfms.v2f64(
+  // WEBASSEMBLY-SAME: <2 x double> %a, <2 x double> %b, <2 x double> %c)
+  // WEBASSEMBLY-NEXT: ret
+}
+
 i32x4 trunc_saturate_s_i32x4_f32x4(f32x4 f) {
   return __builtin_wasm_trunc_saturate_s_i32x4_f32x4(f);
   // WEBASSEMBLY: call <4 x i32> @llvm.wasm.trunc.saturate.signed.v4i32.v4f32(<4 x float> %f)
Index: clang/lib/CodeGen/CGBuiltin.cpp
===================================================================
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -14173,6 +14173,29 @@
     Function *Callee = CGM.getIntrinsic(Intrinsic::sqrt, Vec->getType());
     return Builder.CreateCall(Callee, {Vec});
   }
+  case WebAssembly::BI__builtin_wasm_qfma_f32x4:
+  case WebAssembly::BI__builtin_wasm_qfms_f32x4:
+  case WebAssembly::BI__builtin_wasm_qfma_f64x2:
+  case WebAssembly::BI__builtin_wasm_qfms_f64x2: {
+    Value *A = EmitScalarExpr(E->getArg(0));
+    Value *B = EmitScalarExpr(E->getArg(1));
+    Value *C = EmitScalarExpr(E->getArg(2));
+    unsigned IntNo;
+    switch (BuiltinID) {
+    case WebAssembly::BI__builtin_wasm_qfma_f32x4:
+    case WebAssembly::BI__builtin_wasm_qfma_f64x2:
+      IntNo = Intrinsic::wasm_qfma;
+      break;
+    case WebAssembly::BI__builtin_wasm_qfms_f32x4:
+    case WebAssembly::BI__builtin_wasm_qfms_f64x2:
+      IntNo = Intrinsic::wasm_qfms;
+      break;
+    default:
+      llvm_unreachable("unexpected builtin ID");
+    }
+    Function *Callee = CGM.getIntrinsic(IntNo, A->getType());
+    return Builder.CreateCall(Callee, {A, B, C});
+  }
   case WebAssembly::BI__builtin_wasm_shuffle_v8x16: {
     Value *Ops[18];
     size_t OpIdx = 0;
Index: clang/include/clang/Basic/BuiltinsWebAssembly.def
===================================================================
--- clang/include/clang/Basic/BuiltinsWebAssembly.def
+++ clang/include/clang/Basic/BuiltinsWebAssembly.def
@@ -108,6 +108,11 @@
 TARGET_BUILTIN(__builtin_wasm_sqrt_f32x4, "V4fV4f", "nc", "unimplemented-simd128")
 TARGET_BUILTIN(__builtin_wasm_sqrt_f64x2, "V2dV2d", "nc", "unimplemented-simd128")
 
+TARGET_BUILTIN(__builtin_wasm_qfma_f32x4, "V4fV4fV4fV4f", "nc", "simd128")
+TARGET_BUILTIN(__builtin_wasm_qfms_f32x4, "V4fV4fV4fV4f", "nc", "simd128")
+TARGET_BUILTIN(__builtin_wasm_qfma_f64x2, "V2dV2dV2dV2d", "nc", "unimplemented-simd128")
+TARGET_BUILTIN(__builtin_wasm_qfms_f64x2, "V2dV2dV2dV2d", "nc", "unimplemented-simd128")
+
 TARGET_BUILTIN(__builtin_wasm_trunc_saturate_s_i32x4_f32x4, "V4iV4f", "nc", "simd128")
 TARGET_BUILTIN(__builtin_wasm_trunc_saturate_u_i32x4_f32x4, "V4iV4f", "nc", "simd128")
 TARGET_BUILTIN(__builtin_wasm_trunc_saturate_s_i64x2_f64x2, "V2LLiV2d", "nc", "unimplemented-simd128")

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D67020: [WebAssembly] Add SIMD QFMA/QFMS

Reply via email to