[PATCH] D100411: [WebAssembly] Use standard intrinsics for f32x4 and f64x2 ops

Thomas Lively via Phabricator via cfe-commits Tue, 13 Apr 2021 13:55:21 -0700

tlively created this revision.
tlively added reviewers: aheejin, dschuff.
Herald added subscribers: wingo, ecnelises, sunfish, hiraditya, 
jgravelle-google, sbc100.
tlively requested review of this revision.
Herald added projects: clang, LLVM.
Herald added subscribers: llvm-commits, cfe-commits.


Now that these instructions are no longer prototypes, we do not need to be
careful about keeping them opt-in and can use the standard LLVM infrastructure
for them. This commit removes the bespoke intrinsics we were using to represent
these operations in favor of the corresponding target-independent intrinsics.
The clang builtins are preserved because there is no standard way to easily
represent these operations in C/C++.

For consistency with the scalar codegen in the Wasm backend, the intrinsic used
to represent {f32x4,f64x2}.nearest is @llvm.nearbyint even though
@llvm.roundeven better captures the semantics of the underlying Wasm
instruction. Replacing our use of @llvm.nearbyint with use of @llvm.roundeven is
left to a potential future patch.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D100411

Files:
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/builtins-wasm.c
  llvm/include/llvm/IR/IntrinsicsWebAssembly.td
  llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
  llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
  llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll
  llvm/test/CodeGen/WebAssembly/simd-unsupported.ll

Index: llvm/test/CodeGen/WebAssembly/simd-unsupported.ll
===================================================================
--- llvm/test/CodeGen/WebAssembly/simd-unsupported.ll
+++ llvm/test/CodeGen/WebAssembly/simd-unsupported.ll
@@ -366,38 +366,6 @@
 ; 4 x f32
 ; ==============================================================================
 
-; CHECK-LABEL: ceil_v4f32:
-; CHECK: f32.ceil
-declare <4 x float> @llvm.ceil.v4f32(<4 x float>)
-define <4 x float> @ceil_v4f32(<4 x float> %x) {
-  %v = call <4 x float> @llvm.ceil.v4f32(<4 x float> %x)
-  ret <4 x float> %v
-}
-
-; CHECK-LABEL: floor_v4f32:
-; CHECK: f32.floor
-declare <4 x float> @llvm.floor.v4f32(<4 x float>)
-define <4 x float> @floor_v4f32(<4 x float> %x) {
-  %v = call <4 x float> @llvm.floor.v4f32(<4 x float> %x)
-  ret <4 x float> %v
-}
-
-; CHECK-LABEL: trunc_v4f32:
-; CHECK: f32.trunc
-declare <4 x float> @llvm.trunc.v4f32(<4 x float>)
-define <4 x float> @trunc_v4f32(<4 x float> %x) {
-  %v = call <4 x float> @llvm.trunc.v4f32(<4 x float> %x)
-  ret <4 x float> %v
-}
-
-; CHECK-LABEL: nearbyint_v4f32:
-; CHECK: f32.nearest
-declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>)
-define <4 x float> @nearbyint_v4f32(<4 x float> %x) {
-  %v = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %x)
-  ret <4 x float> %v
-}
-
 ; CHECK-LABEL: copysign_v4f32:
 ; CHECK: f32.copysign
 declare <4 x float> @llvm.copysign.v4f32(<4 x float>, <4 x float>)
@@ -498,38 +466,6 @@
 ; 2 x f64
 ; ==============================================================================
 
-; CHECK-LABEL: ceil_v2f64:
-; CHECK: f64.ceil
-declare <2 x double> @llvm.ceil.v2f64(<2 x double>)
-define <2 x double> @ceil_v2f64(<2 x double> %x) {
-  %v = call <2 x double> @llvm.ceil.v2f64(<2 x double> %x)
-  ret <2 x double> %v
-}
-
-; CHECK-LABEL: floor_v2f64:
-; CHECK: f64.floor
-declare <2 x double> @llvm.floor.v2f64(<2 x double>)
-define <2 x double> @floor_v2f64(<2 x double> %x) {
-  %v = call <2 x double> @llvm.floor.v2f64(<2 x double> %x)
-  ret <2 x double> %v
-}
-
-; CHECK-LABEL: trunc_v2f64:
-; CHECK: f64.trunc
-declare <2 x double> @llvm.trunc.v2f64(<2 x double>)
-define <2 x double> @trunc_v2f64(<2 x double> %x) {
-  %v = call <2 x double> @llvm.trunc.v2f64(<2 x double> %x)
-  ret <2 x double> %v
-}
-
-; CHECK-LABEL: nearbyint_v2f64:
-; CHECK: f64.nearest
-declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>)
-define <2 x double> @nearbyint_v2f64(<2 x double> %x) {
-  %v = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %x)
-  ret <2 x double> %v
-}
-
 ; CHECK-LABEL: copysign_v2f64:
 ; CHECK: f64.copysign
 declare <2 x double> @llvm.copysign.v2f64(<2 x double>, <2 x double>)
Index: llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll
===================================================================
--- llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll
+++ llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll
@@ -722,9 +722,9 @@
 ; CHECK-NEXT: .functype ceil_v4f32 (v128) -> (v128){{$}}
 ; CHECK-NEXT: f32x4.ceil $push[[R:[0-9]+]]=, $0{{$}}
 ; CHECK-NEXT: return $pop[[R]]{{$}}
-declare <4 x float> @llvm.wasm.ceil.v4f32(<4 x float>)
+declare <4 x float> @llvm.ceil.v4f32(<4 x float>)
 define <4 x float> @ceil_v4f32(<4 x float> %a) {
-  %v = call <4 x float> @llvm.wasm.ceil.v4f32(<4 x float> %a)
+  %v = call <4 x float> @llvm.ceil.v4f32(<4 x float> %a)
   ret <4 x float> %v
 }
 
@@ -732,9 +732,9 @@
 ; CHECK-NEXT: .functype floor_v4f32 (v128) -> (v128){{$}}
 ; CHECK-NEXT: f32x4.floor $push[[R:[0-9]+]]=, $0{{$}}
 ; CHECK-NEXT: return $pop[[R]]{{$}}
-declare <4 x float> @llvm.wasm.floor.v4f32(<4 x float>)
+declare <4 x float> @llvm.floor.v4f32(<4 x float>)
 define <4 x float> @floor_v4f32(<4 x float> %a) {
-  %v = call <4 x float> @llvm.wasm.floor.v4f32(<4 x float> %a)
+  %v = call <4 x float> @llvm.floor.v4f32(<4 x float> %a)
   ret <4 x float> %v
 }
 
@@ -742,9 +742,9 @@
 ; CHECK-NEXT: .functype trunc_v4f32 (v128) -> (v128){{$}}
 ; CHECK-NEXT: f32x4.trunc $push[[R:[0-9]+]]=, $0{{$}}
 ; CHECK-NEXT: return $pop[[R]]{{$}}
-declare <4 x float> @llvm.wasm.trunc.v4f32(<4 x float>)
+declare <4 x float> @llvm.trunc.v4f32(<4 x float>)
 define <4 x float> @trunc_v4f32(<4 x float> %a) {
-  %v = call <4 x float> @llvm.wasm.trunc.v4f32(<4 x float> %a)
+  %v = call <4 x float> @llvm.trunc.v4f32(<4 x float> %a)
   ret <4 x float> %v
 }
 
@@ -752,9 +752,9 @@
 ; CHECK-NEXT: .functype nearest_v4f32 (v128) -> (v128){{$}}
 ; CHECK-NEXT: f32x4.nearest $push[[R:[0-9]+]]=, $0{{$}}
 ; CHECK-NEXT: return $pop[[R]]{{$}}
-declare <4 x float> @llvm.wasm.nearest.v4f32(<4 x float>)
+declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>)
 define <4 x float> @nearest_v4f32(<4 x float> %a) {
-  %v = call <4 x float> @llvm.wasm.nearest.v4f32(<4 x float> %a)
+  %v = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %a)
   ret <4 x float> %v
 }
 
@@ -807,9 +807,9 @@
 ; CHECK-NEXT: .functype ceil_v2f64 (v128) -> (v128){{$}}
 ; CHECK-NEXT: f64x2.ceil $push[[R:[0-9]+]]=, $0{{$}}
 ; CHECK-NEXT: return $pop[[R]]{{$}}
-declare <2 x double> @llvm.wasm.ceil.v2f64(<2 x double>)
+declare <2 x double> @llvm.ceil.v2f64(<2 x double>)
 define <2 x double> @ceil_v2f64(<2 x double> %a) {
-  %v = call <2 x double> @llvm.wasm.ceil.v2f64(<2 x double> %a)
+  %v = call <2 x double> @llvm.ceil.v2f64(<2 x double> %a)
   ret <2 x double> %v
 }
 
@@ -817,9 +817,9 @@
 ; CHECK-NEXT: .functype floor_v2f64 (v128) -> (v128){{$}}
 ; CHECK-NEXT: f64x2.floor $push[[R:[0-9]+]]=, $0{{$}}
 ; CHECK-NEXT: return $pop[[R]]{{$}}
-declare <2 x double> @llvm.wasm.floor.v2f64(<2 x double>)
+declare <2 x double> @llvm.floor.v2f64(<2 x double>)
 define <2 x double> @floor_v2f64(<2 x double> %a) {
-  %v = call <2 x double> @llvm.wasm.floor.v2f64(<2 x double> %a)
+  %v = call <2 x double> @llvm.floor.v2f64(<2 x double> %a)
   ret <2 x double> %v
 }
 
@@ -827,9 +827,9 @@
 ; CHECK-NEXT: .functype trunc_v2f64 (v128) -> (v128){{$}}
 ; CHECK-NEXT: f64x2.trunc $push[[R:[0-9]+]]=, $0{{$}}
 ; CHECK-NEXT: return $pop[[R]]{{$}}
-declare <2 x double> @llvm.wasm.trunc.v2f64(<2 x double>)
+declare <2 x double> @llvm.trunc.v2f64(<2 x double>)
 define <2 x double> @trunc_v2f64(<2 x double> %a) {
-  %v = call <2 x double> @llvm.wasm.trunc.v2f64(<2 x double> %a)
+  %v = call <2 x double> @llvm.trunc.v2f64(<2 x double> %a)
   ret <2 x double> %v
 }
 
@@ -837,9 +837,9 @@
 ; CHECK-NEXT: .functype nearest_v2f64 (v128) -> (v128){{$}}
 ; CHECK-NEXT: f64x2.nearest $push[[R:[0-9]+]]=, $0{{$}}
 ; CHECK-NEXT: return $pop[[R]]{{$}}
-declare <2 x double> @llvm.wasm.nearest.v2f64(<2 x double>)
+declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>)
 define <2 x double> @nearest_v2f64(<2 x double> %a) {
-  %v = call <2 x double> @llvm.wasm.nearest.v2f64(<2 x double> %a)
+  %v = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %a)
   ret <2 x double> %v
 }
 
Index: llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
===================================================================
--- llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -1031,14 +1031,14 @@
 defm SQRT : SIMDUnaryFP<fsqrt, "sqrt", 227>;
 
 // Rounding: ceil, floor, trunc, nearest
-defm CEIL : SIMDUnary<F32x4, int_wasm_ceil, "ceil", 0x67>;
-defm FLOOR : SIMDUnary<F32x4, int_wasm_floor, "floor", 0x68>;
-defm TRUNC: SIMDUnary<F32x4, int_wasm_trunc, "trunc", 0x69>;
-defm NEAREST: SIMDUnary<F32x4, int_wasm_nearest, "nearest", 0x6a>;
-defm CEIL : SIMDUnary<F64x2, int_wasm_ceil, "ceil", 0x74>;
-defm FLOOR : SIMDUnary<F64x2, int_wasm_floor, "floor", 0x75>;
-defm TRUNC: SIMDUnary<F64x2, int_wasm_trunc, "trunc", 0x7a>;
-defm NEAREST: SIMDUnary<F64x2, int_wasm_nearest, "nearest", 0x94>;
+defm CEIL : SIMDUnary<F32x4, fceil, "ceil", 0x67>;
+defm FLOOR : SIMDUnary<F32x4, ffloor, "floor", 0x68>;
+defm TRUNC: SIMDUnary<F32x4, ftrunc, "trunc", 0x69>;
+defm NEAREST: SIMDUnary<F32x4, fnearbyint, "nearest", 0x6a>;
+defm CEIL : SIMDUnary<F64x2, fceil, "ceil", 0x74>;
+defm FLOOR : SIMDUnary<F64x2, ffloor, "floor", 0x75>;
+defm TRUNC: SIMDUnary<F64x2, ftrunc, "trunc", 0x7a>;
+defm NEAREST: SIMDUnary<F64x2, fnearbyint, "nearest", 0x94>;
 
 //===----------------------------------------------------------------------===//
 // Floating-point binary arithmetic
Index: llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
===================================================================
--- llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -180,8 +180,7 @@
         setOperationAction(Op, T, Legal);
 
     // Expand float operations supported for scalars but not SIMD
-    for (auto Op : {ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FNEARBYINT,
-                    ISD::FCOPYSIGN, ISD::FLOG, ISD::FLOG2, ISD::FLOG10,
+    for (auto Op : {ISD::FCOPYSIGN, ISD::FLOG, ISD::FLOG2, ISD::FLOG10,
                     ISD::FEXP, ISD::FEXP2, ISD::FRINT})
       for (auto T : {MVT::v4f32, MVT::v2f64})
         setOperationAction(Op, T, Expand);
Index: llvm/include/llvm/IR/IntrinsicsWebAssembly.td
===================================================================
--- llvm/include/llvm/IR/IntrinsicsWebAssembly.td
+++ llvm/include/llvm/IR/IntrinsicsWebAssembly.td
@@ -183,26 +183,6 @@
             [LLVMMatchType<0>, LLVMMatchType<0>],
             [IntrNoMem, IntrSpeculatable]>;
 
-// TODO: Replace these instrinsics with normal ISel patterns once the
-// rounding instructions are merged to the proposal
-// (https://github.com/WebAssembly/simd/pull/232).
-def int_wasm_ceil :
-  Intrinsic<[llvm_anyvector_ty],
-            [LLVMMatchType<0>],
-            [IntrNoMem, IntrSpeculatable]>;
-def int_wasm_floor :
-  Intrinsic<[llvm_anyvector_ty],
-            [LLVMMatchType<0>],
-            [IntrNoMem, IntrSpeculatable]>;
-def int_wasm_trunc :
-  Intrinsic<[llvm_anyvector_ty],
-            [LLVMMatchType<0>],
-            [IntrNoMem, IntrSpeculatable]>;
-def int_wasm_nearest :
-  Intrinsic<[llvm_anyvector_ty],
-            [LLVMMatchType<0>],
-            [IntrNoMem, IntrSpeculatable]>;
-
 // TODO: Replace these intrinsic with normal ISel patterns once the
 // load_zero instructions are merged to the proposal.
 def int_wasm_load32_zero :
Index: clang/test/CodeGen/builtins-wasm.c
===================================================================
--- clang/test/CodeGen/builtins-wasm.c
+++ clang/test/CodeGen/builtins-wasm.c
@@ -792,49 +792,49 @@
 
 f32x4 ceil_f32x4(f32x4 x) {
   return __builtin_wasm_ceil_f32x4(x);
-  // WEBASSEMBLY: call <4 x float> @llvm.wasm.ceil.v4f32(<4 x float> %x)
+  // WEBASSEMBLY: call <4 x float> @llvm.ceil.v4f32(<4 x float> %x)
   // WEBASSEMBLY: ret
 }
 
 f32x4 floor_f32x4(f32x4 x) {
   return __builtin_wasm_floor_f32x4(x);
-  // WEBASSEMBLY: call <4 x float> @llvm.wasm.floor.v4f32(<4 x float> %x)
+  // WEBASSEMBLY: call <4 x float> @llvm.floor.v4f32(<4 x float> %x)
   // WEBASSEMBLY: ret
 }
 
 f32x4 trunc_f32x4(f32x4 x) {
   return __builtin_wasm_trunc_f32x4(x);
-  // WEBASSEMBLY: call <4 x float> @llvm.wasm.trunc.v4f32(<4 x float> %x)
+  // WEBASSEMBLY: call <4 x float> @llvm.trunc.v4f32(<4 x float> %x)
   // WEBASSEMBLY: ret
 }
 
 f32x4 nearest_f32x4(f32x4 x) {
   return __builtin_wasm_nearest_f32x4(x);
-  // WEBASSEMBLY: call <4 x float> @llvm.wasm.nearest.v4f32(<4 x float> %x)
+  // WEBASSEMBLY: call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %x)
   // WEBASSEMBLY: ret
 }
 
 f64x2 ceil_f64x2(f64x2 x) {
   return __builtin_wasm_ceil_f64x2(x);
-  // WEBASSEMBLY: call <2 x double> @llvm.wasm.ceil.v2f64(<2 x double> %x)
+  // WEBASSEMBLY: call <2 x double> @llvm.ceil.v2f64(<2 x double> %x)
   // WEBASSEMBLY: ret
 }
 
 f64x2 floor_f64x2(f64x2 x) {
   return __builtin_wasm_floor_f64x2(x);
-  // WEBASSEMBLY: call <2 x double> @llvm.wasm.floor.v2f64(<2 x double> %x)
+  // WEBASSEMBLY: call <2 x double> @llvm.floor.v2f64(<2 x double> %x)
   // WEBASSEMBLY: ret
 }
 
 f64x2 trunc_f64x2(f64x2 x) {
   return __builtin_wasm_trunc_f64x2(x);
-  // WEBASSEMBLY: call <2 x double> @llvm.wasm.trunc.v2f64(<2 x double> %x)
+  // WEBASSEMBLY: call <2 x double> @llvm.trunc.v2f64(<2 x double> %x)
   // WEBASSEMBLY: ret
 }
 
 f64x2 nearest_f64x2(f64x2 x) {
   return __builtin_wasm_nearest_f64x2(x);
-  // WEBASSEMBLY: call <2 x double> @llvm.wasm.nearest.v2f64(<2 x double> %x)
+  // WEBASSEMBLY: call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %x)
   // WEBASSEMBLY: ret
 }
 
Index: clang/lib/CodeGen/CGBuiltin.cpp
===================================================================
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -17136,19 +17136,19 @@
     switch (BuiltinID) {
     case WebAssembly::BI__builtin_wasm_ceil_f32x4:
     case WebAssembly::BI__builtin_wasm_ceil_f64x2:
-      IntNo = Intrinsic::wasm_ceil;
+      IntNo = Intrinsic::ceil;
       break;
     case WebAssembly::BI__builtin_wasm_floor_f32x4:
     case WebAssembly::BI__builtin_wasm_floor_f64x2:
-      IntNo = Intrinsic::wasm_floor;
+      IntNo = Intrinsic::floor;
       break;
     case WebAssembly::BI__builtin_wasm_trunc_f32x4:
     case WebAssembly::BI__builtin_wasm_trunc_f64x2:
-      IntNo = Intrinsic::wasm_trunc;
+      IntNo = Intrinsic::trunc;
       break;
     case WebAssembly::BI__builtin_wasm_nearest_f32x4:
     case WebAssembly::BI__builtin_wasm_nearest_f64x2:
-      IntNo = Intrinsic::wasm_nearest;
+      IntNo = Intrinsic::nearbyint;
       break;
     default:
       llvm_unreachable("unexpected builtin ID");

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D100411: [WebAssembly] Use standard intrinsics for f32x4 and f64x2 ops

Reply via email to