[PATCH] D108387: [WebAssembly] Restore builtins and intrinsics for pmin/pmax

Thomas Lively via Phabricator via cfe-commits Fri, 20 Aug 2021 09:22:00 -0700

This revision was automatically updated to reflect the committed changes.
Closed by commit rG88962cea4680: [WebAssembly] Restore builtins and intrinsics 
for pmin/pmax (authored by tlively).


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D108387/new/

https://reviews.llvm.org/D108387

Files:
  clang/include/clang/Basic/BuiltinsWebAssembly.def
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/lib/Headers/wasm_simd128.h
  clang/test/CodeGen/builtins-wasm.c
  clang/test/Headers/wasm.c
  llvm/include/llvm/IR/IntrinsicsWebAssembly.td
  llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
  llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll

Index: llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll
===================================================================
--- llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll
+++ llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll
@@ -540,6 +540,26 @@
   ret <4 x float> %a
 }
 
+; CHECK-LABEL: pmin_v4f32:
+; CHECK-NEXT: .functype pmin_v4f32 (v128, v128) -> (v128){{$}}
+; CHECK-NEXT: f32x4.pmin $push[[R:[0-9]+]]=, $0, $1{{$}}
+; CHECK-NEXT: return $pop[[R]]{{$}}
+declare <4 x float> @llvm.wasm.pmin.v4f32(<4 x float>, <4 x float>)
+define <4 x float> @pmin_v4f32(<4 x float> %a, <4 x float> %b) {
+  %v = call <4 x float> @llvm.wasm.pmin.v4f32(<4 x float> %a, <4 x float> %b)
+  ret <4 x float> %v
+}
+
+; CHECK-LABEL: pmax_v4f32:
+; CHECK-NEXT: .functype pmax_v4f32 (v128, v128) -> (v128){{$}}
+; CHECK-NEXT: f32x4.pmax $push[[R:[0-9]+]]=, $0, $1{{$}}
+; CHECK-NEXT: return $pop[[R]]{{$}}
+declare <4 x float> @llvm.wasm.pmax.v4f32(<4 x float>, <4 x float>)
+define <4 x float> @pmax_v4f32(<4 x float> %a, <4 x float> %b) {
+  %v = call <4 x float> @llvm.wasm.pmax.v4f32(<4 x float> %a, <4 x float> %b)
+  ret <4 x float> %v
+}
+
 ; CHECK-LABEL: ceil_v4f32:
 ; CHECK-NEXT: .functype ceil_v4f32 (v128) -> (v128){{$}}
 ; CHECK-NEXT: f32x4.ceil $push[[R:[0-9]+]]=, $0{{$}}
@@ -595,6 +615,26 @@
   ret <2 x double> %a
 }
 
+; CHECK-LABEL: pmin_v2f64:
+; CHECK-NEXT: .functype pmin_v2f64 (v128, v128) -> (v128){{$}}
+; CHECK-NEXT: f64x2.pmin $push[[R:[0-9]+]]=, $0, $1{{$}}
+; CHECK-NEXT: return $pop[[R]]{{$}}
+declare <2 x double> @llvm.wasm.pmin.v2f64(<2 x double>, <2 x double>)
+define <2 x double> @pmin_v2f64(<2 x double> %a, <2 x double> %b) {
+  %v = call <2 x double> @llvm.wasm.pmin.v2f64(<2 x double> %a, <2 x double> %b)
+  ret <2 x double> %v
+}
+
+; CHECK-LABEL: pmax_v2f64:
+; CHECK-NEXT: .functype pmax_v2f64 (v128, v128) -> (v128){{$}}
+; CHECK-NEXT: f64x2.pmax $push[[R:[0-9]+]]=, $0, $1{{$}}
+; CHECK-NEXT: return $pop[[R]]{{$}}
+declare <2 x double> @llvm.wasm.pmax.v2f64(<2 x double>, <2 x double>)
+define <2 x double> @pmax_v2f64(<2 x double> %a, <2 x double> %b) {
+  %v = call <2 x double> @llvm.wasm.pmax.v2f64(<2 x double> %a, <2 x double> %b)
+  ret <2 x double> %v
+}
+
 ; CHECK-LABEL: ceil_v2f64:
 ; CHECK-NEXT: .functype ceil_v2f64 (v128) -> (v128){{$}}
 ; CHECK-NEXT: f64x2.ceil $push[[R:[0-9]+]]=, $0{{$}}
Index: llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
===================================================================
--- llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -1175,6 +1175,16 @@
           (pmax $lhs, $rhs)>;
 }
 
+// And match the pmin/pmax LLVM intrinsics as well
+def : Pat<(v4f32 (int_wasm_pmin (v4f32 V128:$lhs), (v4f32 V128:$rhs))),
+          (PMIN_F32x4 V128:$lhs, V128:$rhs)>;
+def : Pat<(v4f32 (int_wasm_pmax (v4f32 V128:$lhs), (v4f32 V128:$rhs))),
+          (PMAX_F32x4 V128:$lhs, V128:$rhs)>;
+def : Pat<(v2f64 (int_wasm_pmin (v2f64 V128:$lhs), (v2f64 V128:$rhs))),
+          (PMIN_F64x2 V128:$lhs, V128:$rhs)>;
+def : Pat<(v2f64 (int_wasm_pmax (v2f64 V128:$lhs), (v2f64 V128:$rhs))),
+          (PMAX_F64x2 V128:$lhs, V128:$rhs)>;
+
 //===----------------------------------------------------------------------===//
 // Conversions
 //===----------------------------------------------------------------------===//
Index: llvm/include/llvm/IR/IntrinsicsWebAssembly.td
===================================================================
--- llvm/include/llvm/IR/IntrinsicsWebAssembly.td
+++ llvm/include/llvm/IR/IntrinsicsWebAssembly.td
@@ -164,6 +164,15 @@
             [llvm_v8i16_ty, llvm_v8i16_ty],
             [IntrNoMem, IntrSpeculatable]>;
 
+def int_wasm_pmin :
+  Intrinsic<[llvm_anyvector_ty],
+            [LLVMMatchType<0>, LLVMMatchType<0>],
+            [IntrNoMem, IntrSpeculatable]>;
+def int_wasm_pmax :
+  Intrinsic<[llvm_anyvector_ty],
+            [LLVMMatchType<0>, LLVMMatchType<0>],
+            [IntrNoMem, IntrSpeculatable]>;
+
 def int_wasm_extadd_pairwise_signed :
   Intrinsic<[llvm_anyvector_ty],
             [LLVMSubdivide2VectorType<0>],
Index: clang/test/Headers/wasm.c
===================================================================
--- clang/test/Headers/wasm.c
+++ clang/test/Headers/wasm.c
@@ -2424,11 +2424,11 @@
 
 // CHECK-LABEL: @test_f32x4_pmin(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
-// CHECK-NEXT:    [[CMP_I:%.*]] = fcmp olt <4 x float> [[TMP0]], [[TMP1]]
-// CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[CMP_I]], <4 x i32> [[B]], <4 x i32> [[A]]
-// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x float> @llvm.wasm.pmin.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) #[[ATTR6]]
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP3]]
 //
 v128_t test_f32x4_pmin(v128_t a, v128_t b) {
   return wasm_f32x4_pmin(a, b);
@@ -2438,9 +2438,9 @@
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
-// CHECK-NEXT:    [[CMP_I:%.*]] = fcmp olt <4 x float> [[TMP0]], [[TMP1]]
-// CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[CMP_I]], <4 x i32> [[B]], <4 x i32> [[A]]
-// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x float> @llvm.wasm.pmax.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) #[[ATTR6]]
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP3]]
 //
 v128_t test_f32x4_pmax(v128_t a, v128_t b) {
   return wasm_f32x4_pmax(a, b);
@@ -2597,10 +2597,9 @@
 
 // CHECK-LABEL: @test_f64x2_pmin(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
-// CHECK-NEXT:    [[CMP_I:%.*]] = fcmp olt <2 x double> [[TMP0]], [[TMP1]]
-// CHECK-NEXT:    [[TMP2:%.*]] = select <2 x i1> [[CMP_I]], <2 x double> [[TMP0]], <2 x double> [[TMP1]]
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x double> @llvm.wasm.pmin.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) #[[ATTR6]]
 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to <4 x i32>
 // CHECK-NEXT:    ret <4 x i32> [[TMP3]]
 //
@@ -2612,8 +2611,7 @@
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
-// CHECK-NEXT:    [[CMP_I:%.*]] = fcmp olt <2 x double> [[TMP0]], [[TMP1]]
-// CHECK-NEXT:    [[TMP2:%.*]] = select <2 x i1> [[CMP_I]], <2 x double> [[TMP1]], <2 x double> [[TMP0]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x double> @llvm.wasm.pmax.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) #[[ATTR6]]
 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to <4 x i32>
 // CHECK-NEXT:    ret <4 x i32> [[TMP3]]
 //
Index: clang/test/CodeGen/builtins-wasm.c
===================================================================
--- clang/test/CodeGen/builtins-wasm.c
+++ clang/test/CodeGen/builtins-wasm.c
@@ -506,6 +506,20 @@
   // WEBASSEMBLY-NEXT: ret
 }
 
+f32x4 pmin_f32x4(f32x4 x, f32x4 y) {
+  return __builtin_wasm_pmin_f32x4(x, y);
+  // WEBASSEMBLY: call <4 x float> @llvm.wasm.pmin.v4f32(
+  // WEBASSEMBLY-SAME: <4 x float> %x, <4 x float> %y)
+  // WEBASSEMBLY-NEXT: ret
+}
+
+f32x4 pmax_f32x4(f32x4 x, f32x4 y) {
+  return __builtin_wasm_pmax_f32x4(x, y);
+  // WEBASSEMBLY: call <4 x float> @llvm.wasm.pmax.v4f32(
+  // WEBASSEMBLY-SAME: <4 x float> %x, <4 x float> %y)
+  // WEBASSEMBLY-NEXT: ret
+}
+
 f64x2 min_f64x2(f64x2 x, f64x2 y) {
   return __builtin_wasm_min_f64x2(x, y);
   // WEBASSEMBLY: call <2 x double> @llvm.minimum.v2f64(
@@ -520,6 +534,20 @@
   // WEBASSEMBLY-NEXT: ret
 }
 
+f64x2 pmin_f64x2(f64x2 x, f64x2 y) {
+  return __builtin_wasm_pmin_f64x2(x, y);
+  // WEBASSEMBLY: call <2 x double> @llvm.wasm.pmin.v2f64(
+  // WEBASSEMBLY-SAME: <2 x double> %x, <2 x double> %y)
+  // WEBASSEMBLY-NEXT: ret
+}
+
+f64x2 pmax_f64x2(f64x2 x, f64x2 y) {
+  return __builtin_wasm_pmax_f64x2(x, y);
+  // WEBASSEMBLY: call <2 x double> @llvm.wasm.pmax.v2f64(
+  // WEBASSEMBLY-SAME: <2 x double> %x, <2 x double> %y)
+  // WEBASSEMBLY-NEXT: ret
+}
+
 f32x4 ceil_f32x4(f32x4 x) {
   return __builtin_wasm_ceil_f32x4(x);
   // WEBASSEMBLY: call <4 x float> @llvm.ceil.v4f32(<4 x float> %x)
Index: clang/lib/Headers/wasm_simd128.h
===================================================================
--- clang/lib/Headers/wasm_simd128.h
+++ clang/lib/Headers/wasm_simd128.h
@@ -1297,14 +1297,12 @@
 
 static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_pmin(v128_t __a,
                                                             v128_t __b) {
-  __i32x4 __mask = (__i32x4)((__f32x4)__b < (__f32x4)__a);
-  return (v128_t)((((__i32x4)__b) & __mask) | (((__i32x4)__a) & ~__mask));
+  return (v128_t)__builtin_wasm_pmin_f32x4((__f32x4)__a, (__f32x4)__b);
 }
 
 static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_pmax(v128_t __a,
                                                             v128_t __b) {
-  __i32x4 __mask = (__i32x4)((__f32x4)__a < (__f32x4)__b);
-  return (v128_t)((((__i32x4)__b) & __mask) | (((__i32x4)__a) & ~__mask));
+  return (v128_t)__builtin_wasm_pmax_f32x4((__f32x4)__a, (__f32x4)__b);
 }
 
 static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_abs(v128_t __a) {
@@ -1367,14 +1365,12 @@
 
 static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_pmin(v128_t __a,
                                                             v128_t __b) {
-  __i64x2 __mask = (__i64x2)((__f64x2)__b < (__f64x2)__a);
-  return (v128_t)((((__i64x2)__b) & __mask) | (((__i64x2)__a) & ~__mask));
+  return (v128_t)__builtin_wasm_pmin_f64x2((__f64x2)__a, (__f64x2)__b);
 }
 
 static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_pmax(v128_t __a,
                                                             v128_t __b) {
-  __i64x2 __mask = (__i64x2)((__f64x2)__a < (__f64x2)__b);
-  return (v128_t)((((__i64x2)__b) & __mask) | (((__i64x2)__a) & ~__mask));
+  return (v128_t)__builtin_wasm_pmax_f64x2((__f64x2)__a, (__f64x2)__b);
 }
 
 static __inline__ v128_t __DEFAULT_FN_ATTRS
Index: clang/lib/CodeGen/CGBuiltin.cpp
===================================================================
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -17822,6 +17822,22 @@
         CGM.getIntrinsic(Intrinsic::maximum, ConvertType(E->getType()));
     return Builder.CreateCall(Callee, {LHS, RHS});
   }
+  case WebAssembly::BI__builtin_wasm_pmin_f32x4:
+  case WebAssembly::BI__builtin_wasm_pmin_f64x2: {
+    Value *LHS = EmitScalarExpr(E->getArg(0));
+    Value *RHS = EmitScalarExpr(E->getArg(1));
+    Function *Callee =
+        CGM.getIntrinsic(Intrinsic::wasm_pmin, ConvertType(E->getType()));
+    return Builder.CreateCall(Callee, {LHS, RHS});
+  }
+  case WebAssembly::BI__builtin_wasm_pmax_f32x4:
+  case WebAssembly::BI__builtin_wasm_pmax_f64x2: {
+    Value *LHS = EmitScalarExpr(E->getArg(0));
+    Value *RHS = EmitScalarExpr(E->getArg(1));
+    Function *Callee =
+        CGM.getIntrinsic(Intrinsic::wasm_pmax, ConvertType(E->getType()));
+    return Builder.CreateCall(Callee, {LHS, RHS});
+  }
   case WebAssembly::BI__builtin_wasm_ceil_f32x4:
   case WebAssembly::BI__builtin_wasm_floor_f32x4:
   case WebAssembly::BI__builtin_wasm_trunc_f32x4:
Index: clang/include/clang/Basic/BuiltinsWebAssembly.def
===================================================================
--- clang/include/clang/Basic/BuiltinsWebAssembly.def
+++ clang/include/clang/Basic/BuiltinsWebAssembly.def
@@ -129,8 +129,12 @@
 
 TARGET_BUILTIN(__builtin_wasm_min_f32x4, "V4fV4fV4f", "nc", "simd128")
 TARGET_BUILTIN(__builtin_wasm_max_f32x4, "V4fV4fV4f", "nc", "simd128")
+TARGET_BUILTIN(__builtin_wasm_pmin_f32x4, "V4fV4fV4f", "nc", "simd128")
+TARGET_BUILTIN(__builtin_wasm_pmax_f32x4, "V4fV4fV4f", "nc", "simd128")
 TARGET_BUILTIN(__builtin_wasm_min_f64x2, "V2dV2dV2d", "nc", "simd128")
 TARGET_BUILTIN(__builtin_wasm_max_f64x2, "V2dV2dV2d", "nc", "simd128")
+TARGET_BUILTIN(__builtin_wasm_pmin_f64x2, "V2dV2dV2d", "nc", "simd128")
+TARGET_BUILTIN(__builtin_wasm_pmax_f64x2, "V2dV2dV2d", "nc", "simd128")
 
 TARGET_BUILTIN(__builtin_wasm_ceil_f32x4, "V4fV4f", "nc", "simd128")
 TARGET_BUILTIN(__builtin_wasm_floor_f32x4, "V4fV4f", "nc", "simd128")

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D108387: [WebAssembly] Restore builtins and intrinsics for pmin/pmax

Reply via email to