[PATCH] D105675: [WebAssembly] Custom combines for f64x2.promote_low_f32x4

2021-07-09 Thread Thomas Lively via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rGe5220104d070: [WebAssembly] Custom combines for 
f64x2.promote_low_f32x4 (authored by tlively).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D105675/new/

https://reviews.llvm.org/D105675

Files:
  clang/include/clang/Basic/BuiltinsWebAssembly.def
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/lib/Headers/wasm_simd128.h
  clang/test/CodeGen/builtins-wasm.c
  clang/test/Headers/wasm.c
  llvm/include/llvm/IR/IntrinsicsWebAssembly.td
  llvm/lib/Target/WebAssembly/WebAssemblyISD.def
  llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
  llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
  llvm/test/CodeGen/WebAssembly/simd-conversions.ll
  llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll

Index: llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll
===
--- llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll
+++ llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll
@@ -806,13 +806,3 @@
   %v = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %a)
   ret <2 x double> %v
 }
-
-; CHECK-LABEL: promote_low_v2f64:
-; CHECK-NEXT: .functype promote_low_v2f64 (v128) -> (v128){{$}}
-; CHECK-NEXT: f64x2.promote_low_f32x4 $push[[R:[0-9]+]]=, $0{{$}}
-; CHECK-NEXT: return $pop[[R]]{{$}}
-declare <2 x double> @llvm.wasm.promote.low(<4 x float>)
-define <2 x double> @promote_low_v2f64(<4 x float> %a) {
-  %v = call <2 x double> @llvm.wasm.promote.low(<4 x float> %a)
-  ret <2 x double> %v
-}
Index: llvm/test/CodeGen/WebAssembly/simd-conversions.ll
===
--- llvm/test/CodeGen/WebAssembly/simd-conversions.ll
+++ llvm/test/CodeGen/WebAssembly/simd-conversions.ll
@@ -126,3 +126,25 @@
   %a = shufflevector <4 x double> %v, <4 x double> undef, <2 x i32> <i32 0, i32 1>
   ret <2 x double> %a
 }
+
+; CHECK-LABEL: promote_low_v2f64:
+; NO-SIMD128-NOT: f64x2
+; SIMD128-NEXT: .functype promote_low_v2f64 (v128) -> (v128){{$}}
+; SIMD128-NEXT: f64x2.promote_low_f32x4 $push[[R:[0-9]+]]=, $0
+; SIMD128-NEXT: return $pop[[R]]
+define <2 x double> @promote_low_v2f64(<4 x float> %x) {
+  %v = shufflevector <4 x float> %x, <4 x float> undef, <2 x i32> <i32 0, i32 1>
+  %a = fpext <2 x float> %v to <2 x double>
+  ret <2 x double> %a
+}
+
+; CHECK-LABEL: promote_low_v2f64_2:
+; NO-SIMD128-NOT: f64x2
+; SIMD128-NEXT: .functype promote_low_v2f64_2 (v128) -> (v128){{$}}
+; SIMD128-NEXT: f64x2.promote_low_f32x4 $push[[R:[0-9]+]]=, $0
+; SIMD128-NEXT: return $pop[[R]]
+define <2 x double> @promote_low_v2f64_2(<4 x float> %x) {
+  %v = fpext <4 x float> %x to <4 x double>
+  %a = shufflevector <4 x double> %v, <4 x double> undef, <2 x i32> <i32 0, i32 1>
+  ret <2 x double> %a
+}
Index: llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
===
--- llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -1288,11 +1288,13 @@
 defm "" : SIMDConvert;
 
-// Prototype f64x2 conversions
+// f64x2 <-> f32x4 conversions
 defm "" : SIMDConvert;
-defm "" : SIMDConvert;
+
+def promote_t : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;
+def promote_low : SDNode<"WebAssemblyISD::PROMOTE_LOW", promote_t>;
+defm "" : SIMDConvert;
 
 //===----------------------------------------------------------------------===//
 // Saturating Rounding Q-Format Multiplication
Index: llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
===
--- llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -149,9 +149,11 @@
 setTargetDAGCombine(ISD::SIGN_EXTEND);
 setTargetDAGCombine(ISD::ZERO_EXTEND);
 
-// Combine int_to_fp of extract_vectors and vice versa into conversions ops
+// Combine int_to_fp or fp_extend of extract_vectors and vice versa into
+// conversions ops
 setTargetDAGCombine(ISD::SINT_TO_FP);
 setTargetDAGCombine(ISD::UINT_TO_FP);
+setTargetDAGCombine(ISD::FP_EXTEND);
 setTargetDAGCombine(ISD::EXTRACT_SUBVECTOR);
 
 // Combine concat of {s,u}int_to_fp_sat to i32x4.trunc_sat_f64x2_zero_{s,u}
@@ -2186,60 +2188,109 @@
   if (ResVT != MVT::v2f64)
 return SDValue();
 
-  if (N->getOpcode() == ISD::SINT_TO_FP || N->getOpcode() == ISD::UINT_TO_FP) {
-// Combine this:
-//
-//   (v2f64 ({s,u}int_to_fp
-// (v2i32 (extract_subvector (v4i32 $x), 0
-//
-// into (f64x2.convert_low_i32x4_{s,u} $x).
-auto Extract = N->getOperand(0);
-if (Extract.getOpcode() != ISD::EXTRACT_SUBVECTOR)
-  return SDValue();
-if (Extract.getValueType() != MVT::v2i32)
-  return SDValue();
-auto Source = Extract.getOperand(0);
-if (Source.getValueType() != MVT::v4i32)
-  return SDValue();
-

[PATCH] D105675: [WebAssembly] Custom combines for f64x2.promote_low_f32x4

2021-07-08 Thread Thomas Lively via Phabricator via cfe-commits
tlively created this revision.
tlively added reviewers: aheejin, dschuff.
Herald added subscribers: wingo, ecnelises, sunfish, hiraditya, 
jgravelle-google, sbc100.
tlively requested review of this revision.
Herald added projects: clang, LLVM.
Herald added subscribers: llvm-commits, cfe-commits.

Replace the clang builtin function and LLVM intrinsic previously used to select
the f64x2.promote_low_f32x4 instruction with custom combines from standard
SelectionDAG nodes. Implement the new combines to share code with the similar
combines for f64x2.convert_low_i32x4_{s,u}. Resolves PR50232.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D105675

Files:
  clang/include/clang/Basic/BuiltinsWebAssembly.def
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/lib/Headers/wasm_simd128.h
  clang/test/CodeGen/builtins-wasm.c
  clang/test/Headers/wasm.c
  llvm/include/llvm/IR/IntrinsicsWebAssembly.td
  llvm/lib/Target/WebAssembly/WebAssemblyISD.def
  llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
  llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
  llvm/test/CodeGen/WebAssembly/simd-conversions.ll
  llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll

Index: llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll
===
--- llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll
+++ llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll
@@ -806,13 +806,3 @@
   %v = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %a)
   ret <2 x double> %v
 }
-
-; CHECK-LABEL: promote_low_v2f64:
-; CHECK-NEXT: .functype promote_low_v2f64 (v128) -> (v128){{$}}
-; CHECK-NEXT: f64x2.promote_low_f32x4 $push[[R:[0-9]+]]=, $0{{$}}
-; CHECK-NEXT: return $pop[[R]]{{$}}
-declare <2 x double> @llvm.wasm.promote.low(<4 x float>)
-define <2 x double> @promote_low_v2f64(<4 x float> %a) {
-  %v = call <2 x double> @llvm.wasm.promote.low(<4 x float> %a)
-  ret <2 x double> %v
-}
Index: llvm/test/CodeGen/WebAssembly/simd-conversions.ll
===
--- llvm/test/CodeGen/WebAssembly/simd-conversions.ll
+++ llvm/test/CodeGen/WebAssembly/simd-conversions.ll
@@ -126,3 +126,25 @@
   %a = shufflevector <4 x double> %v, <4 x double> undef, <2 x i32> <i32 0, i32 1>
   ret <2 x double> %a
 }
+
+; CHECK-LABEL: promote_low_v2f64:
+; NO-SIMD128-NOT: f64x2
+; SIMD128-NEXT: .functype promote_low_v2f64 (v128) -> (v128){{$}}
+; SIMD128-NEXT: f64x2.promote_low_f32x4 $push[[R:[0-9]+]]=, $0
+; SIMD128-NEXT: return $pop[[R]]
+define <2 x double> @promote_low_v2f64(<4 x float> %x) {
+  %v = shufflevector <4 x float> %x, <4 x float> undef, <2 x i32> <i32 0, i32 1>
+  %a = fpext <2 x float> %v to <2 x double>
+  ret <2 x double> %a
+}
+
+; CHECK-LABEL: promote_low_v2f64_2:
+; NO-SIMD128-NOT: f64x2
+; SIMD128-NEXT: .functype promote_low_v2f64_2 (v128) -> (v128){{$}}
+; SIMD128-NEXT: f64x2.promote_low_f32x4 $push[[R:[0-9]+]]=, $0
+; SIMD128-NEXT: return $pop[[R]]
+define <2 x double> @promote_low_v2f64_2(<4 x float> %x) {
+  %v = fpext <4 x float> %x to <4 x double>
+  %a = shufflevector <4 x double> %v, <4 x double> undef, <2 x i32> <i32 0, i32 1>
+  ret <2 x double> %a
+}
Index: llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
===
--- llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -1288,11 +1288,13 @@
 defm "" : SIMDConvert;
 
-// Prototype f64x2 conversions
+// f64x2 <-> f32x4 conversions
 defm "" : SIMDConvert;
-defm "" : SIMDConvert;
+
+def promote_t : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;
+def promote_low : SDNode<"WebAssemblyISD::PROMOTE_LOW", promote_t>;
+defm "" : SIMDConvert;
 
 //===----------------------------------------------------------------------===//
 // Saturating Rounding Q-Format Multiplication
Index: llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
===
--- llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -149,9 +149,11 @@
 setTargetDAGCombine(ISD::SIGN_EXTEND);
 setTargetDAGCombine(ISD::ZERO_EXTEND);
 
-// Combine int_to_fp of extract_vectors and vice versa into conversions ops
+// Combine int_to_fp or fp_extend of extract_vectors and vice versa into
+// conversions ops
 setTargetDAGCombine(ISD::SINT_TO_FP);
 setTargetDAGCombine(ISD::UINT_TO_FP);
+setTargetDAGCombine(ISD::FP_EXTEND);
 setTargetDAGCombine(ISD::EXTRACT_SUBVECTOR);
 
 // Combine concat of {s,u}int_to_fp_sat to i32x4.trunc_sat_f64x2_zero_{s,u}
@@ -2186,60 +2188,109 @@
   if (ResVT != MVT::v2f64)
 return SDValue();
 
-  if (N->getOpcode() == ISD::SINT_TO_FP || N->getOpcode() == ISD::UINT_TO_FP) {
-// Combine this:
-//
-//   (v2f64 ({s,u}int_to_fp
-// (v2i32 (extract_subvector (v4i32 $x), 0
-//
-// into (f64x2.convert_low_i32x4_{s,u} $x).
-auto Extract = N->getO