[PATCH] D105675: [WebAssembly] Custom combines for f64x2.promote_low_f32x4
This revision was landed with ongoing or failed builds. This revision was automatically updated to reflect the committed changes. Closed by commit rGe5220104d070: [WebAssembly] Custom combines for f64x2.promote_low_f32x4 (authored by tlively). Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D105675/new/ https://reviews.llvm.org/D105675 Files: clang/include/clang/Basic/BuiltinsWebAssembly.def clang/lib/CodeGen/CGBuiltin.cpp clang/lib/Headers/wasm_simd128.h clang/test/CodeGen/builtins-wasm.c clang/test/Headers/wasm.c llvm/include/llvm/IR/IntrinsicsWebAssembly.td llvm/lib/Target/WebAssembly/WebAssemblyISD.def llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td llvm/test/CodeGen/WebAssembly/simd-conversions.ll llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll Index: llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll === --- llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll +++ llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll @@ -806,13 +806,3 @@ %v = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %a) ret <2 x double> %v } - -; CHECK-LABEL: promote_low_v2f64: -; CHECK-NEXT: .functype promote_low_v2f64 (v128) -> (v128){{$}} -; CHECK-NEXT: f64x2.promote_low_f32x4 $push[[R:[0-9]+]]=, $0{{$}} -; CHECK-NEXT: return $pop[[R]]{{$}} -declare <2 x double> @llvm.wasm.promote.low(<4 x float>) -define <2 x double> @promote_low_v2f64(<4 x float> %a) { - %v = call <2 x double> @llvm.wasm.promote.low(<4 x float> %a) - ret <2 x double> %v -} Index: llvm/test/CodeGen/WebAssembly/simd-conversions.ll === --- llvm/test/CodeGen/WebAssembly/simd-conversions.ll +++ llvm/test/CodeGen/WebAssembly/simd-conversions.ll @@ -126,3 +126,25 @@ %a = shufflevector <4 x double> %v, <4 x double> undef, <2 x i32> ret <2 x double> %a } + +; CHECK-LABEL: promote_low_v2f64: +; NO-SIMD128-NOT: f64x2 +; SIMD128-NEXT: .functype promote_low_v2f64 (v128) -> (v128){{$}} +; SIMD128-NEXT: f64x2.promote_low_f32x4 
$push[[R:[0-9]+]]=, $0 +; SIMD128-NEXT: return $pop[[R]] +define <2 x double> @promote_low_v2f64(<4 x float> %x) { + %v = shufflevector <4 x float> %x, <4 x float> undef, <2 x i32> <i32 0, i32 1> + %a = fpext <2 x float> %v to <2 x double> + ret <2 x double> %a +} + +; CHECK-LABEL: promote_low_v2f64_2: +; NO-SIMD128-NOT: f64x2 +; SIMD128-NEXT: .functype promote_low_v2f64_2 (v128) -> (v128){{$}} +; SIMD128-NEXT: f64x2.promote_low_f32x4 $push[[R:[0-9]+]]=, $0 +; SIMD128-NEXT: return $pop[[R]] +define <2 x double> @promote_low_v2f64_2(<4 x float> %x) { + %v = fpext <4 x float> %x to <4 x double> + %a = shufflevector <4 x double> %v, <4 x double> undef, <2 x i32> <i32 0, i32 1> + ret <2 x double> %a +} Index: llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td === --- llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td +++ llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td @@ -1288,11 +1288,13 @@ defm "" : SIMDConvert; -// Prototype f64x2 conversions +// f64x2 <-> f32x4 conversions defm "" : SIMDConvert; -defm "" : SIMDConvert; + +def promote_t : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>; +def promote_low : SDNode<"WebAssemblyISD::PROMOTE_LOW", promote_t>; +defm "" : SIMDConvert; //===--===// // Saturating Rounding Q-Format Multiplication Index: llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp === --- llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -149,9 +149,11 @@ setTargetDAGCombine(ISD::SIGN_EXTEND); setTargetDAGCombine(ISD::ZERO_EXTEND); -// Combine int_to_fp of extract_vectors and vice versa into conversions ops +// Combine int_to_fp or fp_extend of extract_vectors and vice versa into +// conversions ops setTargetDAGCombine(ISD::SINT_TO_FP); setTargetDAGCombine(ISD::UINT_TO_FP); +setTargetDAGCombine(ISD::FP_EXTEND); setTargetDAGCombine(ISD::EXTRACT_SUBVECTOR); // Combine concat of {s,u}int_to_fp_sat to i32x4.trunc_sat_f64x2_zero_{s,u} @@ -2186,60 +2188,109 @@ if (ResVT != MVT::v2f64) return SDValue(); - if 
(N->getOpcode() == ISD::SINT_TO_FP || N->getOpcode() == ISD::UINT_TO_FP) { -// Combine this: -// -// (v2f64 ({s,u}int_to_fp -// (v2i32 (extract_subvector (v4i32 $x), 0 -// -// into (f64x2.convert_low_i32x4_{s,u} $x). -auto Extract = N->getOperand(0); -if (Extract.getOpcode() != ISD::EXTRACT_SUBVECTOR) - return SDValue(); -if (Extract.getValueType() != MVT::v2i32) - return SDValue(); -auto Source = Extract.getOperand(0); -if (Source.getValueType() != MVT::v4i32) - return SDValue(); -
[PATCH] D105675: [WebAssembly] Custom combines for f64x2.promote_low_f32x4
tlively created this revision. tlively added reviewers: aheejin, dschuff. Herald added subscribers: wingo, ecnelises, sunfish, hiraditya, jgravelle-google, sbc100. tlively requested review of this revision. Herald added projects: clang, LLVM. Herald added subscribers: llvm-commits, cfe-commits. Replace the clang builtin function and LLVM intrinsic previously used to select the f64x2.promote_low_f32x4 instruction with custom combines from standard SelectionDAG nodes. Implement the new combines to share code with the similar combines for f64x2.convert_low_i32x4_{s,u}. Resolves PR50232. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D105675 Files: clang/include/clang/Basic/BuiltinsWebAssembly.def clang/lib/CodeGen/CGBuiltin.cpp clang/lib/Headers/wasm_simd128.h clang/test/CodeGen/builtins-wasm.c clang/test/Headers/wasm.c llvm/include/llvm/IR/IntrinsicsWebAssembly.td llvm/lib/Target/WebAssembly/WebAssemblyISD.def llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td llvm/test/CodeGen/WebAssembly/simd-conversions.ll llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll Index: llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll === --- llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll +++ llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll @@ -806,13 +806,3 @@ %v = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %a) ret <2 x double> %v } - -; CHECK-LABEL: promote_low_v2f64: -; CHECK-NEXT: .functype promote_low_v2f64 (v128) -> (v128){{$}} -; CHECK-NEXT: f64x2.promote_low_f32x4 $push[[R:[0-9]+]]=, $0{{$}} -; CHECK-NEXT: return $pop[[R]]{{$}} -declare <2 x double> @llvm.wasm.promote.low(<4 x float>) -define <2 x double> @promote_low_v2f64(<4 x float> %a) { - %v = call <2 x double> @llvm.wasm.promote.low(<4 x float> %a) - ret <2 x double> %v -} Index: llvm/test/CodeGen/WebAssembly/simd-conversions.ll === --- llvm/test/CodeGen/WebAssembly/simd-conversions.ll +++ llvm/test/CodeGen/WebAssembly/simd-conversions.ll @@ 
-126,3 +126,25 @@ %a = shufflevector <4 x double> %v, <4 x double> undef, <2 x i32> ret <2 x double> %a } + +; CHECK-LABEL: promote_low_v2f64: +; NO-SIMD128-NOT: f64x2 +; SIMD128-NEXT: .functype promote_low_v2f64 (v128) -> (v128){{$}} +; SIMD128-NEXT: f64x2.promote_low_f32x4 $push[[R:[0-9]+]]=, $0 +; SIMD128-NEXT: return $pop[[R]] +define <2 x double> @promote_low_v2f64(<4 x float> %x) { + %v = shufflevector <4 x float> %x, <4 x float> undef, <2 x i32> <i32 0, i32 1> + %a = fpext <2 x float> %v to <2 x double> + ret <2 x double> %a +} + +; CHECK-LABEL: promote_low_v2f64_2: +; NO-SIMD128-NOT: f64x2 +; SIMD128-NEXT: .functype promote_low_v2f64_2 (v128) -> (v128){{$}} +; SIMD128-NEXT: f64x2.promote_low_f32x4 $push[[R:[0-9]+]]=, $0 +; SIMD128-NEXT: return $pop[[R]] +define <2 x double> @promote_low_v2f64_2(<4 x float> %x) { + %v = fpext <4 x float> %x to <4 x double> + %a = shufflevector <4 x double> %v, <4 x double> undef, <2 x i32> <i32 0, i32 1> + ret <2 x double> %a +} Index: llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td === --- llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td +++ llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td @@ -1288,11 +1288,13 @@ defm "" : SIMDConvert; -// Prototype f64x2 conversions +// f64x2 <-> f32x4 conversions defm "" : SIMDConvert; -defm "" : SIMDConvert; + +def promote_t : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>; +def promote_low : SDNode<"WebAssemblyISD::PROMOTE_LOW", promote_t>; +defm "" : SIMDConvert; //===--===// // Saturating Rounding Q-Format Multiplication Index: llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp === --- llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -149,9 +149,11 @@ setTargetDAGCombine(ISD::SIGN_EXTEND); setTargetDAGCombine(ISD::ZERO_EXTEND); -// Combine int_to_fp of extract_vectors and vice versa into conversions ops +// Combine int_to_fp or fp_extend of extract_vectors and vice versa into +// conversions ops 
setTargetDAGCombine(ISD::SINT_TO_FP); setTargetDAGCombine(ISD::UINT_TO_FP); +setTargetDAGCombine(ISD::FP_EXTEND); setTargetDAGCombine(ISD::EXTRACT_SUBVECTOR); // Combine concat of {s,u}int_to_fp_sat to i32x4.trunc_sat_f64x2_zero_{s,u} @@ -2186,60 +2188,109 @@ if (ResVT != MVT::v2f64) return SDValue(); - if (N->getOpcode() == ISD::SINT_TO_FP || N->getOpcode() == ISD::UINT_TO_FP) { -// Combine this: -// -// (v2f64 ({s,u}int_to_fp -// (v2i32 (extract_subvector (v4i32 $x), 0 -// -// into (f64x2.convert_low_i32x4_{s,u} $x). -auto Extract = N->getO