https://github.com/brendandahl created https://github.com/llvm/llvm-project/pull/91545
Adds a builtin and intrinsic for the f32.store_f16 instruction. The instruction stores an f32 value to memory as an f16 value. Specified at: https://github.com/WebAssembly/half-precision/blob/29a9b9462c9285d4ccc1a5dc39214ddfd1892658/proposals/half-precision/Overview.md Note: the current spec has f32.store_f16 as opcode 0xFD0121, but this is incorrect and will be changed to 0xFC31 soon. >From adcb77e15d09f466f217d754f6f80aeb729aadc4 Mon Sep 17 00:00:00 2001 From: Brendan Dahl <brendan.d...@gmail.com> Date: Wed, 8 May 2024 23:10:07 +0000 Subject: [PATCH] [WebAssembly] Implement prototype f32.store_f16 instruction. Adds a builtin and intrinsic for the f32.store_f16 instruction. The instruction stores an f32 value to memory as an f16 value. Specified at: https://github.com/WebAssembly/half-precision/blob/29a9b9462c9285d4ccc1a5dc39214ddfd1892658/proposals/half-precision/Overview.md Note: the current spec has f32.store_f16 as opcode 0xFD0121, but this is incorrect and will be changed to 0xFC31 soon. --- .../clang/Basic/BuiltinsWebAssembly.def | 1 + clang/lib/CodeGen/CGBuiltin.cpp | 6 +++++ clang/test/CodeGen/builtins-wasm.c | 6 +++++ llvm/include/llvm/IR/IntrinsicsWebAssembly.td | 5 ++++ .../MCTargetDesc/WebAssemblyMCTargetDesc.h | 1 + .../WebAssembly/WebAssemblyISelLowering.cpp | 8 ++++++ .../WebAssembly/WebAssemblyInstrMemory.td | 4 +++ .../CodeGen/WebAssembly/half-precision.ll | 9 +++++++ llvm/test/CodeGen/WebAssembly/offset.ll | 27 +++++++++++++++++++ llvm/test/MC/WebAssembly/simd-encodings.s | 3 +++ 10 files changed, 70 insertions(+) diff --git a/clang/include/clang/Basic/BuiltinsWebAssembly.def b/clang/include/clang/Basic/BuiltinsWebAssembly.def index cf54f8f4422f8..41fadd10e9432 100644 --- a/clang/include/clang/Basic/BuiltinsWebAssembly.def +++ b/clang/include/clang/Basic/BuiltinsWebAssembly.def @@ -192,6 +192,7 @@ TARGET_BUILTIN(__builtin_wasm_relaxed_dot_bf16x8_add_f32_f32x4, "V4fV8UsV8UsV4f" // Half-Precision (fp16) TARGET_BUILTIN(__builtin_wasm_loadf16_f32, "fh*", "nU", 
"half-precision") +TARGET_BUILTIN(__builtin_wasm_storef16_f32, "vfh*", "nU", "half-precision") // Reference Types builtins // Some builtins are custom type-checked - see 't' as part of the third argument, diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index e8a6bd050e17e..abb644d8eb506 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -21308,6 +21308,12 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_loadf16_f32); return Builder.CreateCall(Callee, {Addr}); } + case WebAssembly::BI__builtin_wasm_storef16_f32: { + Value *Val = EmitScalarExpr(E->getArg(0)); + Value *Addr = EmitScalarExpr(E->getArg(1)); + Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_storef16_f32); + return Builder.CreateCall(Callee, {Val, Addr}); + } case WebAssembly::BI__builtin_wasm_table_get: { assert(E->getArg(0)->getType()->isArrayType()); Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this); diff --git a/clang/test/CodeGen/builtins-wasm.c b/clang/test/CodeGen/builtins-wasm.c index ab1c6cd494ae5..bcb15969de1c5 100644 --- a/clang/test/CodeGen/builtins-wasm.c +++ b/clang/test/CodeGen/builtins-wasm.c @@ -807,6 +807,12 @@ float load_f16_f32(__fp16 *addr) { // WEBASSEMBLY: call float @llvm.wasm.loadf16.f32(ptr %{{.*}}) } +void store_f16_f32(float val, __fp16 *addr) { + return __builtin_wasm_storef16_f32(val, addr); + // WEBASSEMBLY: tail call void @llvm.wasm.storef16.f32(float %val, ptr %{{.*}}) + // WEBASSEMBLY-NEXT: ret +} + __externref_t externref_null() { return __builtin_wasm_ref_null_extern(); // WEBASSEMBLY: tail call ptr addrspace(10) @llvm.wasm.ref.null.extern() diff --git a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td index f8142a8ca9e93..572d334ac9552 100644 --- a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td +++ b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td @@ -332,6 
+332,11 @@ def int_wasm_loadf16_f32: [llvm_ptr_ty], [IntrReadMem, IntrArgMemOnly], "", [SDNPMemOperand]>; +def int_wasm_storef16_f32: + Intrinsic<[], + [llvm_float_ty, llvm_ptr_ty], + [IntrWriteMem, IntrArgMemOnly], + "", [SDNPMemOperand]>; //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h index d3b496ae59179..d4e9fb057c44d 100644 --- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h +++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h @@ -207,6 +207,7 @@ inline unsigned GetDefaultP2AlignAny(unsigned Opc) { WASM_LOAD_STORE(LOAD_LANE_I16x8) WASM_LOAD_STORE(STORE_LANE_I16x8) WASM_LOAD_STORE(LOAD_F16_F32) + WASM_LOAD_STORE(STORE_F16_F32) return 1; WASM_LOAD_STORE(LOAD_I32) WASM_LOAD_STORE(LOAD_F32) diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index ed52fe53bc609..527bb4c9fbea6 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -914,6 +914,14 @@ bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.align = Align(2); Info.flags = MachineMemOperand::MOLoad; return true; + case Intrinsic::wasm_storef16_f32: + Info.opc = ISD::INTRINSIC_VOID; + Info.memVT = MVT::f16; + Info.ptrVal = I.getArgOperand(1); + Info.offset = 0; + Info.align = Align(2); + Info.flags = MachineMemOperand::MOStore; + return true; default: return false; } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td index e4baf842462a9..c6a379f51247c 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td @@ -171,12 +171,16 @@ defm STORE8_I64 : WebAssemblyStore<I64, 
"i64.store8", 0x3c>; defm STORE16_I64 : WebAssemblyStore<I64, "i64.store16", 0x3d>; defm STORE32_I64 : WebAssemblyStore<I64, "i64.store32", 0x3e>; +defm STORE_F16_F32 : WebAssemblyStore<F32, "f32.store_f16", 0xfc31, [HasHalfPrecision]>; + defm : StorePat<i32, truncstorei8, "STORE8_I32">; defm : StorePat<i32, truncstorei16, "STORE16_I32">; defm : StorePat<i64, truncstorei8, "STORE8_I64">; defm : StorePat<i64, truncstorei16, "STORE16_I64">; defm : StorePat<i64, truncstorei32, "STORE32_I64">; +defm : StorePat<f32, int_wasm_storef16_f32, "STORE_F16_F32">; + multiclass MemoryOps<WebAssemblyRegClass rc, string B> { // Current memory size. defm MEMORY_SIZE_A#B : I<(outs rc:$dst), (ins i32imm:$flags), diff --git a/llvm/test/CodeGen/WebAssembly/half-precision.ll b/llvm/test/CodeGen/WebAssembly/half-precision.ll index 582771d3f95fc..89e9c42637c14 100644 --- a/llvm/test/CodeGen/WebAssembly/half-precision.ll +++ b/llvm/test/CodeGen/WebAssembly/half-precision.ll @@ -2,6 +2,7 @@ ; RUN: llc < %s --mtriple=wasm64-unknown-unknown -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+half-precision | FileCheck %s declare float @llvm.wasm.loadf32.f16(ptr) +declare void @llvm.wasm.storef16.f32(float, ptr) ; CHECK-LABEL: ldf16_32: ; CHECK: f32.load_f16 $push[[NUM0:[0-9]+]]=, 0($0){{$}} @@ -10,3 +11,11 @@ define float @ldf16_32(ptr %p) { %v = call float @llvm.wasm.loadf16.f32(ptr %p) ret float %v } + +; CHECK-LABEL: stf16_32: +; CHECK: f32.store_f16 0($1), $0 +; CHECK-NEXT: return +define void @stf16_32(float %v, ptr %p) { + tail call void @llvm.wasm.storef16.f32(float %v, ptr %p) + ret void +} diff --git a/llvm/test/CodeGen/WebAssembly/offset.ll b/llvm/test/CodeGen/WebAssembly/offset.ll index b497ddd7273a0..65de341780e31 100644 --- a/llvm/test/CodeGen/WebAssembly/offset.ll +++ b/llvm/test/CodeGen/WebAssembly/offset.ll @@ -692,3 +692,30 @@ define float @load_f16_f32_with_folded_gep_offset(ptr %p) { %t = call float 
@llvm.wasm.loadf16.f32(ptr %s) ret float %t } + +;===---------------------------------------------------------------------------- +; Stores: Half Precision +;===---------------------------------------------------------------------------- + +; Basic store. + +; CHECK-LABEL: store_f16_f32_no_offset: +; CHECK-NEXT: .functype store_f16_f32_no_offset (i32, f32) -> (){{$}} +; CHECK-NEXT: f32.store_f16 0($0), $1{{$}} +; CHECK-NEXT: return{{$}} +define void @store_f16_f32_no_offset(ptr %p, float %v) { + call void @llvm.wasm.storef16.f32(float %v, ptr %p) + ret void +} + +; Storing to a fixed address. + +; CHECK-LABEL: store_f16_f32_to_numeric_address: +; CHECK: i32.const $push1=, 0{{$}} +; CHECK-NEXT: f32.const $push0=, 0x0p0{{$}} +; CHECK-NEXT: f32.store_f16 42($pop1), $pop0{{$}} +define void @store_f16_f32_to_numeric_address() { + %s = inttoptr i32 42 to ptr + call void @llvm.wasm.storef16.f32(float 0.0, ptr %s) + ret void +} diff --git a/llvm/test/MC/WebAssembly/simd-encodings.s b/llvm/test/MC/WebAssembly/simd-encodings.s index e7c3761f381d0..57fa71e74b8d7 100644 --- a/llvm/test/MC/WebAssembly/simd-encodings.s +++ b/llvm/test/MC/WebAssembly/simd-encodings.s @@ -842,4 +842,7 @@ main: # CHECK: f32.load_f16 48 # encoding: [0xfc,0x30,0x01,0x30] f32.load_f16 48 + # CHECK: f32.store_f16 32 # encoding: [0xfc,0x31,0x01,0x20] + f32.store_f16 32 + end_function _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits