[PATCH] D105950: [WebAssembly] Codegen for v128.loadX_lane instructions
This revision was landed with ongoing or failed builds. This revision was automatically updated to reflect the committed changes. Closed by commit rG970e0900104d: [WebAssembly] Codegen for v128.loadX_lane instructions (authored by tlively). Changed prior to commit: https://reviews.llvm.org/D105950?vs=358668&id=358676#toc Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D105950/new/ https://reviews.llvm.org/D105950 Files: clang/include/clang/Basic/BuiltinsWebAssembly.def clang/lib/CodeGen/CGBuiltin.cpp clang/lib/Headers/wasm_simd128.h clang/test/CodeGen/builtins-wasm.c clang/test/Headers/wasm.c llvm/include/llvm/IR/IntrinsicsWebAssembly.td llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td llvm/test/CodeGen/WebAssembly/simd-build-vector.ll llvm/test/CodeGen/WebAssembly/simd-load-lane-offset.ll llvm/test/CodeGen/WebAssembly/simd-load-store-alignment.ll Index: llvm/test/CodeGen/WebAssembly/simd-load-store-alignment.ll === --- llvm/test/CodeGen/WebAssembly/simd-load-store-alignment.ll +++ llvm/test/CodeGen/WebAssembly/simd-load-store-alignment.ll @@ -133,6 +133,34 @@ ret <16 x i8> %v2 } +; 1 is the default alignment for v128.load8_lane so no attribute is needed. +define <16 x i8> @load_lane_i8_a1(i8* %p, <16 x i8> %v) { +; CHECK-LABEL: load_lane_i8_a1: +; CHECK: .functype load_lane_i8_a1 (i32, v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT:local.get 0 +; CHECK-NEXT:local.get 1 +; CHECK-NEXT:v128.load8_lane 0, 0 +; CHECK-NEXT:# fallthrough-return + %e = load i8, i8* %p, align 1 + %v1 = insertelement <16 x i8> %v, i8 %e, i32 0 + ret <16 x i8> %v1 +} + +; 2 is greater than the default alignment so it is ignored. 
+define <16 x i8> @load_lane_i8_a2(i8* %p, <16 x i8> %v) { +; CHECK-LABEL: load_lane_i8_a2: +; CHECK: .functype load_lane_i8_a2 (i32, v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT:local.get 0 +; CHECK-NEXT:local.get 1 +; CHECK-NEXT:v128.load8_lane 0, 0 +; CHECK-NEXT:# fallthrough-return + %e = load i8, i8* %p, align 2 + %v1 = insertelement <16 x i8> %v, i8 %e, i32 0 + ret <16 x i8> %v1 +} + ; == ; 8 x i16 ; == @@ -393,6 +421,47 @@ ret <8 x i16> %v2 } +define <8 x i16> @load_lane_i16_a1(i16* %p, <8 x i16> %v) { +; CHECK-LABEL: load_lane_i16_a1: +; CHECK: .functype load_lane_i16_a1 (i32, v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT:local.get 0 +; CHECK-NEXT:local.get 1 +; CHECK-NEXT:v128.load16_lane 0:p2align=0, 0 +; CHECK-NEXT:# fallthrough-return + %e = load i16, i16* %p, align 1 + %v1 = insertelement <8 x i16> %v, i16 %e, i32 0 + ret <8 x i16> %v1 +} + +; 2 is the default alignment for v128.load16_lane so no attribute is needed. +define <8 x i16> @load_lane_i16_a2(i16* %p, <8 x i16> %v) { +; CHECK-LABEL: load_lane_i16_a2: +; CHECK: .functype load_lane_i16_a2 (i32, v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT:local.get 0 +; CHECK-NEXT:local.get 1 +; CHECK-NEXT:v128.load16_lane 0, 0 +; CHECK-NEXT:# fallthrough-return + %e = load i16, i16* %p, align 2 + %v1 = insertelement <8 x i16> %v, i16 %e, i32 0 + ret <8 x i16> %v1 +} + +; 4 is greater than the default alignment so it is ignored. 
+define <8 x i16> @load_lane_i16_a4(i16* %p, <8 x i16> %v) { +; CHECK-LABEL: load_lane_i16_a4: +; CHECK: .functype load_lane_i16_a4 (i32, v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT:local.get 0 +; CHECK-NEXT:local.get 1 +; CHECK-NEXT:v128.load16_lane 0, 0 +; CHECK-NEXT:# fallthrough-return + %e = load i16, i16* %p, align 4 + %v1 = insertelement <8 x i16> %v, i16 %e, i32 0 + ret <8 x i16> %v1 +} + ; == ; 4 x i32 ; == @@ -666,6 +735,60 @@ ret <4 x i32> %v2 } +define <4 x i32> @load_lane_i32_a1(i32* %p, <4 x i32> %v) { +; CHECK-LABEL: load_lane_i32_a1: +; CHECK: .functype load_lane_i32_a1 (i32, v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT:local.get 0 +; CHECK-NEXT:local.get 1 +; CHECK-NEXT:v128.load32_lane 0:p2align=0, 0 +; CHECK-NEXT:# fallthrough-return + %e = load i32, i32* %p, align 1 + %v1 = insertelement <4 x i32> %v, i32 %e, i32 0 + ret <4 x i32> %v1 +} + +define <4 x i32> @load_lane_i32_a2(i32* %p, <4 x i32> %v) { +; CHECK-LABEL: load_lane_i32_a2: +; CHECK: .functype load_lane_i32_a2 (i32, v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT:local.get 0 +; CHECK-NEXT:local.get 1 +; CHECK-NEXT:v128.load32_lane 0:p2align=1, 0 +; CHECK-NEXT:# fallthrough-return + %e = load
[PATCH] D105950: [WebAssembly] Codegen for v128.loadX_lane instructions
tlively added inline comments. Comment at: llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td:324 + PatFrag<(ops node:$ptr, node:$vec, node:$idx), + (vector_insert $vec, (i32 (extloadi8 $ptr)), $idx)>; +def load16_lane : aheejin wrote: > Why are i8 and i16 extended-loaded? For i8x16 and i16x8 vectors, loading a lane from memory means loading just the i8 or i16. But after selection DAG legalization, the results of those loads are legalized to be i32, making these extending loads. If this were a DAG combine rather than an ISel pattern, I would use the pre-legalization i8 and i16 with non-extending loads. Comment at: llvm/test/CodeGen/WebAssembly/simd-build-vector.ll:214 ; CHECK: v128.const $push[[L0:[0-9]+]]=, 0, 0, 0, 0, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 0 -; CHECK: i8x16.replace_lane +; CHECK: v128.load8_lane ; CHECK: i8x16.replace_lane aheejin wrote: > Why the change? The lane for the swizzle comes from a load from the stack, so that now gets selected to v128.load8_lane rather than a load followed by a replace_lane. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D105950/new/ https://reviews.llvm.org/D105950 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D105950: [WebAssembly] Codegen for v128.loadX_lane instructions
tlively updated this revision to Diff 358668. tlively marked 2 inline comments as done. tlively added a comment. - Address comments Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D105950/new/ https://reviews.llvm.org/D105950 Files: clang/include/clang/Basic/BuiltinsWebAssembly.def clang/lib/CodeGen/CGBuiltin.cpp clang/lib/Headers/wasm_simd128.h clang/test/CodeGen/builtins-wasm.c clang/test/Headers/wasm.c llvm/include/llvm/IR/IntrinsicsWebAssembly.td llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td llvm/test/CodeGen/WebAssembly/simd-build-vector.ll llvm/test/CodeGen/WebAssembly/simd-load-lane-offset.ll llvm/test/CodeGen/WebAssembly/simd-load-store-alignment.ll Index: llvm/test/CodeGen/WebAssembly/simd-load-store-alignment.ll === --- llvm/test/CodeGen/WebAssembly/simd-load-store-alignment.ll +++ llvm/test/CodeGen/WebAssembly/simd-load-store-alignment.ll @@ -134,6 +134,34 @@ ret <16 x i8> %v2 } +; 1 is the default alignment for v128.load8_lane so no attribute is needed. +define <16 x i8> @load_lane_i8_a1(i8* %p, <16 x i8> %v) { +; CHECK-LABEL: load_lane_i8_a1: +; CHECK: .functype load_lane_i8_a1 (i32, v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT:local.get 0 +; CHECK-NEXT:local.get 1 +; CHECK-NEXT:v128.load8_lane 0, 0 +; CHECK-NEXT:# fallthrough-return + %e = load i8, i8* %p, align 1 + %v1 = insertelement <16 x i8> %v, i8 %e, i32 0 + ret <16 x i8> %v1 +} + +; 2 is greater than the default alignment so it is ignored. 
+define <16 x i8> @load_lane_i8_a2(i8* %p, <16 x i8> %v) { +; CHECK-LABEL: load_lane_i8_a2: +; CHECK: .functype load_lane_i8_a2 (i32, v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT:local.get 0 +; CHECK-NEXT:local.get 1 +; CHECK-NEXT:v128.load8_lane 0, 0 +; CHECK-NEXT:# fallthrough-return + %e = load i8, i8* %p, align 2 + %v1 = insertelement <16 x i8> %v, i8 %e, i32 0 + ret <16 x i8> %v1 +} + ; == ; 8 x i16 ; == @@ -394,6 +422,47 @@ ret <8 x i16> %v2 } +define <8 x i16> @load_lane_i16_a1(i16* %p, <8 x i16> %v) { +; CHECK-LABEL: load_lane_i16_a1: +; CHECK: .functype load_lane_i16_a1 (i32, v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT:local.get 0 +; CHECK-NEXT:local.get 1 +; CHECK-NEXT:v128.load16_lane 0:p2align=0, 0 +; CHECK-NEXT:# fallthrough-return + %e = load i16, i16* %p, align 1 + %v1 = insertelement <8 x i16> %v, i16 %e, i32 0 + ret <8 x i16> %v1 +} + +; 2 is the default alignment for v128.load16_lane so no attribute is needed. +define <8 x i16> @load_lane_i16_a2(i16* %p, <8 x i16> %v) { +; CHECK-LABEL: load_lane_i16_a2: +; CHECK: .functype load_lane_i16_a2 (i32, v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT:local.get 0 +; CHECK-NEXT:local.get 1 +; CHECK-NEXT:v128.load16_lane 0, 0 +; CHECK-NEXT:# fallthrough-return + %e = load i16, i16* %p, align 2 + %v1 = insertelement <8 x i16> %v, i16 %e, i32 0 + ret <8 x i16> %v1 +} + +; 4 is greater than the default alignment so it is ignored. 
+define <8 x i16> @load_lane_i16_a4(i16* %p, <8 x i16> %v) { +; CHECK-LABEL: load_lane_i16_a4: +; CHECK: .functype load_lane_i16_a4 (i32, v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT:local.get 0 +; CHECK-NEXT:local.get 1 +; CHECK-NEXT:v128.load16_lane 0, 0 +; CHECK-NEXT:# fallthrough-return + %e = load i16, i16* %p, align 4 + %v1 = insertelement <8 x i16> %v, i16 %e, i32 0 + ret <8 x i16> %v1 +} + ; == ; 4 x i32 ; == @@ -667,6 +736,60 @@ ret <4 x i32> %v2 } +define <4 x i32> @load_lane_i32_a1(i32* %p, <4 x i32> %v) { +; CHECK-LABEL: load_lane_i32_a1: +; CHECK: .functype load_lane_i32_a1 (i32, v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT:local.get 0 +; CHECK-NEXT:local.get 1 +; CHECK-NEXT:v128.load32_lane 0:p2align=0, 0 +; CHECK-NEXT:# fallthrough-return + %e = load i32, i32* %p, align 1 + %v1 = insertelement <4 x i32> %v, i32 %e, i32 0 + ret <4 x i32> %v1 +} + +define <4 x i32> @load_lane_i32_a2(i32* %p, <4 x i32> %v) { +; CHECK-LABEL: load_lane_i32_a2: +; CHECK: .functype load_lane_i32_a2 (i32, v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT:local.get 0 +; CHECK-NEXT:local.get 1 +; CHECK-NEXT:v128.load32_lane 0:p2align=1, 0 +; CHECK-NEXT:# fallthrough-return + %e = load i32, i32* %p, align 2 + %v1 = insertelement <4 x i32> %v, i32 %e, i32 0 + ret <4 x i32> %v1 +} + +; 4 is the default alignment for v128.load32_lane so no attribute is needed. +define <4 x
[PATCH] D105950: [WebAssembly] Codegen for v128.loadX_lane instructions
aheejin added inline comments. Comment at: clang/lib/Headers/wasm_simd128.h:174 +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_v128_load8_lane( +const void *__ptr, v128_t __vec, int __i) __REQUIRE_CONSTANT(__i) { + struct __wasm_v128_load8_lane_struct { Nit: Other similar functions in this file seem to be using `__mem` instead of `__ptr`? (Currently only builtins are using `__ptr`) Comment at: llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td:324 + PatFrag<(ops node:$ptr, node:$vec, node:$idx), + (vector_insert $vec, (i32 (extloadi8 $ptr)), $idx)>; +def load16_lane : Why are i8 and i16 extended-loaded? Comment at: llvm/test/CodeGen/WebAssembly/simd-build-vector.ll:7 -target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128" target triple = "wasm32-unknown-unknown" This seems already contained in D105842? The same for the other files. Comment at: llvm/test/CodeGen/WebAssembly/simd-build-vector.ll:214 ; CHECK: v128.const $push[[L0:[0-9]+]]=, 0, 0, 0, 0, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 0 -; CHECK: i8x16.replace_lane +; CHECK: v128.load8_lane ; CHECK: i8x16.replace_lane Why the change? Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D105950/new/ https://reviews.llvm.org/D105950 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D105950: [WebAssembly] Codegen for v128.loadX_lane instructions
tlively created this revision. tlively added reviewers: aheejin, dschuff. Herald added subscribers: wingo, ecnelises, sunfish, hiraditya, jgravelle-google, sbc100. tlively requested review of this revision. Herald added projects: clang, LLVM. Herald added subscribers: llvm-commits, cfe-commits. Replace the experimental clang builtin and LLVM intrinsics for these instructions with normal codegen patterns. Resolves PR50433. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D105950 Files: clang/include/clang/Basic/BuiltinsWebAssembly.def clang/lib/CodeGen/CGBuiltin.cpp clang/lib/Headers/wasm_simd128.h clang/test/CodeGen/builtins-wasm.c clang/test/Headers/wasm.c llvm/include/llvm/IR/IntrinsicsWebAssembly.td llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td llvm/test/CodeGen/WebAssembly/simd-build-vector.ll llvm/test/CodeGen/WebAssembly/simd-load-lane-offset.ll llvm/test/CodeGen/WebAssembly/simd-load-store-alignment.ll Index: llvm/test/CodeGen/WebAssembly/simd-load-store-alignment.ll === --- llvm/test/CodeGen/WebAssembly/simd-load-store-alignment.ll +++ llvm/test/CodeGen/WebAssembly/simd-load-store-alignment.ll @@ -134,6 +134,34 @@ ret <16 x i8> %v2 } +; 1 is the default alignment for v128.load8_lane so no attribute is needed. +define <16 x i8> @load_lane_i8_a1(i8* %p, <16 x i8> %v) { +; CHECK-LABEL: load_lane_i8_a1: +; CHECK: .functype load_lane_i8_a1 (i32, v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT:local.get 0 +; CHECK-NEXT:local.get 1 +; CHECK-NEXT:v128.load8_lane 0, 0 +; CHECK-NEXT:# fallthrough-return + %e = load i8, i8* %p, align 1 + %v1 = insertelement <16 x i8> %v, i8 %e, i32 0 + ret <16 x i8> %v1 +} + +; 2 is greater than the default alignment so it is ignored. 
+define <16 x i8> @load_lane_i8_a2(i8* %p, <16 x i8> %v) { +; CHECK-LABEL: load_lane_i8_a2: +; CHECK: .functype load_lane_i8_a2 (i32, v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT:local.get 0 +; CHECK-NEXT:local.get 1 +; CHECK-NEXT:v128.load8_lane 0, 0 +; CHECK-NEXT:# fallthrough-return + %e = load i8, i8* %p, align 2 + %v1 = insertelement <16 x i8> %v, i8 %e, i32 0 + ret <16 x i8> %v1 +} + ; == ; 8 x i16 ; == @@ -394,6 +422,47 @@ ret <8 x i16> %v2 } +define <8 x i16> @load_lane_i16_a1(i16* %p, <8 x i16> %v) { +; CHECK-LABEL: load_lane_i16_a1: +; CHECK: .functype load_lane_i16_a1 (i32, v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT:local.get 0 +; CHECK-NEXT:local.get 1 +; CHECK-NEXT:v128.load16_lane 0:p2align=0, 0 +; CHECK-NEXT:# fallthrough-return + %e = load i16, i16* %p, align 1 + %v1 = insertelement <8 x i16> %v, i16 %e, i32 0 + ret <8 x i16> %v1 +} + +; 2 is the default alignment for v128.load16_lane so no attribute is needed. +define <8 x i16> @load_lane_i16_a2(i16* %p, <8 x i16> %v) { +; CHECK-LABEL: load_lane_i16_a2: +; CHECK: .functype load_lane_i16_a2 (i32, v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT:local.get 0 +; CHECK-NEXT:local.get 1 +; CHECK-NEXT:v128.load16_lane 0, 0 +; CHECK-NEXT:# fallthrough-return + %e = load i16, i16* %p, align 2 + %v1 = insertelement <8 x i16> %v, i16 %e, i32 0 + ret <8 x i16> %v1 +} + +; 4 is greater than the default alignment so it is ignored. 
+define <8 x i16> @load_lane_i16_a4(i16* %p, <8 x i16> %v) { +; CHECK-LABEL: load_lane_i16_a4: +; CHECK: .functype load_lane_i16_a4 (i32, v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT:local.get 0 +; CHECK-NEXT:local.get 1 +; CHECK-NEXT:v128.load16_lane 0, 0 +; CHECK-NEXT:# fallthrough-return + %e = load i16, i16* %p, align 4 + %v1 = insertelement <8 x i16> %v, i16 %e, i32 0 + ret <8 x i16> %v1 +} + ; == ; 4 x i32 ; == @@ -667,6 +736,60 @@ ret <4 x i32> %v2 } +define <4 x i32> @load_lane_i32_a1(i32* %p, <4 x i32> %v) { +; CHECK-LABEL: load_lane_i32_a1: +; CHECK: .functype load_lane_i32_a1 (i32, v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT:local.get 0 +; CHECK-NEXT:local.get 1 +; CHECK-NEXT:v128.load32_lane 0:p2align=0, 0 +; CHECK-NEXT:# fallthrough-return + %e = load i32, i32* %p, align 1 + %v1 = insertelement <4 x i32> %v, i32 %e, i32 0 + ret <4 x i32> %v1 +} + +define <4 x i32> @load_lane_i32_a2(i32* %p, <4 x i32> %v) { +; CHECK-LABEL: load_lane_i32_a2: +; CHECK: .functype load_lane_i32_a2 (i32, v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT:local.get 0 +; CHECK-NEXT:local.get 1 +; CHECK-NEXT:v128.load32_lane 0:p2align=1, 0 +;