[PATCH] D106019: [WebAssembly] Codegen for v128.storeX_lane instructions

2021-07-14 Thread Thomas Lively via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG4a4229f70f81: [WebAssembly] Codegen for v128.storeX_lane instructions (authored by tlively).

Changed prior to commit:
  https://reviews.llvm.org/D106019?vs=358747&id=358774#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D106019/new/

https://reviews.llvm.org/D106019

Files:
  clang/include/clang/Basic/BuiltinsWebAssembly.def
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/lib/Headers/wasm_simd128.h
  clang/test/CodeGen/builtins-wasm.c
  clang/test/Headers/wasm.c
  llvm/include/llvm/IR/IntrinsicsWebAssembly.td
  llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
  llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
  llvm/test/CodeGen/WebAssembly/simd-build-pair.ll
  llvm/test/CodeGen/WebAssembly/simd-load-lane-offset.ll
  llvm/test/CodeGen/WebAssembly/simd-load-store-alignment.ll

Index: llvm/test/CodeGen/WebAssembly/simd-load-store-alignment.ll
===
--- llvm/test/CodeGen/WebAssembly/simd-load-store-alignment.ll
+++ llvm/test/CodeGen/WebAssembly/simd-load-store-alignment.ll
@@ -161,6 +161,34 @@
   ret <16 x i8> %v1
 }
 
+; 1 is the default alignment for v128.store8_lane so no attribute is needed.
+define void @store_lane_i8_a1(<16 x i8> %v, i8* %p) {
+; CHECK-LABEL: store_lane_i8_a1:
+; CHECK: .functype store_lane_i8_a1 (v128, i32) -> ()
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:local.get 1
+; CHECK-NEXT:local.get 0
+; CHECK-NEXT:v128.store8_lane 0, 0
+; CHECK-NEXT:# fallthrough-return
+  %x = extractelement <16 x i8> %v, i32 0
+  store i8 %x, i8* %p, align 1
+  ret void
+}
+
+; 2 is greater than the default alignment so it is ignored.
+define void @store_lane_i8_a2(<16 x i8> %v, i8* %p) {
+; CHECK-LABEL: store_lane_i8_a2:
+; CHECK: .functype store_lane_i8_a2 (v128, i32) -> ()
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:local.get 1
+; CHECK-NEXT:local.get 0
+; CHECK-NEXT:v128.store8_lane 0, 0
+; CHECK-NEXT:# fallthrough-return
+  %x = extractelement <16 x i8> %v, i32 0
+  store i8 %x, i8* %p, align 2
+  ret void
+}
+
 ; ==
 ; 8 x i16
 ; ==
@@ -462,6 +490,47 @@
   ret <8 x i16> %v1
 }
 
+define void @store_lane_i16_a1(<8 x i16> %v, i16* %p) {
+; CHECK-LABEL: store_lane_i16_a1:
+; CHECK: .functype store_lane_i16_a1 (v128, i32) -> ()
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:local.get 1
+; CHECK-NEXT:local.get 0
+; CHECK-NEXT:v128.store16_lane 0:p2align=0, 0
+; CHECK-NEXT:# fallthrough-return
+  %x = extractelement <8 x i16> %v, i32 0
+  store i16 %x, i16* %p, align 1
+  ret void
+}
+
+; 2 is the default alignment for v128.store16_lane so no attribute is needed.
+define void @store_lane_i16_a2(<8 x i16> %v, i16* %p) {
+; CHECK-LABEL: store_lane_i16_a2:
+; CHECK: .functype store_lane_i16_a2 (v128, i32) -> ()
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:local.get 1
+; CHECK-NEXT:local.get 0
+; CHECK-NEXT:v128.store16_lane 0, 0
+; CHECK-NEXT:# fallthrough-return
+  %x = extractelement <8 x i16> %v, i32 0
+  store i16 %x, i16* %p, align 2
+  ret void
+}
+
+; 4 is greater than the default alignment so it is ignored.
+define void @store_lane_i16_a4(<8 x i16> %v, i16* %p) {
+; CHECK-LABEL: store_lane_i16_a4:
+; CHECK: .functype store_lane_i16_a4 (v128, i32) -> ()
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:local.get 1
+; CHECK-NEXT:local.get 0
+; CHECK-NEXT:v128.store16_lane 0, 0
+; CHECK-NEXT:# fallthrough-return
+  %x = extractelement <8 x i16> %v, i32 0
+  store i16 %x, i16* %p, align 4
+  ret void
+}
+
 ; ==
 ; 4 x i32
 ; ==
@@ -789,6 +858,60 @@
   ret <4 x i32> %v1
 }
 
+define void @store_lane_i32_a1(<4 x i32> %v, i32* %p) {
+; CHECK-LABEL: store_lane_i32_a1:
+; CHECK: .functype store_lane_i32_a1 (v128, i32) -> ()
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:local.get 1
+; CHECK-NEXT:local.get 0
+; CHECK-NEXT:v128.store32_lane 0:p2align=0, 0
+; CHECK-NEXT:# fallthrough-return
+  %x = extractelement <4 x i32> %v, i32 0
+  store i32 %x, i32* %p, align 1
+  ret void
+}
+
+define void @store_lane_i32_a2(<4 x i32> %v, i32* %p) {
+; CHECK-LABEL: store_lane_i32_a2:
+; CHECK: .functype store_lane_i32_a2 (v128, i32) -> ()
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:local.get 1
+; CHECK-NEXT:local.get 0
+; CHECK-NEXT:v128.store32_lane 0:p2align=1, 0
+; CHECK-NEXT:# fallthrough-return
+  %x = extractelement <4 x i32> %v, i32 0
+  store i32 %x, i32* %p, align 2
+  ret void
+}
+
+; 4 is the default alignment for v128.store32_lane so no attribute is needed.

[PATCH] D106019: [WebAssembly] Codegen for v128.storeX_lane instructions

2021-07-14 Thread Thomas Lively via Phabricator via cfe-commits
tlively created this revision.
tlively added reviewers: aheejin, dschuff.
Herald added subscribers: wingo, ecnelises, sunfish, hiraditya, jgravelle-google, sbc100.
tlively requested review of this revision.
Herald added projects: clang, LLVM.
Herald added subscribers: llvm-commits, cfe-commits.

Replace the experimental clang builtins and LLVM intrinsics for these
instructions with normal codegen patterns. Resolves PR50435.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D106019

Files:
  clang/include/clang/Basic/BuiltinsWebAssembly.def
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/lib/Headers/wasm_simd128.h
  clang/test/CodeGen/builtins-wasm.c
  clang/test/Headers/wasm.c
  llvm/include/llvm/IR/IntrinsicsWebAssembly.td
  llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
  llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
  llvm/test/CodeGen/WebAssembly/simd-build-pair.ll
  llvm/test/CodeGen/WebAssembly/simd-load-lane-offset.ll
  llvm/test/CodeGen/WebAssembly/simd-load-store-alignment.ll

Index: llvm/test/CodeGen/WebAssembly/simd-load-store-alignment.ll
===
--- llvm/test/CodeGen/WebAssembly/simd-load-store-alignment.ll
+++ llvm/test/CodeGen/WebAssembly/simd-load-store-alignment.ll
@@ -162,6 +162,34 @@
   ret <16 x i8> %v1
 }
 
+; 1 is the default alignment for v128.store8_lane so no attribute is needed.
+define void @store_lane_i8_a1(<16 x i8> %v, i8* %p) {
+; CHECK-LABEL: store_lane_i8_a1:
+; CHECK: .functype store_lane_i8_a1 (v128, i32) -> ()
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:local.get 1
+; CHECK-NEXT:local.get 0
+; CHECK-NEXT:v128.store8_lane 0, 0
+; CHECK-NEXT:# fallthrough-return
+  %x = extractelement <16 x i8> %v, i32 0
+  store i8 %x, i8* %p, align 1
+  ret void
+}
+
+; 2 is greater than the default alignment so it is ignored.
+define void @store_lane_i8_a2(<16 x i8> %v, i8* %p) {
+; CHECK-LABEL: store_lane_i8_a2:
+; CHECK: .functype store_lane_i8_a2 (v128, i32) -> ()
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:local.get 1
+; CHECK-NEXT:local.get 0
+; CHECK-NEXT:v128.store8_lane 0, 0
+; CHECK-NEXT:# fallthrough-return
+  %x = extractelement <16 x i8> %v, i32 0
+  store i8 %x, i8* %p, align 2
+  ret void
+}
+
 ; ==
 ; 8 x i16
 ; ==
@@ -463,6 +491,47 @@
   ret <8 x i16> %v1
 }
 
+define void @store_lane_i16_a1(<8 x i16> %v, i16* %p) {
+; CHECK-LABEL: store_lane_i16_a1:
+; CHECK: .functype store_lane_i16_a1 (v128, i32) -> ()
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:local.get 1
+; CHECK-NEXT:local.get 0
+; CHECK-NEXT:v128.store16_lane 0:p2align=0, 0
+; CHECK-NEXT:# fallthrough-return
+  %x = extractelement <8 x i16> %v, i32 0
+  store i16 %x, i16* %p, align 1
+  ret void
+}
+
+; 2 is the default alignment for v128.store16_lane so no attribute is needed.
+define void @store_lane_i16_a2(<8 x i16> %v, i16* %p) {
+; CHECK-LABEL: store_lane_i16_a2:
+; CHECK: .functype store_lane_i16_a2 (v128, i32) -> ()
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:local.get 1
+; CHECK-NEXT:local.get 0
+; CHECK-NEXT:v128.store16_lane 0, 0
+; CHECK-NEXT:# fallthrough-return
+  %x = extractelement <8 x i16> %v, i32 0
+  store i16 %x, i16* %p, align 2
+  ret void
+}
+
+; 4 is greater than the default alignment so it is ignored.
+define void @store_lane_i16_a4(<8 x i16> %v, i16* %p) {
+; CHECK-LABEL: store_lane_i16_a4:
+; CHECK: .functype store_lane_i16_a4 (v128, i32) -> ()
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:local.get 1
+; CHECK-NEXT:local.get 0
+; CHECK-NEXT:v128.store16_lane 0, 0
+; CHECK-NEXT:# fallthrough-return
+  %x = extractelement <8 x i16> %v, i32 0
+  store i16 %x, i16* %p, align 4
+  ret void
+}
+
 ; ==
 ; 4 x i32
 ; ==
@@ -790,6 +859,60 @@
   ret <4 x i32> %v1
 }
 
+define void @store_lane_i32_a1(<4 x i32> %v, i32* %p) {
+; CHECK-LABEL: store_lane_i32_a1:
+; CHECK: .functype store_lane_i32_a1 (v128, i32) -> ()
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:local.get 1
+; CHECK-NEXT:local.get 0
+; CHECK-NEXT:v128.store32_lane 0:p2align=0, 0
+; CHECK-NEXT:# fallthrough-return
+  %x = extractelement <4 x i32> %v, i32 0
+  store i32 %x, i32* %p, align 1
+  ret void
+}
+
+define void @store_lane_i32_a2(<4 x i32> %v, i32* %p) {
+; CHECK-LABEL: store_lane_i32_a2:
+; CHECK: .functype store_lane_i32_a2 (v128, i32) -> ()
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:local.get 1
+; CHECK-NEXT:local.get 0
+; CHECK-NEXT:v128.store32_lane 0:p2align=1, 0
+; CHECK-NEXT:# fallthrough-return
+  %x = extractelement <4 x i32> %v, i32 0
+  store i32 %x, i32* %p, align 2
+  ret void
+}
+
+; 4 is the default a