[PATCH] D105950: [WebAssembly] Codegen for v128.loadX_lane instructions

2021-07-14 Thread Thomas Lively via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG970e0900104d: [WebAssembly] Codegen for v128.loadX_lane 
instructions (authored by tlively).

Changed prior to commit:
  https://reviews.llvm.org/D105950?vs=358668&id=358676#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D105950/new/

https://reviews.llvm.org/D105950

Files:
  clang/include/clang/Basic/BuiltinsWebAssembly.def
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/lib/Headers/wasm_simd128.h
  clang/test/CodeGen/builtins-wasm.c
  clang/test/Headers/wasm.c
  llvm/include/llvm/IR/IntrinsicsWebAssembly.td
  llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
  llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
  llvm/test/CodeGen/WebAssembly/simd-build-vector.ll
  llvm/test/CodeGen/WebAssembly/simd-load-lane-offset.ll
  llvm/test/CodeGen/WebAssembly/simd-load-store-alignment.ll

Index: llvm/test/CodeGen/WebAssembly/simd-load-store-alignment.ll
===
--- llvm/test/CodeGen/WebAssembly/simd-load-store-alignment.ll
+++ llvm/test/CodeGen/WebAssembly/simd-load-store-alignment.ll
@@ -133,6 +133,34 @@
   ret <16 x i8> %v2
 }
 
+; 1 is the default alignment for v128.load8_lane so no attribute is needed.
+define <16 x i8> @load_lane_i8_a1(i8* %p, <16 x i8> %v) {
+; CHECK-LABEL: load_lane_i8_a1:
+; CHECK: .functype load_lane_i8_a1 (i32, v128) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:local.get 0
+; CHECK-NEXT:local.get 1
+; CHECK-NEXT:v128.load8_lane 0, 0
+; CHECK-NEXT:# fallthrough-return
+  %e = load i8, i8* %p, align 1
+  %v1 = insertelement <16 x i8> %v, i8 %e, i32 0
+  ret <16 x i8> %v1
+}
+
+; 2 is greater than the default alignment so it is ignored.
+define <16 x i8> @load_lane_i8_a2(i8* %p, <16 x i8> %v) {
+; CHECK-LABEL: load_lane_i8_a2:
+; CHECK: .functype load_lane_i8_a2 (i32, v128) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:local.get 0
+; CHECK-NEXT:local.get 1
+; CHECK-NEXT:v128.load8_lane 0, 0
+; CHECK-NEXT:# fallthrough-return
+  %e = load i8, i8* %p, align 2
+  %v1 = insertelement <16 x i8> %v, i8 %e, i32 0
+  ret <16 x i8> %v1
+}
+
 ; ==
 ; 8 x i16
 ; ==
@@ -393,6 +421,47 @@
   ret <8 x i16> %v2
 }
 
+define <8 x i16> @load_lane_i16_a1(i16* %p, <8 x i16> %v) {
+; CHECK-LABEL: load_lane_i16_a1:
+; CHECK: .functype load_lane_i16_a1 (i32, v128) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:local.get 0
+; CHECK-NEXT:local.get 1
+; CHECK-NEXT:v128.load16_lane 0:p2align=0, 0
+; CHECK-NEXT:# fallthrough-return
+  %e = load i16, i16* %p, align 1
+  %v1 = insertelement <8 x i16> %v, i16 %e, i32 0
+  ret <8 x i16> %v1
+}
+
+; 2 is the default alignment for v128.load16_lane so no attribute is needed.
+define <8 x i16> @load_lane_i16_a2(i16* %p, <8 x i16> %v) {
+; CHECK-LABEL: load_lane_i16_a2:
+; CHECK: .functype load_lane_i16_a2 (i32, v128) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:local.get 0
+; CHECK-NEXT:local.get 1
+; CHECK-NEXT:v128.load16_lane 0, 0
+; CHECK-NEXT:# fallthrough-return
+  %e = load i16, i16* %p, align 2
+  %v1 = insertelement <8 x i16> %v, i16 %e, i32 0
+  ret <8 x i16> %v1
+}
+
+; 4 is greater than the default alignment so it is ignored.
+define <8 x i16> @load_lane_i16_a4(i16* %p, <8 x i16> %v) {
+; CHECK-LABEL: load_lane_i16_a4:
+; CHECK: .functype load_lane_i16_a4 (i32, v128) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:local.get 0
+; CHECK-NEXT:local.get 1
+; CHECK-NEXT:v128.load16_lane 0, 0
+; CHECK-NEXT:# fallthrough-return
+  %e = load i16, i16* %p, align 4
+  %v1 = insertelement <8 x i16> %v, i16 %e, i32 0
+  ret <8 x i16> %v1
+}
+
 ; ==
 ; 4 x i32
 ; ==
@@ -666,6 +735,60 @@
   ret <4 x i32> %v2
 }
 
+define <4 x i32> @load_lane_i32_a1(i32* %p, <4 x i32> %v) {
+; CHECK-LABEL: load_lane_i32_a1:
+; CHECK: .functype load_lane_i32_a1 (i32, v128) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:local.get 0
+; CHECK-NEXT:local.get 1
+; CHECK-NEXT:v128.load32_lane 0:p2align=0, 0
+; CHECK-NEXT:# fallthrough-return
+  %e = load i32, i32* %p, align 1
+  %v1 = insertelement <4 x i32> %v, i32 %e, i32 0
+  ret <4 x i32> %v1
+}
+
+define <4 x i32> @load_lane_i32_a2(i32* %p, <4 x i32> %v) {
+; CHECK-LABEL: load_lane_i32_a2:
+; CHECK: .functype load_lane_i32_a2 (i32, v128) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:local.get 0
+; CHECK-NEXT:local.get 1
+; CHECK-NEXT:v128.load32_lane 0:p2align=1, 0
+; CHECK-NEXT:# fallthrough-return
+  %e = load 

[PATCH] D105950: [WebAssembly] Codegen for v128.loadX_lane instructions

2021-07-14 Thread Thomas Lively via Phabricator via cfe-commits
tlively added inline comments.



Comment at: llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td:324
+  PatFrag<(ops node:$ptr, node:$vec, node:$idx),
+  (vector_insert $vec, (i32 (extloadi8 $ptr)), $idx)>;
+def load16_lane :

aheejin wrote:
> Why are i8 and i16 extended-loaded?
For i8x16 and i16x8 vectors, loading a lane from memory means loading just the 
i8 or i16. But after selection DAG legalization, the results of those loads are 
legalized to i32, making these extending loads. If this were a DAG combine 
rather than an ISel pattern, I would use the pre-legalization i8 and i16 with 
non-extending loads.



Comment at: llvm/test/CodeGen/WebAssembly/simd-build-vector.ll:214
 ; CHECK:   v128.const  $push[[L0:[0-9]+]]=, 0, 0, 0, 0, 42, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 42, 0
-; CHECK:   i8x16.replace_lane
+; CHECK:   v128.load8_lane
 ; CHECK:   i8x16.replace_lane

aheejin wrote:
> Why the change?
The lane for the swizzle comes from a load from the stack, so that now gets 
selected to v128.load8_lane rather than a load followed by a replace_lane.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D105950/new/

https://reviews.llvm.org/D105950

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D105950: [WebAssembly] Codegen for v128.loadX_lane instructions

2021-07-14 Thread Thomas Lively via Phabricator via cfe-commits
tlively updated this revision to Diff 358668.
tlively marked 2 inline comments as done.
tlively added a comment.

- Address comments


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D105950/new/

https://reviews.llvm.org/D105950

Files:
  clang/include/clang/Basic/BuiltinsWebAssembly.def
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/lib/Headers/wasm_simd128.h
  clang/test/CodeGen/builtins-wasm.c
  clang/test/Headers/wasm.c
  llvm/include/llvm/IR/IntrinsicsWebAssembly.td
  llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
  llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
  llvm/test/CodeGen/WebAssembly/simd-build-vector.ll
  llvm/test/CodeGen/WebAssembly/simd-load-lane-offset.ll
  llvm/test/CodeGen/WebAssembly/simd-load-store-alignment.ll

Index: llvm/test/CodeGen/WebAssembly/simd-load-store-alignment.ll
===
--- llvm/test/CodeGen/WebAssembly/simd-load-store-alignment.ll
+++ llvm/test/CodeGen/WebAssembly/simd-load-store-alignment.ll
@@ -134,6 +134,34 @@
   ret <16 x i8> %v2
 }
 
+; 1 is the default alignment for v128.load8_lane so no attribute is needed.
+define <16 x i8> @load_lane_i8_a1(i8* %p, <16 x i8> %v) {
+; CHECK-LABEL: load_lane_i8_a1:
+; CHECK: .functype load_lane_i8_a1 (i32, v128) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:local.get 0
+; CHECK-NEXT:local.get 1
+; CHECK-NEXT:v128.load8_lane 0, 0
+; CHECK-NEXT:# fallthrough-return
+  %e = load i8, i8* %p, align 1
+  %v1 = insertelement <16 x i8> %v, i8 %e, i32 0
+  ret <16 x i8> %v1
+}
+
+; 2 is greater than the default alignment so it is ignored.
+define <16 x i8> @load_lane_i8_a2(i8* %p, <16 x i8> %v) {
+; CHECK-LABEL: load_lane_i8_a2:
+; CHECK: .functype load_lane_i8_a2 (i32, v128) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:local.get 0
+; CHECK-NEXT:local.get 1
+; CHECK-NEXT:v128.load8_lane 0, 0
+; CHECK-NEXT:# fallthrough-return
+  %e = load i8, i8* %p, align 2
+  %v1 = insertelement <16 x i8> %v, i8 %e, i32 0
+  ret <16 x i8> %v1
+}
+
 ; ==
 ; 8 x i16
 ; ==
@@ -394,6 +422,47 @@
   ret <8 x i16> %v2
 }
 
+define <8 x i16> @load_lane_i16_a1(i16* %p, <8 x i16> %v) {
+; CHECK-LABEL: load_lane_i16_a1:
+; CHECK: .functype load_lane_i16_a1 (i32, v128) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:local.get 0
+; CHECK-NEXT:local.get 1
+; CHECK-NEXT:v128.load16_lane 0:p2align=0, 0
+; CHECK-NEXT:# fallthrough-return
+  %e = load i16, i16* %p, align 1
+  %v1 = insertelement <8 x i16> %v, i16 %e, i32 0
+  ret <8 x i16> %v1
+}
+
+; 2 is the default alignment for v128.load16_lane so no attribute is needed.
+define <8 x i16> @load_lane_i16_a2(i16* %p, <8 x i16> %v) {
+; CHECK-LABEL: load_lane_i16_a2:
+; CHECK: .functype load_lane_i16_a2 (i32, v128) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:local.get 0
+; CHECK-NEXT:local.get 1
+; CHECK-NEXT:v128.load16_lane 0, 0
+; CHECK-NEXT:# fallthrough-return
+  %e = load i16, i16* %p, align 2
+  %v1 = insertelement <8 x i16> %v, i16 %e, i32 0
+  ret <8 x i16> %v1
+}
+
+; 4 is greater than the default alignment so it is ignored.
+define <8 x i16> @load_lane_i16_a4(i16* %p, <8 x i16> %v) {
+; CHECK-LABEL: load_lane_i16_a4:
+; CHECK: .functype load_lane_i16_a4 (i32, v128) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:local.get 0
+; CHECK-NEXT:local.get 1
+; CHECK-NEXT:v128.load16_lane 0, 0
+; CHECK-NEXT:# fallthrough-return
+  %e = load i16, i16* %p, align 4
+  %v1 = insertelement <8 x i16> %v, i16 %e, i32 0
+  ret <8 x i16> %v1
+}
+
 ; ==
 ; 4 x i32
 ; ==
@@ -667,6 +736,60 @@
   ret <4 x i32> %v2
 }
 
+define <4 x i32> @load_lane_i32_a1(i32* %p, <4 x i32> %v) {
+; CHECK-LABEL: load_lane_i32_a1:
+; CHECK: .functype load_lane_i32_a1 (i32, v128) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:local.get 0
+; CHECK-NEXT:local.get 1
+; CHECK-NEXT:v128.load32_lane 0:p2align=0, 0
+; CHECK-NEXT:# fallthrough-return
+  %e = load i32, i32* %p, align 1
+  %v1 = insertelement <4 x i32> %v, i32 %e, i32 0
+  ret <4 x i32> %v1
+}
+
+define <4 x i32> @load_lane_i32_a2(i32* %p, <4 x i32> %v) {
+; CHECK-LABEL: load_lane_i32_a2:
+; CHECK: .functype load_lane_i32_a2 (i32, v128) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:local.get 0
+; CHECK-NEXT:local.get 1
+; CHECK-NEXT:v128.load32_lane 0:p2align=1, 0
+; CHECK-NEXT:# fallthrough-return
+  %e = load i32, i32* %p, align 2
+  %v1 = insertelement <4 x i32> %v, i32 %e, i32 0
+  ret <4 x i32> %v1
+}
+
+; 4 is the default alignment for v128.load32_lane so no attribute is needed.
+define <4 x 

[PATCH] D105950: [WebAssembly] Codegen for v128.loadX_lane instructions

2021-07-14 Thread Heejin Ahn via Phabricator via cfe-commits
aheejin added inline comments.



Comment at: clang/lib/Headers/wasm_simd128.h:174
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_v128_load8_lane(
+const void *__ptr, v128_t __vec, int __i) __REQUIRE_CONSTANT(__i) {
+  struct __wasm_v128_load8_lane_struct {

Nit: Other similar functions in this file seem to be using `__mem` instead of 
`__ptr`? (Currently only builtins are using `__ptr`)



Comment at: llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td:324
+  PatFrag<(ops node:$ptr, node:$vec, node:$idx),
+  (vector_insert $vec, (i32 (extloadi8 $ptr)), $idx)>;
+def load16_lane :

Why are i8 and i16 extended-loaded?



Comment at: llvm/test/CodeGen/WebAssembly/simd-build-vector.ll:7
 
-target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
 target triple = "wasm32-unknown-unknown"

This seems to be already contained in D105842? The same goes for the other files.



Comment at: llvm/test/CodeGen/WebAssembly/simd-build-vector.ll:214
 ; CHECK:   v128.const  $push[[L0:[0-9]+]]=, 0, 0, 0, 0, 42, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 42, 0
-; CHECK:   i8x16.replace_lane
+; CHECK:   v128.load8_lane
 ; CHECK:   i8x16.replace_lane

Why the change?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D105950/new/

https://reviews.llvm.org/D105950

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D105950: [WebAssembly] Codegen for v128.loadX_lane instructions

2021-07-13 Thread Thomas Lively via Phabricator via cfe-commits
tlively created this revision.
tlively added reviewers: aheejin, dschuff.
Herald added subscribers: wingo, ecnelises, sunfish, hiraditya, 
jgravelle-google, sbc100.
tlively requested review of this revision.
Herald added projects: clang, LLVM.
Herald added subscribers: llvm-commits, cfe-commits.

Replace the experimental clang builtin and LLVM intrinsics for these
instructions with normal codegen patterns. Resolves PR50433.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D105950

Files:
  clang/include/clang/Basic/BuiltinsWebAssembly.def
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/lib/Headers/wasm_simd128.h
  clang/test/CodeGen/builtins-wasm.c
  clang/test/Headers/wasm.c
  llvm/include/llvm/IR/IntrinsicsWebAssembly.td
  llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
  llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
  llvm/test/CodeGen/WebAssembly/simd-build-vector.ll
  llvm/test/CodeGen/WebAssembly/simd-load-lane-offset.ll
  llvm/test/CodeGen/WebAssembly/simd-load-store-alignment.ll

Index: llvm/test/CodeGen/WebAssembly/simd-load-store-alignment.ll
===
--- llvm/test/CodeGen/WebAssembly/simd-load-store-alignment.ll
+++ llvm/test/CodeGen/WebAssembly/simd-load-store-alignment.ll
@@ -134,6 +134,34 @@
   ret <16 x i8> %v2
 }
 
+; 1 is the default alignment for v128.load8_lane so no attribute is needed.
+define <16 x i8> @load_lane_i8_a1(i8* %p, <16 x i8> %v) {
+; CHECK-LABEL: load_lane_i8_a1:
+; CHECK: .functype load_lane_i8_a1 (i32, v128) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:local.get 0
+; CHECK-NEXT:local.get 1
+; CHECK-NEXT:v128.load8_lane 0, 0
+; CHECK-NEXT:# fallthrough-return
+  %e = load i8, i8* %p, align 1
+  %v1 = insertelement <16 x i8> %v, i8 %e, i32 0
+  ret <16 x i8> %v1
+}
+
+; 2 is greater than the default alignment so it is ignored.
+define <16 x i8> @load_lane_i8_a2(i8* %p, <16 x i8> %v) {
+; CHECK-LABEL: load_lane_i8_a2:
+; CHECK: .functype load_lane_i8_a2 (i32, v128) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:local.get 0
+; CHECK-NEXT:local.get 1
+; CHECK-NEXT:v128.load8_lane 0, 0
+; CHECK-NEXT:# fallthrough-return
+  %e = load i8, i8* %p, align 2
+  %v1 = insertelement <16 x i8> %v, i8 %e, i32 0
+  ret <16 x i8> %v1
+}
+
 ; ==
 ; 8 x i16
 ; ==
@@ -394,6 +422,47 @@
   ret <8 x i16> %v2
 }
 
+define <8 x i16> @load_lane_i16_a1(i16* %p, <8 x i16> %v) {
+; CHECK-LABEL: load_lane_i16_a1:
+; CHECK: .functype load_lane_i16_a1 (i32, v128) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:local.get 0
+; CHECK-NEXT:local.get 1
+; CHECK-NEXT:v128.load16_lane 0:p2align=0, 0
+; CHECK-NEXT:# fallthrough-return
+  %e = load i16, i16* %p, align 1
+  %v1 = insertelement <8 x i16> %v, i16 %e, i32 0
+  ret <8 x i16> %v1
+}
+
+; 2 is the default alignment for v128.load16_lane so no attribute is needed.
+define <8 x i16> @load_lane_i16_a2(i16* %p, <8 x i16> %v) {
+; CHECK-LABEL: load_lane_i16_a2:
+; CHECK: .functype load_lane_i16_a2 (i32, v128) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:local.get 0
+; CHECK-NEXT:local.get 1
+; CHECK-NEXT:v128.load16_lane 0, 0
+; CHECK-NEXT:# fallthrough-return
+  %e = load i16, i16* %p, align 2
+  %v1 = insertelement <8 x i16> %v, i16 %e, i32 0
+  ret <8 x i16> %v1
+}
+
+; 4 is greater than the default alignment so it is ignored.
+define <8 x i16> @load_lane_i16_a4(i16* %p, <8 x i16> %v) {
+; CHECK-LABEL: load_lane_i16_a4:
+; CHECK: .functype load_lane_i16_a4 (i32, v128) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:local.get 0
+; CHECK-NEXT:local.get 1
+; CHECK-NEXT:v128.load16_lane 0, 0
+; CHECK-NEXT:# fallthrough-return
+  %e = load i16, i16* %p, align 4
+  %v1 = insertelement <8 x i16> %v, i16 %e, i32 0
+  ret <8 x i16> %v1
+}
+
 ; ==
 ; 4 x i32
 ; ==
@@ -667,6 +736,60 @@
   ret <4 x i32> %v2
 }
 
+define <4 x i32> @load_lane_i32_a1(i32* %p, <4 x i32> %v) {
+; CHECK-LABEL: load_lane_i32_a1:
+; CHECK: .functype load_lane_i32_a1 (i32, v128) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:local.get 0
+; CHECK-NEXT:local.get 1
+; CHECK-NEXT:v128.load32_lane 0:p2align=0, 0
+; CHECK-NEXT:# fallthrough-return
+  %e = load i32, i32* %p, align 1
+  %v1 = insertelement <4 x i32> %v, i32 %e, i32 0
+  ret <4 x i32> %v1
+}
+
+define <4 x i32> @load_lane_i32_a2(i32* %p, <4 x i32> %v) {
+; CHECK-LABEL: load_lane_i32_a2:
+; CHECK: .functype load_lane_i32_a2 (i32, v128) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:local.get 0
+; CHECK-NEXT:local.get 1
+; CHECK-NEXT:v128.load32_lane 0:p2align=1, 0
+;