[PATCH] D71698: [AArch64][SVE] Add intrinsic for non-faulting loads

2020-01-22 Thread Kerry McLaughlin via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rGcdcc4f2a44b5: [AArch64][SVE] Add intrinsic for non-faulting 
loads (authored by kmclaughlin).

Changed prior to commit:
  https://reviews.llvm.org/D71698?vs=239144&id=239531#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D71698/new/

https://reviews.llvm.org/D71698

Files:
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
  llvm/lib/Target/AArch64/AArch64ISelLowering.h
  llvm/lib/Target/AArch64/AArch64InstrInfo.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/SVEInstrFormats.td
  llvm/test/CodeGen/AArch64/sve-intrinsics-loads-nf.ll

Index: llvm/test/CodeGen/AArch64/sve-intrinsics-loads-nf.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-loads-nf.ll
@@ -0,0 +1,182 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+define <vscale x 16 x i8> @ldnf1b(<vscale x 16 x i1> %pg, i8* %a) {
+; CHECK-LABEL: ldnf1b:
+; CHECK: ldnf1b { z0.b }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call <vscale x 16 x i8> @llvm.aarch64.sve.ldnf1.nxv16i8(<vscale x 16 x i1> %pg, i8* %a)
+  ret <vscale x 16 x i8> %load
+}
+
+define <vscale x 8 x i16> @ldnf1b_h(<vscale x 8 x i1> %pg, i8* %a) {
+; CHECK-LABEL: ldnf1b_h:
+; CHECK: ldnf1b { z0.h }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call <vscale x 8 x i8> @llvm.aarch64.sve.ldnf1.nxv8i8(<vscale x 8 x i1> %pg, i8* %a)
+  %res = zext <vscale x 8 x i8> %load to <vscale x 8 x i16>
+  ret <vscale x 8 x i16> %res
+}
+
+define <vscale x 8 x i16> @ldnf1sb_h(<vscale x 8 x i1> %pg, i8* %a) {
+; CHECK-LABEL: ldnf1sb_h:
+; CHECK: ldnf1sb { z0.h }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call <vscale x 8 x i8> @llvm.aarch64.sve.ldnf1.nxv8i8(<vscale x 8 x i1> %pg, i8* %a)
+  %res = sext <vscale x 8 x i8> %load to <vscale x 8 x i16>
+  ret <vscale x 8 x i16> %res
+}
+
+define <vscale x 8 x i16> @ldnf1h(<vscale x 8 x i1> %pg, i16* %a) {
+; CHECK-LABEL: ldnf1h:
+; CHECK: ldnf1h { z0.h }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call <vscale x 8 x i16> @llvm.aarch64.sve.ldnf1.nxv8i16(<vscale x 8 x i1> %pg, i16* %a)
+  ret <vscale x 8 x i16> %load
+}
+
+define <vscale x 8 x half> @ldnf1h_f16(<vscale x 8 x i1> %pg, half* %a) {
+; CHECK-LABEL: ldnf1h_f16:
+; CHECK: ldnf1h { z0.h }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call <vscale x 8 x half> @llvm.aarch64.sve.ldnf1.nxv8f16(<vscale x 8 x i1> %pg, half* %a)
+  ret <vscale x 8 x half> %load
+}
+
+define <vscale x 4 x i32> @ldnf1b_s(<vscale x 4 x i1> %pg, i8* %a) {
+; CHECK-LABEL: ldnf1b_s:
+; CHECK: ldnf1b { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ldnf1.nxv4i8(<vscale x 4 x i1> %pg, i8* %a)
+  %res = zext <vscale x 4 x i8> %load to <vscale x 4 x i32>
+  ret <vscale x 4 x i32> %res
+}
+
+define <vscale x 4 x i32> @ldnf1sb_s(<vscale x 4 x i1> %pg, i8* %a) {
+; CHECK-LABEL: ldnf1sb_s:
+; CHECK: ldnf1sb { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ldnf1.nxv4i8(<vscale x 4 x i1> %pg, i8* %a)
+  %res = sext <vscale x 4 x i8> %load to <vscale x 4 x i32>
+  ret <vscale x 4 x i32> %res
+}
+
+define <vscale x 4 x i32> @ldnf1h_s(<vscale x 4 x i1> %pg, i16* %a) {
+; CHECK-LABEL: ldnf1h_s:
+; CHECK: ldnf1h { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldnf1.nxv4i16(<vscale x 4 x i1> %pg, i16* %a)
+  %res = zext <vscale x 4 x i16> %load to <vscale x 4 x i32>
+  ret <vscale x 4 x i32> %res
+}
+
+define <vscale x 4 x i32> @ldnf1sh_s(<vscale x 4 x i1> %pg, i16* %a) {
+; CHECK-LABEL: ldnf1sh_s:
+; CHECK: ldnf1sh { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldnf1.nxv4i16(<vscale x 4 x i1> %pg, i16* %a)
+  %res = sext <vscale x 4 x i16> %load to <vscale x 4 x i32>
+  ret <vscale x 4 x i32> %res
+}
+
+define <vscale x 4 x i32> @ldnf1w(<vscale x 4 x i1> %pg, i32* %a) {
+; CHECK-LABEL: ldnf1w:
+; CHECK: ldnf1w { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call <vscale x 4 x i32> @llvm.aarch64.sve.ldnf1.nxv4i32(<vscale x 4 x i1> %pg, i32* %a)
+  ret <vscale x 4 x i32> %load
+}
+
+define <vscale x 4 x float> @ldnf1w_f32(<vscale x 4 x i1> %pg, float* %a) {
+; CHECK-LABEL: ldnf1w_f32:
+; CHECK: ldnf1w { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call <vscale x 4 x float> @llvm.aarch64.sve.ldnf1.nxv4f32(<vscale x 4 x i1> %pg, float* %a)
+  ret <vscale x 4 x float> %load
+}
+
+define <vscale x 2 x i64> @ldnf1b_d(<vscale x 2 x i1> %pg, i8* %a) {
+; CHECK-LABEL: ldnf1b_d:
+; CHECK: ldnf1b { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ldnf1.nxv2i8(<vscale x 2 x i1> %pg, i8* %a)
+  %res = zext <vscale x 2 x i8> %load to <vscale x 2 x i64>
+  ret <vscale x 2 x i64> %res
+}
+
+define <vscale x 2 x i64> @ldnf1sb_d(<vscale x 2 x i1> %pg, i8* %a) {
+; CHECK-LABEL: ldnf1sb_d:
+; CHECK: ldnf1sb { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ldnf1.nxv2i8(<vscale x 2 x i1> %pg, i8* %a)
+  %res = sext <vscale x 2 x i8> %load to <vscale x 2 x i64>
+  ret <vscale x 2 x i64> %res
+}
+
+define <vscale x 2 x i64> @ldnf1h_d(<vscale x 2 x i1> %pg, i16* %a) {
+; CHECK-LABEL: ldnf1h_d:
+; CHECK: ldnf1h { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldnf1.nxv2i16(<vscale x 2 x i1> %pg, i16* %a)
+  %res = zext <vscale x 2 x i16> %load to <vscale x 2 x i64>
+  ret <vscale x 2 x i64> %res
+}
+
+define <vscale x 2 x i64> @ldnf1sh_d(<vscale x 2 x i1> %pg, i16* %a) {
+; CHECK-LABEL: ldnf1sh_d:
+; CHECK: ldnf1sh { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldnf1.nxv2i16(<vscale x 2 x i1> %pg, i16* %a)
+  %res = sext <vscale x 2 x i16> %load to <vscale x 2 x i64>
+  ret <vscale x 2 x i64> %res
+}
+
+define <vscale x 2 x i64> @ldnf1w_d(<vscale x 2 x i1> %pg, i32* %a) {
+; CHECK-LABEL: ldnf1w_d:
+; CHECK: ldnf1w { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldnf1.nxv2i32(<vscale x 2 x i1> %pg, i32* %a)
+  %res = zext <vscale x 2 x i32> %load to <vscale x 2 x i64>
+  ret <vscale x 2 x i64> %res
+}
+
+define <vscale x 2 x i64> @ldnf1sw_d(<vscale x 2 x i1> %pg, i32* %a) {
+; CHECK-LABEL: ldnf1sw_d:
+; CHECK: ldnf1sw { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldnf1.nxv2i32(<vscale x 2 x i1> %pg, i32* %a)
+  %res = sext <vscale x 2 x i32> %load to <vscale x 2 x i64>
+  ret <vscale x 2 x i64> %res
+}
+
+define <vscale x 2 x i64> @ldnf1d(<vscale x 2 x i1> %pg, i64* %a) {
+; CHECK-LABEL: ldnf1d:
+; CHECK: ldnf1d { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ldnf1.nxv2i64(<vscale x 2 x i1> %pg, i64* %a)
+  ret <vscale x 2 x i64> %load
+}
+
+define <vscale x 2 x double> @ldnf1d_f64(<vscale x 2 x i1> %pg, double* %a) {
+; CHECK-LABEL: ldnf1d_f64:
+; CHECK: ldnf1d { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call <vscale x 2 x double> @llvm.aarch64.sve.ldnf1.nxv2f64(<vscale x 2 x i1> %pg, double* %a)
+  ret <vscale x 2 x double> %load
+}

[PATCH] D71698: [AArch64][SVE] Add intrinsic for non-faulting loads

2020-01-21 Thread Sander de Smalen via Phabricator via cfe-commits
sdesmalen accepted this revision.
sdesmalen added a comment.
This revision is now accepted and ready to land.

LGTM [with the caveat that we need to revisit the modelling of the `FFR`
register and get rid of the `PseudoInstExpansion` at a later point, as
discussed during the previous sync-up call]


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D71698/new/

https://reviews.llvm.org/D71698





[PATCH] D71698: [AArch64][SVE] Add intrinsic for non-faulting loads

2020-01-20 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin updated this revision to Diff 239144.
kmclaughlin added a comment.

- Some minor changes to performSignExtendInRegCombine to address comments from 
@sdesmalen


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D71698/new/

https://reviews.llvm.org/D71698

Files:
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
  llvm/lib/Target/AArch64/AArch64ISelLowering.h
  llvm/lib/Target/AArch64/AArch64InstrInfo.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/SVEInstrFormats.td
  llvm/test/CodeGen/AArch64/sve-intrinsics-loads-nf.ll

[PATCH] D71698: [AArch64][SVE] Add intrinsic for non-faulting loads

2020-01-20 Thread Sander de Smalen via Phabricator via cfe-commits
sdesmalen added inline comments.



Comment at: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp:12460
 
-  if ((SignExtSrcVT != GLD1SrcMemVT) || !Src.hasOneUse())
+  unsigned OpNum = NewOpc == AArch64ISD::LDNF1S ? 3 : 4;
+  EVT LD1SrcMemVT = cast<VTSDNode>(Src->getOperand(OpNum))->getVT();

Move the assignment of `MemVTOpNum` to the switch statement above instead of 
special-casing it here?



Comment at: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp:12461
+  unsigned OpNum = NewOpc == AArch64ISD::LDNF1S ? 3 : 4;
+  EVT LD1SrcMemVT = cast<VTSDNode>(Src->getOperand(OpNum))->getVT();
+

nit: `s/LD1SrcMemVT/SrcMemVT/`



Comment at: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp:12469
+
+  SmallVector<SDValue, 4> Ops = {Src->getOperand(0), Src->getOperand(1),
+                                 Src->getOperand(2), Src->getOperand(3)};

Better make the default '5' if there is a large likelihood of there being 5 
default values.



Comment at: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp:12469
+
+  SmallVector<SDValue, 4> Ops = {Src->getOperand(0), Src->getOperand(1),
+                                 Src->getOperand(2), Src->getOperand(3)};

sdesmalen wrote:
> Better make the default '5' if there is a large likelihood of there being 5 
> default values.
Instead of special-casing LDNF1S below, you can write this as:

  SmallVector<SDValue, 5> Ops;
  for (unsigned I = 0; I < Src->getNumOperands(); ++I)
    Ops.push_back(Src->getOperand(I));
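
Taken together, the inline comments above converge on one shape for the
combine. A minimal sketch of that shape, assuming the surrounding
performSignExtendInRegCombine context (`Opc`, `Src`, and the AArch64ISD
opcodes as in the snippets above); illustrative, not the verbatim commit:

```
  // Choose the signed opcode and the position of the MemVT operand in the
  // same switch, so LDNF1 needs no special-casing afterwards.
  unsigned NewOpc;
  unsigned MemVTOpNum = 4;
  switch (Opc) {
  case AArch64ISD::LDNF1:
    NewOpc = AArch64ISD::LDNF1S;
    MemVTOpNum = 3; // LDNF1 carries no offsets operand, unlike the gathers.
    break;
  case AArch64ISD::GLD1:
    NewOpc = AArch64ISD::GLD1S;
    break;
  // ... remaining gather opcodes elided ...
  default:
    return SDValue();
  }

  EVT SrcMemVT = cast<VTSDNode>(Src->getOperand(MemVTOpNum))->getVT();

  // Copy however many operands the source node has (4 for LDNF1, 5 for the
  // gathers) instead of hard-coding either count.
  SmallVector<SDValue, 5> Ops;
  for (unsigned I = 0; I < Src->getNumOperands(); ++I)
    Ops.push_back(Src->getOperand(I));
```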


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D71698/new/

https://reviews.llvm.org/D71698





[PATCH] D71698: [AArch64][SVE] Add intrinsic for non-faulting loads

2020-01-14 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin marked 5 inline comments as done.
kmclaughlin added a comment.

Thanks for your suggestions, @andwar!


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D71698/new/

https://reviews.llvm.org/D71698





[PATCH] D71698: [AArch64][SVE] Add intrinsic for non-faulting loads

2020-01-14 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin updated this revision to Diff 237906.
kmclaughlin added a comment.

- Rebased patch
- Updated comments and extended getSVEContainerType to handle nxv8i16 & nxv16i8
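
The getSVEContainerType extension mentioned above amounts to mapping each SVE
memory element type to the full-width container register type it is loaded
into. A sketch of the extended helper, assuming the existing integer cases in
AArch64ISelLowering.cpp; not the verbatim commit:

```
static MVT getSVEContainerType(EVT ContentTy) {
  switch (ContentTy.getSimpleVT().SimpleTy) {
  default:
    llvm_unreachable("No known SVE container for this MVT type");
  case MVT::nxv2i8:
  case MVT::nxv2i16:
  case MVT::nxv2i32:
  case MVT::nxv2i64:
    return MVT::nxv2i64;  // .d container
  case MVT::nxv4i8:
  case MVT::nxv4i16:
  case MVT::nxv4i32:
    return MVT::nxv4i32;  // .s container
  case MVT::nxv8i8:
  case MVT::nxv8i16:      // newly handled by this update
    return MVT::nxv8i16;  // .h container
  case MVT::nxv16i8:      // newly handled by this update
    return MVT::nxv16i8;  // .b container
  }
}
```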


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D71698/new/

https://reviews.llvm.org/D71698

Files:
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
  llvm/lib/Target/AArch64/AArch64ISelLowering.h
  llvm/lib/Target/AArch64/AArch64InstrInfo.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/SVEInstrFormats.td
  llvm/test/CodeGen/AArch64/sve-intrinsics-loads-nf.ll

[PATCH] D71698: [AArch64][SVE] Add intrinsic for non-faulting loads

2020-01-08 Thread Eli Friedman via Phabricator via cfe-commits
efriedma added inline comments.



Comment at: llvm/lib/Target/AArch64/SVEInstrFormats.td:5333
+  // We need a layer of indirection because early machine code passes balk at
+  // physical register (i.e. FFR) uses that have no previous definition.
+  let hasSideEffects = 1, hasNoSchedulingInfo = 1, mayLoad = 1 in {

sdesmalen wrote:
> efriedma wrote:
> > kmclaughlin wrote:
> > > efriedma wrote:
> > > > This is depending on hasSideEffects to preserve the correct ordering 
> > > > with instructions that read/write FFR?  That probably works.  I guess 
> > > > the alternative is to insert an IMPLICIT_DEF of FFR in the entry block 
> > > > of each function.
> > > > 
> > > > What are the calling convention rules for FFR?  Is it callee-save?  If 
> > > > not, we might need to do some work to make FFR reads/writes do 
> > > > something sane across calls inserted by the compiler.
> > > The FFR is not callee-saved. We will need to add support to save & 
> > > restore it where appropriate at the point the compiler starts generating 
> > > reads to the FFR, but for the purpose of the ACLE the user will be 
> > > required to do this if necessary.
> > How can the user write correct code to save/restore the FFR?  The compiler 
> > can move arbitrary readnone/argmemonly calls between the definition and the 
> > use.
> There are separate intrinsics for loading/writing the FFR (svrdffr, svsetffr, 
> svwrffr), which use a `svbool_t` to keep the value of the FFR. These 
> intrinsics are implemented in the same way with a Pseudo with `hasSideEffects 
> = 1` set.
> 
> I thought this flag would prevent other calls from being scheduled/moved over 
> these intrinsics, as they have unknown/unmodelled side-effects and would thus 
> act kind of like a barrier?
> 
The issue would be transforms at the IR/SelectionDAG level. We can probably 
model calls at the MIR level correctly, like you're describing.
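
For context, a minimal sketch of the user-side FFR save/restore discussed
above, written against the ACLE intrinsics the thread names (svsetffr,
svrdffr, svwrffr) plus a non-faulting load; illustrative only, not part of
this patch:

```
#include <arm_sve.h>

// Speculatively read up to one vector of bytes; lanes after the first
// faulting element come back inactive in the FFR instead of trapping.
svint8_t read_chunk(const int8_t *base, svbool_t *valid) {
  svsetffr();                            // start with an all-true FFR
  svbool_t pg = svptrue_b8();
  svint8_t data = svldnf1_s8(pg, base);  // non-faulting load (ldnf1b)
  svbool_t ffr = svrdffr();              // which lanes actually loaded
  // If a call made here could clobber the FFR, the user would keep the
  // saved `ffr` value and re-establish it afterwards:
  svwrffr(ffr);
  *valid = ffr;
  return data;
}
```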


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D71698/new/

https://reviews.llvm.org/D71698





[PATCH] D71698: [AArch64][SVE] Add intrinsic for non-faulting loads

2020-01-08 Thread Sander de Smalen via Phabricator via cfe-commits
sdesmalen added inline comments.



Comment at: llvm/lib/Target/AArch64/SVEInstrFormats.td:5333
+  // We need a layer of indirection because early machine code passes balk at
+  // physical register (i.e. FFR) uses that have no previous definition.
+  let hasSideEffects = 1, hasNoSchedulingInfo = 1, mayLoad = 1 in {

efriedma wrote:
> kmclaughlin wrote:
> > efriedma wrote:
> > > This is depending on hasSideEffects to preserve the correct ordering with 
> > > instructions that read/write FFR?  That probably works.  I guess the 
> > > alternative is to insert an IMPLICIT_DEF of FFR in the entry block of 
> > > each function.
> > > 
> > > What are the calling convention rules for FFR?  Is it callee-save?  If 
> > > not, we might need to do some work to make FFR reads/writes do something 
> > > sane across calls inserted by the compiler.
> > The FFR is not callee-saved. We will need to add support to save & restore 
> > it where appropriate at the point the compiler starts generating reads to 
> > the FFR, but for the purpose of the ACLE the user will be required to do 
> > this if necessary.
> How can the user write correct code to save/restore the FFR?  The compiler 
> can move arbitrary readnone/argmemonly calls between the definition and the 
> use.
There are separate intrinsics for loading/writing the FFR (svrdffr, svsetffr, 
svwrffr), which use a `svbool_t` to keep the value of the FFR. These intrinsics 
are implemented in the same way with a Pseudo with `hasSideEffects = 1` set.

I thought this flag would prevent other calls from being scheduled/moved over 
these intrinsics, as they have unknown/unmodelled side-effects and would thus 
act kind of like a barrier?



Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D71698/new/

https://reviews.llvm.org/D71698





[PATCH] D71698: [AArch64][SVE] Add intrinsic for non-faulting loads

2020-01-02 Thread Andrzej Warzynski via Phabricator via cfe-commits
andwar added inline comments.



Comment at: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp:9998
+
   // GLD1* instructions perform an implicit zero-extend, which makes them
   // perfect candidates for combining.

Could you replace `GLD1*` with `Load`? I believe that will still be correct,
with the added bonus of covering the new case :)



Comment at: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp:11051
+  if (ContainerVT.isInteger()) {
+switch (VT.getVectorNumElements()) {
+default: return SDValue();

You could use `getSVEContainerType` here instead. You'll need to extend it a
wee bit.



Comment at: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp:12284
 
   // Gather load nodes (e.g. AArch64ISD::GLD1) are straightforward candidates
   // for DAG Combine with SIGN_EXTEND_INREG. Bail out for all other nodes.

The following `switch` statement will now cover more than just *Gather* nodes. 
Maybe `SVE load nodes` instead? 



Comment at: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp:12328-12331
+  Ops.push_back(Src->getOperand(0));
+  Ops.push_back(Src->getOperand(1));
+  Ops.push_back(Src->getOperand(2));
+  Ops.push_back(Src->getOperand(3));

Why not:
```
SmallVector<SDValue, 5> Ops = {Src->getOperand(0), Src->getOperand(1),
                               Src->getOperand(2), Src->getOperand(3),
                               Src->getOperand(4)};
```
?



Comment at: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp:12332
+  Ops.push_back(Src->getOperand(3));
+  if (NewOpc != AArch64ISD::LDNF1S)
+    Ops.push_back(Src->getOperand(4));

Could you add a comment explaining what the underlying difference between 
`LDNF1S` and `GLD1S` is? Otherwise it's not clear why this `if` statement is 
needed. IIUC, `GLD1S` has an extra argument for the offsets (hence 5 args vs 4).


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D71698/new/

https://reviews.llvm.org/D71698





[PATCH] D71698: [AArch64][SVE] Add intrinsic for non-faulting loads

2019-12-20 Thread Eli Friedman via Phabricator via cfe-commits
efriedma added inline comments.



Comment at: llvm/lib/Target/AArch64/SVEInstrFormats.td:5333
+  // We need a layer of indirection because early machine code passes balk at
+  // physical register (i.e. FFR) uses that have no previous definition.
+  let hasSideEffects = 1, hasNoSchedulingInfo = 1, mayLoad = 1 in {

kmclaughlin wrote:
> efriedma wrote:
> > This is depending on hasSideEffects to preserve the correct ordering with 
> > instructions that read/write FFR?  That probably works.  I guess the 
> > alternative is to insert an IMPLICIT_DEF of FFR in the entry block of each 
> > function.
> > 
> > What are the calling convention rules for FFR?  Is it callee-save?  If not, 
> > we might need to do some work to make FFR reads/writes do something sane 
> > across calls inserted by the compiler.
> The FFR is not callee-saved. We will need to add support to save & restore it 
> where appropriate at the point the compiler starts generating reads to the 
> FFR, but for the purpose of the ACLE the user will be required to do this if 
> necessary.
How can the user write correct code to save/restore the FFR?  The compiler can 
move arbitrary readnone/argmemonly calls between the definition and the use.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D71698/new/

https://reviews.llvm.org/D71698





[PATCH] D71698: [AArch64][SVE] Add intrinsic for non-faulting loads

2019-12-20 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin added inline comments.



Comment at: llvm/lib/Target/AArch64/SVEInstrFormats.td:5333
+  // We need a layer of indirection because early machine code passes balk at
+  // physical register (i.e. FFR) uses that have no previous definition.
+  let hasSideEffects = 1, hasNoSchedulingInfo = 1, mayLoad = 1 in {

efriedma wrote:
> This is depending on hasSideEffects to preserve the correct ordering with 
> instructions that read/write FFR?  That probably works.  I guess the 
> alternative is to insert an IMPLICIT_DEF of FFR in the entry block of each 
> function.
> 
> What are the calling convention rules for FFR?  Is it callee-save?  If not, 
> we might need to do some work to make FFR reads/writes do something sane 
> across calls inserted by the compiler.
The FFR is not callee-saved. We will need to add support to save & restore it 
where appropriate at the point the compiler starts generating reads to the FFR, 
but for the purpose of the ACLE the user will be required to do this if 
necessary.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D71698/new/

https://reviews.llvm.org/D71698





[PATCH] D71698: [AArch64][SVE] Add intrinsic for non-faulting loads

2019-12-19 Thread Eli Friedman via Phabricator via cfe-commits
efriedma added inline comments.



Comment at: llvm/lib/Target/AArch64/SVEInstrFormats.td:5333
+  // We need a layer of indirection because early machine code passes balk at
+  // physical register (i.e. FFR) uses that have no previous definition.
+  let hasSideEffects = 1, hasNoSchedulingInfo = 1, mayLoad = 1 in {

This is depending on hasSideEffects to preserve the correct ordering with 
instructions that read/write FFR?  That probably works.  I guess the 
alternative is to insert an IMPLICIT_DEF of FFR in the entry block of each 
function.

What are the calling convention rules for FFR?  Is it callee-save?  If not, we 
might need to do some work to make FFR reads/writes do something sane across 
calls inserted by the compiler.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D71698/new/

https://reviews.llvm.org/D71698





[PATCH] D71698: [AArch64][SVE] Add intrinsic for non-faulting loads

2019-12-19 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin created this revision.
kmclaughlin added reviewers: sdesmalen, efriedma, andwar, dancgr, mgudim.
Herald added subscribers: psnobl, rkruppe, hiraditya, kristof.beyls, tschuett.
Herald added a reviewer: rengolin.
Herald added a project: LLVM.

This patch adds the llvm.aarch64.sve.ldnf1 intrinsic, plus DAG combine rules
that fold sign/zero extends into non-faulting loads.
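
As an illustration of what the combines buy (assuming the matching ACLE
builtin; not part of this patch), a widening non-faulting load in C:

```
#include <arm_sve.h>

// svldnf1sh_s32 is expected to lower to @llvm.aarch64.sve.ldnf1.nxv4i16
// followed by a sext to nxv4i32, which the new combine folds into a single
// LDNF1S node, i.e. one "ldnf1sh { z0.s }, p0/z, [x0]" instruction.
svint32_t widen_load(svbool_t pg, const int16_t *p) {
  return svldnf1sh_s32(pg, p);
}
```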


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D71698

Files:
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
  llvm/lib/Target/AArch64/AArch64ISelLowering.h
  llvm/lib/Target/AArch64/AArch64InstrInfo.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/SVEInstrFormats.td
  llvm/test/CodeGen/AArch64/sve-intrinsics-loads-nf.ll
