[PATCH] D74833: [AArch64][SVE] Add intrinsics for SVE2 cryptographic instructions

2020-02-19 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin created this revision.
kmclaughlin added reviewers: sdesmalen, c-rhodes, dancgr, cameron.mcinally, 
efriedma.
Herald added subscribers: psnobl, rkruppe, hiraditya, kristof.beyls, tschuett.
Herald added a reviewer: rengolin.
Herald added a project: LLVM.

Implements the following SVE2 intrinsics:

- @llvm.aarch64.sve.aesd
- @llvm.aarch64.sve.aesimc
- @llvm.aarch64.sve.aese
- @llvm.aarch64.sve.aesmc
- @llvm.aarch64.sve.rax1
- @llvm.aarch64.sve.sm4e
- @llvm.aarch64.sve.sm4ekey
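
Each of these maps 1:1 onto the corresponding SVE2 crypto instruction, so the
element widths are fixed by the architecture (the AES forms use byte elements,
rax1 doubleword elements, sm4e/sm4ekey word elements). As a minimal IR sketch,
with the scalable vector types inferred on that basis from the tests below:

  define <vscale x 2 x i64> @rax1_example(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
    ; rotate-and-xor on 64-bit elements; expected to select "rax1 z0.d, z0.d, z1.d"
    %out = call <vscale x 2 x i64> @llvm.aarch64.sve.rax1(<vscale x 2 x i64> %a,
                                                          <vscale x 2 x i64> %b)
    ret <vscale x 2 x i64> %out
  }
  declare <vscale x 2 x i64> @llvm.aarch64.sve.rax1(<vscale x 2 x i64>, <vscale x 2 x i64>)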


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D74833

Files:
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/SVEInstrFormats.td
  llvm/test/CodeGen/AArch64/sve2-intrinsics-crypto.ll

Index: llvm/test/CodeGen/AArch64/sve2-intrinsics-crypto.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve2-intrinsics-crypto.ll
@@ -0,0 +1,99 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2-aes,+sve2-sha3,+sve2-sm4 -asm-verbose=0 < %s | FileCheck %s
+
+;
+; AESD
+;
+
+define <vscale x 16 x i8> @aesd_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: aesd_i8:
+; CHECK: aesd z0.b, z0.b, z1.b
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.aesd(<vscale x 16 x i8> %a,
+                                                        <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %out
+}
+
+;
+; AESIMC
+;
+
+define <vscale x 16 x i8> @aesimc_i8(<vscale x 16 x i8> %a) {
+; CHECK-LABEL: aesimc_i8:
+; CHECK: aesimc z0.b, z0.b
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.aesimc(<vscale x 16 x i8> %a)
+  ret <vscale x 16 x i8> %out
+}
+
+;
+; AESE
+;
+
+define <vscale x 16 x i8> @aese_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: aese_i8:
+; CHECK: aese z0.b, z0.b, z1.b
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.aese(<vscale x 16 x i8> %a,
+                                                        <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %out
+}
+
+;
+; AESMC
+;
+
+define <vscale x 16 x i8> @aesmc_i8(<vscale x 16 x i8> %a) {
+; CHECK-LABEL: aesmc_i8:
+; CHECK: aesmc z0.b, z0.b
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.aesmc(<vscale x 16 x i8> %a)
+  ret <vscale x 16 x i8> %out
+}
+
+;
+; RAX1
+;
+
+define <vscale x 2 x i64> @rax1_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: rax1_i64:
+; CHECK: rax1 z0.d, z0.d, z1.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.rax1(<vscale x 2 x i64> %a,
+                                                        <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %out
+}
+
+;
+; SM4E
+;
+
+define <vscale x 4 x i32> @sm4e_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: sm4e_i32:
+; CHECK: sm4e z0.s, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sm4e(<vscale x 4 x i32> %a,
+                                                        <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+;
+; SM4EKEY
+;
+
+define <vscale x 4 x i32> @sm4ekey_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: sm4ekey_i32:
+; CHECK: sm4ekey z0.s, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sm4ekey(<vscale x 4 x i32> %a,
+                                                           <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.aesd(<vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 16 x i8> @llvm.aarch64.sve.aesimc(<vscale x 16 x i8>)
+declare <vscale x 16 x i8> @llvm.aarch64.sve.aese(<vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 16 x i8> @llvm.aarch64.sve.aesmc(<vscale x 16 x i8>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.rax1(<vscale x 2 x i64>, <vscale x 2 x i64>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sm4e(<vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sm4ekey(<vscale x 4 x i32>, <vscale x 4 x i32>)
Index: llvm/lib/Target/AArch64/SVEInstrFormats.td
===
--- llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -7024,6 +7024,12 @@
   let Inst{4-0}   = Zd;
 }
 
+multiclass sve2_crypto_cons_bin_op<bit opc, string asm, ZPRRegOp zprty,
+                                   SDPatternOperator op, ValueType vt> {
+  def NAME : sve2_crypto_cons_bin_op<opc, asm, zprty>;
+  def : SVE_2_Op_Pat<vt, op, vt, vt, !cast<Instruction>(NAME)>;
+}
+
 class sve2_crypto_des_bin_op<bits<2> opc, string asm, ZPRRegOp zprty>
 : I<(outs zprty:$Zdn), (ins zprty:$_Zdn, zprty:$Zm),
   asm, "\t$Zdn, $_Zdn, $Zm",
@@ -7041,8 +7047,14 @@
   let Constraints = "$Zdn = $_Zdn";
 }
 
-class sve2_crypto_unary_op<bit opc, string asm>
-: I<(outs ZPR8:$Zdn), (ins ZPR8:$_Zdn),
+multiclass sve2_crypto_des_bin_op<bits<2> opc, string asm, ZPRRegOp zprty,
+                                  SDPatternOperator op, ValueType vt> {
+  def NAME : sve2_crypto_des_bin_op<opc, asm, zprty>;
+  def : SVE_2_Op_Pat<vt, op, vt, vt, !cast<Instruction>(NAME)>;
+}
+
+class sve2_crypto_unary_op<bit opc, string asm, ZPRRegOp zprty>
+: I<(outs zprty:$Zdn), (ins zprty:$_Zdn),
   asm, "\t$Zdn, $_Zdn",
   "",
   []>, Sched<[]> {
@@ -7054,3 +7066,8 @@
 
   let Constraints = "$Zdn = $_Zdn";
 }
+
+multiclass sve2_crypto_unary_op<bit opc, string asm, SDPatternOperator op> {
+  def NAME : sve2_crypto_unary_op<opc, asm, ZPR8>;
+  def : SVE_1_Op_Pat<nxv16i8, op, nxv16i8, !cast<Instruction>(NAME)>;
+}
Index: llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
===
--- llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1801,12 +1801,12 @@
 
 let Predicates = [HasSVE2AES] in {
   // SVE2 crypto destructive binary operations
-  def AESE_ZZZ_B : sve2_crypto_des_bin_op<0b00, "aese", ZPR8>;
-  def AESD_ZZZ_B : sve2_crypto_des_bin_op<0b01, "aesd", ZPR8>;
+  defm AESE_ZZZ_B : sve2_crypto_des_bin_op<0b00, "aese", ZPR8, int_aarch64_sve_aese, nxv16i8>;
+  defm AESD_ZZZ_B : sve2_crypto_des_bin_op<0b01, "aesd", ZPR8, int_aarch64_sve_aesd, nxv16i8>;
 
   // SVE2 crypto unary operations
-  def AESMC_ZZ_B  : sve2_crypto_unary_op<0b0, "aesmc">;
-  def AESIMC_ZZ_B : sve2_crypto_unary_op<0b1, "aesimc">;
+  defm AESMC_ZZ_B  : sve2_crypto_unary_op<0b0, "aesmc", 

[PATCH] D74769: [AArch64][SVE] Add SVE2 intrinsics for polynomial arithmetic

2020-02-19 Thread Kerry McLaughlin via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rG63236078d243: [AArch64][SVE] Add SVE2 intrinsics for 
polynomial arithmetic (authored by kmclaughlin).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D74769/new/

https://reviews.llvm.org/D74769

Files:
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/SVEInstrFormats.td
  llvm/test/CodeGen/AArch64/sve2-intrinsics-polynomial-arithmetic-128.ll
  llvm/test/CodeGen/AArch64/sve2-intrinsics-polynomial-arithmetic.ll

Index: llvm/test/CodeGen/AArch64/sve2-intrinsics-polynomial-arithmetic.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve2-intrinsics-polynomial-arithmetic.ll
@@ -0,0 +1,149 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 -asm-verbose=0 < %s | FileCheck %s
+
+;
+; EORBT
+;
+
+define  @eorbt_i8( %a,  %b,  %c) {
+; CHECK-LABEL: eorbt_i8:
+; CHECK: eorbt z0.b, z1.b, z2.b
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.eorbt.nxv16i8( %a,
+  %b,
+  %c)
+  ret  %out
+}
+
+define  @eorbt_i16( %a,  %b,  %c) {
+; CHECK-LABEL: eorbt_i16:
+; CHECK: eorbt z0.h, z1.h, z2.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.eorbt.nxv8i16( %a,
+  %b,
+  %c)
+  ret  %out
+}
+
+define  @eorbt_i32( %a,  %b,  %c) {
+; CHECK-LABEL: eorbt_i32:
+; CHECK: eorbt z0.s, z1.s, z2.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.eorbt.nxv4i32( %a,
+  %b,
+  %c)
+  ret  %out
+}
+
+define  @eorbt_i64( %a,  %b,  %c) {
+; CHECK-LABEL: eorbt_i64:
+; CHECK: eorbt z0.d, z1.d, z2.d
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.eorbt.nxv2i64( %a,
+  %b,
+  %c)
+  ret  %out
+}
+
+;
+; EORTB
+;
+
+define  @eortb_i8( %a,  %b,  %c) {
+; CHECK-LABEL: eortb_i8:
+; CHECK: eortb z0.b, z1.b, z2.b
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.eortb.nxv16i8( %a,
+  %b,
+  %c)
+  ret  %out
+}
+
+define  @eortb_i16( %a,  %b,  %c) {
+; CHECK-LABEL: eortb_i16:
+; CHECK: eortb z0.h, z1.h, z2.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.eortb.nxv8i16( %a,
+  %b,
+  %c)
+  ret  %out
+}
+
+define  @eortb_i32( %a,  %b,  %c) {
+; CHECK-LABEL: eortb_i32:
+; CHECK: eortb z0.s, z1.s, z2.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.eortb.nxv4i32( %a,
+  %b,
+  %c)
+  ret  %out
+}
+
+define  @eortb_i64( %a,  %b,  %c) {
+; CHECK-LABEL: eortb_i64:
+; CHECK: eortb z0.d, z1.d, z2.d
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.eortb.nxv2i64( %a,
+  %b,
+  %c)
+  ret  %out
+}
+
+;
+; PMULLB
+;
+
+define  @pmullb_i8( %a,  %b) {
+; CHECK-LABEL: pmullb_i8:
+; CHECK: pmullb z0.h, z0.b, z1.b
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.pmullb.pair.nxv16i8( %a,
+%b)
+  ret  %out
+}
+
+define  @pmullb_i32( %a,  %b) {
+; CHECK-LABEL: pmullb_i32:
+; CHECK: pmullb z0.d, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.pmullb.pair.nxv4i32( %a,
+%b)
+  ret  %out
+}
+
+;
+; PMULLT
+;
+
+define  @pmullt_i8( %a,  %b) {
+; CHECK-LABEL: pmullt_i8:
+; CHECK: pmullt z0.h, z0.b, z1.b
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.pmullt.pair.nxv16i8( %a,
+%b)
+  ret  %out
+}
+
+define  @pmullt_i32( %a,  %b) {
+; CHECK-LABEL: pmullt_i32:
+; CHECK: pmullt z0.d, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.pmullt.pair.nxv4i32( %a,
+%b)
+  ret  %out
+}
+
+declare  @llvm.aarch64.sve.eorbt.nxv16i8(, , )
+declare  @llvm.aarch64.sve.eorbt.nxv8i16(, , )
+declare  @llvm.aarch64.sve.eorbt.nxv4i32(, , )
+declare  @llvm.aarch64.sve.eorbt.nxv2i64(, , )
+
+declare  @llvm.aarch64.sve.eortb.nxv16i8(, , )
+declare  

[PATCH] D74769: [AArch64][SVE] Add SVE2 intrinsics for polynomial arithmetic

2020-02-18 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin created this revision.
kmclaughlin added reviewers: sdesmalen, c-rhodes, dancgr, cameron.mcinally, 
efriedma.
Herald added subscribers: psnobl, rkruppe, hiraditya, kristof.beyls, tschuett.
Herald added a reviewer: rengolin.
Herald added a project: LLVM.

Implements the following intrinsics:

- @llvm.aarch64.sve.eorbt
- @llvm.aarch64.sve.eortb
- @llvm.aarch64.sve.pmullb.pair
- @llvm.aarch64.sve.pmullt.pair
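
A minimal IR sketch of one of these, reconstructed from the tests below (the
scalable vector types are assumed from the .nxv16i8 overload used there; the
first operand supplies the destination register, as in "eorbt z0.b, z1.b, z2.b"):

  define <vscale x 16 x i8> @eorbt_example(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
    ; %a is the partially-overwritten destination operand of the interleaving XOR
    %out = call <vscale x 16 x i8> @llvm.aarch64.sve.eorbt.nxv16i8(<vscale x 16 x i8> %a,
                                                                   <vscale x 16 x i8> %b,
                                                                   <vscale x 16 x i8> %c)
    ret <vscale x 16 x i8> %out
  }
  declare <vscale x 16 x i8> @llvm.aarch64.sve.eorbt.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)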


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D74769

Files:
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/SVEInstrFormats.td
  llvm/test/CodeGen/AArch64/sve2-intrinsics-polynomial-arithmetic-128.ll
  llvm/test/CodeGen/AArch64/sve2-intrinsics-polynomial-arithmetic.ll

Index: llvm/test/CodeGen/AArch64/sve2-intrinsics-polynomial-arithmetic.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve2-intrinsics-polynomial-arithmetic.ll
@@ -0,0 +1,149 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 -asm-verbose=0 < %s | FileCheck %s
+
+;
+; EORBT
+;
+
+define  @eorbt_i8( %a,  %b,  %c) {
+; CHECK-LABEL: eorbt_i8:
+; CHECK: eorbt z0.b, z1.b, z2.b
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.eorbt.nxv16i8( %a,
+  %b,
+  %c)
+  ret  %out
+}
+
+define  @eorbt_i16( %a,  %b,  %c) {
+; CHECK-LABEL: eorbt_i16:
+; CHECK: eorbt z0.h, z1.h, z2.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.eorbt.nxv8i16( %a,
+  %b,
+  %c)
+  ret  %out
+}
+
+define  @eorbt_i32( %a,  %b,  %c) {
+; CHECK-LABEL: eorbt_i32:
+; CHECK: eorbt z0.s, z1.s, z2.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.eorbt.nxv4i32( %a,
+  %b,
+  %c)
+  ret  %out
+}
+
+define  @eorbt_i64( %a,  %b,  %c) {
+; CHECK-LABEL: eorbt_i64:
+; CHECK: eorbt z0.d, z1.d, z2.d
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.eorbt.nxv2i64( %a,
+  %b,
+  %c)
+  ret  %out
+}
+
+;
+; EORTB
+;
+
+define  @eortb_i8( %a,  %b,  %c) {
+; CHECK-LABEL: eortb_i8:
+; CHECK: eortb z0.b, z1.b, z2.b
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.eortb.nxv16i8( %a,
+  %b,
+  %c)
+  ret  %out
+}
+
+define  @eortb_i16( %a,  %b,  %c) {
+; CHECK-LABEL: eortb_i16:
+; CHECK: eortb z0.h, z1.h, z2.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.eortb.nxv8i16( %a,
+  %b,
+  %c)
+  ret  %out
+}
+
+define  @eortb_i32( %a,  %b,  %c) {
+; CHECK-LABEL: eortb_i32:
+; CHECK: eortb z0.s, z1.s, z2.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.eortb.nxv4i32( %a,
+  %b,
+  %c)
+  ret  %out
+}
+
+define  @eortb_i64( %a,  %b,  %c) {
+; CHECK-LABEL: eortb_i64:
+; CHECK: eortb z0.d, z1.d, z2.d
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.eortb.nxv2i64( %a,
+  %b,
+  %c)
+  ret  %out
+}
+
+;
+; PMULLB
+;
+
+define  @pmullb_i8( %a,  %b) {
+; CHECK-LABEL: pmullb_i8:
+; CHECK: pmullb z0.h, z0.b, z1.b
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.pmullb.pair.nxv16i8( %a,
+%b)
+  ret  %out
+}
+
+define  @pmullb_i32( %a,  %b) {
+; CHECK-LABEL: pmullb_i32:
+; CHECK: pmullb z0.d, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.pmullb.pair.nxv4i32( %a,
+%b)
+  ret  %out
+}
+
+;
+; PMULLT
+;
+
+define  @pmullt_i8( %a,  %b) {
+; CHECK-LABEL: pmullt_i8:
+; CHECK: pmullt z0.h, z0.b, z1.b
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.pmullt.pair.nxv16i8( %a,
+%b)
+  ret  %out
+}
+
+define  @pmullt_i32( %a,  %b) {
+; CHECK-LABEL: pmullt_i32:
+; CHECK: pmullt z0.d, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.pmullt.pair.nxv4i32( %a,
+%b)
+  ret  %out
+}
+
+declare  @llvm.aarch64.sve.eorbt.nxv16i8(, , )
+declare  @llvm.aarch64.sve.eorbt.nxv8i16(, , )
+declare  

[PATCH] D73903: [AArch64][SVE] Add remaining SVE2 intrinsics for widening DSP operations

2020-02-18 Thread Kerry McLaughlin via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rGd4576080da72: [AArch64][SVE] Add remaining SVE2 intrinsics 
for widening DSP operations (authored by kmclaughlin).

Changed prior to commit:
  https://reviews.llvm.org/D73903?vs=244367&id=245114#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D73903/new/

https://reviews.llvm.org/D73903

Files:
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/SVEInstrFormats.td
  llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-dsp.ll

Index: llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-dsp.ll
===
--- llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-dsp.ll
+++ llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-dsp.ll
@@ -193,6 +193,69 @@
 }
 
 ;
+; SADDWB
+;
+
+define  @saddwb_b( %a,  %b) {
+; CHECK-LABEL: saddwb_b:
+; CHECK: saddwb z0.h, z0.h, z1.b
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.saddwb.nxv8i16( %a,
+   %b)
+  ret  %out
+}
+
+define  @saddwb_h( %a,  %b) {
+; CHECK-LABEL: saddwb_h:
+; CHECK: saddwb z0.s, z0.s, z1.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.saddwb.nxv4i32( %a,
+   %b)
+  ret  %out
+}
+
+define  @saddwb_s( %a,  %b) {
+; CHECK-LABEL: saddwb_s:
+; CHECK: saddwb z0.d, z0.d, z1.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.saddwb.nxv2i64( %a,
+   %b)
+  ret  %out
+}
+
+;
+; SADDWT
+;
+
+define  @saddwt_b( %a,  %b) {
+; CHECK-LABEL: saddwt_b:
+; CHECK: saddwt z0.h, z0.h, z1.b
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.saddwt.nxv8i16( %a,
+   %b)
+  ret  %out
+}
+
+define  @saddwt_h( %a,  %b) {
+; CHECK-LABEL: saddwt_h:
+; CHECK: saddwt z0.s, z0.s, z1.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.saddwt.nxv4i32( %a,
+   %b)
+  ret  %out
+}
+
+define  @saddwt_s( %a,  %b) {
+; CHECK-LABEL: saddwt_s:
+; CHECK: saddwt z0.d, z0.d, z1.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.saddwt.nxv2i64( %a,
+   %b)
+  ret  %out
+}
+
+
+;
 ; SMULLB (Vectors)
 ;
 
@@ -224,6 +287,30 @@
 }
 
 ;
+; SMULLB (Indexed)
+;
+
+define  @smullb_lane_h( %a,  %b) {
+; CHECK-LABEL: smullb_lane_h:
+; CHECK: smullb z0.s, z0.h, z1.h[4]
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.smullb.lane.nxv4i32( %a,
+%b,
+   i32 4)
+  ret  %out
+}
+
+define  @smullb_lane_s( %a,  %b) {
+; CHECK-LABEL: smullb_lane_s:
+; CHECK: smullb z0.d, z0.s, z1.s[3]
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.smullb.lane.nxv2i64( %a,
+%b,
+   i32 3)
+  ret  %out
+}
+
+;
 ; SMULLT (Vectors)
 ;
 
@@ -255,6 +342,30 @@
 }
 
 ;
+; SMULLT (Indexed)
+;
+
+define  @smullt_lane_h( %a,  %b) {
+; CHECK-LABEL: smullt_lane_h:
+; CHECK: smullt z0.s, z0.h, z1.h[5]
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.smullt.lane.nxv4i32( %a,
+%b,
+   i32 5)
+  ret  %out
+}
+
+define  @smullt_lane_s( %a,  %b) {
+; CHECK-LABEL: smullt_lane_s:
+; CHECK: smullt z0.d, z0.s, z1.s[2]
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.smullt.lane.nxv2i64( %a,
+%b,
+   i32 2)
+  ret  %out
+}
+
+;
 ; SQDMULLB (Vectors)
 ;
 
@@ -286,6 +397,30 @@
 }
 
 ;
+; SQDMULLB (Indexed)
+;
+
+define  @sqdmullb_lane_h( %a,  %b) {
+; CHECK-LABEL: sqdmullb_lane_h:
+; CHECK: sqdmullb z0.s, z0.h, z1.h[2]
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.sqdmullb.lane.nxv4i32( %a,
+  %b,
+ i32 2)
+  ret  %out
+}
+
+define  @sqdmullb_lane_s( %a,  %b) {
+; CHECK-LABEL: sqdmullb_lane_s:
+; CHECK: sqdmullb z0.d, z0.s, z1.s[1]
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.sqdmullb.lane.nxv2i64( %a,
+  %b,
+ i32 1)
+  ret  %out
+}
+
+;
 ; SQDMULLT (Vectors)
 ;
 
@@ -317,6 +452,30 @@
 }
 
 ;
+; SQDMULLT (Indexed)
+;
+
+define  

[PATCH] D74734: [AArch64][SVE] Add the SVE dupq_lane intrinsic

2020-02-17 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin created this revision.
kmclaughlin added reviewers: sdesmalen, c-rhodes, cameron.mcinally, efriedma, 
dancgr.
Herald added subscribers: psnobl, rkruppe, hiraditya, kristof.beyls, tschuett.
Herald added a reviewer: rengolin.
Herald added a project: LLVM.

Implements the @llvm.aarch64.sve.dupq.lane intrinsic.

As specified in the ACLE, the behaviour of:

  svdupq_lane_u64(data, index)

...is identical to:

  svtbl(data, svadd_x(svptrue_b64(),
                      svand_x(svptrue_b64(), svindex_u64(0, 1), 1),
                      index * 2))

If the index is in the range [0,3], the operation is equivalent
to a single DUP (.q) instruction.
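
A minimal IR sketch of the constant-index case (types and lowering taken from
the tests added below; a variable index instead falls back to the TBL-based
sequence shown in the dupq_lane_* tests):

  define <vscale x 4 x i32> @dupq_example(<vscale x 4 x i32> %a) {
    ; constant index in [0,3]: expected to select a single DUP, "mov z0.q, z0.q[1]"
    %out = call <vscale x 4 x i32> @llvm.aarch64.sve.dupq.lane.nxv4i32(<vscale x 4 x i32> %a, i64 1)
    ret <vscale x 4 x i32> %out
  }
  declare <vscale x 4 x i32> @llvm.aarch64.sve.dupq.lane.nxv4i32(<vscale x 4 x i32>, i64)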


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D74734

Files:
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
  llvm/lib/Target/AArch64/AArch64ISelLowering.h
  llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll

Index: llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll
===
--- llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll
@@ -297,6 +297,179 @@
 }
 
 ;
+; DUPQ
+;
+
+define  @dupq_i8( %a) {
+; CHECK-LABEL: dupq_i8:
+; CHECK: mov z0.q, q0
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.dupq.lane.nxv16i8( %a, i64 0)
+  ret  %out
+}
+
+define  @dupq_i16( %a) {
+; CHECK-LABEL: dupq_i16:
+; CHECK: mov z0.q, z0.q[1]
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.dupq.lane.nxv8i16( %a, i64 1)
+  ret  %out
+}
+
+define  @dupq_i32( %a) {
+; CHECK-LABEL: dupq_i32:
+; CHECK: mov z0.q, z0.q[2]
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.dupq.lane.nxv4i32( %a, i64 2)
+  ret  %out
+}
+
+define  @dupq_i64( %a) {
+; CHECK-LABEL: dupq_i64:
+; CHECK: mov z0.q, z0.q[3]
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.dupq.lane.nxv2i64( %a, i64 3)
+  ret  %out
+}
+
+define  @dupq_f16( %a) {
+; CHECK-LABEL: dupq_f16:
+; CHECK: mov z0.q, q0
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.dupq.lane.nxv8f16( %a, i64 0)
+  ret  %out
+}
+
+define  @dupq_f32( %a) {
+; CHECK-LABEL: dupq_f32:
+; CHECK: mov z0.q, z0.q[1]
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.dupq.lane.nxv4f32( %a, i64 1)
+  ret  %out
+}
+
+define  @dupq_f64( %a) {
+; CHECK-LABEL: dupq_f64:
+; CHECK: mov z0.q, z0.q[2]
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.dupq.lane.nxv2f64( %a, i64 2)
+  ret  %out
+}
+
+;
+; DUPQ_LANE
+;
+
+define  @dupq_lane_i8( %a, i64 %idx) {
+; CHECK-LABEL: dupq_lane_i8:
+; CHECK-DAG:  index [[Z1:z[0-9]+]].d, #0, #1
+; CHECK-DAG:  and   [[Z2:z[0-9]+]].d, [[Z1]].d, #0x1
+; CHECK-DAG:  add   [[X1:x[0-9]+]], x0, x0
+; CHECK-DAG:  mov   [[Z3:z[0-9]+]].d, [[X1]]
+; CHECK:  add   [[Z4:z[0-9]+]].d, [[Z2]].d, [[Z3]].d
+; CHECK-NEXT: tbl   z0.d, { z0.d }, [[Z4]].d
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.dupq.lane.nxv16i8( %a, i64 %idx)
+  ret  %out
+}
+
+; NOTE: Identical operation to dupq_lane_i8 (i.e. element type is irrelevant).
+define  @dupq_lane_i16( %a, i64 %idx) {
+; CHECK-LABEL: dupq_lane_i16:
+; CHECK-DAG:  index [[Z1:z[0-9]+]].d, #0, #1
+; CHECK-DAG:  and   [[Z2:z[0-9]+]].d, [[Z1]].d, #0x1
+; CHECK-DAG:  add   [[X1:x[0-9]+]], x0, x0
+; CHECK-DAG:  mov   [[Z3:z[0-9]+]].d, [[X1]]
+; CHECK:  add   [[Z4:z[0-9]+]].d, [[Z2]].d, [[Z3]].d
+; CHECK: tbl z0.d, { z0.d }, [[Z4]].d
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.dupq.lane.nxv8i16( %a, i64 %idx)
+  ret  %out
+}
+
+; NOTE: Identical operation to dupq_lane_i8 (i.e. element type is irrelevant).
+define  @dupq_lane_i32( %a, i64 %idx) {
+; CHECK-LABEL: dupq_lane_i32:
+; CHECK-DAG:  index [[Z1:z[0-9]+]].d, #0, #1
+; CHECK-DAG:  and   [[Z2:z[0-9]+]].d, [[Z1]].d, #0x1
+; CHECK-DAG:  add   [[X1:x[0-9]+]], x0, x0
+; CHECK-DAG:  mov   [[Z3:z[0-9]+]].d, [[X1]]
+; CHECK:  add   [[Z4:z[0-9]+]].d, [[Z2]].d, [[Z3]].d
+; CHECK: tbl z0.d, { z0.d }, [[Z4]].d
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.dupq.lane.nxv4i32( %a, i64 %idx)
+  ret  %out
+}
+
+; NOTE: Identical operation to dupq_lane_i8 (i.e. element type is irrelevant).
+define  @dupq_lane_i64( %a, i64 %idx) {
+; CHECK-LABEL: dupq_lane_i64:
+; CHECK-DAG:  index [[Z1:z[0-9]+]].d, #0, #1
+; CHECK-DAG:  and   [[Z2:z[0-9]+]].d, [[Z1]].d, #0x1
+; CHECK-DAG:  add   [[X1:x[0-9]+]], x0, x0
+; CHECK-DAG:  mov   [[Z3:z[0-9]+]].d, [[X1]]
+; CHECK:  add   [[Z4:z[0-9]+]].d, [[Z2]].d, [[Z3]].d
+; CHECK: tbl z0.d, { z0.d }, [[Z4]].d
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.dupq.lane.nxv2i64( %a, i64 %idx)
+  ret  %out
+}
+
+; NOTE: Identical operation to dupq_lane_i8 (i.e. element type is irrelevant).
+define  @dupq_lane_f16( %a, i64 %idx) {
+; CHECK-LABEL: dupq_lane_f16:
+; CHECK-DAG:  index [[Z1:z[0-9]+]].d, #0, #1
+; CHECK-DAG:  and   [[Z2:z[0-9]+]].d, [[Z1]].d, #0x1
+; CHECK-DAG:  add   [[X1:x[0-9]+]], x0, x0
+; CHECK-DAG:  mov   [[Z3:z[0-9]+]].d, [[X1]]
+; CHECK:  add   

[PATCH] D74550: [AArch64][SVE] Add SVE index intrinsic

2020-02-17 Thread Kerry McLaughlin via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rG633db60f3ed0: [AArch64][SVE] Add SVE index intrinsic 
(authored by kmclaughlin).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D74550/new/

https://reviews.llvm.org/D74550

Files:
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
  llvm/lib/Target/AArch64/AArch64ISelLowering.h
  llvm/lib/Target/AArch64/AArch64InstrFormats.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
  llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h
  llvm/lib/Target/AArch64/SVEInstrFormats.td
  llvm/test/CodeGen/AArch64/sve-intrinsics-index.ll

Index: llvm/test/CodeGen/AArch64/sve-intrinsics-index.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-index.ll
@@ -0,0 +1,178 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+;
+; INDEX (IMMEDIATES)
+;
+
+define  @index_ii_i8() {
+; CHECK-LABEL: index_ii_i8:
+; CHECK: index z0.b, #-16, #15
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.index.nxv16i8(i8 -16, i8 15)
+  ret  %out
+}
+
+define  @index_ii_i16() {
+; CHECK-LABEL: index_ii_i16:
+; CHECK: index z0.h, #15, #-16
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.index.nxv8i16(i16 15, i16 -16)
+  ret  %out
+}
+
+define  @index_ii_i32() {
+; CHECK-LABEL: index_ii_i32:
+; CHECK: index z0.s, #-16, #15
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.index.nxv4i32(i32 -16, i32 15)
+  ret  %out
+}
+
+define  @index_ii_i64() {
+; CHECK-LABEL: index_ii_i64:
+; CHECK: index z0.d, #15, #-16
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.index.nxv2i64(i64 15, i64 -16)
+  ret  %out
+}
+
+define  @index_ii_range() {
+; CHECK-LABEL: index_ii_range:
+; CHECK: mov w8, #16
+; CHECK-NEXT: mov x9, #-17
+; CHECK-NEXT: index z0.d, x9, x8
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.index.nxv2i64(i64 -17, i64 16)
+  ret  %out
+}
+
+;
+; INDEX (IMMEDIATE, SCALAR)
+;
+
+define  @index_ir_i8(i8 %a) {
+; CHECK-LABEL: index_ir_i8:
+; CHECK: index z0.b, #15, w0
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.index.nxv16i8(i8 15, i8 %a)
+  ret  %out
+}
+
+define  @index_ir_i16(i16 %a) {
+; CHECK-LABEL: index_ir_i16:
+; CHECK: index z0.h, #-16, w0
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.index.nxv8i16(i16 -16, i16 %a)
+  ret  %out
+}
+
+define  @index_ir_i32(i32 %a) {
+; CHECK-LABEL: index_ir_i32:
+; CHECK: index z0.s, #15, w0
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.index.nxv4i32(i32 15, i32 %a)
+  ret  %out
+}
+
+define  @index_ir_i64(i64 %a) {
+; CHECK-LABEL: index_ir_i64:
+; CHECK: index z0.d, #-16, x0
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.index.nxv2i64(i64 -16, i64 %a)
+  ret  %out
+}
+
+define  @index_ir_range(i32 %a) {
+; CHECK-LABEL: index_ir_range:
+; CHECK: mov w8, #-17
+; CHECK: index z0.s, w8, w0
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.index.nxv4i32(i32 -17, i32 %a)
+  ret  %out
+}
+
+;
+; INDEX (SCALAR, IMMEDIATE)
+;
+
+define  @index_ri_i8(i8 %a) {
+; CHECK-LABEL: index_ri_i8:
+; CHECK: index z0.b, w0, #-16
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.index.nxv16i8(i8 %a, i8 -16)
+  ret  %out
+}
+
+define  @index_ri_i16(i16 %a) {
+; CHECK-LABEL: index_ri_i16:
+; CHECK: index z0.h, w0, #15
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.index.nxv8i16(i16 %a, i16 15)
+  ret  %out
+}
+
+define  @index_ri_i32(i32 %a) {
+; CHECK-LABEL: index_ri_i32:
+; CHECK: index z0.s, w0, #-16
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.index.nxv4i32(i32 %a, i32 -16)
+  ret  %out
+}
+
+define  @index_ri_i64(i64 %a) {
+; CHECK-LABEL: index_ri_i64:
+; CHECK: index z0.d, x0, #15
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.index.nxv2i64(i64 %a, i64 15)
+  ret  %out
+}
+
+define  @index_ri_range(i16 %a) {
+; CHECK-LABEL: index_ri_range:
+; CHECK: mov w8, #16
+; CHECK: index z0.h, w0, w8
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.index.nxv8i16(i16 %a, i16 16)
+  ret  %out
+}
+
+;
+; INDEX (SCALARS)
+;
+
+define  @index_rr_i8(i8 %a, i8 %b) {
+; CHECK-LABEL: index_rr_i8:
+; CHECK: index z0.b, w0, w1
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.index.nxv16i8(i8 %a, i8 %b)
+  ret  %out
+}
+
+define  @index_rr_i16(i16 %a, i16 %b) {
+; CHECK-LABEL: index_rr_i16:
+; CHECK: index z0.h, w0, w1
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.index.nxv8i16(i16 %a, i16 %b)
+  ret  %out
+}
+
+define  @index_rr_i32(i32 %a, i32 %b) {
+; CHECK-LABEL: index_rr_i32:
+; CHECK: index z0.s, w0, w1
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.index.nxv4i32(i32 %a, i32 %b)
+  ret  %out
+}
+
+define  @index_rr_i64(i64 %a, i64 %b) {
+; CHECK-LABEL: index_rr_i64:
+; CHECK: index z0.d, x0, x1
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.index.nxv2i64(i64 %a, i64 %b)
+  ret  %out
+}
+
+declare 

[PATCH] D74550: [AArch64][SVE] Add SVE index intrinsic

2020-02-14 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin updated this revision to Diff 244610.
kmclaughlin added a comment.

- Changed 'dl' to 'DL' as suggested by @andwar
- Updated tests to address @sdesmalen's comment (changed all tests to check min 
& max immediate values, plus added some tests where the immediate is out of 
range)
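
For context, a sketch of what "out of range" means here, mirroring the new
index_ii_range test in the diff (the INDEX immediate forms accept signed 5-bit
values, i.e. [-16, 15]):

  define <vscale x 2 x i64> @index_out_of_range_example() {
    ; -17 and 16 are outside [-16, 15], so both are materialised in scalar
    ; registers and the register form of INDEX is selected instead
    %out = call <vscale x 2 x i64> @llvm.aarch64.sve.index.nxv2i64(i64 -17, i64 16)
    ret <vscale x 2 x i64> %out
  }
  declare <vscale x 2 x i64> @llvm.aarch64.sve.index.nxv2i64(i64, i64)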


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D74550/new/

https://reviews.llvm.org/D74550

Files:
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
  llvm/lib/Target/AArch64/AArch64ISelLowering.h
  llvm/lib/Target/AArch64/AArch64InstrFormats.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
  llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h
  llvm/lib/Target/AArch64/SVEInstrFormats.td
  llvm/test/CodeGen/AArch64/sve-intrinsics-index.ll

Index: llvm/test/CodeGen/AArch64/sve-intrinsics-index.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-index.ll
@@ -0,0 +1,178 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+;
+; INDEX (IMMEDIATES)
+;
+
+define  @index_ii_i8() {
+; CHECK-LABEL: index_ii_i8:
+; CHECK: index z0.b, #-16, #15
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.index.nxv16i8(i8 -16, i8 15)
+  ret  %out
+}
+
+define  @index_ii_i16() {
+; CHECK-LABEL: index_ii_i16:
+; CHECK: index z0.h, #15, #-16
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.index.nxv8i16(i16 15, i16 -16)
+  ret  %out
+}
+
+define  @index_ii_i32() {
+; CHECK-LABEL: index_ii_i32:
+; CHECK: index z0.s, #-16, #15
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.index.nxv4i32(i32 -16, i32 15)
+  ret  %out
+}
+
+define  @index_ii_i64() {
+; CHECK-LABEL: index_ii_i64:
+; CHECK: index z0.d, #15, #-16
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.index.nxv2i64(i64 15, i64 -16)
+  ret  %out
+}
+
+define  @index_ii_range() {
+; CHECK-LABEL: index_ii_range:
+; CHECK: mov w8, #16
+; CHECK-NEXT: mov x9, #-17
+; CHECK-NEXT: index z0.d, x9, x8
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.index.nxv2i64(i64 -17, i64 16)
+  ret  %out
+}
+
+;
+; INDEX (IMMEDIATE, SCALAR)
+;
+
+define  @index_ir_i8(i8 %a) {
+; CHECK-LABEL: index_ir_i8:
+; CHECK: index z0.b, #15, w0
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.index.nxv16i8(i8 15, i8 %a)
+  ret  %out
+}
+
+define  @index_ir_i16(i16 %a) {
+; CHECK-LABEL: index_ir_i16:
+; CHECK: index z0.h, #-16, w0
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.index.nxv8i16(i16 -16, i16 %a)
+  ret  %out
+}
+
+define  @index_ir_i32(i32 %a) {
+; CHECK-LABEL: index_ir_i32:
+; CHECK: index z0.s, #15, w0
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.index.nxv4i32(i32 15, i32 %a)
+  ret  %out
+}
+
+define  @index_ir_i64(i64 %a) {
+; CHECK-LABEL: index_ir_i64:
+; CHECK: index z0.d, #-16, x0
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.index.nxv2i64(i64 -16, i64 %a)
+  ret  %out
+}
+
+define  @index_ir_range(i32 %a) {
+; CHECK-LABEL: index_ir_range:
+; CHECK: mov w8, #-17
+; CHECK: index z0.s, w8, w0
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.index.nxv4i32(i32 -17, i32 %a)
+  ret  %out
+}
+
+;
+; INDEX (SCALAR, IMMEDIATE)
+;
+
+define  @index_ri_i8(i8 %a) {
+; CHECK-LABEL: index_ri_i8:
+; CHECK: index z0.b, w0, #-16
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.index.nxv16i8(i8 %a, i8 -16)
+  ret  %out
+}
+
+define  @index_ri_i16(i16 %a) {
+; CHECK-LABEL: index_ri_i16:
+; CHECK: index z0.h, w0, #15
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.index.nxv8i16(i16 %a, i16 15)
+  ret  %out
+}
+
+define  @index_ri_i32(i32 %a) {
+; CHECK-LABEL: index_ri_i32:
+; CHECK: index z0.s, w0, #-16
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.index.nxv4i32(i32 %a, i32 -16)
+  ret  %out
+}
+
+define  @index_ri_i64(i64 %a) {
+; CHECK-LABEL: index_ri_i64:
+; CHECK: index z0.d, x0, #15
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.index.nxv2i64(i64 %a, i64 15)
+  ret  %out
+}
+
+define  @index_ri_range(i16 %a) {
+; CHECK-LABEL: index_ri_range:
+; CHECK: mov w8, #16
+; CHECK: index z0.h, w0, w8
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.index.nxv8i16(i16 %a, i16 16)
+  ret  %out
+}
+
+;
+; INDEX (SCALARS)
+;
+
+define  @index_rr_i8(i8 %a, i8 %b) {
+; CHECK-LABEL: index_rr_i8:
+; CHECK: index z0.b, w0, w1
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.index.nxv16i8(i8 %a, i8 %b)
+  ret  %out
+}
+
+define  @index_rr_i16(i16 %a, i16 %b) {
+; CHECK-LABEL: index_rr_i16:
+; CHECK: index z0.h, w0, w1
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.index.nxv8i16(i16 %a, i16 %b)
+  ret  %out
+}
+
+define  @index_rr_i32(i32 %a, i32 %b) {
+; CHECK-LABEL: index_rr_i32:
+; CHECK: index z0.s, w0, w1
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.index.nxv4i32(i32 %a, i32 %b)
+  ret  %out
+}
+
+define  @index_rr_i64(i64 %a, i64 %b) {
+; CHECK-LABEL: index_rr_i64:
+; CHECK: index z0.d, x0, x1
+; CHECK-NEXT: ret
+  %out = 

[PATCH] D74550: [AArch64][SVE] Add SVE index intrinsic

2020-02-13 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin created this revision.
kmclaughlin added reviewers: sdesmalen, andwar, efriedma, dancgr, 
cameron.mcinally.
Herald added subscribers: psnobl, arphaman, rkruppe, hiraditya, kristof.beyls, 
tschuett.
Herald added a reviewer: rengolin.
Herald added a project: LLVM.

Implements the @llvm.aarch64.sve.index intrinsic, which
takes a scalar base and step value.

This patch also adds the printSImm function to AArch64InstPrinter
to ensure that immediates of type i8 & i16 are printed correctly.
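
A minimal IR sketch of the intrinsic, using the types from the tests below
(each result element is base + i * step; either operand may be an immediate
or come from a scalar register):

  define <vscale x 16 x i8> @index_example(i8 %base) {
    ; base from a scalar register, step as an immediate: "index z0.b, w0, #1"
    %out = call <vscale x 16 x i8> @llvm.aarch64.sve.index.nxv16i8(i8 %base, i8 1)
    ret <vscale x 16 x i8> %out
  }
  declare <vscale x 16 x i8> @llvm.aarch64.sve.index.nxv16i8(i8, i8)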


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D74550

Files:
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
  llvm/lib/Target/AArch64/AArch64ISelLowering.h
  llvm/lib/Target/AArch64/AArch64InstrFormats.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
  llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h
  llvm/lib/Target/AArch64/SVEInstrFormats.td
  llvm/test/CodeGen/AArch64/sve-intrinsics-index.ll

Index: llvm/test/CodeGen/AArch64/sve-intrinsics-index.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-index.ll
@@ -0,0 +1,150 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+;
+; INDEX (IMMEDIATES)
+;
+
+define  @index_ii_i8() {
+; CHECK-LABEL: index_ii_i8:
+; CHECK: index z0.b, #-1, #0
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.index.nxv16i8(i8 -1, i8 0)
+  ret  %out
+}
+
+define  @index_ii_i16() {
+; CHECK-LABEL: index_ii_i16:
+; CHECK: index z0.h, #-2, #3
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.index.nxv8i16(i16 -2, i16 3)
+  ret  %out
+}
+
+define  @index_ii_i32() {
+; CHECK-LABEL: index_ii_i32:
+; CHECK: index z0.s, #-12, #13
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.index.nxv4i32(i32 -12, i32 13)
+  ret  %out
+}
+
+define  @index_ii_i64() {
+; CHECK-LABEL: index_ii_i64:
+; CHECK: index z0.d, #-14, #15
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.index.nxv2i64(i64 -14, i64 15)
+  ret  %out
+}
+
+;
+; INDEX (IMMEDIATE, SCALAR)
+;
+
+define  @index_ir_i8(i8 %a) {
+; CHECK-LABEL: index_ir_i8:
+; CHECK: index z0.b, #0, w0
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.index.nxv16i8(i8 0, i8 %a)
+  ret  %out
+}
+
+define  @index_ir_i16(i16 %a) {
+; CHECK-LABEL: index_ir_i16:
+; CHECK: index z0.h, #1, w0
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.index.nxv8i16(i16 1, i16 %a)
+  ret  %out
+}
+
+define  @index_ir_i32(i32 %a) {
+; CHECK-LABEL: index_ir_i32:
+; CHECK: index z0.s, #-14, w0
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.index.nxv4i32(i32 -14, i32 %a)
+  ret  %out
+}
+
+define  @index_ir_i64(i64 %a) {
+; CHECK-LABEL: index_ir_i64:
+; CHECK: index z0.d, #-15, x0
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.index.nxv2i64(i64 -15, i64 %a)
+  ret  %out
+}
+
+;
+; INDEX (SCALAR, IMMEDIATE)
+;
+
+define  @index_ri_i8(i8 %a) {
+; CHECK-LABEL: index_ri_i8:
+; CHECK: index z0.b, w0, #0
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.index.nxv16i8(i8 %a, i8 0)
+  ret  %out
+}
+
+define  @index_ri_i16(i16 %a) {
+; CHECK-LABEL: index_ri_i16:
+; CHECK: index z0.h, w0, #1
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.index.nxv8i16(i16 %a, i16 1)
+  ret  %out
+}
+
+define  @index_ri_i32(i32 %a) {
+; CHECK-LABEL: index_ri_i32:
+; CHECK: index z0.s, w0, #-16
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.index.nxv4i32(i32 %a, i32 -16)
+  ret  %out
+}
+
+define  @index_ri_i64(i64 %a) {
+; CHECK-LABEL: index_ri_i64:
+; CHECK: index z0.d, x0, #15
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.index.nxv2i64(i64 %a, i64 15)
+  ret  %out
+}
+
+;
+; INDEX (SCALARS)
+;
+
+define  @index_rr_i8(i8 %a, i8 %b) {
+; CHECK-LABEL: index_rr_i8:
+; CHECK: index z0.b, w0, w1
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.index.nxv16i8(i8 %a, i8 %b)
+  ret  %out
+}
+
+define  @index_rr_i16(i16 %a, i16 %b) {
+; CHECK-LABEL: index_rr_i16:
+; CHECK: index z0.h, w0, w1
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.index.nxv8i16(i16 %a, i16 %b)
+  ret  %out
+}
+
+define  @index_rr_i32(i32 %a, i32 %b) {
+; CHECK-LABEL: index_rr_i32:
+; CHECK: index z0.s, w0, w1
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.index.nxv4i32(i32 %a, i32 %b)
+  ret  %out
+}
+
+define  @index_rr_i64(i64 %a, i64 %b) {
+; CHECK-LABEL: index_rr_i64:
+; CHECK: index z0.d, x0, x1
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.index.nxv2i64(i64 %a, i64 %b)
+  ret  %out
+}
+
+declare  @llvm.aarch64.sve.index.nxv16i8(i8, i8)
+declare  @llvm.aarch64.sve.index.nxv8i16(i16, i16)
+declare  @llvm.aarch64.sve.index.nxv4i32(i32, i32)
+declare  @llvm.aarch64.sve.index.nxv2i64(i64, i64)
Index: llvm/lib/Target/AArch64/SVEInstrFormats.td
===
--- llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -4386,11 +4386,20 @@
   let 

[PATCH] D74222: [AArch64][SVE] Add mul/mla/mls lane & dup intrinsics

2020-02-13 Thread Kerry McLaughlin via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rG671cbc1fbba0: [AArch64][SVE] Add mul/mla/mls lane & dup 
intrinsics (authored by kmclaughlin).

Changed prior to commit:
  https://reviews.llvm.org/D74222?vs=243164&id=244372#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D74222/new/

https://reviews.llvm.org/D74222

Files:
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
  llvm/lib/Target/AArch64/AArch64ISelLowering.h
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/SVEInstrFormats.td
  llvm/test/CodeGen/AArch64/sve-intrinsics-scalar-to-vec.ll
  llvm/test/CodeGen/AArch64/sve2-intrinsics-int-mul-lane.ll

Index: llvm/test/CodeGen/AArch64/sve2-intrinsics-int-mul-lane.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve2-intrinsics-int-mul-lane.ll
@@ -0,0 +1,119 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
+
+;
+; MUL
+;
+
+define  @mul_lane_d( %a,  %b) {
+; CHECK-LABEL: mul_lane_d:
+; CHECK: mul z0.d, z0.d, z1.d[1]
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.mul.lane.nxv2i64( %a,
+ %b,
+i32 1)
+  ret  %out
+}
+
+define  @mul_lane_s( %a,  %b) {
+; CHECK-LABEL: mul_lane_s:
+; CHECK: mul z0.s, z0.s, z1.s[1]
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.mul.lane.nxv4i32( %a,
+ %b,
+i32 1)
+  ret  %out
+}
+
+define  @mul_lane_h( %a,  %b) {
+; CHECK-LABEL: mul_lane_h:
+; CHECK: mul z0.h, z0.h, z1.h[1]
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.mul.lane.nxv8i16( %a,
+ %b,
+i32 1)
+  ret  %out
+}
+
+;
+; MLA
+;
+
+define  @mla_lane_d( %a,  %b,  %c) {
+; CHECK-LABEL: mla_lane_d:
+; CHECK: mla z0.d, z1.d, z2.d[1]
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.mla.lane.nxv2i64( %a,
+ %b,
+ %c,
+i32 1)
+  ret  %out
+}
+
+define  @mla_lane_s( %a,  %b,  %c) {
+; CHECK-LABEL: mla_lane_s:
+; CHECK: mla z0.s, z1.s, z2.s[1]
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.mla.lane.nxv4i32( %a,
+ %b,
+ %c,
+i32 1)
+  ret  %out
+}
+
+define  @mla_lane_h( %a,  %b,  %c) {
+; CHECK-LABEL: mla_lane_h:
+; CHECK: mla z0.h, z1.h, z2.h[1]
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.mla.lane.nxv8i16( %a,
+ %b,
+ %c,
+i32 1)
+  ret  %out
+}
+
+;
+; MLS
+;
+
+define  @mls_lane_d( %a,  %b,  %c) {
+; CHECK-LABEL: mls_lane_d:
+; CHECK: mls z0.d, z1.d, z2.d[1]
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.mls.lane.nxv2i64( %a,
+ %b,
+ %c,
+i32 1)
+  ret  %out
+}
+
+define  @mls_lane_s( %a,  %b,  %c) {
+; CHECK-LABEL: mls_lane_s:
+; CHECK: mls z0.s, z1.s, z2.s[1]
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.mls.lane.nxv4i32( %a,
+ %b,
+ %c,
+i32 1)
+  ret  %out
+}
+
+define  @mls_lane_h( %a,  %b,  %c) {
+; CHECK-LABEL: mls_lane_h:
+; CHECK: mls z0.h, z1.h, z2.h[1]
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.mls.lane.nxv8i16( %a,
+ %b,
+ %c,
+i32 1)
+  ret  %out
+}
+
+declare  @llvm.aarch64.sve.mul.lane.nxv8i16(, , i32)
+declare  @llvm.aarch64.sve.mul.lane.nxv4i32(, , i32)
+declare  @llvm.aarch64.sve.mul.lane.nxv2i64(, , i32)
+declare  @llvm.aarch64.sve.mla.lane.nxv8i16(, , , i32)
+declare  @llvm.aarch64.sve.mla.lane.nxv4i32(, , , i32)
+declare  @llvm.aarch64.sve.mla.lane.nxv2i64(, , , i32)
+declare  

[PATCH] D73903: [AArch64][SVE] Add remaining SVE2 intrinsics for widening DSP operations

2020-02-13 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin updated this revision to Diff 244367.
kmclaughlin added a comment.

- Rebased & moved new intrinsics under the existing headers in 
IntrinsicsAArch64.td


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D73903/new/

https://reviews.llvm.org/D73903

Files:
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/SVEInstrFormats.td
  llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-dsp.ll

Index: llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-dsp.ll
===
--- llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-dsp.ll
+++ llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-dsp.ll
@@ -193,6 +193,69 @@
 }
 
 ;
+; SADDWB
+;
+
+define  @saddwb_b( %a,  %b) {
+; CHECK-LABEL: saddwb_b:
+; CHECK: saddwb z0.h, z0.h, z1.b
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.saddwb.nxv8i16( %a,
+   %b)
+  ret  %out
+}
+
+define  @saddwb_h( %a,  %b) {
+; CHECK-LABEL: saddwb_h:
+; CHECK: saddwb z0.s, z0.s, z1.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.saddwb.nxv4i32( %a,
+   %b)
+  ret  %out
+}
+
+define  @saddwb_s( %a,  %b) {
+; CHECK-LABEL: saddwb_s:
+; CHECK: saddwb z0.d, z0.d, z1.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.saddwb.nxv2i64( %a,
+   %b)
+  ret  %out
+}
+
+;
+; SADDWT
+;
+
+define  @saddwt_b( %a,  %b) {
+; CHECK-LABEL: saddwt_b:
+; CHECK: saddwt z0.h, z0.h, z1.b
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.saddwt.nxv8i16( %a,
+   %b)
+  ret  %out
+}
+
+define  @saddwt_h( %a,  %b) {
+; CHECK-LABEL: saddwt_h:
+; CHECK: saddwt z0.s, z0.s, z1.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.saddwt.nxv4i32( %a,
+   %b)
+  ret  %out
+}
+
+define  @saddwt_s( %a,  %b) {
+; CHECK-LABEL: saddwt_s:
+; CHECK: saddwt z0.d, z0.d, z1.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.saddwt.nxv2i64( %a,
+   %b)
+  ret  %out
+}
+
+
+;
 ; SMULLB (Vectors)
 ;
 
@@ -224,6 +287,30 @@
 }
 
 ;
+; SMULLB (Indexed)
+;
+
+define  @smullb_lane_h( %a,  %b) {
+; CHECK-LABEL: smullb_lane_h:
+; CHECK: smullb z0.s, z0.h, z1.h[4]
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.smullb.lane.nxv4i32( %a,
+%b,
+   i32 4)
+  ret  %out
+}
+
+define  @smullb_lane_s( %a,  %b) {
+; CHECK-LABEL: smullb_lane_s:
+; CHECK: smullb z0.d, z0.s, z1.s[3]
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.smullb.lane.nxv2i64( %a,
+%b,
+   i32 3)
+  ret  %out
+}
+
+;
 ; SMULLT (Vectors)
 ;
 
@@ -255,6 +342,30 @@
 }
 
 ;
+; SMULLT (Indexed)
+;
+
+define  @smullt_lane_h( %a,  %b) {
+; CHECK-LABEL: smullt_lane_h:
+; CHECK: smullt z0.s, z0.h, z1.h[5]
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.smullt.lane.nxv4i32( %a,
+%b,
+   i32 5)
+  ret  %out
+}
+
+define  @smullt_lane_s( %a,  %b) {
+; CHECK-LABEL: smullt_lane_s:
+; CHECK: smullt z0.d, z0.s, z1.s[2]
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.smullt.lane.nxv2i64( %a,
+%b,
+   i32 2)
+  ret  %out
+}
+
+;
 ; SQDMULLB (Vectors)
 ;
 
@@ -286,6 +397,30 @@
 }
 
 ;
+; SQDMULLB (Indexed)
+;
+
+define  @sqdmullb_lane_h( %a,  %b) {
+; CHECK-LABEL: sqdmullb_lane_h:
+; CHECK: sqdmullb z0.s, z0.h, z1.h[2]
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.sqdmullb.lane.nxv4i32( %a,
+  %b,
+ i32 2)
+  ret  %out
+}
+
+define  @sqdmullb_lane_s( %a,  %b) {
+; CHECK-LABEL: sqdmullb_lane_s:
+; CHECK: sqdmullb z0.d, z0.s, z1.s[1]
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.sqdmullb.lane.nxv2i64( %a,
+  %b,
+ i32 1)
+  ret  %out
+}
+
+;
 ; SQDMULLT (Vectors)
 ;
 
@@ -317,6 +452,30 @@
 }
 
 ;
+; SQDMULLT (Indexed)
+;
+
+define  @sqdmullt_lane_h( %a,  %b) {
+; CHECK-LABEL: sqdmullt_lane_h:
+; CHECK: sqdmullt z0.s, z0.h, z1.h[3]
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.sqdmullt.lane.nxv4i32( 

[PATCH] D73687: [AArch64][SVE] Add SVE2 intrinsics for complex integer dot product

2020-02-11 Thread Kerry McLaughlin via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rGe7755f9e4f48: [AArch64][SVE] Add SVE2 intrinsics for complex 
integer dot product (authored by kmclaughlin).

Changed prior to commit:
  https://reviews.llvm.org/D73687?vs=241726&id=243781#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D73687/new/

https://reviews.llvm.org/D73687

Files:
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/SVEInstrFormats.td
  llvm/test/CodeGen/AArch64/sve2-intrinsics-complex-dot.ll

Index: llvm/test/CodeGen/AArch64/sve2-intrinsics-complex-dot.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve2-intrinsics-complex-dot.ll
@@ -0,0 +1,61 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
+
+
+;
+; CDOT
+;
+
+define  @cdot_s( %a,  %b,  %c) {
+; CHECK-LABEL: cdot_s:
+; CHECK: cdot z0.s, z1.b, z2.b, #0
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.cdot.nxv4i32( %a,
+ %b,
+ %c,
+i32 0)
+  ret  %out
+}
+
+define  @cdot_d( %a,  %b,  %c) {
+; CHECK-LABEL: cdot_d:
+; CHECK: cdot z0.d, z1.h, z2.h, #90
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.cdot.nxv2i64( %a,
+ %b,
+ %c,
+i32 90)
+  ret  %out
+}
+
+;
+; CDOT(indexed)
+;
+
+define  @cdot_s_idx( %a,  %b,  %c) {
+; CHECK-LABEL: cdot_s_idx:
+; CHECK: cdot z0.s, z1.b, z2.b[0], #180
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.cdot.lane.nxv4i32( %a,
+  %b,
+  %c,
+ i32 0, i32 180)
+  ret  %out
+}
+
+
+define  @cdot_d_idx( %a,  %b,  %c) {
+; CHECK-LABEL: cdot_d_idx:
+; CHECK: cdot z0.d, z1.h, z2.h[1], #270
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.cdot.lane.nxv2i64( %a,
+  %b,
+  %c,
+ i32 1, i32 270)
+  ret  %out
+}
+
+
+declare  @llvm.aarch64.sve.cdot.nxv4i32(, , , i32)
+declare  @llvm.aarch64.sve.cdot.nxv2i64(, , , i32)
+declare  @llvm.aarch64.sve.cdot.lane.nxv4i32(, , , i32, i32)
+declare  @llvm.aarch64.sve.cdot.lane.nxv2i64(, , , i32, i32)
Index: llvm/lib/Target/AArch64/SVEInstrFormats.td
===
--- llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -2542,9 +2542,16 @@
   let ElementSize = ElementSizeNone;
 }
 
-multiclass sve2_cintx_dot {
+multiclass sve2_cintx_dot {
   def _S : sve2_complex_int_arith<0b10, 0b0001, asm, ZPR32, ZPR8>;
   def _D : sve2_complex_int_arith<0b11, 0b0001, asm, ZPR64, ZPR16>;
+
+  def : Pat<(nxv4i32 (op (nxv4i32 ZPR32:$Op1), (nxv16i8 ZPR8:$Op2), (nxv16i8 ZPR8:$Op3),
+ (i32 complexrotateop:$imm))),
+(!cast(NAME # "_S") ZPR32:$Op1, ZPR8:$Op2, ZPR8:$Op3, complexrotateop:$imm)>;
+  def : Pat<(nxv2i64 (op (nxv2i64 ZPR64:$Op1), (nxv8i16 ZPR16:$Op2), (nxv8i16 ZPR16:$Op3),
+ (i32 complexrotateop:$imm))),
+(!cast(NAME # "_D") ZPR64:$Op1, ZPR16:$Op2, ZPR16:$Op3, complexrotateop:$imm)>;
 }
 
 //===--===//
@@ -2589,19 +2596,26 @@
   let ElementSize = ElementSizeNone;
 }
 
-multiclass sve2_cintx_dot_by_indexed_elem {
-  def _S : sve2_complex_int_arith_indexed<0b10, 0b0100, asm, ZPR32, ZPR8, ZPR3b8, VectorIndexS> {
+multiclass sve2_cintx_dot_by_indexed_elem {
+  def _S : sve2_complex_int_arith_indexed<0b10, 0b0100, asm, ZPR32, ZPR8, ZPR3b8, VectorIndexS32b> {
 bits<2> iop;
 bits<3> Zm;
 let Inst{20-19} = iop;
 let Inst{18-16} = Zm;
   }
-  def _D : sve2_complex_int_arith_indexed<0b11, 0b0100, asm, ZPR64, ZPR16, ZPR4b16, VectorIndexD> {
+  def _D : sve2_complex_int_arith_indexed<0b11, 0b0100, asm, ZPR64, ZPR16, ZPR4b16, VectorIndexD32b> {
 bit iop;
 bits<4> Zm;
 let Inst{20} = iop;
 let Inst{19-16} = Zm;
   }
+
+  def : Pat<(nxv4i32 (op (nxv4i32 ZPR32:$Op1), (nxv16i8 ZPR8:$Op2), (nxv16i8 ZPR8:$Op3),
+ (i32 VectorIndexS32b_timm:$idx), (i32 complexrotateop:$imm))),
+(!cast(NAME # "_S") ZPR32:$Op1, ZPR8:$Op2, ZPR8:$Op3, VectorIndexS32b_timm:$idx, complexrotateop:$imm)>;
+  def : Pat<(nxv2i64 (op (nxv2i64 ZPR64:$Op1), 

[PATCH] D73636: [AArch64][SVE] SVE2 intrinsics for complex integer arithmetic

2020-02-10 Thread Kerry McLaughlin via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rG92a78750929b: [AArch64][SVE] SVE2 intrinsics for complex 
integer arithmetic (authored by kmclaughlin).

Changed prior to commit:
  https://reviews.llvm.org/D73636?vs=242683&id=243511#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D73636/new/

https://reviews.llvm.org/D73636

Files:
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64InstrFormats.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/SVEInstrFormats.td
  llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-complex-arith.ll
  llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-complex-int-arith.ll

Index: llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-complex-int-arith.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-complex-int-arith.ll
@@ -0,0 +1,106 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
+
+;
+; SADDLBT
+;
+
+define  @saddlbt_b( %a,  %b) {
+; CHECK-LABEL: saddlbt_b:
+; CHECK: saddlbt z0.h, z0.b, z1.b
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.saddlbt.nxv8i16( %a,
+   %b)
+  ret  %out
+}
+
+define  @saddlbt_h( %a,  %b) {
+; CHECK-LABEL: saddlbt_h:
+; CHECK: saddlbt z0.s, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.saddlbt.nxv4i32( %a,
+   %b)
+  ret  %out
+}
+
+define  @saddlbt_s( %a,  %b) {
+; CHECK-LABEL: saddlbt_s:
+; CHECK: saddlbt z0.d, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.saddlbt.nxv2i64( %a,
+   %b)
+  ret  %out
+}
+
+;
+; SSUBLBT
+;
+
+define  @ssublbt_b( %a,  %b) {
+; CHECK-LABEL: ssublbt_b:
+; CHECK: ssublbt z0.h, z0.b, z1.b
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.ssublbt.nxv8i16( %a,
+   %b)
+  ret  %out
+}
+
+define  @ssublbt_h( %a,  %b) {
+; CHECK-LABEL: ssublbt_h:
+; CHECK: ssublbt z0.s, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.ssublbt.nxv4i32( %a,
+   %b)
+  ret  %out
+}
+
+define  @ssublbt_s( %a,  %b) {
+; CHECK-LABEL: ssublbt_s:
+; CHECK: ssublbt z0.d, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.ssublbt.nxv2i64( %a,
+   %b)
+  ret  %out
+}
+
+;
+; SSUBLTB
+;
+
+define  @ssubltb_b( %a,  %b) {
+; CHECK-LABEL: ssubltb_b:
+; CHECK: ssubltb z0.h, z0.b, z1.b
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.ssubltb.nxv8i16( %a,
+   %b)
+  ret  %out
+}
+
+define  @ssubltb_h( %a,  %b) {
+; CHECK-LABEL: ssubltb_h:
+; CHECK: ssubltb z0.s, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.ssubltb.nxv4i32( %a,
+   %b)
+  ret  %out
+}
+
+define  @ssubltb_s( %a,  %b) {
+; CHECK-LABEL: ssubltb_s:
+; CHECK: ssubltb z0.d, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.ssubltb.nxv2i64( %a,
+   %b)
+  ret  %out
+}
+
+declare  @llvm.aarch64.sve.saddlbt.nxv8i16(, )
+declare  @llvm.aarch64.sve.saddlbt.nxv4i32(, )
+declare  @llvm.aarch64.sve.saddlbt.nxv2i64(, )
+
+declare  @llvm.aarch64.sve.ssublbt.nxv8i16(, )
+declare  @llvm.aarch64.sve.ssublbt.nxv4i32(, )
+declare  @llvm.aarch64.sve.ssublbt.nxv2i64(, )
+
+declare  @llvm.aarch64.sve.ssubltb.nxv8i16(, )
+declare  @llvm.aarch64.sve.ssubltb.nxv4i32(, )
+declare  @llvm.aarch64.sve.ssubltb.nxv2i64(, )
Index: llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-complex-arith.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-complex-arith.ll
@@ -0,0 +1,267 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
+
+;
+; CADD
+;
+
+define  @cadd_b( %a,  %b) {
+; CHECK-LABEL: cadd_b:
+; CHECK: cadd z0.b, z0.b, z1.b, #90
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.cadd.x.nxv16i8( %a,
+   %b,
+  i32 90)
+  ret  %out
+}
+
+define  @cadd_h( %a,  %b) {
+; CHECK-LABEL: cadd_h:
+; CHECK: cadd z0.h, z0.h, z1.h, #90
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.cadd.x.nxv8i16( %a,
+   %b,
+  i32 90)
+  ret  %out
+}
+
+define  @cadd_s( %a,  %b) {
+; CHECK-LABEL: cadd_s:
+; CHECK: cadd z0.s, z0.s, z1.s, #270
+; CHECK-NEXT: ret
+  %out = call  

[PATCH] D74117: [AArch64][SVE] SVE2 intrinsics for character match & histogram generation

2020-02-10 Thread Kerry McLaughlin via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rGe299a0814956: [AArch64][SVE] SVE2 intrinsics for character 
match & histogram generation (authored by kmclaughlin).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D74117/new/

https://reviews.llvm.org/D74117

Files:
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/SVEInstrFormats.td
  llvm/test/CodeGen/AArch64/sve2-intrinsics-character-match.ll
  llvm/test/CodeGen/AArch64/sve2-intrinsics-vec-hist-count.ll

Index: llvm/test/CodeGen/AArch64/sve2-intrinsics-vec-hist-count.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve2-intrinsics-vec-hist-count.ll
@@ -0,0 +1,42 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 -asm-verbose=0 < %s | FileCheck %s
+
+;
+; HISTCNT
+;
+
+define  @histcnt_i32( %pg,  %a,  %b) {
+; CHECK-LABEL: histcnt_i32:
+; CHECK: histcnt z0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.histcnt.nxv4i32( %pg,
+%a,
+%b)
+  ret  %out
+}
+
+define  @histcnt_i64( %pg,  %a,  %b) {
+; CHECK-LABEL: histcnt_i64:
+; CHECK: histcnt z0.d, p0/z, z0.d, z1.d
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.histcnt.nxv2i64( %pg,
+%a,
+%b)
+  ret  %out
+}
+
+;
+; HISTSEG
+;
+
+define  @histseg( %a,  %b) {
+; CHECK-LABEL: histseg:
+; CHECK: histseg z0.b, z0.b, z1.b
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.histseg.nxv16i8( %a,
+%b)
+  ret  %out
+}
+
+declare  @llvm.aarch64.sve.histcnt.nxv4i32(, , )
+declare  @llvm.aarch64.sve.histcnt.nxv2i64(, , )
+declare  @llvm.aarch64.sve.histseg.nxv16i8(, )
Index: llvm/test/CodeGen/AArch64/sve2-intrinsics-character-match.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve2-intrinsics-character-match.ll
@@ -0,0 +1,54 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 -asm-verbose=0 < %s | FileCheck %s
+
+;
+; MATCH
+;
+
+define  @match_i8( %pg,  %a,  %b) {
+; CHECK-LABEL: match_i8:
+; CHECK: match p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.match.nxv16i8( %pg,
+  %a,
+  %b)
+  ret  %out
+}
+
+define  @match_i16( %pg,  %a,  %b) {
+; CHECK-LABEL: match_i16:
+; CHECK: match p0.h, p0/z, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.match.nxv8i16( %pg,
+ %a,
+ %b)
+  ret  %out
+}
+
+;
+; NMATCH
+;
+
+define  @nmatch_i8( %pg,  %a,  %b) {
+; CHECK-LABEL: nmatch_i8:
+; CHECK: match p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.nmatch.nxv16i8( %pg,
+   %a,
+   %b)
+  ret  %out
+}
+
+define  @nmatch_i16( %pg,  %a,  %b) {
+; CHECK-LABEL: nmatch_i16:
+; CHECK: match p0.h, p0/z, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.nmatch.nxv8i16( %pg,
+  %a,
+  %b)
+  ret  %out
+}
+
+declare  @llvm.aarch64.sve.match.nxv16i8(, , )
+declare  @llvm.aarch64.sve.match.nxv8i16(, , )
+declare  @llvm.aarch64.sve.nmatch.nxv16i8(, , )
+declare  @llvm.aarch64.sve.nmatch.nxv8i16(, , )
Index: llvm/lib/Target/AArch64/SVEInstrFormats.td
===
--- llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -6828,20 +6828,23 @@
   let Defs = [NZCV];
 }
 
-multiclass sve2_char_match {
+multiclass sve2_char_match {
   def _B : sve2_char_match<0b0, opc, asm, PPR8, ZPR8>;
   def _H : sve2_char_match<0b1, opc, asm, PPR16, ZPR16>;
+
+  def : SVE_3_Op_Pat(NAME # _B)>;
+  def : SVE_3_Op_Pat(NAME # _H)>;
 }
 
 //===--===//
 // SVE2 Histogram Computation - Segment Group
 //===--===//
 
-class sve2_hist_gen_segment
+class sve2_hist_gen_segment
 : I<(outs ZPR8:$Zd), (ins ZPR8:$Zn, ZPR8:$Zm),
   asm, "\t$Zd, $Zn, $Zm",
   "",
-  []>, Sched<[]> {
+  [(set nxv16i8:$Zd, (op nxv16i8:$Zn, nxv16i8:$Zm))]>, Sched<[]> {
   bits<5> Zd;
   bits<5> Zn;
   bits<5> Zm;
@@ -6875,9 +6878,12 @@

[PATCH] D73719: [AArch64][SVE] Add SVE2 intrinsics for widening DSP operations

2020-02-10 Thread Kerry McLaughlin via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rG5e1d7bb6798d: [AArch64][SVE] Add SVE2 intrinsics for 
widening DSP operations (authored by kmclaughlin).

Changed prior to commit:
  https://reviews.llvm.org/D73719?vs=241482&id=243489#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D73719/new/

https://reviews.llvm.org/D73719

Files:
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/SVEInstrFormats.td
  llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-dsp.ll

Index: llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-dsp.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-dsp.ll
@@ -0,0 +1,783 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
+
+;
+; SABALB
+;
+
+define  @sabalb_b( %a,  %b,  %c) {
+; CHECK-LABEL: sabalb_b:
+; CHECK: sabalb z0.h, z1.b, z2.b
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.sabalb.nxv8i16( %a,
+   %b,
+   %c)
+  ret  %out
+}
+
+define  @sabalb_h( %a,  %b,  %c) {
+; CHECK-LABEL: sabalb_h:
+; CHECK: sabalb z0.s, z1.h, z2.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.sabalb.nxv4i32( %a,
+   %b,
+   %c)
+  ret  %out
+}
+
+define  @sabalb_s( %a,  %b,  %c) {
+; CHECK-LABEL: sabalb_s:
+; CHECK: sabalb z0.d, z1.s, z2.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.sabalb.nxv2i64( %a,
+   %b,
+   %c)
+  ret  %out
+}
+
+;
+; SABALT
+;
+
+define  @sabalt_b( %a,  %b,  %c) {
+; CHECK-LABEL: sabalt_b:
+; CHECK: sabalt z0.h, z1.b, z2.b
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.sabalt.nxv8i16( %a,
+   %b,
+   %c)
+  ret  %out
+}
+
+define  @sabalt_h( %a,  %b,  %c) {
+; CHECK-LABEL: sabalt_h:
+; CHECK: sabalt z0.s, z1.h, z2.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.sabalt.nxv4i32( %a,
+   %b,
+   %c)
+  ret  %out
+}
+
+define  @sabalt_s( %a,  %b,  %c) {
+; CHECK-LABEL: sabalt_s:
+; CHECK: sabalt z0.d, z1.s, z2.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.sabalt.nxv2i64( %a,
+   %b,
+   %c)
+  ret  %out
+}
+
+;
+; SABDLB
+;
+
+define  @sabdlb_b( %a,  %b) {
+; CHECK-LABEL: sabdlb_b:
+; CHECK: sabdlb z0.h, z0.b, z1.b
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.sabdlb.nxv8i16( %a,
+   %b)
+  ret  %out
+}
+
+define  @sabdlb_h( %a,  %b) {
+; CHECK-LABEL: sabdlb_h:
+; CHECK: sabdlb z0.s, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.sabdlb.nxv4i32( %a,
+   %b)
+  ret  %out
+}
+
+define  @sabdlb_s( %a,  %b) {
+; CHECK-LABEL: sabdlb_s:
+; CHECK: sabdlb z0.d, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.sabdlb.nxv2i64( %a,
+   %b)
+  ret  %out
+}
+
+;
+; SABDLT
+;
+
+define  @sabdlt_b( %a,  %b) {
+; CHECK-LABEL: sabdlt_b:
+; CHECK: sabdlt z0.h, z0.b, z1.b
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.sabdlt.nxv8i16( %a,
+   %b)
+  ret  %out
+}
+
+define  @sabdlt_h( %a,  %b) {
+; CHECK-LABEL: sabdlt_h:
+; CHECK: sabdlt z0.s, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.sabdlt.nxv4i32( %a,
+   %b)
+  ret  %out
+}
+
+define  @sabdlt_s( %a,  %b) {
+; CHECK-LABEL: sabdlt_s:
+; CHECK: sabdlt z0.d, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.sabdlt.nxv2i64( %a,
+   %b)
+  ret  %out
+}
+
+;
+; SADDLB
+;
+
+define  @saddlb_b( %a,  %b) {
+; CHECK-LABEL: saddlb_b:
+; CHECK: saddlb z0.h, z0.b, z1.b
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.saddlb.nxv8i16( %a,
+   %b)
+  ret  %out
+}
+
+define  @saddlb_h( %a,  %b) {
+; CHECK-LABEL: saddlb_h:
+; CHECK: saddlb z0.s, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.saddlb.nxv4i32( %a,
+   

[PATCH] D74222: [AArch64][SVE] Add mul/mla/mls lane & dup intrinsics

2020-02-07 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin created this revision.
kmclaughlin added reviewers: c-rhodes, sdesmalen, dancgr, efriedma.
Herald added subscribers: psnobl, rkruppe, hiraditya, kristof.beyls, tschuett.
Herald added a reviewer: rengolin.
Herald added a project: LLVM.

Implements the following intrinsics:

- @llvm.aarch64.sve.dup
- @llvm.aarch64.sve.mul.lane
- @llvm.aarch64.sve.mla.lane
- @llvm.aarch64.sve.mls.lane
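
For reference, a minimal IR usage sketch for one of the intrinsics above (the nxv4i32
variant of @llvm.aarch64.sve.mul.lane; the function name is illustrative, and the
lane index must be an immediate):

  define <vscale x 4 x i32> @mul_lane_example(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
    ; Multiply each element of %a by element 1 of the corresponding 128-bit segment of %b.
    %out = call <vscale x 4 x i32> @llvm.aarch64.sve.mul.lane.nxv4i32(<vscale x 4 x i32> %a,
                                                                      <vscale x 4 x i32> %b,
                                                                      i32 1)
    ret <vscale x 4 x i32> %out
  }
  declare <vscale x 4 x i32> @llvm.aarch64.sve.mul.lane.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)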


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D74222

Files:
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
  llvm/lib/Target/AArch64/AArch64ISelLowering.h
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/SVEInstrFormats.td
  llvm/test/CodeGen/AArch64/sve-intrinsics-scalar-to-vec.ll
  llvm/test/CodeGen/AArch64/sve2-intrinsics-int-mul-lane.ll

Index: llvm/test/CodeGen/AArch64/sve2-intrinsics-int-mul-lane.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve2-intrinsics-int-mul-lane.ll
@@ -0,0 +1,119 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
+
+;
+; MUL
+;
+
+define  @mul_lane_d( %a,  %b) {
+; CHECK-LABEL: mul_lane_d:
+; CHECK: mul z0.d, z0.d, z1.d[1]
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.mul.lane.nxv2i64( %a,
+ %b,
+i32 1)
+  ret  %out
+}
+
+define  @mul_lane_s( %a,  %b) {
+; CHECK-LABEL: mul_lane_s:
+; CHECK: mul z0.s, z0.s, z1.s[1]
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.mul.lane.nxv4i32( %a,
+ %b,
+i32 1)
+  ret  %out
+}
+
+define  @mul_lane_h( %a,  %b) {
+; CHECK-LABEL: mul_lane_h:
+; CHECK: mul z0.h, z0.h, z1.h[1]
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.mul.lane.nxv8i16( %a,
+ %b,
+i32 1)
+  ret  %out
+}
+
+;
+; MLA
+;
+
+define  @mla_lane_d( %a,  %b,  %c) {
+; CHECK-LABEL: mla_lane_d:
+; CHECK: mla z0.d, z1.d, z2.d[1]
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.mla.lane.nxv2i64( %a,
+ %b,
+ %c,
+i32 1)
+  ret  %out
+}
+
+define  @mla_lane_s( %a,  %b,  %c) {
+; CHECK-LABEL: mla_lane_s:
+; CHECK: mla z0.s, z1.s, z2.s[1]
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.mla.lane.nxv4i32( %a,
+ %b,
+ %c,
+i32 1)
+  ret  %out
+}
+
+define  @mla_lane_h( %a,  %b,  %c) {
+; CHECK-LABEL: mla_lane_h:
+; CHECK: mla z0.h, z1.h, z2.h[1]
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.mla.lane.nxv8i16( %a,
+ %b,
+ %c,
+i32 1)
+  ret  %out
+}
+
+;
+; MLS
+;
+
+define  @mls_lane_d( %a,  %b,  %c) {
+; CHECK-LABEL: mls_lane_d:
+; CHECK: mls z0.d, z1.d, z2.d[1]
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.mls.lane.nxv2i64( %a,
+ %b,
+ %c,
+i32 1)
+  ret  %out
+}
+
+define  @mls_lane_s( %a,  %b,  %c) {
+; CHECK-LABEL: mls_lane_s:
+; CHECK: mls z0.s, z1.s, z2.s[1]
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.mls.lane.nxv4i32( %a,
+ %b,
+ %c,
+i32 1)
+  ret  %out
+}
+
+define  @mls_lane_h( %a,  %b,  %c) {
+; CHECK-LABEL: mls_lane_h:
+; CHECK: mls z0.h, z1.h, z2.h[1]
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.mls.lane.nxv8i16( %a,
+ %b,
+ %c,
+i32 1)
+  ret  %out
+}
+
+declare  @llvm.aarch64.sve.mul.lane.nxv8i16(, , i32)
+declare  @llvm.aarch64.sve.mul.lane.nxv4i32(, , i32)
+declare  @llvm.aarch64.sve.mul.lane.nxv2i64(, , i32)
+declare  @llvm.aarch64.sve.mla.lane.nxv8i16(, , , i32)
+declare  @llvm.aarch64.sve.mla.lane.nxv4i32(, , , i32)
+declare  

[PATCH] D74117: [AArch64][SVE] SVE2 intrinsics for character match & histogram generation

2020-02-06 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin created this revision.
kmclaughlin added reviewers: c-rhodes, sdesmalen, dancgr, efriedma.
Herald added subscribers: psnobl, rkruppe, hiraditya, kristof.beyls, tschuett.
Herald added a reviewer: rengolin.
Herald added a project: LLVM.

Implements the following intrinsics:

- @llvm.aarch64.sve.histcnt
- @llvm.aarch64.sve.histseg
- @llvm.aarch64.sve.match
- @llvm.aarch64.sve.nmatch
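
As a usage sketch for one of the intrinsics listed above (the nxv16i8 variant of
@llvm.aarch64.sve.match; the function name is illustrative), the expected IR shape is:

  define <vscale x 16 x i1> @match_example(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
    ; Result predicate lane is set when the corresponding active element of %a is
    ; found anywhere in the matching 128-bit segment of %b.
    %out = call <vscale x 16 x i1> @llvm.aarch64.sve.match.nxv16i8(<vscale x 16 x i1> %pg,
                                                                   <vscale x 16 x i8> %a,
                                                                   <vscale x 16 x i8> %b)
    ret <vscale x 16 x i1> %out
  }
  declare <vscale x 16 x i1> @llvm.aarch64.sve.match.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)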


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D74117

Files:
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/SVEInstrFormats.td
  llvm/test/CodeGen/AArch64/sve2-intrinsics-character-match.ll
  llvm/test/CodeGen/AArch64/sve2-intrinsics-vec-hist-count.ll

Index: llvm/test/CodeGen/AArch64/sve2-intrinsics-vec-hist-count.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve2-intrinsics-vec-hist-count.ll
@@ -0,0 +1,42 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 -asm-verbose=0 < %s | FileCheck %s
+
+;
+; HISTCNT
+;
+
+define  @histcnt_i32( %pg,  %a,  %b) {
+; CHECK-LABEL: histcnt_i32:
+; CHECK: histcnt z0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.histcnt.nxv4i32( %pg,
+%a,
+%b)
+  ret  %out
+}
+
+define  @histcnt_i64( %pg,  %a,  %b) {
+; CHECK-LABEL: histcnt_i64:
+; CHECK: histcnt z0.d, p0/z, z0.d, z1.d
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.histcnt.nxv2i64( %pg,
+%a,
+%b)
+  ret  %out
+}
+
+;
+; HISTSEG
+;
+
+define  @histseg( %a,  %b) {
+; CHECK-LABEL: histseg:
+; CHECK: histseg z0.b, z0.b, z1.b
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.histseg.nxv16i8( %a,
+%b)
+  ret  %out
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.histcnt.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.histcnt.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+declare <vscale x 16 x i8> @llvm.aarch64.sve.histseg.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
Index: llvm/test/CodeGen/AArch64/sve2-intrinsics-character-match.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve2-intrinsics-character-match.ll
@@ -0,0 +1,54 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 -asm-verbose=0 < %s | FileCheck %s
+
+;
+; MATCH
+;
+
+define  @match_i8( %pg,  %a,  %b) {
+; CHECK-LABEL: match_i8:
+; CHECK: match p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.match.nxv16i8( %pg,
+  %a,
+  %b)
+  ret  %out
+}
+
+define  @match_i16( %pg,  %a,  %b) {
+; CHECK-LABEL: match_i16:
+; CHECK: match p0.h, p0/z, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.match.nxv8i16( %pg,
+ %a,
+ %b)
+  ret  %out
+}
+
+;
+; NMATCH
+;
+
+define  @nmatch_i8( %pg,  %a,  %b) {
+; CHECK-LABEL: nmatch_i8:
+; CHECK: nmatch p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.nmatch.nxv16i8( %pg,
+   %a,
+   %b)
+  ret  %out
+}
+
+define  @nmatch_i16( %pg,  %a,  %b) {
+; CHECK-LABEL: nmatch_i16:
+; CHECK: nmatch p0.h, p0/z, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.nmatch.nxv8i16( %pg,
+  %a,
+  %b)
+  ret  %out
+}
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.match.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i1> @llvm.aarch64.sve.match.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.nmatch.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i1> @llvm.aarch64.sve.nmatch.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
Index: llvm/lib/Target/AArch64/SVEInstrFormats.td
===
--- llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -6818,20 +6818,23 @@
   let Defs = [NZCV];
 }
 
-multiclass sve2_char_match<bit opc, string asm> {
+multiclass sve2_char_match<bit opc, string asm, SDPatternOperator op> {
   def _B : sve2_char_match<0b0, opc, asm, PPR8, ZPR8>;
   def _H : sve2_char_match<0b1, opc, asm, PPR16, ZPR16>;
+
+  def : SVE_3_Op_Pat<nxv16i1, op, nxv16i1, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>;
+  def : SVE_3_Op_Pat<nxv8i1,  op, nxv8i1,  nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
 }
 
 //===--===//
 // SVE2 Histogram Computation - Segment Group
 //===--===//
 
-class sve2_hist_gen_segment<string asm>
+class sve2_hist_gen_segment<string asm, SDPatternOperator op>
 : I<(outs ZPR8:$Zd), (ins ZPR8:$Zn, ZPR8:$Zm),
   asm, "\t$Zd, $Zn, $Zm",
   "",
-  []>, Sched<[]> {
+  [(set 

[PATCH] D73636: [AArch64][SVE] SVE2 intrinsics for complex integer arithmetic

2020-02-05 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin updated this revision to Diff 242683.
kmclaughlin edited the summary of this revision.
kmclaughlin added a comment.

Changed misleading intrinsic class names used with cadd, sqcadd, cmla & 
sqrdcmlah


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D73636/new/

https://reviews.llvm.org/D73636

Files:
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64InstrFormats.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/SVEInstrFormats.td
  llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-complex-arith.ll
  llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-complex-int-arith.ll

Index: llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-complex-int-arith.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-complex-int-arith.ll
@@ -0,0 +1,106 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
+
+;
+; SADDLBT
+;
+
+define  @saddlbt_b( %a,  %b) {
+; CHECK-LABEL: saddlbt_b:
+; CHECK: saddlbt z0.h, z0.b, z1.b
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.saddlbt.nxv8i16( %a,
+   %b)
+  ret  %out
+}
+
+define  @saddlbt_h( %a,  %b) {
+; CHECK-LABEL: saddlbt_h:
+; CHECK: saddlbt z0.s, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.saddlbt.nxv4i32( %a,
+   %b)
+  ret  %out
+}
+
+define  @saddlbt_s( %a,  %b) {
+; CHECK-LABEL: saddlbt_s:
+; CHECK: saddlbt z0.d, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.saddlbt.nxv2i64( %a,
+   %b)
+  ret  %out
+}
+
+;
+; SSUBLBT
+;
+
+define  @ssublbt_b( %a,  %b) {
+; CHECK-LABEL: ssublbt_b:
+; CHECK: ssublbt z0.h, z0.b, z1.b
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.ssublbt.nxv8i16( %a,
+   %b)
+  ret  %out
+}
+
+define  @ssublbt_h( %a,  %b) {
+; CHECK-LABEL: ssublbt_h:
+; CHECK: ssublbt z0.s, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.ssublbt.nxv4i32( %a,
+   %b)
+  ret  %out
+}
+
+define  @ssublbt_s( %a,  %b) {
+; CHECK-LABEL: ssublbt_s:
+; CHECK: ssublbt z0.d, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.ssublbt.nxv2i64( %a,
+   %b)
+  ret  %out
+}
+
+;
+; SSUBLTB
+;
+
+define  @ssubltb_b( %a,  %b) {
+; CHECK-LABEL: ssubltb_b:
+; CHECK: ssubltb z0.h, z0.b, z1.b
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.ssubltb.nxv8i16( %a,
+   %b)
+  ret  %out
+}
+
+define  @ssubltb_h( %a,  %b) {
+; CHECK-LABEL: ssubltb_h:
+; CHECK: ssubltb z0.s, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.ssubltb.nxv4i32( %a,
+   %b)
+  ret  %out
+}
+
+define  @ssubltb_s( %a,  %b) {
+; CHECK-LABEL: ssubltb_s:
+; CHECK: ssubltb z0.d, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.ssubltb.nxv2i64( %a,
+   %b)
+  ret  %out
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.saddlbt.nxv8i16(<vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.saddlbt.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.saddlbt.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>)
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.ssublbt.nxv8i16(<vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.ssublbt.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.ssublbt.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>)
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.ssubltb.nxv8i16(<vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.ssubltb.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.ssubltb.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>)
Index: llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-complex-arith.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-complex-arith.ll
@@ -0,0 +1,267 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
+
+;
+; CADD
+;
+
+define  @cadd_b( %a,  %b) {
+; CHECK-LABEL: cadd_b:
+; CHECK: cadd z0.b, z0.b, z1.b, #90
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.cadd.x.nxv16i8( %a,
+   %b,
+  i32 90)
+  ret  %out
+}
+
+define  @cadd_h( %a,  %b) {
+; CHECK-LABEL: cadd_h:
+; CHECK: cadd z0.h, z0.h, z1.h, #90
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.cadd.x.nxv8i16( %a,
+   %b,
+  i32 90)
+  ret  %out
+}
+
+define  @cadd_s( %a,  %b) {
+; CHECK-LABEL: cadd_s:
+; CHECK: cadd z0.s, z0.s, z1.s, #270
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.cadd.x.nxv4i32( %a,
+   %b,
+

[PATCH] D73903: [AArch64][SVE] Add remaining SVE2 intrinsics for widening DSP operations

2020-02-05 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin marked an inline comment as done.
kmclaughlin added inline comments.



Comment at: llvm/include/llvm/IR/IntrinsicsAArch64.td:1852
 
-// SVE2 MLA LANE.
-def int_aarch64_sve_smlalb_lane   : SVE2_3VectorArg_Indexed_Intrinsic;

sdesmalen wrote:
> nit: why are you moving these?
I thought it made sense for them to be in the same category as the SVE2 mul & 
mul_lane intrinsics above as they are also "widening DSP operations". I'm happy 
to leave these where they are though.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D73903/new/

https://reviews.llvm.org/D73903





[PATCH] D73903: [AArch64][SVE] Add remaining SVE2 intrinsics for widening DSP operations

2020-02-03 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin created this revision.
kmclaughlin added reviewers: sdesmalen, dancgr, efriedma, c-rhodes.
Herald added subscribers: psnobl, rkruppe, hiraditya, kristof.beyls, tschuett.
Herald added a reviewer: rengolin.
Herald added a project: LLVM.
kmclaughlin added a parent revision: D73719: [AArch64][SVE] Add SVE2 intrinsics 
for widening DSP operations.

Implements the following intrinsics:

- llvm.aarch64.sve.[s|u]mullb_lane
- llvm.aarch64.sve.[s|u]mullt_lane
- llvm.aarch64.sve.sqdmullb_lane
- llvm.aarch64.sve.sqdmullt_lane
- llvm.aarch64.sve.[s|u]addwb
- llvm.aarch64.sve.[s|u]addwt
- llvm.aarch64.sve.[s|u]shllb
- llvm.aarch64.sve.[s|u]shllt
- llvm.aarch64.sve.[s|u]subwb
- llvm.aarch64.sve.[s|u]subwt
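
A minimal IR sketch for one of the widening intrinsics above (the nxv8i16 variant of
@llvm.aarch64.sve.saddwb; the function name is illustrative):

  define <vscale x 8 x i16> @saddwb_example(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b) {
    ; Add the sign-extended even (bottom) byte elements of %b to the halfword elements of %a.
    %out = call <vscale x 8 x i16> @llvm.aarch64.sve.saddwb.nxv8i16(<vscale x 8 x i16> %a,
                                                                    <vscale x 16 x i8> %b)
    ret <vscale x 8 x i16> %out
  }
  declare <vscale x 8 x i16> @llvm.aarch64.sve.saddwb.nxv8i16(<vscale x 8 x i16>, <vscale x 16 x i8>)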


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D73903

Files:
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/SVEInstrFormats.td
  llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-dsp.ll

Index: llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-dsp.ll
===
--- llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-dsp.ll
+++ llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-dsp.ll
@@ -193,6 +193,69 @@
 }
 
 ;
+; SADDWB
+;
+
+define  @saddwb_b( %a,  %b) {
+; CHECK-LABEL: saddwb_b:
+; CHECK: saddwb z0.h, z0.h, z1.b
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.saddwb.nxv8i16( %a,
+   %b)
+  ret  %out
+}
+
+define  @saddwb_h( %a,  %b) {
+; CHECK-LABEL: saddwb_h:
+; CHECK: saddwb z0.s, z0.s, z1.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.saddwb.nxv4i32( %a,
+   %b)
+  ret  %out
+}
+
+define  @saddwb_s( %a,  %b) {
+; CHECK-LABEL: saddwb_s:
+; CHECK: saddwb z0.d, z0.d, z1.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.saddwb.nxv2i64( %a,
+   %b)
+  ret  %out
+}
+
+;
+; SADDWT
+;
+
+define  @saddwt_b( %a,  %b) {
+; CHECK-LABEL: saddwt_b:
+; CHECK: saddwt z0.h, z0.h, z1.b
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.saddwt.nxv8i16( %a,
+   %b)
+  ret  %out
+}
+
+define  @saddwt_h( %a,  %b) {
+; CHECK-LABEL: saddwt_h:
+; CHECK: saddwt z0.s, z0.s, z1.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.saddwt.nxv4i32( %a,
+   %b)
+  ret  %out
+}
+
+define  @saddwt_s( %a,  %b) {
+; CHECK-LABEL: saddwt_s:
+; CHECK: saddwt z0.d, z0.d, z1.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.saddwt.nxv2i64( %a,
+   %b)
+  ret  %out
+}
+
+
+;
 ; SMULLB (Vectors)
 ;
 
@@ -224,6 +287,30 @@
 }
 
 ;
+; SMULLB (Indexed)
+;
+
+define  @smullb_lane_h( %a,  %b) {
+; CHECK-LABEL: smullb_lane_h:
+; CHECK: smullb z0.s, z0.h, z1.h[4]
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.smullb.lane.nxv4i32( %a,
+%b,
+   i32 4)
+  ret  %out
+}
+
+define  @smullb_lane_s( %a,  %b) {
+; CHECK-LABEL: smullb_lane_s:
+; CHECK: smullb z0.d, z0.s, z1.s[3]
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.smullb.lane.nxv2i64( %a,
+%b,
+   i32 3)
+  ret  %out
+}
+
+;
 ; SMULLT (Vectors)
 ;
 
@@ -255,6 +342,30 @@
 }
 
 ;
+; SMULLT (Indexed)
+;
+
+define  @smullt_lane_h( %a,  %b) {
+; CHECK-LABEL: smullt_lane_h:
+; CHECK: smullt z0.s, z0.h, z1.h[5]
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.smullt.lane.nxv4i32( %a,
+%b,
+   i32 5)
+  ret  %out
+}
+
+define  @smullt_lane_s( %a,  %b) {
+; CHECK-LABEL: smullt_lane_s:
+; CHECK: smullt z0.d, z0.s, z1.s[2]
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.smullt.lane.nxv2i64( %a,
+%b,
+   i32 2)
+  ret  %out
+}
+
+;
 ; SQDMULLB (Vectors)
 ;
 
@@ -286,6 +397,30 @@
 }
 
 ;
+; SQDMULLB (Indexed)
+;
+
+define  @sqdmullb_lane_h( %a,  %b) {
+; CHECK-LABEL: sqdmullb_lane_h:
+; CHECK: sqdmullb z0.s, z0.h, z1.h[2]
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.sqdmullb.lane.nxv4i32( %a,
+  %b,
+ i32 2)
+  ret  %out
+}
+
+define  @sqdmullb_lane_s( %a,  %b) {
+; CHECK-LABEL: sqdmullb_lane_s:
+; CHECK: sqdmullb z0.d, z0.s, z1.s[1]
+; 

[PATCH] D73687: [AArch64][SVE] Add SVE2 intrinsics for complex integer dot product

2020-01-31 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin marked 2 inline comments as done.
kmclaughlin added a comment.

Thanks for reviewing this, @efriedma!




Comment at: llvm/include/llvm/IR/IntrinsicsAArch64.td:
+ LLVMSubdivide4VectorType<0>,
+ llvm_i32_ty],
+[IntrNoMem]>;

efriedma wrote:
> Missing ImmArg?
Replaced this with //AdvSIMD_SVE_DOT_Indexed_Intrinsic//, which has the ImmArg 
property but is otherwise identical


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D73687/new/

https://reviews.llvm.org/D73687





[PATCH] D73687: [AArch64][SVE] Add SVE2 intrinsics for complex integer dot product

2020-01-31 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin updated this revision to Diff 241726.
kmclaughlin added a comment.

- Removed the AdvSIMD_SVE_CDOT_Intrinsic class
- Added ImmArg<4> to AdvSIMD_SVE_CDOT_LANE_Intrinsic


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D73687/new/

https://reviews.llvm.org/D73687

Files:
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/SVEInstrFormats.td
  llvm/test/CodeGen/AArch64/sve2-intrinsics-complex-dot.ll

Index: llvm/test/CodeGen/AArch64/sve2-intrinsics-complex-dot.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve2-intrinsics-complex-dot.ll
@@ -0,0 +1,61 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
+
+
+;
+; CDOT
+;
+
+define  @cdot_s( %a,  %b,  %c) {
+; CHECK-LABEL: cdot_s:
+; CHECK: cdot z0.s, z1.b, z2.b, #0
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.cdot.nxv4i32( %a,
+ %b,
+ %c,
+i32 0)
+  ret  %out
+}
+
+define  @cdot_d( %a,  %b,  %c) {
+; CHECK-LABEL: cdot_d:
+; CHECK: cdot z0.d, z1.h, z2.h, #90
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.cdot.nxv2i64( %a,
+ %b,
+ %c,
+i32 90)
+  ret  %out
+}
+
+;
+; CDOT(indexed)
+;
+
+define  @cdot_s_idx( %a,  %b,  %c) {
+; CHECK-LABEL: cdot_s_idx:
+; CHECK: cdot z0.s, z1.b, z2.b[0], #180
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.cdot.lane.nxv4i32( %a,
+  %b,
+  %c,
+ i32 0, i32 180)
+  ret  %out
+}
+
+
+define  @cdot_d_idx( %a,  %b,  %c) {
+; CHECK-LABEL: cdot_d_idx:
+; CHECK: cdot z0.d, z1.h, z2.h[1], #270
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.cdot.lane.nxv2i64( %a,
+  %b,
+  %c,
+ i32 1, i32 270)
+  ret  %out
+}
+
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.cdot.nxv4i32(<vscale x 4 x i32>, <vscale x 16 x i8>, <vscale x 16 x i8>, i32)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.cdot.nxv2i64(<vscale x 2 x i64>, <vscale x 8 x i16>, <vscale x 8 x i16>, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.cdot.lane.nxv4i32(<vscale x 4 x i32>, <vscale x 16 x i8>, <vscale x 16 x i8>, i32, i32)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.cdot.lane.nxv2i64(<vscale x 2 x i64>, <vscale x 8 x i16>, <vscale x 8 x i16>, i32, i32)
Index: llvm/lib/Target/AArch64/SVEInstrFormats.td
===
--- llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -2542,9 +2542,16 @@
   let ElementSize = ElementSizeNone;
 }
 
-multiclass sve2_cintx_dot {
+multiclass sve2_cintx_dot {
   def _S : sve2_complex_int_arith<0b10, 0b0001, asm, ZPR32, ZPR8>;
   def _D : sve2_complex_int_arith<0b11, 0b0001, asm, ZPR64, ZPR16>;
+
+  def : Pat<(nxv4i32 (op (nxv4i32 ZPR32:$Op1), (nxv16i8 ZPR8:$Op2), (nxv16i8 ZPR8:$Op3),
+ (i32 complexrotateop:$imm))),
+(!cast(NAME # "_S") ZPR32:$Op1, ZPR8:$Op2, ZPR8:$Op3, complexrotateop:$imm)>;
+  def : Pat<(nxv2i64 (op (nxv2i64 ZPR64:$Op1), (nxv8i16 ZPR16:$Op2), (nxv8i16 ZPR16:$Op3),
+ (i32 complexrotateop:$imm))),
+(!cast(NAME # "_D") ZPR64:$Op1, ZPR16:$Op2, ZPR16:$Op3, complexrotateop:$imm)>;
 }
 
 //===--===//
@@ -2589,19 +2596,26 @@
   let ElementSize = ElementSizeNone;
 }
 
-multiclass sve2_cintx_dot_by_indexed_elem {
-  def _S : sve2_complex_int_arith_indexed<0b10, 0b0100, asm, ZPR32, ZPR8, ZPR3b8, VectorIndexS> {
+multiclass sve2_cintx_dot_by_indexed_elem {
+  def _S : sve2_complex_int_arith_indexed<0b10, 0b0100, asm, ZPR32, ZPR8, ZPR3b8, VectorIndexS32b> {
 bits<2> iop;
 bits<3> Zm;
 let Inst{20-19} = iop;
 let Inst{18-16} = Zm;
   }
-  def _D : sve2_complex_int_arith_indexed<0b11, 0b0100, asm, ZPR64, ZPR16, ZPR4b16, VectorIndexD> {
+  def _D : sve2_complex_int_arith_indexed<0b11, 0b0100, asm, ZPR64, ZPR16, ZPR4b16, VectorIndexD32b> {
 bit iop;
 bits<4> Zm;
 let Inst{20} = iop;
 let Inst{19-16} = Zm;
   }
+
+  def : Pat<(nxv4i32 (op (nxv4i32 ZPR32:$Op1), (nxv16i8 ZPR8:$Op2), (nxv16i8 ZPR8:$Op3),
+ (i32 VectorIndexS32b_timm:$idx), (i32 complexrotateop:$imm))),
+(!cast(NAME # "_S") ZPR32:$Op1, ZPR8:$Op2, ZPR8:$Op3, VectorIndexS32b_timm:$idx, complexrotateop:$imm)>;
+  def : Pat<(nxv2i64 (op (nxv2i64 ZPR64:$Op1), (nxv8i16 ZPR16:$Op2), (nxv8i16 ZPR16:$Op3),
+ (i32 VectorIndexD32b_timm:$idx), (i32 complexrotateop:$imm))),
+   

[PATCH] D73636: [AArch64][SVE] SVE2 intrinsics for complex integer arithmetic

2020-01-31 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin marked 2 inline comments as done.
kmclaughlin added a comment.

Thanks for reviewing this @sdesmalen!




Comment at: llvm/include/llvm/IR/IntrinsicsAArch64.td:1116
+ LLVMMatchType<0>,
+ llvm_i32_ty],
+[IntrNoMem]>;

sdesmalen wrote:
> missing ImmArg
This has been replaced with AdvSIMD_2VectorArgIndexed_Intrinsic, which it is 
the same as this but includes ImmArg<2>



Comment at: llvm/include/llvm/IR/IntrinsicsAArch64.td:1124
+ LLVMMatchType<0>,
+ llvm_i32_ty],
+[IntrNoMem]>;

sdesmalen wrote:
> missing ImmArg
As above, but using AdvSIMD_3VectorArgIndexed_Intrinsic instead


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D73636/new/

https://reviews.llvm.org/D73636





[PATCH] D73636: [AArch64][SVE] SVE2 intrinsics for complex integer arithmetic

2020-01-31 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin updated this revision to Diff 241675.
kmclaughlin added a comment.

- Changed complexrotateop & complexrotateopodd to use TImmLeaf
- Removed SVE2_CADD_Intrinsic & SVE2_CMLA_Intrinsic classes


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D73636/new/

https://reviews.llvm.org/D73636

Files:
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64InstrFormats.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/SVEInstrFormats.td
  llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-complex-arith.ll
  llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-complex-int-arith.ll

Index: llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-complex-int-arith.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-complex-int-arith.ll
@@ -0,0 +1,182 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
+
+;
+; SADDLBT
+;
+
+define  @saddlbt_b( %a,  %b) {
+; CHECK-LABEL: saddlbt_b:
+; CHECK: saddlbt z0.h, z0.b, z1.b
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.saddlbt.nxv8i16( %a,
+   %b)
+  ret  %out
+}
+
+define  @saddlbt_h( %a,  %b) {
+; CHECK-LABEL: saddlbt_h:
+; CHECK: saddlbt z0.s, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.saddlbt.nxv4i32( %a,
+   %b)
+  ret  %out
+}
+
+define  @saddlbt_s( %a,  %b) {
+; CHECK-LABEL: saddlbt_s:
+; CHECK: saddlbt z0.d, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.saddlbt.nxv2i64( %a,
+   %b)
+  ret  %out
+}
+
+;
+; SQDMLALBT
+;
+
+define  @sqdmlalbt_b( %a,  %b,  %c) {
+; CHECK-LABEL: sqdmlalbt_b:
+; CHECK: sqdmlalbt z0.h, z1.b, z2.b
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.sqdmlalbt.nxv8i16( %a,
+ %b,
+ %c)
+  ret  %out
+}
+
+define  @sqdmlalbt_h( %a,  %b,  %c) {
+; CHECK-LABEL: sqdmlalbt_h:
+; CHECK: sqdmlalbt z0.s, z1.h, z2.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.sqdmlalbt.nxv4i32( %a,
+ %b,
+ %c)
+  ret  %out
+}
+
+define  @sqdmlalbt_s( %a,  %b,  %c) {
+; CHECK-LABEL: sqdmlalbt_s:
+; CHECK: sqdmlalbt z0.d, z1.s, z2.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.sqdmlalbt.nxv2i64( %a,
+ %b,
+ %c)
+  ret  %out
+}
+
+;
+; SQDMLSLBT
+;
+
+define  @sqdmlslbt_b( %a,  %b,  %c) {
+; CHECK-LABEL: sqdmlslbt_b:
+; CHECK: sqdmlslbt z0.h, z1.b, z2.b
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.sqdmlslbt.nxv8i16( %a,
+ %b,
+ %c)
+  ret  %out
+}
+
+define  @sqdmlslbt_h( %a,  %b,  %c) {
+; CHECK-LABEL: sqdmlslbt_h:
+; CHECK: sqdmlslbt z0.s, z1.h, z2.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.sqdmlslbt.nxv4i32( %a,
+ %b,
+ %c)
+  ret  %out
+}
+
+define  @sqdmlslbt_s( %a,  %b,  %c) {
+; CHECK-LABEL: sqdmlslbt_s:
+; CHECK: sqdmlslbt z0.d, z1.s, z2.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.sqdmlslbt.nxv2i64( %a,
+ %b,
+ %c)
+  ret  %out
+}
+
+;
+; SSUBLBT
+;
+
+define  @ssublbt_b( %a,  %b) {
+; CHECK-LABEL: ssublbt_b:
+; CHECK: ssublbt z0.h, z0.b, z1.b
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.ssublbt.nxv8i16( %a,
+   %b)
+  ret  %out
+}
+
+define  @ssublbt_h( %a,  %b) {
+; CHECK-LABEL: ssublbt_h:
+; CHECK: ssublbt z0.s, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.ssublbt.nxv4i32( %a,
+   %b)
+  ret  %out
+}
+
+define  @ssublbt_s( %a,  %b) {
+; CHECK-LABEL: ssublbt_s:
+; CHECK: ssublbt z0.d, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.ssublbt.nxv2i64( %a,
+   %b)
+  ret  %out
+}
+
+;
+; SSUBLTB
+;
+
+define  @ssubltb_b( %a,  %b) {
+; CHECK-LABEL: ssubltb_b:
+; CHECK: ssubltb z0.h, z0.b, z1.b
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.ssubltb.nxv8i16( %a,
+   %b)
+  ret  %out
+}
+
+define  @ssubltb_h( %a,  %b) {
+; CHECK-LABEL: ssubltb_h:
+; CHECK: ssubltb z0.s, z0.h, z1.h
+; CHECK-NEXT: ret
+  

[PATCH] D73551: [AArch64][SVE] Add remaining SVE2 intrinsics for uniform DSP operations

2020-01-31 Thread Kerry McLaughlin via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rG69558c84871d: [AArch64][SVE] Add remaining SVE2 intrinsics 
for uniform DSP operations (authored by kmclaughlin).

Changed prior to commit:
  https://reviews.llvm.org/D73551?vs=240865&id=241668#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D73551/new/

https://reviews.llvm.org/D73551

Files:
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64InstrFormats.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/SVEInstrFormats.td
  llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-dsp.ll

Index: llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-dsp.ll
===
--- llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-dsp.ll
+++ llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-dsp.ll
@@ -1,6 +1,50 @@
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 -asm-verbose=0 < %s | FileCheck %s
 
 ;
+; SABA
+;
+
+define  @saba_i8( %a,  %b,  %c) {
+; CHECK-LABEL: saba_i8:
+; CHECK: saba z0.b, z1.b, z2.b
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.saba.nxv16i8( %a,
+ %b,
+ %c)
+  ret  %out
+}
+
+define  @saba_i16( %a,  %b,  %c) {
+; CHECK-LABEL: saba_i16:
+; CHECK: saba z0.h, z1.h, z2.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.saba.nxv8i16( %a,
+ %b,
+ %c)
+  ret  %out
+}
+
+define  @saba_i32( %a,  %b,  %c) {
+; CHECK-LABEL: saba_i32:
+; CHECK: saba z0.s, z1.s, z2.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.saba.nxv4i32( %a,
+ %b,
+ %c)
+  ret  %out
+}
+
+define  @saba_i64( %a,  %b,  %c) {
+; CHECK-LABEL: saba_i64:
+; CHECK: saba z0.d, z1.d, z2.d
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.saba.nxv2i64( %a,
+ %b,
+ %c)
+  ret  %out
+}
+
+;
 ; SHADD
 ;
 
@@ -133,6 +177,50 @@
 }
 
 ;
+; SLI
+;
+
+define  @sli_i8( %a,  %b) {
+; CHECK-LABEL: sli_i8:
+; CHECK: sli z0.b, z1.b, #0
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.sli.nxv16i8( %a,
+%b,
+   i32 0)
+  ret  %out
+}
+
+define  @sli_i16( %a,  %b) {
+; CHECK-LABEL: sli_i16:
+; CHECK: sli z0.h, z1.h, #1
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.sli.nxv8i16( %a,
+%b,
+   i32 1)
+  ret  %out
+}
+
+define  @sli_i32( %a,  %b) {
+; CHECK-LABEL: sli_i32:
+; CHECK: sli z0.s, z1.s, #30
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.sli.nxv4i32( %a,
+%b,
+   i32 30);
+  ret  %out
+}
+
+define  @sli_i64( %a,  %b) {
+; CHECK-LABEL: sli_i64:
+; CHECK: sli z0.d, z1.d, #63
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.sli.nxv2i64( %a,
+%b,
+   i32 63)
+  ret  %out
+}
+
+;
 ; SQABS
 ;
 
@@ -177,6 +265,50 @@
 }
 
 ;
+; SQADD
+;
+
+define  @sqadd_i8( %pg,  %a,  %b) {
+; CHECK-LABEL: sqadd_i8:
+; CHECK: sqadd z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.sqadd.nxv16i8( %pg,
+  %a,
+  %b)
+  ret  %out
+}
+
+define  @sqadd_i16( %pg,  %a,  %b) {
+; CHECK-LABEL: sqadd_i16:
+; CHECK: sqadd z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.sqadd.nxv8i16( %pg,
+  %a,
+  %b)
+  ret  %out
+}
+
+define  @sqadd_i32( %pg,  %a,  %b) {
+; CHECK-LABEL: sqadd_i32:
+; CHECK: sqadd z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.sqadd.nxv4i32( %pg,
+  %a,
+  %b)
+  ret  %out
+}
+
+define  @sqadd_i64( %pg,  %a,  %b) {
+; CHECK-LABEL: sqadd_i64:
+; CHECK: sqadd z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.sqadd.nxv2i64( %pg,
+  %a,
+ 

[PATCH] D73719: [AArch64][SVE] Add SVE2 intrinsics for widening DSP operations

2020-01-30 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin created this revision.
kmclaughlin added reviewers: sdesmalen, dancgr, efriedma, cameron.mcinally.
Herald added subscribers: psnobl, rkruppe, hiraditya, kristof.beyls, tschuett.
Herald added a reviewer: rengolin.
Herald added a project: LLVM.

Implements the following intrinsics:

- @llvm.aarch64.sve.[s|u]abalb
- @llvm.aarch64.sve.[s|u]abalt
- @llvm.aarch64.sve.[s|u]addlb
- @llvm.aarch64.sve.[s|u]addlt
- @llvm.aarch64.sve.[s|u]sublb
- @llvm.aarch64.sve.[s|u]sublt
- @llvm.aarch64.sve.[s|u]abdlb
- @llvm.aarch64.sve.[s|u]abdlt
- @llvm.aarch64.sve.sqdmullb
- @llvm.aarch64.sve.sqdmullt
- @llvm.aarch64.sve.[s|u]mullb
- @llvm.aarch64.sve.[s|u]mullt
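
A minimal IR sketch for one of the intrinsics above (the nxv8i16 variant of
@llvm.aarch64.sve.sabalb; the function name is illustrative):

  define <vscale x 8 x i16> @sabalb_example(<vscale x 8 x i16> %acc, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
    ; Accumulate the absolute differences of the even (bottom) byte elements of
    ; %b and %c into the halfword accumulator %acc.
    %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sabalb.nxv8i16(<vscale x 8 x i16> %acc,
                                                                    <vscale x 16 x i8> %b,
                                                                    <vscale x 16 x i8> %c)
    ret <vscale x 8 x i16> %out
  }
  declare <vscale x 8 x i16> @llvm.aarch64.sve.sabalb.nxv8i16(<vscale x 8 x i16>, <vscale x 16 x i8>, <vscale x 16 x i8>)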


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D73719

Files:
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/SVEInstrFormats.td
  llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-dsp.ll

Index: llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-dsp.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-dsp.ll
@@ -0,0 +1,783 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
+
+;
+; SABALB
+;
+
+define  @sabalb_b( %a,  %b,  %c) {
+; CHECK-LABEL: sabalb_b:
+; CHECK: sabalb z0.h, z1.b, z2.b
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.sabalb.nxv8i16( %a,
+   %b,
+   %c)
+  ret  %out
+}
+
+define  @sabalb_h( %a,  %b,  %c) {
+; CHECK-LABEL: sabalb_h:
+; CHECK: sabalb z0.s, z1.h, z2.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.sabalb.nxv4i32( %a,
+   %b,
+   %c)
+  ret  %out
+}
+
+define  @sabalb_s( %a,  %b,  %c) {
+; CHECK-LABEL: sabalb_s:
+; CHECK: sabalb z0.d, z1.s, z2.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.sabalb.nxv2i64( %a,
+   %b,
+   %c)
+  ret  %out
+}
+
+;
+; SABALT
+;
+
+define  @sabalt_b( %a,  %b,  %c) {
+; CHECK-LABEL: sabalt_b:
+; CHECK: sabalt z0.h, z1.b, z2.b
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.sabalt.nxv8i16( %a,
+   %b,
+   %c)
+  ret  %out
+}
+
+define  @sabalt_h( %a,  %b,  %c) {
+; CHECK-LABEL: sabalt_h:
+; CHECK: sabalt z0.s, z1.h, z2.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.sabalt.nxv4i32( %a,
+   %b,
+   %c)
+  ret  %out
+}
+
+define  @sabalt_s( %a,  %b,  %c) {
+; CHECK-LABEL: sabalt_s:
+; CHECK: sabalt z0.d, z1.s, z2.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.sabalt.nxv2i64( %a,
+   %b,
+   %c)
+  ret  %out
+}
+
+;
+; SABDLB
+;
+
+define  @sabdlb_b( %a,  %b) {
+; CHECK-LABEL: sabdlb_b:
+; CHECK: sabdlb z0.h, z0.b, z1.b
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.sabdlb.nxv8i16( %a,
+   %b)
+  ret  %out
+}
+
+define  @sabdlb_h( %a,  %b) {
+; CHECK-LABEL: sabdlb_h:
+; CHECK: sabdlb z0.s, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.sabdlb.nxv4i32( %a,
+   %b)
+  ret  %out
+}
+
+define  @sabdlb_s( %a,  %b) {
+; CHECK-LABEL: sabdlb_s:
+; CHECK: sabdlb z0.d, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.sabdlb.nxv2i64( %a,
+   %b)
+  ret  %out
+}
+
+;
+; SABDLT
+;
+
+define  @sabdlt_b( %a,  %b) {
+; CHECK-LABEL: sabdlt_b:
+; CHECK: sabdlt z0.h, z0.b, z1.b
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.sabdlt.nxv8i16( %a,
+   %b)
+  ret  %out
+}
+
+define  @sabdlt_h( %a,  %b) {
+; CHECK-LABEL: sabdlt_h:
+; CHECK: sabdlt z0.s, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.sabdlt.nxv4i32( %a,
+   %b)
+  ret  %out
+}
+
+define  @sabdlt_s( %a,  %b) {
+; CHECK-LABEL: sabdlt_s:
+; CHECK: sabdlt z0.d, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.sabdlt.nxv2i64( %a,
+   %b)
+  ret  %out
+}
+
+;
+; SADDLB
+;
+
+define  @saddlb_b( %a,  %b) {
+; CHECK-LABEL: saddlb_b:
+; CHECK: saddlb z0.h, z0.b, z1.b
+; CHECK-NEXT: ret
+  %out = call  

[PATCH] D73687: [AArch64][SVE] Add SVE2 intrinsics for complex integer dot product

2020-01-30 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin created this revision.
kmclaughlin added reviewers: sdesmalen, efriedma, dancgr, c-rhodes.
Herald added subscribers: psnobl, rkruppe, hiraditya, kristof.beyls, tschuett.
Herald added a reviewer: rengolin.
Herald added a project: LLVM.

Implements the following intrinsics:

- @llvm.aarch64.sve.cdot
- @llvm.aarch64.sve.cdot.lane
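
A minimal IR sketch (the nxv4i32 variant of @llvm.aarch64.sve.cdot; the function name
is illustrative, and the final operand is the rotation in degrees, which must be an
immediate of 0, 90, 180 or 270):

  define <vscale x 4 x i32> @cdot_example(<vscale x 4 x i32> %acc, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
    ; Complex integer dot product of %b and %c with a 90-degree rotation,
    ; accumulated into %acc.
    %out = call <vscale x 4 x i32> @llvm.aarch64.sve.cdot.nxv4i32(<vscale x 4 x i32> %acc,
                                                                  <vscale x 16 x i8> %b,
                                                                  <vscale x 16 x i8> %c,
                                                                  i32 90)
    ret <vscale x 4 x i32> %out
  }
  declare <vscale x 4 x i32> @llvm.aarch64.sve.cdot.nxv4i32(<vscale x 4 x i32>, <vscale x 16 x i8>, <vscale x 16 x i8>, i32)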


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D73687

Files:
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/SVEInstrFormats.td
  llvm/test/CodeGen/AArch64/sve2-intrinsics-complex-dot.ll

Index: llvm/test/CodeGen/AArch64/sve2-intrinsics-complex-dot.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve2-intrinsics-complex-dot.ll
@@ -0,0 +1,61 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
+
+
+;
+; CDOT
+;
+
+define  @cdot_s( %a,  %b,  %c) {
+; CHECK-LABEL: cdot_s:
+; CHECK: cdot z0.s, z1.b, z2.b, #0
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.cdot.nxv4i32( %a,
+ %b,
+ %c,
+i32 0)
+  ret  %out
+}
+
+define  @cdot_d( %a,  %b,  %c) {
+; CHECK-LABEL: cdot_d:
+; CHECK: cdot z0.d, z1.h, z2.h, #90
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.cdot.nxv2i64( %a,
+ %b,
+ %c,
+i32 90)
+  ret  %out
+}
+
+;
+; CDOT(indexed)
+;
+
+define  @cdot_s_idx( %a,  %b,  %c) {
+; CHECK-LABEL: cdot_s_idx:
+; CHECK: cdot z0.s, z1.b, z2.b[0], #180
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.cdot.lane.nxv4i32( %a,
+  %b,
+  %c,
+ i32 0, i32 180)
+  ret  %out
+}
+
+
+define  @cdot_d_idx( %a,  %b,  %c) {
+; CHECK-LABEL: cdot_d_idx:
+; CHECK: cdot z0.d, z1.h, z2.h[1], #270
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.cdot.lane.nxv2i64( %a,
+  %b,
+  %c,
+ i32 1, i32 270)
+  ret  %out
+}
+
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.cdot.nxv4i32(<vscale x 4 x i32>, <vscale x 16 x i8>, <vscale x 16 x i8>, i32)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.cdot.nxv2i64(<vscale x 2 x i64>, <vscale x 8 x i16>, <vscale x 8 x i16>, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.cdot.lane.nxv4i32(<vscale x 4 x i32>, <vscale x 16 x i8>, <vscale x 16 x i8>, i32, i32)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.cdot.lane.nxv2i64(<vscale x 2 x i64>, <vscale x 8 x i16>, <vscale x 8 x i16>, i32, i32)
Index: llvm/lib/Target/AArch64/SVEInstrFormats.td
===
--- llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -2538,9 +2538,16 @@
   let ElementSize = ElementSizeNone;
 }
 
-multiclass sve2_cintx_dot {
+multiclass sve2_cintx_dot {
   def _S : sve2_complex_int_arith<0b10, 0b0001, asm, ZPR32, ZPR8>;
   def _D : sve2_complex_int_arith<0b11, 0b0001, asm, ZPR64, ZPR16>;
+
+  def : Pat<(nxv4i32 (op (nxv4i32 ZPR32:$Op1), (nxv16i8 ZPR8:$Op2), (nxv16i8 ZPR8:$Op3),
+ (i32 complexrotateop:$imm))),
+(!cast(NAME # "_S") ZPR32:$Op1, ZPR8:$Op2, ZPR8:$Op3, complexrotateop:$imm)>;
+  def : Pat<(nxv2i64 (op (nxv2i64 ZPR64:$Op1), (nxv8i16 ZPR16:$Op2), (nxv8i16 ZPR16:$Op3),
+ (i32 complexrotateop:$imm))),
+(!cast(NAME # "_D") ZPR64:$Op1, ZPR16:$Op2, ZPR16:$Op3, complexrotateop:$imm)>;
 }
 
 //===--===//
@@ -2580,19 +2587,26 @@
   let ElementSize = ElementSizeNone;
 }
 
-multiclass sve2_cintx_dot_by_indexed_elem {
-  def _S : sve2_complex_int_arith_indexed<0b10, 0b0100, asm, ZPR32, ZPR8, ZPR3b8, VectorIndexS> {
+multiclass sve2_cintx_dot_by_indexed_elem {
+  def _S : sve2_complex_int_arith_indexed<0b10, 0b0100, asm, ZPR32, ZPR8, ZPR3b8, VectorIndexS32b> {
 bits<2> iop;
 bits<3> Zm;
 let Inst{20-19} = iop;
 let Inst{18-16} = Zm;
   }
-  def _D : sve2_complex_int_arith_indexed<0b11, 0b0100, asm, ZPR64, ZPR16, ZPR4b16, VectorIndexD> {
+  def _D : sve2_complex_int_arith_indexed<0b11, 0b0100, asm, ZPR64, ZPR16, ZPR4b16, VectorIndexD32b> {
 bit iop;
 bits<4> Zm;
 let Inst{20} = iop;
 let Inst{19-16} = Zm;
   }
+
+  def : Pat<(nxv4i32 (op (nxv4i32 ZPR32:$Op1), (nxv16i8 ZPR8:$Op2), (nxv16i8 ZPR8:$Op3),
+ (i32 VectorIndexS32b_timm:$idx), (i32 complexrotateop:$imm))),
+(!cast(NAME # "_S") ZPR32:$Op1, ZPR8:$Op2, ZPR8:$Op3, VectorIndexS32b_timm:$idx, complexrotateop:$imm)>;
+  def : Pat<(nxv2i64 (op (nxv2i64 ZPR64:$Op1), 

[PATCH] D73636: [AArch64][SVE] SVE2 intrinsics for complex integer arithmetic

2020-01-29 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin created this revision.
kmclaughlin added reviewers: sdesmalen, dancgr, efriedma, cameron.mcinally, 
c-rhodes.
Herald added subscribers: psnobl, rkruppe, hiraditya, kristof.beyls, tschuett.
Herald added a reviewer: rengolin.
Herald added a project: LLVM.

Adds the following SVE2 intrinsics:

- cadd & sqcadd
- cmla & sqrdcmlah
- sqdmlalbt & sqdmlslbt
- saddlbt, ssublbt & ssubltb
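
A minimal IR sketch for the unpredicated complex add above (the nxv16i8 variant,
exposed as @llvm.aarch64.sve.cadd.x in the accompanying tests; the function name is
illustrative, and the final operand is the rotation, an immediate of 90 or 270):

  define <vscale x 16 x i8> @cadd_example(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
    ; Complex integer add of %b to %a with a 90-degree rotation.
    %out = call <vscale x 16 x i8> @llvm.aarch64.sve.cadd.x.nxv16i8(<vscale x 16 x i8> %a,
                                                                    <vscale x 16 x i8> %b,
                                                                    i32 90)
    ret <vscale x 16 x i8> %out
  }
  declare <vscale x 16 x i8> @llvm.aarch64.sve.cadd.x.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, i32)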


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D73636

Files:
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/SVEInstrFormats.td
  llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-complex-arith.ll
  llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-complex-int-arith.ll

Index: llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-complex-int-arith.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-complex-int-arith.ll
@@ -0,0 +1,182 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
+
+;
+; SADDLBT
+;
+
+define  @saddlbt_b( %a,  %b) {
+; CHECK-LABEL: saddlbt_b:
+; CHECK: saddlbt z0.h, z0.b, z1.b
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.saddlbt.nxv8i16( %a,
+   %b)
+  ret  %out
+}
+
+define  @saddlbt_h( %a,  %b) {
+; CHECK-LABEL: saddlbt_h:
+; CHECK: saddlbt z0.s, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.saddlbt.nxv4i32( %a,
+   %b)
+  ret  %out
+}
+
+define  @saddlbt_s( %a,  %b) {
+; CHECK-LABEL: saddlbt_s:
+; CHECK: saddlbt z0.d, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.saddlbt.nxv2i64( %a,
+   %b)
+  ret  %out
+}
+
+;
+; SQDMLALBT
+;
+
+define  @sqdmlalbt_b( %a,  %b,  %c) {
+; CHECK-LABEL: sqdmlalbt_b:
+; CHECK: sqdmlalbt z0.h, z1.b, z2.b
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.sqdmlalbt.nxv8i16( %a,
+ %b,
+ %c)
+  ret  %out
+}
+
+define  @sqdmlalbt_h( %a,  %b,  %c) {
+; CHECK-LABEL: sqdmlalbt_h:
+; CHECK: sqdmlalbt z0.s, z1.h, z2.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.sqdmlalbt.nxv4i32( %a,
+ %b,
+ %c)
+  ret  %out
+}
+
+define  @sqdmlalbt_s( %a,  %b,  %c) {
+; CHECK-LABEL: sqdmlalbt_s:
+; CHECK: sqdmlalbt z0.d, z1.s, z2.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.sqdmlalbt.nxv2i64( %a,
+ %b,
+ %c)
+  ret  %out
+}
+
+;
+; SQDMLSLBT
+;
+
+define  @sqdmlslbt_b( %a,  %b,  %c) {
+; CHECK-LABEL: sqdmlslbt_b:
+; CHECK: sqdmlslbt z0.h, z1.b, z2.b
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.sqdmlslbt.nxv8i16( %a,
+ %b,
+ %c)
+  ret  %out
+}
+
+define  @sqdmlslbt_h( %a,  %b,  %c) {
+; CHECK-LABEL: sqdmlslbt_h:
+; CHECK: sqdmlslbt z0.s, z1.h, z2.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.sqdmlslbt.nxv4i32( %a,
+ %b,
+ %c)
+  ret  %out
+}
+
+define  @sqdmlslbt_s( %a,  %b,  %c) {
+; CHECK-LABEL: sqdmlslbt_s:
+; CHECK: sqdmlslbt z0.d, z1.s, z2.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.sqdmlslbt.nxv2i64( %a,
+ %b,
+ %c)
+  ret  %out
+}
+
+;
+; SSUBLBT
+;
+
+define  @ssublbt_b( %a,  %b) {
+; CHECK-LABEL: ssublbt_b:
+; CHECK: ssublbt z0.h, z0.b, z1.b
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.ssublbt.nxv8i16( %a,
+   %b)
+  ret  %out
+}
+
+define  @ssublbt_h( %a,  %b) {
+; CHECK-LABEL: ssublbt_h:
+; CHECK: ssublbt z0.s, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.ssublbt.nxv4i32( %a,
+   %b)
+  ret  %out
+}
+
+define  @ssublbt_s( %a,  %b) {
+; CHECK-LABEL: ssublbt_s:
+; CHECK: ssublbt z0.d, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.ssublbt.nxv2i64( %a,
+   %b)
+  ret  %out
+}
+
+;
+; SSUBLTB
+;
+
+define  @ssubltb_b( %a,  %b) {
+; CHECK-LABEL: ssubltb_b:
+; CHECK: ssubltb z0.h, z0.b, z1.b
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.ssubltb.nxv8i16( %a,
+   %b)
+  ret  %out
+}
+

[PATCH] D73493: [AArch64][SVE] Add SVE2 intrinsics for uniform DSP operations

2020-01-29 Thread Kerry McLaughlin via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rG3cf80822a906: [AArch64][SVE] Add SVE2 intrinsics for uniform 
DSP operations (authored by kmclaughlin).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D73493/new/

https://reviews.llvm.org/D73493

Files:
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/SVEInstrFormats.td
  llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-dsp.ll

Index: llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-dsp.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-dsp.ll
@@ -0,0 +1,869 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 -asm-verbose=0 < %s | FileCheck %s
+
+;
+; SHADD
+;
+
+define  @shadd_i8( %pg,  %a,  %b) {
+; CHECK-LABEL: shadd_i8:
+; CHECK: shadd z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.shadd.nxv16i8( %pg,
+  %a,
+  %b)
+  ret  %out
+}
+
+define  @shadd_i16( %pg,  %a,  %b) {
+; CHECK-LABEL: shadd_i16:
+; CHECK: shadd z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.shadd.nxv8i16( %pg,
+  %a,
+  %b)
+  ret  %out
+}
+
+define  @shadd_i32( %pg,  %a,  %b) {
+; CHECK-LABEL: shadd_i32:
+; CHECK: shadd z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.shadd.nxv4i32( %pg,
+  %a,
+  %b)
+  ret  %out
+}
+
+define  @shadd_i64( %pg,  %a,  %b) {
+; CHECK-LABEL: shadd_i64:
+; CHECK: shadd z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.shadd.nxv2i64( %pg,
+  %a,
+  %b)
+  ret  %out
+}
+
+;
+; SHSUB
+;
+
+define  @shsub_i8( %pg,  %a,  %b) {
+; CHECK-LABEL: shsub_i8:
+; CHECK: shsub z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.shsub.nxv16i8( %pg,
+  %a,
+  %b)
+  ret  %out
+}
+
+define  @shsub_i16( %pg,  %a,  %b) {
+; CHECK-LABEL: shsub_i16:
+; CHECK: shsub z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.shsub.nxv8i16( %pg,
+  %a,
+  %b)
+  ret  %out
+}
+
+define  @shsub_i32( %pg,  %a,  %b) {
+; CHECK-LABEL: shsub_i32:
+; CHECK: shsub z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.shsub.nxv4i32( %pg,
+  %a,
+  %b)
+  ret  %out
+}
+
+define  @shsub_i64( %pg,  %a,  %b) {
+; CHECK-LABEL: shsub_i64:
+; CHECK: shsub z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.shsub.nxv2i64( %pg,
+  %a,
+  %b)
+  ret  %out
+}
+
+;
+; SHSUBR
+;
+
+define  @shsubr_i8( %pg,  %a,  %b) {
+; CHECK-LABEL: shsubr_i8:
+; CHECK: shsubr z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.shsubr.nxv16i8( %pg,
+   %a,
+   %b)
+  ret  %out
+}
+
+define  @shsubr_i16( %pg,  %a,  %b) {
+; CHECK-LABEL: shsubr_i16:
+; CHECK: shsubr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.shsubr.nxv8i16( %pg,
+   %a,
+   %b)
+  ret  %out
+}
+
+define  @shsubr_i32( %pg,  %a,  %b) {
+; CHECK-LABEL: shsubr_i32:
+; CHECK: shsubr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.shsubr.nxv4i32( %pg,
+   %a,
+   %b)
+  ret  %out
+}
+
+define  @shsubr_i64( %pg,  %a,  %b) {
+; CHECK-LABEL: shsubr_i64:
+; CHECK: shsubr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.shsubr.nxv2i64( %pg,
+   %a,
+   %b)
+  ret  %out
+}
+
+;
+; SQABS
+;
+
+define  

[PATCH] D73347: [AArch64][SVE] Add SVE2 intrinsics for pairwise arithmetic

2020-01-29 Thread Kerry McLaughlin via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rGbd33a46213d3: [AArch64][SVE] Add SVE2 intrinsics for 
pairwise arithmetic (authored by kmclaughlin).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D73347/new/

https://reviews.llvm.org/D73347

Files:
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/SVEInstrFormats.td
  llvm/test/CodeGen/AArch64/sve2-intrinsics-non-widening-pairwise-arith.ll
  llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-pairwise-arith.ll

Index: llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-pairwise-arith.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-pairwise-arith.ll
@@ -0,0 +1,77 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
+
+;
+; SADALP
+;
+
+define  @sadalp_i8( %pg,  %a,  %b) {
+; CHECK-LABEL: sadalp_i8:
+; CHECK: sadalp z0.h, p0/m, z1.b
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.sadalp.nxv8i16( %pg,
+   %a,
+   %b)
+  ret  %out
+}
+
+define  @sadalp_i16( %pg,  %a,  %b) {
+; CHECK-LABEL: sadalp_i16:
+; CHECK: sadalp z0.s, p0/m, z1.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.sadalp.nxv4i32( %pg,
+   %a,
+   %b)
+  ret  %out
+}
+
+define  @sadalp_i32( %pg,  %a,  %b) {
+; CHECK-LABEL: sadalp_i32:
+; CHECK: sadalp z0.d, p0/m, z1.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.sadalp.nxv2i64( %pg,
+   %a,
+   %b)
+  ret  %out
+}
+
+;
+; UADALP
+;
+
+define  @uadalp_i8( %pg,  %a,  %b) {
+; CHECK-LABEL: uadalp_i8:
+; CHECK: uadalp z0.h, p0/m, z1.b
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.uadalp.nxv8i16( %pg,
+   %a,
+   %b)
+  ret  %out
+}
+
+define  @uadalp_i16( %pg,  %a,  %b) {
+; CHECK-LABEL: uadalp_i16:
+; CHECK: uadalp z0.s, p0/m, z1.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.uadalp.nxv4i32( %pg,
+   %a,
+   %b)
+  ret  %out
+}
+
+define  @uadalp_i32( %pg,  %a,  %b) {
+; CHECK-LABEL: uadalp_i32:
+; CHECK: uadalp z0.d, p0/m, z1.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.uadalp.nxv2i64( %pg,
+   %a,
+   %b)
+  ret  %out
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sadalp.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 16 x i8>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sadalp.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 8 x i16>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.sadalp.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 4 x i32>)
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.uadalp.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 16 x i8>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.uadalp.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 8 x i16>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.uadalp.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 4 x i32>)
Index: llvm/test/CodeGen/AArch64/sve2-intrinsics-non-widening-pairwise-arith.ll
===
--- llvm/test/CodeGen/AArch64/sve2-intrinsics-non-widening-pairwise-arith.ll
+++ llvm/test/CodeGen/AArch64/sve2-intrinsics-non-widening-pairwise-arith.ll
@@ -1,6 +1,50 @@
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
 
 ;
+; ADDP
+;
+
+define  @addp_i8( %pg,  %a,  %b) {
+; CHECK-LABEL: addp_i8:
+; CHECK: addp z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.addp.nxv16i8( %pg,
+ %a,
+ %b)
+  ret  %out
+}
+
+define  @addp_i16( %pg,  %a,  %b) {
+; CHECK-LABEL: addp_i16:
+; CHECK: addp z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.addp.nxv8i16( %pg,
+ %a,
+ %b)
+  ret  %out
+}
+
+define  @addp_i32( %pg,  %a,  %b) {
+; CHECK-LABEL: addp_i32:
+; CHECK: addp z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.addp.nxv4i32( %pg,
+ %a,
+ %b)
+  ret  %out
+}
+
+define  @addp_i64( %pg,  %a,  %b) {
+; CHECK-LABEL: addp_i64:
+; CHECK: addp z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.addp.nxv2i64( %pg,
+ %a,
+  

[PATCH] D73551: [AArch64][SVE] Add remaining SVE2 intrinsics for uniform DSP operations

2020-01-28 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin created this revision.
kmclaughlin added reviewers: efriedma, sdesmalen, dancgr, cameron.mcinally, 
c-rhodes.
Herald added subscribers: psnobl, rkruppe, hiraditya, kristof.beyls, tschuett.
Herald added a reviewer: rengolin.
Herald added a project: LLVM.
kmclaughlin added a parent revision: D73493: [AArch64][SVE] Add SVE2 intrinsics 
for uniform DSP operations.

Implements the following intrinsics:

- @llvm.aarch64.sve.[s|u]qadd
- @llvm.aarch64.sve.[s|u]qsub
- @llvm.aarch64.sve.suqadd
- @llvm.aarch64.sve.usqadd
- @llvm.aarch64.sve.[s|u]qsubr
- @llvm.aarch64.sve.[s|u]rshl
- @llvm.aarch64.sve.[s|u]qshl
- @llvm.aarch64.sve.[s|u]qrshl
- @llvm.aarch64.sve.[s|u]rshr
- @llvm.aarch64.sve.sqshlu
- @llvm.aarch64.sve.sri
- @llvm.aarch64.sve.sli
- @llvm.aarch64.sve.[s|u]sra
- @llvm.aarch64.sve.[s|u]rsra
- @llvm.aarch64.sve.[s|u]aba
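
A minimal IR sketch for one of the intrinsics above (the nxv16i8 variant of the
unpredicated @llvm.aarch64.sve.saba; the function name is illustrative):

  define <vscale x 16 x i8> @saba_example(<vscale x 16 x i8> %acc, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
    ; Accumulate the signed absolute difference of %b and %c into %acc.
    %out = call <vscale x 16 x i8> @llvm.aarch64.sve.saba.nxv16i8(<vscale x 16 x i8> %acc,
                                                                  <vscale x 16 x i8> %b,
                                                                  <vscale x 16 x i8> %c)
    ret <vscale x 16 x i8> %out
  }
  declare <vscale x 16 x i8> @llvm.aarch64.sve.saba.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)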


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D73551

Files:
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64InstrFormats.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/SVEInstrFormats.td
  llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-dsp.ll

Index: llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-dsp.ll
===
--- llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-dsp.ll
+++ llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-dsp.ll
@@ -1,6 +1,50 @@
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 -asm-verbose=0 < %s | FileCheck %s
 
 ;
+; SABA
+;
+
+define  @saba_i8( %a,  %b,  %c) {
+; CHECK-LABEL: saba_i8:
+; CHECK: saba z0.b, z1.b, z2.b
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.saba.nxv16i8( %a,
+ %b,
+ %c)
+  ret  %out
+}
+
+define  @saba_i16( %a,  %b,  %c) {
+; CHECK-LABEL: saba_i16:
+; CHECK: saba z0.h, z1.h, z2.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.saba.nxv8i16( %a,
+ %b,
+ %c)
+  ret  %out
+}
+
+define  @saba_i32( %a,  %b,  %c) {
+; CHECK-LABEL: saba_i32:
+; CHECK: saba z0.s, z1.s, z2.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.saba.nxv4i32( %a,
+ %b,
+ %c)
+  ret  %out
+}
+
+define  @saba_i64( %a,  %b,  %c) {
+; CHECK-LABEL: saba_i64:
+; CHECK: saba z0.d, z1.d, z2.d
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.saba.nxv2i64( %a,
+ %b,
+ %c)
+  ret  %out
+}
+
+;
 ; SHADD
 ;
 
@@ -133,6 +177,50 @@
 }
 
 ;
+; SLI
+;
+
+define  @sli_i8( %a,  %b) {
+; CHECK-LABEL: sli_i8:
+; CHECK: sli z0.b, z1.b, #0
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.sli.nxv16i8( %a,
+%b,
+   i32 0)
+  ret  %out
+}
+
+define  @sli_i16( %a,  %b) {
+; CHECK-LABEL: sli_i16:
+; CHECK: sli z0.h, z1.h, #1
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.sli.nxv8i16( %a,
+%b,
+   i32 1)
+  ret  %out
+}
+
+define  @sli_i32( %a,  %b) {
+; CHECK-LABEL: sli_i32:
+; CHECK: sli z0.s, z1.s, #30
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.sli.nxv4i32( %a,
+%b,
+   i32 30);
+  ret  %out
+}
+
+define  @sli_i64( %a,  %b) {
+; CHECK-LABEL: sli_i64:
+; CHECK: sli z0.d, z1.d, #63
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.sli.nxv2i64( %a,
+%b,
+   i32 63)
+  ret  %out
+}
+
+;
 ; SQABS
 ;
 
@@ -177,6 +265,50 @@
 }
 
 ;
+; SQADD
+;
+
+define  @sqadd_i8( %pg,  %a,  %b) {
+; CHECK-LABEL: sqadd_i8:
+; CHECK: sqadd z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.sqadd.nxv16i8( %pg,
+  %a,
+  %b)
+  ret  %out
+}
+
+define  @sqadd_i16( %pg,  %a,  %b) {
+; CHECK-LABEL: sqadd_i16:
+; CHECK: sqadd z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.sqadd.nxv8i16( %pg,
+  %a,
+  %b)
+  ret  %out
+}
+
+define  @sqadd_i32( %pg,  %a,  %b) {
+; CHECK-LABEL: sqadd_i32:
+; CHECK: sqadd z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: 

[PATCH] D73493: [AArch64][SVE] Add SVE2 intrinsics for uniform DSP operations

2020-01-27 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin created this revision.
kmclaughlin added reviewers: sdesmalen, efriedma, dancgr, cameron.mcinally.
Herald added subscribers: psnobl, rkruppe, hiraditya, kristof.beyls, tschuett.
Herald added a project: LLVM.

Implements the following intrinsics:

- sqrdmlah, sqrdmlsh, sqrdmulh & sqdmulh
- [s|u]hadd, [s|u]hsub, [s|u]rhadd & [s|u]hsubr
- urecpe, ursqrte, sqabs & sqneg
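
A minimal IR sketch for one of the predicated intrinsics above (the nxv16i8 variant
of @llvm.aarch64.sve.shadd, signed halving add; the function name is illustrative):

  define <vscale x 16 x i8> @shadd_example(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
    ; Halving add of %a and %b for the lanes enabled by %pg.
    %out = call <vscale x 16 x i8> @llvm.aarch64.sve.shadd.nxv16i8(<vscale x 16 x i1> %pg,
                                                                   <vscale x 16 x i8> %a,
                                                                   <vscale x 16 x i8> %b)
    ret <vscale x 16 x i8> %out
  }
  declare <vscale x 16 x i8> @llvm.aarch64.sve.shadd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)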


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D73493

Files:
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/SVEInstrFormats.td
  llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-dsp.ll

Index: llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-dsp.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-dsp.ll
@@ -0,0 +1,869 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 -asm-verbose=0 < %s | FileCheck %s
+
+;
+; SHADD
+;
+
+define  @shadd_i8( %pg,  %a,  %b) {
+; CHECK-LABEL: shadd_i8:
+; CHECK: shadd z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.shadd.nxv16i8( %pg,
+  %a,
+  %b)
+  ret  %out
+}
+
+define  @shadd_i16( %pg,  %a,  %b) {
+; CHECK-LABEL: shadd_i16:
+; CHECK: shadd z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.shadd.nxv8i16( %pg,
+  %a,
+  %b)
+  ret  %out
+}
+
+define  @shadd_i32( %pg,  %a,  %b) {
+; CHECK-LABEL: shadd_i32:
+; CHECK: shadd z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.shadd.nxv4i32( %pg,
+  %a,
+  %b)
+  ret  %out
+}
+
+define  @shadd_i64( %pg,  %a,  %b) {
+; CHECK-LABEL: shadd_i64:
+; CHECK: shadd z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.shadd.nxv2i64( %pg,
+  %a,
+  %b)
+  ret  %out
+}
+
+;
+; SHSUB
+;
+
+define  @shsub_i8( %pg,  %a,  %b) {
+; CHECK-LABEL: shsub_i8:
+; CHECK: shsub z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.shsub.nxv16i8( %pg,
+  %a,
+  %b)
+  ret  %out
+}
+
+define  @shsub_i16( %pg,  %a,  %b) {
+; CHECK-LABEL: shsub_i16:
+; CHECK: shsub z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.shsub.nxv8i16( %pg,
+  %a,
+  %b)
+  ret  %out
+}
+
+define  @shsub_i32( %pg,  %a,  %b) {
+; CHECK-LABEL: shsub_i32:
+; CHECK: shsub z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.shsub.nxv4i32( %pg,
+  %a,
+  %b)
+  ret  %out
+}
+
+define  @shsub_i64( %pg,  %a,  %b) {
+; CHECK-LABEL: shsub_i64:
+; CHECK: shsub z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.shsub.nxv2i64( %pg,
+  %a,
+  %b)
+  ret  %out
+}
+
+;
+; SHSUBR
+;
+
+define  @shsubr_i8( %pg,  %a,  %b) {
+; CHECK-LABEL: shsubr_i8:
+; CHECK: shsubr z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.shsubr.nxv16i8( %pg,
+   %a,
+   %b)
+  ret  %out
+}
+
+define  @shsubr_i16( %pg,  %a,  %b) {
+; CHECK-LABEL: shsubr_i16:
+; CHECK: shsubr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.shsubr.nxv8i16( %pg,
+   %a,
+   %b)
+  ret  %out
+}
+
+define  @shsubr_i32( %pg,  %a,  %b) {
+; CHECK-LABEL: shsubr_i32:
+; CHECK: shsubr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.shsubr.nxv4i32( %pg,
+   %a,
+   %b)
+  ret  %out
+}
+
+define  @shsubr_i64( %pg,  %a,  %b) {
+; CHECK-LABEL: shsubr_i64:
+; CHECK: shsubr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.shsubr.nxv2i64( %pg,
+   %a,

[PATCH] D73347: [AArch64][SVE] Add SVE2 intrinsics for pairwise arithmetic

2020-01-24 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin created this revision.
kmclaughlin added reviewers: dancgr, efriedma, sdesmalen, c-rhodes.
Herald added subscribers: psnobl, rkruppe, hiraditya, kristof.beyls, tschuett.
Herald added a project: LLVM.

Implements the following intrinsics (an IR usage sketch follows the list):

- addp
- smaxp, sminp, umaxp & uminp
- sadalp & uadalp
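
A rough IR sketch of the non-widening and widening pairwise forms; the scalable-vector
types are reconstructed here because the archive drops them, and the wrapper function
and element widths are illustrative only:

  define <vscale x 2 x i64> @pairwise_sketch(<vscale x 4 x i1> %pg4, <vscale x 2 x i1> %pg2,
                                             <vscale x 2 x i64> %acc,
                                             <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
    ; addp: non-widening pairwise add of 32-bit elements.
    %pair = call <vscale x 4 x i32> @llvm.aarch64.sve.addp.nxv4i32(<vscale x 4 x i1> %pg4,
                                                                   <vscale x 4 x i32> %a,
                                                                   <vscale x 4 x i32> %b)
    ; sadalp: signed pairwise add of the 32-bit pairs, accumulated into 64-bit lanes.
    %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sadalp.nxv2i64(<vscale x 2 x i1> %pg2,
                                                                    <vscale x 2 x i64> %acc,
                                                                    <vscale x 4 x i32> %pair)
    ret <vscale x 2 x i64> %out
  }

  declare <vscale x 4 x i32> @llvm.aarch64.sve.addp.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
  declare <vscale x 2 x i64> @llvm.aarch64.sve.sadalp.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 4 x i32>)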


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D73347

Files:
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/SVEInstrFormats.td
  llvm/test/CodeGen/AArch64/sve2-intrinsics-non-widening-pairwise-arith.ll
  llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-pairwise-arith.ll

Index: llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-pairwise-arith.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-pairwise-arith.ll
@@ -0,0 +1,77 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
+
+;
+; SADALP
+;
+
+define  @sadalp_i8( %pg,  %a,  %b) {
+; CHECK-LABEL: sadalp_i8:
+; CHECK: sadalp z0.h, p0/m, z1.b
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.sadalp.nxv8i16( %pg,
+   %a,
+   %b)
+  ret  %out
+}
+
+define  @sadalp_i16( %pg,  %a,  %b) {
+; CHECK-LABEL: sadalp_i16:
+; CHECK: sadalp z0.s, p0/m, z1.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.sadalp.nxv4i32( %pg,
+   %a,
+   %b)
+  ret  %out
+}
+
+define  @sadalp_i32( %pg,  %a,  %b) {
+; CHECK-LABEL: sadalp_i32:
+; CHECK: sadalp z0.d, p0/m, z1.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.sadalp.nxv2i64( %pg,
+   %a,
+   %b)
+  ret  %out
+}
+
+;
+; UADALP
+;
+
+define  @uadalp_i8( %pg,  %a,  %b) {
+; CHECK-LABEL: uadalp_i8:
+; CHECK: uadalp z0.h, p0/m, z1.b
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.uadalp.nxv8i16( %pg,
+   %a,
+   %b)
+  ret  %out
+}
+
+define  @uadalp_i16( %pg,  %a,  %b) {
+; CHECK-LABEL: uadalp_i16:
+; CHECK: uadalp z0.s, p0/m, z1.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.uadalp.nxv4i32( %pg,
+   %a,
+   %b)
+  ret  %out
+}
+
+define  @uadalp_i32( %pg,  %a,  %b) {
+; CHECK-LABEL: uadalp_i32:
+; CHECK: uadalp z0.d, p0/m, z1.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.uadalp.nxv2i64( %pg,
+   %a,
+   %b)
+  ret  %out
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sadalp.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 16 x i8>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sadalp.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 8 x i16>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.sadalp.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 4 x i32>)
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.uadalp.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 16 x i8>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.uadalp.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 8 x i16>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.uadalp.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 4 x i32>)
Index: llvm/test/CodeGen/AArch64/sve2-intrinsics-non-widening-pairwise-arith.ll
===
--- llvm/test/CodeGen/AArch64/sve2-intrinsics-non-widening-pairwise-arith.ll
+++ llvm/test/CodeGen/AArch64/sve2-intrinsics-non-widening-pairwise-arith.ll
@@ -1,6 +1,50 @@
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
 
 ;
+; ADDP
+;
+
+define  @addp_i8( %pg,  %a,  %b) {
+; CHECK-LABEL: addp_i8:
+; CHECK: addp z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.addp.nxv16i8( %pg,
+ %a,
+ %b)
+  ret  %out
+}
+
+define  @addp_i16( %pg,  %a,  %b) {
+; CHECK-LABEL: addp_i16:
+; CHECK: addp z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.addp.nxv8i16( %pg,
+ %a,
+ %b)
+  ret  %out
+}
+
+define  @addp_i32( %pg,  %a,  %b) {
+; CHECK-LABEL: addp_i32:
+; CHECK: addp z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.addp.nxv4i32( %pg,
+ %a,
+ %b)
+  ret  %out
+}
+
+define  @addp_i64( %pg,  %a,  %b) {
+; CHECK-LABEL: addp_i64:
+; CHECK: addp z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.addp.nxv2i64( %pg,
+ %a,
+   

[PATCH] D73097: [AArch64][SVE] Add intrinsics for FFR manipulation

2020-01-24 Thread Kerry McLaughlin via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rG4c4861b577cb: [AArch64][SVE] Add intrinsics for FFR 
manipulation (authored by kmclaughlin).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D73097/new/

https://reviews.llvm.org/D73097

Files:
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/SVEInstrFormats.td
  llvm/test/CodeGen/AArch64/sve-intrinsics-ffr-manipulation.ll

Index: llvm/test/CodeGen/AArch64/sve-intrinsics-ffr-manipulation.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-ffr-manipulation.ll
@@ -0,0 +1,50 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+;
+; RDFFR
+;
+
+define  @rdffr() {
+; CHECK-LABEL: rdffr:
+; CHECK: rdffr p0.b
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.rdffr()
+  ret  %out
+}
+
+define  @rdffr_z( %pg) {
+; CHECK-LABEL: rdffr_z:
+; CHECK: rdffr p0.b, p0/z
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.rdffr.z( %pg)
+  ret  %out
+}
+
+;
+; SETFFR
+;
+
+define void @set_ffr() {
+; CHECK-LABEL: set_ffr:
+; CHECK: setffr
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.setffr()
+  ret void
+}
+
+;
+; WRFFR
+;
+
+define void @wrffr( %a) {
+; CHECK-LABEL: wrffr:
+; CHECK: wrffr p0.b
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.wrffr( %a)
+  ret void
+}
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.rdffr()
+declare <vscale x 16 x i1> @llvm.aarch64.sve.rdffr.z(<vscale x 16 x i1>)
+declare void @llvm.aarch64.sve.setffr()
+declare void @llvm.aarch64.sve.wrffr(<vscale x 16 x i1>)
Index: llvm/lib/Target/AArch64/SVEInstrFormats.td
===
--- llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -5105,6 +5105,17 @@
   let Uses = [FFR];
 }
 
+multiclass sve_int_rdffr_pred {
+  def _REAL : sve_int_rdffr_pred;
+
+  // We need a layer of indirection because early machine code passes balk at
+  // physical register (i.e. FFR) uses that have no previous definition.
+  let hasSideEffects = 1, hasNoSchedulingInfo = 1 in {
+  def "" : Pseudo<(outs PPR8:$Pd), (ins PPRAny:$Pg), [(set (nxv16i1 PPR8:$Pd), (op (nxv16i1 PPRAny:$Pg)))]>,
+   PseudoInstExpansion<(!cast(NAME # _REAL) PPR8:$Pd, PPRAny:$Pg)>;
+  }
+}
+
 class sve_int_rdffr_unpred : I<
   (outs PPR8:$Pd), (ins),
   asm, "\t$Pd",
@@ -5117,11 +5128,22 @@
   let Uses = [FFR];
 }
 
-class sve_int_wrffr
+multiclass sve_int_rdffr_unpred {
+  def _REAL : sve_int_rdffr_unpred;
+
+  // We need a layer of indirection because early machine code passes balk at
+  // physical register (i.e. FFR) uses that have no previous definition.
+  let hasSideEffects = 1, hasNoSchedulingInfo = 1 in {
+  def "" : Pseudo<(outs PPR8:$Pd), (ins), [(set (nxv16i1 PPR8:$Pd), (op))]>,
+   PseudoInstExpansion<(!cast(NAME # _REAL) PPR8:$Pd)>;
+  }
+}
+
+class sve_int_wrffr
 : I<(outs), (ins PPR8:$Pn),
   asm, "\t$Pn",
   "",
-  []>, Sched<[]> {
+  [(op (nxv16i1 PPR8:$Pn))]>, Sched<[]> {
   bits<4> Pn;
   let Inst{31-9} = 0b00100101001010001001000;
   let Inst{8-5}  = Pn;
@@ -5131,11 +5153,11 @@
   let Defs = [FFR];
 }
 
-class sve_int_setffr
+class sve_int_setffr
 : I<(outs), (ins),
   asm, "",
   "",
-  []>, Sched<[]> {
+  [(op)]>, Sched<[]> {
   let Inst{31-0} = 0b00100101001011001001;
 
   let hasSideEffects = 1;
Index: llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
===
--- llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -98,11 +98,11 @@
 
 let Predicates = [HasSVE] in {
 
-  def RDFFR_PPz  : sve_int_rdffr_pred<0b0, "rdffr">;
-  def RDFFRS_PPz : sve_int_rdffr_pred<0b1, "rdffrs">;
-  def RDFFR_P: sve_int_rdffr_unpred<"rdffr">;
-  def SETFFR : sve_int_setffr<"setffr">;
-  def WRFFR  : sve_int_wrffr<"wrffr">;
+  defm RDFFR_PPz  : sve_int_rdffr_pred<0b0, "rdffr", int_aarch64_sve_rdffr_z>;
+  def  RDFFRS_PPz : sve_int_rdffr_pred<0b1, "rdffrs">;
+  defm RDFFR_P: sve_int_rdffr_unpred<"rdffr", int_aarch64_sve_rdffr>;
+  def  SETFFR : sve_int_setffr<"setffr", int_aarch64_sve_setffr>;
+  def  WRFFR  : sve_int_wrffr<"wrffr", int_aarch64_sve_wrffr>;
 
   defm ADD_ZZZ   : sve_int_bin_cons_arit_0<0b000, "add", add>;
   defm SUB_ZZZ   : sve_int_bin_cons_arit_0<0b001, "sub", sub>;
Index: llvm/include/llvm/IR/IntrinsicsAArch64.td
===
--- llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -1291,6 +1291,15 @@
 def int_aarch64_sve_cntp : AdvSIMD_SVE_CNTP_Intrinsic;
 
 //
+// FFR manipulation
+//
+
+def int_aarch64_sve_rdffr   : GCCBuiltin<"__builtin_sve_svrdffr">,   Intrinsic<[llvm_nxv16i1_ty], []>;
+def int_aarch64_sve_rdffr_z : GCCBuiltin<"__builtin_sve_svrdffr_z">, Intrinsic<[llvm_nxv16i1_ty], 

[PATCH] D73097: [AArch64][SVE] Add intrinsics for FFR manipulation

2020-01-24 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin added a comment.

Thanks for your input on this, @efriedma. I will submit this patch for now and
we will revisit the modelling of the FFR, as also discussed in D71698.



Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D73097/new/

https://reviews.llvm.org/D73097





[PATCH] D73025: [AArch64][SVE] Add first-faulting load intrinsic

2020-01-23 Thread Kerry McLaughlin via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rGaa0f37e14a87: [AArch64][SVE] Add first-faulting load 
intrinsic (authored by kmclaughlin).

Changed prior to commit:
  https://reviews.llvm.org/D73025?vs=239303&id=239849#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D73025/new/

https://reviews.llvm.org/D73025

Files:
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
  llvm/lib/Target/AArch64/AArch64ISelLowering.h
  llvm/lib/Target/AArch64/AArch64InstrInfo.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/SVEInstrFormats.td
  llvm/test/CodeGen/AArch64/sve-intrinsics-loads-ff.ll

Index: llvm/test/CodeGen/AArch64/sve-intrinsics-loads-ff.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-loads-ff.ll
@@ -0,0 +1,220 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+;
+; LDFF1B
+;
+
+define  @ldff1b( %pg, i8* %a) {
+; CHECK-LABEL: ldff1b:
+; CHECK: ldff1b { z0.b }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldff1.nxv16i8( %pg, i8* %a)
+  ret  %load
+}
+
+define  @ldff1b_h( %pg, i8* %a) {
+; CHECK-LABEL: ldff1b_h:
+; CHECK: ldff1b { z0.h }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldff1.nxv8i8( %pg, i8* %a)
+  %res = zext  %load to 
+  ret  %res
+}
+
+define  @ldff1b_s( %pg, i8* %a) {
+; CHECK-LABEL: ldff1b_s:
+; CHECK: ldff1b { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldff1.nxv4i8( %pg, i8* %a)
+  %res = zext  %load to 
+  ret  %res
+}
+
+define  @ldff1b_d( %pg, i8* %a) {
+; CHECK-LABEL: ldff1b_d:
+; CHECK: ldff1b { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldff1.nxv2i8( %pg, i8* %a)
+  %res = zext  %load to 
+  ret  %res
+}
+
+;
+; LDFF1SB
+;
+
+define  @ldff1sb_h( %pg, i8* %a) {
+; CHECK-LABEL: ldff1sb_h:
+; CHECK: ldff1sb { z0.h }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldff1.nxv8i8( %pg, i8* %a)
+  %res = sext  %load to 
+  ret  %res
+}
+
+define  @ldff1sb_s( %pg, i8* %a) {
+; CHECK-LABEL: ldff1sb_s:
+; CHECK: ldff1sb { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldff1.nxv4i8( %pg, i8* %a)
+  %res = sext  %load to 
+  ret  %res
+}
+
+define  @ldff1sb_d( %pg, i8* %a) {
+; CHECK-LABEL: ldff1sb_d:
+; CHECK: ldff1sb { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldff1.nxv2i8( %pg, i8* %a)
+  %res = sext  %load to 
+  ret  %res
+}
+
+;
+; LDFF1H
+;
+
+define  @ldff1h( %pg, i16* %a) {
+; CHECK-LABEL: ldff1h:
+; CHECK: ldff1h { z0.h }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldff1.nxv8i16( %pg, i16* %a)
+  ret  %load
+}
+
+define  @ldff1h_s( %pg, i16* %a) {
+; CHECK-LABEL: ldff1h_s:
+; CHECK: ldff1h { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldff1.nxv4i16( %pg, i16* %a)
+  %res = zext  %load to 
+  ret  %res
+}
+
+define  @ldff1h_d( %pg, i16* %a) {
+; CHECK-LABEL: ldff1h_d:
+; CHECK: ldff1h { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldff1.nxv2i16( %pg, i16* %a)
+  %res = zext  %load to 
+  ret  %res
+}
+
+define  @ldff1h_f16( %pg, half* %a) {
+; CHECK-LABEL: ldff1h_f16:
+; CHECK: ldff1h { z0.h }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldff1.nxv8f16( %pg, half* %a)
+  ret  %load
+}
+
+;
+; LDFF1SH
+;
+
+define  @ldff1sh_s( %pg, i16* %a) {
+; CHECK-LABEL: ldff1sh_s:
+; CHECK: ldff1sh { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldff1.nxv4i16( %pg, i16* %a)
+  %res = sext  %load to 
+  ret  %res
+}
+
+define  @ldff1sh_d( %pg, i16* %a) {
+; CHECK-LABEL: ldff1sh_d:
+; CHECK: ldff1sh { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldff1.nxv2i16( %pg, i16* %a)
+  %res = sext  %load to 
+  ret  %res
+}
+
+;
+; LDFF1W
+;
+
+define  @ldff1w( %pg, i32* %a) {
+; CHECK-LABEL: ldff1w:
+; CHECK: ldff1w { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldff1.nxv4i32( %pg, i32* %a)
+  ret  %load
+}
+
+define  @ldff1w_d( %pg, i32* %a) {
+; CHECK-LABEL: ldff1w_d:
+; CHECK: ldff1w { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldff1.nxv2i32( %pg, i32* %a)
+  %res = zext  %load to 
+  ret  %res
+}
+
+define  @ldff1w_f32( %pg, float* %a) {
+; CHECK-LABEL: ldff1w_f32:
+; CHECK: ldff1w { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldff1.nxv4f32( %pg, float* %a)
+  ret  %load
+}
+
+define  @ldff1w_2f32( %pg, float* %a) {
+; CHECK-LABEL: ldff1w_2f32:
+; CHECK: ldff1w { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldff1.nxv2f32( %pg, float* %a)
+  ret  %load
+}
+
+;
+; LDFF1SW
+;
+
+define  @ldff1sw_d( %pg, i32* %a) {
+; CHECK-LABEL: ldff1sw_d:
+; CHECK: ldff1sw { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = 

[PATCH] D71698: [AArch64][SVE] Add intrinsic for non-faulting loads

2020-01-22 Thread Kerry McLaughlin via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rGcdcc4f2a44b5: [AArch64][SVE] Add intrinsic for non-faulting 
loads (authored by kmclaughlin).

Changed prior to commit:
  https://reviews.llvm.org/D71698?vs=239144&id=239531#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D71698/new/

https://reviews.llvm.org/D71698

Files:
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
  llvm/lib/Target/AArch64/AArch64ISelLowering.h
  llvm/lib/Target/AArch64/AArch64InstrInfo.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/SVEInstrFormats.td
  llvm/test/CodeGen/AArch64/sve-intrinsics-loads-nf.ll

Index: llvm/test/CodeGen/AArch64/sve-intrinsics-loads-nf.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-loads-nf.ll
@@ -0,0 +1,182 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+define  @ldnf1b( %pg, i8* %a) {
+; CHECK-LABEL: ldnf1b:
+; CHECK: ldnf1b { z0.b }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv16i8( %pg, i8* %a)
+  ret  %load
+}
+
+define  @ldnf1b_h( %pg, i8* %a) {
+; CHECK-LABEL: ldnf1b_h:
+; CHECK: ldnf1b { z0.h }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv8i8( %pg, i8* %a)
+  %res = zext  %load to 
+  ret  %res
+}
+
+define  @ldnf1sb_h( %pg, i8* %a) {
+; CHECK-LABEL: ldnf1sb_h:
+; CHECK: ldnf1sb { z0.h }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv8i8( %pg, i8* %a)
+  %res = sext  %load to 
+  ret  %res
+}
+
+define  @ldnf1h( %pg, i16* %a) {
+; CHECK-LABEL: ldnf1h:
+; CHECK: ldnf1h { z0.h }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv8i16( %pg, i16* %a)
+  ret  %load
+}
+
+define  @ldnf1h_f16( %pg, half* %a) {
+; CHECK-LABEL: ldnf1h_f16:
+; CHECK: ldnf1h { z0.h }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv8f16( %pg, half* %a)
+  ret  %load
+}
+
+define  @ldnf1b_s( %pg, i8* %a) {
+; CHECK-LABEL: ldnf1b_s:
+; CHECK: ldnf1b { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv4i8( %pg, i8* %a)
+  %res = zext  %load to 
+  ret  %res
+}
+
+define  @ldnf1sb_s( %pg, i8* %a) {
+; CHECK-LABEL: ldnf1sb_s:
+; CHECK: ldnf1sb { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv4i8( %pg, i8* %a)
+  %res = sext  %load to 
+  ret  %res
+}
+
+define  @ldnf1h_s( %pg, i16* %a) {
+; CHECK-LABEL: ldnf1h_s:
+; CHECK: ldnf1h { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv4i16( %pg, i16* %a)
+  %res = zext  %load to 
+  ret  %res
+}
+
+define  @ldnf1sh_s( %pg, i16* %a) {
+; CHECK-LABEL: ldnf1sh_s:
+; CHECK: ldnf1sh { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv4i16( %pg, i16* %a)
+  %res = sext  %load to 
+  ret  %res
+}
+
+define  @ldnf1w( %pg, i32* %a) {
+; CHECK-LABEL: ldnf1w:
+; CHECK: ldnf1w { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv4i32( %pg, i32* %a)
+  ret  %load
+}
+
+define  @ldnf1w_f32( %pg, float* %a) {
+; CHECK-LABEL: ldnf1w_f32:
+; CHECK: ldnf1w { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv4f32( %pg, float* %a)
+  ret  %load
+}
+
+define  @ldnf1b_d( %pg, i8* %a) {
+; CHECK-LABEL: ldnf1b_d:
+; CHECK: ldnf1b { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv2i8( %pg, i8* %a)
+  %res = zext  %load to 
+  ret  %res
+}
+
+define  @ldnf1sb_d( %pg, i8* %a) {
+; CHECK-LABEL: ldnf1sb_d:
+; CHECK: ldnf1sb { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv2i8( %pg, i8* %a)
+  %res = sext  %load to 
+  ret  %res
+}
+
+define  @ldnf1h_d( %pg, i16* %a) {
+; CHECK-LABEL: ldnf1h_d:
+; CHECK: ldnf1h { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv2i16( %pg, i16* %a)
+  %res = zext  %load to 
+  ret  %res
+}
+
+define  @ldnf1sh_d( %pg, i16* %a) {
+; CHECK-LABEL: ldnf1sh_d:
+; CHECK: ldnf1sh { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv2i16( %pg, i16* %a)
+  %res = sext  %load to 
+  ret  %res
+}
+
+define  @ldnf1w_d( %pg, i32* %a) {
+; CHECK-LABEL: ldnf1w_d:
+; CHECK: ldnf1w { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv2i32( %pg, i32* %a)
+  %res = zext  %load to 
+  ret  %res
+}
+
+define  @ldnf1sw_d( %pg, i32* %a) {
+; CHECK-LABEL: ldnf1sw_d:
+; CHECK: ldnf1sw { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv2i32( %pg, i32* %a)
+  %res = sext  %load to 
+  ret  %res
+}
+
+define  @ldnf1d( %pg, i64* %a) {
+; CHECK-LABEL: ldnf1d:
+; CHECK: ldnf1d { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv2i64( %pg, i64* %a)
+  ret  %load
+}
+
+define  @ldnf1d_f64( 

[PATCH] D73025: [AArch64][SVE] Add first-faulting load intrinsic

2020-01-21 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin updated this revision to Diff 239303.
kmclaughlin added a comment.

- Rebased patch after changes made to parent revision


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D73025/new/

https://reviews.llvm.org/D73025

Files:
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
  llvm/lib/Target/AArch64/AArch64ISelLowering.h
  llvm/lib/Target/AArch64/AArch64InstrInfo.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/SVEInstrFormats.td
  llvm/test/CodeGen/AArch64/sve-intrinsics-loads-ff.ll

Index: llvm/test/CodeGen/AArch64/sve-intrinsics-loads-ff.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-loads-ff.ll
@@ -0,0 +1,220 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+;
+; LDFF1B
+;
+
+define  @ldff1b( %pg, i8* %a) {
+; CHECK-LABEL: ldff1b:
+; CHECK: ldff1b { z0.b }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldff1.nxv16i8( %pg, i8* %a)
+  ret  %load
+}
+
+define  @ldff1b_h( %pg, i8* %a) {
+; CHECK-LABEL: ldff1b_h:
+; CHECK: ldff1b { z0.h }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldff1.nxv8i8( %pg, i8* %a)
+  %res = zext  %load to 
+  ret  %res
+}
+
+define  @ldff1b_s( %pg, i8* %a) {
+; CHECK-LABEL: ldff1b_s:
+; CHECK: ldff1b { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldff1.nxv4i8( %pg, i8* %a)
+  %res = zext  %load to 
+  ret  %res
+}
+
+define  @ldff1b_d( %pg, i8* %a) {
+; CHECK-LABEL: ldff1b_d:
+; CHECK: ldff1b { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldff1.nxv2i8( %pg, i8* %a)
+  %res = zext  %load to 
+  ret  %res
+}
+
+;
+; LDFF1SB
+;
+
+define  @ldff1sb_h( %pg, i8* %a) {
+; CHECK-LABEL: ldff1sb_h:
+; CHECK: ldff1sb { z0.h }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldff1.nxv8i8( %pg, i8* %a)
+  %res = sext  %load to 
+  ret  %res
+}
+
+define  @ldff1sb_s( %pg, i8* %a) {
+; CHECK-LABEL: ldff1sb_s:
+; CHECK: ldff1sb { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldff1.nxv4i8( %pg, i8* %a)
+  %res = sext  %load to 
+  ret  %res
+}
+
+define  @ldff1sb_d( %pg, i8* %a) {
+; CHECK-LABEL: ldff1sb_d:
+; CHECK: ldff1sb { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldff1.nxv2i8( %pg, i8* %a)
+  %res = sext  %load to 
+  ret  %res
+}
+
+;
+; LDFF1H
+;
+
+define  @ldff1h( %pg, i16* %a) {
+; CHECK-LABEL: ldff1h:
+; CHECK: ldff1h { z0.h }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldff1.nxv8i16( %pg, i16* %a)
+  ret  %load
+}
+
+define  @ldff1h_s( %pg, i16* %a) {
+; CHECK-LABEL: ldff1h_s:
+; CHECK: ldff1h { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldff1.nxv4i16( %pg, i16* %a)
+  %res = zext  %load to 
+  ret  %res
+}
+
+define  @ldff1h_d( %pg, i16* %a) {
+; CHECK-LABEL: ldff1h_d:
+; CHECK: ldff1h { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldff1.nxv2i16( %pg, i16* %a)
+  %res = zext  %load to 
+  ret  %res
+}
+
+define  @ldff1h_f16( %pg, half* %a) {
+; CHECK-LABEL: ldff1h_f16:
+; CHECK: ldff1h { z0.h }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldff1.nxv8f16( %pg, half* %a)
+  ret  %load
+}
+
+;
+; LDFF1SH
+;
+
+define  @ldff1sh_s( %pg, i16* %a) {
+; CHECK-LABEL: ldff1sh_s:
+; CHECK: ldff1sh { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldff1.nxv4i16( %pg, i16* %a)
+  %res = sext  %load to 
+  ret  %res
+}
+
+define  @ldff1sh_d( %pg, i16* %a) {
+; CHECK-LABEL: ldff1sh_d:
+; CHECK: ldff1sh { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldff1.nxv2i16( %pg, i16* %a)
+  %res = sext  %load to 
+  ret  %res
+}
+
+;
+; LDFF1W
+;
+
+define  @ldff1w( %pg, i32* %a) {
+; CHECK-LABEL: ldff1w:
+; CHECK: ldff1w { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldff1.nxv4i32( %pg, i32* %a)
+  ret  %load
+}
+
+define  @ldff1w_d( %pg, i32* %a) {
+; CHECK-LABEL: ldff1w_d:
+; CHECK: ldff1w { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldff1.nxv2i32( %pg, i32* %a)
+  %res = zext  %load to 
+  ret  %res
+}
+
+define  @ldff1w_f32( %pg, float* %a) {
+; CHECK-LABEL: ldff1w_f32:
+; CHECK: ldff1w { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldff1.nxv4f32( %pg, float* %a)
+  ret  %load
+}
+
+define  @ldff1w_2f32( %pg, float* %a) {
+; CHECK-LABEL: ldff1w_2f32:
+; CHECK: ldff1w { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldff1.nxv2f32( %pg, float* %a)
+  ret  %load
+}
+
+;
+; LDFF1SW
+;
+
+define  @ldff1sw_d( %pg, i32* %a) {
+; CHECK-LABEL: ldff1sw_d:
+; CHECK: ldff1sw { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldff1.nxv2i32( %pg, i32* %a)
+  %res = sext  %load to 
+  ret  %res
+}
+
+;
+; LDFF1D
+;
+
+define  @ldff1d( %pg, i64* %a) {
+; CHECK-LABEL: 

[PATCH] D73097: [AArch64][SVE] Add intrinsics for FFR manipulation

2020-01-21 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin created this revision.
kmclaughlin added reviewers: sdesmalen, efriedma, dancgr.
Herald added subscribers: psnobl, rkruppe, hiraditya, kristof.beyls, tschuett.
Herald added a reviewer: rengolin.
Herald added a project: LLVM.

Implements the following intrinsics (a usage sketch in IR follows the list):

- llvm.aarch64.sve.setffr
- llvm.aarch64.sve.rdffr
- llvm.aarch64.sve.rdffr.z
- llvm.aarch64.sve.wrffr
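
A minimal end-to-end sketch of the four intrinsics; the wrapper function is illustrative,
and the nxv16i1 predicate type is the one used throughout the tests below:

  define <vscale x 16 x i1> @ffr_sketch(<vscale x 16 x i1> %pg) {
    ; Initialise the FFR to all-true before a sequence of first-faulting loads.
    call void @llvm.aarch64.sve.setffr()
    ; Read the FFR back, unpredicated and with zeroing predication.
    %ffr  = call <vscale x 16 x i1> @llvm.aarch64.sve.rdffr()
    %ffrz = call <vscale x 16 x i1> @llvm.aarch64.sve.rdffr.z(<vscale x 16 x i1> %pg)
    ; Write a predicate back into the FFR.
    call void @llvm.aarch64.sve.wrffr(<vscale x 16 x i1> %ffrz)
    ret <vscale x 16 x i1> %ffr
  }

  declare void @llvm.aarch64.sve.setffr()
  declare <vscale x 16 x i1> @llvm.aarch64.sve.rdffr()
  declare <vscale x 16 x i1> @llvm.aarch64.sve.rdffr.z(<vscale x 16 x i1>)
  declare void @llvm.aarch64.sve.wrffr(<vscale x 16 x i1>)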


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D73097

Files:
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/SVEInstrFormats.td
  llvm/test/CodeGen/AArch64/sve-intrinsics-ffr-manipulation.ll

Index: llvm/test/CodeGen/AArch64/sve-intrinsics-ffr-manipulation.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-ffr-manipulation.ll
@@ -0,0 +1,50 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+;
+; RDFFR
+;
+
+define  @rdffr() {
+; CHECK-LABEL: rdffr:
+; CHECK: rdffr p0.b
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.rdffr()
+  ret  %out
+}
+
+define  @rdffr_z( %pg) {
+; CHECK-LABEL: rdffr_z:
+; CHECK: rdffr p0.b, p0/z
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.rdffr.z( %pg)
+  ret  %out
+}
+
+;
+; SETFFR
+;
+
+define void @set_ffr() {
+; CHECK-LABEL: set_ffr:
+; CHECK: setffr
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.setffr()
+  ret void
+}
+
+;
+; WRFFR
+;
+
+define void @wrffr( %a) {
+; CHECK-LABEL: wrffr:
+; CHECK: wrffr p0.b
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.wrffr( %a)
+  ret void
+}
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.rdffr()
+declare <vscale x 16 x i1> @llvm.aarch64.sve.rdffr.z(<vscale x 16 x i1>)
+declare void @llvm.aarch64.sve.setffr()
+declare void @llvm.aarch64.sve.wrffr(<vscale x 16 x i1>)
Index: llvm/lib/Target/AArch64/SVEInstrFormats.td
===
--- llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -5094,6 +5094,17 @@
   let Uses = [FFR];
 }
 
+multiclass sve_int_rdffr_pred {
+  def _REAL : sve_int_rdffr_pred;
+
+  // We need a layer of indirection because early machine code passes balk at
+  // physical register (i.e. FFR) uses that have no previous definition.
+  let hasSideEffects = 1, hasNoSchedulingInfo = 1 in {
+  def "" : Pseudo<(outs PPR8:$Pd), (ins PPRAny:$Pg), [(set (nxv16i1 PPR8:$Pd), (op (nxv16i1 PPRAny:$Pg)))]>,
+   PseudoInstExpansion<(!cast(NAME # _REAL) PPR8:$Pd, PPRAny:$Pg)>;
+  }
+}
+
 class sve_int_rdffr_unpred : I<
   (outs PPR8:$Pd), (ins),
   asm, "\t$Pd",
@@ -5106,11 +5117,22 @@
   let Uses = [FFR];
 }
 
-class sve_int_wrffr
+multiclass sve_int_rdffr_unpred {
+  def _REAL : sve_int_rdffr_unpred;
+
+  // We need a layer of indirection because early machine code passes balk at
+  // physical register (i.e. FFR) uses that have no previous definition.
+  let hasSideEffects = 1, hasNoSchedulingInfo = 1 in {
+  def "" : Pseudo<(outs PPR8:$Pd), (ins), [(set (nxv16i1 PPR8:$Pd), (op))]>,
+   PseudoInstExpansion<(!cast(NAME # _REAL) PPR8:$Pd)>;
+  }
+}
+
+class sve_int_wrffr
 : I<(outs), (ins PPR8:$Pn),
   asm, "\t$Pn",
   "",
-  []>, Sched<[]> {
+  [(op (nxv16i1 PPR8:$Pn))]>, Sched<[]> {
   bits<4> Pn;
   let Inst{31-9} = 0b00100101001010001001000;
   let Inst{8-5}  = Pn;
@@ -5120,11 +5142,11 @@
   let Defs = [FFR];
 }
 
-class sve_int_setffr
+class sve_int_setffr
 : I<(outs), (ins),
   asm, "",
   "",
-  []>, Sched<[]> {
+  [(op)]>, Sched<[]> {
   let Inst{31-0} = 0b00100101001011001001;
 
   let hasSideEffects = 1;
Index: llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
===
--- llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -78,11 +78,11 @@
 
 let Predicates = [HasSVE] in {
 
-  def RDFFR_PPz  : sve_int_rdffr_pred<0b0, "rdffr">;
-  def RDFFRS_PPz : sve_int_rdffr_pred<0b1, "rdffrs">;
-  def RDFFR_P: sve_int_rdffr_unpred<"rdffr">;
-  def SETFFR : sve_int_setffr<"setffr">;
-  def WRFFR  : sve_int_wrffr<"wrffr">;
+  defm RDFFR_PPz  : sve_int_rdffr_pred<0b0, "rdffr", int_aarch64_sve_rdffr_z>;
+  def  RDFFRS_PPz : sve_int_rdffr_pred<0b1, "rdffrs">;
+  defm RDFFR_P: sve_int_rdffr_unpred<"rdffr", int_aarch64_sve_rdffr>;
+  def  SETFFR : sve_int_setffr<"setffr", int_aarch64_sve_setffr>;
+  def  WRFFR  : sve_int_wrffr<"wrffr", int_aarch64_sve_wrffr>;
 
   defm ADD_ZZZ   : sve_int_bin_cons_arit_0<0b000, "add", add>;
   defm SUB_ZZZ   : sve_int_bin_cons_arit_0<0b001, "sub", sub>;
Index: llvm/include/llvm/IR/IntrinsicsAArch64.td
===
--- llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -1278,6 +1278,15 @@
 def int_aarch64_sve_cntp : AdvSIMD_SVE_CNTP_Intrinsic;
 
 //
+// FFR manipulation
+//
+
+def int_aarch64_sve_rdffr   : GCCBuiltin<"__builtin_sve_svrdffr">,   

[PATCH] D71698: [AArch64][SVE] Add intrinsic for non-faulting loads

2020-01-20 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin updated this revision to Diff 239144.
kmclaughlin added a comment.

- Some minor changes to performSignExtendInRegCombine to address comments from
@sdesmalen; the load-and-sign-extend pattern it folds is sketched below
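
The combine fires on IR of roughly this shape (a sketch with reconstructed
scalable-vector types; the wrapper function is illustrative), folding the explicit
sext into a single sign-extending LDNF1SH:

  define <vscale x 2 x i64> @ldnf1sh_sketch(<vscale x 2 x i1> %pg, i16* %a) {
    %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldnf1.nxv2i16(<vscale x 2 x i1> %pg, i16* %a)
    %res = sext <vscale x 2 x i16> %load to <vscale x 2 x i64>
    ret <vscale x 2 x i64> %res
  }

  declare <vscale x 2 x i16> @llvm.aarch64.sve.ldnf1.nxv2i16(<vscale x 2 x i1>, i16*)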


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D71698/new/

https://reviews.llvm.org/D71698

Files:
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
  llvm/lib/Target/AArch64/AArch64ISelLowering.h
  llvm/lib/Target/AArch64/AArch64InstrInfo.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/SVEInstrFormats.td
  llvm/test/CodeGen/AArch64/sve-intrinsics-loads-nf.ll

Index: llvm/test/CodeGen/AArch64/sve-intrinsics-loads-nf.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-loads-nf.ll
@@ -0,0 +1,182 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+define  @ldnf1b( %pg, i8* %a) {
+; CHECK-LABEL: ldnf1b:
+; CHECK: ldnf1b { z0.b }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv16i8( %pg, i8* %a)
+  ret  %load
+}
+
+define  @ldnf1b_h( %pg, i8* %a) {
+; CHECK-LABEL: ldnf1b_h:
+; CHECK: ldnf1b { z0.h }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv8i8( %pg, i8* %a)
+  %res = zext  %load to 
+  ret  %res
+}
+
+define  @ldnf1sb_h( %pg, i8* %a) {
+; CHECK-LABEL: ldnf1sb_h:
+; CHECK: ldnf1sb { z0.h }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv8i8( %pg, i8* %a)
+  %res = sext  %load to 
+  ret  %res
+}
+
+define  @ldnf1h( %pg, i16* %a) {
+; CHECK-LABEL: ldnf1h:
+; CHECK: ldnf1h { z0.h }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv8i16( %pg, i16* %a)
+  ret  %load
+}
+
+define  @ldnf1h_f16( %pg, half* %a) {
+; CHECK-LABEL: ldnf1h_f16:
+; CHECK: ldnf1h { z0.h }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv8f16( %pg, half* %a)
+  ret  %load
+}
+
+define  @ldnf1b_s( %pg, i8* %a) {
+; CHECK-LABEL: ldnf1b_s:
+; CHECK: ldnf1b { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv4i8( %pg, i8* %a)
+  %res = zext  %load to 
+  ret  %res
+}
+
+define  @ldnf1sb_s( %pg, i8* %a) {
+; CHECK-LABEL: ldnf1sb_s:
+; CHECK: ldnf1sb { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv4i8( %pg, i8* %a)
+  %res = sext  %load to 
+  ret  %res
+}
+
+define  @ldnf1h_s( %pg, i16* %a) {
+; CHECK-LABEL: ldnf1h_s:
+; CHECK: ldnf1h { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv4i16( %pg, i16* %a)
+  %res = zext  %load to 
+  ret  %res
+}
+
+define  @ldnf1sh_s( %pg, i16* %a) {
+; CHECK-LABEL: ldnf1sh_s:
+; CHECK: ldnf1sh { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv4i16( %pg, i16* %a)
+  %res = sext  %load to 
+  ret  %res
+}
+
+define  @ldnf1w( %pg, i32* %a) {
+; CHECK-LABEL: ldnf1w:
+; CHECK: ldnf1w { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv4i32( %pg, i32* %a)
+  ret  %load
+}
+
+define  @ldnf1w_f32( %pg, float* %a) {
+; CHECK-LABEL: ldnf1w_f32:
+; CHECK: ldnf1w { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv4f32( %pg, float* %a)
+  ret  %load
+}
+
+define  @ldnf1b_d( %pg, i8* %a) {
+; CHECK-LABEL: ldnf1b_d:
+; CHECK: ldnf1b { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv2i8( %pg, i8* %a)
+  %res = zext  %load to 
+  ret  %res
+}
+
+define  @ldnf1sb_d( %pg, i8* %a) {
+; CHECK-LABEL: ldnf1sb_d:
+; CHECK: ldnf1sb { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv2i8( %pg, i8* %a)
+  %res = sext  %load to 
+  ret  %res
+}
+
+define  @ldnf1h_d( %pg, i16* %a) {
+; CHECK-LABEL: ldnf1h_d:
+; CHECK: ldnf1h { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv2i16( %pg, i16* %a)
+  %res = zext  %load to 
+  ret  %res
+}
+
+define  @ldnf1sh_d( %pg, i16* %a) {
+; CHECK-LABEL: ldnf1sh_d:
+; CHECK: ldnf1sh { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv2i16( %pg, i16* %a)
+  %res = sext  %load to 
+  ret  %res
+}
+
+define  @ldnf1w_d( %pg, i32* %a) {
+; CHECK-LABEL: ldnf1w_d:
+; CHECK: ldnf1w { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv2i32( %pg, i32* %a)
+  %res = zext  %load to 
+  ret  %res
+}
+
+define  @ldnf1sw_d( %pg, i32* %a) {
+; CHECK-LABEL: ldnf1sw_d:
+; CHECK: ldnf1sw { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv2i32( %pg, i32* %a)
+  %res = sext  %load to 
+  ret  %res
+}
+
+define  @ldnf1d( %pg, i64* %a) {
+; CHECK-LABEL: ldnf1d:
+; CHECK: ldnf1d { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv2i64( %pg, i64* %a)
+  ret  %load
+}
+
+define  @ldnf1d_f64( %pg, double* %a) {
+; CHECK-LABEL: ldnf1d_f64:
+; CHECK: ldnf1d { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  

[PATCH] D73025: [AArch64][SVE] Add first-faulting load intrinsic

2020-01-20 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin created this revision.
kmclaughlin added reviewers: sdesmalen, efriedma, andwar, dancgr.
Herald added subscribers: psnobl, rkruppe, hiraditya, kristof.beyls, tschuett.
Herald added a reviewer: rengolin.
Herald added a project: LLVM.
kmclaughlin added a parent revision: D71698: [AArch64][SVE] Add intrinsic for 
non-faulting loads.

Implements the llvm.aarch64.sve.ldff1 intrinsic and DAG
combine rules for first-faulting loads with sign & zero extends
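
A short sketch of the intrinsic and of the load-plus-extend pattern the DAG combine
folds into a single extending LDFF1B; the scalable-vector types are reconstructed
(the archive strips them) and the wrapper function is illustrative only:

  define <vscale x 4 x i32> @ldff1_sketch(<vscale x 4 x i1> %pg, i32* %p32, i8* %p8) {
    ; Plain first-faulting load of 32-bit elements.
    %w = call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.nxv4i32(<vscale x 4 x i1> %pg, i32* %p32)
    ; 8-bit load whose zero-extension is matched as an extending LDFF1B { z0.s }.
    %b = call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.nxv4i8(<vscale x 4 x i1> %pg, i8* %p8)
    %bext = zext <vscale x 4 x i8> %b to <vscale x 4 x i32>
    %sum = add <vscale x 4 x i32> %w, %bext
    ret <vscale x 4 x i32> %sum
  }

  declare <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.nxv4i32(<vscale x 4 x i1>, i32*)
  declare <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.nxv4i8(<vscale x 4 x i1>, i8*)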


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D73025

Files:
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
  llvm/lib/Target/AArch64/AArch64ISelLowering.h
  llvm/lib/Target/AArch64/AArch64InstrInfo.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/SVEInstrFormats.td
  llvm/test/CodeGen/AArch64/sve-intrinsics-loads-ff.ll

Index: llvm/test/CodeGen/AArch64/sve-intrinsics-loads-ff.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-loads-ff.ll
@@ -0,0 +1,220 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+;
+; LDFF1B
+;
+
+define  @ldff1b( %pg, i8* %a) {
+; CHECK-LABEL: ldff1b:
+; CHECK: ldff1b { z0.b }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldff1.nxv16i8( %pg, i8* %a)
+  ret  %load
+}
+
+define  @ldff1b_h( %pg, i8* %a) {
+; CHECK-LABEL: ldff1b_h:
+; CHECK: ldff1b { z0.h }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldff1.nxv8i8( %pg, i8* %a)
+  %res = zext  %load to 
+  ret  %res
+}
+
+define  @ldff1b_s( %pg, i8* %a) {
+; CHECK-LABEL: ldff1b_s:
+; CHECK: ldff1b { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldff1.nxv4i8( %pg, i8* %a)
+  %res = zext  %load to 
+  ret  %res
+}
+
+define  @ldff1b_d( %pg, i8* %a) {
+; CHECK-LABEL: ldff1b_d:
+; CHECK: ldff1b { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldff1.nxv2i8( %pg, i8* %a)
+  %res = zext  %load to 
+  ret  %res
+}
+
+;
+; LDFF1SB
+;
+
+define  @ldff1sb_h( %pg, i8* %a) {
+; CHECK-LABEL: ldff1sb_h:
+; CHECK: ldff1sb { z0.h }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldff1.nxv8i8( %pg, i8* %a)
+  %res = sext  %load to 
+  ret  %res
+}
+
+define  @ldff1sb_s( %pg, i8* %a) {
+; CHECK-LABEL: ldff1sb_s:
+; CHECK: ldff1sb { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldff1.nxv4i8( %pg, i8* %a)
+  %res = sext  %load to 
+  ret  %res
+}
+
+define  @ldff1sb_d( %pg, i8* %a) {
+; CHECK-LABEL: ldff1sb_d:
+; CHECK: ldff1sb { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldff1.nxv2i8( %pg, i8* %a)
+  %res = sext  %load to 
+  ret  %res
+}
+
+;
+; LDFF1H
+;
+
+define  @ldff1h( %pg, i16* %a) {
+; CHECK-LABEL: ldff1h:
+; CHECK: ldff1h { z0.h }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldff1.nxv8i16( %pg, i16* %a)
+  ret  %load
+}
+
+define  @ldff1h_s( %pg, i16* %a) {
+; CHECK-LABEL: ldff1h_s:
+; CHECK: ldff1h { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldff1.nxv4i16( %pg, i16* %a)
+  %res = zext  %load to 
+  ret  %res
+}
+
+define  @ldff1h_d( %pg, i16* %a) {
+; CHECK-LABEL: ldff1h_d:
+; CHECK: ldff1h { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldff1.nxv2i16( %pg, i16* %a)
+  %res = zext  %load to 
+  ret  %res
+}
+
+define  @ldff1h_f16( %pg, half* %a) {
+; CHECK-LABEL: ldff1h_f16:
+; CHECK: ldff1h { z0.h }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldff1.nxv8f16( %pg, half* %a)
+  ret  %load
+}
+
+;
+; LDFF1SH
+;
+
+define  @ldff1sh_s( %pg, i16* %a) {
+; CHECK-LABEL: ldff1sh_s:
+; CHECK: ldff1sh { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldff1.nxv4i16( %pg, i16* %a)
+  %res = sext  %load to 
+  ret  %res
+}
+
+define  @ldff1sh_d( %pg, i16* %a) {
+; CHECK-LABEL: ldff1sh_d:
+; CHECK: ldff1sh { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldff1.nxv2i16( %pg, i16* %a)
+  %res = sext  %load to 
+  ret  %res
+}
+
+;
+; LDFF1W
+;
+
+define  @ldff1w( %pg, i32* %a) {
+; CHECK-LABEL: ldff1w:
+; CHECK: ldff1w { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldff1.nxv4i32( %pg, i32* %a)
+  ret  %load
+}
+
+define  @ldff1w_d( %pg, i32* %a) {
+; CHECK-LABEL: ldff1w_d:
+; CHECK: ldff1w { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldff1.nxv2i32( %pg, i32* %a)
+  %res = zext  %load to 
+  ret  %res
+}
+
+define  @ldff1w_f32( %pg, float* %a) {
+; CHECK-LABEL: ldff1w_f32:
+; CHECK: ldff1w { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldff1.nxv4f32( %pg, float* %a)
+  ret  %load
+}
+
+define  @ldff1w_2f32( %pg, float* %a) {
+; CHECK-LABEL: ldff1w_2f32:
+; CHECK: ldff1w { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldff1.nxv2f32( %pg, float* %a)
+  ret  %load
+}
+
+;
+; LDFF1SW
+;

[PATCH] D72612: [AArch64][SVE] Add ImmArg property to intrinsics with immediates

2020-01-17 Thread Kerry McLaughlin via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
kmclaughlin marked an inline comment as done.
Closed by commit rGfe3bb8ec9683: [AArch64][SVE] Add ImmArg property to 
intrinsics with immediates (authored by kmclaughlin).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D72612/new/

https://reviews.llvm.org/D72612

Files:
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64InstrFormats.td
  llvm/lib/Target/AArch64/SVEInstrFormats.td

Index: llvm/lib/Target/AArch64/SVEInstrFormats.td
===
--- llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -1646,12 +1646,12 @@
 let Inst{19-16} = Zm;
   }
 
-  def : Pat<(nxv8f16 (op nxv8f16:$Op1, nxv8f16:$Op2, nxv8f16:$Op3, (i32 VectorIndexH32b:$idx))),
-(!cast(NAME # _H) $Op1, $Op2, $Op3, VectorIndexH32b:$idx)>;
-  def : Pat<(nxv4f32 (op nxv4f32:$Op1, nxv4f32:$Op2, nxv4f32:$Op3, (i32 VectorIndexS32b:$idx))),
-(!cast(NAME # _S) $Op1, $Op2, $Op3, VectorIndexS32b:$idx)>;
-  def : Pat<(nxv2f64 (op nxv2f64:$Op1, nxv2f64:$Op2, nxv2f64:$Op3, (i32 VectorIndexD32b:$idx))),
-(!cast(NAME # _D) $Op1, $Op2, $Op3, VectorIndexD32b:$idx)>;
+  def : Pat<(nxv8f16 (op nxv8f16:$Op1, nxv8f16:$Op2, nxv8f16:$Op3, (i32 VectorIndexH32b_timm:$idx))),
+(!cast(NAME # _H) $Op1, $Op2, $Op3, VectorIndexH32b_timm:$idx)>;
+  def : Pat<(nxv4f32 (op nxv4f32:$Op1, nxv4f32:$Op2, nxv4f32:$Op3, (i32 VectorIndexS32b_timm:$idx))),
+(!cast(NAME # _S) $Op1, $Op2, $Op3, VectorIndexS32b_timm:$idx)>;
+  def : Pat<(nxv2f64 (op nxv2f64:$Op1, nxv2f64:$Op2, nxv2f64:$Op3, (i32 VectorIndexD32b_timm:$idx))),
+(!cast(NAME # _D) $Op1, $Op2, $Op3, VectorIndexD32b_timm:$idx)>;
 }
 
 
@@ -1694,12 +1694,12 @@
 let Inst{19-16} = Zm;
   }
 
-  def : Pat<(nxv8f16 (op nxv8f16:$Op1, nxv8f16:$Op2, (i32 VectorIndexH32b:$idx))),
-(!cast(NAME # _H) $Op1, $Op2, VectorIndexH32b:$idx)>;
-  def : Pat<(nxv4f32 (op nxv4f32:$Op1, nxv4f32:$Op2, (i32 VectorIndexS32b:$idx))),
-(!cast(NAME # _S) $Op1, $Op2, VectorIndexS32b:$idx)>;
-  def : Pat<(nxv2f64 (op nxv2f64:$Op1, nxv2f64:$Op2, (i32 VectorIndexD32b:$idx))),
-(!cast(NAME # _D) $Op1, $Op2, VectorIndexD32b:$idx)>;
+  def : Pat<(nxv8f16 (op nxv8f16:$Op1, nxv8f16:$Op2, (i32 VectorIndexH32b_timm:$idx))),
+(!cast(NAME # _H) $Op1, $Op2, VectorIndexH32b_timm:$idx)>;
+  def : Pat<(nxv4f32 (op nxv4f32:$Op1, nxv4f32:$Op2, (i32 VectorIndexS32b_timm:$idx))),
+(!cast(NAME # _S) $Op1, $Op2, VectorIndexS32b_timm:$idx)>;
+  def : Pat<(nxv2f64 (op nxv2f64:$Op1, nxv2f64:$Op2, (i32 VectorIndexD32b_timm:$idx))),
+(!cast(NAME # _D) $Op1, $Op2, VectorIndexD32b_timm:$idx)>;
 }
 
 //===--===//
@@ -1785,10 +1785,10 @@
 let Inst{19-16} = Zm;
   }
 
-  def : Pat<(nxv8f16 (op nxv8f16:$Op1, nxv8f16:$Op2, nxv8f16:$Op3, (i32 VectorIndexS32b:$idx), (i32 complexrotateop:$imm))),
-(!cast(NAME # _H) $Op1, $Op2, $Op3, VectorIndexS32b:$idx, complexrotateop:$imm)>;
-  def : Pat<(nxv4f32 (op nxv4f32:$Op1, nxv4f32:$Op2, nxv4f32:$Op3, (i32 VectorIndexD32b:$idx), (i32 complexrotateop:$imm))),
-(!cast(NAME # _S) $Op1, $Op2, $Op3, VectorIndexD32b:$idx, complexrotateop:$imm)>;
+  def : Pat<(nxv8f16 (op nxv8f16:$Op1, nxv8f16:$Op2, nxv8f16:$Op3, (i32 VectorIndexS32b_timm:$idx), (i32 complexrotateop:$imm))),
+(!cast(NAME # _H) $Op1, $Op2, $Op3, VectorIndexS32b_timm:$idx, complexrotateop:$imm)>;
+  def : Pat<(nxv4f32 (op nxv4f32:$Op1, nxv4f32:$Op2, nxv4f32:$Op3, (i32 VectorIndexD32b_timm:$idx), (i32 complexrotateop:$imm))),
+(!cast(NAME # _S) $Op1, $Op2, $Op3, VectorIndexD32b_timm:$idx, complexrotateop:$imm)>;
 }
 
 //===--===//
@@ -1949,7 +1949,7 @@
 multiclass sve2_fp_mla_long_by_indexed_elem opc, string asm,
 SDPatternOperator op> {
   def NAME : sve2_fp_mla_long_by_indexed_elem;
-  def : SVE_4_Op_Imm_Pat(NAME)>;
+  def : SVE_4_Op_Imm_Pat(NAME)>;
 }
 
 //===--===//
@@ -2479,23 +2479,23 @@
 
 multiclass sve_intx_dot_by_indexed_elem {
-  def _S : sve_intx_dot_by_indexed_elem<0b0, opc, asm, ZPR32, ZPR8, ZPR3b8, VectorIndexS32b> {
+  def _S : sve_intx_dot_by_indexed_elem<0b0, opc, asm, ZPR32, ZPR8, ZPR3b8, VectorIndexS32b_timm> {
 bits<2> iop;
 bits<3> Zm;
 let Inst{20-19} = iop;
 let Inst{18-16} = Zm;
   }
-  def _D : sve_intx_dot_by_indexed_elem<0b1, opc, asm, ZPR64, ZPR16, ZPR4b16, VectorIndexD32b> {
+  def _D : sve_intx_dot_by_indexed_elem<0b1, opc, asm, ZPR64, ZPR16, ZPR4b16, VectorIndexD32b_timm> {
 bits<1> iop;
 bits<4> Zm;
 let Inst{20} = iop;
 let Inst{19-16} = Zm;

[PATCH] D72612: [AArch64][SVE] Add ImmArg property to intrinsics with immediates

2020-01-16 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin marked an inline comment as done.
kmclaughlin added inline comments.



Comment at: llvm/lib/Target/AArch64/AArch64InstrFormats.td:1108
+  def "" : AsmVectorIndexOpnd, PatLeaf<(ty imm), pred>;
+  def _timm : AsmVectorIndexOpnd, PatLeaf<(ty timm), pred>;
+}

efriedma wrote:
> Using ImmLeaf/TImmLeaf doesn't work here?
Thanks for the suggestion; it looks like I can use ImmLeaf & TImmLeaf here (and
use //Imm// again instead of //N->getZExtValue()// in VectorIndex1, etc. below).


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D72612/new/

https://reviews.llvm.org/D72612





[PATCH] D72612: [AArch64][SVE] Add ImmArg property to intrinsics with immediates

2020-01-16 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin updated this revision to Diff 238445.
kmclaughlin added a comment.

- Replace PatLeaf with ImmLeaf & TImmLeaf in the VectorIndex multiclass


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D72612/new/

https://reviews.llvm.org/D72612

Files:
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64InstrFormats.td
  llvm/lib/Target/AArch64/SVEInstrFormats.td

Index: llvm/lib/Target/AArch64/SVEInstrFormats.td
===
--- llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -1646,12 +1646,12 @@
 let Inst{19-16} = Zm;
   }
 
-  def : Pat<(nxv8f16 (op nxv8f16:$Op1, nxv8f16:$Op2, nxv8f16:$Op3, (i32 VectorIndexH32b:$idx))),
-(!cast(NAME # _H) $Op1, $Op2, $Op3, VectorIndexH32b:$idx)>;
-  def : Pat<(nxv4f32 (op nxv4f32:$Op1, nxv4f32:$Op2, nxv4f32:$Op3, (i32 VectorIndexS32b:$idx))),
-(!cast(NAME # _S) $Op1, $Op2, $Op3, VectorIndexS32b:$idx)>;
-  def : Pat<(nxv2f64 (op nxv2f64:$Op1, nxv2f64:$Op2, nxv2f64:$Op3, (i32 VectorIndexD32b:$idx))),
-(!cast(NAME # _D) $Op1, $Op2, $Op3, VectorIndexD32b:$idx)>;
+  def : Pat<(nxv8f16 (op nxv8f16:$Op1, nxv8f16:$Op2, nxv8f16:$Op3, (i32 VectorIndexH32b_timm:$idx))),
+(!cast(NAME # _H) $Op1, $Op2, $Op3, VectorIndexH32b_timm:$idx)>;
+  def : Pat<(nxv4f32 (op nxv4f32:$Op1, nxv4f32:$Op2, nxv4f32:$Op3, (i32 VectorIndexS32b_timm:$idx))),
+(!cast(NAME # _S) $Op1, $Op2, $Op3, VectorIndexS32b_timm:$idx)>;
+  def : Pat<(nxv2f64 (op nxv2f64:$Op1, nxv2f64:$Op2, nxv2f64:$Op3, (i32 VectorIndexD32b_timm:$idx))),
+(!cast(NAME # _D) $Op1, $Op2, $Op3, VectorIndexD32b_timm:$idx)>;
 }
 
 
@@ -1694,12 +1694,12 @@
 let Inst{19-16} = Zm;
   }
 
-  def : Pat<(nxv8f16 (op nxv8f16:$Op1, nxv8f16:$Op2, (i32 VectorIndexH32b:$idx))),
-(!cast(NAME # _H) $Op1, $Op2, VectorIndexH32b:$idx)>;
-  def : Pat<(nxv4f32 (op nxv4f32:$Op1, nxv4f32:$Op2, (i32 VectorIndexS32b:$idx))),
-(!cast(NAME # _S) $Op1, $Op2, VectorIndexS32b:$idx)>;
-  def : Pat<(nxv2f64 (op nxv2f64:$Op1, nxv2f64:$Op2, (i32 VectorIndexD32b:$idx))),
-(!cast(NAME # _D) $Op1, $Op2, VectorIndexD32b:$idx)>;
+  def : Pat<(nxv8f16 (op nxv8f16:$Op1, nxv8f16:$Op2, (i32 VectorIndexH32b_timm:$idx))),
+(!cast(NAME # _H) $Op1, $Op2, VectorIndexH32b_timm:$idx)>;
+  def : Pat<(nxv4f32 (op nxv4f32:$Op1, nxv4f32:$Op2, (i32 VectorIndexS32b_timm:$idx))),
+(!cast(NAME # _S) $Op1, $Op2, VectorIndexS32b_timm:$idx)>;
+  def : Pat<(nxv2f64 (op nxv2f64:$Op1, nxv2f64:$Op2, (i32 VectorIndexD32b_timm:$idx))),
+(!cast(NAME # _D) $Op1, $Op2, VectorIndexD32b_timm:$idx)>;
 }
 
 //===--===//
@@ -1785,10 +1785,10 @@
 let Inst{19-16} = Zm;
   }
 
-  def : Pat<(nxv8f16 (op nxv8f16:$Op1, nxv8f16:$Op2, nxv8f16:$Op3, (i32 VectorIndexS32b:$idx), (i32 complexrotateop:$imm))),
-(!cast(NAME # _H) $Op1, $Op2, $Op3, VectorIndexS32b:$idx, complexrotateop:$imm)>;
-  def : Pat<(nxv4f32 (op nxv4f32:$Op1, nxv4f32:$Op2, nxv4f32:$Op3, (i32 VectorIndexD32b:$idx), (i32 complexrotateop:$imm))),
-(!cast(NAME # _S) $Op1, $Op2, $Op3, VectorIndexD32b:$idx, complexrotateop:$imm)>;
+  def : Pat<(nxv8f16 (op nxv8f16:$Op1, nxv8f16:$Op2, nxv8f16:$Op3, (i32 VectorIndexS32b_timm:$idx), (i32 complexrotateop:$imm))),
+(!cast(NAME # _H) $Op1, $Op2, $Op3, VectorIndexS32b_timm:$idx, complexrotateop:$imm)>;
+  def : Pat<(nxv4f32 (op nxv4f32:$Op1, nxv4f32:$Op2, nxv4f32:$Op3, (i32 VectorIndexD32b_timm:$idx), (i32 complexrotateop:$imm))),
+(!cast(NAME # _S) $Op1, $Op2, $Op3, VectorIndexD32b_timm:$idx, complexrotateop:$imm)>;
 }
 
 //===--===//
@@ -1949,7 +1949,7 @@
 multiclass sve2_fp_mla_long_by_indexed_elem opc, string asm,
 SDPatternOperator op> {
   def NAME : sve2_fp_mla_long_by_indexed_elem;
-  def : SVE_4_Op_Imm_Pat(NAME)>;
+  def : SVE_4_Op_Imm_Pat(NAME)>;
 }
 
 //===--===//
@@ -2479,23 +2479,23 @@
 
 multiclass sve_intx_dot_by_indexed_elem {
-  def _S : sve_intx_dot_by_indexed_elem<0b0, opc, asm, ZPR32, ZPR8, ZPR3b8, VectorIndexS32b> {
+  def _S : sve_intx_dot_by_indexed_elem<0b0, opc, asm, ZPR32, ZPR8, ZPR3b8, VectorIndexS32b_timm> {
 bits<2> iop;
 bits<3> Zm;
 let Inst{20-19} = iop;
 let Inst{18-16} = Zm;
   }
-  def _D : sve_intx_dot_by_indexed_elem<0b1, opc, asm, ZPR64, ZPR16, ZPR4b16, VectorIndexD32b> {
+  def _D : sve_intx_dot_by_indexed_elem<0b1, opc, asm, ZPR64, ZPR16, ZPR4b16, VectorIndexD32b_timm> {
 bits<1> iop;
 bits<4> Zm;
 let Inst{20} = iop;
 let Inst{19-16} = Zm;
   }
 
-  def : Pat<(nxv4i32 (op nxv4i32:$Op1, nxv16i8:$Op2, nxv16i8:$Op3, (i32 VectorIndexS32b:$idx))),
-(!cast(NAME # 

[PATCH] D72612: [AArch64][SVE] Add ImmArg property to intrinsics with immediates

2020-01-15 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin updated this revision to Diff 238199.
kmclaughlin added a comment.

- Removed shiftimm patterns and reused tvecshiftR8, etc
- Removed complex patterns used by AsmVectorIndexOpnd and instead created a 
multiclass (VectorIndex) to create a PatLeaf with timm if "_timm" is appended


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D72612/new/

https://reviews.llvm.org/D72612

Files:
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64InstrFormats.td
  llvm/lib/Target/AArch64/SVEInstrFormats.td

Index: llvm/lib/Target/AArch64/SVEInstrFormats.td
===
--- llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -1639,12 +1639,12 @@
 let Inst{19-16} = Zm;
   }
 
-  def : Pat<(nxv8f16 (op nxv8f16:$Op1, nxv8f16:$Op2, nxv8f16:$Op3, (i32 VectorIndexH32b:$idx))),
-(!cast(NAME # _H) $Op1, $Op2, $Op3, VectorIndexH32b:$idx)>;
-  def : Pat<(nxv4f32 (op nxv4f32:$Op1, nxv4f32:$Op2, nxv4f32:$Op3, (i32 VectorIndexS32b:$idx))),
-(!cast(NAME # _S) $Op1, $Op2, $Op3, VectorIndexS32b:$idx)>;
-  def : Pat<(nxv2f64 (op nxv2f64:$Op1, nxv2f64:$Op2, nxv2f64:$Op3, (i32 VectorIndexD32b:$idx))),
-(!cast(NAME # _D) $Op1, $Op2, $Op3, VectorIndexD32b:$idx)>;
+  def : Pat<(nxv8f16 (op nxv8f16:$Op1, nxv8f16:$Op2, nxv8f16:$Op3, (i32 VectorIndexH32b_timm:$idx))),
+(!cast(NAME # _H) $Op1, $Op2, $Op3, VectorIndexH32b_timm:$idx)>;
+  def : Pat<(nxv4f32 (op nxv4f32:$Op1, nxv4f32:$Op2, nxv4f32:$Op3, (i32 VectorIndexS32b_timm:$idx))),
+(!cast(NAME # _S) $Op1, $Op2, $Op3, VectorIndexS32b_timm:$idx)>;
+  def : Pat<(nxv2f64 (op nxv2f64:$Op1, nxv2f64:$Op2, nxv2f64:$Op3, (i32 VectorIndexD32b_timm:$idx))),
+(!cast(NAME # _D) $Op1, $Op2, $Op3, VectorIndexD32b_timm:$idx)>;
 }
 
 
@@ -1687,12 +1687,12 @@
 let Inst{19-16} = Zm;
   }
 
-  def : Pat<(nxv8f16 (op nxv8f16:$Op1, nxv8f16:$Op2, (i32 VectorIndexH32b:$idx))),
-(!cast(NAME # _H) $Op1, $Op2, VectorIndexH32b:$idx)>;
-  def : Pat<(nxv4f32 (op nxv4f32:$Op1, nxv4f32:$Op2, (i32 VectorIndexS32b:$idx))),
-(!cast(NAME # _S) $Op1, $Op2, VectorIndexS32b:$idx)>;
-  def : Pat<(nxv2f64 (op nxv2f64:$Op1, nxv2f64:$Op2, (i32 VectorIndexD32b:$idx))),
-(!cast(NAME # _D) $Op1, $Op2, VectorIndexD32b:$idx)>;
+  def : Pat<(nxv8f16 (op nxv8f16:$Op1, nxv8f16:$Op2, (i32 VectorIndexH32b_timm:$idx))),
+(!cast(NAME # _H) $Op1, $Op2, VectorIndexH32b_timm:$idx)>;
+  def : Pat<(nxv4f32 (op nxv4f32:$Op1, nxv4f32:$Op2, (i32 VectorIndexS32b_timm:$idx))),
+(!cast(NAME # _S) $Op1, $Op2, VectorIndexS32b_timm:$idx)>;
+  def : Pat<(nxv2f64 (op nxv2f64:$Op1, nxv2f64:$Op2, (i32 VectorIndexD32b_timm:$idx))),
+(!cast(NAME # _D) $Op1, $Op2, VectorIndexD32b_timm:$idx)>;
 }
 
 //===--===//
@@ -1778,10 +1778,10 @@
 let Inst{19-16} = Zm;
   }
 
-  def : Pat<(nxv8f16 (op nxv8f16:$Op1, nxv8f16:$Op2, nxv8f16:$Op3, (i32 VectorIndexS32b:$idx), (i32 complexrotateop:$imm))),
-(!cast(NAME # _H) $Op1, $Op2, $Op3, VectorIndexS32b:$idx, complexrotateop:$imm)>;
-  def : Pat<(nxv4f32 (op nxv4f32:$Op1, nxv4f32:$Op2, nxv4f32:$Op3, (i32 VectorIndexD32b:$idx), (i32 complexrotateop:$imm))),
-(!cast(NAME # _S) $Op1, $Op2, $Op3, VectorIndexD32b:$idx, complexrotateop:$imm)>;
+  def : Pat<(nxv8f16 (op nxv8f16:$Op1, nxv8f16:$Op2, nxv8f16:$Op3, (i32 VectorIndexS32b_timm:$idx), (i32 complexrotateop:$imm))),
+(!cast(NAME # _H) $Op1, $Op2, $Op3, VectorIndexS32b_timm:$idx, complexrotateop:$imm)>;
+  def : Pat<(nxv4f32 (op nxv4f32:$Op1, nxv4f32:$Op2, nxv4f32:$Op3, (i32 VectorIndexD32b_timm:$idx), (i32 complexrotateop:$imm))),
+(!cast(NAME # _S) $Op1, $Op2, $Op3, VectorIndexD32b_timm:$idx, complexrotateop:$imm)>;
 }
 
 //===--===//
@@ -1942,7 +1942,7 @@
 multiclass sve2_fp_mla_long_by_indexed_elem opc, string asm,
 SDPatternOperator op> {
   def NAME : sve2_fp_mla_long_by_indexed_elem;
-  def : SVE_4_Op_Imm_Pat(NAME)>;
+  def : SVE_4_Op_Imm_Pat(NAME)>;
 }
 
 //===--===//
@@ -2472,23 +2472,23 @@
 
 multiclass sve_intx_dot_by_indexed_elem {
-  def _S : sve_intx_dot_by_indexed_elem<0b0, opc, asm, ZPR32, ZPR8, ZPR3b8, VectorIndexS32b> {
+  def _S : sve_intx_dot_by_indexed_elem<0b0, opc, asm, ZPR32, ZPR8, ZPR3b8, VectorIndexS32b_timm> {
 bits<2> iop;
 bits<3> Zm;
 let Inst{20-19} = iop;
 let Inst{18-16} = Zm;
   }
-  def _D : sve_intx_dot_by_indexed_elem<0b1, opc, asm, ZPR64, ZPR16, ZPR4b16, VectorIndexD32b> {
+  def _D : sve_intx_dot_by_indexed_elem<0b1, opc, asm, ZPR64, ZPR16, ZPR4b16, VectorIndexD32b_timm> {
 bits<1> iop;
 bits<4> Zm;
 let Inst{20} = iop;
 let 

[PATCH] D71698: [AArch64][SVE] Add intrinsic for non-faulting loads

2020-01-14 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin marked 5 inline comments as done.
kmclaughlin added a comment.

Thanks for your suggestions, @andwar!


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D71698/new/

https://reviews.llvm.org/D71698





[PATCH] D71698: [AArch64][SVE] Add intrinsic for non-faulting loads

2020-01-14 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin updated this revision to Diff 237906.
kmclaughlin added a comment.

- Rebased patch
- Updated comments and extended getSVEContainerType to handle nxv8i16 & nxv16i8


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D71698/new/

https://reviews.llvm.org/D71698

Files:
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
  llvm/lib/Target/AArch64/AArch64ISelLowering.h
  llvm/lib/Target/AArch64/AArch64InstrInfo.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/SVEInstrFormats.td
  llvm/test/CodeGen/AArch64/sve-intrinsics-loads-nf.ll

Index: llvm/test/CodeGen/AArch64/sve-intrinsics-loads-nf.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-loads-nf.ll
@@ -0,0 +1,182 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+define  @ldnf1b( %pg, i8* %a) {
+; CHECK-LABEL: ldnf1b:
+; CHECK: ldnf1b { z0.b }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv16i8( %pg, i8* %a)
+  ret  %load
+}
+
+define  @ldnf1b_h( %pg, i8* %a) {
+; CHECK-LABEL: ldnf1b_h:
+; CHECK: ldnf1b { z0.h }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv8i8( %pg, i8* %a)
+  %res = zext  %load to 
+  ret  %res
+}
+
+define  @ldnf1sb_h( %pg, i8* %a) {
+; CHECK-LABEL: ldnf1sb_h:
+; CHECK: ldnf1sb { z0.h }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv8i8( %pg, i8* %a)
+  %res = sext  %load to 
+  ret  %res
+}
+
+define  @ldnf1h( %pg, i16* %a) {
+; CHECK-LABEL: ldnf1h:
+; CHECK: ldnf1h { z0.h }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv8i16( %pg, i16* %a)
+  ret  %load
+}
+
+define  @ldnf1h_f16( %pg, half* %a) {
+; CHECK-LABEL: ldnf1h_f16:
+; CHECK: ldnf1h { z0.h }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv8f16( %pg, half* %a)
+  ret  %load
+}
+
+define  @ldnf1b_s( %pg, i8* %a) {
+; CHECK-LABEL: ldnf1b_s:
+; CHECK: ldnf1b { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv4i8( %pg, i8* %a)
+  %res = zext  %load to 
+  ret  %res
+}
+
+define  @ldnf1sb_s( %pg, i8* %a) {
+; CHECK-LABEL: ldnf1sb_s:
+; CHECK: ldnf1sb { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv4i8( %pg, i8* %a)
+  %res = sext  %load to 
+  ret  %res
+}
+
+define  @ldnf1h_s( %pg, i16* %a) {
+; CHECK-LABEL: ldnf1h_s:
+; CHECK: ldnf1h { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv4i16( %pg, i16* %a)
+  %res = zext  %load to 
+  ret  %res
+}
+
+define  @ldnf1sh_s( %pg, i16* %a) {
+; CHECK-LABEL: ldnf1sh_s:
+; CHECK: ldnf1sh { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv4i16( %pg, i16* %a)
+  %res = sext  %load to 
+  ret  %res
+}
+
+define  @ldnf1w( %pg, i32* %a) {
+; CHECK-LABEL: ldnf1w:
+; CHECK: ldnf1w { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv4i32( %pg, i32* %a)
+  ret  %load
+}
+
+define  @ldnf1w_f32( %pg, float* %a) {
+; CHECK-LABEL: ldnf1w_f32:
+; CHECK: ldnf1w { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv4f32( %pg, float* %a)
+  ret  %load
+}
+
+define  @ldnf1b_d( %pg, i8* %a) {
+; CHECK-LABEL: ldnf1b_d:
+; CHECK: ldnf1b { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv2i8( %pg, i8* %a)
+  %res = zext  %load to 
+  ret  %res
+}
+
+define  @ldnf1sb_d( %pg, i8* %a) {
+; CHECK-LABEL: ldnf1sb_d:
+; CHECK: ldnf1sb { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv2i8( %pg, i8* %a)
+  %res = sext  %load to 
+  ret  %res
+}
+
+define  @ldnf1h_d( %pg, i16* %a) {
+; CHECK-LABEL: ldnf1h_d:
+; CHECK: ldnf1h { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv2i16( %pg, i16* %a)
+  %res = zext  %load to 
+  ret  %res
+}
+
+define  @ldnf1sh_d( %pg, i16* %a) {
+; CHECK-LABEL: ldnf1sh_d:
+; CHECK: ldnf1sh { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv2i16( %pg, i16* %a)
+  %res = sext  %load to 
+  ret  %res
+}
+
+define  @ldnf1w_d( %pg, i32* %a) {
+; CHECK-LABEL: ldnf1w_d:
+; CHECK: ldnf1w { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv2i32( %pg, i32* %a)
+  %res = zext  %load to 
+  ret  %res
+}
+
+define  @ldnf1sw_d( %pg, i32* %a) {
+; CHECK-LABEL: ldnf1sw_d:
+; CHECK: ldnf1sw { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv2i32( %pg, i32* %a)
+  %res = sext  %load to 
+  ret  %res
+}
+
+define  @ldnf1d( %pg, i64* %a) {
+; CHECK-LABEL: ldnf1d:
+; CHECK: ldnf1d { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv2i64( %pg, i64* %a)
+  ret  %load
+}
+
+define  @ldnf1d_f64( %pg, double* %a) {
+; CHECK-LABEL: ldnf1d_f64:
+; CHECK: ldnf1d { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  

[PATCH] D72612: [AArch64][SVE] Add ImmArg property to intrinsics with immediates

2020-01-13 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin created this revision.
kmclaughlin added reviewers: efriedma, sdesmalen, andwar.
Herald added subscribers: psnobl, rkruppe, hiraditya, kristof.beyls, tschuett.
Herald added a reviewer: rengolin.
Herald added a project: LLVM.

Several SVE intrinsics with immediate arguments (including those
added by D70253 & D70437) do not use the ImmArg property.
This patch adds ImmArg where required and changes
the appropriate patterns which match the immediates.
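
As a rough illustration of what the ImmArg change means at the IR level (this sketch is not taken from the patch; the asrd signature is reconstructed from the tests quoted later in this thread, with the scalable-vector types written out by hand), the flagged operand has to remain a plain constant in the call:

; Hedged sketch, LLVM IR. The trailing i32 is the operand covered by ImmArg;
; with the property in place, IR passes may not replace it with a register value.
define <vscale x 16 x i8> @asrd_example(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.asrd.nxv16i8(<vscale x 16 x i1> %pg,
                                                                <vscale x 16 x i8> %a,
                                                                i32 1)
  ret <vscale x 16 x i8> %out
}

declare <vscale x 16 x i8> @llvm.aarch64.sve.asrd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, i32)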


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D72612

Files:
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
  llvm/lib/Target/AArch64/AArch64InstrFormats.td
  llvm/lib/Target/AArch64/AArch64InstrInfo.td
  llvm/lib/Target/AArch64/SVEInstrFormats.td

Index: llvm/lib/Target/AArch64/SVEInstrFormats.td
===
--- llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -354,6 +354,12 @@
 : Pat<(vtd (op vt1:$Op1, vt2:$Op2, (vt3 ImmTy:$Op3))),
   (inst $Op1, $Op2, ImmTy:$Op3)>;
 
+class SVE_3_Op_Cpx_Imm_Pat
+: Pat<(vtd (op vt1:$Op1, vt2:$Op2, (cpx ImmTy:$Op3))),
+  (inst $Op1, $Op2, ImmTy:$Op3)>;
+
 class SVE_4_Op_Imm_Pat
@@ -4377,10 +4383,10 @@
 let Inst{9-8} = imm{4-3};
   }
 
-  def : SVE_3_Op_Imm_Pat(NAME # _B)>;
-  def : SVE_3_Op_Imm_Pat(NAME # _H)>;
-  def : SVE_3_Op_Imm_Pat(NAME # _S)>;
-  def : SVE_3_Op_Imm_Pat(NAME # _D)>;
+  def : SVE_3_Op_Cpx_Imm_Pat(NAME # _B)>;
+  def : SVE_3_Op_Cpx_Imm_Pat(NAME # _H)>;
+  def : SVE_3_Op_Cpx_Imm_Pat(NAME # _S)>;
+  def : SVE_3_Op_Cpx_Imm_Pat(NAME # _D)>;
 }
 
 class sve_int_bin_pred_shift sz8_64, bit wide, bits<3> opc,
Index: llvm/lib/Target/AArch64/AArch64InstrInfo.td
===
--- llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -5023,6 +5023,7 @@
 defm : Neon_INS_elt_pattern;
 
 
+let AddedComplexity = 1 in {
 // Floating point vector extractions are codegen'd as either a sequence of
 // subregister extractions, or a MOV (aka CPY here, alias for DUP) if
 // the lane number is anything other than zero.
@@ -5032,6 +5033,7 @@
   (f32 (EXTRACT_SUBREG V128:$Rn, ssub))>;
 def : Pat<(vector_extract (v8f16 V128:$Rn), 0),
   (f16 (EXTRACT_SUBREG V128:$Rn, hsub))>;
+}
 
 def : Pat<(vector_extract (v2f64 V128:$Rn), VectorIndexD:$idx),
   (f64 (CPYi64 V128:$Rn, VectorIndexD:$idx))>;
Index: llvm/lib/Target/AArch64/AArch64InstrFormats.td
===
--- llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -683,6 +683,10 @@
   let ParserMatchClass = Imm0_63Operand;
 }
 
+def shiftimm8  : ComplexPattern",  []>;
+def shiftimm16 : ComplexPattern", []>;
+def shiftimm32 : ComplexPattern", []>;
+def shiftimm64 : ComplexPattern", []>;
 
 // Crazy immediate formats used by 32-bit and 64-bit logical immediate
 // instructions for splatting repeating bit patterns across the immediate.
@@ -832,7 +836,7 @@
 }
 
 // imm32_0_7 predicate - True if the 32-bit immediate is in the range [0,7]
-def imm32_0_7 : Operand, ImmLeaf, TImmLeaf {
   let ParserMatchClass = Imm0_7Operand;
@@ -1091,8 +1095,8 @@
   let RenderMethod = "addVectorIndexOperands";
 }
 
-class AsmVectorIndexOpnd
-: Operand, ImmLeaf {
+class AsmVectorIndexOpnd
+: Operand, ComplexPattern", []> {
   let ParserMatchClass = mc;
   let PrintMethod = "printVectorIndex";
 }
@@ -1103,17 +1107,17 @@
 def VectorIndexSOperand : AsmVectorIndex<0, 3>;
 def VectorIndexDOperand : AsmVectorIndex<0, 1>;
 
-def VectorIndex1 : AsmVectorIndexOpnd;
-def VectorIndexB : AsmVectorIndexOpnd;
-def VectorIndexH : AsmVectorIndexOpnd;
-def VectorIndexS : AsmVectorIndexOpnd;
-def VectorIndexD : AsmVectorIndexOpnd;
+def VectorIndex1 : AsmVectorIndexOpnd;
+def VectorIndexB : AsmVectorIndexOpnd;
+def VectorIndexH : AsmVectorIndexOpnd;
+def VectorIndexS : AsmVectorIndexOpnd;
+def VectorIndexD : AsmVectorIndexOpnd;
 
-def VectorIndex132b : AsmVectorIndexOpnd;
-def VectorIndexB32b : AsmVectorIndexOpnd;
-def VectorIndexH32b : AsmVectorIndexOpnd;
-def VectorIndexS32b : AsmVectorIndexOpnd;
-def VectorIndexD32b : AsmVectorIndexOpnd;
+def VectorIndex132b : AsmVectorIndexOpnd;
+def VectorIndexB32b : AsmVectorIndexOpnd;
+def VectorIndexH32b : AsmVectorIndexOpnd;
+def VectorIndexS32b : AsmVectorIndexOpnd;
+def VectorIndexD32b : AsmVectorIndexOpnd;
 
 def SVEVectorIndexExtDupBOperand : AsmVectorIndex<0, 63, "SVE">;
 def SVEVectorIndexExtDupHOperand : AsmVectorIndex<0, 31, "SVE">;
@@ -1122,15 +1126,15 @@
 def SVEVectorIndexExtDupQOperand : AsmVectorIndex<0, 3, "SVE">;
 
 def sve_elm_idx_extdup_b
-  : AsmVectorIndexOpnd;
+  : AsmVectorIndexOpnd;
 def sve_elm_idx_extdup_h
-  : AsmVectorIndexOpnd;
+  : AsmVectorIndexOpnd;
 def sve_elm_idx_extdup_s
- 

[PATCH] D71698: [AArch64][SVE] Add intrinsic for non-faulting loads

2019-12-20 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin added inline comments.



Comment at: llvm/lib/Target/AArch64/SVEInstrFormats.td:5333
+  // We need a layer of indirection because early machine code passes balk at
+  // physical register (i.e. FFR) uses that have no previous definition.
+  let hasSideEffects = 1, hasNoSchedulingInfo = 1, mayLoad = 1 in {

efriedma wrote:
> This is depending on hasSideEffects to preserve the correct ordering with 
> instructions that read/write FFR?  That probably works.  I guess the 
> alternative is to insert an IMPLICIT_DEF of FFR in the entry block of each 
> function.
> 
> What are the calling convention rules for FFR?  Is it callee-save?  If not, 
> we might need to do some work to make FFR reads/writes do something sane 
> across calls inserted by the compiler.
The FFR is not callee-saved. We will need to add support to save & restore it 
where appropriate once the compiler starts generating reads of the FFR,
but for the purpose of the ACLE the user will be required to do this if 
necessary.
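
For context, a minimal IR-level sketch of the pattern being discussed, assuming the separate FFR helper intrinsics (llvm.aarch64.sve.setffr / llvm.aarch64.sve.rdffr), which are not part of this patch; the types are written out by hand:

; Hedged sketch only. Any call the compiler inserts between the load and the
; rdffr could clobber the FFR, since it is not callee-saved.
define <vscale x 16 x i1> @ldnf_then_check(<vscale x 16 x i1> %pg, i8* %a) {
  ; Clear the FFR so earlier state is not observed.
  call void @llvm.aarch64.sve.setffr()
  ; Non-faulting load; lanes that would have faulted are recorded in the FFR.
  ; (%v is left unused here purely to keep the sketch short.)
  %v = call <vscale x 16 x i8> @llvm.aarch64.sve.ldnf1.nxv16i8(<vscale x 16 x i1> %pg, i8* %a)
  ; Read the FFR back to find out which lanes were successfully loaded.
  %ffr = call <vscale x 16 x i1> @llvm.aarch64.sve.rdffr()
  ret <vscale x 16 x i1> %ffr
}

declare void @llvm.aarch64.sve.setffr()
declare <vscale x 16 x i8> @llvm.aarch64.sve.ldnf1.nxv16i8(<vscale x 16 x i1>, i8*)
declare <vscale x 16 x i1> @llvm.aarch64.sve.rdffr()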


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D71698/new/

https://reviews.llvm.org/D71698





[PATCH] D71698: [AArch64][SVE] Add intrinsic for non-faulting loads

2019-12-19 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin created this revision.
kmclaughlin added reviewers: sdesmalen, efriedma, andwar, dancgr, mgudim.
Herald added subscribers: psnobl, rkruppe, hiraditya, kristof.beyls, tschuett.
Herald added a reviewer: rengolin.
Herald added a project: LLVM.

This patch adds the llvm.aarch64.sve.ldnf1 intrinsic, plus
DAG combine rules for non-faulting loads and sign/zero extends
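
Since the archived diffs below lost their scalable-vector types, here is one representative test rewritten with the types restored by hand (treat the exact signatures as a best-effort reconstruction rather than a verbatim excerpt); it shows the load-plus-zero-extend pattern that the new DAG combine folds into a single ldnf1b:

define <vscale x 8 x i16> @ldnf1b_h(<vscale x 8 x i1> %pg, i8* %a) {
  ; An 8-lane non-faulting load of i8 followed by a zext to i16 is expected
  ; to select a single ldnf1b { z0.h }, p0/z, [x0] instruction.
  %load = call <vscale x 8 x i8> @llvm.aarch64.sve.ldnf1.nxv8i8(<vscale x 8 x i1> %pg, i8* %a)
  %res = zext <vscale x 8 x i8> %load to <vscale x 8 x i16>
  ret <vscale x 8 x i16> %res
}

declare <vscale x 8 x i8> @llvm.aarch64.sve.ldnf1.nxv8i8(<vscale x 8 x i1>, i8*)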


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D71698

Files:
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
  llvm/lib/Target/AArch64/AArch64ISelLowering.h
  llvm/lib/Target/AArch64/AArch64InstrInfo.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/SVEInstrFormats.td
  llvm/test/CodeGen/AArch64/sve-intrinsics-loads-nf.ll

Index: llvm/test/CodeGen/AArch64/sve-intrinsics-loads-nf.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-loads-nf.ll
@@ -0,0 +1,182 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+define  @ldnf1b( %pg, i8* %a) {
+; CHECK-LABEL: ldnf1b:
+; CHECK: ldnf1b { z0.b }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv16i8( %pg, i8* %a)
+  ret  %load
+}
+
+define  @ldnf1b_h( %pg, i8* %a) {
+; CHECK-LABEL: ldnf1b_h:
+; CHECK: ldnf1b { z0.h }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv8i8( %pg, i8* %a)
+  %res = zext  %load to 
+  ret  %res
+}
+
+define  @ldnf1sb_h( %pg, i8* %a) {
+; CHECK-LABEL: ldnf1sb_h:
+; CHECK: ldnf1sb { z0.h }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv8i8( %pg, i8* %a)
+  %res = sext  %load to 
+  ret  %res
+}
+
+define  @ldnf1h( %pg, i16* %a) {
+; CHECK-LABEL: ldnf1h:
+; CHECK: ldnf1h { z0.h }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv8i16( %pg, i16* %a)
+  ret  %load
+}
+
+define  @ldnf1h_f16( %pg, half* %a) {
+; CHECK-LABEL: ldnf1h_f16:
+; CHECK: ldnf1h { z0.h }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv8f16( %pg, half* %a)
+  ret  %load
+}
+
+define  @ldnf1b_s( %pg, i8* %a) {
+; CHECK-LABEL: ldnf1b_s:
+; CHECK: ldnf1b { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv4i8( %pg, i8* %a)
+  %res = zext  %load to 
+  ret  %res
+}
+
+define  @ldnf1sb_s( %pg, i8* %a) {
+; CHECK-LABEL: ldnf1sb_s:
+; CHECK: ldnf1sb { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv4i8( %pg, i8* %a)
+  %res = sext  %load to 
+  ret  %res
+}
+
+define  @ldnf1h_s( %pg, i16* %a) {
+; CHECK-LABEL: ldnf1h_s:
+; CHECK: ldnf1h { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv4i16( %pg, i16* %a)
+  %res = zext  %load to 
+  ret  %res
+}
+
+define  @ldnf1sh_s( %pg, i16* %a) {
+; CHECK-LABEL: ldnf1sh_s:
+; CHECK: ldnf1sh { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv4i16( %pg, i16* %a)
+  %res = sext  %load to 
+  ret  %res
+}
+
+define  @ldnf1w( %pg, i32* %a) {
+; CHECK-LABEL: ldnf1w:
+; CHECK: ldnf1w { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv4i32( %pg, i32* %a)
+  ret  %load
+}
+
+define  @ldnf1w_f32( %pg, float* %a) {
+; CHECK-LABEL: ldnf1w_f32:
+; CHECK: ldnf1w { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv4f32( %pg, float* %a)
+  ret  %load
+}
+
+define  @ldnf1b_d( %pg, i8* %a) {
+; CHECK-LABEL: ldnf1b_d:
+; CHECK: ldnf1b { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv2i8( %pg, i8* %a)
+  %res = zext  %load to 
+  ret  %res
+}
+
+define  @ldnf1sb_d( %pg, i8* %a) {
+; CHECK-LABEL: ldnf1sb_d:
+; CHECK: ldnf1sb { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv2i8( %pg, i8* %a)
+  %res = sext  %load to 
+  ret  %res
+}
+
+define  @ldnf1h_d( %pg, i16* %a) {
+; CHECK-LABEL: ldnf1h_d:
+; CHECK: ldnf1h { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv2i16( %pg, i16* %a)
+  %res = zext  %load to 
+  ret  %res
+}
+
+define  @ldnf1sh_d( %pg, i16* %a) {
+; CHECK-LABEL: ldnf1sh_d:
+; CHECK: ldnf1sh { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv2i16( %pg, i16* %a)
+  %res = sext  %load to 
+  ret  %res
+}
+
+define  @ldnf1w_d( %pg, i32* %a) {
+; CHECK-LABEL: ldnf1w_d:
+; CHECK: ldnf1w { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv2i32( %pg, i32* %a)
+  %res = zext  %load to 
+  ret  %res
+}
+
+define  @ldnf1sw_d( %pg, i32* %a) {
+; CHECK-LABEL: ldnf1sw_d:
+; CHECK: ldnf1sw { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv2i32( %pg, i32* %a)
+  %res = sext  %load to 
+  ret  %res
+}
+
+define  @ldnf1d( %pg, i64* %a) {
+; CHECK-LABEL: ldnf1d:
+; CHECK: ldnf1d { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv2i64( %pg, i64* %a)

[PATCH] D71556: [AArch64][SVE] Implement intrinsic for non-faulting loads

2019-12-19 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin planned changes to this revision.
kmclaughlin added a comment.

Thanks for the feedback on this patch, @efriedma & @sdesmalen!
I think there is still value in adding a NonFaulting flag to MachineMemOperand 
so that we can benefit from legalisation, but as this is not a requirement for 
the ACLE I have created a new patch which implements the non-faulting load 
intrinsic explicitly: https://reviews.llvm.org/D71698
I will leave this patch in the 'plan changes' state so that it can be referred 
to in future discussions on the mailing list.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D71556/new/

https://reviews.llvm.org/D71556





[PATCH] D71556: [AArch64][SVE] Implement intrinsic for non-faulting loads

2019-12-16 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin created this revision.
kmclaughlin added reviewers: sdesmalen, paulwalker-arm, efriedma, dancgr, 
mgudim.
Herald added subscribers: psnobl, rkruppe, hiraditya, kristof.beyls, tschuett.
Herald added a reviewer: rengolin.
Herald added a project: LLVM.

Adds the llvm.aarch64.sve.ldnf1 intrinsic, adding a new
flag to MachineMemOperand (MONonFaulting)


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D71556

Files:
  llvm/include/llvm/CodeGen/MachineMemOperand.h
  llvm/include/llvm/CodeGen/SelectionDAGNodes.h
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/CodeGen/MachineOperand.cpp
  llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
  llvm/lib/Target/AArch64/AArch64InstrInfo.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/SVEInstrFormats.td
  llvm/test/CodeGen/AArch64/sve-intrinsics-loads-nf.ll

Index: llvm/test/CodeGen/AArch64/sve-intrinsics-loads-nf.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-loads-nf.ll
@@ -0,0 +1,182 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+define  @ldnf1b( %pg, i8* %a) {
+; CHECK-LABEL: ldnf1b:
+; CHECK: ldnf1b { z0.b }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv16i8( %pg, i8* %a)
+  ret  %load
+}
+
+define  @ldnf1b_h( %pg, i8* %a) {
+; CHECK-LABEL: ldnf1b_h:
+; CHECK: ldnf1b { z0.h }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv8i8( %pg, i8* %a)
+  %res = zext  %load to 
+  ret  %res
+}
+
+define  @ldnf1sb_h( %pg, i8* %a) {
+; CHECK-LABEL: ldnf1sb_h:
+; CHECK: ldnf1sb { z0.h }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv8i8( %pg, i8* %a)
+  %res = sext  %load to 
+  ret  %res
+}
+
+define  @ldnf1h( %pg, i16* %a) {
+; CHECK-LABEL: ldnf1h:
+; CHECK: ldnf1h { z0.h }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv8i16( %pg, i16* %a)
+  ret  %load
+}
+
+define  @ldnf1h_f16( %pg, half* %a) {
+; CHECK-LABEL: ldnf1h_f16:
+; CHECK: ldnf1h { z0.h }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv8f16( %pg, half* %a)
+  ret  %load
+}
+
+define  @ldnf1b_s( %pg, i8* %a) {
+; CHECK-LABEL: ldnf1b_s:
+; CHECK: ldnf1b { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv4i8( %pg, i8* %a)
+  %res = zext  %load to 
+  ret  %res
+}
+
+define  @ldnf1sb_s( %pg, i8* %a) {
+; CHECK-LABEL: ldnf1sb_s:
+; CHECK: ldnf1sb { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv4i8( %pg, i8* %a)
+  %res = sext  %load to 
+  ret  %res
+}
+
+define  @ldnf1h_s( %pg, i16* %a) {
+; CHECK-LABEL: ldnf1h_s:
+; CHECK: ldnf1h { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv4i16( %pg, i16* %a)
+  %res = zext  %load to 
+  ret  %res
+}
+
+define  @ldnf1sh_s( %pg, i16* %a) {
+; CHECK-LABEL: ldnf1sh_s:
+; CHECK: ldnf1sh { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv4i16( %pg, i16* %a)
+  %res = sext  %load to 
+  ret  %res
+}
+
+define  @ldnf1w( %pg, i32* %a) {
+; CHECK-LABEL: ldnf1w:
+; CHECK: ldnf1w { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv4i32( %pg, i32* %a)
+  ret  %load
+}
+
+define  @ldnf1w_f32( %pg, float* %a) {
+; CHECK-LABEL: ldnf1w_f32:
+; CHECK: ldnf1w { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv4f32( %pg, float* %a)
+  ret  %load
+}
+
+define  @ldnf1b_d( %pg, i8* %a) {
+; CHECK-LABEL: ldnf1b_d:
+; CHECK: ldnf1b { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv2i8( %pg, i8* %a)
+  %res = zext  %load to 
+  ret  %res
+}
+
+define  @ldnf1sb_d( %pg, i8* %a) {
+; CHECK-LABEL: ldnf1sb_d:
+; CHECK: ldnf1sb { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv2i8( %pg, i8* %a)
+  %res = sext  %load to 
+  ret  %res
+}
+
+define  @ldnf1h_d( %pg, i16* %a) {
+; CHECK-LABEL: ldnf1h_d:
+; CHECK: ldnf1h { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv2i16( %pg, i16* %a)
+  %res = zext  %load to 
+  ret  %res
+}
+
+define  @ldnf1sh_d( %pg, i16* %a) {
+; CHECK-LABEL: ldnf1sh_d:
+; CHECK: ldnf1sh { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv2i16( %pg, i16* %a)
+  %res = sext  %load to 
+  ret  %res
+}
+
+define  @ldnf1w_d( %pg, i32* %a) {
+; CHECK-LABEL: ldnf1w_d:
+; CHECK: ldnf1w { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv2i32( %pg, i32* %a)
+  %res = zext  %load to 
+  ret  %res
+}
+
+define  @ldnf1sw_d( %pg, i32* %a) {
+; CHECK-LABEL: ldnf1sw_d:
+; CHECK: ldnf1sw { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.aarch64.sve.ldnf1.nxv2i32( %pg, i32* %a)
+  %res = sext  %load to 
+  ret  %res
+}
+
+define  @ldnf1d( %pg, i64* %a) {
+; CHECK-LABEL: ldnf1d:

[PATCH] D71000: [AArch64][SVE] Implement intrinsics for non-temporal loads & stores

2019-12-11 Thread Kerry McLaughlin via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rG3f5bf35f868d: [AArch64][SVE] Implement intrinsics for 
non-temporal loads & stores (authored by kmclaughlin).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D71000/new/

https://reviews.llvm.org/D71000

Files:
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
  llvm/lib/Target/AArch64/AArch64InstrInfo.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll
  llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll

Index: llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll
@@ -0,0 +1,95 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+;
+; STNT1B
+;
+
+define void @stnt1b_i8( %data,  %pred, * %addr) {
+; CHECK-LABEL: stnt1b_i8:
+; CHECK: stnt1b { z0.b }, p0, [x0, #0]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.stnt1.nxv16i8( %data,
+ %pred,
+* %addr)
+  ret void
+}
+
+;
+; STNT1H
+;
+
+define void @stnt1h_i16( %data,  %pred, * %addr) {
+; CHECK-LABEL: stnt1h_i16:
+; CHECK: stnt1h { z0.h }, p0, [x0, #0, lsl #1]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.stnt1.nxv8i16( %data,
+ %pred,
+* %addr)
+  ret void
+}
+
+define void @stnt1h_f16( %data,  %pred, * %addr) {
+; CHECK-LABEL: stnt1h_f16:
+; CHECK: stnt1h { z0.h }, p0, [x0, #0, lsl #1]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.stnt1.nxv8f16( %data,
+ %pred,
+* %addr)
+  ret void
+}
+
+;
+; STNT1W
+;
+
+define void @stnt1w_i32( %data,  %pred, * %addr) {
+; CHECK-LABEL: stnt1w_i32:
+; CHECK: stnt1w { z0.s }, p0, [x0, #0, lsl #2]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.stnt1.nxv4i32( %data,
+ %pred,
+* %addr)
+  ret void
+}
+
+define void @stnt1w_f32( %data,  %pred, * %addr) {
+; CHECK-LABEL: stnt1w_f32:
+; CHECK: stnt1w { z0.s }, p0, [x0, #0, lsl #2]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.stnt1.nxv4f32( %data,
+ %pred,
+* %addr)
+  ret void
+}
+
+;
+; STNT1D
+;
+
+define void @stnt1d_i64( %data,  %pred, * %addr) {
+; CHECK-LABEL: stnt1d_i64:
+; CHECK: stnt1d { z0.d }, p0, [x0, #0, lsl #3]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.stnt1.nxv2i64( %data,
+ %pred,
+* %addr)
+  ret void
+}
+
+define void @stnt1d_f64( %data,  %pred, * %addr) {
+; CHECK-LABEL: stnt1d_f64:
+; CHECK: stnt1d { z0.d }, p0, [x0, #0, lsl #3]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.stnt1.nxv2f64( %data,
+ %pred,
+* %addr)
+  ret void
+}
+
+declare void @llvm.aarch64.sve.stnt1.nxv16i8(, , *)
+declare void @llvm.aarch64.sve.stnt1.nxv8i16(, , *)
+declare void @llvm.aarch64.sve.stnt1.nxv4i32(, , *)
+declare void @llvm.aarch64.sve.stnt1.nxv2i64(, , *)
+declare void @llvm.aarch64.sve.stnt1.nxv8f16(, , *)
+declare void @llvm.aarch64.sve.stnt1.nxv4f32(, , *)
+declare void @llvm.aarch64.sve.stnt1.nxv2f64(, , *)
Index: llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll
@@ -0,0 +1,88 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+;
+; LDNT1B
+;
+
+define  @ldnt1b_i8( %pred, * %addr) {
+; CHECK-LABEL: ldnt1b_i8:
+; CHECK: ldnt1b { z0.b }, p0/z, [x0, #0]
+; CHECK-NEXT: ret
+  %res = call  @llvm.aarch64.sve.ldnt1.nxv16i8( %pred,
+ * %addr)
+  ret  %res
+}
+
+;
+; LDNT1H
+;
+
+define  @ldnt1h_i16( %pred, * %addr) {
+; CHECK-LABEL: ldnt1h_i16:
+; CHECK: ldnt1h { z0.h }, p0/z, [x0, #0, lsl #1]
+; CHECK-NEXT: ret
+  %res = call  @llvm.aarch64.sve.ldnt1.nxv8i16( %pred,
+ * %addr)
+  ret  %res
+}
+
+define  @ldnt1h_f16( %pred, * %addr) {
+; CHECK-LABEL: ldnt1h_f16:
+; CHECK: ldnt1h { z0.h }, p0/z, [x0, #0, lsl #1]
+; CHECK-NEXT: ret
+  %res = call  @llvm.aarch64.sve.ldnt1.nxv8f16( %pred,
+  * %addr)
+  ret  %res
+}
+
+;
+; LDNT1W
+;
+
+define  @ldnt1w_i32( %pred, * %addr) {
+; CHECK-LABEL: ldnt1w_i32:
+; CHECK: ldnt1w { z0.s }, p0/z, [x0, #0, lsl #2]
+; CHECK-NEXT: 

[PATCH] D71000: [AArch64][SVE] Implement intrinsics for non-temporal loads & stores

2019-12-11 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin updated this revision to Diff 233298.
kmclaughlin marked an inline comment as done.
kmclaughlin added a comment.

- Changed 'Offset' value used by getMaskedLoad & getMaskedStore to scalar type


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D71000/new/

https://reviews.llvm.org/D71000

Files:
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
  llvm/lib/Target/AArch64/AArch64InstrInfo.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll
  llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll

Index: llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll
@@ -0,0 +1,95 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+;
+; STNT1B
+;
+
+define void @stnt1b_i8( %data,  %pred, * %addr) {
+; CHECK-LABEL: stnt1b_i8:
+; CHECK: stnt1b { z0.b }, p0, [x0, #0]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.stnt1.nxv16i8( %data,
+ %pred,
+* %addr)
+  ret void
+}
+
+;
+; STNT1H
+;
+
+define void @stnt1h_i16( %data,  %pred, * %addr) {
+; CHECK-LABEL: stnt1h_i16:
+; CHECK: stnt1h { z0.h }, p0, [x0, #0, lsl #1]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.stnt1.nxv8i16( %data,
+ %pred,
+* %addr)
+  ret void
+}
+
+define void @stnt1h_f16( %data,  %pred, * %addr) {
+; CHECK-LABEL: stnt1h_f16:
+; CHECK: stnt1h { z0.h }, p0, [x0, #0, lsl #1]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.stnt1.nxv8f16( %data,
+ %pred,
+* %addr)
+  ret void
+}
+
+;
+; STNT1W
+;
+
+define void @stnt1w_i32( %data,  %pred, * %addr) {
+; CHECK-LABEL: stnt1w_i32:
+; CHECK: stnt1w { z0.s }, p0, [x0, #0, lsl #2]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.stnt1.nxv4i32( %data,
+ %pred,
+* %addr)
+  ret void
+}
+
+define void @stnt1w_f32( %data,  %pred, * %addr) {
+; CHECK-LABEL: stnt1w_f32:
+; CHECK: stnt1w { z0.s }, p0, [x0, #0, lsl #2]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.stnt1.nxv4f32( %data,
+ %pred,
+* %addr)
+  ret void
+}
+
+;
+; STNT1D
+;
+
+define void @stnt1d_i64( %data,  %pred, * %addr) {
+; CHECK-LABEL: stnt1d_i64:
+; CHECK: stnt1d { z0.d }, p0, [x0, #0, lsl #3]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.stnt1.nxv2i64( %data,
+ %pred,
+* %addr)
+  ret void
+}
+
+define void @stnt1d_f64( %data,  %pred, * %addr) {
+; CHECK-LABEL: stnt1d_f64:
+; CHECK: stnt1d { z0.d }, p0, [x0, #0, lsl #3]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.stnt1.nxv2f64( %data,
+ %pred,
+* %addr)
+  ret void
+}
+
+declare void @llvm.aarch64.sve.stnt1.nxv16i8(, , *)
+declare void @llvm.aarch64.sve.stnt1.nxv8i16(, , *)
+declare void @llvm.aarch64.sve.stnt1.nxv4i32(, , *)
+declare void @llvm.aarch64.sve.stnt1.nxv2i64(, , *)
+declare void @llvm.aarch64.sve.stnt1.nxv8f16(, , *)
+declare void @llvm.aarch64.sve.stnt1.nxv4f32(, , *)
+declare void @llvm.aarch64.sve.stnt1.nxv2f64(, , *)
Index: llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll
@@ -0,0 +1,88 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+;
+; LDNT1B
+;
+
+define  @ldnt1b_i8( %pred, * %addr) {
+; CHECK-LABEL: ldnt1b_i8:
+; CHECK: ldnt1b { z0.b }, p0/z, [x0, #0]
+; CHECK-NEXT: ret
+  %res = call  @llvm.aarch64.sve.ldnt1.nxv16i8( %pred,
+ * %addr)
+  ret  %res
+}
+
+;
+; LDNT1H
+;
+
+define  @ldnt1h_i16( %pred, * %addr) {
+; CHECK-LABEL: ldnt1h_i16:
+; CHECK: ldnt1h { z0.h }, p0/z, [x0, #0, lsl #1]
+; CHECK-NEXT: ret
+  %res = call  @llvm.aarch64.sve.ldnt1.nxv8i16( %pred,
+ * %addr)
+  ret  %res
+}
+
+define  @ldnt1h_f16( %pred, * %addr) {
+; CHECK-LABEL: ldnt1h_f16:
+; CHECK: ldnt1h { z0.h }, p0/z, [x0, #0, lsl #1]
+; CHECK-NEXT: ret
+  %res = call  @llvm.aarch64.sve.ldnt1.nxv8f16( %pred,
+  * %addr)
+  ret  %res
+}
+
+;
+; LDNT1W
+;
+
+define  @ldnt1w_i32( %pred, * %addr) {
+; CHECK-LABEL: ldnt1w_i32:
+; CHECK: ldnt1w { z0.s }, p0/z, [x0, #0, lsl #2]
+; CHECK-NEXT: ret
+  %res = call  

[PATCH] D71000: [AArch64][SVE] Implement intrinsics for non-temporal loads & stores

2019-12-09 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin marked 2 inline comments as done.
kmclaughlin added inline comments.



Comment at: llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp:6587
+  else if (!Size)
+Size = MemVT.getStoreSize().getKnownMinSize();
 

efriedma wrote:
> In order for alias analysis to correctly handle a MachineMemOperand, the 
> "Size" of an operation has to be conservative, in the sense that the
> number of bytes accessed must be at most "Size". Otherwise we'll assume two 
> operations don't alias when they actually do.
> 
> For a scalable vector, we don't know the size, so we have to conservatively
> pass "MemoryLocation::UnknownSize".
Thanks @efriedma, I have changed this to use MemoryLocation::UnknownSize


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D71000/new/

https://reviews.llvm.org/D71000





[PATCH] D71000: [AArch64][SVE] Implement intrinsics for non-temporal loads & stores

2019-12-09 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin updated this revision to Diff 232870.
kmclaughlin added a comment.

- Set 'Size' to MemoryLocation::UnknownSize for scalable vectors in 
getMemIntrinsicNode
- Ensure MLOAD zeroes inactive lanes by using a zero value for the PassThru in
getMaskedLoad (a rough IR-level sketch of this follows below)
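
As a loose, IR-level analogy for the PassThru change (an assumed equivalence for exposition only, not code from the patch, and the masked.load mangling is written from memory), a masked load with a zeroinitializer pass-through leaves every inactive lane holding zero:

define <vscale x 16 x i8> @zeroing_masked_load(<vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
  ; The last operand is the pass-through value; zeroinitializer means
  ; inactive lanes of the result read as 0.
  %res = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>* %addr,
                                                                     i32 1,
                                                                     <vscale x 16 x i1> %pred,
                                                                     <vscale x 16 x i8> zeroinitializer)
  ret <vscale x 16 x i8> %res
}

declare <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>*, i32, <vscale x 16 x i1>, <vscale x 16 x i8>)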


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D71000/new/

https://reviews.llvm.org/D71000

Files:
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
  llvm/lib/Target/AArch64/AArch64InstrInfo.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll
  llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll

Index: llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll
@@ -0,0 +1,95 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+;
+; STNT1B
+;
+
+define void @stnt1b_i8( %data,  %pred, * %addr) {
+; CHECK-LABEL: stnt1b_i8:
+; CHECK: stnt1b { z0.b }, p0, [x0, #0]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.stnt1.nxv16i8( %data,
+ %pred,
+* %addr)
+  ret void
+}
+
+;
+; STNT1H
+;
+
+define void @stnt1h_i16( %data,  %pred, * %addr) {
+; CHECK-LABEL: stnt1h_i16:
+; CHECK: stnt1h { z0.h }, p0, [x0, #0, lsl #1]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.stnt1.nxv8i16( %data,
+ %pred,
+* %addr)
+  ret void
+}
+
+define void @stnt1h_f16( %data,  %pred, * %addr) {
+; CHECK-LABEL: stnt1h_f16:
+; CHECK: stnt1h { z0.h }, p0, [x0, #0, lsl #1]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.stnt1.nxv8f16( %data,
+ %pred,
+* %addr)
+  ret void
+}
+
+;
+; STNT1W
+;
+
+define void @stnt1w_i32( %data,  %pred, * %addr) {
+; CHECK-LABEL: stnt1w_i32:
+; CHECK: stnt1w { z0.s }, p0, [x0, #0, lsl #2]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.stnt1.nxv4i32( %data,
+ %pred,
+* %addr)
+  ret void
+}
+
+define void @stnt1w_f32( %data,  %pred, * %addr) {
+; CHECK-LABEL: stnt1w_f32:
+; CHECK: stnt1w { z0.s }, p0, [x0, #0, lsl #2]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.stnt1.nxv4f32( %data,
+ %pred,
+* %addr)
+  ret void
+}
+
+;
+; STNT1D
+;
+
+define void @stnt1d_i64( %data,  %pred, * %addr) {
+; CHECK-LABEL: stnt1d_i64:
+; CHECK: stnt1d { z0.d }, p0, [x0, #0, lsl #3]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.stnt1.nxv2i64( %data,
+ %pred,
+* %addr)
+  ret void
+}
+
+define void @stnt1d_f64( %data,  %pred, * %addr) {
+; CHECK-LABEL: stnt1d_f64:
+; CHECK: stnt1d { z0.d }, p0, [x0, #0, lsl #3]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.stnt1.nxv2f64( %data,
+ %pred,
+* %addr)
+  ret void
+}
+
+declare void @llvm.aarch64.sve.stnt1.nxv16i8(, , *)
+declare void @llvm.aarch64.sve.stnt1.nxv8i16(, , *)
+declare void @llvm.aarch64.sve.stnt1.nxv4i32(, , *)
+declare void @llvm.aarch64.sve.stnt1.nxv2i64(, , *)
+declare void @llvm.aarch64.sve.stnt1.nxv8f16(, , *)
+declare void @llvm.aarch64.sve.stnt1.nxv4f32(, , *)
+declare void @llvm.aarch64.sve.stnt1.nxv2f64(, , *)
Index: llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll
@@ -0,0 +1,88 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+;
+; LDNT1B
+;
+
+define  @ldnt1b_i8( %pred, * %addr) {
+; CHECK-LABEL: ldnt1b_i8:
+; CHECK: ldnt1b { z0.b }, p0/z, [x0, #0]
+; CHECK-NEXT: ret
+  %res = call  @llvm.aarch64.sve.ldnt1.nxv16i8( %pred,
+ * %addr)
+  ret  %res
+}
+
+;
+; LDNT1H
+;
+
+define  @ldnt1h_i16( %pred, * %addr) {
+; CHECK-LABEL: ldnt1h_i16:
+; CHECK: ldnt1h { z0.h }, p0/z, [x0, #0, lsl #1]
+; CHECK-NEXT: ret
+  %res = call  @llvm.aarch64.sve.ldnt1.nxv8i16( %pred,
+ * %addr)
+  ret  %res
+}
+
+define  @ldnt1h_f16( %pred, * %addr) {
+; CHECK-LABEL: ldnt1h_f16:
+; CHECK: ldnt1h { z0.h }, p0/z, [x0, #0, lsl #1]
+; CHECK-NEXT: ret
+  %res = call  @llvm.aarch64.sve.ldnt1.nxv8f16( %pred,
+  * %addr)
+  ret  %res
+}
+
+;
+; LDNT1W
+;
+
+define  @ldnt1w_i32( %pred, * %addr) {
+; CHECK-LABEL: ldnt1w_i32:
+; CHECK: ldnt1w { z0.s }, p0/z, [x0, #0, 

[PATCH] D71000: [AArch64][SVE] Implement intrinsics for non-temporal loads & stores

2019-12-04 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin updated this revision to Diff 232072.
kmclaughlin edited the summary of this revision.
kmclaughlin added a comment.

- Removed AArch64 specific ISDNodes for MLOAD & MSTORE


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D71000/new/

https://reviews.llvm.org/D71000

Files:
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
  llvm/lib/Target/AArch64/AArch64InstrInfo.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll
  llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll

Index: llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll
@@ -0,0 +1,95 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+;
+; STNT1B
+;
+
+define void @stnt1b_i8( %data,  %pred, * %addr) {
+; CHECK-LABEL: stnt1b_i8:
+; CHECK: stnt1b { z0.b }, p0, [x0, #0]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.stnt1.nxv16i8( %data,
+ %pred,
+* %addr)
+  ret void
+}
+
+;
+; STNT1H
+;
+
+define void @stnt1h_i16( %data,  %pred, * %addr) {
+; CHECK-LABEL: stnt1h_i16:
+; CHECK: stnt1h { z0.h }, p0, [x0, #0, lsl #1]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.stnt1.nxv8i16( %data,
+ %pred,
+* %addr)
+  ret void
+}
+
+define void @stnt1h_f16( %data,  %pred, * %addr) {
+; CHECK-LABEL: stnt1h_f16:
+; CHECK: stnt1h { z0.h }, p0, [x0, #0, lsl #1]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.stnt1.nxv8f16( %data,
+ %pred,
+* %addr)
+  ret void
+}
+
+;
+; STNT1W
+;
+
+define void @stnt1w_i32( %data,  %pred, * %addr) {
+; CHECK-LABEL: stnt1w_i32:
+; CHECK: stnt1w { z0.s }, p0, [x0, #0, lsl #2]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.stnt1.nxv4i32( %data,
+ %pred,
+* %addr)
+  ret void
+}
+
+define void @stnt1w_f32( %data,  %pred, * %addr) {
+; CHECK-LABEL: stnt1w_f32:
+; CHECK: stnt1w { z0.s }, p0, [x0, #0, lsl #2]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.stnt1.nxv4f32( %data,
+ %pred,
+* %addr)
+  ret void
+}
+
+;
+; STNT1D
+;
+
+define void @stnt1d_i64( %data,  %pred, * %addr) {
+; CHECK-LABEL: stnt1d_i64:
+; CHECK: stnt1d { z0.d }, p0, [x0, #0, lsl #3]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.stnt1.nxv2i64( %data,
+ %pred,
+* %addr)
+  ret void
+}
+
+define void @stnt1d_f64( %data,  %pred, * %addr) {
+; CHECK-LABEL: stnt1d_f64:
+; CHECK: stnt1d { z0.d }, p0, [x0, #0, lsl #3]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.stnt1.nxv2f64( %data,
+ %pred,
+* %addr)
+  ret void
+}
+
+declare void @llvm.aarch64.sve.stnt1.nxv16i8(, , *)
+declare void @llvm.aarch64.sve.stnt1.nxv8i16(, , *)
+declare void @llvm.aarch64.sve.stnt1.nxv4i32(, , *)
+declare void @llvm.aarch64.sve.stnt1.nxv2i64(, , *)
+declare void @llvm.aarch64.sve.stnt1.nxv8f16(, , *)
+declare void @llvm.aarch64.sve.stnt1.nxv4f32(, , *)
+declare void @llvm.aarch64.sve.stnt1.nxv2f64(, , *)
Index: llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll
@@ -0,0 +1,88 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+;
+; LDNT1B
+;
+
+define  @ldnt1b_i8( %pred, * %addr) {
+; CHECK-LABEL: ldnt1b_i8:
+; CHECK: ldnt1b { z0.b }, p0/z, [x0, #0]
+; CHECK-NEXT: ret
+  %res = call  @llvm.aarch64.sve.ldnt1.nxv16i8( %pred,
+ * %addr)
+  ret  %res
+}
+
+;
+; LDNT1H
+;
+
+define  @ldnt1h_i16( %pred, * %addr) {
+; CHECK-LABEL: ldnt1h_i16:
+; CHECK: ldnt1h { z0.h }, p0/z, [x0, #0, lsl #1]
+; CHECK-NEXT: ret
+  %res = call  @llvm.aarch64.sve.ldnt1.nxv8i16( %pred,
+ * %addr)
+  ret  %res
+}
+
+define  @ldnt1h_f16( %pred, * %addr) {
+; CHECK-LABEL: ldnt1h_f16:
+; CHECK: ldnt1h { z0.h }, p0/z, [x0, #0, lsl #1]
+; CHECK-NEXT: ret
+  %res = call  @llvm.aarch64.sve.ldnt1.nxv8f16( %pred,
+  * %addr)
+  ret  %res
+}
+
+;
+; LDNT1W
+;
+
+define  @ldnt1w_i32( %pred, * %addr) {
+; CHECK-LABEL: ldnt1w_i32:
+; CHECK: ldnt1w { z0.s }, p0/z, [x0, #0, lsl #2]
+; CHECK-NEXT: ret
+  %res = call  @llvm.aarch64.sve.ldnt1.nxv4i32( 

[PATCH] D71000: [AArch64][SVE] Implement intrinsics for non-temporal loads & stores

2019-12-04 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin created this revision.
kmclaughlin added reviewers: sdesmalen, paulwalker-arm, dancgr, mgudim, 
efriedma.
Herald added subscribers: psnobl, rkruppe, hiraditya, kristof.beyls, tschuett.
Herald added a reviewer: rengolin.
Herald added a project: LLVM.

Adds the following intrinsics:

- llvm.aarch64.sve.ldnt1
- llvm.aarch64.sve.stnt1

This patch also adds the MLOAD & MSTORE AArch64ISD nodes, setting
the MONonTemporal flag when used with the intrinsics above.
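
Because the quoted diffs below lost their scalable-vector types in archiving, here is a reconstructed pair of examples showing how the new intrinsics are called, with the types written back in by hand (a best-effort reading of the tests, not a verbatim excerpt):

define <vscale x 16 x i8> @ldnt1b_example(<vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
  ; Predicated non-temporal load; expected to select ldnt1b { z0.b }, p0/z, [x0, #0]
  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.ldnt1.nxv16i8(<vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr)
  ret <vscale x 16 x i8> %res
}

define void @stnt1b_example(<vscale x 16 x i8> %data, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
  ; Predicated non-temporal store; expected to select stnt1b { z0.b }, p0, [x0, #0]
  call void @llvm.aarch64.sve.stnt1.nxv16i8(<vscale x 16 x i8> %data, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr)
  ret void
}

declare <vscale x 16 x i8> @llvm.aarch64.sve.ldnt1.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>*)
declare void @llvm.aarch64.sve.stnt1.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, <vscale x 16 x i8>*)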


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D71000

Files:
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
  llvm/lib/Target/AArch64/AArch64ISelLowering.h
  llvm/lib/Target/AArch64/AArch64InstrInfo.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll
  llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll

Index: llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll
@@ -0,0 +1,95 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+;
+; STNT1B
+;
+
+define void @stnt1b_i8( %data,  %pred, * %addr) {
+; CHECK-LABEL: stnt1b_i8:
+; CHECK: stnt1b { z0.b }, p0, [x0, #0]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.stnt1.nxv16i8( %data,
+ %pred,
+* %addr)
+  ret void
+}
+
+;
+; STNT1H
+;
+
+define void @stnt1h_i16( %data,  %pred, * %addr) {
+; CHECK-LABEL: stnt1h_i16:
+; CHECK: stnt1h { z0.h }, p0, [x0, #0, lsl #1]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.stnt1.nxv8i16( %data,
+ %pred,
+* %addr)
+  ret void
+}
+
+define void @stnt1h_f16( %data,  %pred, * %addr) {
+; CHECK-LABEL: stnt1h_f16:
+; CHECK: stnt1h { z0.h }, p0, [x0, #0, lsl #1]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.stnt1.nxv8f16( %data,
+ %pred,
+* %addr)
+  ret void
+}
+
+;
+; STNT1W
+;
+
+define void @stnt1w_i32( %data,  %pred, * %addr) {
+; CHECK-LABEL: stnt1w_i32:
+; CHECK: stnt1w { z0.s }, p0, [x0, #0, lsl #2]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.stnt1.nxv4i32( %data,
+ %pred,
+* %addr)
+  ret void
+}
+
+define void @stnt1w_f32( %data,  %pred, * %addr) {
+; CHECK-LABEL: stnt1w_f32:
+; CHECK: stnt1w { z0.s }, p0, [x0, #0, lsl #2]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.stnt1.nxv4f32( %data,
+ %pred,
+* %addr)
+  ret void
+}
+
+;
+; STNT1D
+;
+
+define void @stnt1d_i64( %data,  %pred, * %addr) {
+; CHECK-LABEL: stnt1d_i64:
+; CHECK: stnt1d { z0.d }, p0, [x0, #0, lsl #3]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.stnt1.nxv2i64( %data,
+ %pred,
+* %addr)
+  ret void
+}
+
+define void @stnt1d_f64( %data,  %pred, * %addr) {
+; CHECK-LABEL: stnt1d_f64:
+; CHECK: stnt1d { z0.d }, p0, [x0, #0, lsl #3]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.stnt1.nxv2f64( %data,
+ %pred,
+* %addr)
+  ret void
+}
+
+declare void @llvm.aarch64.sve.stnt1.nxv16i8(, , *)
+declare void @llvm.aarch64.sve.stnt1.nxv8i16(, , *)
+declare void @llvm.aarch64.sve.stnt1.nxv4i32(, , *)
+declare void @llvm.aarch64.sve.stnt1.nxv2i64(, , *)
+declare void @llvm.aarch64.sve.stnt1.nxv8f16(, , *)
+declare void @llvm.aarch64.sve.stnt1.nxv4f32(, , *)
+declare void @llvm.aarch64.sve.stnt1.nxv2f64(, , *)
Index: llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll
@@ -0,0 +1,88 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+;
+; LDNT1B
+;
+
+define  @ldnt1b_i8( %pred, * %addr) {
+; CHECK-LABEL: ldnt1b_i8:
+; CHECK: ldnt1b { z0.b }, p0/z, [x0, #0]
+; CHECK-NEXT: ret
+  %res = call  @llvm.aarch64.sve.ldnt1.nxv16i8( %pred,
+ * %addr)
+  ret  %res
+}
+
+;
+; LDNT1H
+;
+
+define  @ldnt1h_i16( %pred, * %addr) {
+; CHECK-LABEL: ldnt1h_i16:
+; CHECK: ldnt1h { z0.h }, p0/z, [x0, #0, lsl #1]
+; CHECK-NEXT: ret
+  %res = call  @llvm.aarch64.sve.ldnt1.nxv8i16( %pred,
+ * %addr)
+  ret  %res
+}
+
+define  @ldnt1h_f16( %pred, * %addr) {
+; CHECK-LABEL: ldnt1h_f16:
+; CHECK: ldnt1h { z0.h }, p0/z, [x0, #0, lsl #1]
+; CHECK-NEXT: ret
+  %res = call  @llvm.aarch64.sve.ldnt1.nxv8f16( 

[PATCH] D70253: [AArch64][SVE2] Implement remaining SVE2 floating-point intrinsics

2019-12-03 Thread Kerry McLaughlin via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rG8881ac9c3986: [AArch64][SVE2] Implement remaining SVE2 
floating-point intrinsics (authored by kmclaughlin).

Changed prior to commit:
  https://reviews.llvm.org/D70253?vs=229341&id=231886#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D70253/new/

https://reviews.llvm.org/D70253

Files:
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/SVEInstrFormats.td
  llvm/test/CodeGen/AArch64/sve2-intrinsics-fp-int-binary-logarithm.ll
  llvm/test/CodeGen/AArch64/sve2-intrinsics-fp-widening-mul-acc.ll
  llvm/test/CodeGen/AArch64/sve2-intrinsics-non-widening-pairwise-arith.ll

Index: llvm/test/CodeGen/AArch64/sve2-intrinsics-non-widening-pairwise-arith.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve2-intrinsics-non-widening-pairwise-arith.ll
@@ -0,0 +1,191 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
+
+;
+; FADDP
+;
+
+define  @faddp_f16( %pg,  %a,  %b) {
+; CHECK-LABEL: faddp_f16:
+; CHECK: faddp z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.faddp.nxv8f16( %pg,
+   %a,
+   %b)
+  ret  %out
+}
+
+define  @faddp_f32( %pg,  %a,  %b) {
+; CHECK-LABEL: faddp_f32:
+; CHECK: faddp z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.faddp.nxv4f32( %pg,
+%a,
+%b)
+  ret  %out
+}
+
+define  @faddp_f64( %pg,  %a,  %b) {
+; CHECK-LABEL: faddp_f64:
+; CHECK: faddp z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.faddp.nxv2f64( %pg,
+ %a,
+ %b)
+  ret  %out
+}
+
+;
+; FMAXP
+;
+
+define  @fmaxp_f16( %pg,  %a,  %b) {
+; CHECK-LABEL: fmaxp_f16:
+; CHECK: fmaxp z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fmaxp.nxv8f16( %pg,
+   %a,
+   %b)
+  ret  %out
+}
+
+define  @fmaxp_f32( %pg,  %a,  %b) {
+; CHECK-LABEL: fmaxp_f32:
+; CHECK: fmaxp z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fmaxp.nxv4f32( %pg,
+%a,
+%b)
+  ret  %out
+}
+
+define  @fmaxp_f64( %pg,  %a,  %b) {
+; CHECK-LABEL: fmaxp_f64:
+; CHECK: fmaxp z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fmaxp.nxv2f64( %pg,
+ %a,
+ %b)
+  ret  %out
+}
+
+;
+; FMAXNMP
+;
+
+define  @fmaxnmp_f16( %pg,  %a,  %b) {
+; CHECK-LABEL: fmaxnmp_f16:
+; CHECK: fmaxnmp z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fmaxnmp.nxv8f16( %pg,
+ %a,
+ %b)
+  ret  %out
+}
+
+define  @fmaxnmp_f32( %pg,  %a,  %b) {
+; CHECK-LABEL: fmaxnmp_f32:
+; CHECK: fmaxnmp z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fmaxnmp.nxv4f32( %pg,
+  %a,
+  %b)
+  ret  %out
+}
+
+define  @fmaxnmp_f64( %pg,  %a,  %b) {
+; CHECK-LABEL: fmaxnmp_f64:
+; CHECK: fmaxnmp z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fmaxnmp.nxv2f64( %pg,
+   %a,
+   %b)
+  ret  %out
+}
+
+;
+; FMINP
+;
+
+define  @fminp_f16( %pg,  %a,  %b) {
+; CHECK-LABEL: fminp_f16:
+; CHECK: fminp z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fminp.nxv8f16( %pg,
+   %a,
+   %b)
+  ret  %out
+}
+
+define  @fminp_f32( %pg,  %a,  %b) {
+; CHECK-LABEL: fminp_f32:
+; CHECK: fminp z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fminp.nxv4f32( %pg,
+%a,
+%b)
+  ret  %out
+}
+
+define  @fminp_f64( %pg,  %a,  

[PATCH] D70437: [AArch64][SVE] Implement shift intrinsics

2019-12-03 Thread Kerry McLaughlin via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rG7483eb656fd2: [AArch64][SVE] Implement shift intrinsics 
(authored by kmclaughlin).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D70437/new/

https://reviews.llvm.org/D70437

Files:
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
  llvm/lib/Target/AArch64/AArch64ISelLowering.h
  llvm/lib/Target/AArch64/AArch64InstrInfo.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/SVEInstrFormats.td
  llvm/test/CodeGen/AArch64/sve-intrinsics-shifts.ll

Index: llvm/test/CodeGen/AArch64/sve-intrinsics-shifts.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-shifts.ll
@@ -0,0 +1,367 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+;
+; ASR
+;
+
+define  @asr_i8( %pg,  %a,  %b) {
+; CHECK-LABEL: asr_i8:
+; CHECK: asr z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.asr.nxv16i8( %pg,
+%a,
+%b)
+  ret  %out
+}
+
+define  @asr_i16( %pg,  %a,  %b) {
+; CHECK-LABEL: asr_i16:
+; CHECK: asr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.asr.nxv8i16( %pg,
+%a,
+%b)
+  ret  %out
+}
+
+define  @asr_i32( %pg,  %a,  %b) {
+; CHECK-LABEL: asr_i32:
+; CHECK: asr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.asr.nxv4i32( %pg,
+%a,
+%b)
+  ret  %out
+}
+
+define  @asr_i64( %pg,  %a,  %b) {
+; CHECK-LABEL: asr_i64:
+; CHECK: asr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.asr.nxv2i64( %pg,
+%a,
+%b)
+  ret  %out
+}
+
+define  @asr_wide_i8( %pg,  %a,  %b) {
+; CHECK-LABEL: asr_wide_i8:
+; CHECK: asr z0.b, p0/m, z0.b, z1.d
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.asr.wide.nxv16i8( %pg,
+ %a,
+ %b)
+  ret  %out
+}
+
+define  @asr_wide_i16( %pg,  %a,  %b) {
+; CHECK-LABEL: asr_wide_i16:
+; CHECK: asr z0.h, p0/m, z0.h, z1.d
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.asr.wide.nxv8i16( %pg,
+ %a,
+ %b)
+  ret  %out
+}
+
+define  @asr_wide_i32( %pg,  %a,  %b) {
+; CHECK-LABEL: asr_wide_i32:
+; CHECK: asr z0.s, p0/m, z0.s, z1.d
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.asr.wide.nxv4i32( %pg,
+ %a,
+ %b)
+  ret  %out
+}
+
+;
+; ASRD
+;
+
+define  @asrd_i8( %pg,  %a) {
+; CHECK-LABEL: asrd_i8:
+; CHECK: asrd z0.b, p0/m, z0.b, #1
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.asrd.nxv16i8( %pg,
+ %a,
+i32 1)
+  ret  %out
+}
+
+define  @asrd_i16( %pg,  %a) {
+; CHECK-LABEL: asrd_i16:
+; CHECK: asrd z0.h, p0/m, z0.h, #2
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.asrd.nxv8i16( %pg,
+ %a,
+i32 2)
+  ret  %out
+}
+
+define  @asrd_i32( %pg,  %a) {
+; CHECK-LABEL: asrd_i32:
+; CHECK: asrd z0.s, p0/m, z0.s, #31
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.asrd.nxv4i32( %pg,
+ %a,
+i32 31)
+  ret  %out
+}
+
+define  @asrd_i64( %pg,  %a) {
+; CHECK-LABEL: asrd_i64:
+; CHECK: asrd z0.d, p0/m, z0.d, #64
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.asrd.nxv2i64( %pg,
+ %a,
+i32 64)
+  ret  %out
+}
+
+;
+; INSR
+;
+
+define  @insr_i8( %a, i8 %b) {
+; CHECK-LABEL: insr_i8:
+; CHECK: insr z0.b, w0
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.insr.nxv16i8( %a, i8 %b)
+  ret  %out
+}
+
+define  @insr_i16( %a, i16 %b) {
+; CHECK-LABEL: insr_i16:
+; CHECK: insr z0.h, w0
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.insr.nxv8i16( %a, i16 %b)
+  ret  %out

[PATCH] D70437: [AArch64][SVE] Implement shift intrinsics

2019-12-02 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin updated this revision to Diff 231693.
kmclaughlin added a comment.

- Removed re-ordering of integer arithmetic & logical op intrinsic definitions 
in IntrinsicsAArch64.td


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D70437/new/

https://reviews.llvm.org/D70437

Files:
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
  llvm/lib/Target/AArch64/AArch64ISelLowering.h
  llvm/lib/Target/AArch64/AArch64InstrInfo.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/SVEInstrFormats.td
  llvm/test/CodeGen/AArch64/sve-intrinsics-shifts.ll

Index: llvm/test/CodeGen/AArch64/sve-intrinsics-shifts.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-shifts.ll
@@ -0,0 +1,367 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+;
+; ASR
+;
+
+define  @asr_i8( %pg,  %a,  %b) {
+; CHECK-LABEL: asr_i8:
+; CHECK: asr z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.asr.nxv16i8( %pg,
+%a,
+%b)
+  ret  %out
+}
+
+define  @asr_i16( %pg,  %a,  %b) {
+; CHECK-LABEL: asr_i16:
+; CHECK: asr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.asr.nxv8i16( %pg,
+%a,
+%b)
+  ret  %out
+}
+
+define  @asr_i32( %pg,  %a,  %b) {
+; CHECK-LABEL: asr_i32:
+; CHECK: asr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.asr.nxv4i32( %pg,
+%a,
+%b)
+  ret  %out
+}
+
+define  @asr_i64( %pg,  %a,  %b) {
+; CHECK-LABEL: asr_i64:
+; CHECK: asr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.asr.nxv2i64( %pg,
+%a,
+%b)
+  ret  %out
+}
+
+define  @asr_wide_i8( %pg,  %a,  %b) {
+; CHECK-LABEL: asr_wide_i8:
+; CHECK: asr z0.b, p0/m, z0.b, z1.d
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.asr.wide.nxv16i8( %pg,
+ %a,
+ %b)
+  ret  %out
+}
+
+define  @asr_wide_i16( %pg,  %a,  %b) {
+; CHECK-LABEL: asr_wide_i16:
+; CHECK: asr z0.h, p0/m, z0.h, z1.d
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.asr.wide.nxv8i16( %pg,
+ %a,
+ %b)
+  ret  %out
+}
+
+define  @asr_wide_i32( %pg,  %a,  %b) {
+; CHECK-LABEL: asr_wide_i32:
+; CHECK: asr z0.s, p0/m, z0.s, z1.d
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.asr.wide.nxv4i32( %pg,
+ %a,
+ %b)
+  ret  %out
+}
+
+;
+; ASRD
+;
+
+define  @asrd_i8( %pg,  %a) {
+; CHECK-LABEL: asrd_i8:
+; CHECK: asrd z0.b, p0/m, z0.b, #1
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.asrd.nxv16i8( %pg,
+ %a,
+i32 1)
+  ret  %out
+}
+
+define  @asrd_i16( %pg,  %a) {
+; CHECK-LABEL: asrd_i16:
+; CHECK: asrd z0.h, p0/m, z0.h, #2
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.asrd.nxv8i16( %pg,
+ %a,
+i32 2)
+  ret  %out
+}
+
+define  @asrd_i32( %pg,  %a) {
+; CHECK-LABEL: asrd_i32:
+; CHECK: asrd z0.s, p0/m, z0.s, #31
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.asrd.nxv4i32( %pg,
+ %a,
+i32 31)
+  ret  %out
+}
+
+define  @asrd_i64( %pg,  %a) {
+; CHECK-LABEL: asrd_i64:
+; CHECK: asrd z0.d, p0/m, z0.d, #64
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.asrd.nxv2i64( %pg,
+ %a,
+i32 64)
+  ret  %out
+}
+
+;
+; INSR
+;
+
+define  @insr_i8( %a, i8 %b) {
+; CHECK-LABEL: insr_i8:
+; CHECK: insr z0.b, w0
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.insr.nxv16i8( %a, i8 %b)
+  ret  %out
+}
+
+define  @insr_i16( %a, i16 %b) {
+; CHECK-LABEL: insr_i16:
+; CHECK: insr z0.h, w0
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.insr.nxv8i16( %a, i16 %b)
+  ret  %out
+}
+
+define  @insr_i32( %a, i32 

[PATCH] D70437: [AArch64][SVE] Implement shift intrinsics

2019-12-02 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin updated this revision to Diff 231663.
kmclaughlin added a comment.

- Rebased & enclosed additional //setOperationAction// calls within check for 
//Subtarget->isSVE()//


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D70437/new/

https://reviews.llvm.org/D70437

Files:
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
  llvm/lib/Target/AArch64/AArch64ISelLowering.h
  llvm/lib/Target/AArch64/AArch64InstrInfo.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/SVEInstrFormats.td
  llvm/test/CodeGen/AArch64/sve-intrinsics-shifts.ll

Index: llvm/test/CodeGen/AArch64/sve-intrinsics-shifts.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-shifts.ll
@@ -0,0 +1,367 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+;
+; ASR
+;
+
+define  @asr_i8( %pg,  %a,  %b) {
+; CHECK-LABEL: asr_i8:
+; CHECK: asr z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.asr.nxv16i8( %pg,
+%a,
+%b)
+  ret  %out
+}
+
+define  @asr_i16( %pg,  %a,  %b) {
+; CHECK-LABEL: asr_i16:
+; CHECK: asr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.asr.nxv8i16( %pg,
+%a,
+%b)
+  ret  %out
+}
+
+define  @asr_i32( %pg,  %a,  %b) {
+; CHECK-LABEL: asr_i32:
+; CHECK: asr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.asr.nxv4i32( %pg,
+%a,
+%b)
+  ret  %out
+}
+
+define  @asr_i64( %pg,  %a,  %b) {
+; CHECK-LABEL: asr_i64:
+; CHECK: asr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.asr.nxv2i64( %pg,
+%a,
+%b)
+  ret  %out
+}
+
+define  @asr_wide_i8( %pg,  %a,  %b) {
+; CHECK-LABEL: asr_wide_i8:
+; CHECK: asr z0.b, p0/m, z0.b, z1.d
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.asr.wide.nxv16i8( %pg,
+ %a,
+ %b)
+  ret  %out
+}
+
+define  @asr_wide_i16( %pg,  %a,  %b) {
+; CHECK-LABEL: asr_wide_i16:
+; CHECK: asr z0.h, p0/m, z0.h, z1.d
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.asr.wide.nxv8i16( %pg,
+ %a,
+ %b)
+  ret  %out
+}
+
+define  @asr_wide_i32( %pg,  %a,  %b) {
+; CHECK-LABEL: asr_wide_i32:
+; CHECK: asr z0.s, p0/m, z0.s, z1.d
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.asr.wide.nxv4i32( %pg,
+ %a,
+ %b)
+  ret  %out
+}
+
+;
+; ASRD
+;
+
+define  @asrd_i8( %pg,  %a) {
+; CHECK-LABEL: asrd_i8:
+; CHECK: asrd z0.b, p0/m, z0.b, #1
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.asrd.nxv16i8( %pg,
+ %a,
+i32 1)
+  ret  %out
+}
+
+define  @asrd_i16( %pg,  %a) {
+; CHECK-LABEL: asrd_i16:
+; CHECK: asrd z0.h, p0/m, z0.h, #2
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.asrd.nxv8i16( %pg,
+ %a,
+i32 2)
+  ret  %out
+}
+
+define  @asrd_i32( %pg,  %a) {
+; CHECK-LABEL: asrd_i32:
+; CHECK: asrd z0.s, p0/m, z0.s, #31
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.asrd.nxv4i32( %pg,
+ %a,
+i32 31)
+  ret  %out
+}
+
+define  @asrd_i64( %pg,  %a) {
+; CHECK-LABEL: asrd_i64:
+; CHECK: asrd z0.d, p0/m, z0.d, #64
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.asrd.nxv2i64( %pg,
+ %a,
+i32 64)
+  ret  %out
+}
+
+;
+; INSR
+;
+
+define  @insr_i8( %a, i8 %b) {
+; CHECK-LABEL: insr_i8:
+; CHECK: insr z0.b, w0
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.insr.nxv16i8( %a, i8 %b)
+  ret  %out
+}
+
+define  @insr_i16( %a, i16 %b) {
+; CHECK-LABEL: insr_i16:
+; CHECK: insr z0.h, w0
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.insr.nxv8i16( %a, i16 %b)
+  ret  %out
+}
+
+define  @insr_i32( %a, i32 

[PATCH] D70253: [AArch64][SVE2] Implement remaining SVE2 floating-point intrinsics

2019-12-02 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin added inline comments.



Comment at: llvm/include/llvm/IR/IntrinsicsAArch64.td:898
+ llvm_i32_ty],
+[IntrNoMem]>;
+

sdesmalen wrote:
> efriedma wrote:
> > kmclaughlin wrote:
> > > sdesmalen wrote:
> > > > I'd expect the `llvm_i32_ty` to be an immediate for these instructions, 
> > > > right? If so you'll need to add `ImmArg`  to the list of 
> > > > properties.
> > > > 
> > > Thanks for taking a look at this :) I tried your suggestion of adding 
> > > ImmArg to the list of properties here but had some problems with it 
> > > (i.e. Cannot select: intrinsic %llvm.aarch64.sve.fmlalb.lane). I don't 
> > > think this is too much of an issue here as we have additional checks on 
> > > the immediate with VectorIndexH32b, which ensures the immediate is in the 
> > > correct range.
> > The point of immarg markings isn't to assist the backend; it's to ensure IR 
> > optimizations don't break your intrinsic calls.
> The pattern is probably not matching because the immediate operand is a 
> `TargetConstant` where the `AsmVectorIndexOpnd` derives from `ImmLeaf`, 
> rather than `TImmLeaf` as introduced by D58232.
Thanks for the suggestion, this was the reason why the patterns were not 
matching! As this also affects many of the existing intrinsics not added here 
or in D70437, I would prefer to address this fully in a separate patch - do you 
have objections to this?
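
(For readers of the archive: the practical effect of the property under discussion is purely on the IR side. Below is a minimal sketch, assuming a signature along these lines for the lane intrinsic named in the quoted error message; the overload suffix and exact operand types here are assumptions, not taken from the patch, and the function name is invented.)

  ; Assumed declaration, for illustration only; the real one lives in IntrinsicsAArch64.td.
  declare <vscale x 4 x float> @llvm.aarch64.sve.fmlalb.lane.nxv4f32(
      <vscale x 4 x float>, <vscale x 8 x half>, <vscale x 8 x half>, i32)

  define <vscale x 4 x float> @lane_example(<vscale x 4 x float> %acc,
                                            <vscale x 8 x half> %a,
                                            <vscale x 8 x half> %b) {
    ; The trailing i32 is the lane index. ImmArg tells IR-level passes that this
    ; operand must stay a literal constant; VectorIndexH32b only range-checks it
    ; at instruction-selection time, which is the distinction made above.
    %r = call <vscale x 4 x float> @llvm.aarch64.sve.fmlalb.lane.nxv4f32(
              <vscale x 4 x float> %acc, <vscale x 8 x half> %a,
              <vscale x 8 x half> %b, i32 1)
    ret <vscale x 4 x float> %r
  }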


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D70253/new/

https://reviews.llvm.org/D70253





[PATCH] D70253: [AArch64][SVE2] Implement remaining SVE2 floating-point intrinsics

2019-11-27 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin added inline comments.



Comment at: llvm/include/llvm/IR/IntrinsicsAArch64.td:898
+ llvm_i32_ty],
+[IntrNoMem]>;
+

sdesmalen wrote:
> I'd expect the `llvm_i32_ty` to be an immediate for these instructions, 
> right? If so you'll need to add `ImmArg`  to the list of properties.
> 
Thanks for taking a look at this :) I tried your suggestion of adding ImmArg 
to the list of properties here but had some problems with it (i.e. Cannot 
select: intrinsic %llvm.aarch64.sve.fmlalb.lane). I don't think this is too 
much of an issue here as we have additional checks on the immediate with 
VectorIndexH32b, which ensures the immediate is in the correct range.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D70253/new/

https://reviews.llvm.org/D70253





[PATCH] D70180: [AArch64][SVE] Implement floating-point conversion intrinsics

2019-11-26 Thread Kerry McLaughlin via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rG4a649ad21aa2: [AArch64][SVE] Implement floating-point 
conversion intrinsics (authored by kmclaughlin).

Changed prior to commit:
  https://reviews.llvm.org/D70180?vs=229085&id=231032#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D70180/new/

https://reviews.llvm.org/D70180

Files:
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/SVEInstrFormats.td
  llvm/test/CodeGen/AArch64/sve-intrinsics-fp-converts.ll
  llvm/test/CodeGen/AArch64/sve2-intrinsics-fp-converts.ll

Index: llvm/test/CodeGen/AArch64/sve2-intrinsics-fp-converts.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve2-intrinsics-fp-converts.ll
@@ -0,0 +1,84 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
+
+;
+; FCVTLT
+;
+
+define  @fcvtlt_f32_f16( %a,  %pg,  %b) {
+; CHECK-LABEL: fcvtlt_f32_f16:
+; CHECK: fcvtlt z0.s, p0/m, z1.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fcvtlt.f32f16( %a,
+%pg,
+%b)
+  ret  %out
+}
+
+define  @fcvtlt_f64_f32( %a,  %pg,  %b) {
+; CHECK-LABEL: fcvtlt_f64_f32:
+; CHECK: fcvtlt	z0.d, p0/m, z1.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fcvtlt.f64f32( %a,
+ %pg,
+ %b)
+  ret  %out
+}
+
+;
+; FCVTNT
+;
+
+define  @fcvtnt_f16_f32( %a,  %pg,  %b) {
+; CHECK-LABEL: fcvtnt_f16_f32:
+; CHECK: fcvtnt z0.h, p0/m, z1.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fcvtnt.f16f32( %a,
+  %pg,
+  %b)
+  ret  %out
+}
+
+define  @fcvtnt_f32_f64( %a,  %pg,  %b) {
+; CHECK-LABEL: fcvtnt_f32_f64:
+; CHECK: fcvtnt	z0.s, p0/m, z1.d
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fcvtnt.f32f64( %a,
+%pg,
+%b)
+  ret  %out
+}
+
+;
+; FCVTX
+;
+
+define  @fcvtx_f32_f64( %a,  %pg,  %b) {
+; CHECK-LABEL: fcvtx_f32_f64:
+; CHECK: fcvtx z0.s, p0/m, z1.d
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fcvtx.f32f64( %a,
+   %pg,
+   %b)
+  ret  %out
+}
+
+;
+; FCVTXNT
+;
+
+define  @fcvtxnt_f32_f64( %a,  %pg,  %b) {
+; CHECK-LABEL: fcvtxnt_f32_f64:
+; CHECK: fcvtxnt z0.s, p0/m, z1.d
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fcvtxnt.f32f64( %a,
+ %pg,
+ %b)
+  ret  %out
+}
+
+declare  @llvm.aarch64.sve.fcvtlt.f32f16(, , )
+declare  @llvm.aarch64.sve.fcvtlt.f64f32(, , )
+declare  @llvm.aarch64.sve.fcvtnt.f16f32(, , )
+declare  @llvm.aarch64.sve.fcvtnt.f32f64(, , )
+declare  @llvm.aarch64.sve.fcvtx.f32f64(, , )
+declare  @llvm.aarch64.sve.fcvtxnt.f32f64(, , )
Index: llvm/test/CodeGen/AArch64/sve-intrinsics-fp-converts.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-fp-converts.ll
@@ -0,0 +1,400 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+;
+; FCVT
+;
+
+define  @fcvt_f16_f32( %a,  %pg,  %b) {
+; CHECK-LABEL: fcvt_f16_f32:
+; CHECK: fcvt z0.h, p0/m, z1.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fcvt.f16f32( %a,
+ %pg,
+ %b)
+  ret  %out
+}
+
+define  @fcvt_f16_f64( %a,  %pg,  %b) {
+; CHECK-LABEL: fcvt_f16_f64:
+; CHECK: fcvt z0.h, p0/m, z1.d
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fcvt.f16f64( %a,
+ %pg,
+ %b)
+  ret  %out
+}
+
+define  @fcvt_f32_f16( %a,  %pg,  %b) {
+; CHECK-LABEL: fcvt_f32_f16:
+; CHECK: fcvt z0.s, p0/m, z1.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fcvt.f32f16( %a,
+  %pg,
+  %b)
+  ret  %out
+}
+
+define  @fcvt_f32_f64( %a,  %pg,  %b) {
+; CHECK-LABEL: fcvt_f32_f64:
+; CHECK: fcvt z0.s, p0/m, z1.d
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fcvt.f32f64( %a,
+  %pg,
+   

[PATCH] D70437: [AArch64][SVE] Implement shift intrinsics

2019-11-19 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin created this revision.
kmclaughlin added reviewers: huntergr, sdesmalen, dancgr, mgudim.
Herald added subscribers: psnobl, rkruppe, hiraditya, kristof.beyls, tschuett.
Herald added a reviewer: rengolin.
Herald added a project: LLVM.

Adds the following intrinsics:

- asr & asrd
- insr
- lsl & lsr

This patch also adds a new AArch64ISD node (INSR) to represent the 
int_aarch64_sve_insr intrinsic.
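
For illustration, a use of the new intrinsics in IR looks roughly like the sketch below. The <vscale x ...> element types are reconstructed from the asr_i8 and insr_i8 tests in the diff that follows (the archive strips the angle-bracket type spellings), and the function name is invented:

  define <vscale x 16 x i8> @shift_then_insert(<vscale x 16 x i1> %pg,
                                               <vscale x 16 x i8> %a,
                                               <vscale x 16 x i8> %b,
                                               i8 %scalar) {
    ; Predicated arithmetic shift right, lane by lane under %pg.
    %shifted = call <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(
                    <vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
    ; INSR: shift the vector up by one element and place %scalar in element 0;
    ; this is the operation the new AArch64ISD node (INSR) models.
    %out = call <vscale x 16 x i8> @llvm.aarch64.sve.insr.nxv16i8(
                    <vscale x 16 x i8> %shifted, i8 %scalar)
    ret <vscale x 16 x i8> %out
  }

  declare <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
  declare <vscale x 16 x i8> @llvm.aarch64.sve.insr.nxv16i8(<vscale x 16 x i8>, i8)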


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D70437

Files:
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
  llvm/lib/Target/AArch64/AArch64ISelLowering.h
  llvm/lib/Target/AArch64/AArch64InstrInfo.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/SVEInstrFormats.td
  llvm/test/CodeGen/AArch64/sve-intrinsics-shifts.ll

Index: llvm/test/CodeGen/AArch64/sve-intrinsics-shifts.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-shifts.ll
@@ -0,0 +1,367 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+;
+; ASR
+;
+
+define  @asr_i8( %pg,  %a,  %b) {
+; CHECK-LABEL: asr_i8:
+; CHECK: asr z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.asr.nxv16i8( %pg,
+%a,
+%b)
+  ret  %out
+}
+
+define  @asr_i16( %pg,  %a,  %b) {
+; CHECK-LABEL: asr_i16:
+; CHECK: asr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.asr.nxv8i16( %pg,
+%a,
+%b)
+  ret  %out
+}
+
+define  @asr_i32( %pg,  %a,  %b) {
+; CHECK-LABEL: asr_i32:
+; CHECK: asr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.asr.nxv4i32( %pg,
+%a,
+%b)
+  ret  %out
+}
+
+define  @asr_i64( %pg,  %a,  %b) {
+; CHECK-LABEL: asr_i64:
+; CHECK: asr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.asr.nxv2i64( %pg,
+%a,
+%b)
+  ret  %out
+}
+
+define  @asr_wide_i8( %pg,  %a,  %b) {
+; CHECK-LABEL: asr_wide_i8:
+; CHECK: asr z0.b, p0/m, z0.b, z1.d
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.asr.wide.nxv16i8( %pg,
+ %a,
+ %b)
+  ret  %out
+}
+
+define  @asr_wide_i16( %pg,  %a,  %b) {
+; CHECK-LABEL: asr_wide_i16:
+; CHECK: asr z0.h, p0/m, z0.h, z1.d
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.asr.wide.nxv8i16( %pg,
+ %a,
+ %b)
+  ret  %out
+}
+
+define  @asr_wide_i32( %pg,  %a,  %b) {
+; CHECK-LABEL: asr_wide_i32:
+; CHECK: asr z0.s, p0/m, z0.s, z1.d
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.asr.wide.nxv4i32( %pg,
+ %a,
+ %b)
+  ret  %out
+}
+
+;
+; ASRD
+;
+
+define  @asrd_i8( %pg,  %a) {
+; CHECK-LABEL: asrd_i8:
+; CHECK: asrd z0.b, p0/m, z0.b, #1
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.asrd.nxv16i8( %pg,
+ %a,
+i32 1)
+  ret  %out
+}
+
+define  @asrd_i16( %pg,  %a) {
+; CHECK-LABEL: asrd_i16:
+; CHECK: asrd z0.h, p0/m, z0.h, #2
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.asrd.nxv8i16( %pg,
+ %a,
+i32 2)
+  ret  %out
+}
+
+define  @asrd_i32( %pg,  %a) {
+; CHECK-LABEL: asrd_i32:
+; CHECK: asrd z0.s, p0/m, z0.s, #31
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.asrd.nxv4i32( %pg,
+ %a,
+i32 31)
+  ret  %out
+}
+
+define  @asrd_i64( %pg,  %a) {
+; CHECK-LABEL: asrd_i64:
+; CHECK: asrd z0.d, p0/m, z0.d, #64
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.asrd.nxv2i64( %pg,
+ %a,
+i32 64)
+  ret  %out
+}
+
+;
+; INSR
+;
+
+define  @insr_i8( %a, i8 %b) {
+; CHECK-LABEL: insr_i8:
+; CHECK: insr z0.b, w0
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.insr.nxv16i8( %a, i8 %b)
+  ret  %out
+}
+
+define  

[PATCH] D70253: [AArch64][SVE2] Implement remaining SVE2 floating-point intrinsics

2019-11-14 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin created this revision.
kmclaughlin added reviewers: huntergr, sdesmalen, dancgr.
Herald added subscribers: hiraditya, kristof.beyls, tschuett.
Herald added a project: LLVM.

Adds the following intrinsics:

- faddp
- fmaxp, fminp, fmaxnmp & fminnmp
- fmlalb, fmlalt, fmlslb & fmlslt
- flogb
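
As a usage sketch (types reconstructed from the faddp_f16 test in the diff below, since the archive drops the <vscale x ...> spellings; the function name is invented):

  define <vscale x 8 x half> @faddp_example(<vscale x 8 x i1> %pg,
                                            <vscale x 8 x half> %a,
                                            <vscale x 8 x half> %b) {
    ; SVE2 predicated floating-point pairwise add: adjacent pairs of elements
    ; drawn from %a and %b are summed under control of %pg.
    %out = call <vscale x 8 x half> @llvm.aarch64.sve.faddp.nxv8f16(
                    <vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
    ret <vscale x 8 x half> %out
  }

  declare <vscale x 8 x half> @llvm.aarch64.sve.faddp.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)

The fmlalb/fmlalt/fmlslb/fmlslt lane forms additionally take an i32 lane index that must be an immediate; that operand is the subject of the ImmArg discussion elsewhere in this thread.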


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D70253

Files:
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/SVEInstrFormats.td
  llvm/test/CodeGen/AArch64/sve2-intrinsics-fp-int-binary-logarithm.ll
  llvm/test/CodeGen/AArch64/sve2-intrinsics-fp-widening-mul-acc.ll
  llvm/test/CodeGen/AArch64/sve2-intrinsics-non-widening-pairwise-arith.ll

Index: llvm/test/CodeGen/AArch64/sve2-intrinsics-non-widening-pairwise-arith.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve2-intrinsics-non-widening-pairwise-arith.ll
@@ -0,0 +1,191 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
+
+;
+; FADDP
+;
+
+define  @faddp_f16( %pg,  %a,  %b) {
+; CHECK-LABEL: faddp_f16:
+; CHECK: faddp z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.faddp.nxv8f16( %pg,
+   %a,
+   %b)
+  ret  %out
+}
+
+define  @faddp_f32( %pg,  %a,  %b) {
+; CHECK-LABEL: faddp_f32:
+; CHECK: faddp z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.faddp.nxv4f32( %pg,
+%a,
+%b)
+  ret  %out
+}
+
+define  @faddp_f64( %pg,  %a,  %b) {
+; CHECK-LABEL: faddp_f64:
+; CHECK: faddp z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.faddp.nxv2f64( %pg,
+ %a,
+ %b)
+  ret  %out
+}
+
+;
+; FMAXP
+;
+
+define  @fmaxp_f16( %pg,  %a,  %b) {
+; CHECK-LABEL: fmaxp_f16:
+; CHECK: fmaxp z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fmaxp.nxv8f16( %pg,
+   %a,
+   %b)
+  ret  %out
+}
+
+define  @fmaxp_f32( %pg,  %a,  %b) {
+; CHECK-LABEL: fmaxp_f32:
+; CHECK: fmaxp z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fmaxp.nxv4f32( %pg,
+%a,
+%b)
+  ret  %out
+}
+
+define  @fmaxp_f64( %pg,  %a,  %b) {
+; CHECK-LABEL: fmaxp_f64:
+; CHECK: fmaxp z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fmaxp.nxv2f64( %pg,
+ %a,
+ %b)
+  ret  %out
+}
+
+;
+; FMAXNMP
+;
+
+define  @fmaxnmp_f16( %pg,  %a,  %b) {
+; CHECK-LABEL: fmaxnmp_f16:
+; CHECK: fmaxnmp z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fmaxnmp.nxv8f16( %pg,
+ %a,
+ %b)
+  ret  %out
+}
+
+define  @fmaxnmp_f32( %pg,  %a,  %b) {
+; CHECK-LABEL: fmaxnmp_f32:
+; CHECK: fmaxnmp z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fmaxnmp.nxv4f32( %pg,
+  %a,
+  %b)
+  ret  %out
+}
+
+define  @fmaxnmp_f64( %pg,  %a,  %b) {
+; CHECK-LABEL: fmaxnmp_f64:
+; CHECK: fmaxnmp z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fmaxnmp.nxv2f64( %pg,
+   %a,
+   %b)
+  ret  %out
+}
+
+;
+; FMINP
+;
+
+define  @fminp_f16( %pg,  %a,  %b) {
+; CHECK-LABEL: fminp_f16:
+; CHECK: fminp z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fminp.nxv8f16( %pg,
+   %a,
+   %b)
+  ret  %out
+}
+
+define  @fminp_f32( %pg,  %a,  %b) {
+; CHECK-LABEL: fminp_f32:
+; CHECK: fminp z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fminp.nxv4f32( %pg,
+%a,
+%b)
+  ret  %out
+}
+
+define  @fminp_f64( %pg,  %a,  %b) {
+; CHECK-LABEL: fminp_f64:
+; CHECK: 

[PATCH] D69858: [AArch64][SVE] Implement floating-point comparison & reduction intrinsics

2019-11-14 Thread Kerry McLaughlin via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rGf9dd03b135d7: [AArch64][SVE] Implement floating-point 
comparison & reduction intrinsics (authored by kmclaughlin).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D69858/new/

https://reviews.llvm.org/D69858

Files:
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/SVEInstrFormats.td
  llvm/test/CodeGen/AArch64/sve-intrinsics-fp-compares.ll
  llvm/test/CodeGen/AArch64/sve-intrinsics-fp-reduce.ll

Index: llvm/test/CodeGen/AArch64/sve-intrinsics-fp-reduce.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-fp-reduce.ll
@@ -0,0 +1,214 @@
+; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s
+
+;
+; FADDA
+;
+
+define half @fadda_f16( %pg, half %init,  %a) {
+; CHECK-LABEL: fadda_f16:
+; CHECK: fadda h0, p0, h0, z1.h
+; CHECK-NEXT: ret
+  %res = call half @llvm.aarch64.sve.fadda.nxv8f16( %pg,
+   half %init,
+%a)
+  ret half %res
+}
+
+define float @fadda_f32( %pg, float %init,  %a) {
+; CHECK-LABEL: fadda_f32:
+; CHECK: fadda s0, p0, s0, z1.s
+; CHECK-NEXT: ret
+  %res = call float @llvm.aarch64.sve.fadda.nxv4f32( %pg,
+float %init,
+ %a)
+  ret float %res
+}
+
+define double @fadda_f64( %pg, double %init,  %a) {
+; CHECK-LABEL: fadda_f64:
+; CHECK: fadda d0, p0, d0, z1.d
+; CHECK-NEXT: ret
+  %res = call double @llvm.aarch64.sve.fadda.nxv2f64( %pg,
+ double %init,
+  %a)
+  ret double %res
+}
+
+;
+; FADDV
+;
+
+define half @faddv_f16( %pg,  %a) {
+; CHECK-LABEL: faddv_f16:
+; CHECK: faddv h0, p0, z0.h
+; CHECK-NEXT: ret
+  %res = call half @llvm.aarch64.sve.faddv.nxv8f16( %pg,
+%a)
+  ret half %res
+}
+
+define float @faddv_f32( %pg,  %a) {
+; CHECK-LABEL: faddv_f32:
+; CHECK: faddv s0, p0, z0.s
+; CHECK-NEXT: ret
+  %res = call float @llvm.aarch64.sve.faddv.nxv4f32( %pg,
+ %a)
+  ret float %res
+}
+
+define double @faddv_f64( %pg,  %a) {
+; CHECK-LABEL: faddv_f64:
+; CHECK: faddv d0, p0, z0.d
+; CHECK-NEXT: ret
+  %res = call double @llvm.aarch64.sve.faddv.nxv2f64( %pg,
+  %a)
+  ret double %res
+}
+
+;
+; FMAXNMV
+;
+
+define half @fmaxnmv_f16( %pg,  %a) {
+; CHECK-LABEL: fmaxnmv_f16:
+; CHECK: fmaxnmv h0, p0, z0.h
+; CHECK-NEXT: ret
+  %res = call half @llvm.aarch64.sve.fmaxnmv.nxv8f16( %pg,
+  %a)
+  ret half %res
+}
+
+define float @fmaxnmv_f32( %pg,  %a) {
+; CHECK-LABEL: fmaxnmv_f32:
+; CHECK: fmaxnmv s0, p0, z0.s
+; CHECK-NEXT: ret
+  %res = call float @llvm.aarch64.sve.fmaxnmv.nxv4f32( %pg,
+   %a)
+  ret float %res
+}
+
+define double @fmaxnmv_f64( %pg,  %a) {
+; CHECK-LABEL: fmaxnmv_f64:
+; CHECK: fmaxnmv d0, p0, z0.d
+; CHECK-NEXT: ret
+  %res = call double @llvm.aarch64.sve.fmaxnmv.nxv2f64( %pg,
+%a)
+  ret double %res
+}
+
+;
+; FMAXV
+;
+
+define half @fmaxv_f16( %pg,  %a) {
+; CHECK-LABEL: fmaxv_f16:
+; CHECK: fmaxv h0, p0, z0.h
+; CHECK-NEXT: ret
+  %res = call half @llvm.aarch64.sve.fmaxv.nxv8f16( %pg,
+%a)
+  ret half %res
+}
+
+define float @fmaxv_f32( %pg,  %a) {
+; CHECK-LABEL: fmaxv_f32:
+; CHECK: fmaxv s0, p0, z0.s
+; CHECK-NEXT: ret
+  %res = call float @llvm.aarch64.sve.fmaxv.nxv4f32( %pg,
+ %a)
+  ret float %res
+}
+
+define double @fmaxv_f64( %pg,  %a) {
+; CHECK-LABEL: fmaxv_f64:
+; CHECK: fmaxv d0, p0, z0.d
+; CHECK-NEXT: ret
+  %res = call double @llvm.aarch64.sve.fmaxv.nxv2f64( %pg,
+  %a)
+  ret double %res
+}
+
+;
+; FMINNMV
+;
+
+define half @fminnmv_f16( %pg,  %a) {
+; CHECK-LABEL: fminnmv_f16:
+; CHECK: fminnmv h0, p0, z0.h
+; CHECK-NEXT: ret
+  %res = call half @llvm.aarch64.sve.fminnmv.nxv8f16( %pg,
+  %a)
+  ret half %res
+}
+
+define float @fminnmv_f32( %pg,  %a) {
+; CHECK-LABEL: fminnmv_f32:
+; CHECK: fminnmv s0, p0, z0.s
+; CHECK-NEXT: ret
+  %res = call float @llvm.aarch64.sve.fminnmv.nxv4f32( %pg,
+   %a)
+  ret float %res
+}
+
+define double @fminnmv_f64( %pg,  %a) {
+; CHECK-LABEL: fminnmv_f64:
+; CHECK: fminnmv d0, p0, z0.d
+; CHECK-NEXT: ret
+  %res = call double 

[PATCH] D69800: [AArch64][SVE] Implement remaining floating-point arithmetic intrinsics

2019-11-14 Thread Kerry McLaughlin via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rGcd83d9ff5c90: [AArch64][SVE] Implement remaining 
floating-point arithmetic intrinsics (authored by kmclaughlin).

Changed prior to commit:
  https://reviews.llvm.org/D69800?vs=227688&id=229276#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D69800/new/

https://reviews.llvm.org/D69800

Files:
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/SVEInstrFormats.td
  llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith.ll

Index: llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith.ll
===
--- llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith.ll
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith.ll
@@ -35,6 +35,40 @@
 }
 
 ;
+; FABS
+;
+
+define  @fabs_h( %a,  %pg,  %b) {
+; CHECK-LABEL: fabs_h:
+; CHECK: fabs z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fabs.nxv8f16( %a,
+  %pg,
+  %b)
+  ret  %out
+}
+
+define  @fabs_s( %a,  %pg,  %b) {
+; CHECK-LABEL: fabs_s:
+; CHECK: fabs z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fabs.nxv4f32( %a,
+   %pg,
+   %b)
+  ret  %out
+}
+
+define  @fabs_d( %a,  %pg,  %b) {
+; CHECK-LABEL: fabs_d:
+; CHECK: fabs z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fabs.nxv2f64( %a,
+%pg,
+%b)
+  ret  %out
+}
+
+;
 ; FADD
 ;
 
@@ -242,6 +276,34 @@
 }
 
 ;
+; FEXPA
+;
+
+define  @fexpa_h( %a) {
+; CHECK-LABEL: fexpa_h:
+; CHECK: fexpa z0.h, z0.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fexpa.x.nxv8f16( %a)
+  ret  %out
+}
+
+define  @fexpa_s( %a) {
+; CHECK-LABEL: fexpa_s:
+; CHECK: fexpa z0.s, z0.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fexpa.x.nxv4f32( %a)
+  ret  %out
+}
+
+define  @fexpa_d( %pg,  %a) {
+; CHECK-LABEL: fexpa_d:
+; CHECK: fexpa z0.d, z0.d
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fexpa.x.nxv2f64( %a)
+  ret  %out
+}
+
+;
 ; FMAD
 ;
 
@@ -702,36 +764,36 @@
 }
 
 ;
-; FSCALE
+; FNEG
 ;
 
-define  @fscale_h( %pg,  %a,  %b) {
-; CHECK-LABEL: fscale_h:
-; CHECK: fscale z0.h, p0/m, z0.h, z1.h
+define  @fneg_h( %a,  %pg,  %b) {
+; CHECK-LABEL: fneg_h:
+; CHECK: fneg z0.h, p0/m, z1.h
 ; CHECK-NEXT: ret
-  %out = call  @llvm.aarch64.sve.fscale.nxv8f16( %pg,
-%a,
-%b)
+  %out = call  @llvm.aarch64.sve.fneg.nxv8f16( %a,
+  %pg,
+  %b)
   ret  %out
 }
 
-define  @fscale_s( %pg,  %a,  %b) {
-; CHECK-LABEL: fscale_s:
-; CHECK: fscale z0.s, p0/m, z0.s, z1.s
+define  @fneg_s( %a,  %pg,  %b) {
+; CHECK-LABEL: fneg_s:
+; CHECK: fneg z0.s, p0/m, z1.s
 ; CHECK-NEXT: ret
-  %out = call  @llvm.aarch64.sve.fscale.nxv4f32( %pg,
- %a,
- %b)
+  %out = call  @llvm.aarch64.sve.fneg.nxv4f32( %a,
+   %pg,
+   %b)
   ret  %out
 }
 
-define  @fscale_d( %pg,  %a,  %b) {
-; CHECK-LABEL: fscale_d:
-; CHECK: fscale z0.d, p0/m, z0.d, z1.d
+define  @fneg_d( %a,  %pg,  %b) {
+; CHECK-LABEL: fneg_d:
+; CHECK: fneg z0.d, p0/m, z1.d
 ; CHECK-NEXT: ret
-  %out = call  @llvm.aarch64.sve.fscale.nxv2f64( %pg,
-  %a,
-  %b)
+  %out = call  @llvm.aarch64.sve.fneg.nxv2f64( %a,
+%pg,
+%b)
   ret  %out
 }
 
@@ -884,6 +946,402 @@
 }
 
 ;
+; FRECPE
+;
+
+define  @frecpe_h( %a) {
+; CHECK-LABEL: frecpe_h:
+; CHECK: frecpe z0.h, z0.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.frecpe.x.nxv8f16( %a)
+  ret  %out
+}
+
+define  @frecpe_s( %a) {
+; CHECK-LABEL: frecpe_s:
+; CHECK: frecpe z0.s, z0.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.frecpe.x.nxv4f32( %a)
+  ret  %out
+}
+
+define  @frecpe_d( %pg,  %a) {
+; CHECK-LABEL: frecpe_d:
+; CHECK: frecpe z0.d, z0.d
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.frecpe.x.nxv2f64( %a)
+  ret  %out
+}
+
+;
+; 

[PATCH] D69707: [AArch64][SVE] Implement additional floating-point arithmetic intrinsics

2019-11-14 Thread Kerry McLaughlin via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rGf7848fd8f7b5: [AArch64][SVE] Implement additional 
floating-point arithmetic intrinsics (authored by kmclaughlin).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D69707/new/

https://reviews.llvm.org/D69707

Files:
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64InstrFormats.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/SVEInstrFormats.td
  llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith.ll

Index: llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith.ll
===
--- llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith.ll
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith.ll
@@ -69,6 +69,111 @@
 }
 
 ;
+; FCADD
+;
+
+define  @fcadd_h( %pg,  %a,  %b) {
+; CHECK-LABEL: fcadd_h:
+; CHECK: fcadd z0.h, p0/m, z0.h, z1.h, #90
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fcadd.nxv8f16( %pg,
+   %a,
+   %b,
+  i32 90)
+  ret  %out
+}
+
+define  @fcadd_s( %pg,  %a,  %b) {
+; CHECK-LABEL: fcadd_s:
+; CHECK: fcadd z0.s, p0/m, z0.s, z1.s, #270
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fcadd.nxv4f32( %pg,
+%a,
+%b,
+   i32 270)
+  ret  %out
+}
+
+define  @fcadd_d( %pg,  %a,  %b) {
+; CHECK-LABEL: fcadd_d:
+; CHECK: fcadd z0.d, p0/m, z0.d, z1.d, #90
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fcadd.nxv2f64( %pg,
+ %a,
+ %b,
+i32 90)
+  ret  %out
+}
+
+;
+; FCMLA
+;
+
+define  @fcmla_h( %pg,  %a,  %b,  %c) {
+; CHECK-LABEL: fcmla_h:
+; CHECK: fcmla z0.h, p0/m, z1.h, z2.h, #90
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fcmla.nxv8f16( %pg,
+   %a,
+   %b,
+   %c,
+  i32 90)
+  ret  %out
+}
+
+define  @fcmla_s( %pg,  %a,  %b,  %c) {
+; CHECK-LABEL: fcmla_s:
+; CHECK: fcmla z0.s, p0/m, z1.s, z2.s, #180
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fcmla.nxv4f32( %pg,
+%a,
+%b,
+%c,
+   i32 180)
+  ret  %out
+}
+
+define  @fcmla_d( %pg,  %a,  %b,  %c) {
+; CHECK-LABEL: fcmla_d:
+; CHECK: fcmla z0.d, p0/m, z1.d, z2.d, #270
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fcmla.nxv2f64( %pg,
+ %a,
+ %b,
+ %c,
+i32 270)
+  ret  %out
+}
+
+;
+; FCMLA (Indexed)
+;
+
+define  @fcmla_lane_h( %a,  %b,  %c) {
+; CHECK-LABEL: fcmla_lane_h:
+; CHECK: fcmla z0.h, z1.h, z2.h[3], #0
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fcmla.lane.nxv8f16( %a,
+%b,
+%c,
+   i32 3,
+   i32 0)
+  ret  %out
+}
+
+define  @fcmla_lane_s( %a,  %b,  %c) {
+; CHECK-LABEL: fcmla_lane_s:
+; CHECK: fcmla z0.s, z1.s, z2.s[1], #90
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fcmla.lane.nxv4f32( %a,
+ %b,
+ %c,
+i32 1,
+i32 90)
+  ret  %out
+}
+
+;
 ; FDIV
 ;
 
@@ -137,6 +242,43 @@
 }
 
 ;
+; FMAD
+;
+
+define  @fmad_h( %pg,  %a,  %b,  %c) {
+; CHECK-LABEL: fmad_h:
+; CHECK: fmad z0.h, p0/m, z1.h, z2.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fmad.nxv8f16( %pg,
+ 

[PATCH] D69707: [AArch64][SVE] Implement additional floating-point arithmetic intrinsics

2019-11-13 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin updated this revision to Diff 229129.
kmclaughlin added a comment.

- Rebased & removed unused //llvm.aarch64.sve.fcmla.lane.nxv2f64// from 
sve-intrinsics-fp-arith.ll


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D69707/new/

https://reviews.llvm.org/D69707

Files:
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64InstrFormats.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/SVEInstrFormats.td
  llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith.ll

Index: llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith.ll
===
--- llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith.ll
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith.ll
@@ -69,6 +69,111 @@
 }
 
 ;
+; FCADD
+;
+
+define  @fcadd_h( %pg,  %a,  %b) {
+; CHECK-LABEL: fcadd_h:
+; CHECK: fcadd z0.h, p0/m, z0.h, z1.h, #90
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fcadd.nxv8f16( %pg,
+   %a,
+   %b,
+  i32 90)
+  ret  %out
+}
+
+define  @fcadd_s( %pg,  %a,  %b) {
+; CHECK-LABEL: fcadd_s:
+; CHECK: fcadd z0.s, p0/m, z0.s, z1.s, #270
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fcadd.nxv4f32( %pg,
+%a,
+%b,
+   i32 270)
+  ret  %out
+}
+
+define  @fcadd_d( %pg,  %a,  %b) {
+; CHECK-LABEL: fcadd_d:
+; CHECK: fcadd z0.d, p0/m, z0.d, z1.d, #90
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fcadd.nxv2f64( %pg,
+ %a,
+ %b,
+i32 90)
+  ret  %out
+}
+
+;
+; FCMLA
+;
+
+define  @fcmla_h( %pg,  %a,  %b,  %c) {
+; CHECK-LABEL: fcmla_h:
+; CHECK: fcmla z0.h, p0/m, z1.h, z2.h, #90
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fcmla.nxv8f16( %pg,
+   %a,
+   %b,
+   %c,
+  i32 90)
+  ret  %out
+}
+
+define  @fcmla_s( %pg,  %a,  %b,  %c) {
+; CHECK-LABEL: fcmla_s:
+; CHECK: fcmla z0.s, p0/m, z1.s, z2.s, #180
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fcmla.nxv4f32( %pg,
+%a,
+%b,
+%c,
+   i32 180)
+  ret  %out
+}
+
+define  @fcmla_d( %pg,  %a,  %b,  %c) {
+; CHECK-LABEL: fcmla_d:
+; CHECK: fcmla z0.d, p0/m, z1.d, z2.d, #270
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fcmla.nxv2f64( %pg,
+ %a,
+ %b,
+ %c,
+i32 270)
+  ret  %out
+}
+
+;
+; FCMLA (Indexed)
+;
+
+define  @fcmla_lane_h( %a,  %b,  %c) {
+; CHECK-LABEL: fcmla_lane_h:
+; CHECK: fcmla z0.h, z1.h, z2.h[3], #0
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fcmla.lane.nxv8f16( %a,
+%b,
+%c,
+   i32 3,
+   i32 0)
+  ret  %out
+}
+
+define  @fcmla_lane_s( %a,  %b,  %c) {
+; CHECK-LABEL: fcmla_lane_s:
+; CHECK: fcmla z0.s, z1.s, z2.s[1], #90
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fcmla.lane.nxv4f32( %a,
+ %b,
+ %c,
+i32 1,
+i32 90)
+  ret  %out
+}
+
+;
 ; FDIV
 ;
 
@@ -137,6 +242,43 @@
 }
 
 ;
+; FMAD
+;
+
+define  @fmad_h( %pg,  %a,  %b,  %c) {
+; CHECK-LABEL: fmad_h:
+; CHECK: fmad z0.h, p0/m, z1.h, z2.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fmad.nxv8f16( %pg,
+  %a,
+

[PATCH] D70180: [AArch64][SVE] Implement floating-point conversion intrinsics

2019-11-13 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin created this revision.
kmclaughlin added reviewers: huntergr, sdesmalen, dancgr, mgudim.
Herald added subscribers: psnobl, rkruppe, hiraditya, kristof.beyls, tschuett.
Herald added a project: LLVM.

Adds intrinsics for the following:

- fcvt
- fcvtzs & fcvtzu
- scvtf & ucvtf
- fcvtlt, fcvtnt
- fcvtx & fcvtxnt
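
A sketch of one of the merging conversions follows, with the <vscale x ...> types reconstructed from the fcvt_f16_f32 test in the diff below (the archive strips them); the function name is invented and the exact operand types should be checked against the patch itself:

  define <vscale x 8 x half> @fcvt_example(<vscale x 8 x half> %inactive,
                                           <vscale x 4 x i1> %pg,
                                           <vscale x 4 x float> %b) {
    ; Narrowing convert, merging form: active .s lanes of %b are rounded to f16;
    ; result lanes not covered by %pg are taken from the first operand.
    %out = call <vscale x 8 x half> @llvm.aarch64.sve.fcvt.f16f32(
                    <vscale x 8 x half> %inactive, <vscale x 4 x i1> %pg, <vscale x 4 x float> %b)
    ret <vscale x 8 x half> %out
  }

  declare <vscale x 8 x half> @llvm.aarch64.sve.fcvt.f16f32(<vscale x 8 x half>, <vscale x 4 x i1>, <vscale x 4 x float>)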


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D70180

Files:
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/SVEInstrFormats.td
  llvm/test/CodeGen/AArch64/sve-intrinsics-fp-converts.ll
  llvm/test/CodeGen/AArch64/sve2-intrinsics-fp-converts.ll

Index: llvm/test/CodeGen/AArch64/sve2-intrinsics-fp-converts.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve2-intrinsics-fp-converts.ll
@@ -0,0 +1,84 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
+
+;
+; FCVTLT
+;
+
+define  @fcvtlt_f32_f16( %a,  %pg,  %b) {
+; CHECK-LABEL: fcvtlt_f32_f16:
+; CHECK: fcvtlt z0.s, p0/m, z1.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fcvtlt.f32f16( %a,
+%pg,
+%b)
+  ret  %out
+}
+
+define  @fcvtlt_f64_f32( %a,  %pg,  %b) {
+; CHECK-LABEL: fcvtlt_f64_f32:
+; CHECK: fcvtlt	z0.d, p0/m, z1.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fcvtlt.f64f32( %a,
+ %pg,
+ %b)
+  ret  %out
+}
+
+;
+; FCVTNT
+;
+
+define  @fcvtnt_f16_f32( %a,  %pg,  %b) {
+; CHECK-LABEL: fcvtnt_f16_f32:
+; CHECK: fcvtnt z0.h, p0/m, z1.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fcvtnt.f16f32( %a,
+  %pg,
+  %b)
+  ret  %out
+}
+
+define  @fcvtnt_f32_f64( %a,  %pg,  %b) {
+; CHECK-LABEL: fcvtnt_f32_f64:
+; CHECK: fcvtnt	z0.s, p0/m, z1.d
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fcvtnt.f32f64( %a,
+%pg,
+%b)
+  ret  %out
+}
+
+;
+; FCVTX
+;
+
+define  @fcvtx_f32_f64( %a,  %pg,  %b) {
+; CHECK-LABEL: fcvtx_f32_f64:
+; CHECK: fcvtx z0.s, p0/m, z1.d
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fcvtx.f32f64( %a,
+   %pg,
+   %b)
+  ret  %out
+}
+
+;
+; FCVTXNT
+;
+
+define  @fcvtxnt_f32_f64( %a,  %pg,  %b) {
+; CHECK-LABEL: fcvtxnt_f32_f64:
+; CHECK: fcvtxnt z0.s, p0/m, z1.d
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fcvtxnt.f32f64( %a,
+ %pg,
+ %b)
+  ret  %out
+}
+
+declare  @llvm.aarch64.sve.fcvtlt.f32f16(, , )
+declare  @llvm.aarch64.sve.fcvtlt.f64f32(, , )
+declare  @llvm.aarch64.sve.fcvtnt.f16f32(, , )
+declare  @llvm.aarch64.sve.fcvtnt.f32f64(, , )
+declare  @llvm.aarch64.sve.fcvtx.f32f64(, , )
+declare  @llvm.aarch64.sve.fcvtxnt.f32f64(, , )
Index: llvm/test/CodeGen/AArch64/sve-intrinsics-fp-converts.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-fp-converts.ll
@@ -0,0 +1,400 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+;
+; FCVT
+;
+
+define  @fcvt_f16_f32( %a,  %pg,  %b) {
+; CHECK-LABEL: fcvt_f16_f32:
+; CHECK: fcvt z0.h, p0/m, z1.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fcvt.f16f32( %a,
+ %pg,
+ %b)
+  ret  %out
+}
+
+define  @fcvt_f16_f64( %a,  %pg,  %b) {
+; CHECK-LABEL: fcvt_f16_f64:
+; CHECK: fcvt z0.h, p0/m, z1.d
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fcvt.f16f64( %a,
+ %pg,
+ %b)
+  ret  %out
+}
+
+define  @fcvt_f32_f16( %a,  %pg,  %b) {
+; CHECK-LABEL: fcvt_f32_f16:
+; CHECK: fcvt z0.s, p0/m, z1.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fcvt.f32f16( %a,
+  %pg,
+  %b)
+  ret  %out
+}
+
+define  @fcvt_f32_f64( %a,  %pg,  %b) {
+; CHECK-LABEL: fcvt_f32_f64:
+; CHECK: fcvt z0.s, p0/m, z1.d
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fcvt.f32f64( %a,
+  %pg,
+   

[PATCH] D69707: [AArch64][SVE] Implement additional floating-point arithmetic intrinsics

2019-11-11 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin updated this revision to Diff 228660.
kmclaughlin added a comment.

- Changed target constant to MVT::i32 in complexrotateop & complexrotateopodd 
definitions


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D69707/new/

https://reviews.llvm.org/D69707

Files:
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64InstrFormats.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/SVEInstrFormats.td
  llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith.ll

Index: llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith.ll
===
--- llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith.ll
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith.ll
@@ -69,6 +69,112 @@
 }
 
 ;
+; FCADD
+;
+
+define  @fcadd_h( %pg,  %a,  %b) {
+; CHECK-LABEL: fcadd_h:
+; CHECK: fcadd z0.h, p0/m, z0.h, z1.h, #90
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fcadd.nxv8f16( %pg,
+   %a,
+   %b,
+  i32 90)
+  ret  %out
+}
+
+define  @fcadd_s( %pg,  %a,  %b) {
+; CHECK-LABEL: fcadd_s:
+; CHECK: fcadd z0.s, p0/m, z0.s, z1.s, #270
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fcadd.nxv4f32( %pg,
+%a,
+%b,
+   i32 270)
+  ret  %out
+}
+
+define  @fcadd_d( %pg,  %a,  %b) {
+; CHECK-LABEL: fcadd_d:
+; CHECK: fcadd z0.d, p0/m, z0.d, z1.d, #90
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fcadd.nxv2f64( %pg,
+ %a,
+ %b,
+i32 90)
+  ret  %out
+}
+
+;
+; FCMLA
+;
+
+define  @fcmla_h( %pg,  %a,  %b,  %c) {
+; CHECK-LABEL: fcmla_h:
+; CHECK: fcmla z0.h, p0/m, z1.h, z2.h, #90
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fcmla.nxv8f16( %pg,
+   %a,
+   %b,
+   %c,
+  i32 90)
+  ret  %out
+}
+
+define  @fcmla_s( %pg,  %a,  %b,  %c) {
+; CHECK-LABEL: fcmla_s:
+; CHECK: fcmla z0.s, p0/m, z1.s, z2.s, #180
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fcmla.nxv4f32( %pg,
+%a,
+%b,
+%c,
+   i32 180)
+  ret  %out
+}
+
+define  @fcmla_d( %pg,  %a,  %b,  %c) {
+; CHECK-LABEL: fcmla_d:
+; CHECK: fcmla z0.d, p0/m, z1.d, z2.d, #270
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fcmla.nxv2f64( %pg,
+ %a,
+ %b,
+ %c,
+i32 270)
+  ret  %out
+}
+
+;
+; FCMLA (Indexed)
+;
+
+define  @fcmla_lane_h( %a,  %b,  %c) {
+; CHECK-LABEL: fcmla_lane_h:
+; CHECK: fcmla z0.h, z1.h, z2.h[3], #0
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fcmla.lane.nxv8f16( %a,
+%b,
+%c,
+   i32 3,
+   i32 0)
+  ret  %out
+}
+
+define  @fcmla_lane_s( %a,  %b,  %c) {
+; CHECK-LABEL: fcmla_lane_s:
+; CHECK: fcmla z0.s, z1.s, z2.s[1], #90
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fcmla.lane.nxv4f32( %a,
+ %b,
+ %c,
+i32 1,
+i32 90)
+  ret  %out
+}
+
+
+;
 ; FDIV
 ;
 
@@ -137,6 +243,43 @@
 }
 
 ;
+; FMAD
+;
+
+define  @fmad_h( %pg,  %a,  %b,  %c) {
+; CHECK-LABEL: fmad_h:
+; CHECK: fmad z0.h, p0/m, z1.h, z2.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fmad.nxv8f16( %pg,
+  %a,
+   

[PATCH] D69800: [AArch64][SVE] Implement remaining floating-point arithmetic intrinsics

2019-11-08 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin marked an inline comment as done.
kmclaughlin added inline comments.



Comment at: llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith.ll:767
 ;
-; FSCALE
+; FNEG
 ;

sdesmalen wrote:
> Why are you moving this test and changing fscale -> fneg here?
The rest of the tests here are in order and I noticed that fscale was in the 
wrong place, so I moved it further down.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D69800/new/

https://reviews.llvm.org/D69800





[PATCH] D69858: [AArch64][SVE] Implement floating-point comparison & reduction intrinsics

2019-11-05 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin created this revision.
kmclaughlin added reviewers: sdesmalen, huntergr, dancgr, mgudim.
Herald added subscribers: psnobl, rkruppe, hiraditya, kristof.beyls, tschuett.
Herald added a project: LLVM.

Adds intrinsics for the following:

- fadda & faddv
- fminv, fmaxv, fminnmv & fmaxnmv
- facge & facgt
- fcmp[eq|ge|gt|ne|uo]
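
For example, the ordered reduction maps to IR roughly as below (scalar and vector types taken from the fadda_f16 test in the diff; the archive drops the <vscale x ...> spellings and the function name is invented):

  define half @fadda_example(<vscale x 8 x i1> %pg, half %init, <vscale x 8 x half> %a) {
    ; fadda is a strictly-ordered floating-point add reduction: it starts from
    ; %init and folds in the active lanes of %a in element order, returning a scalar.
    %res = call half @llvm.aarch64.sve.fadda.nxv8f16(<vscale x 8 x i1> %pg,
                                                     half %init,
                                                     <vscale x 8 x half> %a)
    ret half %res
  }

  declare half @llvm.aarch64.sve.fadda.nxv8f16(<vscale x 8 x i1>, half, <vscale x 8 x half>)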


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D69858

Files:
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/SVEInstrFormats.td
  llvm/test/CodeGen/AArch64/sve-intrinsics-fp-compares.ll
  llvm/test/CodeGen/AArch64/sve-intrinsics-fp-reduce.ll

Index: llvm/test/CodeGen/AArch64/sve-intrinsics-fp-reduce.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-fp-reduce.ll
@@ -0,0 +1,214 @@
+; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s
+
+;
+; FADDA
+;
+
+define half @fadda_f16( %pg, half %init,  %a) {
+; CHECK-LABEL: fadda_f16:
+; CHECK: fadda h0, p0, h0, z1.h
+; CHECK-NEXT: ret
+  %res = call half @llvm.aarch64.sve.fadda.nxv8f16( %pg,
+   half %init,
+%a)
+  ret half %res
+}
+
+define float @fadda_f32( %pg, float %init,  %a) {
+; CHECK-LABEL: fadda_f32:
+; CHECK: fadda s0, p0, s0, z1.s
+; CHECK-NEXT: ret
+  %res = call float @llvm.aarch64.sve.fadda.nxv4f32( %pg,
+float %init,
+ %a)
+  ret float %res
+}
+
+define double @fadda_f64( %pg, double %init,  %a) {
+; CHECK-LABEL: fadda_f64:
+; CHECK: fadda d0, p0, d0, z1.d
+; CHECK-NEXT: ret
+  %res = call double @llvm.aarch64.sve.fadda.nxv2f64( %pg,
+ double %init,
+  %a)
+  ret double %res
+}
+
+;
+; FADDV
+;
+
+define half @faddv_f16( %pg,  %a) {
+; CHECK-LABEL: faddv_f16:
+; CHECK: faddv h0, p0, z0.h
+; CHECK-NEXT: ret
+  %res = call half @llvm.aarch64.sve.faddv.nxv8f16( %pg,
+%a)
+  ret half %res
+}
+
+define float @faddv_f32( %pg,  %a) {
+; CHECK-LABEL: faddv_f32:
+; CHECK: faddv s0, p0, z0.s
+; CHECK-NEXT: ret
+  %res = call float @llvm.aarch64.sve.faddv.nxv4f32( %pg,
+ %a)
+  ret float %res
+}
+
+define double @faddv_f64( %pg,  %a) {
+; CHECK-LABEL: faddv_f64:
+; CHECK: faddv d0, p0, z0.d
+; CHECK-NEXT: ret
+  %res = call double @llvm.aarch64.sve.faddv.nxv2f64( %pg,
+  %a)
+  ret double %res
+}
+
+;
+; FMAXNMV
+;
+
+define half @fmaxnmv_f16( %pg,  %a) {
+; CHECK-LABEL: fmaxnmv_f16:
+; CHECK: fmaxnmv h0, p0, z0.h
+; CHECK-NEXT: ret
+  %res = call half @llvm.aarch64.sve.fmaxnmv.nxv8f16( %pg,
+  %a)
+  ret half %res
+}
+
+define float @fmaxnmv_f32( %pg,  %a) {
+; CHECK-LABEL: fmaxnmv_f32:
+; CHECK: fmaxnmv s0, p0, z0.s
+; CHECK-NEXT: ret
+  %res = call float @llvm.aarch64.sve.fmaxnmv.nxv4f32( %pg,
+   %a)
+  ret float %res
+}
+
+define double @fmaxnmv_f64( %pg,  %a) {
+; CHECK-LABEL: fmaxnmv_f64:
+; CHECK: fmaxnmv d0, p0, z0.d
+; CHECK-NEXT: ret
+  %res = call double @llvm.aarch64.sve.fmaxnmv.nxv2f64( %pg,
+%a)
+  ret double %res
+}
+
+;
+; FMAXV
+;
+
+define half @fmaxv_f16( %pg,  %a) {
+; CHECK-LABEL: fmaxv_f16:
+; CHECK: fmaxv h0, p0, z0.h
+; CHECK-NEXT: ret
+  %res = call half @llvm.aarch64.sve.fmaxv.nxv8f16( %pg,
+%a)
+  ret half %res
+}
+
+define float @fmaxv_f32( %pg,  %a) {
+; CHECK-LABEL: fmaxv_f32:
+; CHECK: fmaxv s0, p0, z0.s
+; CHECK-NEXT: ret
+  %res = call float @llvm.aarch64.sve.fmaxv.nxv4f32( %pg,
+ %a)
+  ret float %res
+}
+
+define double @fmaxv_f64( %pg,  %a) {
+; CHECK-LABEL: fmaxv_f64:
+; CHECK: fmaxv d0, p0, z0.d
+; CHECK-NEXT: ret
+  %res = call double @llvm.aarch64.sve.fmaxv.nxv2f64( %pg,
+  %a)
+  ret double %res
+}
+
+;
+; FMINNMV
+;
+
+define half @fminnmv_f16( %pg,  %a) {
+; CHECK-LABEL: fminnmv_f16:
+; CHECK: fminnmv h0, p0, z0.h
+; CHECK-NEXT: ret
+  %res = call half @llvm.aarch64.sve.fminnmv.nxv8f16( %pg,
+  %a)
+  ret half %res
+}
+
+define float @fminnmv_f32( %pg,  %a) {
+; CHECK-LABEL: fminnmv_f32:
+; CHECK: fminnmv s0, p0, z0.s
+; CHECK-NEXT: ret
+  %res = call float @llvm.aarch64.sve.fminnmv.nxv4f32( %pg,
+   %a)
+  ret float %res
+}
+
+define double @fminnmv_f64( %pg,  %a) {
+; CHECK-LABEL: fminnmv_f64:
+; CHECK: fminnmv d0, p0, z0.d
+; 

[PATCH] D69800: [AArch64][SVE] Implement remaining floating-point arithmetic intrinsics

2019-11-04 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin created this revision.
kmclaughlin added reviewers: huntergr, sdesmalen, dancgr, mgudim.
Herald added subscribers: psnobl, rkruppe, hiraditya, kristof.beyls, tschuett.
Herald added a project: LLVM.

Adds intrinsics for the following:

- fabs & fneg
- fexpa
- frint[a|i|m|n|p|x|z]
- frecpe, frecps & frecpx
- fsqrt, frsqrte & frsqrts
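
A sketch of one of the merging unary forms (types taken from the fabs_h test in the diff below; the angle-bracket spellings are stripped by the archive, and the function name is invented):

  define <vscale x 8 x half> @fabs_example(<vscale x 8 x half> %inactive,
                                           <vscale x 8 x i1> %pg,
                                           <vscale x 8 x half> %b) {
    ; Unlike the binary arithmetic intrinsics, the unary merging ops carry an
    ; explicit first operand that supplies the result for lanes where %pg is false.
    %out = call <vscale x 8 x half> @llvm.aarch64.sve.fabs.nxv8f16(
                    <vscale x 8 x half> %inactive, <vscale x 8 x i1> %pg, <vscale x 8 x half> %b)
    ret <vscale x 8 x half> %out
  }

  declare <vscale x 8 x half> @llvm.aarch64.sve.fabs.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, <vscale x 8 x half>)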


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D69800

Files:
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/SVEInstrFormats.td
  llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith.ll

Index: llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith.ll
===
--- llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith.ll
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith.ll
@@ -35,6 +35,40 @@
 }
 
 ;
+; FABS
+;
+
+define  @fabs_h( %a,  %pg,  %b) {
+; CHECK-LABEL: fabs_h:
+; CHECK: fabs z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fabs.nxv8f16( %a,
+  %pg,
+  %b)
+  ret  %out
+}
+
+define  @fabs_s( %a,  %pg,  %b) {
+; CHECK-LABEL: fabs_s:
+; CHECK: fabs z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fabs.nxv4f32( %a,
+   %pg,
+   %b)
+  ret  %out
+}
+
+define  @fabs_d( %a,  %pg,  %b) {
+; CHECK-LABEL: fabs_d:
+; CHECK: fabs z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fabs.nxv2f64( %a,
+%pg,
+%b)
+  ret  %out
+}
+
+;
 ; FADD
 ;
 
@@ -242,6 +276,34 @@
 }
 
 ;
+; FEXPA
+;
+
+define  @fexpa_h( %a) {
+; CHECK-LABEL: fexpa_h:
+; CHECK: fexpa z0.h, z0.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fexpa.x.nxv8f16( %a)
+  ret  %out
+}
+
+define  @fexpa_s( %a) {
+; CHECK-LABEL: fexpa_s:
+; CHECK: fexpa z0.s, z0.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fexpa.x.nxv4f32( %a)
+  ret  %out
+}
+
+define  @fexpa_d( %pg,  %a) {
+; CHECK-LABEL: fexpa_d:
+; CHECK: fexpa z0.d, z0.d
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fexpa.x.nxv2f64( %a)
+  ret  %out
+}
+
+;
 ; FMAD
 ;
 
@@ -702,36 +764,36 @@
 }
 
 ;
-; FSCALE
+; FNEG
 ;
 
-define  @fscale_h( %pg,  %a,  %b) {
-; CHECK-LABEL: fscale_h:
-; CHECK: fscale z0.h, p0/m, z0.h, z1.h
+define  @fneg_h( %a,  %pg,  %b) {
+; CHECK-LABEL: fneg_h:
+; CHECK: fneg z0.h, p0/m, z1.h
 ; CHECK-NEXT: ret
-  %out = call  @llvm.aarch64.sve.fscale.nxv8f16( %pg,
-%a,
-%b)
+  %out = call  @llvm.aarch64.sve.fneg.nxv8f16( %a,
+  %pg,
+  %b)
   ret  %out
 }
 
-define  @fscale_s( %pg,  %a,  %b) {
-; CHECK-LABEL: fscale_s:
-; CHECK: fscale z0.s, p0/m, z0.s, z1.s
+define  @fneg_s( %a,  %pg,  %b) {
+; CHECK-LABEL: fneg_s:
+; CHECK: fneg z0.s, p0/m, z1.s
 ; CHECK-NEXT: ret
-  %out = call  @llvm.aarch64.sve.fscale.nxv4f32( %pg,
- %a,
- %b)
+  %out = call  @llvm.aarch64.sve.fneg.nxv4f32( %a,
+   %pg,
+   %b)
   ret  %out
 }
 
-define  @fscale_d( %pg,  %a,  %b) {
-; CHECK-LABEL: fscale_d:
-; CHECK: fscale z0.d, p0/m, z0.d, z1.d
+define  @fneg_d( %a,  %pg,  %b) {
+; CHECK-LABEL: fneg_d:
+; CHECK: fneg z0.d, p0/m, z1.d
 ; CHECK-NEXT: ret
-  %out = call  @llvm.aarch64.sve.fscale.nxv2f64( %pg,
-  %a,
-  %b)
+  %out = call  @llvm.aarch64.sve.fneg.nxv2f64( %a,
+%pg,
+%b)
   ret  %out
 }
 
@@ -884,6 +946,402 @@
 }
 
 ;
+; FRECPE
+;
+
+define  @frecpe_h( %a) {
+; CHECK-LABEL: frecpe_h:
+; CHECK: frecpe z0.h, z0.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.frecpe.x.nxv8f16( %a)
+  ret  %out
+}
+
+define  @frecpe_s( %a) {
+; CHECK-LABEL: frecpe_s:
+; CHECK: frecpe z0.s, z0.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.frecpe.x.nxv4f32( %a)
+  ret  %out
+}
+
+define  @frecpe_d( %pg,  %a) {
+; CHECK-LABEL: frecpe_d:
+; CHECK: frecpe z0.d, z0.d
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.frecpe.x.nxv2f64( %a)
+  ret  %out
+}
+
+;
+; FRECPX
+;
+

[PATCH] D69707: [AArch64][SVE] Implement additional floating-point arithmetic intrinsics

2019-11-01 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin created this revision.
kmclaughlin added reviewers: sdesmalen, huntergr, dancgr.
Herald added subscribers: psnobl, rkruppe, hiraditya, kristof.beyls, tschuett.
Herald added a project: LLVM.

Adds intrinsics for the following:

- ftssel
- fcadd, fcmla
- fmla, fmls, fnmla, fnmls
- fmad, fmsb, fnmad, fnmsb
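
As a usage sketch (types and operand order taken from the fcmla_h test in the diff below; the archive strips the <vscale x ...> spellings and the function name is invented):

  define <vscale x 8 x half> @fcmla_example(<vscale x 8 x i1> %pg,
                                            <vscale x 8 x half> %acc,
                                            <vscale x 8 x half> %a,
                                            <vscale x 8 x half> %b) {
    ; Predicated complex multiply-accumulate; the trailing i32 selects the
    ; rotation (0, 90, 180 or 270 degrees) and has to be a compile-time constant.
    %out = call <vscale x 8 x half> @llvm.aarch64.sve.fcmla.nxv8f16(
                    <vscale x 8 x i1> %pg, <vscale x 8 x half> %acc,
                    <vscale x 8 x half> %a, <vscale x 8 x half> %b, i32 90)
    ret <vscale x 8 x half> %out
  }

  declare <vscale x 8 x half> @llvm.aarch64.sve.fcmla.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, i32)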


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D69707

Files:
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64InstrFormats.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/SVEInstrFormats.td
  llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith.ll

Index: llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith.ll
===
--- llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith.ll
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith.ll
@@ -69,6 +69,111 @@
 }
 
 ;
+; FCADD
+;
+
+define  @fcadd_h( %pg,  %a,  %b) {
+; CHECK-LABEL: fcadd_h:
+; CHECK: fcadd z0.h, p0/m, z0.h, z1.h, #90
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fcadd.nxv8f16( %pg,
+   %a,
+   %b,
+  i32 90)
+  ret  %out
+}
+
+define  @fcadd_s( %pg,  %a,  %b) {
+; CHECK-LABEL: fcadd_s:
+; CHECK: fcadd z0.s, p0/m, z0.s, z1.s, #270
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fcadd.nxv4f32( %pg,
+%a,
+%b,
+   i32 270)
+  ret  %out
+}
+
+define  @fcadd_d( %pg,  %a,  %b) {
+; CHECK-LABEL: fcadd_d:
+; CHECK: fcadd z0.d, p0/m, z0.d, z1.d, #90
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fcadd.nxv2f64( %pg,
+ %a,
+ %b,
+i32 90)
+  ret  %out
+}
+
+;
+; FCMLA
+;
+
+define  @fcmla_h( %pg,  %a,  %b,  %c) {
+; CHECK-LABEL: fcmla_h:
+; CHECK: fcmla z0.h, p0/m, z1.h, z2.h, #90
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fcmla.nxv8f16( %pg,
+   %a,
+   %b,
+   %c,
+  i32 90)
+  ret  %out
+}
+
+define  @fcmla_s( %pg,  %a,  %b,  %c) {
+; CHECK-LABEL: fcmla_s:
+; CHECK: fcmla z0.s, p0/m, z1.s, z2.s, #180
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fcmla.nxv4f32( %pg,
+%a,
+%b,
+%c,
+   i32 180)
+  ret  %out
+}
+
+define  @fcmla_d( %pg,  %a,  %b,  %c) {
+; CHECK-LABEL: fcmla_d:
+; CHECK: fcmla z0.d, p0/m, z1.d, z2.d, #270
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fcmla.nxv2f64( %pg,
+ %a,
+ %b,
+ %c,
+i32 270)
+  ret  %out
+}
+
+;
+; FCMLA (Indexed)
+;
+
+define  @fcmla_lane_h( %a,  %b,  %c) {
+; CHECK-LABEL: fcmla_lane_h:
+; CHECK: fcmla z0.h, z1.h, z2.h[3], #0
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fcmla.lane.nxv8f16( %a,
+%b,
+%c,
+   i32 3,
+   i32 0)
+  ret  %out
+}
+
+define  @fcmla_lane_s( %a,  %b,  %c) {
+; CHECK-LABEL: fcmla_lane_s:
+; CHECK: fcmla z0.s, z1.s, z2.s[1], #90
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fcmla.lane.nxv4f32( %a,
+ %b,
+ %c,
+i32 1,
+i32 90)
+  ret  %out
+}
+
+;
 ; FDIV
 ;
 
@@ -137,6 +242,43 @@
 }
 
 ;
+; FMAD
+;
+
+define  @fmad_h( %pg,  %a,  %b,  %c) {
+; CHECK-LABEL: fmad_h:
+; CHECK: fmad z0.h, p0/m, z1.h, z2.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fmad.nxv8f16( %pg,
+ 

[PATCH] D69657: [AArch64][SVE] Implement several floating-point arithmetic intrinsics

2019-11-01 Thread Kerry McLaughlin via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rG5ec34dfdf733: [AArch64][SVE] Implement several 
floating-point arithmetic intrinsics (authored by kmclaughlin).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D69657/new/

https://reviews.llvm.org/D69657

Files:
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/SVEInstrFormats.td
  llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith.ll

Index: llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith.ll
@@ -0,0 +1,530 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+;
+; FABD
+;
+
+define  @fabd_h( %pg,  %a,  %b) {
+; CHECK-LABEL: fabd_h:
+; CHECK: fabd z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fabd.nxv8f16( %pg,
+  %a,
+  %b)
+  ret  %out
+}
+
+define  @fabd_s( %pg,  %a,  %b) {
+; CHECK-LABEL: fabd_s:
+; CHECK: fabd z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fabd.nxv4f32( %pg,
+   %a,
+   %b)
+  ret  %out
+}
+
+define  @fabd_d( %pg,  %a,  %b) {
+; CHECK-LABEL: fabd_d:
+; CHECK: fabd z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fabd.nxv2f64( %pg,
+%a,
+%b)
+  ret  %out
+}
+
+;
+; FADD
+;
+
+define  @fadd_h( %pg,  %a,  %b) {
+; CHECK-LABEL: fadd_h:
+; CHECK: fadd z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fadd.nxv8f16( %pg,
+  %a,
+  %b)
+  ret  %out
+}
+
+define  @fadd_s( %pg,  %a,  %b) {
+; CHECK-LABEL: fadd_s:
+; CHECK: fadd z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fadd.nxv4f32( %pg,
+   %a,
+   %b)
+  ret  %out
+}
+
+define  @fadd_d( %pg,  %a,  %b) {
+; CHECK-LABEL: fadd_d:
+; CHECK: fadd z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fadd.nxv2f64( %pg,
+%a,
+%b)
+  ret  %out
+}
+
+;
+; FDIV
+;
+
+define  @fdiv_h( %pg,  %a,  %b) {
+; CHECK-LABEL: fdiv_h:
+; CHECK: fdiv z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fdiv.nxv8f16( %pg,
+  %a,
+  %b)
+  ret  %out
+}
+
+define  @fdiv_s( %pg,  %a,  %b) {
+; CHECK-LABEL: fdiv_s:
+; CHECK: fdiv z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fdiv.nxv4f32( %pg,
+   %a,
+   %b)
+  ret  %out
+}
+
+define  @fdiv_d( %pg,  %a,  %b) {
+; CHECK-LABEL: fdiv_d:
+; CHECK: fdiv z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fdiv.nxv2f64( %pg,
+%a,
+%b)
+  ret  %out
+}
+
+;
+; FDIVR
+;
+
+define  @fdivr_h( %pg,  %a,  %b) {
+; CHECK-LABEL: fdivr_h:
+; CHECK: fdivr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fdivr.nxv8f16( %pg,
+   %a,
+   %b)
+  ret  %out
+}
+
+define  @fdivr_s( %pg,  %a,  %b) {
+; CHECK-LABEL: fdivr_s:
+; CHECK: fdivr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fdivr.nxv4f32( %pg,
+%a,
+%b)
+  ret  %out
+}
+
+define  @fdivr_d( %pg,  %a,  %b) {
+; CHECK-LABEL: fdivr_d:
+; CHECK: fdivr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fdivr.nxv2f64( %pg,
+ %a,
+ %b)
+  ret  %out
+}
+
+;
+; FMAX
+;
+
+define  @fmax_h( %pg,  %a,  %b) {
+; CHECK-LABEL: fmax_h:
+; CHECK: fmax z0.h, p0/m, z0.h, 

[PATCH] D69657: [AArch64][SVE] Implement several floating-point arithmetic intrinsics

2019-10-31 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin updated this revision to Diff 227299.
kmclaughlin added a comment.

- Removed duplicate //AdvSIMD_Pred2VectorArg_Intrinsic// class after rebase


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D69657/new/

https://reviews.llvm.org/D69657

Files:
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/SVEInstrFormats.td
  llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith.ll

Index: llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith.ll
@@ -0,0 +1,530 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+;
+; FABD
+;
+
+define  @fabd_h( %pg,  %a,  %b) {
+; CHECK-LABEL: fabd_h:
+; CHECK: fabd z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fabd.nxv8f16( %pg,
+  %a,
+  %b)
+  ret  %out
+}
+
+define  @fabd_s( %pg,  %a,  %b) {
+; CHECK-LABEL: fabd_s:
+; CHECK: fabd z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fabd.nxv4f32( %pg,
+   %a,
+   %b)
+  ret  %out
+}
+
+define  @fabd_d( %pg,  %a,  %b) {
+; CHECK-LABEL: fabd_d:
+; CHECK: fabd z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fabd.nxv2f64( %pg,
+%a,
+%b)
+  ret  %out
+}
+
+;
+; FADD
+;
+
+define  @fadd_h( %pg,  %a,  %b) {
+; CHECK-LABEL: fadd_h:
+; CHECK: fadd z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fadd.nxv8f16( %pg,
+  %a,
+  %b)
+  ret  %out
+}
+
+define  @fadd_s( %pg,  %a,  %b) {
+; CHECK-LABEL: fadd_s:
+; CHECK: fadd z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fadd.nxv4f32( %pg,
+   %a,
+   %b)
+  ret  %out
+}
+
+define  @fadd_d( %pg,  %a,  %b) {
+; CHECK-LABEL: fadd_d:
+; CHECK: fadd z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fadd.nxv2f64( %pg,
+%a,
+%b)
+  ret  %out
+}
+
+;
+; FDIV
+;
+
+define  @fdiv_h( %pg,  %a,  %b) {
+; CHECK-LABEL: fdiv_h:
+; CHECK: fdiv z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fdiv.nxv8f16( %pg,
+  %a,
+  %b)
+  ret  %out
+}
+
+define  @fdiv_s( %pg,  %a,  %b) {
+; CHECK-LABEL: fdiv_s:
+; CHECK: fdiv z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fdiv.nxv4f32( %pg,
+   %a,
+   %b)
+  ret  %out
+}
+
+define  @fdiv_d( %pg,  %a,  %b) {
+; CHECK-LABEL: fdiv_d:
+; CHECK: fdiv z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fdiv.nxv2f64( %pg,
+%a,
+%b)
+  ret  %out
+}
+
+;
+; FDIVR
+;
+
+define  @fdivr_h( %pg,  %a,  %b) {
+; CHECK-LABEL: fdivr_h:
+; CHECK: fdivr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fdivr.nxv8f16( %pg,
+   %a,
+   %b)
+  ret  %out
+}
+
+define  @fdivr_s( %pg,  %a,  %b) {
+; CHECK-LABEL: fdivr_s:
+; CHECK: fdivr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fdivr.nxv4f32( %pg,
+%a,
+%b)
+  ret  %out
+}
+
+define  @fdivr_d( %pg,  %a,  %b) {
+; CHECK-LABEL: fdivr_d:
+; CHECK: fdivr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fdivr.nxv2f64( %pg,
+ %a,
+ %b)
+  ret  %out
+}
+
+;
+; FMAX
+;
+
+define  @fmax_h( %pg,  %a,  %b) {
+; CHECK-LABEL: fmax_h:
+; CHECK: fmax z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fmax.nxv8f16( %pg,
+  

[PATCH] D69657: [AArch64][SVE] Implement several floating-point arithmetic intrinsics

2019-10-31 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin created this revision.
kmclaughlin added reviewers: huntergr, sdesmalen, dancgr.
Herald added subscribers: psnobl, rkruppe, hiraditya, kristof.beyls, tschuett.
Herald added a project: LLVM.

Adds intrinsics for the following:

- fabd, fadd, fsub & fsubr
- fmul, fmulx, fdiv & fdivr
- fmax, fmaxnm, fmin & fminnm
- fscale & ftsmul
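
To show the common shape of these, here is a minimal sketch; the types are reconstructed from the fadd_h test in the diff below (the archive strips the <vscale x ...> spellings) and the function name is invented:

  define <vscale x 8 x half> @fadd_example(<vscale x 8 x i1> %pg,
                                           <vscale x 8 x half> %a,
                                           <vscale x 8 x half> %b) {
    ; Predicated add; this selects the destructive "fadd z0.h, p0/m, z0.h, z1.h"
    ; form, so inactive lanes keep the corresponding elements of the first vector operand.
    %out = call <vscale x 8 x half> @llvm.aarch64.sve.fadd.nxv8f16(
                    <vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
    ret <vscale x 8 x half> %out
  }

  declare <vscale x 8 x half> @llvm.aarch64.sve.fadd.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)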


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D69657

Files:
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/SVEInstrFormats.td
  llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith.ll

Index: llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith.ll
@@ -0,0 +1,530 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+;
+; FABD
+;
+
+define  @fabd_h( %pg,  %a,  %b) {
+; CHECK-LABEL: fabd_h:
+; CHECK: fabd z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fabd.nxv8f16( %pg,
+  %a,
+  %b)
+  ret  %out
+}
+
+define  @fabd_s( %pg,  %a,  %b) {
+; CHECK-LABEL: fabd_s:
+; CHECK: fabd z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fabd.nxv4f32( %pg,
+   %a,
+   %b)
+  ret  %out
+}
+
+define  @fabd_d( %pg,  %a,  %b) {
+; CHECK-LABEL: fabd_d:
+; CHECK: fabd z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fabd.nxv2f64( %pg,
+%a,
+%b)
+  ret  %out
+}
+
+;
+; FADD
+;
+
+define  @fadd_h( %pg,  %a,  %b) {
+; CHECK-LABEL: fadd_h:
+; CHECK: fadd z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fadd.nxv8f16( %pg,
+  %a,
+  %b)
+  ret  %out
+}
+
+define  @fadd_s( %pg,  %a,  %b) {
+; CHECK-LABEL: fadd_s:
+; CHECK: fadd z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fadd.nxv4f32( %pg,
+   %a,
+   %b)
+  ret  %out
+}
+
+define  @fadd_d( %pg,  %a,  %b) {
+; CHECK-LABEL: fadd_d:
+; CHECK: fadd z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fadd.nxv2f64( %pg,
+%a,
+%b)
+  ret  %out
+}
+
+;
+; FDIV
+;
+
+define  @fdiv_h( %pg,  %a,  %b) {
+; CHECK-LABEL: fdiv_h:
+; CHECK: fdiv z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fdiv.nxv8f16( %pg,
+  %a,
+  %b)
+  ret  %out
+}
+
+define  @fdiv_s( %pg,  %a,  %b) {
+; CHECK-LABEL: fdiv_s:
+; CHECK: fdiv z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fdiv.nxv4f32( %pg,
+   %a,
+   %b)
+  ret  %out
+}
+
+define  @fdiv_d( %pg,  %a,  %b) {
+; CHECK-LABEL: fdiv_d:
+; CHECK: fdiv z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fdiv.nxv2f64( %pg,
+%a,
+%b)
+  ret  %out
+}
+
+;
+; FDIVR
+;
+
+define  @fdivr_h( %pg,  %a,  %b) {
+; CHECK-LABEL: fdivr_h:
+; CHECK: fdivr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fdivr.nxv8f16( %pg,
+   %a,
+   %b)
+  ret  %out
+}
+
+define  @fdivr_s( %pg,  %a,  %b) {
+; CHECK-LABEL: fdivr_s:
+; CHECK: fdivr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fdivr.nxv4f32( %pg,
+%a,
+%b)
+  ret  %out
+}
+
+define  @fdivr_d( %pg,  %a,  %b) {
+; CHECK-LABEL: fdivr_d:
+; CHECK: fdivr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.fdivr.nxv2f64( %pg,
+ %a,
+ %b)
+  ret  %out
+}
+
+;
+; FMAX
+;
+
+define  @fmax_h( 

[PATCH] D69378: [AArch64][SVE] Implement masked store intrinsics

2019-10-30 Thread Kerry McLaughlin via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rG5c2c94648e42: [AArch64][SVE] Implement masked store 
intrinsics (authored by kmclaughlin).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D69378/new/

https://reviews.llvm.org/D69378

Files:
  llvm/lib/Target/AArch64/AArch64InstrInfo.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
  llvm/test/CodeGen/AArch64/sve-masked-ldst-nonext.ll
  llvm/test/CodeGen/AArch64/sve-masked-ldst-trunc.ll

Index: llvm/test/CodeGen/AArch64/sve-masked-ldst-trunc.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-masked-ldst-trunc.ll
@@ -0,0 +1,66 @@
+; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve -asm-verbose=0 < %s | FileCheck %s
+
+;
+; Masked Stores
+;
+
+define void @masked_trunc_store_nxv2i8( *%a,  %val,  *%b,  %mask) nounwind {
+; CHECK-LABEL: masked_trunc_store_nxv2i8:
+; CHECK-NEXT: st1b { z0.d }, p0, [x1]
+; CHECK-NEXT: ret
+  %trunc = trunc  %val to 
+  call void @llvm.masked.store.nxv2i8( %trunc,  *%b, i32 8,  %mask)
+  ret void
+}
+
+define void @masked_trunc_store_nxv2i16( *%a,  %val,  *%b,  %mask) nounwind {
+; CHECK-LABEL: masked_trunc_store_nxv2i16:
+; CHECK-NEXT: st1h { z0.d }, p0, [x1]
+; CHECK-NEXT: ret
+  %trunc = trunc  %val to 
+  call void @llvm.masked.store.nxv2i16( %trunc,  *%b, i32 8,  %mask)
+  ret void
+}
+
+define void @masked_trunc_store_nxv2i32( *%a,  %val,  *%b,  %mask) nounwind {
+; CHECK-LABEL: masked_trunc_store_nxv2i32:
+; CHECK-NEXT: st1w { z0.d }, p0, [x1]
+; CHECK-NEXT: ret
+  %trunc = trunc  %val to 
+  call void @llvm.masked.store.nxv2i32( %trunc,  *%b, i32 8,  %mask)
+  ret void
+}
+
+define void @masked_trunc_store_nxv4i8( *%a,  %val,  *%b,  %mask) nounwind {
+; CHECK-LABEL: masked_trunc_store_nxv4i8:
+; CHECK-NEXT: st1b { z0.s }, p0, [x1]
+; CHECK-NEXT: ret
+  %trunc = trunc  %val to 
+  call void @llvm.masked.store.nxv4i8( %trunc,  *%b, i32 4,  %mask)
+  ret void
+}
+
+define void @masked_trunc_store_nxv4i16( *%a,  %val,  *%b,  %mask) nounwind {
+; CHECK-LABEL: masked_trunc_store_nxv4i16:
+; CHECK-NEXT: st1h { z0.s }, p0, [x1]
+; CHECK-NEXT: ret
+  %trunc = trunc  %val to 
+  call void @llvm.masked.store.nxv4i16( %trunc,  *%b, i32 4,  %mask)
+  ret void
+}
+
+define void @masked_trunc_store_nxv8i8( *%a,  %val,  *%b,  %mask) nounwind {
+; CHECK-LABEL: masked_trunc_store_nxv8i8:
+; CHECK-NEXT: st1b { z0.h }, p0, [x1]
+; CHECK-NEXT: ret
+  %trunc = trunc  %val to 
+  call void @llvm.masked.store.nxv8i8( %trunc,  *%b, i32 2,  %mask)
+  ret void
+}
+
+declare void @llvm.masked.store.nxv2i8(, *, i32, )
+declare void @llvm.masked.store.nxv2i16(, *, i32, )
+declare void @llvm.masked.store.nxv2i32(, *, i32, )
+declare void @llvm.masked.store.nxv4i8(, *, i32, )
+declare void @llvm.masked.store.nxv4i16(, *, i32, )
+declare void @llvm.masked.store.nxv8i8(, *, i32, )
Index: llvm/test/CodeGen/AArch64/sve-masked-ldst-nonext.ll
===
--- llvm/test/CodeGen/AArch64/sve-masked-ldst-nonext.ll
+++ llvm/test/CodeGen/AArch64/sve-masked-ldst-nonext.ll
@@ -1,79 +1,173 @@
-; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve -asm-verbose=0 < %s | FileCheck %s
 
 ;
 ; Masked Loads
 ;
 
-define  @masked_load_nxv2i64( *%a,  %mask) {
+define  @masked_load_nxv2i64( *%a,  %mask) nounwind {
 ; CHECK-LABEL: masked_load_nxv2i64:
-; CHECK: ld1d { [[IN:z[0-9]+]].d }, [[PG:p[0-9]+]]/z, [x0]
+; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ret
   %load = call  @llvm.masked.load.nxv2i64( *%a, i32 8,  %mask,  undef)
   ret  %load
 }
 
-define  @masked_load_nxv4i32( *%a,  %mask) {
+define  @masked_load_nxv4i32( *%a,  %mask) nounwind {
 ; CHECK-LABEL: masked_load_nxv4i32:
-; CHECK: ld1w { [[IN:z[0-9]+]].s }, [[PG:p[0-9]+]]/z, [x0]
+; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ret
   %load = call  @llvm.masked.load.nxv4i32( *%a, i32 4,  %mask,  undef)
   ret  %load
 }
 
-define  @masked_load_nxv8i16( *%a,  %mask) {
+define  @masked_load_nxv8i16( *%a,  %mask) nounwind {
 ; CHECK-LABEL: masked_load_nxv8i16:
-; CHECK: ld1h { [[IN:z[0-9]+]].h }, [[PG:p[0-9]+]]/z, [x0]
+; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
+; CHECK-NEXT: ret
   %load = call  @llvm.masked.load.nxv8i16( *%a, i32 2,  %mask,  undef)
   ret  %load
 }
 
-define  @masked_load_nxv16i8( *%a,  %mask) {
+define  @masked_load_nxv16i8( *%a,  %mask) nounwind {
 ; CHECK-LABEL: masked_load_nxv16i8:
-; CHECK: ld1b { [[IN:z[0-9]+]].b }, [[PG:p[0-9]+]]/z, [x0]
+; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
+; CHECK-NEXT: ret
   %load = call  @llvm.masked.load.nxv16i8( *%a, i32 1,  %mask,  undef)
   ret  %load
 }
 
-define  @masked_load_nxv2f64( *%a,  %mask) {
+define  @masked_load_nxv2f64( *%a,  %mask) nounwind {
 ; CHECK-LABEL: masked_load_nxv2f64:
-; CHECK: ld1d { 

[PATCH] D69567: [AArch64][SVE] Implement additional integer arithmetic intrinsics

2019-10-30 Thread Kerry McLaughlin via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rGe128c2086489: [AArch64][SVE] Implement additional integer 
arithmetic intrinsics (authored by kmclaughlin).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D69567/new/

https://reviews.llvm.org/D69567

Files:
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/SVEInstrFormats.td
  llvm/test/CodeGen/AArch64/sve-intrinsics-conversion.ll
  llvm/test/CodeGen/AArch64/sve-intrinsics-counting-bits.ll
  llvm/test/CodeGen/AArch64/sve-intrinsics-logical.ll

Index: llvm/test/CodeGen/AArch64/sve-intrinsics-logical.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-logical.ll
@@ -0,0 +1,99 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+;
+; CNOT
+;
+
+define  @cnot_i8( %a,  %pg,  %b) {
+; CHECK-LABEL: cnot_i8:
+; CHECK: cnot z0.b, p0/m, z1.b
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.cnot.nxv16i8( %a,
+ %pg,
+ %b)
+  ret  %out
+}
+
+define  @cnot_i16( %a,  %pg,  %b) {
+; CHECK-LABEL: cnot_i16:
+; CHECK: cnot z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.cnot.nxv8i16( %a,
+ %pg,
+ %b)
+  ret  %out
+}
+
+define  @cnot_i32( %a,  %pg,  %b) {
+; CHECK-LABEL: cnot_i32:
+; CHECK: cnot z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.cnot.nxv4i32( %a,
+ %pg,
+ %b)
+  ret  %out
+}
+
+define  @cnot_i64( %a,  %pg,  %b) {
+; CHECK-LABEL: cnot_i64:
+; CHECK: cnot z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.cnot.nxv2i64( %a,
+ %pg,
+ %b)
+  ret  %out
+}
+
+;
+; NOT
+;
+
+define  @not_i8( %a,  %pg,  %b) {
+; CHECK-LABEL: not_i8:
+; CHECK: not z0.b, p0/m, z1.b
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.not.nxv16i8( %a,
+%pg,
+%b)
+  ret  %out
+}
+
+define  @not_i16( %a,  %pg,  %b) {
+; CHECK-LABEL: not_i16:
+; CHECK: not z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.not.nxv8i16( %a,
+%pg,
+%b)
+  ret  %out
+}
+
+define  @not_i32( %a,  %pg,  %b) {
+; CHECK-LABEL: not_i32:
+; CHECK: not z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.not.nxv4i32( %a,
+%pg,
+%b)
+  ret  %out
+}
+
+define  @not_i64( %a,  %pg,  %b) {
+; CHECK-LABEL: not_i64:
+; CHECK: not z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.not.nxv2i64( %a,
+%pg,
+%b)
+  ret  %out
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.cnot.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.cnot.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.cnot.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.cnot.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.not.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.not.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.not.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.not.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>)
Index: llvm/test/CodeGen/AArch64/sve-intrinsics-counting-bits.ll
===
--- llvm/test/CodeGen/AArch64/sve-intrinsics-counting-bits.ll
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-counting-bits.ll
@@ -1,6 +1,94 @@
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
 
 ;
+; CLS
+;
+
+define  @cls_i8( %a,  %pg,  %b) {
+; CHECK-LABEL: cls_i8:
+; CHECK: cls z0.b, p0/m, z1.b
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.cls.nxv16i8( %a,
+%pg,
+%b)
+  ret  %out
+}
+
+define  @cls_i16( %a,  %pg,  %b) {
+; CHECK-LABEL: cls_i16:
+; CHECK: cls z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.cls.nxv8i16( %a,
+%pg,
+%b)
+  ret  %out
+}
+
+define  @cls_i32( %a,  %pg,  %b) {
+; CHECK-LABEL: cls_i32:
+; CHECK: cls 

[PATCH] D69378: [AArch64][SVE] Implement masked store intrinsics

2019-10-30 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin updated this revision to Diff 227061.
kmclaughlin added a comment.

- Improve CHECK lines used in sve-masked-ldst-nonext.ll & 
sve-masked-ldst-trunc.ll


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D69378/new/

https://reviews.llvm.org/D69378

Files:
  llvm/lib/Target/AArch64/AArch64InstrInfo.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
  llvm/test/CodeGen/AArch64/sve-masked-ldst-nonext.ll
  llvm/test/CodeGen/AArch64/sve-masked-ldst-trunc.ll

Index: llvm/test/CodeGen/AArch64/sve-masked-ldst-trunc.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-masked-ldst-trunc.ll
@@ -0,0 +1,66 @@
+; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve -asm-verbose=0 < %s | FileCheck %s
+
+;
+; Masked Stores
+;
+
+define void @masked_trunc_store_nxv2i8( *%a,  %val,  *%b,  %mask) nounwind {
+; CHECK-LABEL: masked_trunc_store_nxv2i8:
+; CHECK-NEXT: st1b { z0.d }, p0, [x1]
+; CHECK-NEXT: ret
+  %trunc = trunc  %val to 
+  call void @llvm.masked.store.nxv2i8( %trunc,  *%b, i32 8,  %mask)
+  ret void
+}
+
+define void @masked_trunc_store_nxv2i16( *%a,  %val,  *%b,  %mask) nounwind {
+; CHECK-LABEL: masked_trunc_store_nxv2i16:
+; CHECK-NEXT: st1h { z0.d }, p0, [x1]
+; CHECK-NEXT: ret
+  %trunc = trunc  %val to 
+  call void @llvm.masked.store.nxv2i16( %trunc,  *%b, i32 8,  %mask)
+  ret void
+}
+
+define void @masked_trunc_store_nxv2i32( *%a,  %val,  *%b,  %mask) nounwind {
+; CHECK-LABEL: masked_trunc_store_nxv2i32:
+; CHECK-NEXT: st1w { z0.d }, p0, [x1]
+; CHECK-NEXT: ret
+  %trunc = trunc  %val to 
+  call void @llvm.masked.store.nxv2i32( %trunc,  *%b, i32 8,  %mask)
+  ret void
+}
+
+define void @masked_trunc_store_nxv4i8( *%a,  %val,  *%b,  %mask) nounwind {
+; CHECK-LABEL: masked_trunc_store_nxv4i8:
+; CHECK-NEXT: st1b { z0.s }, p0, [x1]
+; CHECK-NEXT: ret
+  %trunc = trunc  %val to 
+  call void @llvm.masked.store.nxv4i8( %trunc,  *%b, i32 4,  %mask)
+  ret void
+}
+
+define void @masked_trunc_store_nxv4i16( *%a,  %val,  *%b,  %mask) nounwind {
+; CHECK-LABEL: masked_trunc_store_nxv4i16:
+; CHECK-NEXT: st1h { z0.s }, p0, [x1]
+; CHECK-NEXT: ret
+  %trunc = trunc  %val to 
+  call void @llvm.masked.store.nxv4i16( %trunc,  *%b, i32 4,  %mask)
+  ret void
+}
+
+define void @masked_trunc_store_nxv8i8( *%a,  %val,  *%b,  %mask) nounwind {
+; CHECK-LABEL: masked_trunc_store_nxv8i8:
+; CHECK-NEXT: st1b { z0.h }, p0, [x1]
+; CHECK-NEXT: ret
+  %trunc = trunc  %val to 
+  call void @llvm.masked.store.nxv8i8( %trunc,  *%b, i32 2,  %mask)
+  ret void
+}
+
+declare void @llvm.masked.store.nxv2i8(, *, i32, )
+declare void @llvm.masked.store.nxv2i16(, *, i32, )
+declare void @llvm.masked.store.nxv2i32(, *, i32, )
+declare void @llvm.masked.store.nxv4i8(, *, i32, )
+declare void @llvm.masked.store.nxv4i16(, *, i32, )
+declare void @llvm.masked.store.nxv8i8(, *, i32, )
Index: llvm/test/CodeGen/AArch64/sve-masked-ldst-nonext.ll
===
--- llvm/test/CodeGen/AArch64/sve-masked-ldst-nonext.ll
+++ llvm/test/CodeGen/AArch64/sve-masked-ldst-nonext.ll
@@ -1,79 +1,173 @@
-; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve -asm-verbose=0 < %s | FileCheck %s
 
 ;
 ; Masked Loads
 ;
 
-define  @masked_load_nxv2i64( *%a,  %mask) {
+define  @masked_load_nxv2i64( *%a,  %mask) nounwind {
 ; CHECK-LABEL: masked_load_nxv2i64:
-; CHECK: ld1d { [[IN:z[0-9]+]].d }, [[PG:p[0-9]+]]/z, [x0]
+; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ret
   %load = call  @llvm.masked.load.nxv2i64( *%a, i32 8,  %mask,  undef)
   ret  %load
 }
 
-define  @masked_load_nxv4i32( *%a,  %mask) {
+define  @masked_load_nxv4i32( *%a,  %mask) nounwind {
 ; CHECK-LABEL: masked_load_nxv4i32:
-; CHECK: ld1w { [[IN:z[0-9]+]].s }, [[PG:p[0-9]+]]/z, [x0]
+; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ret
   %load = call  @llvm.masked.load.nxv4i32( *%a, i32 4,  %mask,  undef)
   ret  %load
 }
 
-define  @masked_load_nxv8i16( *%a,  %mask) {
+define  @masked_load_nxv8i16( *%a,  %mask) nounwind {
 ; CHECK-LABEL: masked_load_nxv8i16:
-; CHECK: ld1h { [[IN:z[0-9]+]].h }, [[PG:p[0-9]+]]/z, [x0]
+; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
+; CHECK-NEXT: ret
   %load = call  @llvm.masked.load.nxv8i16( *%a, i32 2,  %mask,  undef)
   ret  %load
 }
 
-define  @masked_load_nxv16i8( *%a,  %mask) {
+define  @masked_load_nxv16i8( *%a,  %mask) nounwind {
 ; CHECK-LABEL: masked_load_nxv16i8:
-; CHECK: ld1b { [[IN:z[0-9]+]].b }, [[PG:p[0-9]+]]/z, [x0]
+; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
+; CHECK-NEXT: ret
   %load = call  @llvm.masked.load.nxv16i8( *%a, i32 1,  %mask,  undef)
   ret  %load
 }
 
-define  @masked_load_nxv2f64( *%a,  %mask) {
+define  @masked_load_nxv2f64( *%a,  %mask) nounwind {
 ; CHECK-LABEL: masked_load_nxv2f64:
-; CHECK: ld1d { [[IN:z[0-9]+]].d }, [[PG:p[0-9]+]]/z, [x0]
+; CHECK-NEXT: ld1d { 

[PATCH] D69567: [AArch64][SVE] Implement additional integer arithmetic intrinsics

2019-10-29 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin created this revision.
kmclaughlin added reviewers: huntergr, sdesmalen.
Herald added subscribers: psnobl, rkruppe, hiraditya, kristof.beyls, tschuett.
Herald added a project: LLVM.

Add intrinsics for the following:

- sxt[b|h|w] & uxt[b|h|w]
- cls & clz
- not & cnot
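
As a rough illustration (the function name is chosen here, not taken from the
patch), these merging intrinsics pass the value for the inactive lanes as the
first operand, followed by the governing predicate and the source vector:

; Sketch: cnot on 32-bit elements. %inactive supplies the result for lanes
; where %pg is false; active lanes receive cnot(%b).
define <vscale x 4 x i32> @cnot_example(<vscale x 4 x i32> %inactive,
                                        <vscale x 4 x i1> %pg,
                                        <vscale x 4 x i32> %b) {
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.cnot.nxv4i32(<vscale x 4 x i32> %inactive,
                                                                <vscale x 4 x i1> %pg,
                                                                <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

declare <vscale x 4 x i32> @llvm.aarch64.sve.cnot.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)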


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D69567

Files:
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/SVEInstrFormats.td
  llvm/test/CodeGen/AArch64/sve-intrinsics-conversion.ll
  llvm/test/CodeGen/AArch64/sve-intrinsics-counting-bits.ll
  llvm/test/CodeGen/AArch64/sve-intrinsics-logical.ll

Index: llvm/test/CodeGen/AArch64/sve-intrinsics-logical.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-logical.ll
@@ -0,0 +1,99 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+;
+; CNOT
+;
+
+define  @cnot_i8( %a,  %pg,  %b) {
+; CHECK-LABEL: cnot_i8:
+; CHECK: cnot z0.b, p0/m, z1.b
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.cnot.nxv16i8( %a,
+ %pg,
+ %b)
+  ret  %out
+}
+
+define  @cnot_i16( %a,  %pg,  %b) {
+; CHECK-LABEL: cnot_i16:
+; CHECK: cnot z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.cnot.nxv8i16( %a,
+ %pg,
+ %b)
+  ret  %out
+}
+
+define  @cnot_i32( %a,  %pg,  %b) {
+; CHECK-LABEL: cnot_i32:
+; CHECK: cnot z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.cnot.nxv4i32( %a,
+ %pg,
+ %b)
+  ret  %out
+}
+
+define  @cnot_i64( %a,  %pg,  %b) {
+; CHECK-LABEL: cnot_i64:
+; CHECK: cnot z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.cnot.nxv2i64( %a,
+ %pg,
+ %b)
+  ret  %out
+}
+
+;
+; NOT
+;
+
+define  @not_i8( %a,  %pg,  %b) {
+; CHECK-LABEL: not_i8:
+; CHECK: not z0.b, p0/m, z1.b
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.not.nxv16i8( %a,
+%pg,
+%b)
+  ret  %out
+}
+
+define  @not_i16( %a,  %pg,  %b) {
+; CHECK-LABEL: not_i16:
+; CHECK: not z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.not.nxv8i16( %a,
+%pg,
+%b)
+  ret  %out
+}
+
+define  @not_i32( %a,  %pg,  %b) {
+; CHECK-LABEL: not_i32:
+; CHECK: not z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.not.nxv4i32( %a,
+%pg,
+%b)
+  ret  %out
+}
+
+define  @not_i64( %a,  %pg,  %b) {
+; CHECK-LABEL: not_i64:
+; CHECK: not z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.not.nxv2i64( %a,
+%pg,
+%b)
+  ret  %out
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.cnot.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.cnot.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.cnot.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.cnot.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.not.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.not.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.not.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.not.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>)
Index: llvm/test/CodeGen/AArch64/sve-intrinsics-counting-bits.ll
===
--- llvm/test/CodeGen/AArch64/sve-intrinsics-counting-bits.ll
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-counting-bits.ll
@@ -1,6 +1,94 @@
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
 
 ;
+; CLS
+;
+
+define  @cls_i8( %a,  %pg,  %b) {
+; CHECK-LABEL: cls_i8:
+; CHECK: cls z0.b, p0/m, z1.b
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.cls.nxv16i8( %a,
+%pg,
+%b)
+  ret  %out
+}
+
+define  @cls_i16( %a,  %pg,  %b) {
+; CHECK-LABEL: cls_i16:
+; CHECK: cls z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.cls.nxv8i16( %a,
+%pg,
+%b)
+  ret  %out
+}
+
+define  @cls_i32( %a,  %pg,  %b) {
+; CHECK-LABEL: cls_i32:
+; 

[PATCH] D69378: [AArch64][SVE] Implement masked store intrinsics

2019-10-29 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin updated this revision to Diff 226885.
kmclaughlin added a comment.

- Removed masked load tests from sve-masked-ldst-trunc.ll


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D69378/new/

https://reviews.llvm.org/D69378

Files:
  llvm/lib/Target/AArch64/AArch64InstrInfo.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
  llvm/test/CodeGen/AArch64/sve-masked-ldst-nonext.ll
  llvm/test/CodeGen/AArch64/sve-masked-ldst-trunc.ll

Index: llvm/test/CodeGen/AArch64/sve-masked-ldst-trunc.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-masked-ldst-trunc.ll
@@ -0,0 +1,60 @@
+; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s
+
+;
+; Masked Stores
+;
+
+define void @masked_trunc_store_nxv2i8( *%a,  %val,  *%b,  %mask) {
+; CHECK-LABEL: masked_trunc_store_nxv2i8:
+; CHECK: st1b { [[IN:z[0-9]]].d }, [[PG:p[0-9]]], [x1]
+  %trunc = trunc  %val to 
+  call void @llvm.masked.store.nxv2i8( %trunc,  *%b, i32 8,  %mask)
+  ret void
+}
+
+define void @masked_trunc_store_nxv2i16( *%a,  %val,  *%b,  %mask) {
+; CHECK-LABEL: masked_trunc_store_nxv2i16:
+; CHECK: st1h { [[IN:z[0-9]]].d }, [[PG:p[0-9]]], [x1]
+  %trunc = trunc  %val to 
+  call void @llvm.masked.store.nxv2i16( %trunc,  *%b, i32 8,  %mask)
+  ret void
+}
+
+define void @masked_trunc_store_nxv2i32( *%a,  %val,  *%b,  %mask) {
+; CHECK-LABEL: masked_trunc_store_nxv2i32:
+; CHECK: st1w { [[IN:z[0-9]]].d }, [[PG:p[0-9]]], [x1]
+  %trunc = trunc  %val to 
+  call void @llvm.masked.store.nxv2i32( %trunc,  *%b, i32 8,  %mask)
+  ret void
+}
+
+define void @masked_trunc_store_nxv4i8( *%a,  %val,  *%b,  %mask) {
+; CHECK-LABEL: masked_trunc_store_nxv4i8:
+; CHECK: st1b { [[IN:z[0-9]]].s }, [[PG:p[0-9]]], [x1]
+  %trunc = trunc  %val to 
+  call void @llvm.masked.store.nxv4i8( %trunc,  *%b, i32 4,  %mask)
+  ret void
+}
+
+define void @masked_trunc_store_nxv4i16( *%a,  %val,  *%b,  %mask) {
+; CHECK-LABEL: masked_trunc_store_nxv4i16:
+; CHECK: st1h { [[IN:z[0-9]]].s }, [[PG:p[0-9]]], [x1]
+  %trunc = trunc  %val to 
+  call void @llvm.masked.store.nxv4i16( %trunc,  *%b, i32 4,  %mask)
+  ret void
+}
+
+define void @masked_trunc_store_nxv8i8( *%a,  %val,  *%b,  %mask) {
+; CHECK-LABEL: masked_trunc_store_nxv8i8:
+; CHECK: st1b { [[IN:z[0-9]]].h }, [[PG:p[0-9]]], [x1]
+  %trunc = trunc  %val to 
+  call void @llvm.masked.store.nxv8i8( %trunc,  *%b, i32 2,  %mask)
+  ret void
+}
+
+declare void @llvm.masked.store.nxv2i8(, *, i32, )
+declare void @llvm.masked.store.nxv2i16(, *, i32, )
+declare void @llvm.masked.store.nxv2i32(, *, i32, )
+declare void @llvm.masked.store.nxv4i8(, *, i32, )
+declare void @llvm.masked.store.nxv4i16(, *, i32, )
+declare void @llvm.masked.store.nxv8i8(, *, i32, )
Index: llvm/test/CodeGen/AArch64/sve-masked-ldst-nonext.ll
===
--- llvm/test/CodeGen/AArch64/sve-masked-ldst-nonext.ll
+++ llvm/test/CodeGen/AArch64/sve-masked-ldst-nonext.ll
@@ -74,6 +74,80 @@
   ret  %load
 }
 
+;
+; Masked Stores
+;
+
+define void @masked_store_nxv2i64( *%a,  %val,  %mask) {
+; CHECK-LABEL: masked_store_nxv2i64:
+; CHECK: st1d { [[IN]].d }, [[PG]], [x0]
+  call void @llvm.masked.store.nxv2i64( %val,  *%a, i32 8,  %mask)
+  ret void
+}
+
+define void @masked_store_nxv4i32( *%a,  %val,  %mask) {
+; CHECK-LABEL: masked_store_nxv4i32:
+; CHECK: st1w { [[IN]].s }, [[PG]], [x0]
+  call void @llvm.masked.store.nxv4i32( %val,  *%a, i32 4,  %mask)
+  ret void
+}
+
+define void @masked_store_nxv8i16( *%a,  %val,  %mask) {
+; CHECK-LABEL: masked_store_nxv8i16:
+; CHECK: st1h { [[IN]].h }, [[PG]], [x0]
+  call void @llvm.masked.store.nxv8i16( %val,  *%a, i32 2,  %mask)
+  ret void
+}
+
+define void @masked_store_nxv16i8( *%a,  %val,  %mask) {
+; CHECK-LABEL: masked_store_nxv16i8:
+; CHECK: st1b { [[IN]].b }, [[PG]], [x0]
+  call void @llvm.masked.store.nxv16i8( %val,  *%a, i32 1,  %mask)
+  ret void
+}
+
+define void @masked_store_nxv2f64( *%a,  %val,  %mask) {
+; CHECK-LABEL: masked_store_nxv2f64:
+; CHECK: st1d { [[IN]].d }, [[PG]], [x0]
+  call void @llvm.masked.store.nxv2f64( %val,  *%a, i32 8,  %mask)
+  ret void
+}
+
+define void @masked_store_nxv2f32( *%a,  %val,  %mask) {
+; CHECK-LABEL: masked_store_nxv2f32:
+; CHECK: st1w { [[IN]].d }, [[PG]], [x0]
+  call void @llvm.masked.store.nxv2f32( %val,  *%a, i32 4,  %mask)
+  ret void
+}
+
+define void @masked_store_nxv2f16( *%a,  %val,  %mask) {
+; CHECK-LABEL: masked_store_nxv2f16:
+; CHECK: st1h { [[IN]].d }, [[PG]], [x0]
+  call void @llvm.masked.store.nxv2f16( %val,  *%a, i32 4,  %mask)
+  ret void
+}
+
+define void @masked_store_nxv4f32( *%a,  %val,  %mask) {
+; CHECK-LABEL: masked_store_nxv4f32:
+; CHECK: st1w { [[IN]].s }, [[PG]], [x0]
+  call void @llvm.masked.store.nxv4f32( %val,  *%a, i32 4,  %mask)
+  ret void
+}
+
+define void @masked_store_nxv4f16( *%a,  %val,  %mask) {
+; CHECK-LABEL: 

[PATCH] D69378: [AArch64][SVE] Implement masked store intrinsics

2019-10-28 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin updated this revision to Diff 226640.
kmclaughlin added a reviewer: sdesmalen.
kmclaughlin added a comment.

- Split functions in sve-masked-ldst-nonext.ll into separate load & store tests


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D69378/new/

https://reviews.llvm.org/D69378

Files:
  llvm/lib/Target/AArch64/AArch64InstrInfo.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
  llvm/test/CodeGen/AArch64/sve-masked-ldst-nonext.ll
  llvm/test/CodeGen/AArch64/sve-masked-ldst-trunc.ll

Index: llvm/test/CodeGen/AArch64/sve-masked-ldst-trunc.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-masked-ldst-trunc.ll
@@ -0,0 +1,76 @@
+; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s
+
+;
+; Masked Stores
+;
+
+define void @masked_trunc_store_nxv2i8( *%a,  *%b,  %mask) {
+; CHECK-LABEL: masked_trunc_store_nxv2i8:
+; CHECK: ld1d { [[IN:z[0-9]]].d }, [[PG:p[0-9]]]/z, [x0]
+; CHECK: st1b { [[IN]].d }, [[PG]], [x1]
+  %load = call  @llvm.masked.load.nxv2i64( *%a, i32 8,  %mask,  undef)
+  %trunc = trunc  %load to 
+  call void @llvm.masked.store.nxv2i8( %trunc,  *%b, i32 8,  %mask)
+  ret void
+}
+
+define void @masked_trunc_store_nxv2i16( *%a,  *%b,  %mask) {
+; CHECK-LABEL: masked_trunc_store_nxv2i16:
+; CHECK: ld1d { [[IN:z[0-9]]].d }, [[PG:p[0-9]]]/z, [x0]
+; CHECK: st1h { [[IN]].d }, [[PG]], [x1]
+  %load = call  @llvm.masked.load.nxv2i64( *%a, i32 8,  %mask,  undef)
+  %trunc = trunc  %load to 
+  call void @llvm.masked.store.nxv2i16( %trunc,  *%b, i32 8,  %mask)
+  ret void
+}
+
+define void @masked_trunc_store_nxv2i32( *%a,  *%b,  %mask) {
+; CHECK-LABEL: masked_trunc_store_nxv2i32:
+; CHECK: ld1d { [[IN:z[0-9]]].d }, [[PG:p[0-9]]]/z, [x0]
+; CHECK: st1w { [[IN]].d }, [[PG]], [x1]
+  %load = call  @llvm.masked.load.nxv2i64( *%a, i32 8,  %mask,  undef)
+  %trunc = trunc  %load to 
+  call void @llvm.masked.store.nxv2i32( %trunc,  *%b, i32 8,  %mask)
+  ret void
+}
+
+define void @masked_trunc_store_nxv4i8( *%a,  *%b,  %mask) {
+; CHECK-LABEL: masked_trunc_store_nxv4i8:
+; CHECK: ld1w { [[IN:z[0-9]]].s }, [[PG:p[0-9]]]/z, [x0]
+; CHECK: st1b { [[IN]].s }, [[PG]], [x1]
+  %load = call  @llvm.masked.load.nxv4i32( *%a, i32 4,  %mask,  undef)
+  %trunc = trunc  %load to 
+  call void @llvm.masked.store.nxv4i8( %trunc,  *%b, i32 4,  %mask)
+  ret void
+}
+
+define void @masked_trunc_store_nxv4i16( *%a,  *%b,  %mask) {
+; CHECK-LABEL: masked_trunc_store_nxv4i16:
+; CHECK: ld1w { [[IN:z[0-9]]].s }, [[PG:p[0-9]]]/z, [x0]
+; CHECK: st1h { [[IN]].s }, [[PG]], [x1]
+  %load = call  @llvm.masked.load.nxv4i32( *%a, i32 4,  %mask,  undef)
+  %trunc = trunc  %load to 
+  call void @llvm.masked.store.nxv4i16( %trunc,  *%b, i32 4,  %mask)
+  ret void
+}
+
+define void @masked_trunc_store_nxv8i8( *%a,  *%b,  %mask) {
+; CHECK-LABEL: masked_trunc_store_nxv8i8:
+; CHECK: ld1h { [[IN:z[0-9]]].h }, [[PG:p[0-9]]]/z, [x0]
+; CHECK: st1b { [[IN]].h }, [[PG]], [x1]
+  %load = call  @llvm.masked.load.nxv8i16( *%a, i32 2,  %mask,  undef)
+  %trunc = trunc  %load to 
+  call void @llvm.masked.store.nxv8i8( %trunc,  *%b, i32 2,  %mask)
+  ret void
+}
+
+declare  @llvm.masked.load.nxv2i64(*, i32, , )
+declare  @llvm.masked.load.nxv4i32(*, i32, , )
+declare  @llvm.masked.load.nxv8i16(*, i32, , )
+
+declare void @llvm.masked.store.nxv2i8(, *, i32, )
+declare void @llvm.masked.store.nxv2i16(, *, i32, )
+declare void @llvm.masked.store.nxv2i32(, *, i32, )
+declare void @llvm.masked.store.nxv4i8(, *, i32, )
+declare void @llvm.masked.store.nxv4i16(, *, i32, )
+declare void @llvm.masked.store.nxv8i8(, *, i32, )
Index: llvm/test/CodeGen/AArch64/sve-masked-ldst-nonext.ll
===
--- llvm/test/CodeGen/AArch64/sve-masked-ldst-nonext.ll
+++ llvm/test/CodeGen/AArch64/sve-masked-ldst-nonext.ll
@@ -74,6 +74,80 @@
   ret  %load
 }
 
+;
+; Masked Stores
+;
+
+define void @masked_store_nxv2i64( *%a,  %val,  %mask) {
+; CHECK-LABEL: masked_store_nxv2i64:
+; CHECK: st1d { [[IN]].d }, [[PG]], [x0]
+  call void @llvm.masked.store.nxv2i64( %val,  *%a, i32 8,  %mask)
+  ret void
+}
+
+define void @masked_store_nxv4i32( *%a,  %val,  %mask) {
+; CHECK-LABEL: masked_store_nxv4i32:
+; CHECK: st1w { [[IN]].s }, [[PG]], [x0]
+  call void @llvm.masked.store.nxv4i32( %val,  *%a, i32 4,  %mask)
+  ret void
+}
+
+define void @masked_store_nxv8i16( *%a,  %val,  %mask) {
+; CHECK-LABEL: masked_store_nxv8i16:
+; CHECK: st1h { [[IN]].h }, [[PG]], [x0]
+  call void @llvm.masked.store.nxv8i16( %val,  *%a, i32 2,  %mask)
+  ret void
+}
+
+define void @masked_store_nxv16i8( *%a,  %val,  %mask) {
+; CHECK-LABEL: masked_store_nxv16i8:
+; CHECK: st1b { [[IN]].b }, [[PG]], [x0]
+  call void @llvm.masked.store.nxv16i8( %val,  *%a, i32 1,  %mask)
+  ret void
+}
+
+define void @masked_store_nxv2f64( *%a,  %val,  %mask) {
+; CHECK-LABEL: masked_store_nxv2f64:
+; 

[PATCH] D68877: [AArch64][SVE] Implement masked load intrinsics

2019-10-28 Thread Kerry McLaughlin via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rGda720a38b9f2: [AArch64][SVE] Implement masked load 
intrinsics (authored by kmclaughlin).

Changed prior to commit:
  https://reviews.llvm.org/D68877?vs=226123=226628#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D68877/new/

https://reviews.llvm.org/D68877

Files:
  llvm/include/llvm/CodeGen/SelectionDAG.h
  llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
  llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
  llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
  llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
  llvm/lib/Target/AArch64/AArch64ISelLowering.h
  llvm/lib/Target/AArch64/AArch64InstrInfo.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
  llvm/lib/Target/AArch64/SVEInstrFormats.td
  llvm/test/CodeGen/AArch64/sve-masked-ldst-nonext.ll
  llvm/test/CodeGen/AArch64/sve-masked-ldst-sext.ll
  llvm/test/CodeGen/AArch64/sve-masked-ldst-zext.ll

Index: llvm/test/CodeGen/AArch64/sve-masked-ldst-zext.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-masked-ldst-zext.ll
@@ -0,0 +1,72 @@
+; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s
+
+;
+; Masked Loads
+;
+
+define  @masked_zload_nxv2i8(* %src,  %mask) {
+; CHECK-LABEL: masked_zload_nxv2i8:
+; CHECK-NOT: ld1sb
+; CHECK: ld1b { [[IN:z[0-9]+]].d }, [[PG:p[0-9]+]]/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.masked.load.nxv2i8(* %src, i32 1,  %mask,  undef)
+  %ext = zext  %load to 
+  ret  %ext
+}
+
+define  @masked_zload_nxv2i16(* %src,  %mask) {
+; CHECK-LABEL: masked_zload_nxv2i16:
+; CHECK-NOT: ld1sh
+; CHECK: ld1h { [[IN:z[0-9]+]].d }, [[PG:p[0-9]+]]/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.masked.load.nxv2i16(* %src, i32 1,  %mask,  undef)
+  %ext = zext  %load to 
+  ret  %ext
+}
+
+define  @masked_zload_nxv2i32(* %src,  %mask) {
+; CHECK-LABEL: masked_zload_nxv2i32:
+; CHECK-NOT: ld1sw
+; CHECK: ld1w { [[IN:z[0-9]+]].d }, [[PG:p[0-9]+]]/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.masked.load.nxv2i32(* %src, i32 1,  %mask,  undef)
+  %ext = zext  %load to 
+  ret  %ext
+}
+
+define  @masked_zload_nxv4i8(* %src,  %mask) {
+; CHECK-LABEL: masked_zload_nxv4i8:
+; CHECK-NOT: ld1sb
+; CHECK: ld1b { [[IN:z[0-9]+]].s }, [[PG:p[0-9]+]]/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.masked.load.nxv4i8(* %src, i32 1,  %mask,  undef)
+  %ext = zext  %load to 
+  ret  %ext
+}
+
+define  @masked_zload_nxv4i16(* %src,  %mask) {
+; CHECK-LABEL: masked_zload_nxv4i16:
+; CHECK-NOT: ld1sh
+; CHECK: ld1h { [[IN:z[0-9]+]].s }, [[PG:p[0-9]+]]/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.masked.load.nxv4i16(* %src, i32 1,  %mask,  undef)
+  %ext = zext  %load to 
+  ret  %ext
+}
+
+define  @masked_zload_nxv8i8(* %src,  %mask) {
+; CHECK-LABEL: masked_zload_nxv8i8:
+; CHECK-NOT: ld1sb
+; CHECK: ld1b { [[IN:z[0-9]+]].h }, [[PG:p[0-9]+]]/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.masked.load.nxv8i8(* %src, i32 1,  %mask,  undef)
+  %ext = zext  %load to 
+  ret  %ext
+}
+
+declare  @llvm.masked.load.nxv2i8(*, i32, , )
+declare  @llvm.masked.load.nxv2i16(*, i32, , )
+declare  @llvm.masked.load.nxv2i32(*, i32, , )
+declare  @llvm.masked.load.nxv4i8(*, i32, , )
+declare  @llvm.masked.load.nxv4i16(*, i32, , )
+declare  @llvm.masked.load.nxv8i8(*, i32, , )
Index: llvm/test/CodeGen/AArch64/sve-masked-ldst-sext.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-masked-ldst-sext.ll
@@ -0,0 +1,66 @@
+; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s
+
+;
+; Masked Loads
+;
+
+define  @masked_sload_nxv2i8( *%a,  %mask) {
+; CHECK-LABEL: masked_sload_nxv2i8:
+; CHECK: ld1sb { [[IN:z[0-9]+]].d }, [[PG:p[0-9]+]]/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.masked.load.nxv2i8( *%a, i32 1,  %mask,  undef)
+  %ext = sext  %load to 
+  ret  %ext
+}
+
+define  @masked_sload_nxv2i16( *%a,  %mask) {
+; CHECK-LABEL: masked_sload_nxv2i16:
+; CHECK: ld1sh { [[IN:z[0-9]+]].d }, [[PG:p[0-9]+]]/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.masked.load.nxv2i16( *%a, i32 1,  %mask,  undef)
+  %ext = sext  %load to 
+  ret  %ext
+}
+
+define  @masked_sload_nxv2i32( *%a,  %mask) {
+; CHECK-LABEL: masked_sload_nxv2i32:
+; CHECK: ld1sw { [[IN:z[0-9]+]].d }, [[PG:p[0-9]+]]/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.masked.load.nxv2i32( *%a, i32 1,  %mask,  undef)
+  %ext = sext  %load to 
+  ret  %ext
+}
+
+define  @masked_sload_nxv4i8( *%a,  %mask) {
+; CHECK-LABEL: masked_sload_nxv4i8:
+; CHECK: ld1sb { [[IN:z[0-9]+]].s }, [[PG:p[0-9]+]]/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.masked.load.nxv4i8( *%a, i32 1,  %mask,  undef)
+  %ext = sext  %load to 
+  ret  %ext
+}
+
+define  @masked_sload_nxv4i16( *%a,  %mask) {
+; 

[PATCH] D69378: [AArch64][SVE] Implement masked store intrinsics

2019-10-24 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin created this revision.
kmclaughlin added reviewers: huntergr, greened, dmgreen, rovka.
Herald added subscribers: psnobl, rkruppe, hiraditya, kristof.beyls, tschuett.
Herald added a project: LLVM.

Adds support for codegen of masked stores, with non-truncating
and truncating variants.
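
For example (a sketch with illustrative names, not taken from the patch), a
truncating masked store narrows each lane before storing only the lanes whose
predicate bit is set, and is expected to lower to a single st1b here:

; Sketch: truncate i64 lanes to i8 and store the active lanes only.
define void @trunc_store_example(<vscale x 2 x i64> %val, <vscale x 2 x i8>* %p, <vscale x 2 x i1> %mask) {
  %trunc = trunc <vscale x 2 x i64> %val to <vscale x 2 x i8>
  call void @llvm.masked.store.nxv2i8(<vscale x 2 x i8> %trunc, <vscale x 2 x i8>* %p, i32 8, <vscale x 2 x i1> %mask)
  ret void
}

declare void @llvm.masked.store.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8>*, i32, <vscale x 2 x i1>)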


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D69378

Files:
  llvm/lib/Target/AArch64/AArch64InstrInfo.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
  llvm/test/CodeGen/AArch64/sve-masked-ldst-nonext.ll
  llvm/test/CodeGen/AArch64/sve-masked-ldst-trunc.ll

Index: llvm/test/CodeGen/AArch64/sve-masked-ldst-trunc.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-masked-ldst-trunc.ll
@@ -0,0 +1,76 @@
+; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s
+
+;
+; Masked Stores
+;
+
+define void @masked_trunc_store_nxv2i8( *%a,  *%b,  %mask) {
+; CHECK-LABEL: masked_trunc_store_nxv2i8:
+; CHECK: ld1d { [[IN:z[0-9]]].d }, [[PG:p[0-9]]]/z, [x0]
+; CHECK: st1b { [[IN]].d }, [[PG]], [x1]
+  %load = call  @llvm.masked.load.nxv2i64( *%a, i32 8,  %mask,  undef)
+  %trunc = trunc  %load to 
+  call void @llvm.masked.store.nxv2i8( %trunc,  *%b, i32 8,  %mask)
+  ret void
+}
+
+define void @masked_trunc_store_nxv2i16( *%a,  *%b,  %mask) {
+; CHECK-LABEL: masked_trunc_store_nxv2i16:
+; CHECK: ld1d { [[IN:z[0-9]]].d }, [[PG:p[0-9]]]/z, [x0]
+; CHECK: st1h { [[IN]].d }, [[PG]], [x1]
+  %load = call  @llvm.masked.load.nxv2i64( *%a, i32 8,  %mask,  undef)
+  %trunc = trunc  %load to 
+  call void @llvm.masked.store.nxv2i16( %trunc,  *%b, i32 8,  %mask)
+  ret void
+}
+
+define void @masked_trunc_store_nxv2i32( *%a,  *%b,  %mask) {
+; CHECK-LABEL: masked_trunc_store_nxv2i32:
+; CHECK: ld1d { [[IN:z[0-9]]].d }, [[PG:p[0-9]]]/z, [x0]
+; CHECK: st1w { [[IN]].d }, [[PG]], [x1]
+  %load = call  @llvm.masked.load.nxv2i64( *%a, i32 8,  %mask,  undef)
+  %trunc = trunc  %load to 
+  call void @llvm.masked.store.nxv2i32( %trunc,  *%b, i32 8,  %mask)
+  ret void
+}
+
+define void @masked_trunc_store_nxv4i8( *%a,  *%b,  %mask) {
+; CHECK-LABEL: masked_trunc_store_nxv4i8:
+; CHECK: ld1w { [[IN:z[0-9]]].s }, [[PG:p[0-9]]]/z, [x0]
+; CHECK: st1b { [[IN]].s }, [[PG]], [x1]
+  %load = call  @llvm.masked.load.nxv4i32( *%a, i32 4,  %mask,  undef)
+  %trunc = trunc  %load to 
+  call void @llvm.masked.store.nxv4i8( %trunc,  *%b, i32 4,  %mask)
+  ret void
+}
+
+define void @masked_trunc_store_nxv4i16( *%a,  *%b,  %mask) {
+; CHECK-LABEL: masked_trunc_store_nxv4i16:
+; CHECK: ld1w { [[IN:z[0-9]]].s }, [[PG:p[0-9]]]/z, [x0]
+; CHECK: st1h { [[IN]].s }, [[PG]], [x1]
+  %load = call  @llvm.masked.load.nxv4i32( *%a, i32 4,  %mask,  undef)
+  %trunc = trunc  %load to 
+  call void @llvm.masked.store.nxv4i16( %trunc,  *%b, i32 4,  %mask)
+  ret void
+}
+
+define void @masked_trunc_store_nxv8i8( *%a,  *%b,  %mask) {
+; CHECK-LABEL: masked_trunc_store_nxv8i8:
+; CHECK: ld1h { [[IN:z[0-9]]].h }, [[PG:p[0-9]]]/z, [x0]
+; CHECK: st1b { [[IN]].h }, [[PG]], [x1]
+  %load = call  @llvm.masked.load.nxv8i16( *%a, i32 2,  %mask,  undef)
+  %trunc = trunc  %load to 
+  call void @llvm.masked.store.nxv8i8( %trunc,  *%b, i32 2,  %mask)
+  ret void
+}
+
+declare  @llvm.masked.load.nxv2i64(*, i32, , )
+declare  @llvm.masked.load.nxv4i32(*, i32, , )
+declare  @llvm.masked.load.nxv8i16(*, i32, , )
+
+declare void @llvm.masked.store.nxv2i8(, *, i32, )
+declare void @llvm.masked.store.nxv2i16(, *, i32, )
+declare void @llvm.masked.store.nxv2i32(, *, i32, )
+declare void @llvm.masked.store.nxv4i8(, *, i32, )
+declare void @llvm.masked.store.nxv4i16(, *, i32, )
+declare void @llvm.masked.store.nxv8i8(, *, i32, )
Index: llvm/test/CodeGen/AArch64/sve-masked-ldst-nonext.ll
===
--- llvm/test/CodeGen/AArch64/sve-masked-ldst-nonext.ll
+++ llvm/test/CodeGen/AArch64/sve-masked-ldst-nonext.ll
@@ -1,77 +1,97 @@
 ; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s
 
 ;
-; Masked Loads
+; Masked Loads & Stores
 ;
 
-define  @masked_load_nxv2i64( *%a,  %mask) {
+define void @masked_load_nxv2i64( *%a,  %mask) {
 ; CHECK-LABEL: masked_load_nxv2i64:
 ; CHECK: ld1d { [[IN:z[0-9]+]].d }, [[PG:p[0-9]+]]/z, [x0]
+; CHECK: st1d { [[IN]].d }, [[PG]], [x0]
   %load = call  @llvm.masked.load.nxv2i64( *%a, i32 8,  %mask,  undef)
-  ret  %load
+  call void @llvm.masked.store.nxv2i64( %load,  *%a, i32 8,  %mask)
+  ret void
 }
 
-define  @masked_load_nxv4i32( *%a,  %mask) {
+define void @masked_load_nxv4i32( *%a,  %mask) {
 ; CHECK-LABEL: masked_load_nxv4i32:
 ; CHECK: ld1w { [[IN:z[0-9]+]].s }, [[PG:p[0-9]+]]/z, [x0]
+; CHECK: st1w { [[IN]].s }, [[PG]], [x0]
   %load = call  @llvm.masked.load.nxv4i32( *%a, i32 4,  %mask,  undef)
-  ret  %load
+  call void @llvm.masked.store.nxv4i32( %load,  *%a, i32 4,  %mask)
+  ret void
 }
 
-define  @masked_load_nxv8i16( *%a, 

[PATCH] D68877: [AArch64][SVE] Implement masked load intrinsics

2019-10-23 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin marked an inline comment as done.
kmclaughlin added a comment.

In D68877#1717820 , @dmgreen wrote:

> I'm not sure if there is support yet for vector selects in the SVE codegen?


There is not yet support for vector selects, so for this patch the intention 
was that any passthru which is not all zero or undef would result in a 
selection failure.
Do you think it would be acceptable to handle different passthrus in a future 
patch which also implements vector selects for SVE?
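
(For reference, a sketch of the two passthru forms accepted for now; the
function name is illustrative:)

; Sketch: undef and zeroinitializer passthrus need no merging select; any
; other passthru would currently require a vector select to blend the
; inactive lanes.
define <vscale x 4 x i32> @passthru_example(<vscale x 4 x i32>* %p, <vscale x 4 x i1> %mask) {
  %ld.undef = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32(<vscale x 4 x i32>* %p, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x i32> undef)
  %ld.zero  = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32(<vscale x 4 x i32>* %p, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x i32> zeroinitializer)
  %sum = add <vscale x 4 x i32> %ld.undef, %ld.zero
  ret <vscale x 4 x i32> %sum
}

declare <vscale x 4 x i32> @llvm.masked.load.nxv4i32(<vscale x 4 x i32>*, i32, <vscale x 4 x i1>, <vscale x 4 x i32>)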




Comment at: llvm/lib/Target/AArch64/SVEInstrFormats.td:4753
+  let hasSideEffects = 1, hasNoSchedulingInfo = 1, mayLoad = 1 in {
+  def "" : Pseudo<(outs listty:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, 
simm4s1:$imm4), []>,
+   PseudoInstExpansion<(!cast(NAME # _REAL) listty:$Zt, 
PPR3bAny:$Pg, GPR64sp:$Rn, simm4s1:$imm4)>;

dmgreen wrote:
> Can you explain why this pseudo is needed, exactly? I feel that using 
> pseudos is often the wrong solution to a problem (it may be required here, 
> I'm just not sure why exactly).
> 
> We currently seem to generate ld1b (for example) over ldnf1b. Is there ever 
> a time that we expect to generate a non-faulting load?
The pseudo was a workaround that was added downstream for non-faulting loads, 
but it is not needed here.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D68877/new/

https://reviews.llvm.org/D68877



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D68877: [AArch64][SVE] Implement masked load intrinsics

2019-10-23 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin updated this revision to Diff 226123.
kmclaughlin added a comment.

- Removed unnecessary pseudo from SVEInstrFormats.td


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D68877/new/

https://reviews.llvm.org/D68877

Files:
  llvm/include/llvm/CodeGen/SelectionDAG.h
  llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
  llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
  llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
  llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
  llvm/lib/Target/AArch64/AArch64ISelLowering.h
  llvm/lib/Target/AArch64/AArch64InstrInfo.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
  llvm/lib/Target/AArch64/SVEInstrFormats.td
  llvm/test/CodeGen/AArch64/sve-masked-ldst-nonext.ll
  llvm/test/CodeGen/AArch64/sve-masked-ldst-sext.ll
  llvm/test/CodeGen/AArch64/sve-masked-ldst-zext.ll

Index: llvm/test/CodeGen/AArch64/sve-masked-ldst-zext.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-masked-ldst-zext.ll
@@ -0,0 +1,72 @@
+; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s
+
+;
+; Masked Loads
+;
+
+define  @masked_zload_nxv2i8(* %src,  %mask) {
+; CHECK-LABEL: masked_zload_nxv2i8:
+; CHECK-NOT: ld1sb
+; CHECK: ld1b { [[IN:z[0-9]+]].d }, [[PG:p[0-9]+]]/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.masked.load.nxv2i8(* %src, i32 1,  %mask,  undef)
+  %ext = zext  %load to 
+  ret  %ext
+}
+
+define  @masked_zload_nxv2i16(* %src,  %mask) {
+; CHECK-LABEL: masked_zload_nxv2i16:
+; CHECK-NOT: ld1sh
+; CHECK: ld1h { [[IN:z[0-9]+]].d }, [[PG:p[0-9]+]]/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.masked.load.nxv2i16(* %src, i32 1,  %mask,  undef)
+  %ext = zext  %load to 
+  ret  %ext
+}
+
+define  @masked_zload_nxv2i32(* %src,  %mask) {
+; CHECK-LABEL: masked_zload_nxv2i32:
+; CHECK-NOT: ld1sw
+; CHECK: ld1w { [[IN:z[0-9]+]].d }, [[PG:p[0-9]+]]/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.masked.load.nxv2i32(* %src, i32 1,  %mask,  undef)
+  %ext = zext  %load to 
+  ret  %ext
+}
+
+define  @masked_zload_nxv4i8(* %src,  %mask) {
+; CHECK-LABEL: masked_zload_nxv4i8:
+; CHECK-NOT: ld1sb
+; CHECK: ld1b { [[IN:z[0-9]+]].s }, [[PG:p[0-9]+]]/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.masked.load.nxv4i8(* %src, i32 1,  %mask,  undef)
+  %ext = zext  %load to 
+  ret  %ext
+}
+
+define  @masked_zload_nxv4i16(* %src,  %mask) {
+; CHECK-LABEL: masked_zload_nxv4i16:
+; CHECK-NOT: ld1sh
+; CHECK: ld1h { [[IN:z[0-9]+]].s }, [[PG:p[0-9]+]]/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.masked.load.nxv4i16(* %src, i32 1,  %mask,  undef)
+  %ext = zext  %load to 
+  ret  %ext
+}
+
+define  @masked_zload_nxv8i8(* %src,  %mask) {
+; CHECK-LABEL: masked_zload_nxv8i8:
+; CHECK-NOT: ld1sb
+; CHECK: ld1b { [[IN:z[0-9]+]].h }, [[PG:p[0-9]+]]/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.masked.load.nxv8i8(* %src, i32 1,  %mask,  undef)
+  %ext = zext  %load to 
+  ret  %ext
+}
+
+declare  @llvm.masked.load.nxv2i8(*, i32, , )
+declare  @llvm.masked.load.nxv2i16(*, i32, , )
+declare  @llvm.masked.load.nxv2i32(*, i32, , )
+declare  @llvm.masked.load.nxv4i8(*, i32, , )
+declare  @llvm.masked.load.nxv4i16(*, i32, , )
+declare  @llvm.masked.load.nxv8i8(*, i32, , )
Index: llvm/test/CodeGen/AArch64/sve-masked-ldst-sext.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-masked-ldst-sext.ll
@@ -0,0 +1,66 @@
+; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s
+
+;
+; Masked Loads
+;
+
+define  @masked_sload_nxv2i8( *%a,  %mask) {
+; CHECK-LABEL: masked_sload_nxv2i8:
+; CHECK: ld1sb { [[IN:z[0-9]+]].d }, [[PG:p[0-9]+]]/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.masked.load.nxv2i8( *%a, i32 1,  %mask,  undef)
+  %ext = sext  %load to 
+  ret  %ext
+}
+
+define  @masked_sload_nxv2i16( *%a,  %mask) {
+; CHECK-LABEL: masked_sload_nxv2i16:
+; CHECK: ld1sh { [[IN:z[0-9]+]].d }, [[PG:p[0-9]+]]/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.masked.load.nxv2i16( *%a, i32 1,  %mask,  undef)
+  %ext = sext  %load to 
+  ret  %ext
+}
+
+define  @masked_sload_nxv2i32( *%a,  %mask) {
+; CHECK-LABEL: masked_sload_nxv2i32:
+; CHECK: ld1sw { [[IN:z[0-9]+]].d }, [[PG:p[0-9]+]]/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.masked.load.nxv2i32( *%a, i32 1,  %mask,  undef)
+  %ext = sext  %load to 
+  ret  %ext
+}
+
+define  @masked_sload_nxv4i8( *%a,  %mask) {
+; CHECK-LABEL: masked_sload_nxv4i8:
+; CHECK: ld1sb { [[IN:z[0-9]+]].s }, [[PG:p[0-9]+]]/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.masked.load.nxv4i8( *%a, i32 1,  %mask,  undef)
+  %ext = sext  %load to 
+  ret  %ext
+}
+
+define  @masked_sload_nxv4i16( *%a,  %mask) {
+; CHECK-LABEL: masked_sload_nxv4i16:
+; CHECK: ld1sh { [[IN:z[0-9]+]].s }, [[PG:p[0-9]+]]/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.masked.load.nxv4i16( *%a, i32 1,  

[PATCH] D68877: [AArch64][SVE] Implement masked load intrinsics

2019-10-21 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin marked 4 inline comments as done.
kmclaughlin added a comment.

Thanks for reviewing this, @dmgreen! I have updated the patch to make use of 
the changes to DAGCombine introduced by D68337.




Comment at: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp:10393
+  ((!LegalOperations && !cast<MaskedLoadSDNode>(N0)->isVolatile()) ||
+   TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
+    MaskedLoadSDNode *LN0 = cast<MaskedLoadSDNode>(N0);

dmgreen wrote:
> I'm not convinced that just because a sext load is legal and a masked load is 
> legal, that a sext masked load is always legal.
Removed, as this patch can also use tryToFoldExtOfMaskedLoad for sext & zext 
masked loads.
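
(For reference, a sketch of the zero-extending pattern that fold handles;
the names are illustrative and the ld1b lowering is the one shown in the
patch's tests:)

; Sketch: zext of a masked i8 load; the combine lets this select a single
; zero-extending ld1b { z0.d } rather than a separate load and extend.
define <vscale x 2 x i64> @zext_masked_load_example(<vscale x 2 x i8>* %src, <vscale x 2 x i1> %mask) {
  %load = call <vscale x 2 x i8> @llvm.masked.load.nxv2i8(<vscale x 2 x i8>* %src, i32 1, <vscale x 2 x i1> %mask, <vscale x 2 x i8> undef)
  %ext = zext <vscale x 2 x i8> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %ext
}

declare <vscale x 2 x i8> @llvm.masked.load.nxv2i8(<vscale x 2 x i8>*, i32, <vscale x 2 x i1>, <vscale x 2 x i8>)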



Comment at: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h:151
+  bool isLegalMaskedLoad(Type *DataType) {
+return ST->hasSVE();
+  }

dmgreen wrote:
> This can handle all masked loads? Of any type, extended into any other type, 
> with any alignment?
I have added checks on types we can handle here



Comment at: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h:153
+  }
+  bool isLegalMaskedStore(Type *DataType) {
+return ST->hasSVE();

dmgreen wrote:
> This patch doesn't handle stores yet.
Removed!


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D68877/new/

https://reviews.llvm.org/D68877



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D68877: [AArch64][SVE] Implement masked load intrinsics

2019-10-21 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin updated this revision to Diff 225900.
kmclaughlin edited the summary of this revision.
kmclaughlin added a comment.

- Rebased patch, removed the extra sext & zext combines from DAGCombine which are no longer necessary
- Added isVectorLoadExtDesirable to AArch64ISelLowering
- Added more checks to isLegalMaskedLoad
- Changed //SVEUndef// to //SVEDup0Undef//, handling undef or all zeros
- Changed SelectionDAG::getConstant to return SPLAT_VECTOR instead of 
BUILD_VECTOR for scalable types


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D68877/new/

https://reviews.llvm.org/D68877

Files:
  llvm/include/llvm/CodeGen/SelectionDAG.h
  llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
  llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
  llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
  llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
  llvm/lib/Target/AArch64/AArch64ISelLowering.h
  llvm/lib/Target/AArch64/AArch64InstrInfo.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
  llvm/lib/Target/AArch64/SVEInstrFormats.td
  llvm/test/CodeGen/AArch64/sve-masked-ldst-nonext.ll
  llvm/test/CodeGen/AArch64/sve-masked-ldst-sext.ll
  llvm/test/CodeGen/AArch64/sve-masked-ldst-zext.ll

Index: llvm/test/CodeGen/AArch64/sve-masked-ldst-zext.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-masked-ldst-zext.ll
@@ -0,0 +1,72 @@
+; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s
+
+;
+; Masked Loads
+;
+
+define  @masked_zload_nxv2i8(* %src,  %mask) {
+; CHECK-LABEL: masked_zload_nxv2i8:
+; CHECK-NOT: ld1sb
+; CHECK: ld1b { [[IN:z[0-9]+]].d }, [[PG:p[0-9]+]]/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.masked.load.nxv2i8(* %src, i32 1,  %mask,  undef)
+  %ext = zext  %load to 
+  ret  %ext
+}
+
+define  @masked_zload_nxv2i16(* %src,  %mask) {
+; CHECK-LABEL: masked_zload_nxv2i16:
+; CHECK-NOT: ld1sh
+; CHECK: ld1h { [[IN:z[0-9]+]].d }, [[PG:p[0-9]+]]/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.masked.load.nxv2i16(* %src, i32 1,  %mask,  undef)
+  %ext = zext  %load to 
+  ret  %ext
+}
+
+define  @masked_zload_nxv2i32(* %src,  %mask) {
+; CHECK-LABEL: masked_zload_nxv2i32:
+; CHECK-NOT: ld1sw
+; CHECK: ld1w { [[IN:z[0-9]+]].d }, [[PG:p[0-9]+]]/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.masked.load.nxv2i32(* %src, i32 1,  %mask,  undef)
+  %ext = zext  %load to 
+  ret  %ext
+}
+
+define  @masked_zload_nxv4i8(* %src,  %mask) {
+; CHECK-LABEL: masked_zload_nxv4i8:
+; CHECK-NOT: ld1sb
+; CHECK: ld1b { [[IN:z[0-9]+]].s }, [[PG:p[0-9]+]]/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.masked.load.nxv4i8(* %src, i32 1,  %mask,  undef)
+  %ext = zext  %load to 
+  ret  %ext
+}
+
+define  @masked_zload_nxv4i16(* %src,  %mask) {
+; CHECK-LABEL: masked_zload_nxv4i16:
+; CHECK-NOT: ld1sh
+; CHECK: ld1h { [[IN:z[0-9]+]].s }, [[PG:p[0-9]+]]/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.masked.load.nxv4i16(* %src, i32 1,  %mask,  undef)
+  %ext = zext  %load to 
+  ret  %ext
+}
+
+define  @masked_zload_nxv8i8(* %src,  %mask) {
+; CHECK-LABEL: masked_zload_nxv8i8:
+; CHECK-NOT: ld1sb
+; CHECK: ld1b { [[IN:z[0-9]+]].h }, [[PG:p[0-9]+]]/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.masked.load.nxv8i8(* %src, i32 1,  %mask,  undef)
+  %ext = zext  %load to 
+  ret  %ext
+}
+
+declare  @llvm.masked.load.nxv2i8(*, i32, , )
+declare  @llvm.masked.load.nxv2i16(*, i32, , )
+declare  @llvm.masked.load.nxv2i32(*, i32, , )
+declare  @llvm.masked.load.nxv4i8(*, i32, , )
+declare  @llvm.masked.load.nxv4i16(*, i32, , )
+declare  @llvm.masked.load.nxv8i8(*, i32, , )
Index: llvm/test/CodeGen/AArch64/sve-masked-ldst-sext.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-masked-ldst-sext.ll
@@ -0,0 +1,66 @@
+; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s
+
+;
+; Masked Loads
+;
+
+define  @masked_sload_nxv2i8( *%a,  %mask) {
+; CHECK-LABEL: masked_sload_nxv2i8:
+; CHECK: ld1sb { [[IN:z[0-9]+]].d }, [[PG:p[0-9]+]]/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.masked.load.nxv2i8( *%a, i32 1,  %mask,  undef)
+  %ext = sext  %load to 
+  ret  %ext
+}
+
+define  @masked_sload_nxv2i16( *%a,  %mask) {
+; CHECK-LABEL: masked_sload_nxv2i16:
+; CHECK: ld1sh { [[IN:z[0-9]+]].d }, [[PG:p[0-9]+]]/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.masked.load.nxv2i16( *%a, i32 1,  %mask,  undef)
+  %ext = sext  %load to 
+  ret  %ext
+}
+
+define  @masked_sload_nxv2i32( *%a,  %mask) {
+; CHECK-LABEL: masked_sload_nxv2i32:
+; CHECK: ld1sw { [[IN:z[0-9]+]].d }, [[PG:p[0-9]+]]/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.masked.load.nxv2i32( *%a, i32 1,  %mask,  undef)
+  %ext = sext  %load to 
+  ret  %ext
+}
+
+define  @masked_sload_nxv4i8( *%a,  %mask) {
+; CHECK-LABEL: masked_sload_nxv4i8:
+; CHECK: ld1sb { [[IN:z[0-9]+]].s }, 

[PATCH] D67550: [AArch64][SVE] Implement unpack intrinsics

2019-10-18 Thread Kerry McLaughlin via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
kmclaughlin marked an inline comment as done.
Closed by commit rG0c7cc383e5b8: [AArch64][SVE] Implement unpack intrinsics 
(authored by kmclaughlin).
Herald added a subscriber: hiraditya.
Herald added a project: LLVM.

Changed prior to commit:
  https://reviews.llvm.org/D67550?vs=224558=225581#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D67550/new/

https://reviews.llvm.org/D67550

Files:
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
  llvm/lib/Target/AArch64/AArch64ISelLowering.h
  llvm/lib/Target/AArch64/AArch64InstrInfo.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/SVEInstrFormats.td
  llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll

Index: llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll
@@ -0,0 +1,129 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+;
+; SUNPKHI
+;
+
+define  @sunpkhi_i16( %a) {
+; CHECK-LABEL: sunpkhi_i16
+; CHECK: sunpkhi z0.h, z0.b
+; CHECK-NEXT: ret
+  %res = call  @llvm.aarch64.sve.sunpkhi.nxv8i16( %a)
+  ret  %res
+}
+
+define  @sunpkhi_i32( %a) {
+; CHECK-LABEL: sunpkhi_i32
+; CHECK: sunpkhi z0.s, z0.h
+; CHECK-NEXT: ret
+  %res = call  @llvm.aarch64.sve.sunpkhi.nxv4i32( %a)
+  ret  %res
+}
+
+define  @sunpkhi_i64( %a) {
+; CHECK-LABEL:  sunpkhi_i64
+; CHECK: sunpkhi z0.d, z0.s
+; CHECK-NEXT: ret
+  %res = call  @llvm.aarch64.sve.sunpkhi.nxv2i64( %a)
+  ret  %res
+}
+
+;
+; SUNPKLO
+;
+
+define  @sunpklo_i16( %a) {
+; CHECK-LABEL: sunpklo_i16
+; CHECK: sunpklo z0.h, z0.b
+; CHECK-NEXT: ret
+  %res = call  @llvm.aarch64.sve.sunpklo.nxv8i16( %a)
+  ret  %res
+}
+
+define  @sunpklo_i32( %a) {
+; CHECK-LABEL: sunpklo_i32
+; CHECK: sunpklo z0.s, z0.h
+; CHECK-NEXT: ret
+  %res = call  @llvm.aarch64.sve.sunpklo.nxv4i32( %a)
+  ret  %res
+}
+
+define  @sunpklo_i64( %a) {
+; CHECK-LABEL:  sunpklo_i64
+; CHECK: sunpklo z0.d, z0.s
+; CHECK-NEXT: ret
+  %res = call  @llvm.aarch64.sve.sunpklo.nxv2i64( %a)
+  ret  %res
+}
+
+;
+; UUNPKHI
+;
+
+define  @uunpkhi_i16( %a) {
+; CHECK-LABEL: uunpkhi_i16
+; CHECK: uunpkhi z0.h, z0.b
+; CHECK-NEXT: ret
+  %res = call  @llvm.aarch64.sve.uunpkhi.nxv8i16( %a)
+  ret  %res
+}
+
+define  @uunpkhi_i32( %a) {
+; CHECK-LABEL: uunpkhi_i32
+; CHECK: uunpkhi z0.s, z0.h
+; CHECK-NEXT: ret
+  %res = call  @llvm.aarch64.sve.uunpkhi.nxv4i32( %a)
+  ret  %res
+}
+
+define  @uunpkhi_i64( %a) {
+; CHECK-LABEL:  uunpkhi_i64
+; CHECK: uunpkhi z0.d, z0.s
+; CHECK-NEXT: ret
+  %res = call  @llvm.aarch64.sve.uunpkhi.nxv2i64( %a)
+  ret  %res
+}
+
+;
+; UUNPKLO
+;
+
+define  @uunpklo_i16( %a) {
+; CHECK-LABEL: uunpklo_i16
+; CHECK: uunpklo z0.h, z0.b
+; CHECK-NEXT: ret
+  %res = call  @llvm.aarch64.sve.uunpklo.nxv8i16( %a)
+  ret  %res
+}
+
+define  @uunpklo_i32( %a) {
+; CHECK-LABEL: uunpklo_i32
+; CHECK: uunpklo z0.s, z0.h
+; CHECK-NEXT: ret
+  %res = call  @llvm.aarch64.sve.uunpklo.nxv4i32( %a)
+  ret  %res
+}
+
+define  @uunpklo_i64( %a) {
+; CHECK-LABEL:  uunpklo_i64
+; CHECK: uunpklo z0.d, z0.s
+; CHECK-NEXT: ret
+  %res = call  @llvm.aarch64.sve.uunpklo.nxv2i64( %a)
+  ret  %res
+}
+
+declare  @llvm.aarch64.sve.sunpkhi.nxv8i16()
+declare  @llvm.aarch64.sve.sunpkhi.nxv4i32()
+declare  @llvm.aarch64.sve.sunpkhi.nxv2i64()
+
+declare  @llvm.aarch64.sve.sunpklo.nxv8i16()
+declare  @llvm.aarch64.sve.sunpklo.nxv4i32()
+declare  @llvm.aarch64.sve.sunpklo.nxv2i64()
+
+declare  @llvm.aarch64.sve.uunpkhi.nxv8i16()
+declare  @llvm.aarch64.sve.uunpkhi.nxv4i32()
+declare  @llvm.aarch64.sve.uunpkhi.nxv2i64()
+
+declare  @llvm.aarch64.sve.uunpklo.nxv8i16()
+declare  @llvm.aarch64.sve.uunpklo.nxv4i32()
+declare  @llvm.aarch64.sve.uunpklo.nxv2i64()
Index: llvm/lib/Target/AArch64/SVEInstrFormats.td
===
--- llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -848,10 +848,14 @@
   let Inst{4-0}   = Zd;
 }
 
-multiclass sve_int_perm_unpk<bits<2> opc, string asm> {
+multiclass sve_int_perm_unpk<bits<2> opc, string asm, SDPatternOperator op> {
   def _H : sve_int_perm_unpk<0b01, opc, asm, ZPR16, ZPR8>;
   def _S : sve_int_perm_unpk<0b10, opc, asm, ZPR32, ZPR16>;
   def _D : sve_int_perm_unpk<0b11, opc, asm, ZPR64, ZPR32>;
+
+  def : SVE_1_Op_Pat<nxv8i16, op, nxv16i8, !cast<Instruction>(NAME # _H)>;
+  def : SVE_1_Op_Pat<nxv4i32, op, nxv8i16, !cast<Instruction>(NAME # _S)>;
+  def : SVE_1_Op_Pat<nxv2i64, op, nxv4i32, !cast<Instruction>(NAME # _D)>;
 }
 
 class sve_int_perm_insrs<bits<2> sz8_64, string asm, ZPRRegOp zprty,
Index: llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
===
--- llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -211,10 +211,10 @@
   defm REV_PP : sve_int_perm_reverse_p<"rev">;
   defm REV_ZZ : 

[PATCH] D68877: [AArch64][SVE] Implement masked load intrinsics

2019-10-11 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin created this revision.
kmclaughlin added reviewers: huntergr, rovka, greened.
Herald added subscribers: psnobl, rkruppe, hiraditya, kristof.beyls, tschuett.
Herald added a project: LLVM.
kmclaughlin added a parent revision: D47775: [AArch64][SVE] Add SPLAT_VECTOR 
ISD Node.

Adds support for codegen of masked loads, with non-extending,
zero-extending and sign-extending variants.

Depends on the changes in D47775 <https://reviews.llvm.org/D47775> for
isConstantSplatVectorMaskForType.
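
A minimal standalone sketch of the kind of IR this patch lowers (vector types
and intrinsic mangling assumed here, following the tests below): a predicated
load of i16 elements that is zero-extended to i64 lanes, which should now
select a single unsigned ld1h rather than a load followed by a separate extend.

; Predicated load; the passthru is undef, so only active lanes matter.
define <vscale x 2 x i64> @zload_zext_example(<vscale x 2 x i16>* %src, <vscale x 2 x i1> %mask) {
  %load = call <vscale x 2 x i16> @llvm.masked.load.nxv2i16(<vscale x 2 x i16>* %src, i32 1, <vscale x 2 x i1> %mask, <vscale x 2 x i16> undef)
  ; The zero-extend is expected to fold into the load: ld1h { z0.d }, p0/z, [x0]
  %ext = zext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %ext
}
declare <vscale x 2 x i16> @llvm.masked.load.nxv2i16(<vscale x 2 x i16>*, i32, <vscale x 2 x i1>, <vscale x 2 x i16>)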


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D68877

Files:
  llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
  llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
  llvm/lib/CodeGen/TargetLoweringBase.cpp
  llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
  llvm/lib/Target/AArch64/AArch64InstrInfo.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
  llvm/lib/Target/AArch64/SVEInstrFormats.td
  llvm/test/CodeGen/AArch64/sve-masked-ldst-nonext.ll
  llvm/test/CodeGen/AArch64/sve-masked-ldst-sext.ll
  llvm/test/CodeGen/AArch64/sve-masked-ldst-zext.ll

Index: llvm/test/CodeGen/AArch64/sve-masked-ldst-zext.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-masked-ldst-zext.ll
@@ -0,0 +1,72 @@
+; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s
+
+;
+; Masked Loads
+;
+
+define  @masked_zload_nxv2i8(* %src,  %mask) {
+; CHECK-LABEL: masked_zload_nxv2i8:
+; CHECK-NOT: ld1sb
+; CHECK: ld1b { [[IN:z[0-9]+]].d }, [[PG:p[0-9]+]]/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.masked.load.nxv2i8(* %src, i32 1,  %mask,  undef)
+  %ext = zext  %load to 
+  ret  %ext
+}
+
+define  @masked_zload_nxv2i16(* %src,  %mask) {
+; CHECK-LABEL: masked_zload_nxv2i16:
+; CHECK-NOT: ld1sh
+; CHECK: ld1h { [[IN:z[0-9]+]].d }, [[PG:p[0-9]+]]/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.masked.load.nxv2i16(* %src, i32 1,  %mask,  undef)
+  %ext = zext  %load to 
+  ret  %ext
+}
+
+define  @masked_zload_nxv2i32(* %src,  %mask) {
+; CHECK-LABEL: masked_zload_nxv2i32:
+; CHECK-NOT: ld1sw
+; CHECK: ld1w { [[IN:z[0-9]+]].d }, [[PG:p[0-9]+]]/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.masked.load.nxv2i32(* %src, i32 1,  %mask,  undef)
+  %ext = zext  %load to 
+  ret  %ext
+}
+
+define  @masked_zload_nxv4i8(* %src,  %mask) {
+; CHECK-LABEL: masked_zload_nxv4i8:
+; CHECK-NOT: ld1sb
+; CHECK: ld1b { [[IN:z[0-9]+]].s }, [[PG:p[0-9]+]]/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.masked.load.nxv4i8(* %src, i32 1,  %mask,  undef)
+  %ext = zext  %load to 
+  ret  %ext
+}
+
+define  @masked_zload_nxv4i16(* %src,  %mask) {
+; CHECK-LABEL: masked_zload_nxv4i16:
+; CHECK-NOT: ld1sh
+; CHECK: ld1h { [[IN:z[0-9]+]].s }, [[PG:p[0-9]+]]/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.masked.load.nxv4i16(* %src, i32 1,  %mask,  undef)
+  %ext = zext  %load to 
+  ret  %ext
+}
+
+define  @masked_zload_nxv8i8(* %src,  %mask) {
+; CHECK-LABEL: masked_zload_nxv8i8:
+; CHECK-NOT: ld1sb
+; CHECK: ld1b { [[IN:z[0-9]+]].h }, [[PG:p[0-9]+]]/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.masked.load.nxv8i8(* %src, i32 1,  %mask,  undef)
+  %ext = zext  %load to 
+  ret  %ext
+}
+
+declare <vscale x 2 x i8> @llvm.masked.load.nxv2i8(<vscale x 2 x i8>*, i32, <vscale x 2 x i1>, <vscale x 2 x i8>)
+declare <vscale x 2 x i16> @llvm.masked.load.nxv2i16(<vscale x 2 x i16>*, i32, <vscale x 2 x i1>, <vscale x 2 x i16>)
+declare <vscale x 2 x i32> @llvm.masked.load.nxv2i32(<vscale x 2 x i32>*, i32, <vscale x 2 x i1>, <vscale x 2 x i32>)
+declare <vscale x 4 x i8> @llvm.masked.load.nxv4i8(<vscale x 4 x i8>*, i32, <vscale x 4 x i1>, <vscale x 4 x i8>)
+declare <vscale x 4 x i16> @llvm.masked.load.nxv4i16(<vscale x 4 x i16>*, i32, <vscale x 4 x i1>, <vscale x 4 x i16>)
+declare <vscale x 8 x i8> @llvm.masked.load.nxv8i8(<vscale x 8 x i8>*, i32, <vscale x 8 x i1>, <vscale x 8 x i8>)
Index: llvm/test/CodeGen/AArch64/sve-masked-ldst-sext.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-masked-ldst-sext.ll
@@ -0,0 +1,66 @@
+; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s
+
+;
+; Masked Loads
+;
+
+define  @masked_sload_nxv2i8( *%a,  %mask) {
+; CHECK-LABEL: masked_sload_nxv2i8:
+; CHECK: ld1sb { [[IN:z[0-9]+]].d }, [[PG:p[0-9]+]]/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.masked.load.nxv2i8( *%a, i32 1,  %mask,  undef)
+  %ext = sext  %load to 
+  ret  %ext
+}
+
+define  @masked_sload_nxv2i16( *%a,  %mask) {
+; CHECK-LABEL: masked_sload_nxv2i16:
+; CHECK: ld1sh { [[IN:z[0-9]+]].d }, [[PG:p[0-9]+]]/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.masked.load.nxv2i16( *%a, i32 1,  %mask,  undef)
+  %ext = sext  %load to 
+  ret  %ext
+}
+
+define  @masked_sload_nxv2i32( *%a,  %mask) {
+; CHECK-LABEL: masked_sload_nxv2i32:
+; CHECK: ld1sw { [[IN:z[0-9]+]].d }, [[PG:p[0-9]+]]/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.masked.load.nxv2i32( *%a, i32 1,  %mask,  undef)
+  %ext = sext  %load to 
+  ret  %ext
+}
+
+define  @masked_sload_nxv4i8( *%a,  %mask) {
+; CHECK-LABEL: masked_sload_nxv4i8:
+; CHECK: ld1sb { [[IN:z[0-9]+]].s }, [[PG:p[0-9]+]]/z, [x0]
+; CHECK-NEXT: ret
+  %load = call  @llvm.masked.load.nxv4i8( *%a, i32 1,  %mask,  undef)
+  %ext = sext  %load to 
+  ret  %ext
+}
+
+define  @masked_sload_nxv4i16( 

[PATCH] D67551: [AArch64][SVE] Implement sdot and udot (lane) intrinsics

2019-10-11 Thread Kerry McLaughlin via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rGee0a0a34646f: [AArch64][SVE] Implement sdot and udot (lane) 
intrinsics (authored by kmclaughlin).
Herald added a subscriber: hiraditya.
Herald added a project: LLVM.

Changed prior to commit:
  https://reviews.llvm.org/D67551?vs=220096&id=224608#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D67551/new/

https://reviews.llvm.org/D67551
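
As a sketch of the indexed form added here (operand types assumed from the
intrinsic mangling): the immediate selects a group of four 8-bit elements
within each 128-bit segment of the second multiplicand, and every 32-bit lane
of the accumulator receives the sum of four products.

; Accumulating dot product: for each i32 lane of %acc, add the dot product of
; four i8 pairs, taking the i8 group selected by the immediate (here 2) from
; the corresponding 128-bit segment of %b.
define <vscale x 4 x i32> @sdot_lane_example(<vscale x 4 x i32> %acc, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sdot.lane.nxv4i32(<vscale x 4 x i32> %acc, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 2)
  ret <vscale x 4 x i32> %out   ; expected selection: sdot z0.s, z1.b, z2.b[2]
}
declare <vscale x 4 x i32> @llvm.aarch64.sve.sdot.lane.nxv4i32(<vscale x 4 x i32>, <vscale x 16 x i8>, <vscale x 16 x i8>, i32)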

Files:
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64InstrFormats.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/SVEInstrFormats.td
  llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith.ll

Index: llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith.ll
===
--- llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith.ll
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith.ll
@@ -88,6 +88,87 @@
   ret  %out
 }
 
+; SDOT
+
+define  @sdot_i32( %a,  %b,  %c) {
+; CHECK-LABEL: sdot_i32:
+; CHECK: sdot z0.s, z1.b, z2.b
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.sdot.nxv4i32( %a,
+ %b,
+ %c)
+  ret  %out
+}
+
+define  @sdot_i64( %a,  %b,  %c) {
+; CHECK-LABEL: sdot_i64:
+; CHECK: sdot z0.d, z1.h, z2.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.sdot.nxv2i64( %a,
+ %b,
+ %c)
+  ret  %out
+}
+
+; SDOT (Indexed)
+
+define  @sdot_lane_i32( %a,  %b,  %c) {
+; CHECK-LABEL: sdot_lane_i32:
+; CHECK: sdot z0.s, z1.b, z2.b[2]
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.sdot.lane.nxv4i32( %a,
+  %b,
+  %c,
+ i32 2)
+  ret  %out
+}
+
+define  @sdot_lane_i64( %a,  %b,  %c) {
+; CHECK-LABEL: sdot_lane_i64:
+; CHECK: sdot z0.d, z1.h, z2.h[1]
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.sdot.lane.nxv2i64( %a,
+  %b,
+  %c,
+ i32 1)
+  ret  %out
+}
+
+; UDOT
+
+define  @udot_i32( %a,  %b,  %c) {
+; CHECK-LABEL: udot_i32:
+; CHECK: udot z0.s, z1.b, z2.b
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.udot.nxv4i32( %a,
+ %b,
+ %c)
+  ret  %out
+}
+
+define  @udot_i64( %a,  %b,  %c) {
+; CHECK-LABEL: udot_i64:
+; CHECK: udot z0.d, z1.h, z2.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.udot.nxv2i64( %a,
+ %b,
+ %c)
+  ret  %out
+}
+
+; UDOT (Indexed)
+
+define  @udot_lane_i32( %a,  %b,  %c) {
+; CHECK-LABEL: udot_lane_i32:
+; CHECK: udot z0.s, z1.b, z2.b[2]
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.udot.lane.nxv4i32( %a,
+  %b,
+  %c,
+ i32 2)
+  ret  %out
+}
+
 declare <vscale x 16 x i8> @llvm.aarch64.sve.abs.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, <vscale x 16 x i8>)
 declare <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x i16>)
 declare <vscale x 4 x i32> @llvm.aarch64.sve.abs.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
@@ -97,3 +178,15 @@
 declare <vscale x 8 x i16> @llvm.aarch64.sve.neg.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x i16>)
 declare <vscale x 4 x i32> @llvm.aarch64.sve.neg.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
 declare <vscale x 2 x i64> @llvm.aarch64.sve.neg.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>)
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sdot.nxv4i32(<vscale x 4 x i32>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.sdot.nxv2i64(<vscale x 2 x i64>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sdot.lane.nxv4i32(<vscale x 4 x i32>, <vscale x 16 x i8>, <vscale x 16 x i8>, i32)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.sdot.lane.nxv2i64(<vscale x 2 x i64>, <vscale x 8 x i16>, <vscale x 8 x i16>, i32)
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.udot.nxv4i32(<vscale x 4 x i32>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.udot.nxv2i64(<vscale x 2 x i64>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.udot.lane.nxv4i32(<vscale x 4 x i32>, <vscale x 16 x i8>, <vscale x 16 x i8>, i32)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.udot.lane.nxv2i64(<vscale x 2 x i64>, <vscale x 8 x i16>, <vscale x 8 x i16>, i32)
Index: llvm/lib/Target/AArch64/SVEInstrFormats.td
===
--- llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -2024,12 +2024,14 @@
 
   let Constraints = "$Zda = $_Zda";
   let DestructiveInstType = Destructive;
-  let ElementSize = zprty1.ElementSize;
 }
 
-multiclass sve_intx_dot<bit opc, string asm> {
+multiclass sve_intx_dot<bit opc, string asm, SDPatternOperator op> {
   def _S : sve_intx_dot<0b0, opc, asm, ZPR32, ZPR8>;
   def _D : sve_intx_dot<0b1, opc, asm, ZPR64, ZPR16>;
+
+  def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _S)>;
+  def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _D)>;
 }
 
 

[PATCH] D67550: [AArch64][SVE] Implement unpack intrinsics

2019-10-11 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin marked 2 inline comments as done.
kmclaughlin added inline comments.



Comment at: lib/Target/AArch64/SVEInstrFormats.td:836
 class sve_int_perm_unpk<bits<2> sz16_64, bits<2> opc, string asm,
-                        ZPRRegOp zprty1, ZPRRegOp zprty2>
+                        ZPRRegOp zprty1, ZPRRegOp zprty2, SDPatternOperator op>
 : I<(outs zprty1:$Zd), (ins zprty2:$Zn),

greened wrote:
> Where is `op` used?  I assume that comes later but it would help to 
> understand where this is going.
Thanks for pointing this out, op isn't actually used here!


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D67550/new/

https://reviews.llvm.org/D67550





[PATCH] D67550: [AArch64][SVE] Implement unpack intrinsics

2019-10-11 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin updated this revision to Diff 224558.
kmclaughlin added a comment.

Removed unused //SDPatternOperator op// from sve_int_perm_unpk class


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D67550/new/

https://reviews.llvm.org/D67550
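
A minimal sketch of the vector unpack intrinsics this patch selects (types
assumed from the mangling in the tests below): sunpkhi/sunpklo sign-extend the
high or low half of the source elements into elements of twice the width,
while uunpkhi/uunpklo zero-extend them.

; Sign-extend the high half of the i8 elements into i16 elements.
define <vscale x 8 x i16> @sunpkhi_example(<vscale x 16 x i8> %a) {
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.sunpkhi.nxv8i16(<vscale x 16 x i8> %a)
  ret <vscale x 8 x i16> %res   ; expected selection: sunpkhi z0.h, z0.b
}
declare <vscale x 8 x i16> @llvm.aarch64.sve.sunpkhi.nxv8i16(<vscale x 16 x i8>)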

Files:
  include/llvm/IR/IntrinsicsAArch64.td
  lib/Target/AArch64/AArch64ISelLowering.cpp
  lib/Target/AArch64/AArch64ISelLowering.h
  lib/Target/AArch64/AArch64InstrInfo.td
  lib/Target/AArch64/AArch64SVEInstrInfo.td
  lib/Target/AArch64/SVEInstrFormats.td
  test/CodeGen/AArch64/sve-intrinsics-perm-select.ll

Index: test/CodeGen/AArch64/sve-intrinsics-perm-select.ll
===
--- /dev/null
+++ test/CodeGen/AArch64/sve-intrinsics-perm-select.ll
@@ -0,0 +1,129 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+;
+; SUNPKHI
+;
+
+define  @sunpkhi_i16( %a) {
+; CHECK-LABEL: sunpkhi_i16
+; CHECK: sunpkhi z0.h, z0.b
+; CHECK-NEXT: ret
+  %res = call  @llvm.aarch64.sve.sunpkhi.nxv8i16( %a)
+  ret  %res
+}
+
+define  @sunpkhi_i32( %a) {
+; CHECK-LABEL: sunpkhi_i32
+; CHECK: sunpkhi z0.s, z0.h
+; CHECK-NEXT: ret
+  %res = call  @llvm.aarch64.sve.sunpkhi.nxv4i32( %a)
+  ret  %res
+}
+
+define  @sunpkhi_i64( %a) {
+; CHECK-LABEL:  sunpkhi_i64
+; CHECK: sunpkhi z0.d, z0.s
+; CHECK-NEXT: ret
+  %res = call  @llvm.aarch64.sve.sunpkhi.nxv2i64( %a)
+  ret  %res
+}
+
+;
+; SUNPKLO
+;
+
+define  @sunpklo_i16( %a) {
+; CHECK-LABEL: sunpklo_i16
+; CHECK: sunpklo z0.h, z0.b
+; CHECK-NEXT: ret
+  %res = call  @llvm.aarch64.sve.sunpklo.nxv8i16( %a)
+  ret  %res
+}
+
+define  @sunpklo_i32( %a) {
+; CHECK-LABEL: sunpklo_i32
+; CHECK: sunpklo z0.s, z0.h
+; CHECK-NEXT: ret
+  %res = call  @llvm.aarch64.sve.sunpklo.nxv4i32( %a)
+  ret  %res
+}
+
+define  @sunpklo_i64( %a) {
+; CHECK-LABEL:  sunpklo_i64
+; CHECK: sunpklo z0.d, z0.s
+; CHECK-NEXT: ret
+  %res = call  @llvm.aarch64.sve.sunpklo.nxv2i64( %a)
+  ret  %res
+}
+
+;
+; UUNPKHI
+;
+
+define  @uunpkhi_i16( %a) {
+; CHECK-LABEL: uunpkhi_i16
+; CHECK: uunpkhi z0.h, z0.b
+; CHECK-NEXT: ret
+  %res = call  @llvm.aarch64.sve.uunpkhi.nxv8i16( %a)
+  ret  %res
+}
+
+define  @uunpkhi_i32( %a) {
+; CHECK-LABEL: uunpkhi_i32
+; CHECK: uunpkhi z0.s, z0.h
+; CHECK-NEXT: ret
+  %res = call  @llvm.aarch64.sve.uunpkhi.nxv4i32( %a)
+  ret  %res
+}
+
+define  @uunpkhi_i64( %a) {
+; CHECK-LABEL:  uunpkhi_i64
+; CHECK: uunpkhi z0.d, z0.s
+; CHECK-NEXT: ret
+  %res = call  @llvm.aarch64.sve.uunpkhi.nxv2i64( %a)
+  ret  %res
+}
+
+;
+; UUNPKLO
+;
+
+define  @uunpklo_i16( %a) {
+; CHECK-LABEL: uunpklo_i16
+; CHECK: uunpklo z0.h, z0.b
+; CHECK-NEXT: ret
+  %res = call  @llvm.aarch64.sve.uunpklo.nxv8i16( %a)
+  ret  %res
+}
+
+define  @uunpklo_i32( %a) {
+; CHECK-LABEL: uunpklo_i32
+; CHECK: uunpklo z0.s, z0.h
+; CHECK-NEXT: ret
+  %res = call  @llvm.aarch64.sve.uunpklo.nxv4i32( %a)
+  ret  %res
+}
+
+define  @uunpklo_i64( %a) {
+; CHECK-LABEL:  uunpklo_i64
+; CHECK: uunpklo z0.d, z0.s
+; CHECK-NEXT: ret
+  %res = call  @llvm.aarch64.sve.uunpklo.nxv2i64( %a)
+  ret  %res
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sunpkhi.nxv8i16(<vscale x 16 x i8>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sunpkhi.nxv4i32(<vscale x 8 x i16>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.sunpkhi.nxv2i64(<vscale x 4 x i32>)
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sunpklo.nxv8i16(<vscale x 16 x i8>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sunpklo.nxv4i32(<vscale x 8 x i16>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.sunpklo.nxv2i64(<vscale x 4 x i32>)
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.uunpkhi.nxv8i16(<vscale x 16 x i8>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.uunpkhi.nxv4i32(<vscale x 8 x i16>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.uunpkhi.nxv2i64(<vscale x 4 x i32>)
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.uunpklo.nxv8i16(<vscale x 16 x i8>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.uunpklo.nxv4i32(<vscale x 8 x i16>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.uunpklo.nxv2i64(<vscale x 4 x i32>)
Index: lib/Target/AArch64/SVEInstrFormats.td
===
--- lib/Target/AArch64/SVEInstrFormats.td
+++ lib/Target/AArch64/SVEInstrFormats.td
@@ -848,10 +848,14 @@
   let Inst{4-0}   = Zd;
 }
 
-multiclass sve_int_perm_unpk<bits<2> opc, string asm> {
+multiclass sve_int_perm_unpk<bits<2> opc, string asm, SDPatternOperator op> {
   def _H : sve_int_perm_unpk<0b01, opc, asm, ZPR16, ZPR8>;
   def _S : sve_int_perm_unpk<0b10, opc, asm, ZPR32, ZPR16>;
   def _D : sve_int_perm_unpk<0b11, opc, asm, ZPR64, ZPR32>;
+
+  def : SVE_1_Op_Pat<nxv8i16, op, nxv16i8, !cast<Instruction>(NAME # _H)>;
+  def : SVE_1_Op_Pat<nxv4i32, op, nxv8i16, !cast<Instruction>(NAME # _S)>;
+  def : SVE_1_Op_Pat<nxv2i64, op, nxv4i32, !cast<Instruction>(NAME # _D)>;
 }
 
 class sve_int_perm_insrs<bits<2> sz8_64, string asm, ZPRRegOp zprty,
Index: lib/Target/AArch64/AArch64SVEInstrInfo.td
===
--- lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -211,10 +211,10 @@
   defm REV_PP : sve_int_perm_reverse_p<"rev">;
   defm REV_ZZ : sve_int_perm_reverse_z<"rev">;
 
-  defm SUNPKLO_ZZ : sve_int_perm_unpk<0b00, "sunpklo">;
-  defm SUNPKHI_ZZ : sve_int_perm_unpk<0b01, "sunpkhi">;
-  defm UUNPKLO_ZZ : sve_int_perm_unpk<0b10, "uunpklo">;
-  defm UUNPKHI_ZZ : sve_int_perm_unpk<0b11, "uunpkhi">;
+  defm SUNPKLO_ZZ : sve_int_perm_unpk<0b00, "sunpklo", AArch64sunpklo>;
+  defm SUNPKHI_ZZ : 

[PATCH] D68023: [AArch64][SVE] Implement int_aarch64_sve_cnt intrinsic

2019-10-02 Thread Kerry McLaughlin via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rL373468: [AArch64][SVE] Implement int_aarch64_sve_cnt 
intrinsic (authored by kmclaughlin, committed by ).

Changed prior to commit:
  https://reviews.llvm.org/D68023?vs=222780&id=222814#toc

Repository:
  rL LLVM

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D68023/new/

https://reviews.llvm.org/D68023
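
A minimal usage sketch (vector types assumed from the mangling): the intrinsic
takes the inactive-lane values, a governing predicate and the source vector,
and returns the per-element population count; for floating-point inputs the
result is the integer vector of matching element width.

; Merging CNT: inactive lanes of the result are taken from %inactive.
define <vscale x 16 x i8> @cnt_example(<vscale x 16 x i8> %inactive, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.cnt.nxv16i8(<vscale x 16 x i8> %inactive, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %a)
  ret <vscale x 16 x i8> %out   ; expected selection: cnt z0.b, p0/m, z1.b
}
declare <vscale x 16 x i8> @llvm.aarch64.sve.cnt.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, <vscale x 16 x i8>)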

Files:
  llvm/trunk/include/llvm/IR/IntrinsicsAArch64.td
  llvm/trunk/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/trunk/lib/Target/AArch64/SVEInstrFormats.td
  llvm/trunk/test/CodeGen/AArch64/sve-intrinsics-counting-bits.ll

Index: llvm/trunk/include/llvm/IR/IntrinsicsAArch64.td
===
--- llvm/trunk/include/llvm/IR/IntrinsicsAArch64.td
+++ llvm/trunk/include/llvm/IR/IntrinsicsAArch64.td
@@ -768,6 +768,13 @@
  LLVMMatchType<0>],
 [IntrNoMem]>;
 
+  class AdvSIMD_SVE_CNT_Intrinsic
+    : Intrinsic<[LLVMVectorOfBitcastsToInt<0>],
+                [LLVMVectorOfBitcastsToInt<0>,
+                 LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+                 llvm_anyvector_ty],
+                [IntrNoMem]>;
+
   class AdvSIMD_SVE_PUNPKHI_Intrinsic
     : Intrinsic<[LLVMHalfElementsVectorType<0>],
                 [llvm_anyvector_ty],
@@ -793,6 +800,12 @@
 def int_aarch64_sve_neg : AdvSIMD_Merged1VectorArg_Intrinsic;
 
 //
+// Counting bits
+//
+
+def int_aarch64_sve_cnt : AdvSIMD_SVE_CNT_Intrinsic;
+
+//
 // Floating-point comparisons
 //
 
Index: llvm/trunk/test/CodeGen/AArch64/sve-intrinsics-counting-bits.ll
===
--- llvm/trunk/test/CodeGen/AArch64/sve-intrinsics-counting-bits.ll
+++ llvm/trunk/test/CodeGen/AArch64/sve-intrinsics-counting-bits.ll
@@ -0,0 +1,83 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+;
+; CNT
+;
+
+define  @cnt_i8( %a,  %pg,  %b) {
+; CHECK-LABEL: cnt_i8:
+; CHECK: cnt z0.b, p0/m, z1.b
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.cnt.nxv16i8( %a,
+%pg,
+%b)
+  ret  %out
+}
+
+define  @cnt_i16( %a,  %pg,  %b) {
+; CHECK-LABEL: cnt_i16:
+; CHECK: cnt z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.cnt.nxv8i16( %a,
+%pg,
+%b)
+  ret  %out
+}
+
+define  @cnt_i32( %a,  %pg,  %b) {
+; CHECK-LABEL: cnt_i32:
+; CHECK: cnt z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.cnt.nxv4i32( %a,
+%pg,
+%b)
+  ret  %out
+}
+
+define  @cnt_i64( %a,  %pg,  %b) {
+; CHECK-LABEL: cnt_i64:
+; CHECK: cnt z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.cnt.nxv2i64( %a,
+%pg,
+%b)
+  ret  %out
+}
+
+define  @cnt_f16( %a,  %pg,  %b) {
+; CHECK-LABEL: cnt_f16:
+; CHECK: cnt z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.cnt.nxv8f16( %a,
+%pg,
+%b)
+  ret  %out
+}
+
+define  @cnt_f32( %a,  %pg,  %b) {
+; CHECK-LABEL: cnt_f32:
+; CHECK: cnt z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.cnt.nxv4f32( %a,
+%pg,
+%b)
+  ret  %out
+}
+
+define  @cnt_f64( %a,  %pg,  %b) {
+; CHECK-LABEL: cnt_f64:
+; CHECK: cnt z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.cnt.nxv2f64( %a,
+%pg,
+%b)
+  ret  %out
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.cnt.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.cnt.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.cnt.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.cnt.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.cnt.nxv8f16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x half>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.cnt.nxv4f32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x float>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.cnt.nxv2f64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x double>)
Index: llvm/trunk/lib/Target/AArch64/AArch64SVEInstrInfo.td
===
--- llvm/trunk/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ llvm/trunk/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -97,11 +97,11 @@
   defm ABS_ZPmZ  : sve_int_un_pred_arit_0<  0b110, "abs", int_aarch64_sve_abs>;
   defm NEG_ZPmZ  : sve_int_un_pred_arit_0<  0b111, "neg", int_aarch64_sve_neg>;
 
-  defm CLS_ZPmZ  : sve_int_un_pred_arit_1<   0b000, "cls">;
-  defm CLZ_ZPmZ  : 

[PATCH] D68023: [AArch64][SVE] Implement int_aarch64_sve_cnt intrinsic

2019-10-02 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin updated this revision to Diff 222780.
kmclaughlin added a comment.

- Fixes alignment of last argument to //sve_int_un_pred_arit_1// in 
AArch64SVEInstrInfo.td


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D68023/new/

https://reviews.llvm.org/D68023

Files:
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/SVEInstrFormats.td
  llvm/test/CodeGen/AArch64/sve-intrinsics-counting-bits.ll

Index: llvm/test/CodeGen/AArch64/sve-intrinsics-counting-bits.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-counting-bits.ll
@@ -0,0 +1,83 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+;
+; CNT
+;
+
+define  @cnt_i8( %a,  %pg,  %b) {
+; CHECK-LABEL: cnt_i8:
+; CHECK: cnt z0.b, p0/m, z1.b
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.cnt.nxv16i8( %a,
+%pg,
+%b)
+  ret  %out
+}
+
+define  @cnt_i16( %a,  %pg,  %b) {
+; CHECK-LABEL: cnt_i16:
+; CHECK: cnt z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.cnt.nxv8i16( %a,
+%pg,
+%b)
+  ret  %out
+}
+
+define  @cnt_i32( %a,  %pg,  %b) {
+; CHECK-LABEL: cnt_i32:
+; CHECK: cnt z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.cnt.nxv4i32( %a,
+%pg,
+%b)
+  ret  %out
+}
+
+define  @cnt_i64( %a,  %pg,  %b) {
+; CHECK-LABEL: cnt_i64:
+; CHECK: cnt z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.cnt.nxv2i64( %a,
+%pg,
+%b)
+  ret  %out
+}
+
+define  @cnt_f16( %a,  %pg,  %b) {
+; CHECK-LABEL: cnt_f16:
+; CHECK: cnt z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.cnt.nxv8f16( %a,
+%pg,
+%b)
+  ret  %out
+}
+
+define  @cnt_f32( %a,  %pg,  %b) {
+; CHECK-LABEL: cnt_f32:
+; CHECK: cnt z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.cnt.nxv4f32( %a,
+%pg,
+%b)
+  ret  %out
+}
+
+define  @cnt_f64( %a,  %pg,  %b) {
+; CHECK-LABEL: cnt_f64:
+; CHECK: cnt z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.cnt.nxv2f64( %a,
+%pg,
+%b)
+  ret  %out
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.cnt.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.cnt.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.cnt.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.cnt.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.cnt.nxv8f16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x half>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.cnt.nxv4f32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x float>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.cnt.nxv2f64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x double>)
Index: llvm/lib/Target/AArch64/SVEInstrFormats.td
===
--- llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -2871,11 +2871,21 @@
   def _D : sve_int_un_pred_arit<0b11, { opc, 0b0 }, asm, ZPR64>;
 }
 
-multiclass sve_int_un_pred_arit_1<bits<3> opc, string asm> {
+multiclass sve_int_un_pred_arit_1<bits<3> opc, string asm,
+                                  SDPatternOperator op> {
   def _B : sve_int_un_pred_arit<0b00, { opc, 0b1 }, asm, ZPR8>;
   def _H : sve_int_un_pred_arit<0b01, { opc, 0b1 }, asm, ZPR16>;
   def _S : sve_int_un_pred_arit<0b10, { opc, 0b1 }, asm, ZPR32>;
   def _D : sve_int_un_pred_arit<0b11, { opc, 0b1 }, asm, ZPR64>;
+
+  def : SVE_3_Op_Pat<nxv16i8, op, nxv16i8, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>;
+  def : SVE_3_Op_Pat<nxv8i16, op, nxv8i16, nxv8i1,  nxv8i16, !cast<Instruction>(NAME # _H)>;
+  def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv4i1,  nxv4i32, !cast<Instruction>(NAME # _S)>;
+  def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv2i1,  nxv2i64, !cast<Instruction>(NAME # _D)>;
+
+  def : SVE_3_Op_Pat<nxv8i16, op, nxv8i16, nxv8i1,  nxv8f16, !cast<Instruction>(NAME # _H)>;
+  def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv4i1,  nxv4f32, !cast<Instruction>(NAME # _S)>;
+  def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv2i1,  nxv2f64, !cast<Instruction>(NAME # _D)>;
 }
 
 multiclass sve_int_un_pred_arit_1_fp<bits<3> opc, string asm> {
Index: llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
===
--- llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -97,11 +97,11 @@
   defm ABS_ZPmZ  : sve_int_un_pred_arit_0<  0b110, "abs", int_aarch64_sve_abs>;
   defm NEG_ZPmZ  : sve_int_un_pred_arit_0<  0b111, "neg", int_aarch64_sve_neg>;
 
-  defm CLS_ZPmZ  : sve_int_un_pred_arit_1<   0b000, "cls">;
-  defm CLZ_ZPmZ  : sve_int_un_pred_arit_1<   0b001, "clz">;
-  defm CNT_ZPmZ  : sve_int_un_pred_arit_1<   0b010, "cnt">;
-  defm 

[PATCH] D68021: [IntrinsicEmitter] Add overloaded type VecOfBitcastsToInt for SVE intrinsics

2019-10-02 Thread Kerry McLaughlin via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rL373437: [IntrinsicEmitter] Add overloaded type 
VecOfBitcastsToInt for SVE intrinsics (authored by kmclaughlin, committed by ).

Changed prior to commit:
  https://reviews.llvm.org/D68021?vs=221756&id=222777#toc

Repository:
  rL LLVM

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D68021/new/

https://reviews.llvm.org/D68021
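
A short sketch of how the new overloaded type is used in an intrinsic
definition (this is the AdvSIMD_SVE_CNT_Intrinsic class from the CNT patch;
treat the exact layout as illustrative): the result and the merge operand are
the integer vector with the same element count and width as the overloaded
source vector, so an nxv8f16 input yields an nxv8i16 result.

  // Result and passthru use the bitcast-to-integer form of the overloaded
  // source vector type.
  class AdvSIMD_SVE_CNT_Intrinsic
    : Intrinsic<[LLVMVectorOfBitcastsToInt<0>],
                [LLVMVectorOfBitcastsToInt<0>,
                 LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
                 llvm_anyvector_ty],
                [IntrNoMem]>;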

Files:
  llvm/trunk/include/llvm/IR/Intrinsics.h
  llvm/trunk/include/llvm/IR/Intrinsics.td
  llvm/trunk/lib/IR/Function.cpp
  llvm/trunk/utils/TableGen/IntrinsicEmitter.cpp

Index: llvm/trunk/utils/TableGen/IntrinsicEmitter.cpp
===
--- llvm/trunk/utils/TableGen/IntrinsicEmitter.cpp
+++ llvm/trunk/utils/TableGen/IntrinsicEmitter.cpp
@@ -223,7 +223,8 @@
   IIT_VEC_ELEMENT = 42,
   IIT_SCALABLE_VEC = 43,
   IIT_SUBDIVIDE2_ARG = 44,
-  IIT_SUBDIVIDE4_ARG = 45
+  IIT_SUBDIVIDE4_ARG = 45,
+  IIT_VEC_OF_BITCASTS_TO_INT = 46
 };
 
 static void EncodeFixedValueType(MVT::SimpleValueType VT,
@@ -299,6 +300,8 @@
   Sig.push_back(IIT_SUBDIVIDE2_ARG);
 else if (R->isSubClassOf("LLVMSubdivide4VectorType"))
   Sig.push_back(IIT_SUBDIVIDE4_ARG);
+else if (R->isSubClassOf("LLVMVectorOfBitcastsToInt"))
+  Sig.push_back(IIT_VEC_OF_BITCASTS_TO_INT);
 else
   Sig.push_back(IIT_ARG);
 return Sig.push_back((Number << 3) | 7 /*IITDescriptor::AK_MatchType*/);
Index: llvm/trunk/include/llvm/IR/Intrinsics.h
===
--- llvm/trunk/include/llvm/IR/Intrinsics.h
+++ llvm/trunk/include/llvm/IR/Intrinsics.h
@@ -101,7 +101,7 @@
   Argument, ExtendArgument, TruncArgument, HalfVecArgument,
   SameVecWidthArgument, PtrToArgument, PtrToElt, VecOfAnyPtrsToElt,
   VecElementArgument, ScalableVecArgument, Subdivide2Argument,
-  Subdivide4Argument
+  Subdivide4Argument, VecOfBitcastsToInt
 } Kind;
 
 union {
@@ -127,7 +127,8 @@
  Kind == TruncArgument || Kind == HalfVecArgument ||
  Kind == SameVecWidthArgument || Kind == PtrToArgument ||
  Kind == PtrToElt || Kind == VecElementArgument ||
- Kind == Subdivide2Argument || Kind == Subdivide4Argument);
+ Kind == Subdivide2Argument || Kind == Subdivide4Argument ||
+ Kind == VecOfBitcastsToInt);
   return Argument_Info >> 3;
 }
 ArgKind getArgumentKind() const {
@@ -135,7 +136,7 @@
  Kind == TruncArgument || Kind == HalfVecArgument ||
  Kind == SameVecWidthArgument || Kind == PtrToArgument ||
  Kind == VecElementArgument || Kind == Subdivide2Argument ||
- Kind == Subdivide4Argument);
+ Kind == Subdivide4Argument || Kind == VecOfBitcastsToInt);
   return (ArgKind)(Argument_Info & 7);
 }
 
Index: llvm/trunk/include/llvm/IR/Intrinsics.td
===
--- llvm/trunk/include/llvm/IR/Intrinsics.td
+++ llvm/trunk/include/llvm/IR/Intrinsics.td
@@ -193,6 +193,10 @@
 class LLVMSubdivide2VectorType<int num> : LLVMMatchType<num>;
 class LLVMSubdivide4VectorType<int num> : LLVMMatchType<num>;
 
+// Match the element count and bit width of another intrinsic parameter, but
+// change the element type to an integer.
+class LLVMVectorOfBitcastsToInt<int num> : LLVMMatchType<num>;
 
 def llvm_void_ty       : LLVMType<isVoid>;
 let isAny = 1 in {
   def llvm_any_ty        : LLVMType<Any>;
Index: llvm/trunk/lib/IR/Function.cpp
===
--- llvm/trunk/lib/IR/Function.cpp
+++ llvm/trunk/lib/IR/Function.cpp
@@ -706,7 +706,8 @@
   IIT_VEC_ELEMENT = 42,
   IIT_SCALABLE_VEC = 43,
   IIT_SUBDIVIDE2_ARG = 44,
-  IIT_SUBDIVIDE4_ARG = 45
+  IIT_SUBDIVIDE4_ARG = 45,
+  IIT_VEC_OF_BITCASTS_TO_INT = 46
 };
 
 static void DecodeIITType(unsigned &NextElt, ArrayRef<unsigned char> Infos,
@@ -895,6 +896,12 @@
 DecodeIITType(NextElt, Infos, OutputTable);
 return;
   }
+  case IIT_VEC_OF_BITCASTS_TO_INT: {
+    unsigned ArgInfo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]);
+    OutputTable.push_back(IITDescriptor::get(IITDescriptor::VecOfBitcastsToInt,
+                                             ArgInfo));
+    return;
+  }
   }
   llvm_unreachable("unhandled");
 }
@@ -1021,6 +1028,12 @@
   return VTy->getElementType();
 llvm_unreachable("Expected an argument of Vector Type");
   }
+  case IITDescriptor::VecOfBitcastsToInt: {
+    Type *Ty = Tys[D.getArgumentNumber()];
+    VectorType *VTy = dyn_cast<VectorType>(Ty);
+    assert(VTy && "Expected an argument of Vector Type");
+    return VectorType::getInteger(VTy);
+  }
   case IITDescriptor::VecOfAnyPtrsToElt:
 // Return the overloaded type (which determines the pointers address space)
 return Tys[D.getOverloadArgNumber()];
@@ -1314,6 +1327,15 @@
   return matchIntrinsicType(VTy, Infos, ArgTys, DeferredChecks,
 IsDeferredCheck);
 }
+case 

[PATCH] D67830: [AArch64][SVE] Implement punpk[hi|lo] intrinsics

2019-09-30 Thread Kerry McLaughlin via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rL373232: [AArch64][SVE] Implement punpk[hi|lo] intrinsics 
(authored by kmclaughlin, committed by ).

Changed prior to commit:
  https://reviews.llvm.org/D67830?vs=221010&id=222458#toc

Repository:
  rL LLVM

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D67830/new/

https://reviews.llvm.org/D67830
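
For reference, a minimal sketch of the new predicate-unpack intrinsics (types
and mangling assumed from the tests below): punpkhi and punpklo widen the top
or bottom half of a predicate, so the result has half as many lanes as the
source.

; Unpack the high half of a 16-lane predicate into an 8-lane predicate.
define <vscale x 8 x i1> @punpkhi_example(<vscale x 16 x i1> %p) {
  %res = call <vscale x 8 x i1> @llvm.aarch64.sve.punpkhi.nxv8i1(<vscale x 16 x i1> %p)
  ret <vscale x 8 x i1> %res   ; expected selection: punpkhi p0.h, p0.b
}
declare <vscale x 8 x i1> @llvm.aarch64.sve.punpkhi.nxv8i1(<vscale x 16 x i1>)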

Files:
  llvm/trunk/include/llvm/IR/IntrinsicsAArch64.td
  llvm/trunk/lib/IR/Function.cpp
  llvm/trunk/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/trunk/lib/Target/AArch64/SVEInstrFormats.td
  llvm/trunk/test/CodeGen/AArch64/sve-intrinsics-pred-operations.ll

Index: llvm/trunk/test/CodeGen/AArch64/sve-intrinsics-pred-operations.ll
===
--- llvm/trunk/test/CodeGen/AArch64/sve-intrinsics-pred-operations.ll
+++ llvm/trunk/test/CodeGen/AArch64/sve-intrinsics-pred-operations.ll
@@ -0,0 +1,65 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+;
+; PUNPKHI
+;
+
+define  @punpkhi_b16( %a) {
+; CHECK-LABEL: punpkhi_b16
+; CHECK: punpkhi p0.h, p0.b
+; CHECK-NEXT: ret
+  %res = call  @llvm.aarch64.sve.punpkhi.nxv8i1( %a)
+  ret  %res
+}
+
+define  @punpkhi_b8( %a) {
+; CHECK-LABEL: punpkhi_b8
+; CHECK: punpkhi p0.h, p0.b
+; CHECK-NEXT: ret
+  %res = call  @llvm.aarch64.sve.punpkhi.nxv4i1( %a)
+  ret  %res
+}
+
+define  @punpkhi_b4( %a) {
+; CHECK-LABEL: punpkhi_b4
+; CHECK: punpkhi p0.h, p0.b
+; CHECK-NEXT: ret
+  %res = call  @llvm.aarch64.sve.punpkhi.nxv2i1( %a)
+  ret  %res
+}
+
+;
+; PUNPKLO
+;
+
+define  @punpklo_b16( %a) {
+; CHECK-LABEL: punpklo_b16
+; CHECK: punpklo p0.h, p0.b
+; CHECK-NEXT: ret
+  %res = call  @llvm.aarch64.sve.punpklo.nxv8i1( %a)
+  ret  %res
+}
+
+define  @punpklo_b8( %a) {
+; CHECK-LABEL: punpklo_b8
+; CHECK: punpklo p0.h, p0.b
+; CHECK-NEXT: ret
+  %res = call  @llvm.aarch64.sve.punpklo.nxv4i1( %a)
+  ret  %res
+}
+
+define  @punpklo_b4( %a) {
+; CHECK-LABEL: punpklo_b4
+; CHECK: punpklo p0.h, p0.b
+; CHECK-NEXT: ret
+  %res = call  @llvm.aarch64.sve.punpklo.nxv2i1( %a)
+  ret  %res
+}
+
+declare <vscale x 8 x i1> @llvm.aarch64.sve.punpkhi.nxv8i1(<vscale x 16 x i1>)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.punpkhi.nxv4i1(<vscale x 8 x i1>)
+declare <vscale x 2 x i1> @llvm.aarch64.sve.punpkhi.nxv2i1(<vscale x 4 x i1>)
+
+declare <vscale x 8 x i1> @llvm.aarch64.sve.punpklo.nxv8i1(<vscale x 16 x i1>)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.punpklo.nxv4i1(<vscale x 8 x i1>)
+declare <vscale x 2 x i1> @llvm.aarch64.sve.punpklo.nxv2i1(<vscale x 4 x i1>)
Index: llvm/trunk/lib/Target/AArch64/SVEInstrFormats.td
===
--- llvm/trunk/lib/Target/AArch64/SVEInstrFormats.td
+++ llvm/trunk/lib/Target/AArch64/SVEInstrFormats.td
@@ -283,6 +283,11 @@
 // SVE pattern match helpers.
 //===----------------------------------------------------------------------===//
 
+class SVE_1_Op_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
+                   Instruction inst>
+  : Pat<(vtd (op vt1:$Op1)),
+        (inst $Op1)>;
+
 class SVE_3_Op_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
                    ValueType vt2, ValueType vt3, Instruction inst>
   : Pat<(vtd (op vt1:$Op1, vt2:$Op2, vt3:$Op3)),
@@ -4280,6 +4285,14 @@
   let Inst{3-0}   = Pd;
 }
 
+multiclass sve_int_perm_punpk<bit opc, string asm, SDPatternOperator op> {
+  def NAME : sve_int_perm_punpk<opc, asm>;
+
+  def : SVE_1_Op_Pat<nxv8i1, op, nxv16i1, !cast<Instruction>(NAME)>;
+  def : SVE_1_Op_Pat<nxv4i1, op, nxv8i1,  !cast<Instruction>(NAME)>;
+  def : SVE_1_Op_Pat<nxv2i1, op, nxv4i1,  !cast<Instruction>(NAME)>;
+}
+
 class sve_int_rdffr_pred<bit s, string asm>
 : I<(outs PPR8:$Pd), (ins PPRAny:$Pg),
   asm, "\t$Pd, $Pg/z",
Index: llvm/trunk/lib/Target/AArch64/AArch64SVEInstrInfo.td
===
--- llvm/trunk/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ llvm/trunk/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -216,8 +216,8 @@
   defm UUNPKLO_ZZ : sve_int_perm_unpk<0b10, "uunpklo">;
   defm UUNPKHI_ZZ : sve_int_perm_unpk<0b11, "uunpkhi">;
 
-  def  PUNPKLO_PP : sve_int_perm_punpk<0b0, "punpklo">;
-  def  PUNPKHI_PP : sve_int_perm_punpk<0b1, "punpkhi">;
+  defm PUNPKLO_PP : sve_int_perm_punpk<0b0, "punpklo", int_aarch64_sve_punpklo>;
+  defm PUNPKHI_PP : sve_int_perm_punpk<0b1, "punpkhi", int_aarch64_sve_punpkhi>;
 
   defm MOVPRFX_ZPzZ : sve_int_movprfx_pred_zero<0b000, "movprfx">;
   defm MOVPRFX_ZPmZ : sve_int_movprfx_pred_merge<0b001, "movprfx">;
Index: llvm/trunk/lib/IR/Function.cpp
===
--- llvm/trunk/lib/IR/Function.cpp
+++ llvm/trunk/lib/IR/Function.cpp
@@ -1211,8 +1211,9 @@
 }
 case IITDescriptor::HalfVecArgument:
   // If this is a forward reference, defer the check for later.
-  return D.getArgumentNumber() >= ArgTys.size() ||
-         !isa<VectorType>(ArgTys[D.getArgumentNumber()]) ||
+  if (D.getArgumentNumber() >= ArgTys.size())
+    return IsDeferredCheck || DeferCheck(Ty);
+  return !isa<VectorType>(ArgTys[D.getArgumentNumber()]) ||
          VectorType::getHalfElementsVectorType(
            cast<VectorType>(ArgTys[D.getArgumentNumber()])) != Ty;
 case IITDescriptor::SameVecWidthArgument: {
Index: llvm/trunk/include/llvm/IR/IntrinsicsAArch64.td
===
--- llvm/trunk/include/llvm/IR/IntrinsicsAArch64.td
+++ llvm/trunk/include/llvm/IR/IntrinsicsAArch64.td
@@ -768,6 +768,11 
