[PATCH] D73687: [AArch64][SVE] Add SVE2 intrinsics for complex integer dot product

2020-02-11 Thread Kerry McLaughlin via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rGe7755f9e4f48: [AArch64][SVE] Add SVE2 intrinsics for complex 
integer dot product (authored by kmclaughlin).

Changed prior to commit:
  https://reviews.llvm.org/D73687?vs=241726&id=243781#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D73687/new/

https://reviews.llvm.org/D73687

Files:
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/SVEInstrFormats.td
  llvm/test/CodeGen/AArch64/sve2-intrinsics-complex-dot.ll

Index: llvm/test/CodeGen/AArch64/sve2-intrinsics-complex-dot.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve2-intrinsics-complex-dot.ll
@@ -0,0 +1,61 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
+
+
+;
+; CDOT
+;
+
+define  @cdot_s( %a,  %b,  %c) {
+; CHECK-LABEL: cdot_s:
+; CHECK: cdot z0.s, z1.b, z2.b, #0
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.cdot.nxv4i32( %a,
+ %b,
+ %c,
+i32 0)
+  ret  %out
+}
+
+define  @cdot_d( %a,  %b,  %c) {
+; CHECK-LABEL: cdot_d:
+; CHECK: cdot z0.d, z1.h, z2.h, #90
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.cdot.nxv2i64( %a,
+ %b,
+ %c,
+i32 90)
+  ret  %out
+}
+
+;
+; CDOT(indexed)
+;
+
+define  @cdot_s_idx( %a,  %b,  %c) {
+; CHECK-LABEL: cdot_s_idx:
+; CHECK: cdot z0.s, z1.b, z2.b[0], #180
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.cdot.lane.nxv4i32( %a,
+  %b,
+  %c,
+ i32 0, i32 180)
+  ret  %out
+}
+
+
+define  @cdot_d_idx( %a,  %b,  %c) {
+; CHECK-LABEL: cdot_d_idx:
+; CHECK: cdot z0.d, z1.h, z2.h[1], #270
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.cdot.lane.nxv2i64( %a,
+  %b,
+  %c,
+ i32 1, i32 270)
+  ret  %out
+}
+
+
+declare  @llvm.aarch64.sve.cdot.nxv4i32(, , , i32)
+declare  @llvm.aarch64.sve.cdot.nxv2i64(, , , i32)
+declare  @llvm.aarch64.sve.cdot.lane.nxv4i32(, , , i32, i32)
+declare  @llvm.aarch64.sve.cdot.lane.nxv2i64(, , , i32, i32)
Index: llvm/lib/Target/AArch64/SVEInstrFormats.td
===
--- llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -2542,9 +2542,16 @@
   let ElementSize = ElementSizeNone;
 }
 
-multiclass sve2_cintx_dot {
+multiclass sve2_cintx_dot {
   def _S : sve2_complex_int_arith<0b10, 0b0001, asm, ZPR32, ZPR8>;
   def _D : sve2_complex_int_arith<0b11, 0b0001, asm, ZPR64, ZPR16>;
+
+  def : Pat<(nxv4i32 (op (nxv4i32 ZPR32:$Op1), (nxv16i8 ZPR8:$Op2), (nxv16i8 ZPR8:$Op3),
+ (i32 complexrotateop:$imm))),
+(!cast(NAME # "_S") ZPR32:$Op1, ZPR8:$Op2, ZPR8:$Op3, complexrotateop:$imm)>;
+  def : Pat<(nxv2i64 (op (nxv2i64 ZPR64:$Op1), (nxv8i16 ZPR16:$Op2), (nxv8i16 ZPR16:$Op3),
+ (i32 complexrotateop:$imm))),
+(!cast(NAME # "_D") ZPR64:$Op1, ZPR16:$Op2, ZPR16:$Op3, complexrotateop:$imm)>;
 }
 
 //===--===//
@@ -2589,19 +2596,26 @@
   let ElementSize = ElementSizeNone;
 }
 
-multiclass sve2_cintx_dot_by_indexed_elem {
-  def _S : sve2_complex_int_arith_indexed<0b10, 0b0100, asm, ZPR32, ZPR8, ZPR3b8, VectorIndexS> {
+multiclass sve2_cintx_dot_by_indexed_elem {
+  def _S : sve2_complex_int_arith_indexed<0b10, 0b0100, asm, ZPR32, ZPR8, ZPR3b8, VectorIndexS32b> {
 bits<2> iop;
 bits<3> Zm;
 let Inst{20-19} = iop;
 let Inst{18-16} = Zm;
   }
-  def _D : sve2_complex_int_arith_indexed<0b11, 0b0100, asm, ZPR64, ZPR16, ZPR4b16, VectorIndexD> {
+  def _D : sve2_complex_int_arith_indexed<0b11, 0b0100, asm, ZPR64, ZPR16, ZPR4b16, VectorIndexD32b> {
 bit iop;
 bits<4> Zm;
 let Inst{20} = iop;
 let Inst{19-16} = Zm;
   }
+
+  def : Pat<(nxv4i32 (op (nxv4i32 ZPR32:$Op1), (nxv16i8 ZPR8:$Op2), (nxv16i8 ZPR8:$Op3),
+ (i32 VectorIndexS32b_timm:$idx), (i32 complexrotateop:$imm))),
+(!cast(NAME # "_S") ZPR32:$Op1, ZPR8:$Op2, ZPR8:$Op3, VectorIndexS32b_timm:$idx, complexrotateop:$imm)>;
+  def : Pat<(nxv2i64 (op (nxv2i64 ZPR64:$Op1

[PATCH] D73687: [AArch64][SVE] Add SVE2 intrinsics for complex integer dot product

2020-01-31 Thread Eli Friedman via Phabricator via cfe-commits
efriedma accepted this revision.
efriedma added a comment.
This revision is now accepted and ready to land.

LGTM


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D73687/new/

https://reviews.llvm.org/D73687



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D73687: [AArch64][SVE] Add SVE2 intrinsics for complex integer dot product

2020-01-31 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin marked 2 inline comments as done.
kmclaughlin added a comment.

Thanks for reviewing this, @efriedma!




Comment at: llvm/include/llvm/IR/IntrinsicsAArch64.td:
+ LLVMSubdivide4VectorType<0>,
+ llvm_i32_ty],
+[IntrNoMem]>;

efriedma wrote:
> Missing ImmArg?
Replaced this with //AdvSIMD_SVE_DOT_Indexed_Intrinsic//, which has the ImmArg 
property but is otherwise identical


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D73687/new/

https://reviews.llvm.org/D73687



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D73687: [AArch64][SVE] Add SVE2 intrinsics for complex integer dot product

2020-01-31 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin updated this revision to Diff 241726.
kmclaughlin added a comment.

- Removed the AdvSIMD_SVE_CDOT_Intrinsic class
- Added ImmArg<4> to AdvSIMD_SVE_CDOT_LANE_Intrinsic


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D73687/new/

https://reviews.llvm.org/D73687

Files:
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/SVEInstrFormats.td
  llvm/test/CodeGen/AArch64/sve2-intrinsics-complex-dot.ll

Index: llvm/test/CodeGen/AArch64/sve2-intrinsics-complex-dot.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve2-intrinsics-complex-dot.ll
@@ -0,0 +1,61 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
+
+
+;
+; CDOT
+;
+
+define  @cdot_s( %a,  %b,  %c) {
+; CHECK-LABEL: cdot_s:
+; CHECK: cdot z0.s, z1.b, z2.b, #0
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.cdot.nxv4i32( %a,
+ %b,
+ %c,
+i32 0)
+  ret  %out
+}
+
+define  @cdot_d( %a,  %b,  %c) {
+; CHECK-LABEL: cdot_d:
+; CHECK: cdot z0.d, z1.h, z2.h, #90
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.cdot.nxv2i64( %a,
+ %b,
+ %c,
+i32 90)
+  ret  %out
+}
+
+;
+; CDOT(indexed)
+;
+
+define  @cdot_s_idx( %a,  %b,  %c) {
+; CHECK-LABEL: cdot_s_idx:
+; CHECK: cdot z0.s, z1.b, z2.b[0], #180
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.cdot.lane.nxv4i32( %a,
+  %b,
+  %c,
+ i32 0, i32 180)
+  ret  %out
+}
+
+
+define  @cdot_d_idx( %a,  %b,  %c) {
+; CHECK-LABEL: cdot_d_idx:
+; CHECK: cdot z0.d, z1.h, z2.h[1], #270
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.cdot.lane.nxv2i64( %a,
+  %b,
+  %c,
+ i32 1, i32 270)
+  ret  %out
+}
+
+
+declare  @llvm.aarch64.sve.cdot.nxv4i32(, , , i32)
+declare  @llvm.aarch64.sve.cdot.nxv2i64(, , , i32)
+declare  @llvm.aarch64.sve.cdot.lane.nxv4i32(, , , i32, i32)
+declare  @llvm.aarch64.sve.cdot.lane.nxv2i64(, , , i32, i32)
Index: llvm/lib/Target/AArch64/SVEInstrFormats.td
===
--- llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -2542,9 +2542,16 @@
   let ElementSize = ElementSizeNone;
 }
 
-multiclass sve2_cintx_dot {
+multiclass sve2_cintx_dot {
   def _S : sve2_complex_int_arith<0b10, 0b0001, asm, ZPR32, ZPR8>;
   def _D : sve2_complex_int_arith<0b11, 0b0001, asm, ZPR64, ZPR16>;
+
+  def : Pat<(nxv4i32 (op (nxv4i32 ZPR32:$Op1), (nxv16i8 ZPR8:$Op2), (nxv16i8 ZPR8:$Op3),
+ (i32 complexrotateop:$imm))),
+(!cast(NAME # "_S") ZPR32:$Op1, ZPR8:$Op2, ZPR8:$Op3, complexrotateop:$imm)>;
+  def : Pat<(nxv2i64 (op (nxv2i64 ZPR64:$Op1), (nxv8i16 ZPR16:$Op2), (nxv8i16 ZPR16:$Op3),
+ (i32 complexrotateop:$imm))),
+(!cast(NAME # "_D") ZPR64:$Op1, ZPR16:$Op2, ZPR16:$Op3, complexrotateop:$imm)>;
 }
 
 //===--===//
@@ -2589,19 +2596,26 @@
   let ElementSize = ElementSizeNone;
 }
 
-multiclass sve2_cintx_dot_by_indexed_elem {
-  def _S : sve2_complex_int_arith_indexed<0b10, 0b0100, asm, ZPR32, ZPR8, ZPR3b8, VectorIndexS> {
+multiclass sve2_cintx_dot_by_indexed_elem {
+  def _S : sve2_complex_int_arith_indexed<0b10, 0b0100, asm, ZPR32, ZPR8, ZPR3b8, VectorIndexS32b> {
 bits<2> iop;
 bits<3> Zm;
 let Inst{20-19} = iop;
 let Inst{18-16} = Zm;
   }
-  def _D : sve2_complex_int_arith_indexed<0b11, 0b0100, asm, ZPR64, ZPR16, ZPR4b16, VectorIndexD> {
+  def _D : sve2_complex_int_arith_indexed<0b11, 0b0100, asm, ZPR64, ZPR16, ZPR4b16, VectorIndexD32b> {
 bit iop;
 bits<4> Zm;
 let Inst{20} = iop;
 let Inst{19-16} = Zm;
   }
+
+  def : Pat<(nxv4i32 (op (nxv4i32 ZPR32:$Op1), (nxv16i8 ZPR8:$Op2), (nxv16i8 ZPR8:$Op3),
+ (i32 VectorIndexS32b_timm:$idx), (i32 complexrotateop:$imm))),
+(!cast(NAME # "_S") ZPR32:$Op1, ZPR8:$Op2, ZPR8:$Op3, VectorIndexS32b_timm:$idx, complexrotateop:$imm)>;
+  def : Pat<(nxv2i64 (op (nxv2i64 ZPR64:$Op1), (nxv8i16 ZPR16:$Op2), (nxv8i16 ZPR16:$Op3),
+ (i32 VectorIndexD32b_timm:$idx), (i32 complexrotateop:$imm))),
+   

[PATCH] D73687: [AArch64][SVE] Add SVE2 intrinsics for complex integer dot product

2020-01-30 Thread Eli Friedman via Phabricator via cfe-commits
efriedma added inline comments.



Comment at: llvm/include/llvm/IR/IntrinsicsAArch64.td:
+ LLVMSubdivide4VectorType<0>,
+ llvm_i32_ty],
+[IntrNoMem]>;

Missing ImmArg?



Comment at: llvm/include/llvm/IR/IntrinsicsAArch64.td:1121
+ llvm_i32_ty],
+[IntrNoMem, ImmArg<3>]>;
+

Missing `ImmArg<4>`?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D73687/new/

https://reviews.llvm.org/D73687



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D73687: [AArch64][SVE] Add SVE2 intrinsics for complex integer dot product

2020-01-30 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin created this revision.
kmclaughlin added reviewers: sdesmalen, efriedma, dancgr, c-rhodes.
Herald added subscribers: psnobl, rkruppe, hiraditya, kristof.beyls, tschuett.
Herald added a reviewer: rengolin.
Herald added a project: LLVM.

Implements the following intrinsics:

- @llvm.aarch64.sve.cdot
- @llvm.aarch64.sve.cdot.lane


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D73687

Files:
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/SVEInstrFormats.td
  llvm/test/CodeGen/AArch64/sve2-intrinsics-complex-dot.ll

Index: llvm/test/CodeGen/AArch64/sve2-intrinsics-complex-dot.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve2-intrinsics-complex-dot.ll
@@ -0,0 +1,61 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
+
+
+;
+; CDOT
+;
+
+define  @cdot_s( %a,  %b,  %c) {
+; CHECK-LABEL: cdot_s:
+; CHECK: cdot z0.s, z1.b, z2.b, #0
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.cdot.nxv4i32( %a,
+ %b,
+ %c,
+i32 0)
+  ret  %out
+}
+
+define  @cdot_d( %a,  %b,  %c) {
+; CHECK-LABEL: cdot_d:
+; CHECK: cdot z0.d, z1.h, z2.h, #90
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.cdot.nxv2i64( %a,
+ %b,
+ %c,
+i32 90)
+  ret  %out
+}
+
+;
+; CDOT(indexed)
+;
+
+define  @cdot_s_idx( %a,  %b,  %c) {
+; CHECK-LABEL: cdot_s_idx:
+; CHECK: cdot z0.s, z1.b, z2.b[0], #180
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.cdot.lane.nxv4i32( %a,
+  %b,
+  %c,
+ i32 0, i32 180)
+  ret  %out
+}
+
+
+define  @cdot_d_idx( %a,  %b,  %c) {
+; CHECK-LABEL: cdot_d_idx:
+; CHECK: cdot z0.d, z1.h, z2.h[1], #270
+; CHECK-NEXT: ret
+  %out = call  @llvm.aarch64.sve.cdot.lane.nxv2i64( %a,
+  %b,
+  %c,
+ i32 1, i32 270)
+  ret  %out
+}
+
+
+declare  @llvm.aarch64.sve.cdot.nxv4i32(, , , i32)
+declare  @llvm.aarch64.sve.cdot.nxv2i64(, , , i32)
+declare  @llvm.aarch64.sve.cdot.lane.nxv4i32(, , , i32, i32)
+declare  @llvm.aarch64.sve.cdot.lane.nxv2i64(, , , i32, i32)
Index: llvm/lib/Target/AArch64/SVEInstrFormats.td
===
--- llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -2538,9 +2538,16 @@
   let ElementSize = ElementSizeNone;
 }
 
-multiclass sve2_cintx_dot {
+multiclass sve2_cintx_dot {
   def _S : sve2_complex_int_arith<0b10, 0b0001, asm, ZPR32, ZPR8>;
   def _D : sve2_complex_int_arith<0b11, 0b0001, asm, ZPR64, ZPR16>;
+
+  def : Pat<(nxv4i32 (op (nxv4i32 ZPR32:$Op1), (nxv16i8 ZPR8:$Op2), (nxv16i8 ZPR8:$Op3),
+ (i32 complexrotateop:$imm))),
+(!cast(NAME # "_S") ZPR32:$Op1, ZPR8:$Op2, ZPR8:$Op3, complexrotateop:$imm)>;
+  def : Pat<(nxv2i64 (op (nxv2i64 ZPR64:$Op1), (nxv8i16 ZPR16:$Op2), (nxv8i16 ZPR16:$Op3),
+ (i32 complexrotateop:$imm))),
+(!cast(NAME # "_D") ZPR64:$Op1, ZPR16:$Op2, ZPR16:$Op3, complexrotateop:$imm)>;
 }
 
 //===--===//
@@ -2580,19 +2587,26 @@
   let ElementSize = ElementSizeNone;
 }
 
-multiclass sve2_cintx_dot_by_indexed_elem {
-  def _S : sve2_complex_int_arith_indexed<0b10, 0b0100, asm, ZPR32, ZPR8, ZPR3b8, VectorIndexS> {
+multiclass sve2_cintx_dot_by_indexed_elem {
+  def _S : sve2_complex_int_arith_indexed<0b10, 0b0100, asm, ZPR32, ZPR8, ZPR3b8, VectorIndexS32b> {
 bits<2> iop;
 bits<3> Zm;
 let Inst{20-19} = iop;
 let Inst{18-16} = Zm;
   }
-  def _D : sve2_complex_int_arith_indexed<0b11, 0b0100, asm, ZPR64, ZPR16, ZPR4b16, VectorIndexD> {
+  def _D : sve2_complex_int_arith_indexed<0b11, 0b0100, asm, ZPR64, ZPR16, ZPR4b16, VectorIndexD32b> {
 bit iop;
 bits<4> Zm;
 let Inst{20} = iop;
 let Inst{19-16} = Zm;
   }
+
+  def : Pat<(nxv4i32 (op (nxv4i32 ZPR32:$Op1), (nxv16i8 ZPR8:$Op2), (nxv16i8 ZPR8:$Op3),
+ (i32 VectorIndexS32b_timm:$idx), (i32 complexrotateop:$imm))),
+(!cast(NAME # "_S") ZPR32:$Op1, ZPR8:$Op2, ZPR8:$Op3, VectorIndexS32b_timm:$idx, complexrotateop:$imm)>;
+  def : Pat<(nxv2i64 (op (nxv2i64 ZPR64:$Op1), (nxv8i1