[PATCH] D98487: [AArch64][SVE/NEON] Add support for FROUNDEVEN for both NEON and fixed length SVE

2021-03-17 Thread Bradley Smith via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rGcf0da91ba5e1: [AArch64][SVE/NEON] Add support for FROUNDEVEN 
for both NEON and fixed length… (authored by bsmith).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D98487/new/

https://reviews.llvm.org/D98487

Files:
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/aarch64-neon-intrinsics.c
  clang/test/CodeGen/aarch64-neon-misc.c
  clang/test/CodeGen/aarch64-v8.2a-fp16-intrinsics.c
  clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c
  clang/test/CodeGen/arm-neon-directed-rounding.c
  clang/test/CodeGen/arm64-vrnd.c
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/include/llvm/Target/TargetSelectionDAG.td
  llvm/lib/IR/AutoUpgrade.cpp
  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
  llvm/lib/Target/AArch64/AArch64InstrInfo.td
  llvm/test/CodeGen/AArch64/arm64-vcvt.ll
  llvm/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll
  llvm/test/CodeGen/AArch64/f16-instructions.ll
  llvm/test/CodeGen/AArch64/fp-intrinsics.ll
  llvm/test/CodeGen/AArch64/frintn.ll
  llvm/test/CodeGen/AArch64/sve-fixed-length-fp-rounding.ll
  llvm/test/CodeGen/AArch64/vec-libcalls.ll

Index: llvm/test/CodeGen/AArch64/vec-libcalls.ll
===
--- llvm/test/CodeGen/AArch64/vec-libcalls.ll
+++ llvm/test/CodeGen/AArch64/vec-libcalls.ll
@@ -29,6 +29,7 @@
 declare <3 x float> @llvm.nearbyint.v3f32(<3 x float>)
 declare <3 x float> @llvm.rint.v3f32(<3 x float>)
 declare <3 x float> @llvm.round.v3f32(<3 x float>)
+declare <3 x float> @llvm.roundeven.v3f32(<3 x float>)
 declare <3 x float> @llvm.sqrt.v3f32(<3 x float>)
 declare <3 x float> @llvm.trunc.v3f32(<3 x float>)
 
@@ -478,6 +479,15 @@
   ret <3 x float> %r
 }
 
+define <3 x float> @roundeven_v3f32(<3 x float> %x) nounwind {
+; CHECK-LABEL: roundeven_v3f32:
+; CHECK:   // %bb.0:
+; CHECK-NEXT:frintn v0.4s, v0.4s
+; CHECK-NEXT:ret
+  %r = call <3 x float> @llvm.roundeven.v3f32(<3 x float> %x)
+  ret <3 x float> %r
+}
+
 define <3 x float> @sqrt_v3f32(<3 x float> %x) nounwind {
 ; CHECK-LABEL: sqrt_v3f32:
 ; CHECK:   // %bb.0:
Index: llvm/test/CodeGen/AArch64/sve-fixed-length-fp-rounding.ll
===
--- llvm/test/CodeGen/AArch64/sve-fixed-length-fp-rounding.ll
+++ llvm/test/CodeGen/AArch64/sve-fixed-length-fp-rounding.ll
@@ -1255,6 +1255,253 @@
   ret void
 }
 
+;
+; ROUNDEVEN -> FRINTN
+;
+
+; Don't use SVE for 64-bit vectors.
+define <4 x half> @frintn_v4f16(<4 x half> %op) #0 {
+; CHECK-LABEL: frintn_v4f16:
+; CHECK: frintn v0.4h, v0.4h
+; CHECK-NEXT: ret
+  %res = call <4 x half> @llvm.roundeven.v4f16(<4 x half> %op)
+  ret <4 x half> %res
+}
+
+; Don't use SVE for 128-bit vectors.
+define <8 x half> @frintn_v8f16(<8 x half> %op) #0 {
+; CHECK-LABEL: frintn_v8f16:
+; CHECK: frintn v0.8h, v0.8h
+; CHECK-NEXT: ret
+  %res = call <8 x half> @llvm.roundeven.v8f16(<8 x half> %op)
+  ret <8 x half> %res
+}
+
+define void @frintn_v16f16(<16 x half>* %a) #0 {
+; CHECK-LABEL: frintn_v16f16:
+; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
+; CHECK-DAG: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
+; CHECK-NEXT: frintn [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
+; CHECK-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
+; CHECK-NEXT: ret
+  %op = load <16 x half>, <16 x half>* %a
+  %res = call <16 x half> @llvm.roundeven.v16f16(<16 x half> %op)
+  store <16 x half> %res, <16 x half>* %a
+  ret void
+}
+
+define void @frintn_v32f16(<32 x half>* %a) #0 {
+; CHECK-LABEL: frintn_v32f16:
+; VBITS_GE_512: ptrue [[PG:p[0-9]+]].h, vl32
+; VBITS_GE_512-DAG: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
+; VBITS_GE_512-NEXT: frintn [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
+; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
+; VBITS_GE_512-NEXT: ret
+
+; Ensure sensible type legalisation.
+; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
+; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: ld1h { [[OP_LO:z[0-9]+]].h }, [[PG]]/z, [x0]
+; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: frintn [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP_LO]].h
+; VBITS_EQ_256-DAG: frintn [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP_HI]].h
+; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0]
+; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x[[A_HI]]
+; VBITS_EQ_256-NEXT: ret
+  %op = load <32 x half>, <32 x half>* %a
+  %res = call <32 x half> @llvm.roundeven.v32f16(<32 x half> %op)
+  store <32 x half> %res, <32 x half>* %a
+  ret void
+}
+
+define void @frintn_v64f16(<64 x half>* %a) #0 {
+; CHECK-LABEL: frintn_v64f16:
+; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].h, vl64
+; VBITS_GE_1024-DAG: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
+; VBITS_GE_1024-NEXT: frintn [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
+; VBITS_GE_1024-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
+; VBITS_GE_1024-NEXT: 

[PATCH] D98487: [AArch64][SVE/NEON] Add support for FROUNDEVEN for both NEON and fixed length SVE

2021-03-16 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm accepted this revision.
paulwalker-arm added a comment.

Mainly focused on the SVE side of things, which looks good to me.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D98487/new/

https://reviews.llvm.org/D98487

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D98487: [AArch64][SVE/NEON] Add support for FROUNDEVEN for both NEON and fixed length SVE

2021-03-16 Thread Dave Green via Phabricator via cfe-commits
dmgreen accepted this revision.
dmgreen added a comment.
This revision is now accepted and ready to land.

Thanks. LGTM, if no one else has comments.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D98487/new/

https://reviews.llvm.org/D98487

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D98487: [AArch64][SVE/NEON] Add support for FROUNDEVEN for both NEON and fixed length SVE

2021-03-16 Thread Bradley Smith via Phabricator via cfe-commits
bsmith updated this revision to Diff 330951.
bsmith added a comment.

- Remove `SDTFPRoundEvenOp` as it's not a correct mirror of `SDTFPRoundOp` 
since that is not for `ISD::FROUND`.
- Fix comments in `include/llvm/Target/TargetSelectionDAG.td` for 
`SDTFPRoundOp` and `SDTFPExtendOp`.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D98487/new/

https://reviews.llvm.org/D98487

Files:
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/aarch64-neon-intrinsics.c
  clang/test/CodeGen/aarch64-neon-misc.c
  clang/test/CodeGen/aarch64-v8.2a-fp16-intrinsics.c
  clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c
  clang/test/CodeGen/arm-neon-directed-rounding.c
  clang/test/CodeGen/arm64-vrnd.c
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/include/llvm/Target/TargetSelectionDAG.td
  llvm/lib/IR/AutoUpgrade.cpp
  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
  llvm/lib/Target/AArch64/AArch64InstrInfo.td
  llvm/test/CodeGen/AArch64/arm64-vcvt.ll
  llvm/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll
  llvm/test/CodeGen/AArch64/f16-instructions.ll
  llvm/test/CodeGen/AArch64/fp-intrinsics.ll
  llvm/test/CodeGen/AArch64/frintn.ll
  llvm/test/CodeGen/AArch64/sve-fixed-length-fp-rounding.ll
  llvm/test/CodeGen/AArch64/vec-libcalls.ll

Index: llvm/test/CodeGen/AArch64/vec-libcalls.ll
===
--- llvm/test/CodeGen/AArch64/vec-libcalls.ll
+++ llvm/test/CodeGen/AArch64/vec-libcalls.ll
@@ -29,6 +29,7 @@
 declare <3 x float> @llvm.nearbyint.v3f32(<3 x float>)
 declare <3 x float> @llvm.rint.v3f32(<3 x float>)
 declare <3 x float> @llvm.round.v3f32(<3 x float>)
+declare <3 x float> @llvm.roundeven.v3f32(<3 x float>)
 declare <3 x float> @llvm.sqrt.v3f32(<3 x float>)
 declare <3 x float> @llvm.trunc.v3f32(<3 x float>)
 
@@ -478,6 +479,15 @@
   ret <3 x float> %r
 }
 
+define <3 x float> @roundeven_v3f32(<3 x float> %x) nounwind {
+; CHECK-LABEL: roundeven_v3f32:
+; CHECK:   // %bb.0:
+; CHECK-NEXT:frintn v0.4s, v0.4s
+; CHECK-NEXT:ret
+  %r = call <3 x float> @llvm.roundeven.v3f32(<3 x float> %x)
+  ret <3 x float> %r
+}
+
 define <3 x float> @sqrt_v3f32(<3 x float> %x) nounwind {
 ; CHECK-LABEL: sqrt_v3f32:
 ; CHECK:   // %bb.0:
Index: llvm/test/CodeGen/AArch64/sve-fixed-length-fp-rounding.ll
===
--- llvm/test/CodeGen/AArch64/sve-fixed-length-fp-rounding.ll
+++ llvm/test/CodeGen/AArch64/sve-fixed-length-fp-rounding.ll
@@ -1255,6 +1255,253 @@
   ret void
 }
 
+;
+; ROUNDEVEN -> FRINTN
+;
+
+; Don't use SVE for 64-bit vectors.
+define <4 x half> @frintn_v4f16(<4 x half> %op) #0 {
+; CHECK-LABEL: frintn_v4f16:
+; CHECK: frintn v0.4h, v0.4h
+; CHECK-NEXT: ret
+  %res = call <4 x half> @llvm.roundeven.v4f16(<4 x half> %op)
+  ret <4 x half> %res
+}
+
+; Don't use SVE for 128-bit vectors.
+define <8 x half> @frintn_v8f16(<8 x half> %op) #0 {
+; CHECK-LABEL: frintn_v8f16:
+; CHECK: frintn v0.8h, v0.8h
+; CHECK-NEXT: ret
+  %res = call <8 x half> @llvm.roundeven.v8f16(<8 x half> %op)
+  ret <8 x half> %res
+}
+
+define void @frintn_v16f16(<16 x half>* %a) #0 {
+; CHECK-LABEL: frintn_v16f16:
+; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
+; CHECK-DAG: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
+; CHECK-NEXT: frintn [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
+; CHECK-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
+; CHECK-NEXT: ret
+  %op = load <16 x half>, <16 x half>* %a
+  %res = call <16 x half> @llvm.roundeven.v16f16(<16 x half> %op)
+  store <16 x half> %res, <16 x half>* %a
+  ret void
+}
+
+define void @frintn_v32f16(<32 x half>* %a) #0 {
+; CHECK-LABEL: frintn_v32f16:
+; VBITS_GE_512: ptrue [[PG:p[0-9]+]].h, vl32
+; VBITS_GE_512-DAG: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
+; VBITS_GE_512-NEXT: frintn [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
+; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
+; VBITS_GE_512-NEXT: ret
+
+; Ensure sensible type legalisation.
+; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
+; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: ld1h { [[OP_LO:z[0-9]+]].h }, [[PG]]/z, [x0]
+; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: frintn [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP_LO]].h
+; VBITS_EQ_256-DAG: frintn [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP_HI]].h
+; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0]
+; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x[[A_HI]]
+; VBITS_EQ_256-NEXT: ret
+  %op = load <32 x half>, <32 x half>* %a
+  %res = call <32 x half> @llvm.roundeven.v32f16(<32 x half> %op)
+  store <32 x half> %res, <32 x half>* %a
+  ret void
+}
+
+define void @frintn_v64f16(<64 x half>* %a) #0 {
+; CHECK-LABEL: frintn_v64f16:
+; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].h, vl64
+; VBITS_GE_1024-DAG: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
+; VBITS_GE_1024-NEXT: frintn [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
+; VBITS_GE_1024-NEXT: st1h { [[RES]].h }, [[PG]], [x0]

[PATCH] D98487: [AArch64][SVE/NEON] Add support for FROUNDEVEN for both NEON and fixed length SVE

2021-03-16 Thread Dave Green via Phabricator via cfe-commits
dmgreen added inline comments.



Comment at: llvm/include/llvm/Target/TargetSelectionDAG.td:158
 ]>;
+def SDTFPRoundEvenOp  : SDTypeProfile<1, 1, [   // froundeven
+  SDTCisFP<0>, SDTCisFP<1>, SDTCisOpSmallerThanOp<0, 1>, 
SDTCisSameNumEltsAs<0, 1>

bsmith wrote:
> dmgreen wrote:
> > Is this used? The one above should maybe say `// fpround`?
> No it's not, I added it for consistency, but perhaps I shouldn't? I think 
> fround is correct for the one above, or at least is consistent with the 
> others in this file, for example fextend below.
It's used below in `def fpround: SDNode<"ISD::FP_ROUND"   , 
SDTFPRoundOp>;`, so it looks like its used with the fptrunc instruction, not 
the fround intrinsic.

I see your point about fextend... I would say they should both be changed to 
fpextend/fpround, for consistency with the nodes they act upon.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D98487/new/

https://reviews.llvm.org/D98487

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D98487: [AArch64][SVE/NEON] Add support for FROUNDEVEN for both NEON and fixed length SVE

2021-03-15 Thread Bradley Smith via Phabricator via cfe-commits
bsmith added inline comments.



Comment at: llvm/include/llvm/Target/TargetSelectionDAG.td:158
 ]>;
+def SDTFPRoundEvenOp  : SDTypeProfile<1, 1, [   // froundeven
+  SDTCisFP<0>, SDTCisFP<1>, SDTCisOpSmallerThanOp<0, 1>, 
SDTCisSameNumEltsAs<0, 1>

dmgreen wrote:
> Is this used? The one above should maybe say `// fpround`?
No it's not, I added it for consistency, but perhaps I shouldn't? I think 
fround is correct for the one above, or at least is consistent with the others 
in this file, for example fextend below.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D98487/new/

https://reviews.llvm.org/D98487

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D98487: [AArch64][SVE/NEON] Add support for FROUNDEVEN for both NEON and fixed length SVE

2021-03-15 Thread Dave Green via Phabricator via cfe-commits
dmgreen added a comment.

Thanks. This looks sensible, from what I can tell.




Comment at: llvm/include/llvm/Target/TargetSelectionDAG.td:158
 ]>;
+def SDTFPRoundEvenOp  : SDTypeProfile<1, 1, [   // froundeven
+  SDTCisFP<0>, SDTCisFP<1>, SDTCisOpSmallerThanOp<0, 1>, 
SDTCisSameNumEltsAs<0, 1>

Is this used? The one above should maybe say `// fpround`?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D98487/new/

https://reviews.llvm.org/D98487

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D98487: [AArch64][SVE/NEON] Add support for FROUNDEVEN for both NEON and fixed length SVE

2021-03-15 Thread Bradley Smith via Phabricator via cfe-commits
bsmith updated this revision to Diff 330629.
bsmith added a comment.
Herald added a subscriber: dexonsmith.

- Add AutoUpgrade code to convert aarch64.neon.frintn to roundeven
- Add test for above AutoUpgrade


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D98487/new/

https://reviews.llvm.org/D98487

Files:
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/aarch64-neon-intrinsics.c
  clang/test/CodeGen/aarch64-neon-misc.c
  clang/test/CodeGen/aarch64-v8.2a-fp16-intrinsics.c
  clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c
  clang/test/CodeGen/arm-neon-directed-rounding.c
  clang/test/CodeGen/arm64-vrnd.c
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/include/llvm/Target/TargetSelectionDAG.td
  llvm/lib/IR/AutoUpgrade.cpp
  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
  llvm/lib/Target/AArch64/AArch64InstrInfo.td
  llvm/test/CodeGen/AArch64/arm64-vcvt.ll
  llvm/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll
  llvm/test/CodeGen/AArch64/f16-instructions.ll
  llvm/test/CodeGen/AArch64/fp-intrinsics.ll
  llvm/test/CodeGen/AArch64/frintn.ll
  llvm/test/CodeGen/AArch64/sve-fixed-length-fp-rounding.ll
  llvm/test/CodeGen/AArch64/vec-libcalls.ll

Index: llvm/test/CodeGen/AArch64/vec-libcalls.ll
===
--- llvm/test/CodeGen/AArch64/vec-libcalls.ll
+++ llvm/test/CodeGen/AArch64/vec-libcalls.ll
@@ -29,6 +29,7 @@
 declare <3 x float> @llvm.nearbyint.v3f32(<3 x float>)
 declare <3 x float> @llvm.rint.v3f32(<3 x float>)
 declare <3 x float> @llvm.round.v3f32(<3 x float>)
+declare <3 x float> @llvm.roundeven.v3f32(<3 x float>)
 declare <3 x float> @llvm.sqrt.v3f32(<3 x float>)
 declare <3 x float> @llvm.trunc.v3f32(<3 x float>)
 
@@ -478,6 +479,15 @@
   ret <3 x float> %r
 }
 
+define <3 x float> @roundeven_v3f32(<3 x float> %x) nounwind {
+; CHECK-LABEL: roundeven_v3f32:
+; CHECK:   // %bb.0:
+; CHECK-NEXT:frintn v0.4s, v0.4s
+; CHECK-NEXT:ret
+  %r = call <3 x float> @llvm.roundeven.v3f32(<3 x float> %x)
+  ret <3 x float> %r
+}
+
 define <3 x float> @sqrt_v3f32(<3 x float> %x) nounwind {
 ; CHECK-LABEL: sqrt_v3f32:
 ; CHECK:   // %bb.0:
Index: llvm/test/CodeGen/AArch64/sve-fixed-length-fp-rounding.ll
===
--- llvm/test/CodeGen/AArch64/sve-fixed-length-fp-rounding.ll
+++ llvm/test/CodeGen/AArch64/sve-fixed-length-fp-rounding.ll
@@ -1255,6 +1255,253 @@
   ret void
 }
 
+;
+; ROUNDEVEN -> FRINTN
+;
+
+; Don't use SVE for 64-bit vectors.
+define <4 x half> @frintn_v4f16(<4 x half> %op) #0 {
+; CHECK-LABEL: frintn_v4f16:
+; CHECK: frintn v0.4h, v0.4h
+; CHECK-NEXT: ret
+  %res = call <4 x half> @llvm.roundeven.v4f16(<4 x half> %op)
+  ret <4 x half> %res
+}
+
+; Don't use SVE for 128-bit vectors.
+define <8 x half> @frintn_v8f16(<8 x half> %op) #0 {
+; CHECK-LABEL: frintn_v8f16:
+; CHECK: frintn v0.8h, v0.8h
+; CHECK-NEXT: ret
+  %res = call <8 x half> @llvm.roundeven.v8f16(<8 x half> %op)
+  ret <8 x half> %res
+}
+
+define void @frintn_v16f16(<16 x half>* %a) #0 {
+; CHECK-LABEL: frintn_v16f16:
+; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
+; CHECK-DAG: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
+; CHECK-NEXT: frintn [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
+; CHECK-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
+; CHECK-NEXT: ret
+  %op = load <16 x half>, <16 x half>* %a
+  %res = call <16 x half> @llvm.roundeven.v16f16(<16 x half> %op)
+  store <16 x half> %res, <16 x half>* %a
+  ret void
+}
+
+define void @frintn_v32f16(<32 x half>* %a) #0 {
+; CHECK-LABEL: frintn_v32f16:
+; VBITS_GE_512: ptrue [[PG:p[0-9]+]].h, vl32
+; VBITS_GE_512-DAG: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
+; VBITS_GE_512-NEXT: frintn [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
+; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
+; VBITS_GE_512-NEXT: ret
+
+; Ensure sensible type legalisation.
+; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
+; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: ld1h { [[OP_LO:z[0-9]+]].h }, [[PG]]/z, [x0]
+; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: frintn [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP_LO]].h
+; VBITS_EQ_256-DAG: frintn [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP_HI]].h
+; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0]
+; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x[[A_HI]]
+; VBITS_EQ_256-NEXT: ret
+  %op = load <32 x half>, <32 x half>* %a
+  %res = call <32 x half> @llvm.roundeven.v32f16(<32 x half> %op)
+  store <32 x half> %res, <32 x half>* %a
+  ret void
+}
+
+define void @frintn_v64f16(<64 x half>* %a) #0 {
+; CHECK-LABEL: frintn_v64f16:
+; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].h, vl64
+; VBITS_GE_1024-DAG: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
+; VBITS_GE_1024-NEXT: frintn [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
+; VBITS_GE_1024-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
+; VBITS_GE_1024-NEXT: ret
+  %op = load <64 x half>, <64 x half>* %a
+  %res 

[PATCH] D98487: [AArch64][SVE/NEON] Add support for FROUNDEVEN for both NEON and fixed length SVE

2021-03-15 Thread Dave Green via Phabricator via cfe-commits
dmgreen added inline comments.



Comment at: llvm/include/llvm/IR/IntrinsicsAArch64.td:476
-  // intrinsic.
-  def int_aarch64_neon_frintn : AdvSIMD_1FloatArg_Intrinsic;
-

If you are removing the old intrinsic (which is great), then it will need some 
AutoUpgrade code from the old to the new. Hopefully in this case that's pretty 
simple. Look for how aarch64.rbit is done.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D98487/new/

https://reviews.llvm.org/D98487

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D98487: [AArch64][SVE/NEON] Add support for FROUNDEVEN for both NEON and fixed length SVE

2021-03-15 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm added a comment.

In D98487#2625673 , @bsmith wrote:

>> Why is this patch only changing int_aarch64_neon_frintn and not 
>> int_aarch64_sve_frintn?
>> Is there a particular reason to do so?
>
> Things are done slightly differently for SVE in this regard, in principal 
> yes, we could emit roundeven instead of frintn from the ACLE intrinsic, 
> however all of the other ACLE intrinsics also emit SVE specific LLVM 
> intrinsics rather than the arch-indep nodes. This patch doesn't change that 
> in order to stay consistent, if we did want to change that it should be done 
> as a separate patch that changes all of them.

@CarolineConcatto There are two levels at play here.  At the top level 
(C->LLVM) the SVE ACLE cannot use the roundeven intrinsic because that 
operation takes a single data operand whereas for SVE the operation is 
predicated and thus also requires predicate and passthru operands (i.e. the two 
intrinsics are doing different things).  At the bottom level (CodeGen) we 
already lower scalable vector variants of both intrinsics to 
ISD::FROUNDEVEN_MERGE_PASSTHRU which is the "masked" version of ISD::FROUNDEVEN.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D98487/new/

https://reviews.llvm.org/D98487

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D98487: [AArch64][SVE/NEON] Add support for FROUNDEVEN for both NEON and fixed length SVE

2021-03-15 Thread Bradley Smith via Phabricator via cfe-commits
bsmith added a comment.

> Why is this patch only changing int_aarch64_neon_frintn and not 
> int_aarch64_sve_frintn?
> Is there a particular reason to do so?

Things are done slightly differently for SVE in this regard, in principal yes, 
we could emit roundeven instead of frintn from the ACLE intrinsic, however all 
of the other ACLE intrinsics also emit SVE specific LLVM intrinsics rather than 
the arch-indep nodes. This patch doesn't change that in order to stay 
consistent, if we did want to change that it should be done as a separate patch 
that changes all of them.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D98487/new/

https://reviews.llvm.org/D98487

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D98487: [AArch64][SVE/NEON] Add support for FROUNDEVEN for both NEON and fixed length SVE

2021-03-15 Thread Caroline via Phabricator via cfe-commits
CarolineConcatto added a comment.

Hi @ bsmith,

Thank you for adding me as a reviewer, although I don't think I am the more 
qualified to approve or not this patch.
But I have a question: 
Why is this patch only changing int_aarch64_neon_frintn and not 
int_aarch64_sve_frintn?
Is there a particular reason to do so? 
As you said in the commit message the ISD node for FROUNDEVEN exists now.
If so it would be too much to explain that in the commit message?
Thank you,
Carol


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D98487/new/

https://reviews.llvm.org/D98487

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D98487: [AArch64][SVE/NEON] Add support for FROUNDEVEN for both NEON and fixed length SVE

2021-03-12 Thread Bradley Smith via Phabricator via cfe-commits
bsmith created this revision.
bsmith added reviewers: paulwalker-arm, peterwaller-arm, joechrisellis, 
CarolineConcatto, dmgreen.
Herald added subscribers: hiraditya, kristof.beyls, tschuett.
bsmith requested review of this revision.
Herald added projects: clang, LLVM.
Herald added subscribers: llvm-commits, cfe-commits.

Previously NEON used a target specific intrinsic for frintn, given that
the FROUNDEVEN ISD node now exists, move over to that instead and add
codegen support for that node for both NEON and fixed length SVE.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D98487

Files:
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/aarch64-neon-intrinsics.c
  clang/test/CodeGen/aarch64-neon-misc.c
  clang/test/CodeGen/aarch64-v8.2a-fp16-intrinsics.c
  clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c
  clang/test/CodeGen/arm-neon-directed-rounding.c
  clang/test/CodeGen/arm64-vrnd.c
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/include/llvm/Target/TargetSelectionDAG.td
  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
  llvm/lib/Target/AArch64/AArch64InstrInfo.td
  llvm/test/CodeGen/AArch64/arm64-vcvt.ll
  llvm/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll
  llvm/test/CodeGen/AArch64/f16-instructions.ll
  llvm/test/CodeGen/AArch64/fp-intrinsics.ll
  llvm/test/CodeGen/AArch64/sve-fixed-length-fp-rounding.ll
  llvm/test/CodeGen/AArch64/vec-libcalls.ll

Index: llvm/test/CodeGen/AArch64/vec-libcalls.ll
===
--- llvm/test/CodeGen/AArch64/vec-libcalls.ll
+++ llvm/test/CodeGen/AArch64/vec-libcalls.ll
@@ -29,6 +29,7 @@
 declare <3 x float> @llvm.nearbyint.v3f32(<3 x float>)
 declare <3 x float> @llvm.rint.v3f32(<3 x float>)
 declare <3 x float> @llvm.round.v3f32(<3 x float>)
+declare <3 x float> @llvm.roundeven.v3f32(<3 x float>)
 declare <3 x float> @llvm.sqrt.v3f32(<3 x float>)
 declare <3 x float> @llvm.trunc.v3f32(<3 x float>)
 
@@ -478,6 +479,15 @@
   ret <3 x float> %r
 }
 
+define <3 x float> @roundeven_v3f32(<3 x float> %x) nounwind {
+; CHECK-LABEL: roundeven_v3f32:
+; CHECK:   // %bb.0:
+; CHECK-NEXT:frintn v0.4s, v0.4s
+; CHECK-NEXT:ret
+  %r = call <3 x float> @llvm.roundeven.v3f32(<3 x float> %x)
+  ret <3 x float> %r
+}
+
 define <3 x float> @sqrt_v3f32(<3 x float> %x) nounwind {
 ; CHECK-LABEL: sqrt_v3f32:
 ; CHECK:   // %bb.0:
Index: llvm/test/CodeGen/AArch64/sve-fixed-length-fp-rounding.ll
===
--- llvm/test/CodeGen/AArch64/sve-fixed-length-fp-rounding.ll
+++ llvm/test/CodeGen/AArch64/sve-fixed-length-fp-rounding.ll
@@ -1255,6 +1255,253 @@
   ret void
 }
 
+;
+; ROUNDEVEN -> FRINTN
+;
+
+; Don't use SVE for 64-bit vectors.
+define <4 x half> @frintn_v4f16(<4 x half> %op) #0 {
+; CHECK-LABEL: frintn_v4f16:
+; CHECK: frintn v0.4h, v0.4h
+; CHECK-NEXT: ret
+  %res = call <4 x half> @llvm.roundeven.v4f16(<4 x half> %op)
+  ret <4 x half> %res
+}
+
+; Don't use SVE for 128-bit vectors.
+define <8 x half> @frintn_v8f16(<8 x half> %op) #0 {
+; CHECK-LABEL: frintn_v8f16:
+; CHECK: frintn v0.8h, v0.8h
+; CHECK-NEXT: ret
+  %res = call <8 x half> @llvm.roundeven.v8f16(<8 x half> %op)
+  ret <8 x half> %res
+}
+
+define void @frintn_v16f16(<16 x half>* %a) #0 {
+; CHECK-LABEL: frintn_v16f16:
+; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
+; CHECK-DAG: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
+; CHECK-NEXT: frintn [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
+; CHECK-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
+; CHECK-NEXT: ret
+  %op = load <16 x half>, <16 x half>* %a
+  %res = call <16 x half> @llvm.roundeven.v16f16(<16 x half> %op)
+  store <16 x half> %res, <16 x half>* %a
+  ret void
+}
+
+define void @frintn_v32f16(<32 x half>* %a) #0 {
+; CHECK-LABEL: frintn_v32f16:
+; VBITS_GE_512: ptrue [[PG:p[0-9]+]].h, vl32
+; VBITS_GE_512-DAG: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
+; VBITS_GE_512-NEXT: frintn [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
+; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
+; VBITS_GE_512-NEXT: ret
+
+; Ensure sensible type legalisation.
+; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
+; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: ld1h { [[OP_LO:z[0-9]+]].h }, [[PG]]/z, [x0]
+; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: frintn [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP_LO]].h
+; VBITS_EQ_256-DAG: frintn [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP_HI]].h
+; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0]
+; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x[[A_HI]]
+; VBITS_EQ_256-NEXT: ret
+  %op = load <32 x half>, <32 x half>* %a
+  %res = call <32 x half> @llvm.roundeven.v32f16(<32 x half> %op)
+  store <32 x half> %res, <32 x half>* %a
+  ret void
+}
+
+define void @frintn_v64f16(<64 x half>* %a) #0 {
+; CHECK-LABEL: frintn_v64f16:
+; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].h, vl64
+; VBITS_GE_1024-DAG: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
+;