[PATCH] D58306: [AArch64] Change size suffix for FP16FML intrinsics.

2019-02-20 Thread Ahmed Bougacha via Phabricator via cfe-commits
ab added a comment.

Thanks for checking, much appreciated!


Repository:
  rL LLVM

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D58306/new/

https://reviews.llvm.org/D58306



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D58306: [AArch64] Change size suffix for FP16FML intrinsics.

2019-02-20 Thread Ahmed Bougacha via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rL354538: [AArch64] Change size suffix for FP16FML intrinsics. 
(authored by ab, committed by ).
Herald added a project: LLVM.
Herald added a subscriber: llvm-commits.

Changed prior to commit:
  https://reviews.llvm.org/D58306?vs=187083=187696#toc

Repository:
  rL LLVM

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D58306/new/

https://reviews.llvm.org/D58306

Files:
  cfe/trunk/include/clang/Basic/arm_neon.td
  cfe/trunk/test/CodeGen/aarch64-neon-fp16fml.c

Index: cfe/trunk/include/clang/Basic/arm_neon.td
===
--- cfe/trunk/include/clang/Basic/arm_neon.td
+++ cfe/trunk/include/clang/Basic/arm_neon.td
@@ -1651,18 +1651,18 @@
 
 // v8.2-A FP16 fused multiply-add long instructions.
 let ArchGuard = "defined(__ARM_FEATURE_FP16FML) && defined(__aarch64__)" in {
-  def VFMLAL_LOW  : SInst<"vfmlal_low", "ffHH", "UiQUi">;
-  def VFMLSL_LOW  : SInst<"vfmlsl_low", "ffHH", "UiQUi">;
-  def VFMLAL_HIGH : SInst<"vfmlal_high", "ffHH", "UiQUi">;
-  def VFMLSL_HIGH : SInst<"vfmlsl_high", "ffHH", "UiQUi">;
-
-  def VFMLAL_LANE_LOW  : SOpInst<"vfmlal_lane_low", "ffH0i", "UiQUi", OP_FMLAL_LN>;
-  def VFMLSL_LANE_LOW  : SOpInst<"vfmlsl_lane_low", "ffH0i", "UiQUi", OP_FMLSL_LN>;
-  def VFMLAL_LANE_HIGH : SOpInst<"vfmlal_lane_high", "ffH0i", "UiQUi", OP_FMLAL_LN_Hi>;
-  def VFMLSL_LANE_HIGH : SOpInst<"vfmlsl_lane_high", "ffH0i", "UiQUi", OP_FMLSL_LN_Hi>;
-
-  def VFMLAL_LANEQ_LOW  : SOpInst<"vfmlal_laneq_low", "ffH1i", "UiQUi", OP_FMLAL_LN>;
-  def VFMLSL_LANEQ_LOW  : SOpInst<"vfmlsl_laneq_low", "ffH1i", "UiQUi", OP_FMLSL_LN>;
-  def VFMLAL_LANEQ_HIGH : SOpInst<"vfmlal_laneq_high", "ffH1i", "UiQUi", OP_FMLAL_LN_Hi>;
-  def VFMLSL_LANEQ_HIGH : SOpInst<"vfmlsl_laneq_high", "ffH1i", "UiQUi", OP_FMLSL_LN_Hi>;
+  def VFMLAL_LOW  : SInst<"vfmlal_low",  "ffHH", "hQh">;
+  def VFMLSL_LOW  : SInst<"vfmlsl_low",  "ffHH", "hQh">;
+  def VFMLAL_HIGH : SInst<"vfmlal_high", "ffHH", "hQh">;
+  def VFMLSL_HIGH : SInst<"vfmlsl_high", "ffHH", "hQh">;
+
+  def VFMLAL_LANE_LOW  : SOpInst<"vfmlal_lane_low",  "ffH0i", "hQh", OP_FMLAL_LN>;
+  def VFMLSL_LANE_LOW  : SOpInst<"vfmlsl_lane_low",  "ffH0i", "hQh", OP_FMLSL_LN>;
+  def VFMLAL_LANE_HIGH : SOpInst<"vfmlal_lane_high", "ffH0i", "hQh", OP_FMLAL_LN_Hi>;
+  def VFMLSL_LANE_HIGH : SOpInst<"vfmlsl_lane_high", "ffH0i", "hQh", OP_FMLSL_LN_Hi>;
+
+  def VFMLAL_LANEQ_LOW  : SOpInst<"vfmlal_laneq_low",  "ffH1i", "hQh", OP_FMLAL_LN>;
+  def VFMLSL_LANEQ_LOW  : SOpInst<"vfmlsl_laneq_low",  "ffH1i", "hQh", OP_FMLSL_LN>;
+  def VFMLAL_LANEQ_HIGH : SOpInst<"vfmlal_laneq_high", "ffH1i", "hQh", OP_FMLAL_LN_Hi>;
+  def VFMLSL_LANEQ_HIGH : SOpInst<"vfmlsl_laneq_high", "ffH1i", "hQh", OP_FMLSL_LN_Hi>;
 }
Index: cfe/trunk/test/CodeGen/aarch64-neon-fp16fml.c
===
--- cfe/trunk/test/CodeGen/aarch64-neon-fp16fml.c
+++ cfe/trunk/test/CodeGen/aarch64-neon-fp16fml.c
@@ -9,188 +9,188 @@
 
 // Vector form
 
-float32x2_t test_vfmlal_low_u32(float32x2_t a, float16x4_t b, float16x4_t c) {
-// CHECK-LABEL: define <2 x float> @test_vfmlal_low_u32(<2 x float> %a, <4 x half> %b, <4 x half> %c)
+float32x2_t test_vfmlal_low_f16(float32x2_t a, float16x4_t b, float16x4_t c) {
+// CHECK-LABEL: define <2 x float> @test_vfmlal_low_f16(<2 x float> %a, <4 x half> %b, <4 x half> %c)
 // CHECK: [[RESULT:%.*]] = call <2 x float> @llvm.aarch64.neon.fmlal.v2f32.v4f16(<2 x float> %a, <4 x half> %b, <4 x half> %c)
 // CHECK: ret <2 x float> [[RESULT]]
-  return vfmlal_low_u32(a, b, c);
+  return vfmlal_low_f16(a, b, c);
 }
 
-float32x2_t test_vfmlsl_low_u32(float32x2_t a, float16x4_t b, float16x4_t c) {
-// CHECK-LABEL: define <2 x float> @test_vfmlsl_low_u32(<2 x float> %a, <4 x half> %b, <4 x half> %c)
+float32x2_t test_vfmlsl_low_f16(float32x2_t a, float16x4_t b, float16x4_t c) {
+// CHECK-LABEL: define <2 x float> @test_vfmlsl_low_f16(<2 x float> %a, <4 x half> %b, <4 x half> %c)
 // CHECK: [[RESULT:%.*]] = call <2 x float> @llvm.aarch64.neon.fmlsl.v2f32.v4f16(<2 x float> %a, <4 x half> %b, <4 x half> %c)
 // CHECK: ret <2 x float> [[RESULT]]
-  return vfmlsl_low_u32(a, b, c);
+  return vfmlsl_low_f16(a, b, c);
 }
 
-float32x2_t test_vfmlal_high_u32(float32x2_t a, float16x4_t b, float16x4_t c) {
-// CHECK-LABEL: define <2 x float> @test_vfmlal_high_u32(<2 x float> %a, <4 x half> %b, <4 x half> %c)
+float32x2_t test_vfmlal_high_f16(float32x2_t a, float16x4_t b, float16x4_t c) {
+// CHECK-LABEL: define <2 x float> @test_vfmlal_high_f16(<2 x float> %a, <4 x half> %b, <4 x half> %c)
 // CHECK: [[RESULT:%.*]] = call <2 x float> @llvm.aarch64.neon.fmlal2.v2f32.v4f16(<2 x float> %a, <4 x half> %b, <4 x half> %c)
 // CHECK: ret <2 x float> [[RESULT]]
-  return vfmlal_high_u32(a, b, c);
+  return vfmlal_high_f16(a, b, c);
 }
 
-float32x2_t test_vfmlsl_high_u32(float32x2_t a, float16x4_t b, float16x4_t c) {
-// CHECK-LABEL: 

[PATCH] D58306: [AArch64] Change size suffix for FP16FML intrinsics.

2019-02-19 Thread Sjoerd Meijer via Phabricator via cfe-commits
SjoerdMeijer accepted this revision.
SjoerdMeijer added a comment.
This revision is now accepted and ready to land.

LGTM

The ACLE has been updated and a new version with change included will be 
released soon.


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D58306/new/

https://reviews.llvm.org/D58306



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D58306: [AArch64] Change size suffix for FP16FML intrinsics.

2019-02-17 Thread Sjoerd Meijer via Phabricator via cfe-commits
SjoerdMeijer added a comment.

I am discussing this with our GCC team as we would like both Clang/GCC 
implementation to be the same. But you're right that _f16 looks like to be the 
more consistent choice. I will let you know as soon I know more.


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D58306/new/

https://reviews.llvm.org/D58306



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D58306: [AArch64] Change size suffix for FP16FML intrinsics.

2019-02-15 Thread Ahmed Bougacha via Phabricator via cfe-commits
ab created this revision.
ab added reviewers: SjoerdMeijer, bryanpkc.
ab added a project: clang.
Herald added a subscriber: javed.absar.

These currently use _u32, but they should instead use _f32 or _f16, the types 
of the accumulator, and of the multiplication.

I'm starting with _f16 (because that seems to match the various integer vmlal 
variants), but either seems fine.


Repository:
  rC Clang

https://reviews.llvm.org/D58306

Files:
  include/clang/Basic/arm_neon.td
  test/CodeGen/aarch64-neon-fp16fml.c

Index: test/CodeGen/aarch64-neon-fp16fml.c
===
--- test/CodeGen/aarch64-neon-fp16fml.c
+++ test/CodeGen/aarch64-neon-fp16fml.c
@@ -9,188 +9,188 @@
 
 // Vector form
 
-float32x2_t test_vfmlal_low_u32(float32x2_t a, float16x4_t b, float16x4_t c) {
-// CHECK-LABEL: define <2 x float> @test_vfmlal_low_u32(<2 x float> %a, <4 x half> %b, <4 x half> %c)
+float32x2_t test_vfmlal_low_f16(float32x2_t a, float16x4_t b, float16x4_t c) {
+// CHECK-LABEL: define <2 x float> @test_vfmlal_low_f16(<2 x float> %a, <4 x half> %b, <4 x half> %c)
 // CHECK: [[RESULT:%.*]] = call <2 x float> @llvm.aarch64.neon.fmlal.v2f32.v4f16(<2 x float> %a, <4 x half> %b, <4 x half> %c)
 // CHECK: ret <2 x float> [[RESULT]]
-  return vfmlal_low_u32(a, b, c);
+  return vfmlal_low_f16(a, b, c);
 }
 
-float32x2_t test_vfmlsl_low_u32(float32x2_t a, float16x4_t b, float16x4_t c) {
-// CHECK-LABEL: define <2 x float> @test_vfmlsl_low_u32(<2 x float> %a, <4 x half> %b, <4 x half> %c)
+float32x2_t test_vfmlsl_low_f16(float32x2_t a, float16x4_t b, float16x4_t c) {
+// CHECK-LABEL: define <2 x float> @test_vfmlsl_low_f16(<2 x float> %a, <4 x half> %b, <4 x half> %c)
 // CHECK: [[RESULT:%.*]] = call <2 x float> @llvm.aarch64.neon.fmlsl.v2f32.v4f16(<2 x float> %a, <4 x half> %b, <4 x half> %c)
 // CHECK: ret <2 x float> [[RESULT]]
-  return vfmlsl_low_u32(a, b, c);
+  return vfmlsl_low_f16(a, b, c);
 }
 
-float32x2_t test_vfmlal_high_u32(float32x2_t a, float16x4_t b, float16x4_t c) {
-// CHECK-LABEL: define <2 x float> @test_vfmlal_high_u32(<2 x float> %a, <4 x half> %b, <4 x half> %c)
+float32x2_t test_vfmlal_high_f16(float32x2_t a, float16x4_t b, float16x4_t c) {
+// CHECK-LABEL: define <2 x float> @test_vfmlal_high_f16(<2 x float> %a, <4 x half> %b, <4 x half> %c)
 // CHECK: [[RESULT:%.*]] = call <2 x float> @llvm.aarch64.neon.fmlal2.v2f32.v4f16(<2 x float> %a, <4 x half> %b, <4 x half> %c)
 // CHECK: ret <2 x float> [[RESULT]]
-  return vfmlal_high_u32(a, b, c);
+  return vfmlal_high_f16(a, b, c);
 }
 
-float32x2_t test_vfmlsl_high_u32(float32x2_t a, float16x4_t b, float16x4_t c) {
-// CHECK-LABEL: define <2 x float> @test_vfmlsl_high_u32(<2 x float> %a, <4 x half> %b, <4 x half> %c)
+float32x2_t test_vfmlsl_high_f16(float32x2_t a, float16x4_t b, float16x4_t c) {
+// CHECK-LABEL: define <2 x float> @test_vfmlsl_high_f16(<2 x float> %a, <4 x half> %b, <4 x half> %c)
 // CHECK: [[RESULT:%.*]] = call <2 x float> @llvm.aarch64.neon.fmlsl2.v2f32.v4f16(<2 x float> %a, <4 x half> %b, <4 x half> %c)
 // CHECK: ret <2 x float> [[RESULT]]
-  return vfmlsl_high_u32(a, b, c);
+  return vfmlsl_high_f16(a, b, c);
 }
 
-float32x4_t test_vfmlalq_low_u32(float32x4_t a, float16x8_t b, float16x8_t c) {
-// CHECK-LABEL: define <4 x float> @test_vfmlalq_low_u32(<4 x float> %a, <8 x half> %b, <8 x half> %c)
+float32x4_t test_vfmlalq_low_f16(float32x4_t a, float16x8_t b, float16x8_t c) {
+// CHECK-LABEL: define <4 x float> @test_vfmlalq_low_f16(<4 x float> %a, <8 x half> %b, <8 x half> %c)
 // CHECK: [[RESULT:%.*]] = call <4 x float> @llvm.aarch64.neon.fmlal.v4f32.v8f16(<4 x float> %a, <8 x half> %b, <8 x half> %c)
 // CHECK: ret <4 x float> [[RESULT]]
-  return vfmlalq_low_u32(a, b, c);
+  return vfmlalq_low_f16(a, b, c);
 }
 
-float32x4_t test_vfmlslq_low_u32(float32x4_t a, float16x8_t b, float16x8_t c) {
-// CHECK-LABEL: define <4 x float> @test_vfmlslq_low_u32(<4 x float> %a, <8 x half> %b, <8 x half> %c)
+float32x4_t test_vfmlslq_low_f16(float32x4_t a, float16x8_t b, float16x8_t c) {
+// CHECK-LABEL: define <4 x float> @test_vfmlslq_low_f16(<4 x float> %a, <8 x half> %b, <8 x half> %c)
 // CHECK: [[RESULT:%.*]] = call <4 x float> @llvm.aarch64.neon.fmlsl.v4f32.v8f16(<4 x float> %a, <8 x half> %b, <8 x half> %c)
 // CHECK: ret <4 x float> [[RESULT]]
-  return vfmlslq_low_u32(a, b, c);
+  return vfmlslq_low_f16(a, b, c);
 }
 
-float32x4_t test_vfmlalq_high_u32(float32x4_t a, float16x8_t b, float16x8_t c) {
-// CHECK-LABEL: define <4 x float> @test_vfmlalq_high_u32(<4 x float> %a, <8 x half> %b, <8 x half> %c)
+float32x4_t test_vfmlalq_high_f16(float32x4_t a, float16x8_t b, float16x8_t c) {
+// CHECK-LABEL: define <4 x float> @test_vfmlalq_high_f16(<4 x float> %a, <8 x half> %b, <8 x half> %c)
 // CHECK: [[RESULT:%.*]] = call <4 x float> @llvm.aarch64.neon.fmlal2.v4f32.v8f16(<4 x float> %a, <8 x half> %b, <8 x half> %c)
 // CHECK: ret <4 x float> [[RESULT]]
-  return vfmlalq_high_u32(a, b, c);
+