https://github.com/Amichaxx updated https://github.com/llvm/llvm-project/pull/149329

>From 2895e5e7b56c1c611b39a5c85de92d18f3aae71a Mon Sep 17 00:00:00 2001
From: Amina Chabane <amina.chab...@arm.com>
Date: Tue, 15 Jul 2025 15:56:49 +0000
Subject: [PATCH 1/5] [AArch64][NEON] Fix poly lane intrinsics under
 -fno-lax-vector-conversions

Issue originally raised in
https://github.com/llvm/llvm-project/issues/71362#issuecomment-3028515618.
Certain NEON intrinsics that operate on poly types (e.g. poly8x8_t) failed to
compile with the -fno-lax-vector-conversions flag. This patch updates
NeonEmitter.cpp to insert an explicit __builtin_bit_cast from poly types to
the required signed integer vector types when generating lane-based
intrinsics. A test, neon-bitcast-poly, is included.
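For reference, a minimal reproducer of the failure mode this patch addresses
(a sketch; the file and function names are illustrative and the exact
diagnostic text depends on the Clang version):

  // repro.c -- previously rejected when lax vector conversions are disabled:
  //   clang --target=aarch64-linux-gnu -flax-vector-conversions=none -c repro.c
  #include <arm_neon.h>

  poly8x8_t set_first(poly8_t x, poly8x8_t v) {
    // Before the fix, the poly8x8_t operand reached the underlying builtin as
    // a polynomial (unsigned) vector where a signed integer vector was
    // expected, which is an error once lax vector conversions are disabled.
    return vset_lane_p8(x, v, 0);
  }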

---
 clang/utils/TableGen/NeonEmitter.cpp          | 10 +++-
 .../test/CodeGen/AArch64/neon-bitcast-poly.ll | 51 +++++++++++++++++++
 2 files changed, 60 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/AArch64/neon-bitcast-poly.ll

diff --git a/clang/utils/TableGen/NeonEmitter.cpp b/clang/utils/TableGen/NeonEmitter.cpp
index 409f1c4f71834..574a29d0e4dd9 100644
--- a/clang/utils/TableGen/NeonEmitter.cpp
+++ b/clang/utils/TableGen/NeonEmitter.cpp
@@ -1401,12 +1401,20 @@ void Intrinsic::emitBodyAsBuiltinCall() {
       if (LocalCK == ClassB || (T.isHalf() && !T.isScalarForMangling())) {
         CastToType.makeInteger(8, true);
         Arg = "__builtin_bit_cast(" + CastToType.str() + ", " + Arg + ")";
+      }
+      else if ((T.isPoly() ||
+          (T.isInteger() && !T.isSigned() &&
+           StringRef(Name).contains("_p8")) ||
+          StringRef(Name).contains("_p16") ||
+          StringRef(Name).contains("_p64"))) {
+            CastToType.makeSigned();
+            Arg = "__builtin_bit_cast(" + CastToType.str() + ", " + Arg + ")";
+      }
       } else if (LocalCK == ClassI) {
         if (CastToType.isInteger()) {
           CastToType.makeSigned();
           Arg = "__builtin_bit_cast(" + CastToType.str() + ", " + Arg + ")";
         }
-      }
     }
 
     S += Arg + ", ";
diff --git a/llvm/test/CodeGen/AArch64/neon-bitcast-poly.ll b/llvm/test/CodeGen/AArch64/neon-bitcast-poly.ll
new file mode 100644
index 0000000000000..b577eb1e34b09
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/neon-bitcast-poly.ll
@@ -0,0 +1,51 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64-linux-gnu -o - %s | FileCheck %s
+
+; This test verifies that NEON intrinsics using polynomial types (poly8/16/64) emit correct AArch64 instructions
+; after bitcasting to signed integer vectors. These intrinsics would previously fail under -fno-lax-vector-conversions.
+
+define <8 x i8> @_Z18test_vcopy_lane_p811__Poly8x8_tS_(<8 x i8> %a, <8 x i8> %b) {
+; CHECK-LABEL: _Z18test_vcopy_lane_p811__Poly8x8_tS_:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    mov v0.b[0], v1.b[0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
+entry:
+  %vset_lane = shufflevector <8 x i8> %b, <8 x i8> %a, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  ret <8 x i8> %vset_lane
+}
+
+define <4 x i16> @_Z18test_vset_lane_p16t12__Poly16x4_t(i16 %val, <4 x i16> %vec) {
+; CHECK-LABEL: _Z18test_vset_lane_p16t12__Poly16x4_t:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    mov v0.h[0], w0
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
+entry:
+  %vset_lane = insertelement <4 x i16> %vec, i16 %val, i64 0
+  ret <4 x i16> %vset_lane
+}
+
+define i64 @_Z18test_vget_lane_p6412__Poly64x1_t(<1 x i64> %vec){
+; CHECK-LABEL: _Z18test_vget_lane_p6412__Poly64x1_t:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %vget_lane = extractelement <1 x i64> %vec, i64 0
+  ret i64 %vget_lane
+}
+
+define <16 x i8> @_Z18test_vsetq_lane_p8h12__Poly8x16_t(i8 %val, <16 x i8> %vec){
+; CHECK-LABEL: _Z18test_vsetq_lane_p8h12__Poly8x16_t:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov v0.b[0], w0
+; CHECK-NEXT:    ret
+entry:
+  %vset_lane = insertelement <16 x i8> %vec, i8 %val, i64 0
+  ret <16 x i8> %vset_lane
+}

>From c300ab6ced97df16728fac0a07c94e38792a2047 Mon Sep 17 00:00:00 2001
From: Amina Chabane <amina.chab...@arm.com>
Date: Wed, 16 Jul 2025 13:53:30 +0000
Subject: [PATCH 2/5] Added isVector() condition to avoid bit_casting scalar constants.
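For context, a hedged illustration (not emitter output; the function name is
illustrative): lane intrinsics mix vector and scalar arguments, and the added
isVector() condition keeps the unsigned-integer part of the check from
matching scalar constants, so they are not wrapped in a cast.

  #include <arm_neon.h>

  // Illustrative only: v is the poly vector operand that receives the
  // explicit __builtin_bit_cast; x and the constant lane index 3 are scalar
  // arguments, which the isVector() condition excludes from that branch.
  poly16x4_t demo(poly16_t x, poly16x4_t v) {
    return vset_lane_p16(x, v, 3);
  }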

---
 clang/utils/TableGen/NeonEmitter.cpp | 23 ++++++++++-------------
 1 file changed, 10 insertions(+), 13 deletions(-)

diff --git a/clang/utils/TableGen/NeonEmitter.cpp b/clang/utils/TableGen/NeonEmitter.cpp
index 574a29d0e4dd9..d3dd1c5589920 100644
--- a/clang/utils/TableGen/NeonEmitter.cpp
+++ b/clang/utils/TableGen/NeonEmitter.cpp
@@ -1401,22 +1401,19 @@ void Intrinsic::emitBodyAsBuiltinCall() {
       if (LocalCK == ClassB || (T.isHalf() && !T.isScalarForMangling())) {
         CastToType.makeInteger(8, true);
         Arg = "__builtin_bit_cast(" + CastToType.str() + ", " + Arg + ")";
+      } else if ((T.isPoly() || (T.isVector() && T.isInteger() && !T.isSigned() &&
+                  (StringRef(Name).contains("_p8") ||
+                    StringRef(Name).contains("_p16") ||
+                    StringRef(Name).contains("_p64"))))) {
+        CastToType.makeSigned();
+        Arg = "__builtin_bit_cast(" + CastToType.str() + ", " + Arg + ")";
+      } else if (LocalCK == ClassI && CastToType.isInteger()) {
+        CastToType.makeSigned();
+        Arg = "__builtin_bit_cast(" + CastToType.str() + ", " + Arg + ")";
       }
-      else if ((T.isPoly() ||
-          (T.isInteger() && !T.isSigned() &&
-           StringRef(Name).contains("_p8")) ||
-          StringRef(Name).contains("_p16") ||
-          StringRef(Name).contains("_p64"))) {
-            CastToType.makeSigned();
-            Arg = "__builtin_bit_cast(" + CastToType.str() + ", " + Arg + ")";
-      }
-      } else if (LocalCK == ClassI) {
-        if (CastToType.isInteger()) {
-          CastToType.makeSigned();
-          Arg = "__builtin_bit_cast(" + CastToType.str() + ", " + Arg + ")";
-        }
     }
 
+
     S += Arg + ", ";
   }
 

>From 102ca6f20dac9e2c5a458ee5e637e517f242c949 Mon Sep 17 00:00:00 2001
From: Amina Chabane <amina.chab...@arm.com>
Date: Thu, 17 Jul 2025 14:42:43 +0000
Subject: [PATCH 3/5] Newline deletion

---
 clang/utils/TableGen/NeonEmitter.cpp | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/clang/utils/TableGen/NeonEmitter.cpp b/clang/utils/TableGen/NeonEmitter.cpp
index d3dd1c5589920..1bd8c8b58c396 100644
--- a/clang/utils/TableGen/NeonEmitter.cpp
+++ b/clang/utils/TableGen/NeonEmitter.cpp
@@ -1402,9 +1402,9 @@ void Intrinsic::emitBodyAsBuiltinCall() {
         CastToType.makeInteger(8, true);
         Arg = "__builtin_bit_cast(" + CastToType.str() + ", " + Arg + ")";
       } else if ((T.isPoly() || (T.isVector() && T.isInteger() && !T.isSigned() &&
-                  (StringRef(Name).contains("_p8") ||
-                    StringRef(Name).contains("_p16") ||
-                    StringRef(Name).contains("_p64"))))) {
+                (StringRef(Name).contains("_p8") ||
+                  StringRef(Name).contains("_p16") ||
+                  StringRef(Name).contains("_p64"))))) {
         CastToType.makeSigned();
         Arg = "__builtin_bit_cast(" + CastToType.str() + ", " + Arg + ")";
       } else if (LocalCK == ClassI && CastToType.isInteger()) {
@@ -1412,8 +1412,6 @@ void Intrinsic::emitBodyAsBuiltinCall() {
         Arg = "__builtin_bit_cast(" + CastToType.str() + ", " + Arg + ")";
       }
     }
-
-
     S += Arg + ", ";
   }
 

>From 7106ac95552f7bb32321cbc7b6d5e9df3eec578b Mon Sep 17 00:00:00 2001
From: Amina Chabane <amina.chab...@arm.com>
Date: Tue, 22 Jul 2025 08:43:47 +0000
Subject: [PATCH 4/5] Code formatting change

---
 clang/utils/TableGen/NeonEmitter.cpp | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/clang/utils/TableGen/NeonEmitter.cpp b/clang/utils/TableGen/NeonEmitter.cpp
index 1bd8c8b58c396..da3bbd4303074 100644
--- a/clang/utils/TableGen/NeonEmitter.cpp
+++ b/clang/utils/TableGen/NeonEmitter.cpp
@@ -1401,10 +1401,11 @@ void Intrinsic::emitBodyAsBuiltinCall() {
       if (LocalCK == ClassB || (T.isHalf() && !T.isScalarForMangling())) {
         CastToType.makeInteger(8, true);
         Arg = "__builtin_bit_cast(" + CastToType.str() + ", " + Arg + ")";
-      } else if ((T.isPoly() || (T.isVector() && T.isInteger() && !T.isSigned() &&
-                (StringRef(Name).contains("_p8") ||
-                  StringRef(Name).contains("_p16") ||
-                  StringRef(Name).contains("_p64"))))) {
+      } else if ((T.isPoly() ||
+                  (T.isVector() && T.isInteger() && !T.isSigned() &&
+                   (StringRef(Name).contains("_p8") ||
+                    StringRef(Name).contains("_p16") ||
+                    StringRef(Name).contains("_p64"))))) {
         CastToType.makeSigned();
         Arg = "__builtin_bit_cast(" + CastToType.str() + ", " + Arg + ")";
       } else if (LocalCK == ClassI && CastToType.isInteger()) {

>From 6b12c80ca200872f7e9e9f4afac5d42b31d9b349 Mon Sep 17 00:00:00 2001
From: Amina Chabane <amina.chab...@arm.com>
Date: Fri, 25 Jul 2025 14:23:29 +0000
Subject: [PATCH 5/5] Added neon-bitcast-poly.c test; amended ClassI check

---
 .../test/CodeGen/AArch64/neon-bitcast-poly.c  | 247 ++++++++++++++++++
 clang/utils/TableGen/NeonEmitter.cpp          |  10 +-
 .../test/CodeGen/AArch64/neon-bitcast-poly.ll |  51 ----
 3 files changed, 249 insertions(+), 59 deletions(-)
 create mode 100644 clang/test/CodeGen/AArch64/neon-bitcast-poly.c
 delete mode 100644 llvm/test/CodeGen/AArch64/neon-bitcast-poly.ll

diff --git a/clang/test/CodeGen/AArch64/neon-bitcast-poly.c b/clang/test/CodeGen/AArch64/neon-bitcast-poly.c
new file mode 100644
index 0000000000000..6d619ce62d7ed
--- /dev/null
+++ b/clang/test/CodeGen/AArch64/neon-bitcast-poly.c
@@ -0,0 +1,247 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+
+
+// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -flax-vector-conversions=none \
+// RUN: -disable-O0-optnone  -emit-llvm -o - %s | opt -S -passes=instcombine | FileCheck %s
+
+// REQUIRES: aarch64-registered-target
+
+#include <arm_neon.h>
+
+// CHECK-LABEL: @test_vdupb_lane_p8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VGET_LANE:%.*]] = extractelement <8 x i8> [[A:%.*]], i64 1
+// CHECK-NEXT:    ret i8 [[VGET_LANE]]
+//
+poly8_t test_vdupb_lane_p8(poly8x8_t a){
+  return vdupb_lane_p8(a, 1);
+}
+
+// CHECK-LABEL: @test_vdupb_laneq_p8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VGETQ_LANE:%.*]] = extractelement <16 x i8> [[A:%.*]], i64 5
+// CHECK-NEXT:    ret i8 [[VGETQ_LANE]]
+//
+poly8_t test_vdupb_laneq_p8(poly8x16_t a) {
+  return vdupb_laneq_p8(a, 5);
+}
+
+// CHECK-LABEL: @test_vset_lane_p8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VSET_LANE:%.*]] = insertelement <8 x i8> [[V:%.*]], i8 [[A:%.*]], i64 3
+// CHECK-NEXT:    ret <8 x i8> [[VSET_LANE]]
+//
+poly8x8_t test_vset_lane_p8(poly8_t a, poly8x8_t v){
+  return vset_lane_p8(a, v, 3);
+}
+
+// CHECK-LABEL: @test_vset_lane_p16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VSET_LANE:%.*]] = insertelement <4 x i16> [[V:%.*]], i16 [[A:%.*]], i64 3
+// CHECK-NEXT:    ret <4 x i16> [[VSET_LANE]]
+//
+poly16x4_t test_vset_lane_p16(poly16_t a, poly16x4_t v){
+  return vset_lane_p16(a, v, 3);
+}
+
+// CHECK-LABEL: @test_vset_lane_p64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VSET_LANE:%.*]] = insertelement <1 x i64> poison, i64 [[A:%.*]], i64 0
+// CHECK-NEXT:    ret <1 x i64> [[VSET_LANE]]
+//
+poly64x1_t test_vset_lane_p64(poly64_t a, poly64x1_t v){
+  return vset_lane_p64(a, v, 0);
+}
+
+// CHECK-LABEL: @test_vsetq_lane_p8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VSET_LANE:%.*]] = insertelement <16 x i8> [[V:%.*]], i8 [[A:%.*]], i64 3
+// CHECK-NEXT:    ret <16 x i8> [[VSET_LANE]]
+//
+poly8x16_t test_vsetq_lane_p8(poly8_t a, poly8x16_t v){
+  return vsetq_lane_p8(a, v, 3);
+}
+
+// CHECK-LABEL: @test_vsetq_lane_p16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VSET_LANE:%.*]] = insertelement <8 x i16> [[V:%.*]], i16 [[A:%.*]], i64 3
+// CHECK-NEXT:    ret <8 x i16> [[VSET_LANE]]
+//
+poly16x8_t test_vsetq_lane_p16(poly16_t a, poly16x8_t v){
+  return vsetq_lane_p16(a, v, 3);
+}
+
+// CHECK-LABEL: @test_vsetq_lane_p64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VSET_LANE:%.*]] = insertelement <2 x i64> [[V:%.*]], i64 [[A:%.*]], i64 0
+// CHECK-NEXT:    ret <2 x i64> [[VSET_LANE]]
+//
+poly64x2_t test_vsetq_lane_p64(poly64_t a, poly64x2_t v){
+  return vsetq_lane_p64(a, v, 0);
+}
+
+// CHECK-LABEL: @test_vget_lane_p8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VGET_LANE:%.*]] = extractelement <8 x i8> [[V:%.*]], i64 2
+// CHECK-NEXT:    ret i8 [[VGET_LANE]]
+//
+poly8_t test_vget_lane_p8(poly8x8_t v){
+  return vget_lane_p8(v, 2);
+}
+
+// CHECK-LABEL: @test_vget_lane_p16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VGET_LANE:%.*]] = extractelement <4 x i16> [[V:%.*]], i64 2
+// CHECK-NEXT:    ret i16 [[VGET_LANE]]
+//
+poly16_t test_vget_lane_p16(poly16x4_t v){
+  return vget_lane_p16(v, 2);
+}
+
+// CHECK-LABEL: @test_vget_lane_p64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VGET_LANE:%.*]] = extractelement <1 x i64> [[V:%.*]], i64 0
+// CHECK-NEXT:    ret i64 [[VGET_LANE]]
+//
+poly64_t test_vget_lane_p64(poly64x1_t v){
+  return vget_lane_p64(v, 0);
+}
+
+// CHECK-LABEL: @test_vgetq_lane_p8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VGETQ_LANE:%.*]] = extractelement <16 x i8> [[V:%.*]], i64 2
+// CHECK-NEXT:    ret i8 [[VGETQ_LANE]]
+//
+poly8_t test_vgetq_lane_p8(poly8x16_t v){
+  return vgetq_lane_p8(v, 2);
+}
+
+// CHECK-LABEL: @test_vgetq_lane_p16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[V:%.*]], i64 2
+// CHECK-NEXT:    ret i16 [[VGETQ_LANE]]
+//
+poly16_t test_vgetq_lane_p16(poly16x8_t v){
+  return vgetq_lane_p16(v, 2);
+}
+
+// CHECK-LABEL: @test_vgetq_lane_p64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[V:%.*]], i64 0
+// CHECK-NEXT:    ret i64 [[VGETQ_LANE]]
+//
+poly64_t test_vgetq_lane_p64(poly64x2_t v){
+  return vgetq_lane_p64(v, 0);
+}
+
+// CHECK-LABEL: @test_vcopy_lane_p8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VSET_LANE:%.*]] = shufflevector <8 x i8> [[B:%.*]], <8 x i8> [[A:%.*]], <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK-NEXT:    ret <8 x i8> [[VSET_LANE]]
+//
+poly8x8_t test_vcopy_lane_p8(poly8x8_t a, poly8x8_t b) {
+  return vcopy_lane_p8(a, 0, b, 0);
+}
+
+// CHECK-LABEL: @test_vcopy_lane_p16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VSET_LANE:%.*]] = shufflevector <4 x i16> [[B:%.*]], <4 x i16> [[A:%.*]], <4 x i32> <i32 0, i32 5, i32 6, i32 7>
+// CHECK-NEXT:    ret <4 x i16> [[VSET_LANE]]
+//
+poly16x4_t test_vcopy_lane_p16(poly16x4_t a, poly16x4_t b) {
+  return vcopy_lane_p16(a, 0, b, 0);
+}
+
+// CHECK-LABEL: @test_vcopy_lane_p64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    ret <1 x i64> [[B:%.*]]
+//
+poly64x1_t test_vcopy_lane_p64(poly64x1_t a, poly64x1_t b) {
+  return vcopy_lane_p64(a, 0, b, 0);
+}
+
+// CHECK-LABEL: @test_vcopyq_lane_p8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <8 x i8> [[B:%.*]], <8 x i8> poison, <16 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+// CHECK-NEXT:    [[VSET_LANE:%.*]] = shufflevector <16 x i8> [[TMP0]], <16 x i8> [[A:%.*]], <16 x i32> <i32 0, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+// CHECK-NEXT:    ret <16 x i8> [[VSET_LANE]]
+//
+poly8x16_t test_vcopyq_lane_p8(poly8x16_t a, poly8x8_t b){
+  return vcopyq_lane_p8(a, 0, b, 0);
+}
+
+// CHECK-LABEL: @test_vcopyq_lane_p16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <4 x i16> [[B:%.*]], <4 x i16> poison, <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+// CHECK-NEXT:    [[VSET_LANE:%.*]] = shufflevector <8 x i16> [[TMP0]], <8 x i16> [[A:%.*]], <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK-NEXT:    ret <8 x i16> [[VSET_LANE]]
+//
+poly16x8_t test_vcopyq_lane_p16(poly16x8_t a, poly16x4_t b){
+  return vcopyq_lane_p16(a, 0, b, 0);
+}
+
+// CHECK-LABEL: @test_vcopyq_lane_p64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <1 x i64> [[B:%.*]], <1 x i64> poison, <2 x i32> <i32 0, i32 poison>
+// CHECK-NEXT:    [[VSET_LANE:%.*]] = shufflevector <2 x i64> [[TMP0]], <2 x i64> [[A:%.*]], <2 x i32> <i32 0, i32 3>
+// CHECK-NEXT:    ret <2 x i64> [[VSET_LANE]]
+//
+poly64x2_t test_vcopyq_lane_p64(poly64x2_t a, poly64x1_t b){
+  return vcopyq_lane_p64(a, 0, b, 0);
+}
+
+// CHECK-LABEL: @test_vcopy_laneq_p8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VGETQ_LANE:%.*]] = extractelement <16 x i8> [[B:%.*]], i64 0
+// CHECK-NEXT:    [[VSET_LANE:%.*]] = insertelement <8 x i8> [[A:%.*]], i8 [[VGETQ_LANE]], i64 0
+// CHECK-NEXT:    ret <8 x i8> [[VSET_LANE]]
+//
+poly8x8_t test_vcopy_laneq_p8(poly8x8_t a, poly8x16_t b){
+  return vcopy_laneq_p8(a, 0, b, 0);
+}
+
+// CHECK-LABEL: @test_vcopy_laneq_p16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[B:%.*]], i64 0
+// CHECK-NEXT:    [[VSET_LANE:%.*]] = insertelement <4 x i16> [[A:%.*]], i16 [[VGETQ_LANE]], i64 0
+// CHECK-NEXT:    ret <4 x i16> [[VSET_LANE]]
+//
+poly16x4_t test_vcopy_laneq_p16(poly16x4_t a, poly16x8_t b){
+  return vcopy_laneq_p16(a, 0, b, 0);
+}
+
+// CHECK-LABEL: @test_vcopy_laneq_p64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VSET_LANE:%.*]] = shufflevector <2 x i64> [[B:%.*]], <2 x i64> poison, <1 x i32> zeroinitializer
+// CHECK-NEXT:    ret <1 x i64> [[VSET_LANE]]
+//
+poly64x1_t test_vcopy_laneq_p64(poly64x1_t a, poly64x2_t b){
+  return vcopy_laneq_p64(a, 0, b, 0);
+}
+
+// CHECK-LABEL: @test_vcopyq_laneq_p8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VSET_LANE:%.*]] = shufflevector <16 x i8> [[B:%.*]], <16 x i8> [[A:%.*]], <16 x i32> <i32 0, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+// CHECK-NEXT:    ret <16 x i8> [[VSET_LANE]]
+//
+poly8x16_t test_vcopyq_laneq_p8(poly8x16_t a, poly8x16_t b){
+  return vcopyq_laneq_p8(a, 0, b, 0);
+}
+
+// CHECK-LABEL: @test_vcopyq_laneq_p16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VSET_LANE:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> [[A:%.*]], <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK-NEXT:    ret <8 x i16> [[VSET_LANE]]
+//
+poly16x8_t test_vcopyq_laneq_p16(poly16x8_t a, poly16x8_t b){
+  return vcopyq_laneq_p16(a, 0, b, 0);
+}
+
+// CHECK-LABEL: @test_vcopyq_laneq_p64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VSET_LANE:%.*]] = shufflevector <2 x i64> [[B:%.*]], <2 x i64> [[A:%.*]], <2 x i32> <i32 0, i32 3>
+// CHECK-NEXT:    ret <2 x i64> [[VSET_LANE]]
+//
+poly64x2_t test_vcopyq_laneq_p64(poly64x2_t a, poly64x2_t b){
+  return vcopyq_laneq_p64(a, 0, b, 0);
+}
diff --git a/clang/utils/TableGen/NeonEmitter.cpp b/clang/utils/TableGen/NeonEmitter.cpp
index da3bbd4303074..946a799a4f6a5 100644
--- a/clang/utils/TableGen/NeonEmitter.cpp
+++ b/clang/utils/TableGen/NeonEmitter.cpp
@@ -1401,14 +1401,8 @@ void Intrinsic::emitBodyAsBuiltinCall() {
       if (LocalCK == ClassB || (T.isHalf() && !T.isScalarForMangling())) {
         CastToType.makeInteger(8, true);
         Arg = "__builtin_bit_cast(" + CastToType.str() + ", " + Arg + ")";
-      } else if ((T.isPoly() ||
-                  (T.isVector() && T.isInteger() && !T.isSigned() &&
-                   (StringRef(Name).contains("_p8") ||
-                    StringRef(Name).contains("_p16") ||
-                    StringRef(Name).contains("_p64"))))) {
-        CastToType.makeSigned();
-        Arg = "__builtin_bit_cast(" + CastToType.str() + ", " + Arg + ")";
-      } else if (LocalCK == ClassI && CastToType.isInteger()) {
+      } else if (LocalCK == ClassI &&
+           (CastToType.isInteger() || CastToType.isPoly())) {
         CastToType.makeSigned();
         Arg = "__builtin_bit_cast(" + CastToType.str() + ", " + Arg + ")";
       }
diff --git a/llvm/test/CodeGen/AArch64/neon-bitcast-poly.ll b/llvm/test/CodeGen/AArch64/neon-bitcast-poly.ll
deleted file mode 100644
index b577eb1e34b09..0000000000000
--- a/llvm/test/CodeGen/AArch64/neon-bitcast-poly.ll
+++ /dev/null
@@ -1,51 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mtriple=aarch64-linux-gnu -o - %s | FileCheck %s
-
-; This test verifies that NEON intrinsics using polynomial types (poly8/16/64) emit correct AArch64 instructions
-; after bitcasting to signed integer vectors. These intrinsics would previously fail under -fno-lax-vector-conversions.
-
-define <8 x i8> @_Z18test_vcopy_lane_p811__Poly8x8_tS_(<8 x i8> %a, <8 x i8> %b) {
-; CHECK-LABEL: _Z18test_vcopy_lane_p811__Poly8x8_tS_:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT:    mov v0.b[0], v1.b[0]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT:    ret
-entry:
-  %vset_lane = shufflevector <8 x i8> %b, <8 x i8> %a, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  ret <8 x i8> %vset_lane
-}
-
-define <4 x i16> @_Z18test_vset_lane_p16t12__Poly16x4_t(i16 %val, <4 x i16> %vec) {
-; CHECK-LABEL: _Z18test_vset_lane_p16t12__Poly16x4_t:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    mov v0.h[0], w0
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT:    ret
-entry:
-  %vset_lane = insertelement <4 x i16> %vec, i16 %val, i64 0
-  ret <4 x i16> %vset_lane
-}
-
-define i64 @_Z18test_vget_lane_p6412__Poly64x1_t(<1 x i64> %vec){
-; CHECK-LABEL: _Z18test_vget_lane_p6412__Poly64x1_t:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    fmov x0, d0
-; CHECK-NEXT:    ret
-entry:
-  %vget_lane = extractelement <1 x i64> %vec, i64 0
-  ret i64 %vget_lane
-}
-
-define <16 x i8> @_Z18test_vsetq_lane_p8h12__Poly8x16_t(i8 %val, <16 x i8> %vec){
-; CHECK-LABEL: _Z18test_vsetq_lane_p8h12__Poly8x16_t:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov v0.b[0], w0
-; CHECK-NEXT:    ret
-entry:
-  %vset_lane = insertelement <16 x i8> %vec, i8 %val, i64 0
-  ret <16 x i8> %vset_lane
-}
