Author: TelGome
Date: 2026-06-18T22:32:00+08:00
New Revision: b02659ab5c405dd221f625085e2656a190d87a54

URL: 
https://github.com/llvm/llvm-project/commit/b02659ab5c405dd221f625085e2656a190d87a54
DIFF: 
https://github.com/llvm/llvm-project/commit/b02659ab5c405dd221f625085e2656a190d87a54.diff

LOG: [RISCV][P-ext] Support Packed Absolute Value and Absolute Difference 
(#203840)

This PR adds support for the RISC-V P extension intrinsics for
[Packed Absolute Value and Absolute Difference](https://github.com/riscv/riscv-p-spec/blob/master/P-ext-intrinsics.adoc#packed-absolute-value-and-absolute-difference).

Added: 
    

Modified: 
    clang/include/clang/Basic/BuiltinsRISCV.td
    clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
    clang/lib/Headers/riscv_packed_simd.h
    clang/test/CodeGen/RISCV/rvp-intrinsics.c
    cross-project-tests/intrinsic-header-tests/riscv_packed_simd.c
    llvm/include/llvm/IR/IntrinsicsRISCV.td
    llvm/lib/Target/RISCV/RISCVISelLowering.cpp
    llvm/test/CodeGen/RISCV/rvp-simd-32.ll
    llvm/test/CodeGen/RISCV/rvp-simd-64.ll

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Basic/BuiltinsRISCV.td 
b/clang/include/clang/Basic/BuiltinsRISCV.td
index 185269bfc6d85..3a1b54763bae6 100644
--- a/clang/include/clang/Basic/BuiltinsRISCV.td
+++ b/clang/include/clang/Basic/BuiltinsRISCV.td
@@ -181,6 +181,18 @@ def pasubu_u8x8 : RISCVBuiltin<"_Vector<8, unsigned 
char>(_Vector<8, unsigned ch
 def pasubu_u16x4 : RISCVBuiltin<"_Vector<4, unsigned short>(_Vector<4, 
unsigned short>, _Vector<4, unsigned short>)">;
 def pasubu_u32x2 : RISCVBuiltin<"_Vector<2, unsigned int>(_Vector<2, unsigned 
int>, _Vector<2, unsigned int>)">;
 
+// Packed Absolute Value and Absolute Difference (32-bit)
+def pabd_i8x4 : RISCVBuiltin<"_Vector<4, unsigned char>(_Vector<4, signed 
char>, _Vector<4, signed char>)">;
+def pabd_i16x2 : RISCVBuiltin<"_Vector<2, unsigned short>(_Vector<2, short>, 
_Vector<2, short>)">;
+def pabdu_u8x4 : RISCVBuiltin<"_Vector<4, unsigned char>(_Vector<4, unsigned 
char>, _Vector<4, unsigned char>)">;
+def pabdu_u16x2 : RISCVBuiltin<"_Vector<2, unsigned short>(_Vector<2, unsigned 
short>, _Vector<2, unsigned short>)">;
+
+// Packed Absolute Value and Absolute Difference (64-bit)
+def pabd_i8x8 : RISCVBuiltin<"_Vector<8, unsigned char>(_Vector<8, signed 
char>, _Vector<8, signed char>)">;
+def pabd_i16x4 : RISCVBuiltin<"_Vector<4, unsigned short>(_Vector<4, short>, 
_Vector<4, short>)">;
+def pabdu_u8x8 : RISCVBuiltin<"_Vector<8, unsigned char>(_Vector<8, unsigned 
char>, _Vector<8, unsigned char>)">;
+def pabdu_u16x4 : RISCVBuiltin<"_Vector<4, unsigned short>(_Vector<4, unsigned 
short>, _Vector<4, unsigned short>)">;
+
 } // Features = "experimental-p"
 
 
//===----------------------------------------------------------------------===//

diff  --git a/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp 
b/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
index 8c0684110dad7..a1e9acb7ec2c8 100644
--- a/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
@@ -1219,7 +1219,16 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned 
BuiltinID,
   case RISCV::BI__builtin_riscv_pasubu_u16x2:
   case RISCV::BI__builtin_riscv_pasubu_u8x8:
   case RISCV::BI__builtin_riscv_pasubu_u16x4:
-  case RISCV::BI__builtin_riscv_pasubu_u32x2: {
+  case RISCV::BI__builtin_riscv_pasubu_u32x2:
+  // Packed Absolute Value and Absolute Difference
+  case RISCV::BI__builtin_riscv_pabd_i8x4:
+  case RISCV::BI__builtin_riscv_pabd_i16x2:
+  case RISCV::BI__builtin_riscv_pabd_i8x8:
+  case RISCV::BI__builtin_riscv_pabd_i16x4:
+  case RISCV::BI__builtin_riscv_pabdu_u8x4:
+  case RISCV::BI__builtin_riscv_pabdu_u16x2:
+  case RISCV::BI__builtin_riscv_pabdu_u8x8:
+  case RISCV::BI__builtin_riscv_pabdu_u16x4: {
     switch (BuiltinID) {
     default:
       llvm_unreachable("unexpected builtin ID");
@@ -1251,6 +1260,18 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned 
BuiltinID,
     case RISCV::BI__builtin_riscv_pasubu_u32x2:
       ID = Intrinsic::riscv_pasubu;
       break;
+    case RISCV::BI__builtin_riscv_pabd_i8x4:
+    case RISCV::BI__builtin_riscv_pabd_i16x2:
+    case RISCV::BI__builtin_riscv_pabd_i8x8:
+    case RISCV::BI__builtin_riscv_pabd_i16x4:
+      ID = Intrinsic::riscv_pabd;
+      break;
+    case RISCV::BI__builtin_riscv_pabdu_u8x4:
+    case RISCV::BI__builtin_riscv_pabdu_u16x2:
+    case RISCV::BI__builtin_riscv_pabdu_u8x8:
+    case RISCV::BI__builtin_riscv_pabdu_u16x4:
+      ID = Intrinsic::riscv_pabdu;
+      break;
     }
 
     IntrinsicTypes = {ResultType};

diff  --git a/clang/lib/Headers/riscv_packed_simd.h 
b/clang/lib/Headers/riscv_packed_simd.h
index 7e981c91ec3e1..56f6b108d5f14 100644
--- a/clang/lib/Headers/riscv_packed_simd.h
+++ b/clang/lib/Headers/riscv_packed_simd.h
@@ -92,6 +92,17 @@ typedef uint32_t uint32x2_t 
__attribute__((__vector_size__(8)));
     return (rty)(__rs1 op __rs2);                                              
\
   }
 
+#define __packed_pabs(name, ty, rty)                                           
\
+  static __inline__ rty __DEFAULT_FN_ATTRS __riscv_##name(ty __rs1) {          
\
+    return (rty)__builtin_elementwise_abs(__rs1);                              
\
+  }
+
+#define __packed_binary_builtin_cast(name, ty, rty, builtin)                   
\
+  static __inline__ rty __DEFAULT_FN_ATTRS __riscv_##name(ty __rs1,            
\
+                                                          ty __rs2) {          
\
+    return (rty)builtin(__rs1, __rs2);                                         
\
+  }
+
 // clang-format off: macro call sites have no trailing semicolons, which
 // confuses clang-format into a deeply nested expression.
 
@@ -375,6 +386,22 @@ __packed_binary_builtin(pasubu_u8x8, uint8x8_t, 
__builtin_riscv_pasubu_u8x8)
 __packed_binary_builtin(pasubu_u16x4, uint16x4_t, __builtin_riscv_pasubu_u16x4)
 __packed_binary_builtin(pasubu_u32x2, uint32x2_t, __builtin_riscv_pasubu_u32x2)
 
+/* Packed Absolute Value and Absolute Difference (32-bit) */
+__packed_pabs(pabs_i8x4, int8x4_t, uint8x4_t)
+__packed_pabs(pabs_i16x2, int16x2_t, uint16x2_t)
+__packed_binary_builtin_cast(pabd_i8x4, int8x4_t, uint8x4_t, 
__builtin_riscv_pabd_i8x4)
+__packed_binary_builtin_cast(pabd_i16x2, int16x2_t, uint16x2_t, 
__builtin_riscv_pabd_i16x2)
+__packed_binary_builtin_cast(pabdu_u8x4, uint8x4_t, uint8x4_t, 
__builtin_riscv_pabdu_u8x4)
+__packed_binary_builtin_cast(pabdu_u16x2, uint16x2_t, uint16x2_t, 
__builtin_riscv_pabdu_u16x2)
+
+/* Packed Absolute Value and Absolute Difference (64-bit) */
+__packed_pabs(pabs_i8x8, int8x8_t, uint8x8_t)
+__packed_pabs(pabs_i16x4, int16x4_t, uint16x4_t)
+__packed_binary_builtin_cast(pabd_i8x8, int8x8_t, uint8x8_t, 
__builtin_riscv_pabd_i8x8)
+__packed_binary_builtin_cast(pabd_i16x4, int16x4_t, uint16x4_t, 
__builtin_riscv_pabd_i16x4)
+__packed_binary_builtin_cast(pabdu_u8x8, uint8x8_t, uint8x8_t, 
__builtin_riscv_pabdu_u8x8)
+__packed_binary_builtin_cast(pabdu_u16x4, uint16x4_t, uint16x4_t, 
__builtin_riscv_pabdu_u16x4)
+
 // clang-format on
 
 #undef __packed_splat2
@@ -392,6 +419,8 @@ __packed_binary_builtin(pasubu_u32x2, uint32x2_t, 
__builtin_riscv_pasubu_u32x2)
 #undef __packed_sh1add
 #undef __packed_sh1sadd
 #undef __packed_cmp
+#undef __packed_pabs
+#undef __packed_binary_builtin_cast
 #undef __DEFAULT_FN_ATTRS
 
 #if defined(__cplusplus)

diff  --git a/clang/test/CodeGen/RISCV/rvp-intrinsics.c 
b/clang/test/CodeGen/RISCV/rvp-intrinsics.c
index 363bfa5f5d995..cc388d0ab0328 100644
--- a/clang/test/CodeGen/RISCV/rvp-intrinsics.c
+++ b/clang/test/CodeGen/RISCV/rvp-intrinsics.c
@@ -5235,3 +5235,259 @@ uint16x4_t test_pasubu_u16x4(uint16x4_t rs1, uint16x4_t 
rs2) {
 uint32x2_t test_pasubu_u32x2(uint32x2_t rs1, uint32x2_t rs2) {
   return __riscv_pasubu_u32x2(rs1, rs2);
 }
+
+// RV32-LABEL: define dso_local i32 @test_pabs_i8x4(
+// RV32-SAME: i32 noundef [[RS1_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT:  [[ENTRY:.*:]]
+// RV32-NEXT:    [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <4 x i8>
+// RV32-NEXT:    [[ELT_ABS_I:%.*]] = call <4 x i8> @llvm.abs.v4i8(<4 x i8> 
[[TMP0]], i1 false)
+// RV32-NEXT:    [[TMP1:%.*]] = bitcast <4 x i8> [[ELT_ABS_I]] to i32
+// RV32-NEXT:    ret i32 [[TMP1]]
+//
+// RV64-LABEL: define dso_local i32 @test_pabs_i8x4(
+// RV64-SAME: i32 noundef [[RS1_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT:  [[ENTRY:.*:]]
+// RV64-NEXT:    [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <4 x i8>
+// RV64-NEXT:    [[ELT_ABS_I:%.*]] = call <4 x i8> @llvm.abs.v4i8(<4 x i8> 
[[TMP0]], i1 false)
+// RV64-NEXT:    [[TMP1:%.*]] = bitcast <4 x i8> [[ELT_ABS_I]] to i32
+// RV64-NEXT:    ret i32 [[TMP1]]
+//
+uint8x4_t test_pabs_i8x4(int8x4_t rs1) {
+  return __riscv_pabs_i8x4(rs1);
+}
+
+// RV32-LABEL: define dso_local i32 @test_pabs_i16x2(
+// RV32-SAME: i32 noundef [[RS1_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT:  [[ENTRY:.*:]]
+// RV32-NEXT:    [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <2 x i16>
+// RV32-NEXT:    [[ELT_ABS_I:%.*]] = call <2 x i16> @llvm.abs.v2i16(<2 x i16> 
[[TMP0]], i1 false)
+// RV32-NEXT:    [[TMP1:%.*]] = bitcast <2 x i16> [[ELT_ABS_I]] to i32
+// RV32-NEXT:    ret i32 [[TMP1]]
+//
+// RV64-LABEL: define dso_local i32 @test_pabs_i16x2(
+// RV64-SAME: i32 noundef [[RS1_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT:  [[ENTRY:.*:]]
+// RV64-NEXT:    [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <2 x i16>
+// RV64-NEXT:    [[ELT_ABS_I:%.*]] = call <2 x i16> @llvm.abs.v2i16(<2 x i16> 
[[TMP0]], i1 false)
+// RV64-NEXT:    [[TMP1:%.*]] = bitcast <2 x i16> [[ELT_ABS_I]] to i32
+// RV64-NEXT:    ret i32 [[TMP1]]
+//
+uint16x2_t test_pabs_i16x2(int16x2_t rs1) {
+  return __riscv_pabs_i16x2(rs1);
+}
+
+// RV32-LABEL: define dso_local i32 @test_pabd_i8x4(
+// RV32-SAME: i32 noundef [[RS1_COERCE:%.*]], i32 noundef [[RS2_COERCE:%.*]]) 
#[[ATTR0]] {
+// RV32-NEXT:  [[ENTRY:.*:]]
+// RV32-NEXT:    [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <4 x i8>
+// RV32-NEXT:    [[TMP1:%.*]] = bitcast i32 [[RS2_COERCE]] to <4 x i8>
+// RV32-NEXT:    [[TMP2:%.*]] = call <4 x i8> @llvm.riscv.pabd.v4i8(<4 x i8> 
[[TMP0]], <4 x i8> [[TMP1]])
+// RV32-NEXT:    [[TMP3:%.*]] = bitcast <4 x i8> [[TMP2]] to i32
+// RV32-NEXT:    ret i32 [[TMP3]]
+//
+// RV64-LABEL: define dso_local i32 @test_pabd_i8x4(
+// RV64-SAME: i32 noundef [[RS1_COERCE:%.*]], i32 noundef [[RS2_COERCE:%.*]]) 
#[[ATTR0]] {
+// RV64-NEXT:  [[ENTRY:.*:]]
+// RV64-NEXT:    [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <4 x i8>
+// RV64-NEXT:    [[TMP1:%.*]] = bitcast i32 [[RS2_COERCE]] to <4 x i8>
+// RV64-NEXT:    [[TMP2:%.*]] = call <4 x i8> @llvm.riscv.pabd.v4i8(<4 x i8> 
[[TMP0]], <4 x i8> [[TMP1]])
+// RV64-NEXT:    [[TMP3:%.*]] = bitcast <4 x i8> [[TMP2]] to i32
+// RV64-NEXT:    ret i32 [[TMP3]]
+//
+uint8x4_t test_pabd_i8x4(int8x4_t rs1, int8x4_t rs2) {
+  return __riscv_pabd_i8x4(rs1, rs2);
+}
+
+// RV32-LABEL: define dso_local i32 @test_pabd_i16x2(
+// RV32-SAME: i32 noundef [[RS1_COERCE:%.*]], i32 noundef [[RS2_COERCE:%.*]]) 
#[[ATTR0]] {
+// RV32-NEXT:  [[ENTRY:.*:]]
+// RV32-NEXT:    [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <2 x i16>
+// RV32-NEXT:    [[TMP1:%.*]] = bitcast i32 [[RS2_COERCE]] to <2 x i16>
+// RV32-NEXT:    [[TMP2:%.*]] = call <2 x i16> @llvm.riscv.pabd.v2i16(<2 x 
i16> [[TMP0]], <2 x i16> [[TMP1]])
+// RV32-NEXT:    [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32
+// RV32-NEXT:    ret i32 [[TMP3]]
+//
+// RV64-LABEL: define dso_local i32 @test_pabd_i16x2(
+// RV64-SAME: i32 noundef [[RS1_COERCE:%.*]], i32 noundef [[RS2_COERCE:%.*]]) 
#[[ATTR0]] {
+// RV64-NEXT:  [[ENTRY:.*:]]
+// RV64-NEXT:    [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <2 x i16>
+// RV64-NEXT:    [[TMP1:%.*]] = bitcast i32 [[RS2_COERCE]] to <2 x i16>
+// RV64-NEXT:    [[TMP2:%.*]] = call <2 x i16> @llvm.riscv.pabd.v2i16(<2 x 
i16> [[TMP0]], <2 x i16> [[TMP1]])
+// RV64-NEXT:    [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32
+// RV64-NEXT:    ret i32 [[TMP3]]
+//
+uint16x2_t test_pabd_i16x2(int16x2_t rs1, int16x2_t rs2) {
+  return __riscv_pabd_i16x2(rs1, rs2);
+}
+
+// RV32-LABEL: define dso_local i32 @test_pabdu_u8x4(
+// RV32-SAME: i32 noundef [[RS1_COERCE:%.*]], i32 noundef [[RS2_COERCE:%.*]]) 
#[[ATTR0]] {
+// RV32-NEXT:  [[ENTRY:.*:]]
+// RV32-NEXT:    [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <4 x i8>
+// RV32-NEXT:    [[TMP1:%.*]] = bitcast i32 [[RS2_COERCE]] to <4 x i8>
+// RV32-NEXT:    [[TMP2:%.*]] = call <4 x i8> @llvm.riscv.pabdu.v4i8(<4 x i8> 
[[TMP0]], <4 x i8> [[TMP1]])
+// RV32-NEXT:    [[TMP3:%.*]] = bitcast <4 x i8> [[TMP2]] to i32
+// RV32-NEXT:    ret i32 [[TMP3]]
+//
+// RV64-LABEL: define dso_local i32 @test_pabdu_u8x4(
+// RV64-SAME: i32 noundef [[RS1_COERCE:%.*]], i32 noundef [[RS2_COERCE:%.*]]) 
#[[ATTR0]] {
+// RV64-NEXT:  [[ENTRY:.*:]]
+// RV64-NEXT:    [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <4 x i8>
+// RV64-NEXT:    [[TMP1:%.*]] = bitcast i32 [[RS2_COERCE]] to <4 x i8>
+// RV64-NEXT:    [[TMP2:%.*]] = call <4 x i8> @llvm.riscv.pabdu.v4i8(<4 x i8> 
[[TMP0]], <4 x i8> [[TMP1]])
+// RV64-NEXT:    [[TMP3:%.*]] = bitcast <4 x i8> [[TMP2]] to i32
+// RV64-NEXT:    ret i32 [[TMP3]]
+//
+uint8x4_t test_pabdu_u8x4(uint8x4_t rs1, uint8x4_t rs2) {
+  return __riscv_pabdu_u8x4(rs1, rs2);
+}
+
+// RV32-LABEL: define dso_local i32 @test_pabdu_u16x2(
+// RV32-SAME: i32 noundef [[RS1_COERCE:%.*]], i32 noundef [[RS2_COERCE:%.*]]) 
#[[ATTR0]] {
+// RV32-NEXT:  [[ENTRY:.*:]]
+// RV32-NEXT:    [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <2 x i16>
+// RV32-NEXT:    [[TMP1:%.*]] = bitcast i32 [[RS2_COERCE]] to <2 x i16>
+// RV32-NEXT:    [[TMP2:%.*]] = call <2 x i16> @llvm.riscv.pabdu.v2i16(<2 x 
i16> [[TMP0]], <2 x i16> [[TMP1]])
+// RV32-NEXT:    [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32
+// RV32-NEXT:    ret i32 [[TMP3]]
+//
+// RV64-LABEL: define dso_local i32 @test_pabdu_u16x2(
+// RV64-SAME: i32 noundef [[RS1_COERCE:%.*]], i32 noundef [[RS2_COERCE:%.*]]) 
#[[ATTR0]] {
+// RV64-NEXT:  [[ENTRY:.*:]]
+// RV64-NEXT:    [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <2 x i16>
+// RV64-NEXT:    [[TMP1:%.*]] = bitcast i32 [[RS2_COERCE]] to <2 x i16>
+// RV64-NEXT:    [[TMP2:%.*]] = call <2 x i16> @llvm.riscv.pabdu.v2i16(<2 x 
i16> [[TMP0]], <2 x i16> [[TMP1]])
+// RV64-NEXT:    [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32
+// RV64-NEXT:    ret i32 [[TMP3]]
+//
+uint16x2_t test_pabdu_u16x2(uint16x2_t rs1, uint16x2_t rs2) {
+  return __riscv_pabdu_u16x2(rs1, rs2);
+}
+
+// RV32-LABEL: define dso_local i64 @test_pabs_i8x8(
+// RV32-SAME: i64 noundef [[RS1_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT:  [[ENTRY:.*:]]
+// RV32-NEXT:    [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <8 x i8>
+// RV32-NEXT:    [[ELT_ABS_I:%.*]] = call <8 x i8> @llvm.abs.v8i8(<8 x i8> 
[[TMP0]], i1 false)
+// RV32-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8> [[ELT_ABS_I]] to i64
+// RV32-NEXT:    ret i64 [[TMP1]]
+//
+// RV64-LABEL: define dso_local i64 @test_pabs_i8x8(
+// RV64-SAME: i64 noundef [[RS1_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT:  [[ENTRY:.*:]]
+// RV64-NEXT:    [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <8 x i8>
+// RV64-NEXT:    [[ELT_ABS_I:%.*]] = call <8 x i8> @llvm.abs.v8i8(<8 x i8> 
[[TMP0]], i1 false)
+// RV64-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8> [[ELT_ABS_I]] to i64
+// RV64-NEXT:    ret i64 [[TMP1]]
+//
+uint8x8_t test_pabs_i8x8(int8x8_t rs1) {
+  return __riscv_pabs_i8x8(rs1);
+}
+
+// RV32-LABEL: define dso_local i64 @test_pabs_i16x4(
+// RV32-SAME: i64 noundef [[RS1_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT:  [[ENTRY:.*:]]
+// RV32-NEXT:    [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <4 x i16>
+// RV32-NEXT:    [[ELT_ABS_I:%.*]] = call <4 x i16> @llvm.abs.v4i16(<4 x i16> 
[[TMP0]], i1 false)
+// RV32-NEXT:    [[TMP1:%.*]] = bitcast <4 x i16> [[ELT_ABS_I]] to i64
+// RV32-NEXT:    ret i64 [[TMP1]]
+//
+// RV64-LABEL: define dso_local i64 @test_pabs_i16x4(
+// RV64-SAME: i64 noundef [[RS1_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT:  [[ENTRY:.*:]]
+// RV64-NEXT:    [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <4 x i16>
+// RV64-NEXT:    [[ELT_ABS_I:%.*]] = call <4 x i16> @llvm.abs.v4i16(<4 x i16> 
[[TMP0]], i1 false)
+// RV64-NEXT:    [[TMP1:%.*]] = bitcast <4 x i16> [[ELT_ABS_I]] to i64
+// RV64-NEXT:    ret i64 [[TMP1]]
+//
+uint16x4_t test_pabs_i16x4(int16x4_t rs1) {
+  return __riscv_pabs_i16x4(rs1);
+}
+
+// RV32-LABEL: define dso_local i64 @test_pabd_i8x8(
+// RV32-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]]) 
#[[ATTR0]] {
+// RV32-NEXT:  [[ENTRY:.*:]]
+// RV32-NEXT:    [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <8 x i8>
+// RV32-NEXT:    [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <8 x i8>
+// RV32-NEXT:    [[TMP2:%.*]] = call <8 x i8> @llvm.riscv.pabd.v8i8(<8 x i8> 
[[TMP0]], <8 x i8> [[TMP1]])
+// RV32-NEXT:    [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to i64
+// RV32-NEXT:    ret i64 [[TMP3]]
+//
+// RV64-LABEL: define dso_local i64 @test_pabd_i8x8(
+// RV64-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]]) 
#[[ATTR0]] {
+// RV64-NEXT:  [[ENTRY:.*:]]
+// RV64-NEXT:    [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <8 x i8>
+// RV64-NEXT:    [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <8 x i8>
+// RV64-NEXT:    [[TMP2:%.*]] = call <8 x i8> @llvm.riscv.pabd.v8i8(<8 x i8> 
[[TMP0]], <8 x i8> [[TMP1]])
+// RV64-NEXT:    [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to i64
+// RV64-NEXT:    ret i64 [[TMP3]]
+//
+uint8x8_t test_pabd_i8x8(int8x8_t rs1, int8x8_t rs2) {
+  return __riscv_pabd_i8x8(rs1, rs2);
+}
+
+// RV32-LABEL: define dso_local i64 @test_pabd_i16x4(
+// RV32-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]]) 
#[[ATTR0]] {
+// RV32-NEXT:  [[ENTRY:.*:]]
+// RV32-NEXT:    [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <4 x i16>
+// RV32-NEXT:    [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <4 x i16>
+// RV32-NEXT:    [[TMP2:%.*]] = call <4 x i16> @llvm.riscv.pabd.v4i16(<4 x 
i16> [[TMP0]], <4 x i16> [[TMP1]])
+// RV32-NEXT:    [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64
+// RV32-NEXT:    ret i64 [[TMP3]]
+//
+// RV64-LABEL: define dso_local i64 @test_pabd_i16x4(
+// RV64-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]]) 
#[[ATTR0]] {
+// RV64-NEXT:  [[ENTRY:.*:]]
+// RV64-NEXT:    [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <4 x i16>
+// RV64-NEXT:    [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <4 x i16>
+// RV64-NEXT:    [[TMP2:%.*]] = call <4 x i16> @llvm.riscv.pabd.v4i16(<4 x 
i16> [[TMP0]], <4 x i16> [[TMP1]])
+// RV64-NEXT:    [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64
+// RV64-NEXT:    ret i64 [[TMP3]]
+//
+uint16x4_t test_pabd_i16x4(int16x4_t rs1, int16x4_t rs2) {
+  return __riscv_pabd_i16x4(rs1, rs2);
+}
+
+// RV32-LABEL: define dso_local i64 @test_pabdu_u8x8(
+// RV32-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]]) 
#[[ATTR0]] {
+// RV32-NEXT:  [[ENTRY:.*:]]
+// RV32-NEXT:    [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <8 x i8>
+// RV32-NEXT:    [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <8 x i8>
+// RV32-NEXT:    [[TMP2:%.*]] = call <8 x i8> @llvm.riscv.pabdu.v8i8(<8 x i8> 
[[TMP0]], <8 x i8> [[TMP1]])
+// RV32-NEXT:    [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to i64
+// RV32-NEXT:    ret i64 [[TMP3]]
+//
+// RV64-LABEL: define dso_local i64 @test_pabdu_u8x8(
+// RV64-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]]) 
#[[ATTR0]] {
+// RV64-NEXT:  [[ENTRY:.*:]]
+// RV64-NEXT:    [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <8 x i8>
+// RV64-NEXT:    [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <8 x i8>
+// RV64-NEXT:    [[TMP2:%.*]] = call <8 x i8> @llvm.riscv.pabdu.v8i8(<8 x i8> 
[[TMP0]], <8 x i8> [[TMP1]])
+// RV64-NEXT:    [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to i64
+// RV64-NEXT:    ret i64 [[TMP3]]
+//
+uint8x8_t test_pabdu_u8x8(uint8x8_t rs1, uint8x8_t rs2) {
+  return __riscv_pabdu_u8x8(rs1, rs2);
+}
+
+// RV32-LABEL: define dso_local i64 @test_pabdu_u16x4(
+// RV32-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]]) 
#[[ATTR0]] {
+// RV32-NEXT:  [[ENTRY:.*:]]
+// RV32-NEXT:    [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <4 x i16>
+// RV32-NEXT:    [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <4 x i16>
+// RV32-NEXT:    [[TMP2:%.*]] = call <4 x i16> @llvm.riscv.pabdu.v4i16(<4 x 
i16> [[TMP0]], <4 x i16> [[TMP1]])
+// RV32-NEXT:    [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64
+// RV32-NEXT:    ret i64 [[TMP3]]
+//
+// RV64-LABEL: define dso_local i64 @test_pabdu_u16x4(
+// RV64-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]]) 
#[[ATTR0]] {
+// RV64-NEXT:  [[ENTRY:.*:]]
+// RV64-NEXT:    [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <4 x i16>
+// RV64-NEXT:    [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <4 x i16>
+// RV64-NEXT:    [[TMP2:%.*]] = call <4 x i16> @llvm.riscv.pabdu.v4i16(<4 x 
i16> [[TMP0]], <4 x i16> [[TMP1]])
+// RV64-NEXT:    [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64
+// RV64-NEXT:    ret i64 [[TMP3]]
+//
+uint16x4_t test_pabdu_u16x4(uint16x4_t rs1, uint16x4_t rs2) {
+  return __riscv_pabdu_u16x4(rs1, rs2);
+}

diff  --git a/cross-project-tests/intrinsic-header-tests/riscv_packed_simd.c 
b/cross-project-tests/intrinsic-header-tests/riscv_packed_simd.c
index edbc56ce0e199..e9f90fc17e23d 100644
--- a/cross-project-tests/intrinsic-header-tests/riscv_packed_simd.c
+++ b/cross-project-tests/intrinsic-header-tests/riscv_packed_simd.c
@@ -1772,3 +1772,73 @@ uint16x4_t test_pasubu_u16x4(uint16x4_t a, uint16x4_t b) 
{
 uint32x2_t test_pasubu_u32x2(uint32x2_t a, uint32x2_t b) {
   return __riscv_pasubu_u32x2(a, b);
 }
+
+// CHECK-LABEL: test_pabs_i8x4:
+// CHECK:       pabs.b
+uint8x4_t test_pabs_i8x4(int8x4_t a) { return __riscv_pabs_i8x4(a); }
+
+// CHECK-LABEL: test_pabs_i16x2:
+// CHECK:       pabs.h
+uint16x2_t test_pabs_i16x2(int16x2_t a) { return __riscv_pabs_i16x2(a); }
+
+// CHECK-LABEL: test_pabd_i8x4:
+// CHECK:       pabd.b
+uint8x4_t test_pabd_i8x4(int8x4_t a, int8x4_t b) {
+  return __riscv_pabd_i8x4(a, b);
+}
+
+// CHECK-LABEL: test_pabd_i16x2:
+// CHECK:       pabd.h
+uint16x2_t test_pabd_i16x2(int16x2_t a, int16x2_t b) {
+  return __riscv_pabd_i16x2(a, b);
+}
+
+// CHECK-LABEL: test_pabdu_u8x4:
+// CHECK:       pabdu.b
+uint8x4_t test_pabdu_u8x4(uint8x4_t a, uint8x4_t b) {
+  return __riscv_pabdu_u8x4(a, b);
+}
+
+// CHECK-LABEL: test_pabdu_u16x2:
+// CHECK:       pabdu.h
+uint16x2_t test_pabdu_u16x2(uint16x2_t a, uint16x2_t b) {
+  return __riscv_pabdu_u16x2(a, b);
+}
+
+// CHECK-LABEL: test_pabs_i8x8:
+// RV32:        pabs.db
+// RV64:        pabs.b
+uint8x8_t test_pabs_i8x8(int8x8_t a) { return __riscv_pabs_i8x8(a); }
+
+// CHECK-LABEL: test_pabs_i16x4:
+// RV32:        pabs.dh
+// RV64:        pabs.h
+uint16x4_t test_pabs_i16x4(int16x4_t a) { return __riscv_pabs_i16x4(a); }
+
+// CHECK-LABEL: test_pabd_i8x8:
+// RV32:        pabd.db
+// RV64:        pabd.b
+uint8x8_t test_pabd_i8x8(int8x8_t a, int8x8_t b) {
+  return __riscv_pabd_i8x8(a, b);
+}
+
+// CHECK-LABEL: test_pabd_i16x4:
+// RV32:        pabd.dh
+// RV64:        pabd.h
+uint16x4_t test_pabd_i16x4(int16x4_t a, int16x4_t b) {
+  return __riscv_pabd_i16x4(a, b);
+}
+
+// CHECK-LABEL: test_pabdu_u8x8:
+// RV32:        pabdu.db
+// RV64:        pabdu.b
+uint8x8_t test_pabdu_u8x8(uint8x8_t a, uint8x8_t b) {
+  return __riscv_pabdu_u8x8(a, b);
+}
+
+// CHECK-LABEL: test_pabdu_u16x4:
+// RV32:        pabdu.dh
+// RV64:        pabdu.h
+uint16x4_t test_pabdu_u16x4(uint16x4_t a, uint16x4_t b) {
+  return __riscv_pabdu_u16x4(a, b);
+}

diff  --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td 
b/llvm/include/llvm/IR/IntrinsicsRISCV.td
index b2add44b19a5e..a82b17591f780 100644
--- a/llvm/include/llvm/IR/IntrinsicsRISCV.td
+++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td
@@ -2046,16 +2046,20 @@ def int_riscv_pause : DefaultAttrsIntrinsic<[], [], 
[IntrNoMem, IntrHasSideEffec
 // Packed SIMD extensions
 
//===----------------------------------------------------------------------===//
 let TargetPrefix = "riscv" in {
-// Packed Averaging Addition and Subtraction.
 class RVPBinaryIntrinsic
     : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
                             [LLVMMatchType<0>, LLVMMatchType<0>],
                             [IntrNoMem, IntrSpeculatable]>;
 
+  // Packed Averaging Addition and Subtraction.
   def int_riscv_paadd : RVPBinaryIntrinsic;
   def int_riscv_paaddu : RVPBinaryIntrinsic;
   def int_riscv_pasub : RVPBinaryIntrinsic;
   def int_riscv_pasubu : RVPBinaryIntrinsic;
+
+  // Packed Absolute Value and Absolute Difference
+  def int_riscv_pabd  : RVPBinaryIntrinsic;
+  def int_riscv_pabdu : RVPBinaryIntrinsic;
 } // TargetPrefix = "riscv"
 
 
//===----------------------------------------------------------------------===//

diff  --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp 
b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index d18b52437c98d..8b1a23896ff38 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -11761,7 +11761,9 @@ SDValue 
RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
   case Intrinsic::riscv_paadd:
   case Intrinsic::riscv_paaddu:
   case Intrinsic::riscv_pasub:
-  case Intrinsic::riscv_pasubu: {
+  case Intrinsic::riscv_pasubu:
+  case Intrinsic::riscv_pabd:
+  case Intrinsic::riscv_pabdu: {
     unsigned Opc;
     switch (IntNo) {
     case Intrinsic::riscv_paadd:
@@ -11776,6 +11778,12 @@ SDValue 
RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
     case Intrinsic::riscv_pasubu:
       Opc = RISCVISD::ASUBU;
       break;
+    case Intrinsic::riscv_pabd:
+      Opc = ISD::ABDS;
+      break;
+    case Intrinsic::riscv_pabdu:
+      Opc = ISD::ABDU;
+      break;
     }
 
     return DAG.getNode(Opc, DL, Op.getValueType(), Op.getOperand(1),
@@ -15688,7 +15696,9 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
     case Intrinsic::riscv_paadd:
     case Intrinsic::riscv_paaddu:
     case Intrinsic::riscv_pasub:
-    case Intrinsic::riscv_pasubu: {
+    case Intrinsic::riscv_pasubu:
+    case Intrinsic::riscv_pabd:
+    case Intrinsic::riscv_pabdu: {
       EVT VT = N->getValueType(0);
       if (!Subtarget.is64Bit() || (VT != MVT::v4i8 && VT != MVT::v2i16))
         return;
@@ -15707,6 +15717,12 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
       case Intrinsic::riscv_pasubu:
         Opc = RISCVISD::ASUBU;
         break;
+      case Intrinsic::riscv_pabd:
+        Opc = ISD::ABDS;
+        break;
+      case Intrinsic::riscv_pabdu:
+        Opc = ISD::ABDU;
+        break;
       }
 
       EVT WideVT = VT == MVT::v4i8 ? MVT::v8i8 : MVT::v4i16;

diff  --git a/llvm/test/CodeGen/RISCV/rvp-simd-32.ll 
b/llvm/test/CodeGen/RISCV/rvp-simd-32.ll
index ff44c8b3cda4e..56d30878d7ac2 100644
--- a/llvm/test/CodeGen/RISCV/rvp-simd-32.ll
+++ b/llvm/test/CodeGen/RISCV/rvp-simd-32.ll
@@ -2775,3 +2775,39 @@ define <2 x i16> @test_pasubu_v2i16(<2 x i16> %a, <2 x 
i16> %b) {
   %res = call <2 x i16> @llvm.riscv.pasubu.v2i16(<2 x i16> %a, <2 x i16> %b)
   ret <2 x i16> %res
 }
+
+define <4 x i8> @test_pabd_v4i8(<4 x i8> %a, <4 x i8> %b) {
+; CHECK-LABEL: test_pabd_v4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pabd.b a0, a0, a1
+; CHECK-NEXT:    ret
+  %res = call <4 x i8> @llvm.riscv.pabd.v4i8(<4 x i8> %a, <4 x i8> %b)
+  ret <4 x i8> %res
+}
+
+define <2 x i16> @test_pabd_v2i16(<2 x i16> %a, <2 x i16> %b) {
+; CHECK-LABEL: test_pabd_v2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pabd.h a0, a0, a1
+; CHECK-NEXT:    ret
+  %res = call <2 x i16> @llvm.riscv.pabd.v2i16(<2 x i16> %a, <2 x i16> %b)
+  ret <2 x i16> %res
+}
+
+define <4 x i8> @test_pabdu_v4i8(<4 x i8> %a, <4 x i8> %b) {
+; CHECK-LABEL: test_pabdu_v4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pabdu.b a0, a0, a1
+; CHECK-NEXT:    ret
+  %res = call <4 x i8> @llvm.riscv.pabdu.v4i8(<4 x i8> %a, <4 x i8> %b)
+  ret <4 x i8> %res
+}
+
+define <2 x i16> @test_pabdu_v2i16(<2 x i16> %a, <2 x i16> %b) {
+; CHECK-LABEL: test_pabdu_v2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pabdu.h a0, a0, a1
+; CHECK-NEXT:    ret
+  %res = call <2 x i16> @llvm.riscv.pabdu.v2i16(<2 x i16> %a, <2 x i16> %b)
+  ret <2 x i16> %res
+}

diff  --git a/llvm/test/CodeGen/RISCV/rvp-simd-64.ll 
b/llvm/test/CodeGen/RISCV/rvp-simd-64.ll
index 470434d27c0a3..8bc93f6e3c2e3 100644
--- a/llvm/test/CodeGen/RISCV/rvp-simd-64.ll
+++ b/llvm/test/CodeGen/RISCV/rvp-simd-64.ll
@@ -5094,3 +5094,59 @@ define <2 x i16> @test_pnsra_hs_mask(<2 x i32> %a, i32 
%shamt) {
   %trunc = trunc <2 x i32> %ashr to <2 x i16>
   ret <2 x i16> %trunc
 }
+
+define <8 x i8> @test_pabd_v8i8(<8 x i8> %a, <8 x i8> %b) {
+; RV32-LABEL: test_pabd_v8i8:
+; RV32:       # %bb.0:
+; RV32-NEXT:    pabd.db a0, a0, a2
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: test_pabd_v8i8:
+; RV64:       # %bb.0:
+; RV64-NEXT:    pabd.b a0, a0, a1
+; RV64-NEXT:    ret
+  %res = call <8 x i8> @llvm.riscv.pabd.v8i8(<8 x i8> %a, <8 x i8> %b)
+  ret <8 x i8> %res
+}
+
+define <4 x i16> @test_pabd_v4i16(<4 x i16> %a, <4 x i16> %b) {
+; RV32-LABEL: test_pabd_v4i16:
+; RV32:       # %bb.0:
+; RV32-NEXT:    pabd.dh a0, a0, a2
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: test_pabd_v4i16:
+; RV64:       # %bb.0:
+; RV64-NEXT:    pabd.h a0, a0, a1
+; RV64-NEXT:    ret
+  %res = call <4 x i16> @llvm.riscv.pabd.v4i16(<4 x i16> %a, <4 x i16> %b)
+  ret <4 x i16> %res
+}
+
+define <8 x i8> @test_pabdu_v8i8(<8 x i8> %a, <8 x i8> %b) {
+; RV32-LABEL: test_pabdu_v8i8:
+; RV32:       # %bb.0:
+; RV32-NEXT:    pabdu.db a0, a0, a2
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: test_pabdu_v8i8:
+; RV64:       # %bb.0:
+; RV64-NEXT:    pabdu.b a0, a0, a1
+; RV64-NEXT:    ret
+  %res = call <8 x i8> @llvm.riscv.pabdu.v8i8(<8 x i8> %a, <8 x i8> %b)
+  ret <8 x i8> %res
+}
+
+define <4 x i16> @test_pabdu_v4i16(<4 x i16> %a, <4 x i16> %b) {
+; RV32-LABEL: test_pabdu_v4i16:
+; RV32:       # %bb.0:
+; RV32-NEXT:    pabdu.dh a0, a0, a2
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: test_pabdu_v4i16:
+; RV64:       # %bb.0:
+; RV64-NEXT:    pabdu.h a0, a0, a1
+; RV64-NEXT:    ret
+  %res = call <4 x i16> @llvm.riscv.pabdu.v4i16(<4 x i16> %a, <4 x i16> %b)
+  ret <4 x i16> %res
+}


        
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to