dnsampaio created this revision. dnsampaio added a reviewer: LukeCheeseman. Herald added subscribers: cfe-commits, kristof.beyls, javed.absar. Herald added a project: clang.
Change the vsqadd scalar intrinsics to take their second argument as a signed value, not an unsigned one, according to https://developer.arm.com/architectures/instruction-sets/simd-isas/neon/intrinsics The existing unsigned argument can cause faulty code, because converting an out-of-range (e.g. negative) float to an unsigned type is undefined behavior, which llvm/clang optimizes away. Repository: rC Clang https://reviews.llvm.org/D64239 Files: include/clang/Basic/arm_neon.td test/CodeGen/aarch64-neon-intrinsics.c test/CodeGen/aarch64-neon-vsqadd-float-conversion.c
Index: test/CodeGen/aarch64-neon-vsqadd-float-conversion.c =================================================================== --- /dev/null +++ test/CodeGen/aarch64-neon-vsqadd-float-conversion.c @@ -0,0 +1,50 @@ +// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \ +// RUN: -fallow-half-arguments-and-returns -S -disable-O0-optnone -emit-llvm -o - %s \ +// RUN: | opt -S -mem2reg -dce \ +// RUN: | FileCheck %s + +#include <arm_neon.h> + +// Check float conversion is accepted for int argument +uint8_t test_vsqaddb_u8(){ + return vsqaddb_u8(1, -1.0f); +} + +uint16_t test_vsqaddh_u16() { + return vsqaddh_u16(1, -1.0f); +} + +uint32_t test_vsqadds_u32() { + return vsqadds_u32(1, -1.0f); +} + +uint64_t test_vsqaddd_u64() { + return vsqaddd_u64(1, -1.0f); +} + +// CHECK-LABEL: @test_vsqaddb_u8() +// CHECK: entry: +// CHECK-NEXT: [[T0:%.*]] = insertelement <8 x i8> undef, i8 1, i64 0 +// CHECK-NEXT: [[T1:%.*]] = insertelement <8 x i8> undef, i8 -1, i64 0 +// CHECK-NEXT: [[V:%.*]] = call <8 x i8> @llvm.aarch64.neon.usqadd.v8i8(<8 x i8> [[T0]], <8 x i8> [[T1]]) +// CHECK-NEXT: [[R:%.*]] = extractelement <8 x i8> [[V]], i64 0 +// CHECK-NEXT: ret i8 [[R]] + +// CHECK-LABEL: @test_vsqaddh_u16() +// CHECK: entry: +// CHECK-NEXT: [[T0:%.*]] = insertelement <4 x i16> undef, i16 1, i64 0 +// CHECK-NEXT: [[T1:%.*]] = insertelement <4 x i16> undef, i16 -1, i64 0 +// CHECK-NEXT: [[V:%.*]] = call <4 x i16> @llvm.aarch64.neon.usqadd.v4i16(<4 x i16> [[T0]], <4 x i16> [[T1]]) +// CHECK-NEXT: [[R:%.*]] = extractelement <4 x i16> [[V]], i64 0 +// CHECK-NEXT: ret i16 [[R]] + +// CHECK-LABEL: @test_vsqadds_u32() +// CHECK: entry: +// CHECK-NEXT: [[V:%.*]] = call i32 @llvm.aarch64.neon.usqadd.i32(i32 1, i32 -1) +// CHECK-NEXT: ret i32 [[V]] + +// CHECK-LABEL: @test_vsqaddd_u64() +// CHECK: entry: +// CHECK-NEXT: [[V:%.*]] = call i64 @llvm.aarch64.neon.usqadd.i64(i64 1, i64 -1) +// CHECK-NEXT: ret i64 [[V]] + Index: test/CodeGen/aarch64-neon-intrinsics.c 
=================================================================== --- test/CodeGen/aarch64-neon-intrinsics.c +++ test/CodeGen/aarch64-neon-intrinsics.c @@ -13913,7 +13913,7 @@ // CHECK: [[VSQADDB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.usqadd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) // CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VSQADDB_U8_I]], i64 0 // CHECK: ret i8 [[TMP2]] -uint8_t test_vsqaddb_u8(uint8_t a, uint8_t b) { +uint8_t test_vsqaddb_u8(uint8_t a, int8_t b) { return (uint8_t)vsqaddb_u8(a, b); } @@ -13923,21 +13923,21 @@ // CHECK: [[VSQADDH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.usqadd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) // CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VSQADDH_U16_I]], i64 0 // CHECK: ret i16 [[TMP2]] -uint16_t test_vsqaddh_u16(uint16_t a, uint16_t b) { +uint16_t test_vsqaddh_u16(uint16_t a, int16_t b) { return (uint16_t)vsqaddh_u16(a, b); } // CHECK-LABEL: @test_vsqadds_u32( // CHECK: [[VSQADDS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.usqadd.i32(i32 %a, i32 %b) // CHECK: ret i32 [[VSQADDS_U32_I]] -uint32_t test_vsqadds_u32(uint32_t a, uint32_t b) { +uint32_t test_vsqadds_u32(uint32_t a, int32_t b) { return (uint32_t)vsqadds_u32(a, b); } // CHECK-LABEL: @test_vsqaddd_u64( // CHECK: [[VSQADDD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.usqadd.i64(i64 %a, i64 %b) // CHECK: ret i64 [[VSQADDD_U64_I]] -uint64_t test_vsqaddd_u64(uint64_t a, uint64_t b) { +uint64_t test_vsqaddd_u64(uint64_t a, int64_t b) { return (uint64_t)vsqaddd_u64(a, b); } Index: include/clang/Basic/arm_neon.td =================================================================== --- include/clang/Basic/arm_neon.td +++ include/clang/Basic/arm_neon.td @@ -1337,7 +1337,7 @@ //////////////////////////////////////////////////////////////////////////////// // Scalar Unsigned Saturating Accumulated of Signed Value -def SCALAR_USQADD : SInst<"vsqadd", "sss", "SUcSUsSUiSUl">; +def SCALAR_USQADD : SInst<"vsqadd", "ss$", "SUcSUsSUiSUl">; 
//////////////////////////////////////////////////////////////////////////////// // Signed Saturating Doubling Multiply-Add Long
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits