llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clangir Author: Omar Hossam (moar55) <details> <summary>Changes</summary> This PR implements CodeGen for rotate builtins in CIR upstream. Issue #<!-- -->167765 --- Patch is 38.27 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/169218.diff 4 Files Affected: - (modified) clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp (+36-8) - (added) clang/test/CIR/CodeGen/X86/builtin_test_helpers.h (+304) - (added) clang/test/CIR/CodeGen/X86/xop-builtin.c (+82) - (added) shell.nix (+14) ``````````diff diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp index a30c79a83751a..d1a6962507db1 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp @@ -13,6 +13,7 @@ #include "CIRGenFunction.h" #include "CIRGenModule.h" +#include "mlir/IR/ValueRange.h" #include "clang/Basic/Builtins.h" #include "clang/Basic/TargetBuiltins.h" #include "clang/CIR/MissingFeatures.h" @@ -21,12 +22,12 @@ using namespace clang; using namespace clang::CIRGen; template <typename... Operands> -static mlir::Value emitIntrinsicCallOp(CIRGenFunction &cgf, const CallExpr *e, - const std::string &str, - const mlir::Type &resTy, - Operands &&...op) { +static mlir::Value +emitIntrinsicCallOp(CIRGenFunction &cgf, const CallExpr *expr, + const std::string &str, const mlir::Type &resTy, + Operands &&...op) { CIRGenBuilderTy &builder = cgf.getBuilder(); - mlir::Location location = cgf.getLoc(e->getExprLoc()); + mlir::Location location = cgf.getLoc(expr->getExprLoc()); return cir::LLVMIntrinsicCallOp::create(builder, location, builder.getStringAttr(str), resTy, std::forward<Operands>(op)...) @@ -68,6 +69,26 @@ static mlir::Value emitVectorFCmp(CIRGenBuilderTy &builder, return bitCast; } +static mlir::Value emitX86FunnelShift(CIRGenFunction &cgf, const CallExpr *e, + mlir::Value &op0, mlir::Value &op1, + mlir::Value &amt, bool isRight) { + auto ty = op0.getType(); + + // Amount may be scalar immediate, in which case create a splat vector. + // Funnel shifts amounts are treated as modulo and types are all power-of-2 + // so we only care about the lowest log2 bits anyway. + if (amt.getType() != ty) { + amt = cgf.getBuilder().createIntCast( + amt, mlir::cast<cir::VectorType>(ty).getElementType()); + amt = cir::VecSplatOp::create(cgf.getBuilder(), cgf.getLoc(e->getExprLoc()), + ty, amt); + } + + const std::string intrinsicName = isRight ? "fshr" : "fshl"; + return emitIntrinsicCallOp(cgf, e, intrinsicName, ty, + mlir::ValueRange{op0, op1, amt}); +} + mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, const CallExpr *expr) { if (builtinID == Builtin::BI__builtin_cpu_is) { @@ -87,14 +108,15 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, // evaluation. assert(!cir::MissingFeatures::msvcBuiltins()); - // Find out if any arguments are required to be integer constant expressions. + // Find out if any arguments are required to be integer constant + // expressions. assert(!cir::MissingFeatures::handleBuiltinICEArguments()); // The operands of the builtin call llvm::SmallVector<mlir::Value> ops; - // `ICEArguments` is a bitmap indicating whether the argument at the i-th bit - // is required to be a constant integer expression. + // `ICEArguments` is a bitmap indicating whether the argument at the i-th + // bit is required to be a constant integer expression. unsigned iceArguments = 0; ASTContext::GetBuiltinTypeError error; getContext().GetBuiltinType(builtinID, error, &iceArguments); @@ -580,6 +602,10 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, case X86::BI__builtin_ia32_kshiftrihi: case X86::BI__builtin_ia32_kshiftrisi: case X86::BI__builtin_ia32_kshiftridi: + cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented X86 builtin call: ") + + getContext().BuiltinInfo.getName(builtinID)); + return {}; case X86::BI__builtin_ia32_vprotbi: case X86::BI__builtin_ia32_vprotwi: case X86::BI__builtin_ia32_vprotdi: @@ -590,12 +616,14 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, case X86::BI__builtin_ia32_prolq128: case X86::BI__builtin_ia32_prolq256: case X86::BI__builtin_ia32_prolq512: + return emitX86FunnelShift(*this, expr, ops[0], ops[1], ops[1], false); case X86::BI__builtin_ia32_prord128: case X86::BI__builtin_ia32_prord256: case X86::BI__builtin_ia32_prord512: case X86::BI__builtin_ia32_prorq128: case X86::BI__builtin_ia32_prorq256: case X86::BI__builtin_ia32_prorq512: + return emitX86FunnelShift(*this, expr, ops[0], ops[1], ops[1], true); case X86::BI__builtin_ia32_selectb_128: case X86::BI__builtin_ia32_selectb_256: case X86::BI__builtin_ia32_selectb_512: diff --git a/clang/test/CIR/CodeGen/X86/builtin_test_helpers.h b/clang/test/CIR/CodeGen/X86/builtin_test_helpers.h new file mode 100644 index 0000000000000..fcaf360626a2d --- /dev/null +++ b/clang/test/CIR/CodeGen/X86/builtin_test_helpers.h @@ -0,0 +1,304 @@ +/* Helper methods for builtin intrinsic tests */ + +#include <immintrin.h> + +#if defined(__cplusplus) && (__cplusplus >= 201103L) + +constexpr bool match_m64(__m64 _v, unsigned long long a) { + __v1du v = (__v1du)_v; + return v[0] == a; +} + +constexpr bool match_v1di(__m64 v, long long a) { + return v[0] == a; +} + +constexpr bool match_v1du(__m64 _v, unsigned long long a) { + __v1du v = (__v1du)_v; + return v[0] == a; +} + +constexpr bool match_v2si(__m64 _v, int a, int b) { + __v2si v = (__v2si)_v; + return v[0] == a && v[1] == b; +} + +constexpr bool match_v2su(__m64 _v, unsigned a, unsigned b) { + __v2su v = (__v2su)_v; + return v[0] == a && v[1] == b; +} + +constexpr bool match_v4hi(__m64 _v, short a, short b, short c, short d) { + __v4hi v = (__v4hi)_v; + return v[0] == a && v[1] == b && v[2] == c && v[3] == d; +} + +constexpr bool match_v4hu(__m64 _v, unsigned short a, unsigned short b, unsigned short c, unsigned short d) { + __v4hu v = (__v4hu)_v; + return v[0] == a && v[1] == b && v[2] == c && v[3] == d; +} + +constexpr bool match_v8qi(__m64 _v, signed char a, signed char b, signed char c, signed char d, signed char e, signed char f, signed char g, signed char h) { + __v8qs v = (__v8qs)_v; + return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] == f && v[6] == g && v[7] == h; +} + +constexpr bool match_v8qu(__m64 _v, unsigned char a, unsigned char b, unsigned char c, unsigned char d, unsigned char e, unsigned char f, unsigned char g, unsigned char h) { + __v8qu v = (__v8qu)_v; + return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] == f && v[6] == g && v[7] == h; +} + +constexpr bool match_m128(__m128 _v, float a, float b, float c, float d) { + __v4su v = (__v4su)_v; + return v[0] == __builtin_bit_cast(unsigned, a) && v[1] == __builtin_bit_cast(unsigned, b) && v[2] == __builtin_bit_cast(unsigned, c) && v[3] == __builtin_bit_cast(unsigned, d); +} + +constexpr bool match_m128d(__m128d _v, double a, double b) { + __v2du v = (__v2du)_v; + return v[0] == __builtin_bit_cast(unsigned long long, a) && v[1] == __builtin_bit_cast(unsigned long long, b); +} + +#ifdef __SSE2__ +constexpr bool match_m128h(__m128h _v, _Float16 __e00, _Float16 __e01, _Float16 __e02, _Float16 __e03, _Float16 __e04, _Float16 __e05, _Float16 __e06, _Float16 __e07) { + __v8hu v = (__v8hu)_v; + return v[ 0] == __builtin_bit_cast(unsigned short, __e00) && v[ 1] == __builtin_bit_cast(unsigned short, __e01) && v[ 2] == __builtin_bit_cast(unsigned short, __e02) && v[ 3] == __builtin_bit_cast(unsigned short, __e03) && + v[ 4] == __builtin_bit_cast(unsigned short, __e04) && v[ 5] == __builtin_bit_cast(unsigned short, __e05) && v[ 6] == __builtin_bit_cast(unsigned short, __e06) && v[ 7] == __builtin_bit_cast(unsigned short, __e07); +} +#endif + +constexpr bool match_m128i(__m128i _v, unsigned long long a, unsigned long long b) { + __v2du v = (__v2du)_v; + return v[0] == a && v[1] == b; +} + +constexpr bool match_v2di(__m128i v, long long a, long long b) { + return v[0] == a && v[1] == b; +} + +constexpr bool match_v2du(__m128i _v, unsigned long long a, unsigned long long b) { + __v2du v = (__v2du)_v; + return v[0] == a && v[1] == b; +} + +constexpr bool match_v4si(__m128i _v, int a, int b, int c, int d) { + __v4si v = (__v4si)_v; + return v[0] == a && v[1] == b && v[2] == c && v[3] == d; +} + +constexpr bool match_v4su(__m128i _v, unsigned a, unsigned b, unsigned c, unsigned d) { + __v4su v = (__v4su)_v; + return v[0] == a && v[1] == b && v[2] == c && v[3] == d; +} + +constexpr bool match_v8hi(__m128i _v, short a, short b, short c, short d, short e, short f, short g, short h) { + __v8hi v = (__v8hi)_v; + return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] == f && v[6] == g && v[7] == h; +} + +constexpr bool match_v8hu(__m128i _v, unsigned short a, unsigned short b, unsigned short c, unsigned short d, unsigned short e, unsigned short f, unsigned short g, unsigned short h) { + __v8hu v = (__v8hu)_v; + return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] == f && v[6] == g && v[7] == h; +} + +constexpr bool match_v16qi(__m128i _v, signed char a, signed char b, signed char c, signed char d, signed char e, signed char f, signed char g, signed char h, signed char i, signed char j, signed char k, signed char l, signed char m, signed char n, signed char o, signed char p) { + __v16qs v = (__v16qs)_v; + return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] == f && v[6] == g && v[7] == h && v[8] == i && v[9] == j && v[10] == k && v[11] == l && v[12] == m && v[13] == n && v[14] == o && v[15] == p; +} + +constexpr bool match_v16qu(__m128i _v, unsigned char a, unsigned char b, unsigned char c, unsigned char d, unsigned char e, unsigned char f, unsigned char g, unsigned char h, unsigned char i, unsigned char j, unsigned char k, unsigned char l, unsigned char m, unsigned char n, unsigned char o, unsigned char p) { + __v16qu v = (__v16qu)_v; + return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] == f && v[6] == g && v[7] == h && v[8] == i && v[9] == j && v[10] == k && v[11] == l && v[12] == m && v[13] == n && v[14] == o && v[15] == p; +} + +constexpr bool match_m256(__m256 _v, float __e00, float __e01, float __e02, float __e03, float __e04, float __e05, float __e06, float __e07) { + __v8su v = (__v8su)_v; + return v[ 0] == __builtin_bit_cast(unsigned, __e00) && v[ 1] == __builtin_bit_cast(unsigned, __e01) && v[ 2] == __builtin_bit_cast(unsigned, __e02) && v[ 3] == __builtin_bit_cast(unsigned, __e03) && + v[ 4] == __builtin_bit_cast(unsigned, __e04) && v[ 5] == __builtin_bit_cast(unsigned, __e05) && v[ 6] == __builtin_bit_cast(unsigned, __e06) && v[ 7] == __builtin_bit_cast(unsigned, __e07); +} + +constexpr bool match_m256d(__m256d _v, double a, double b, double c, double d) { + __v4du v = (__v4du)_v; + return v[0] == __builtin_bit_cast(unsigned long long, a) && v[1] == __builtin_bit_cast(unsigned long long, b) && v[2] == __builtin_bit_cast(unsigned long long, c) && v[3] == __builtin_bit_cast(unsigned long long, d); +} + +#ifdef __SSE2__ +constexpr bool match_m256h(__m256h _v, _Float16 __e00, _Float16 __e01, _Float16 __e02, _Float16 __e03, _Float16 __e04, _Float16 __e05, _Float16 __e06, _Float16 __e07, + _Float16 __e08, _Float16 __e09, _Float16 __e10, _Float16 __e11, _Float16 __e12, _Float16 __e13, _Float16 __e14, _Float16 __e15) { + __v16hu v = (__v16hu)_v; + return v[ 0] == __builtin_bit_cast(unsigned short, __e00) && v[ 1] == __builtin_bit_cast(unsigned short, __e01) && v[ 2] == __builtin_bit_cast(unsigned short, __e02) && v[ 3] == __builtin_bit_cast(unsigned short, __e03) && + v[ 4] == __builtin_bit_cast(unsigned short, __e04) && v[ 5] == __builtin_bit_cast(unsigned short, __e05) && v[ 6] == __builtin_bit_cast(unsigned short, __e06) && v[ 7] == __builtin_bit_cast(unsigned short, __e07) && + v[ 8] == __builtin_bit_cast(unsigned short, __e08) && v[ 9] == __builtin_bit_cast(unsigned short, __e09) && v[10] == __builtin_bit_cast(unsigned short, __e10) && v[11] == __builtin_bit_cast(unsigned short, __e11) && + v[12] == __builtin_bit_cast(unsigned short, __e12) && v[13] == __builtin_bit_cast(unsigned short, __e13) && v[14] == __builtin_bit_cast(unsigned short, __e14) && v[15] == __builtin_bit_cast(unsigned short, __e15); +} +#endif + +constexpr bool match_m256i(__m256i _v, unsigned long long a, unsigned long long b, unsigned long long c, unsigned long long d) { + __v4du v = (__v4du)_v; + return v[0] == a && v[1] == b && v[2] == c && v[3] == d; +} + +constexpr bool match_v4di(__m256i _v, long long a, long long b, long long c, long long d) { + __v4di v = (__v4di)_v; + return v[0] == a && v[1] == b && v[2] == c && v[3] == d; +} + +constexpr bool match_v8si(__m256i _v, int a, int b, int c, int d, int e, int f, int g, int h) { + __v8si v = (__v8si)_v; + return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] == f && v[6] == g && v[7] == h; +} + +constexpr bool match_v8su(__m256i _v, unsigned a, unsigned b, unsigned c, unsigned d, unsigned e, unsigned f, unsigned g, unsigned h) { + __v8su v = (__v8su)_v; + return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] == f && v[6] == g && v[7] == h; +} + +constexpr bool match_v16hi(__m256i _v, short a, short b, short c, short d, short e, short f, short g, short h, short i, short j, short k, short l, short m, short n, short o, short p) { + __v16hi v = (__v16hi)_v; + return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] == f && v[6] == g && v[7] == h && v[8] == i && v[9] == j && v[10] == k && v[11] == l && v[12] == m && v[13] == n && v[14] == o && v[15] == p; +} + +constexpr bool match_v16hu(__m256i _v, unsigned short a, unsigned short b, unsigned short c, unsigned short d, unsigned short e, unsigned short f, unsigned short g, unsigned short h, unsigned short i, unsigned short j, unsigned short k, unsigned short l, unsigned short m, unsigned short n, unsigned short o, unsigned short p) { + __v16hu v = (__v16hu)_v; + return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] == f && v[6] == g && v[7] == h && v[8] == i && v[9] == j && v[10] == k && v[11] == l && v[12] == m && v[13] == n && v[14] == o && v[15] == p; +} + +constexpr bool match_v32qi(__m256i _v, signed char __b00, signed char __b01, signed char __b02, signed char __b03, signed char __b04, signed char __b05, signed char __b06, signed char __b07, + signed char __b08, signed char __b09, signed char __b10, signed char __b11, signed char __b12, signed char __b13, signed char __b14, signed char __b15, + signed char __b16, signed char __b17, signed char __b18, signed char __b19, signed char __b20, signed char __b21, signed char __b22, signed char __b23, + signed char __b24, signed char __b25, signed char __b26, signed char __b27, signed char __b28, signed char __b29, signed char __b30, signed char __b31) { + __v32qs v = (__v32qs)_v; + return v[ 0] == __b00 && v[ 1] == __b01 && v[ 2] == __b02 && v[ 3] == __b03 && v[ 4] == __b04 && v[ 5] == __b05 && v[ 6] == __b06 && v[ 7] == __b07 && + v[ 8] == __b08 && v[ 9] == __b09 && v[10] == __b10 && v[11] == __b11 && v[12] == __b12 && v[13] == __b13 && v[14] == __b14 && v[15] == __b15 && + v[16] == __b16 && v[17] == __b17 && v[18] == __b18 && v[19] == __b19 && v[20] == __b20 && v[21] == __b21 && v[22] == __b22 && v[23] == __b23 && + v[24] == __b24 && v[25] == __b25 && v[26] == __b26 && v[27] == __b27 && v[28] == __b28 && v[29] == __b29 && v[30] == __b30 && v[31] == __b31; +} + +constexpr bool match_v32qu(__m256i _v, unsigned char __b00, unsigned char __b01, unsigned char __b02, unsigned char __b03, unsigned char __b04, unsigned char __b05, unsigned char __b06, unsigned char __b07, + unsigned char __b08, unsigned char __b09, unsigned char __b10, unsigned char __b11, unsigned char __b12, unsigned char __b13, unsigned char __b14, unsigned char __b15, + unsigned char __b16, unsigned char __b17, unsigned char __b18, unsigned char __b19, unsigned char __b20, unsigned char __b21, unsigned char __b22, unsigned char __b23, + unsigned char __b24, unsigned char __b25, unsigned char __b26, unsigned char __b27, unsigned char __b28, unsigned char __b29, unsigned char __b30, unsigned char __b31) { + __v32qu v = (__v32qu)_v; + return v[ 0] == __b00 && v[ 1] == __b01 && v[ 2] == __b02 && v[ 3] == __b03 && v[ 4] == __b04 && v[ 5] == __b05 && v[ 6] == __b06 && v[ 7] == __b07 && + v[ 8] == __b08 && v[ 9] == __b09 && v[10] == __b10 && v[11] == __b11 && v[12] == __b12 && v[13] == __b13 && v[14] == __b14 && v[15] == __b15 && + v[16] == __b16 && v[17] == __b17 && v[18] == __b18 && v[19] == __b19 && v[20] == __b20 && v[21] == __b21 && v[22] == __b22 && v[23] == __b23 && + v[24] == __b24 && v[25] == __b25 && v[26] == __b26 && v[27] == __b27 && v[28] == __b28 && v[29] == __b29 && v[30] == __b30 && v[31] == __b31; +} + +constexpr bool match_m512(__m512 _v, float __e00, float __e01, float __e02, float __e03, float __e04, float __e05, float __e06, float __e07, float __e08, float __e09, float __e10, float __e11, float __e12, float __e13, float __e14, float __e15) { + __v16su v = (__v16su)_v; + return v[ 0] == __builtin_bit_cast(unsigned, __e00) && v[ 1] == __builtin_bit_cast(unsigned, __e01) && v[ 2] == __builtin_bit_cast(unsigned, __e02) && v[ 3] == __builtin_bit_cast(unsigned, __e03) && + v[ 4] == __builtin_bit_cast(unsigned, __e04) && v[ 5] == __builtin_bit_cast(unsigned, __e05) && v[ 6] == __builtin_bit_cast(unsigned, __e06) && v[ 7] == __builtin_bit_cast(unsigned, __e07) && + v[ 8] == __builtin_bit_cast(unsigned, __e08) && v[ 9] == __builtin_bit_cast(unsigned, __e09) && v[10] == __builtin_bit_cast(unsigned, __e10) && v[11] == __builtin_bit_cast(unsigned, __e11) && + v[12] == __builtin_bit_cast(unsigned, __e12) && v[13] == __builtin_bit_cast(unsigned, __e13) && v[14] == __builtin_bit_cast(unsigned, __e14) && v[15] == __builtin_bit_cast(unsigned, __e15); +} + +constexpr bool match_m512d(__m512d _v, double __e00, double __e01, double __e02, double __e03, double __e04, double __e05, double __e06, double __e07) { + __v8du v = (__v8du)_v; + return v[ 0] == __builtin_bit_cast(unsigned long long, __e00) && v[ 1] == __builtin_bit_cast(unsigned long long, __e01) && v[ 2] == __builtin_bit_cast(unsigned long long, __e02) && v[ 3] == __builtin_bit_cast(unsigned long long, __e03) && + v[ 4] == __builtin_bit_cast(unsigned long long, __e04) && v[ 5] == __builtin_bit_cast(unsigned long long, __e05) && v[ 6] == __builtin_bit_cast(unsigned long long, __e06) && v[ 7] == __builtin_bit_cast(unsigned long long, __e07); +} + +#ifdef __SSE2__ +constexpr bool match_m512h(__m512h _v, _Float16 __e00, _Float16 __e01, _Float16 __e02, _Float16 __e03, _Float16 __e04, _Float16 __e05, _Float16 __e06, _Float16 __e07, + _Float16 __e08, _Float16 __e09, _Float16 __e10, _Float16 __e11, _Float16 __e12, _Float16 __e13, _Float16 __e14, _Float16 __e15, + _Float16 __e16, _Float16 __e17, _Float16 __e18, _Float16 __e19, _Float16 __e20, _Float16 __e21, _Float16 __e22, _Float16 __e23, + _Float16 __e24, _Float16 __e25, _Float16 __e26, _Float16 __e27, _Float16 __e28, _Float16 __e29, _Float16 __e30, _Float16 __e31) { + __v32hu v = (__v32hu)_v; + return v[ 0] == __builtin_bit_cast(unsigned short, __e00) && v[ 1] == __builtin_bit_cast(unsigned short, __e01) && v[ 2] == __builtin_bit_cast(unsigned short, __e02) && v[ 3] == __builtin_bit_cast(unsigned short, __e03) && + v[ 4] == __builtin_bit_cast(unsigned short, __e04) && v[ 5] == __builtin_bit_cast(unsigned short, __e05) && v[ 6] == __builtin_bit_cast(unsigned short, __e06) && v[ 7] == __builtin_bit_... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/169218 _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
