r314104 - [Clang] Adding missing feature to goldmont
Author: mzuckerm Date: Mon Sep 25 06:49:32 2017 New Revision: 314104 URL: http://llvm.org/viewvc/llvm-project?rev=314104&view=rev Log: [Clang] Adding missing feature to goldmont Change-Id: I6c22478d16b8e02ce60dae2f8c80d43bc5ab3a9c Modified: cfe/trunk/lib/Basic/Targets/X86.cpp cfe/trunk/test/Preprocessor/predefined-arch-macros.c Modified: cfe/trunk/lib/Basic/Targets/X86.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets/X86.cpp?rev=314104&r1=314103&r2=314104&view=diff == --- cfe/trunk/lib/Basic/Targets/X86.cpp (original) +++ cfe/trunk/lib/Basic/Targets/X86.cpp Mon Sep 25 06:49:32 2017 @@ -215,6 +215,7 @@ bool X86TargetInfo::initFeatureMap( setFeatureEnabledImpl(Features, "xsaves", true); setFeatureEnabledImpl(Features, "clflushopt", true); setFeatureEnabledImpl(Features, "mpx", true); +setFeatureEnabledImpl(Features, "fsgsbase", true); LLVM_FALLTHROUGH; case CK_Silvermont: setFeatureEnabledImpl(Features, "aes", true); Modified: cfe/trunk/test/Preprocessor/predefined-arch-macros.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Preprocessor/predefined-arch-macros.c?rev=314104&r1=314103&r2=314104&view=diff == --- cfe/trunk/test/Preprocessor/predefined-arch-macros.c (original) +++ cfe/trunk/test/Preprocessor/predefined-arch-macros.c Mon Sep 25 06:49:32 2017 @@ -991,6 +991,7 @@ // RUN: | FileCheck %s -check-prefix=CHECK_GLM_M32 // CHECK_GLM_M32: #define __AES__ 1 // CHECK_GLM_M32: #define __CLFLUSHOPT__ 1 +// CHECK_GLM_M32: #define __FSGSBASE__ 1 // CHECK_GLM_M32: #define __FXSR__ 1 // CHECK_GLM_M32: #define __MMX__ 1 // CHECK_GLM_M32: #define __MPX__ 1 @@ -1030,6 +1031,7 @@ // RUN: | FileCheck %s -check-prefix=CHECK_GLM_M64 // CHECK_GLM_M64: #define __AES__ 1 // CHECK_GLM_M64: #define __CLFLUSHOPT__ 1 +// CHECK_GLM_M64: #define __FSGSBASE__ 1 // CHECK_GLM_M64: #define __FXSR__ 1 // CHECK_GLM_M64: #define __MMX__ 1 // CHECK_GLM_M64: #define __MPX__ 1 ___ cfe-commits mailing list cfe-commits@lists.llvm.org 
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r306673 - [Clang][X86][Goldmont]Adding new target-cpu: Goldmont
Author: mzuckerm Date: Thu Jun 29 06:41:04 2017 New Revision: 306673 URL: http://llvm.org/viewvc/llvm-project?rev=306673&view=rev Log: [Clang][X86][Goldmont]Adding new target-cpu: Goldmont [Clang-side] Connecting the Goldmont processor to its features. Reviewers: 1. igorb 2. delena 3. zvi Differential Revision: https://reviews.llvm.org/D34807 Modified: cfe/trunk/lib/Basic/Targets.cpp cfe/trunk/test/Preprocessor/predefined-arch-macros.c Modified: cfe/trunk/lib/Basic/Targets.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets.cpp?rev=306673&r1=306672&r2=306673&view=diff == --- cfe/trunk/lib/Basic/Targets.cpp (original) +++ cfe/trunk/lib/Basic/Targets.cpp Thu Jun 29 06:41:04 2017 @@ -2737,6 +2737,7 @@ class X86TargetInfo : public TargetInfo //@{ CK_Bonnell, CK_Silvermont, +CK_Goldmont, //@} /// \name Nehalem @@ -2878,6 +2879,7 @@ class X86TargetInfo : public TargetInfo .Case("atom", CK_Bonnell) // Legacy name. .Case("silvermont", CK_Silvermont) .Case("slm", CK_Silvermont) // Legacy name. +.Case("goldmont", CK_Goldmont) .Case("nehalem", CK_Nehalem) .Case("corei7", CK_Nehalem) // Legacy name. 
.Case("westmere", CK_Westmere) @@ -3093,6 +3095,7 @@ public: case CK_Penryn: case CK_Bonnell: case CK_Silvermont: +case CK_Goldmont: case CK_Nehalem: case CK_Westmere: case CK_SandyBridge: @@ -3285,6 +3288,21 @@ bool X86TargetInfo::initFeatureMap( setFeatureEnabledImpl(Features, "fxsr", true); setFeatureEnabledImpl(Features, "cx16", true); break; + case CK_Goldmont: +setFeatureEnabledImpl(Features, "sha", true); +setFeatureEnabledImpl(Features, "rdseed", true); +setFeatureEnabledImpl(Features, "xsave", true); +setFeatureEnabledImpl(Features, "xsaveopt", true); +setFeatureEnabledImpl(Features, "xsavec", true); +setFeatureEnabledImpl(Features, "xsaves", true); +setFeatureEnabledImpl(Features, "clflushopt", true); +setFeatureEnabledImpl(Features, "mpx", true); +setFeatureEnabledImpl(Features, "aes", true); +setFeatureEnabledImpl(Features, "pclmul", true); +setFeatureEnabledImpl(Features, "sse4.2", true); +setFeatureEnabledImpl(Features, "fxsr", true); +setFeatureEnabledImpl(Features, "cx16", true); + break; case CK_KNL: setFeatureEnabledImpl(Features, "avx512f", true); setFeatureEnabledImpl(Features, "avx512cd", true); @@ -3893,6 +3911,9 @@ void X86TargetInfo::getTargetDefines(con case CK_Silvermont: defineCPUMacros(Builder, "slm"); break; + case CK_Goldmont: +defineCPUMacros(Builder, "goldmont"); +break; case CK_Nehalem: case CK_Westmere: case CK_SandyBridge: Modified: cfe/trunk/test/Preprocessor/predefined-arch-macros.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Preprocessor/predefined-arch-macros.c?rev=306673&r1=306672&r2=306673&view=diff == --- cfe/trunk/test/Preprocessor/predefined-arch-macros.c (original) +++ cfe/trunk/test/Preprocessor/predefined-arch-macros.c Thu Jun 29 06:41:04 2017 @@ -986,6 +986,79 @@ // CHECK_ATOM_M64: #define __x86_64 1 // CHECK_ATOM_M64: #define __x86_64__ 1 // +// RUN: %clang -march=goldmont -m32 -E -dM %s -o - 2>&1 \ +// RUN: -target i386-unknown-linux \ +// RUN: | FileCheck %s -check-prefix=CHECK_GLM_M32 +// 
CHECK_GLM_M32: #define __AES__ 1 +// CHECK_GLM_M32: #define __CLFLUSHOPT__ 1 +// CHECK_GLM_M32: #define __FXSR__ 1 +// CHECK_GLM_M32: #define __MMX__ 1 +// CHECK_GLM_M32: #define __MPX__ 1 +// CHECK_GLM_M32: #define __PCLMUL__ 1 +// CHECK_GLM_M32: #define __POPCNT__ 1 +// CHECK_GLM_M32: #define __RDSEED__ 1 +// CHECK_GLM_M32: #define __SHA__ 1 +// CHECK_GLM_M32: #define __SSE2__ 1 +// CHECK_GLM_M32: #define __SSE3__ 1 +// CHECK_GLM_M32: #define __SSE4_1__ 1 +// CHECK_GLM_M32: #define __SSE4_2__ 1 +// CHECK_GLM_M32: #define __SSE_MATH__ 1 +// CHECK_GLM_M32: #define __SSE__ 1 +// CHECK_GLM_M32: #define __SSSE3__ 1 +// CHECK_GLM_M32: #define __XSAVEC__ 1 +// CHECK_GLM_M32: #define __XSAVEOPT__ 1 +// CHECK_GLM_M32: #define __XSAVES__ 1 +// CHECK_GLM_M32: #define __XSAVE__ 1 +// CHECK_GLM_M32: #define __clang__ 1 +// CHECK_GLM_M32: #define __goldmont 1 +// CHECK_GLM_M32: #define __goldmont__ 1 +// CHECK_GLM_M32: #define __i386 1 +// CHECK_GLM_M32: #define __i386__ 1 +// CHECK_GLM_M32: #define __linux 1 +// CHECK_GLM_M32: #define __linux__ 1 +// CHECK_GLM_M32: #define __llvm__ 1 +// CHECK_GLM_M32: #define __tune_goldmont__ 1 +// CHECK_GLM_M32: #define __unix 1 +// CHECK_GLM_M32: #define __unix__ 1 +// CHECK_GLM_M32: #define i386 1 +// CHECK_GLM_M32: #define linux 1 +// CHECK_GLM_M32: #define unix 1 +// +// RUN: %clang -march=goldmont -m64 -E -dM %s -o - 2>&1 \ +// RUN: -target i386-unknown-linux \ +// RUN: | FileCheck %s -check-prefix=CHECK_G
r299442 - Fix problem with test.
Author: mzuckerm Date: Tue Apr 4 10:44:06 2017 New Revision: 299442 URL: http://llvm.org/viewvc/llvm-project?rev=299442&view=rev Log: Fix problem with test. Modified: cfe/trunk/test/CodeGen/avx512bw-builtins.c cfe/trunk/test/CodeGen/avx512dq-builtins.c cfe/trunk/test/CodeGen/avx512vlbw-builtins.c cfe/trunk/test/CodeGen/avx512vldq-builtins.c Modified: cfe/trunk/test/CodeGen/avx512bw-builtins.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512bw-builtins.c?rev=299442&r1=299441&r2=299442&view=diff == --- cfe/trunk/test/CodeGen/avx512bw-builtins.c (original) +++ cfe/trunk/test/CodeGen/avx512bw-builtins.c Tue Apr 4 10:44:06 2017 @@ -1543,15 +1543,15 @@ __mmask64 test_mm512_movepi8_mask(__m512 __m512i test_mm512_movm_epi8(__mmask64 __A) { // CHECK-LABEL: @test_mm512_movm_epi8 - // CHECK: %2 = bitcast i64 %1 to <64 x i1> - // CHECK: %vpmovm2.i = sext <64 x i1> %2 to <64 x i8> + // CHECK: %{{.*}} = bitcast i64 %{{.*}} to <64 x i1> + // CHECK: %vpmovm2.i = sext <64 x i1> %{{.*}} to <64 x i8> return _mm512_movm_epi8(__A); } __m512i test_mm512_movm_epi16(__mmask32 __A) { // CHECK-LABEL: @test_mm512_movm_epi16 - // CHECK: %2 = bitcast i32 %1 to <32 x i1> - // CHECK: %vpmovm2.i = sext <32 x i1> %2 to <32 x i16> + // CHECK: %{{.*}} = bitcast i32 %{{.*}} to <32 x i1> + // CHECK: %vpmovm2.i = sext <32 x i1> %{{.*}} to <32 x i16> return _mm512_movm_epi16(__A); } Modified: cfe/trunk/test/CodeGen/avx512dq-builtins.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512dq-builtins.c?rev=299442&r1=299441&r2=299442&view=diff == --- cfe/trunk/test/CodeGen/avx512dq-builtins.c (original) +++ cfe/trunk/test/CodeGen/avx512dq-builtins.c Tue Apr 4 10:44:06 2017 @@ -929,15 +929,15 @@ __mmask16 test_mm512_movepi32_mask(__m51 __m512i test_mm512_movm_epi32(__mmask16 __A) { // CHECK-LABEL: @test_mm512_movm_epi32 - // CHECK: %2 = bitcast i16 %1 to <16 x i1> - // CHECK: %vpmovm2.i = sext <16 x i1> %2 to <16 x i32> + // CHECK: %{{.*}} = bitcast i16 %{{.*}} to <16 
x i1> + // CHECK: %vpmovm2.i = sext <16 x i1> %{{.*}} to <16 x i32> return _mm512_movm_epi32(__A); } __m512i test_mm512_movm_epi64(__mmask8 __A) { // CHECK-LABEL: @test_mm512_movm_epi64 - // CHECK: %2 = bitcast i8 %1 to <8 x i1> - // CHECK: %vpmovm2.i = sext <8 x i1> %2 to <8 x i64> + // CHECK: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1> + // CHECK: %vpmovm2.i = sext <8 x i1> %{{.*}} to <8 x i64> return _mm512_movm_epi64(__A); } Modified: cfe/trunk/test/CodeGen/avx512vlbw-builtins.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512vlbw-builtins.c?rev=299442&r1=299441&r2=299442&view=diff == --- cfe/trunk/test/CodeGen/avx512vlbw-builtins.c (original) +++ cfe/trunk/test/CodeGen/avx512vlbw-builtins.c Tue Apr 4 10:44:06 2017 @@ -2521,29 +2521,29 @@ __mmask32 test_mm256_movepi8_mask(__m256 __m128i test_mm_movm_epi8(__mmask16 __A) { // CHECK-LABEL: @test_mm_movm_epi8 - // CHECK: %2 = bitcast i16 %1 to <16 x i1> - // CHECK: %vpmovm2.i = sext <16 x i1> %2 to <16 x i8> + // CHECK: %{{.*}} = bitcast i16 %{{.*}} to <16 x i1> + // CHECK: %vpmovm2.i = sext <16 x i1> %{{.*}} to <16 x i8> return _mm_movm_epi8(__A); } __m256i test_mm256_movm_epi8(__mmask32 __A) { // CHECK-LABEL: @test_mm256_movm_epi8 - // CHECK: %2 = bitcast i32 %1 to <32 x i1> - // CHECK: %vpmovm2.i = sext <32 x i1> %2 to <32 x i8> + // CHECK: %{{.*}} = bitcast i32 %{{.*}} to <32 x i1> + // CHECK: %vpmovm2.i = sext <32 x i1> %{{.*}} to <32 x i8> return _mm256_movm_epi8(__A); } __m128i test_mm_movm_epi16(__mmask8 __A) { // CHECK-LABEL: @test_mm_movm_epi16 - // CHECK: %2 = bitcast i8 %1 to <8 x i1> - // CHECK: %vpmovm2.i = sext <8 x i1> %2 to <8 x i16> + // CHECK: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1> + // CHECK: %vpmovm2.i = sext <8 x i1> %{{.*}} to <8 x i16> return _mm_movm_epi16(__A); } __m256i test_mm256_movm_epi16(__mmask16 __A) { // CHECK-LABEL: @test_mm256_movm_epi16 - // CHECK: %2 = bitcast i16 %1 to <16 x i1> - // CHECK: %vpmovm2.i = sext <16 x i1> %2 to <16 x i16> + // CHECK: %{{.*}} = 
bitcast i16 %{{.*}} to <16 x i1> + // CHECK: %vpmovm2.i = sext <16 x i1> %{{.*}} to <16 x i16> return _mm256_movm_epi16(__A); } Modified: cfe/trunk/test/CodeGen/avx512vldq-builtins.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512vldq-builtins.c?rev=299442&r1=299441&r2=299442&view=diff == --- cfe/trunk/test/CodeGen/avx512vldq-builtins.c (original) +++ cfe/trunk/test/CodeGen/avx512vldq-builtins.c Tue Apr 4 10:44:06 2017 @@ -865,31 +865,31 @@ __mmask8 test_mm256_movepi32_mask(__m256 __m128i test_mm_m
r299431 - [X86][Clang] Converting __mm{|256|512}_movm_epi{8|16|32|64} LLVMIR call into generic intrinsics.
Author: mzuckerm Date: Tue Apr 4 08:29:53 2017 New Revision: 299431 URL: http://llvm.org/viewvc/llvm-project?rev=299431&view=rev Log: [X86][Clang] Converting __mm{|256|512}_movm_epi{8|16|32|64} LLVMIR call into generic intrinsics. This patch is a part two of two reviews, one for the clang and the other for LLVM. In this patch, I covered the clang side, by introducing the intrinsic to the front end. This is done by creating a generic replacement. Differential Revision: https://reviews.llvm.org/D31394a Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp cfe/trunk/test/CodeGen/avx512bw-builtins.c cfe/trunk/test/CodeGen/avx512dq-builtins.c cfe/trunk/test/CodeGen/avx512vlbw-builtins.c cfe/trunk/test/CodeGen/avx512vldq-builtins.c Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=299431&r1=299430&r2=299431&view=diff == --- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original) +++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Tue Apr 4 08:29:53 2017 @@ -7168,6 +7168,13 @@ static Value *EmitX86MinMax(CodeGenFunct return EmitX86Select(CGF, Ops[3], Res, Ops[2]); } +static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op, + llvm::Type *DstTy) { + unsigned NumberOfElements = DstTy->getVectorNumElements(); + Value *Mask = getMaskVecValue(CGF, Op, NumberOfElements); + return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2"); +} + Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, const CallExpr *E) { if (BuiltinID == X86::BI__builtin_ms_va_start || @@ -7466,6 +7473,21 @@ Value *CodeGenFunction::EmitX86BuiltinEx case X86::BI__builtin_ia32_storesd128_mask: { return EmitX86MaskedStore(*this, Ops, 16); } + + case X86::BI__builtin_ia32_cvtmask2b128: + case X86::BI__builtin_ia32_cvtmask2b256: + case X86::BI__builtin_ia32_cvtmask2b512: + case X86::BI__builtin_ia32_cvtmask2w128: + case X86::BI__builtin_ia32_cvtmask2w256: + case X86::BI__builtin_ia32_cvtmask2w512: + case X86::BI__builtin_ia32_cvtmask2d128: + case 
X86::BI__builtin_ia32_cvtmask2d256: + case X86::BI__builtin_ia32_cvtmask2d512: + case X86::BI__builtin_ia32_cvtmask2q128: + case X86::BI__builtin_ia32_cvtmask2q256: + case X86::BI__builtin_ia32_cvtmask2q512: +return EmitX86SExtMask(*this, Ops[0], ConvertType(E->getType())); + case X86::BI__builtin_ia32_movdqa32store128_mask: case X86::BI__builtin_ia32_movdqa64store128_mask: case X86::BI__builtin_ia32_storeaps128_mask: Modified: cfe/trunk/test/CodeGen/avx512bw-builtins.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512bw-builtins.c?rev=299431&r1=299430&r2=299431&view=diff == --- cfe/trunk/test/CodeGen/avx512bw-builtins.c (original) +++ cfe/trunk/test/CodeGen/avx512bw-builtins.c Tue Apr 4 08:29:53 2017 @@ -1543,13 +1543,15 @@ __mmask64 test_mm512_movepi8_mask(__m512 __m512i test_mm512_movm_epi8(__mmask64 __A) { // CHECK-LABEL: @test_mm512_movm_epi8 - // CHECK: @llvm.x86.avx512.cvtmask2b.512 + // CHECK: %2 = bitcast i64 %1 to <64 x i1> + // CHECK: %vpmovm2.i = sext <64 x i1> %2 to <64 x i8> return _mm512_movm_epi8(__A); } __m512i test_mm512_movm_epi16(__mmask32 __A) { // CHECK-LABEL: @test_mm512_movm_epi16 - // CHECK: @llvm.x86.avx512.cvtmask2w.512 + // CHECK: %2 = bitcast i32 %1 to <32 x i1> + // CHECK: %vpmovm2.i = sext <32 x i1> %2 to <32 x i16> return _mm512_movm_epi16(__A); } Modified: cfe/trunk/test/CodeGen/avx512dq-builtins.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512dq-builtins.c?rev=299431&r1=299430&r2=299431&view=diff == --- cfe/trunk/test/CodeGen/avx512dq-builtins.c (original) +++ cfe/trunk/test/CodeGen/avx512dq-builtins.c Tue Apr 4 08:29:53 2017 @@ -929,13 +929,15 @@ __mmask16 test_mm512_movepi32_mask(__m51 __m512i test_mm512_movm_epi32(__mmask16 __A) { // CHECK-LABEL: @test_mm512_movm_epi32 - // CHECK: @llvm.x86.avx512.cvtmask2d.512 + // CHECK: %2 = bitcast i16 %1 to <16 x i1> + // CHECK: %vpmovm2.i = sext <16 x i1> %2 to <16 x i32> return _mm512_movm_epi32(__A); } __m512i test_mm512_movm_epi64(__mmask8 
__A) { // CHECK-LABEL: @test_mm512_movm_epi64 - // CHECK: @llvm.x86.avx512.cvtmask2q.512 + // CHECK: %2 = bitcast i8 %1 to <8 x i1> + // CHECK: %vpmovm2.i = sext <8 x i1> %2 to <8 x i64> return _mm512_movm_epi64(__A); } Modified: cfe/trunk/test/CodeGen/avx512vlbw-builtins.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512vlbw-builtins.c?rev=299431&r1=299430&r2=299431&view=diff == --- cfe/trunk/test/CodeGen/avx512vlbw-builtins.c (original) +++ cfe/trunk
r285688 - [x86][inline-asm][clang] accept 'v' constraint
Author: mzuckerm Date: Tue Nov 1 08:16:44 2016 New Revision: 285688 URL: http://llvm.org/viewvc/llvm-project?rev=285688&view=rev Log: [x86][inline-asm][clang] accept 'v' constraint Commit on behalf of: Coby Tayree 1.'v' constraint for (x86) non-avx arch imitates the already implemented 'x' constraint, i.e. allows XMM{0-15} & YMM{0-15} depending on the apparent arch & mode (32/64). 2.for the avx512 arch it allows [X,Y,Z]MM{0-31} (mode dependent) This patch applies the needed changes to clang LLVM patch: https://reviews.llvm.org/D25005 Differential Revision: https://reviews.llvm.org/D25005 Added: cfe/trunk/test/CodeGen/x86-inline-asm-v-constraint.c Modified: cfe/trunk/lib/Basic/Targets.cpp Modified: cfe/trunk/lib/Basic/Targets.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets.cpp?rev=285688&r1=285687&r2=285688&view=diff == --- cfe/trunk/lib/Basic/Targets.cpp (original) +++ cfe/trunk/lib/Basic/Targets.cpp Tue Nov 1 08:16:44 2016 @@ -4018,6 +4018,7 @@ X86TargetInfo::validateAsmConstraint(con case 'u': // Second from top of floating point stack. case 'q': // Any register accessible as [r]l: a, b, c, and d. case 'y': // Any MMX register. + case 'v': // Any {X,Y,Z}MM register (Arch & context dependent) case 'x': // Any SSE register. case 'k': // Any AVX512 mask register (same as Yk, additionaly allows k0 // for intermideate k reg operations). @@ -4062,6 +4063,7 @@ bool X86TargetInfo::validateOperandSize( case 't': case 'u': return Size <= 128; + case 'v': case 'x': if (SSELevel >= AVX512F) // 512-bit zmm registers can be used if target supports AVX512F. 
Added: cfe/trunk/test/CodeGen/x86-inline-asm-v-constraint.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/x86-inline-asm-v-constraint.c?rev=285688&view=auto == --- cfe/trunk/test/CodeGen/x86-inline-asm-v-constraint.c (added) +++ cfe/trunk/test/CodeGen/x86-inline-asm-v-constraint.c Tue Nov 1 08:16:44 2016 @@ -0,0 +1,30 @@ +// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -emit-llvm -target-cpu x86-64 -o - |opt -instnamer -S |FileCheck %s --check-prefix SSE +// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -emit-llvm -target-cpu skylake -D AVX -o -|opt -instnamer -S | FileCheck %s --check-prefixes AVX,SSE +// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -emit-llvm -target-cpu skylake-avx512 -D AVX512 -D AVX -o -|opt -instnamer -S | FileCheck %s --check-prefixes AVX512,AVX,SSE +// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -emit-llvm -target-cpu knl -D AVX -D AVX512 -o - |opt -instnamer -S | FileCheck %s --check-prefixes AVX512,AVX,SSE + +typedef float __m128 __attribute__ ((vector_size (16))); +typedef float __m256 __attribute__ ((vector_size (32))); +typedef float __m512 __attribute__ ((vector_size (64))); + +// SSE: call <4 x float> asm "vmovhlps $1, $2, $0", "=v,v,v,~{dirflag},~{fpsr},~{flags}"(i64 %tmp, <4 x float> %tmp1) +__m128 testXMM(__m128 _xmm0, long _l) { + __asm__("vmovhlps %1, %2, %0" :"=v"(_xmm0) : "v"(_l), "v"(_xmm0)); + return _xmm0; +} + +// AVX: call <8 x float> asm "vmovsldup $1, $0", "=v,v,~{dirflag},~{fpsr},~{flags}"(<8 x float> %tmp) +__m256 testYMM(__m256 _ymm0) { +#ifdef AVX + __asm__("vmovsldup %1, %0" :"=v"(_ymm0) : "v"(_ymm0)); +#endif + return _ymm0; +} + +// AVX512: call <16 x float> asm "vpternlogd $$0, $1, $2, $0", "=v,v,v,~{dirflag},~{fpsr},~{flags}"(<16 x float> %tmp, <16 x float> %tmp1) +__m512 testZMM(__m512 _zmm0, __m512 _zmm1) { +#ifdef AVX512 + __asm__("vpternlogd $0, %1, %2, %0" :"=v"(_zmm0) : "v"(_zmm1), "v"(_zmm0)); +#endif + return _zmm0; +} ___ cfe-commits mailing list 
cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r285617 - Fixing problem with CodeGen/avx512-kconstraints-att_inline_asm.c
Author: mzuckerm Date: Mon Oct 31 13:40:17 2016 New Revision: 285617 URL: http://llvm.org/viewvc/llvm-project?rev=285617&view=rev Log: Fixing problem with CodeGen/avx512-kconstraints-att_inline_asm.c Modified: cfe/trunk/test/CodeGen/avx512-kconstraints-att_inline_asm.c Modified: cfe/trunk/test/CodeGen/avx512-kconstraints-att_inline_asm.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512-kconstraints-att_inline_asm.c?rev=285617&r1=285616&r2=285617&view=diff == --- cfe/trunk/test/CodeGen/avx512-kconstraints-att_inline_asm.c (original) +++ cfe/trunk/test/CodeGen/avx512-kconstraints-att_inline_asm.c Mon Oct 31 13:40:17 2016 @@ -1,58 +1,58 @@ -// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-cpu skylake-avx512 -O0 -emit-llvm -S -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 %s -O0 -triple=x86_64-apple-darwin -target-cpu skylake-avx512 -emit-llvm -o - -Wall -Werror |opt -instnamer -S |FileCheck %s // This test checks validity of att\gcc style inline assmebly for avx512 k and Yk constraints. 
// Also checks mask register allows flexible type (size <= 64 bit) void mask_Yk_i8(char msk){ -//CHECK: vpaddb %xmm1, %xmm0, %xmm1 {%k1} +//CHECK: vpaddb\09 %xmm1, %xmm0, %xmm1 {$0}\09 asm ("vpaddb\t %%xmm1, %%xmm0, %%xmm1 %{%0%}\t" ://output : "Yk" (msk)); //inputs } void mask_Yk_i16(short msk){ -//CHECK: vpaddb %xmm1, %xmm0, %xmm1 {%k1} +//CHECK: vpaddb\09 %xmm1, %xmm0, %xmm1 {$0}\09 asm ("vpaddb\t %%xmm1, %%xmm0, %%xmm1 %{%0%}\t" ://output : "Yk" (msk)); //inputs } void mask_Yk_i32(int msk){ -//CHECK: vpaddb %xmm1, %xmm0, %xmm1 {%k1} +//CHECK: vpaddb\09 %xmm1, %xmm0, %xmm1 {$0}\09 asm ("vpaddb\t %%xmm1, %%xmm0, %%xmm1 %{%0%}\t" ://output : "Yk" (msk)); //inputs } void mask_Yk_i64(long long msk){ -//CHECK: vpaddb %xmm1, %xmm0, %xmm1 {%k1} +//CHECK: vpaddb\09 %xmm1, %xmm0, %xmm1 {$0}\09 asm ("vpaddb\t %%xmm1, %%xmm0, %%xmm1 %{%0%}\t" ://output : "Yk" (msk)); //inputs } void k_wise_op_i8(char msk_dst,char msk_src1,char msk_src2){ -//CHECK: kandw %k1, %k0, %k0 +//CHECK: kandw\09$2, $1, $0 asm ("kandw\t%2, %1, %0" : "=k" (msk_dst) : "k" (msk_src1), "k" (msk_src2)); } void k_wise_op_i16(short msk_dst, short msk_src1, short msk_src2){ -//CHECK: kandw %k1, %k0, %k0 +//CHECK: kandw\09$2, $1, $0 asm ("kandw\t%2, %1, %0" : "=k" (msk_dst) : "k" (msk_src1), "k" (msk_src2)); } void k_wise_op_i32(int msk_dst, int msk_src1, int msk_src2){ -//CHECK: kandw %k1, %k0, %k0 +//CHECK: kandw\09$2, $1, $0 asm ("kandw\t%2, %1, %0" : "=k" (msk_dst) : "k" (msk_src1), "k" (msk_src2)); } void k_wise_op_i64(long long msk_dst, long long msk_src1, long long msk_src2){ -//CHECK: kandw %k1, %k0, %k0 +//CHECK: kandw\09$2, $1, $0 asm ("kandw\t%2, %1, %0" : "=k" (msk_dst) : "k" (msk_src1), "k" (msk_src2)); ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r285604 - [x86][inline-asm][AVX512][clang][PART-1] Introducing "k" and "Yk" constraints for extended inline assembly, enabling use of AVX512 masked vectorized instructions.
Author: mzuckerm Date: Mon Oct 31 12:23:52 2016 New Revision: 285604 URL: http://llvm.org/viewvc/llvm-project?rev=285604&view=rev Log: [x86][inline-asm][AVX512][clang][PART-1] Introducing "k" and "Yk" constraints for extended inline assembly, enabling use of AVX512 masked vectorized instructions. Commit on behalf of mharoush Extending inline assembly support, compatible with GCC as following: "k" constraint hints the compiler to select any of AVX512 k0-k7 registers. "Yk" constraint is a subset of "k" excluding k0 which is not allowed to be used as a mask. Reviewer: 1. rnk Differential Revision: https://reviews.llvm.org/D25063 Added: cfe/trunk/test/CodeGen/avx512-kconstraints-att_inline_asm.c Modified: cfe/trunk/lib/Basic/Targets.cpp Modified: cfe/trunk/lib/Basic/Targets.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets.cpp?rev=285604&r1=285603&r2=285604&view=diff == --- cfe/trunk/lib/Basic/Targets.cpp (original) +++ cfe/trunk/lib/Basic/Targets.cpp Mon Oct 31 12:23:52 2016 @@ -3997,6 +3997,7 @@ X86TargetInfo::validateAsmConstraint(con case 't': // Any SSE register, when SSE2 is enabled. case 'i': // Any SSE register, when SSE2 and inter-unit moves enabled. case 'm': // Any MMX register, when inter-unit moves enabled. +case 'k': // AVX512 arch mask registers: k1-k7. Info.setAllowsRegister(); return true; } @@ -4018,6 +4019,8 @@ X86TargetInfo::validateAsmConstraint(con case 'q': // Any register accessible as [r]l: a, b, c, and d. case 'y': // Any MMX register. case 'x': // Any SSE register. + case 'k': // Any AVX512 mask register (same as Yk, additionaly allows k0 +// for intermideate k reg operations). case 'Q': // Any register accessible as [r]h: a, b, c, and d. case 'R': // "Legacy" registers: ax, bx, cx, dx, di, si, sp, bp. 
case 'l': // "Index" registers: any general register that can be used as an @@ -4051,6 +4054,8 @@ bool X86TargetInfo::validateOperandSize( unsigned Size) const { switch (Constraint[0]) { default: break; + case 'k': + // Registers k0-k7 (AVX512) size limit is 64 bit. case 'y': return Size <= 64; case 'f': @@ -4071,6 +4076,7 @@ bool X86TargetInfo::validateOperandSize( default: break; case 'm': // 'Ym' is synonymous with 'y'. +case 'k': return Size <= 64; case 'i': case 't': @@ -4102,6 +4108,20 @@ X86TargetInfo::convertConstraint(const c return std::string("{st}"); case 'u': // second from top of floating point stack. return std::string("{st(1)}"); // second from top of floating point stack. + case 'Y': +switch (Constraint[1]) { +default: + // Break from inner switch and fall through (copy single char), + // continue parsing after copying the current constraint into + // the return string. + break; +case 'k': + // "^" hints llvm that this is a 2 letter constraint. + // "Constraint++" is used to promote the string iterator + // to the next constraint. + return std::string("^") + std::string(Constraint++, 2); +} +LLVM_FALLTHROUGH; default: return std::string(1, *Constraint); } Added: cfe/trunk/test/CodeGen/avx512-kconstraints-att_inline_asm.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512-kconstraints-att_inline_asm.c?rev=285604&view=auto == --- cfe/trunk/test/CodeGen/avx512-kconstraints-att_inline_asm.c (added) +++ cfe/trunk/test/CodeGen/avx512-kconstraints-att_inline_asm.c Mon Oct 31 12:23:52 2016 @@ -0,0 +1,59 @@ +// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-cpu skylake-avx512 -O0 -emit-llvm -S -o - -Wall -Werror | FileCheck %s +// This test checks validity of att\gcc style inline assmebly for avx512 k and Yk constraints. 
+// Also checks mask register allows flexible type (size <= 64 bit) + +void mask_Yk_i8(char msk){ +//CHECK: vpaddb %xmm1, %xmm0, %xmm1 {%k1} + asm ("vpaddb\t %%xmm1, %%xmm0, %%xmm1 %{%0%}\t" + ://output + : "Yk" (msk)); //inputs +} + +void mask_Yk_i16(short msk){ +//CHECK: vpaddb %xmm1, %xmm0, %xmm1 {%k1} + asm ("vpaddb\t %%xmm1, %%xmm0, %%xmm1 %{%0%}\t" + ://output + : "Yk" (msk)); //inputs +} + +void mask_Yk_i32(int msk){ +//CHECK: vpaddb %xmm1, %xmm0, %xmm1 {%k1} +asm ("vpaddb\t %%xmm1, %%xmm0, %%xmm1 %{%0%}\t" + ://output + : "Yk" (msk)); //inputs +} + +void mask_Yk_i64(long long msk){ +//CHECK: vpaddb %xmm1, %xmm0, %xmm1 {%k1} + asm ("vpaddb\t %%xmm1, %%xmm0, %%xmm1 %{%0%}\t" + ://output + : "Yk" (msk)); //inputs +} + +void k_wise_op_i8(char msk_dst,char msk_src1,char msk_src2){ +//CHECK: kandw %k1, %k0, %k0 + asm ("kandw\t%2, %1, %0" + : "=k" (msk_ds
r285585 - [x86][inline-asm] Add support for curly brackets escape using "%" in extended inline asm.
Author: mzuckerm Date: Mon Oct 31 10:27:54 2016 New Revision: 285585 URL: http://llvm.org/viewvc/llvm-project?rev=285585&view=rev Log: [x86][inline-asm] Add support for curly brackets escape using "%" in extended inline asm. Commit on behalf of mharoush After LGTM and check all: This patch is a compatibility fix for clang, matching GCC support for character escape when using extended in-line assembly (i.e., "%{" ,"%}" --> "{" ,"}" ). It is meant to enable support for advanced features such as AVX512 conditional\masked vector instructions/broadcast assembly syntax. Reviewer: 1. rnk Differential Revision: https://reviews.llvm.org/D25012 Added: cfe/trunk/test/CodeGen/x86_inlineasm_curly_bracket_escape.c Modified: cfe/trunk/lib/AST/Stmt.cpp Modified: cfe/trunk/lib/AST/Stmt.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/AST/Stmt.cpp?rev=285585&r1=285584&r2=285585&view=diff == --- cfe/trunk/lib/AST/Stmt.cpp (original) +++ cfe/trunk/lib/AST/Stmt.cpp Mon Oct 31 10:27:54 2016 @@ -533,15 +533,17 @@ unsigned GCCAsmStmt::AnalyzeAsmString(Sm DiagOffs = CurPtr-StrStart-1; return diag::err_asm_invalid_escape; } - +// Handle escaped char and continue looping over the asm string. char EscapedChar = *CurPtr++; -if (EscapedChar == '%') { // %% -> % - // Escaped percentage sign. - CurStringPiece += '%'; +switch (EscapedChar) { +default: + break; +case '%': // %% -> % +case '{': // %{ -> { +case '}': // %} -> } + CurStringPiece += EscapedChar; continue; -} - -if (EscapedChar == '=') { // %= -> Generate an unique ID. +case '=': // %= -> Generate a unique ID. 
CurStringPiece += "${:uid}"; continue; } Added: cfe/trunk/test/CodeGen/x86_inlineasm_curly_bracket_escape.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/x86_inlineasm_curly_bracket_escape.c?rev=285585&view=auto == --- cfe/trunk/test/CodeGen/x86_inlineasm_curly_bracket_escape.c (added) +++ cfe/trunk/test/CodeGen/x86_inlineasm_curly_bracket_escape.c Mon Oct 31 10:27:54 2016 @@ -0,0 +1,8 @@ +// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-cpu skylake-avx512 -O0 -S -emit-llvm -o - -Wall -Werror | FileCheck %s +// This test checks validity of inline assembly using curly brackets syntax +// for extended inline asm. + +void test_curly_brackets() { +//CHECK: %xmm1,%xmm0,%xmm1 {%k1}{z} +asm("vpaddb\t %%xmm1,%%xmm0,%%xmm1 %{%%k1%}%{z%}\t":::); +} \ No newline at end of file ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r285573 - second attempt at r285565.
Author: mzuckerm Date: Mon Oct 31 09:16:57 2016 New Revision: 285573 URL: http://llvm.org/viewvc/llvm-project?rev=285573&view=rev Log: second attempt at r285565. Added: cfe/trunk/test/CodeGen/avx512-inline-asm-kregisters-basics.c Modified: cfe/trunk/lib/Basic/Targets.cpp Modified: cfe/trunk/lib/Basic/Targets.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets.cpp?rev=285573&r1=285572&r2=285573&view=diff == --- cfe/trunk/lib/Basic/Targets.cpp (original) +++ cfe/trunk/lib/Basic/Targets.cpp Mon Oct 31 09:16:57 2016 @@ -2397,6 +2397,7 @@ static const char* const GCCRegNames[] = "zmm8", "zmm9", "zmm10", "zmm11", "zmm12", "zmm13", "zmm14", "zmm15", "zmm16", "zmm17", "zmm18", "zmm19", "zmm20", "zmm21", "zmm22", "zmm23", "zmm24", "zmm25", "zmm26", "zmm27", "zmm28", "zmm29", "zmm30", "zmm31", + "k0", "k1", "k2", "k3", "k4", "k5", "k6", "k7", }; const TargetInfo::AddlRegName AddlRegNames[] = { Added: cfe/trunk/test/CodeGen/avx512-inline-asm-kregisters-basics.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512-inline-asm-kregisters-basics.c?rev=285573&view=auto == --- cfe/trunk/test/CodeGen/avx512-inline-asm-kregisters-basics.c (added) +++ cfe/trunk/test/CodeGen/avx512-inline-asm-kregisters-basics.c Mon Oct 31 09:16:57 2016 @@ -0,0 +1,11 @@ +// RUN: %clang_cc1 %s -O0 -triple=x86_64-apple-darwin -target-cpu skylake-avx512 -emit-llvm -o - -Wall -Werror | FileCheck %s +// This test checks basic inline assembly recognition of k0-k7 registers for avx512. + +void test_basic_inline_asm_with_k_regs() { +//CHECK: kandw %k1, %k2, %k3 +asm("kandw %k1, %k2, %k3\t"); +//CHECK: kandw %k4, %k5, %k6 +asm("kandw %k4, %k5, %k6\t"); +//CHECK: kandw %k7, %k0, %k1 +asm("kandw %k7, %k0, %k1\t"); +} \ No newline at end of file ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r285565 - revert r285563 fail in test CodeGen/avx512-inline-asm-kregisters-basics.c
Author: mzuckerm Date: Mon Oct 31 07:49:36 2016 New Revision: 285565 URL: http://llvm.org/viewvc/llvm-project?rev=285565&view=rev Log: revert r285563 fail in test CodeGen/avx512-inline-asm-kregisters-basics.c Removed: cfe/trunk/test/CodeGen/avx512-inline-asm-kregisters-basics.c Modified: cfe/trunk/lib/Basic/Targets.cpp Modified: cfe/trunk/lib/Basic/Targets.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets.cpp?rev=285565&r1=285564&r2=285565&view=diff == --- cfe/trunk/lib/Basic/Targets.cpp (original) +++ cfe/trunk/lib/Basic/Targets.cpp Mon Oct 31 07:49:36 2016 @@ -2397,7 +2397,6 @@ static const char* const GCCRegNames[] = "zmm8", "zmm9", "zmm10", "zmm11", "zmm12", "zmm13", "zmm14", "zmm15", "zmm16", "zmm17", "zmm18", "zmm19", "zmm20", "zmm21", "zmm22", "zmm23", "zmm24", "zmm25", "zmm26", "zmm27", "zmm28", "zmm29", "zmm30", "zmm31", - "k0", "k1", "k2", "k3", "k4", "k5", "k6", "k7", }; const TargetInfo::AddlRegName AddlRegNames[] = { Removed: cfe/trunk/test/CodeGen/avx512-inline-asm-kregisters-basics.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512-inline-asm-kregisters-basics.c?rev=285564&view=auto == --- cfe/trunk/test/CodeGen/avx512-inline-asm-kregisters-basics.c (original) +++ cfe/trunk/test/CodeGen/avx512-inline-asm-kregisters-basics.c (removed) @@ -1,17 +0,0 @@ -// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-cpu skylake-avx512 -O0 -S -o - -Wall -Werror | FileCheck %s -// This test checks basic inline assembly recognition of k0-k7 registers for avx512. 
- -void test_basic_inline_asm_with_k_regs() { -//CHECK: ## InlineAsm Start -//CHECK: kandw %k1, %k2, %k3 -//CHECK: ## InlineAsm End -asm("kandw %k1, %k2, %k3\t"); -//CHECK: ## InlineAsm Start -//CHECK: kandw %k4, %k5, %k6 -//CHECK: ## InlineAsm End -asm("kandw %k4, %k5, %k6\t"); -//CHECK: ## InlineAsm Start -//CHECK: kandw %k7, %k0, %k1 -//CHECK: ## InlineAsm End -asm("kandw %k7, %k0, %k1\t"); -} ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r285563 - [x86][inline-asm] Introducing (AVX512) k0-k7 registers for inline-asm usage
Author: mzuckerm Date: Mon Oct 31 07:05:41 2016 New Revision: 285563 URL: http://llvm.org/viewvc/llvm-project?rev=285563&view=rev Log: [x86][inline-asm] Introducing (AVX512) k0-k7 registers for inline-asm usage Commit on behalf of mharoush After LGTM and check all: This patch enables usage of k registers in inline assembly syntax. Adding triple Reviewer: 1. rnk 2. delena Differential Revision: https://reviews.llvm.org/D25011 Added: cfe/trunk/test/CodeGen/avx512-inline-asm-kregisters-basics.c Modified: cfe/trunk/lib/Basic/Targets.cpp Modified: cfe/trunk/lib/Basic/Targets.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets.cpp?rev=285563&r1=285562&r2=285563&view=diff == --- cfe/trunk/lib/Basic/Targets.cpp (original) +++ cfe/trunk/lib/Basic/Targets.cpp Mon Oct 31 07:05:41 2016 @@ -2397,6 +2397,7 @@ static const char* const GCCRegNames[] = "zmm8", "zmm9", "zmm10", "zmm11", "zmm12", "zmm13", "zmm14", "zmm15", "zmm16", "zmm17", "zmm18", "zmm19", "zmm20", "zmm21", "zmm22", "zmm23", "zmm24", "zmm25", "zmm26", "zmm27", "zmm28", "zmm29", "zmm30", "zmm31", + "k0", "k1", "k2", "k3", "k4", "k5", "k6", "k7", }; const TargetInfo::AddlRegName AddlRegNames[] = { Added: cfe/trunk/test/CodeGen/avx512-inline-asm-kregisters-basics.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512-inline-asm-kregisters-basics.c?rev=285563&view=auto == --- cfe/trunk/test/CodeGen/avx512-inline-asm-kregisters-basics.c (added) +++ cfe/trunk/test/CodeGen/avx512-inline-asm-kregisters-basics.c Mon Oct 31 07:05:41 2016 @@ -0,0 +1,17 @@ +// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-cpu skylake-avx512 -O0 -S -o - -Wall -Werror | FileCheck %s +// This test checks basic inline assembly recognition of k0-k7 registers for avx512. 
+ +void test_basic_inline_asm_with_k_regs() { +//CHECK: ## InlineAsm Start +//CHECK: kandw %k1, %k2, %k3 +//CHECK: ## InlineAsm End +asm("kandw %k1, %k2, %k3\t"); +//CHECK: ## InlineAsm Start +//CHECK: kandw %k4, %k5, %k6 +//CHECK: ## InlineAsm End +asm("kandw %k4, %k5, %k6\t"); +//CHECK: ## InlineAsm Start +//CHECK: kandw %k7, %k0, %k1 +//CHECK: ## InlineAsm End +asm("kandw %k7, %k0, %k1\t"); +} ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r285556 - Revert revision 285555
Author: mzuckerm Date: Mon Oct 31 05:12:36 2016 New Revision: 285556 URL: http://llvm.org/viewvc/llvm-project?rev=285556&view=rev Log: Revert revision 285555 Removed: cfe/trunk/test/CodeGen/avx512-inline-asm-kregisters-basics.c Modified: cfe/trunk/lib/Basic/Targets.cpp Modified: cfe/trunk/lib/Basic/Targets.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets.cpp?rev=285556&r1=285555&r2=285556&view=diff == --- cfe/trunk/lib/Basic/Targets.cpp (original) +++ cfe/trunk/lib/Basic/Targets.cpp Mon Oct 31 05:12:36 2016 @@ -2397,7 +2397,6 @@ static const char* const GCCRegNames[] = "zmm8", "zmm9", "zmm10", "zmm11", "zmm12", "zmm13", "zmm14", "zmm15", "zmm16", "zmm17", "zmm18", "zmm19", "zmm20", "zmm21", "zmm22", "zmm23", "zmm24", "zmm25", "zmm26", "zmm27", "zmm28", "zmm29", "zmm30", "zmm31", - "k0", "k1", "k2", "k3", "k4", "k5", "k6", "k7", }; const TargetInfo::AddlRegName AddlRegNames[] = { Removed: cfe/trunk/test/CodeGen/avx512-inline-asm-kregisters-basics.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512-inline-asm-kregisters-basics.c?rev=285555&view=auto == --- cfe/trunk/test/CodeGen/avx512-inline-asm-kregisters-basics.c (original) +++ cfe/trunk/test/CodeGen/avx512-inline-asm-kregisters-basics.c (removed) @@ -1,17 +0,0 @@ -// RUN: %clang_cc1 %s -target-cpu skylake-avx512 -O0 -S -o - -Wall -Werror | FileCheck %s -// This test checks basic inline assembly recognition of k0-k7 registers for avx512. - -void test_basic_inline_asm_with_k_regs() { -//CHECK: #APP -//CHECK: kandw %k1, %k2, %k3 -//CHECK: #NO_APP -asm("kandw %k1, %k2, %k3\t"); -//CHECK: #APP -//CHECK: kandw %k4, %k5, %k6 -//CHECK: #NO_APP -asm("kandw %k4, %k5, %k6\t"); -//CHECK: #APP -//CHECK: kandw %k7, %k0, %k1 -//CHECK: #NO_APP -asm("kandw %k7, %k0, %k1\t"); -} \ No newline at end of file ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r285555 - [x86][inline-asm] Introducing (AVX512) k0-k7 registers for inline-asm usage
Author: mzuckerm Date: Mon Oct 31 04:37:59 2016 New Revision: 285555 URL: http://llvm.org/viewvc/llvm-project?rev=285555&view=rev Log: [x86][inline-asm] Introducing (AVX512) k0-k7 registers for inline-asm usage Commit on behalf of mharoush After LGTM and check all: This patch enables usage of k registers in inline assembly syntax. Reviewer: 1. rnk 2. delena Differential Revision: https://reviews.llvm.org/D25011 Added: cfe/trunk/test/CodeGen/avx512-inline-asm-kregisters-basics.c Modified: cfe/trunk/lib/Basic/Targets.cpp Modified: cfe/trunk/lib/Basic/Targets.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets.cpp?rev=285555&r1=285554&r2=285555&view=diff == --- cfe/trunk/lib/Basic/Targets.cpp (original) +++ cfe/trunk/lib/Basic/Targets.cpp Mon Oct 31 04:37:59 2016 @@ -2397,6 +2397,7 @@ static const char* const GCCRegNames[] = "zmm8", "zmm9", "zmm10", "zmm11", "zmm12", "zmm13", "zmm14", "zmm15", "zmm16", "zmm17", "zmm18", "zmm19", "zmm20", "zmm21", "zmm22", "zmm23", "zmm24", "zmm25", "zmm26", "zmm27", "zmm28", "zmm29", "zmm30", "zmm31", + "k0", "k1", "k2", "k3", "k4", "k5", "k6", "k7", }; const TargetInfo::AddlRegName AddlRegNames[] = { Added: cfe/trunk/test/CodeGen/avx512-inline-asm-kregisters-basics.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512-inline-asm-kregisters-basics.c?rev=285555&view=auto == --- cfe/trunk/test/CodeGen/avx512-inline-asm-kregisters-basics.c (added) +++ cfe/trunk/test/CodeGen/avx512-inline-asm-kregisters-basics.c Mon Oct 31 04:37:59 2016 @@ -0,0 +1,17 @@ +// RUN: %clang_cc1 %s -target-cpu skylake-avx512 -O0 -S -o - -Wall -Werror | FileCheck %s +// This test checks basic inline assembly recognition of k0-k7 registers for avx512. 
+ +void test_basic_inline_asm_with_k_regs() { +//CHECK: #APP +//CHECK: kandw %k1, %k2, %k3 +//CHECK: #NO_APP +asm("kandw %k1, %k2, %k3\t"); +//CHECK: #APP +//CHECK: kandw %k4, %k5, %k6 +//CHECK: #NO_APP +asm("kandw %k4, %k5, %k6\t"); +//CHECK: #APP +//CHECK: kandw %k7, %k0, %k1 +//CHECK: #NO_APP +asm("kandw %k7, %k0, %k1\t"); +} \ No newline at end of file ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r285519 - Fixing "type" issue for (epi32)
Author: mzuckerm Date: Sun Oct 30 09:54:05 2016 New Revision: 285519 URL: http://llvm.org/viewvc/llvm-project?rev=285519&view=rev Log: Fixing "type" issue for (epi32) and replacing hardcoded inf with clang builtin inf "__builtin_inff()" for float ({max|min}_{pd|ps}) Modified: cfe/trunk/lib/Headers/avx512fintrin.h cfe/trunk/test/CodeGen/avx512-reduceIntrin.c cfe/trunk/test/CodeGen/avx512-reduceMinMaxIntrin.c Modified: cfe/trunk/lib/Headers/avx512fintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=285519&r1=285518&r2=285519&view=diff == --- cfe/trunk/lib/Headers/avx512fintrin.h (original) +++ cfe/trunk/lib/Headers/avx512fintrin.h Sun Oct 30 09:54:05 2016 @@ -9784,43 +9784,43 @@ _mm512_mask_reduce_mul_pd(__mmask8 __M, #define _mm512_reduce_operator_32bit(Vec512, Operator, T2, T1) __extension__({ \ __m256##T1 Vec256 = \ -(__m256##T1)__builtin_shufflevector( \ +(__m256##T1)(__builtin_shufflevector( \ (__v16s##T2)Vec512, \ (__v16s##T2)Vec512, \ 0, 1, 2, 3, 4, 5, 6, 7) \ Operator \ -(__m256##T1)__builtin_shufflevector( \ + __builtin_shufflevector( \ (__v16s##T2)Vec512, \ (__v16s##T2)Vec512, \ -8, 9, 10, 11, 12, 13, 14, 15); \ +8, 9, 10, 11, 12, 13, 14, 15)); \ __m128##T1 Vec128 = \ - (__m128##T1)__builtin_shufflevector( \ + (__m128##T1)(__builtin_shufflevector( \ (__v8s##T2)Vec256, \ (__v8s##T2)Vec256, \ 0, 1, 2, 3) \ Operator \ - (__m128##T1)__builtin_shufflevector( \ + __builtin_shufflevector( \ (__v8s##T2)Vec256, \ (__v8s##T2)Vec256, \ -4, 5, 6, 7); \ -Vec128 = (__m128##T1)__builtin_shufflevector( \ +4, 5, 6, 7)); \ +Vec128 = (__m128##T1)(__builtin_shufflevector( \ (__v4s##T2)Vec128, \ (__v4s##T2)Vec128, \ 0, 1, -1, -1) \ Operator \ - (__m128##T1)__builtin_shufflevector( \ + __builtin_shufflevector( \ (__v4s##T2)Vec128, \ (__v4s##T2)Vec128, \ -2, 3, -1, -1); \ -Vec128 = (__m128##T1)__builtin_shufflevector( \ +2, 3, -1, -1)); \ +Vec128 = (__m128##T1)(__builtin_shufflevector( \ (__v4s##T2)Vec128, \ (__v4s##T2)Vec128, \ 0, -1, -1, 
-1) \ Operator \ - (__m128##T1)__builtin_shufflevector( \ + __builtin_shufflevector( \ (__v4s##T2)Vec128, \ (__v4s##T2)Vec128, \ -1, -1, -1, -1);
r285493 - [X86][AVX512][Clang][Intrinsics][reduce] Adding missing reduce (max|min) intrinsics to Clang .
Author: mzuckerm Date: Sat Oct 29 05:29:20 2016 New Revision: 285493 URL: http://llvm.org/viewvc/llvm-project?rev=285493&view=rev Log: [X86][AVX512][Clang][Intrinsics][reduce] Adding missing reduce (max|min) intrinsics to Clang . After LGTM and Check-all Vector-reduction arithmetic accepts vectors as inputs and produces scalars as outputs.This class of vector operation forms the basis of many scientific computations. In vector-reduction arithmetic, the evaluation off is independent of the order of the input elements of V. Reviewer: 1. craig.topper 2. igorb Differential Revision: https://reviews.llvm.org/D25988 Added: cfe/trunk/test/CodeGen/avx512-reduceMinMaxIntrin.c Modified: cfe/trunk/lib/Headers/avx512fintrin.h Modified: cfe/trunk/lib/Headers/avx512fintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=285493&r1=285492&r2=285493&view=diff == --- cfe/trunk/lib/Headers/avx512fintrin.h (original) +++ cfe/trunk/lib/Headers/avx512fintrin.h Sat Oct 29 05:29:20 2016 @@ -9904,6 +9904,286 @@ _mm512_mask_reduce_mul_ps(__mmask16 __M, _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_ps(1), *, __M, f, , ps); } +// Used bisection method. At each step, we partition the vector with previous +// step in half, and the operation is performed on its two halves. +// This takes log2(n) steps where n is the number of elements in the vector. +// This macro uses only intrinsics from the AVX512F feature. + +// Vec512 - Vector with size of 512. +// IntrinName - Can be one of following: {max|min}_{epi64|epu64|pd} for example: +// __mm512_max_epi64 +// T1 - Can get 'i' for int and 'd' for double.[__m512{i|d}] +// T2 - Can get 'i' for int and 'f' for float. 
[__v8d{i|f}] + +#define _mm512_reduce_maxMin_64bit(Vec512, IntrinName, T1, T2) __extension__({ \ +Vec512 = _mm512_##IntrinName( \ +(__m512##T1)__builtin_shufflevector( \ +(__v8d##T2)Vec512, \ +(__v8d##T2)Vec512, \ + 0, 1, 2, 3, -1, -1, -1, -1), \ +(__m512##T1)__builtin_shufflevector( \ +(__v8d##T2)Vec512, \ +(__v8d##T2)Vec512, \ + 4, 5, 6, 7, -1, -1, -1, -1)); \ +Vec512 = _mm512_##IntrinName( \ +(__m512##T1)__builtin_shufflevector( \ +(__v8d##T2)Vec512, \ +(__v8d##T2)Vec512, \ + 0, 1, -1, -1, -1, -1, -1, -1),\ +(__m512##T1)__builtin_shufflevector( \ +(__v8d##T2)Vec512, \ +(__v8d##T2)Vec512, \ + 2, 3, -1, -1, -1, -1, -1, \ + -1)); \ +Vec512 = _mm512_##IntrinName( \ +(__m512##T1)__builtin_shufflevector( \ +(__v8d##T2)Vec512, \ +(__v8d##T2)Vec512, \ +0, -1, -1, -1, -1, -1, -1, -1),\ +(__m512##T1)__builtin_shufflevector( \ +(__v8d##T2)Vec512, \ +(__v8d##T2)Vec512, \ +1, -1, -1, -1, -1, -1, -1, -1))\ +; \ +return Vec512[0]; \ + }) + +static __inline__ long long __DEFAULT_FN_ATTRS +_mm512_reduce_max_epi64(__m512i __V) { + _mm512_reduce_maxMin_64bit(__V, max_epi64, i, i); +} + +static __inline__ unsigned long long __DEFAULT_FN_ATTRS +_mm512_reduce_max_epu64(__m512i __V) { + _mm512_reduce_maxMin_64bit(__V, max_epu64, i, i); +} + +static __inline__ double __DEFAULT_FN_ATTRS +_mm512_reduce_max_pd(__m512d __V) { + _mm512_reduce_maxMin_64bit(__V, max_pd, d, f); +} + +static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_min_epi64 +(__m512i __V) { + _mm512_reduce_maxMin_64bit(__V, min_epi64,
r285419 - Fixing small problem with avx512-reduceIntrin.c test on some OS.
Author: mzuckerm Date: Fri Oct 28 12:25:26 2016 New Revision: 285419 URL: http://llvm.org/viewvc/llvm-project?rev=285419&view=rev Log: Fixing small problem with avx512-reduceIntrin.c test on some OS. Modified: cfe/trunk/test/CodeGen/avx512-reduceIntrin.c Modified: cfe/trunk/test/CodeGen/avx512-reduceIntrin.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512-reduceIntrin.c?rev=285419&r1=285418&r2=285419&view=diff == --- cfe/trunk/test/CodeGen/avx512-reduceIntrin.c (original) +++ cfe/trunk/test/CodeGen/avx512-reduceIntrin.c Fri Oct 28 12:25:26 2016 @@ -391,7 +391,7 @@ double test_mm512_mask_reduce_add_pd(__m double test_mm512_mask_reduce_mul_pd(__mmask8 __M, __m512d __W){ // CHECK: {{.*}} = bitcast i8 %__M to <8 x i1> - // CHECK: {{.*}} = select <8 x i1> %0, <8 x double> %__W, <8 x double> + // CHECK: {{.*}} = select <8 x i1> {{.*}}, <8 x double> %__W, <8 x double> // CHECK: %shuffle.i = shufflevector <8 x double> {{.*}}, <8 x double> undef, <4 x i32> // CHECK: %shuffle1.i = shufflevector <8 x double> {{.*}}, <8 x double> undef, <4 x i32> // CHECK: %mul.i = fmul <4 x double> %shuffle.i, %shuffle1.i @@ -425,7 +425,7 @@ float test_mm512_mask_reduce_add_ps(__mm float test_mm512_mask_reduce_mul_ps(__mmask16 __M, __m512 __W){ // CHECK: {{.*}} = bitcast i16 %__M to <16 x i1> - // CHECK: {{.*}} = select <16 x i1> %0, <16 x float> %__W, <16 x float> + // CHECK: {{.*}} = select <16 x i1> {{.*}}, <16 x float> %__W, <16 x float> // CHECK: %shuffle.i = shufflevector <16 x float> {{.*}}, <16 x float> undef, <8 x i32> // CHECK: %shuffle1.i = shufflevector <16 x float> {{.*}}, <16 x float> undef, <8 x i32> // CHECK: %mul.i = fmul <8 x float> %shuffle.i, %shuffle1.i ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r285405 - 1. Fixing small types issue (PD|PS) (reduce) .
Author: mzuckerm Date: Fri Oct 28 10:16:03 2016 New Revision: 285405 URL: http://llvm.org/viewvc/llvm-project?rev=285405&view=rev Log: 1. Fixing small types issue (PD|PS) (reduce) . 2. Cosmetic changes Modified: cfe/trunk/lib/Headers/avx512fintrin.h cfe/trunk/test/CodeGen/avx512-reduceIntrin.c Modified: cfe/trunk/lib/Headers/avx512fintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=285405&r1=285404&r2=285405&view=diff == --- cfe/trunk/lib/Headers/avx512fintrin.h (original) +++ cfe/trunk/lib/Headers/avx512fintrin.h Fri Oct 28 10:16:03 2016 @@ -9668,7 +9668,7 @@ _mm512_mask_abs_pd(__m512d __W, __mmask8 // This takes log2(n) steps where n is the number of elements in the vector. // Vec512 - Vector with size 512. -// Operator - Can be one of following: +,*,&&,|| +// Operator - Can be one of following: +,*,&,| // T2 - Can get 'i' for int and 'f' for float. // T1 - Can get 'i' for int and 'd' for double. @@ -9725,54 +9725,60 @@ static __inline__ double __DEFAULT_FN_AT } // Vec512 - Vector with size 512. -// Operator - Can be one of following: +,*,&&,|| +// Vec512Neutral - All vector elements set to the identity element. +// Identity element: {+,0},{*,1},{&,0xFFFFFFFFFFFFFFFF},{|,0} +// Operator - Can be one of following: +,*,&,| // Mask - Intrinsic Mask -// Neutral - Identity element: {+,0},{*,1},{&&,0xFFFFFFFFFFFFFFFF},{||,0} // T2 - Can get 'i' for int and 'f' for float. // T1 - Can get 'i' for int and 'd' for packed double-precision. // T3 - Can be Pd for packed double or q for q-word. 
-#define _mm512_mask_reduce_operator_64bit(Vec512, Operator, Mask, Neutral, \ - T2, T1, T3) \ +#define _mm512_mask_reduce_operator_64bit(Vec512, Vec512Neutral, Operator, \ + Mask, T2, T1, T3) \ __extension__({ \ Vec512 = __builtin_ia32_select##T3##_512( \ - (__mmask8)Mask, (__v8d##T2)Vec512, \ - (__v8d##T2)_mm512_set1_epi64(Neutral)); \ + (__mmask8)Mask, \ + (__v8d##T2)Vec512, \ + (__v8d##T2)Vec512Neutral); \ _mm512_reduce_operator_64bit(Vec512, Operator, T2, T1); \ }) static __inline__ long long __DEFAULT_FN_ATTRS _mm512_mask_reduce_add_epi64(__mmask8 __M, __m512i __W) { - _mm512_mask_reduce_operator_64bit(__W, +, __M, 0, i, i, q); + _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_epi64(0), +, __M, i, i, q); } static __inline__ long long __DEFAULT_FN_ATTRS _mm512_mask_reduce_mul_epi64(__mmask8 __M, __m512i __W) { - _mm512_mask_reduce_operator_64bit(__W, *, __M, 1, i, i, q); + _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_epi64(1), *, __M, i, i, q); } static __inline__ long long __DEFAULT_FN_ATTRS _mm512_mask_reduce_and_epi64(__mmask8 __M, __m512i __W) { - _mm512_mask_reduce_operator_64bit(__W, &, __M, 0xFFFFFFFFFFFFFFFF, i, i, q); + _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_epi64(0xFFFFFFFFFFFFFFFF), +&, __M, i, i, q); } static __inline__ long long __DEFAULT_FN_ATTRS _mm512_mask_reduce_or_epi64(__mmask8 __M, __m512i __W) { - _mm512_mask_reduce_operator_64bit(__W, |, __M, 0, i, i, q); + _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_epi64(0), |, __M, +i, i, q); } static __inline__ double __DEFAULT_FN_ATTRS _mm512_mask_reduce_add_pd(__mmask8 __M, __m512d __W) { - _mm512_mask_reduce_operator_64bit(__W, +, __M, 0, f, d, pd); + _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_pd(0), +, __M, +f, d, pd); } static __inline__ double __DEFAULT_FN_ATTRS _mm512_mask_reduce_mul_pd(__mmask8 __M, __m512d __W) { - _mm512_mask_reduce_operator_64bit(__W, *, __M, 1, f, d, pd); + _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_pd(1), *, __M, +f, d, pd); } // Vec512 - Vector with size 512. 
-// Operator - Can be one of following: +,*,&&,|| +// Operator - Can be one of following: +,*,&,| // T2 - Can get 'i' for int and ' ' for packed single. // T1 - Can get 'i' for int and 'f' for float. @@ -9849,50 +9855,53 @@ _mm512_reduce_mul_ps(__m512 __W) { } // Vec512 - Vector with size 512. -// Operator - Can be one of following: +,*,&&,|| +// Vec512Neutral - All vector elements set to the identity element. +// Identity element: {+,0},{*,1},{&,0x},{|,0} +// Operator - Can be one of following: +,*,&,| // Mask - Intrinsic Mask -// Neutral - Identity element: {
r285054 - [X86][AVX512][Clang][Intrinsics][reduce] Adding missing reduce (Operators: +, *, &&, ||) intrinsics to Clang
Author: mzuckerm Date: Tue Oct 25 02:56:04 2016 New Revision: 285054 URL: http://llvm.org/viewvc/llvm-project?rev=285054&view=rev Log: [X86][AVX512][Clang][Intrinsics][reduce] Adding missing reduce (Operators: +,*,&&,||) intrinsics to Clang Committed after LGTM and check-all Vector-reduction arithmetic accepts vectors as inputs and produces scalars as outputs. This class of vector operation forms the basis of many scientific computations. In vector-reduction arithmetic, the evaluation off is independent of the order of the input elements of V. Used bisection method. At each step, we partition the vector with previous step in half, and the operation is performed on its two halves. This takes log2(n) steps where n is the number of elements in the vector. Reviewer: 1. igorb 2. craig.topper Differential Revision: https://reviews.llvm.org/D25527 Added: cfe/trunk/test/CodeGen/avx512-reduceIntrin.c Modified: cfe/trunk/lib/Headers/avx512fintrin.h Modified: cfe/trunk/lib/Headers/avx512fintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=285054&r1=285053&r2=285054&view=diff == --- cfe/trunk/lib/Headers/avx512fintrin.h (original) +++ cfe/trunk/lib/Headers/avx512fintrin.h Tue Oct 25 02:56:04 2016 @@ -9658,6 +9658,243 @@ _mm512_mask_abs_pd(__m512d __W, __mmask8 return (__m512d)_mm512_mask_and_epi64((__v8di)__W, __K, _mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A); } +// Vector-reduction arithmetic accepts vectors as inputs and produces scalars as +// outputs. This class of vector operation forms the basis of many scientific +// computations. In vector-reduction arithmetic, the evaluation off is +// independent of the order of the input elements of V. + +// Used bisection method. At each step, we partition the vector with previous +// step in half, and the operation is performed on its two halves. +// This takes log2(n) steps where n is the number of elements in the vector. + +// Vec512 - Vector with size 512. 
+// Operator - Can be one of following: +,*,&&,|| +// T2 - Can get 'i' for int and 'f' for float. +// T1 - Can get 'i' for int and 'd' for double. + +#define _mm512_reduce_operator_64bit(Vec512, Operator, T2, T1) \ + __extension__({ \ +__m256##T1 Vec256 = __builtin_shufflevector( \ +(__v8d##T2)Vec512, \ +(__v8d##T2)Vec512, \ +0, 1, 2, 3)\ +Operator \ +__builtin_shufflevector( \ +(__v8d##T2)Vec512, \ +(__v8d##T2)Vec512, \ +4, 5, 6, 7); \ +__m128##T1 Vec128 = __builtin_shufflevector( \ +(__v4d##T2)Vec256, \ +(__v4d##T2)Vec256, \ +0, 1) \ +Operator \ +__builtin_shufflevector( \ +(__v4d##T2)Vec256, \ +(__v4d##T2)Vec256, \ +2, 3); \ +Vec128 = __builtin_shufflevector((__v2d##T2)Vec128,\ + (__v2d##T2)Vec128, 0, -1) \ + Operator \ + __builtin_shufflevector((__v2d##T2)Vec128,\ + (__v2d##T2)Vec128, 1, -1);\ +return Vec128[0]; \ + }) + +static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_add_epi64(__m512i __W) { + _mm512_reduce_operator_64bit(__W, +, i, i); +} + +static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_mul_epi64(__m512i __W) { + _mm512_reduce_operator_64bit(__W, *, i, i); +} + +static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_and_epi64(__m512i __W) { + _mm512_reduce_operator_64bit(__W, &, i, i); +} + +static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_or_epi64(__m512i __W) { + _mm512_reduce_operator_64bit(__W, |, i, i); +} + +static __inline__ double __DEFAULT_FN_ATTRS _mm512_reduce_add_pd(__m512d __W) { + _mm512_reduce_operator_64bit(__W, +, f, d); +} + +static __inline__ double __DEFAULT_FN_ATTRS _mm512_reduce_mul_pd(__m512d __W) { + _mm512_reduce_operator_64bit(__W, *, f, d); +}
r284967 - revert r284963
Author: mzuckerm Date: Mon Oct 24 06:30:23 2016 New Revision: 284967 URL: http://llvm.org/viewvc/llvm-project?rev=284967&view=rev Log: revert r284963 because new test file is failing in some OS. test/CodeGen/avx512-reduceIntrin.c Removed: cfe/trunk/test/CodeGen/avx512-reduceIntrin.c Modified: cfe/trunk/lib/Headers/avx512fintrin.h Modified: cfe/trunk/lib/Headers/avx512fintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=284967&r1=284966&r2=284967&view=diff == --- cfe/trunk/lib/Headers/avx512fintrin.h (original) +++ cfe/trunk/lib/Headers/avx512fintrin.h Mon Oct 24 06:30:23 2016 @@ -9658,243 +9658,6 @@ _mm512_mask_abs_pd(__m512d __W, __mmask8 return (__m512d)_mm512_mask_and_epi64((__v8di)__W, __K, _mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A); } -// Vector-reduction arithmetic accepts vectors as inputs and produces scalars as -// outputs. This class of vector operation forms the basis of many scientific -// computations. In vector-reduction arithmetic, the evaluation off is -// independent of the order of the input elements of V. - -// Used bisection method. At each step, we partition the vector with previous -// step in half, and the operation is performed on its two halves. -// This takes log2(n) steps where n is the number of elements in the vector. - -// Vec512 - Vector with size 512. -// Operator - Can be one of following: +,*,&&,|| -// T2 - Can get 'i' for int and 'f' for float. -// T1 - Can get 'i' for int and 'd' for double. 
- -#define _mm512_reduce_operator_64bit(Vec512, Operator, T2, T1) \ - __extension__({ \ -__m256##T1 Vec256 = __builtin_shufflevector( \ -(__v8d##T2)Vec512, \ -(__v8d##T2)Vec512, \ -0, 1, 2, 3)\ -Operator \ -__builtin_shufflevector( \ -(__v8d##T2)Vec512, \ -(__v8d##T2)Vec512, \ -4, 5, 6, 7); \ -__m128##T1 Vec128 = __builtin_shufflevector( \ -(__v4d##T2)Vec256, \ -(__v4d##T2)Vec256, \ -0, 1) \ -Operator \ -__builtin_shufflevector( \ -(__v4d##T2)Vec256, \ -(__v4d##T2)Vec256, \ -2, 3); \ -Vec128 = __builtin_shufflevector((__v2d##T2)Vec128,\ - (__v2d##T2)Vec128, 0, -1) \ - Operator \ - __builtin_shufflevector((__v2d##T2)Vec128,\ - (__v2d##T2)Vec128, 1, -1);\ -return Vec128[0]; \ - }) - -static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_add_epi64(__m512i __W) { - _mm512_reduce_operator_64bit(__W, +, i, i); -} - -static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_mul_epi64(__m512i __W) { - _mm512_reduce_operator_64bit(__W, *, i, i); -} - -static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_and_epi64(__m512i __W) { - _mm512_reduce_operator_64bit(__W, &, i, i); -} - -static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_or_epi64(__m512i __W) { - _mm512_reduce_operator_64bit(__W, |, i, i); -} - -static __inline__ double __DEFAULT_FN_ATTRS _mm512_reduce_add_pd(__m512d __W) { - _mm512_reduce_operator_64bit(__W, +, f, d); -} - -static __inline__ double __DEFAULT_FN_ATTRS _mm512_reduce_mul_pd(__m512d __W) { - _mm512_reduce_operator_64bit(__W, *, f, d); -} - -// Vec512 - Vector with size 512. -// Operator - Can be one of following: +,*,&&,|| -// Mask - Intrinsic Mask -// Neutral - Identity element: {+,0},{*,1},{&&,0x},{||,0} -// T2 - Can get 'i' for int and 'f' for float. -// T1 - Can get 'i' for int and 'd' for packed double-precision. -// T3 - Can be Pd for packed double or q for q-word. - -#define _mm512_mask_reduce_operator_64bit(Vec512, Operator, Mask, Neutral, \ - T2, T1, T3) \ - __extension__({ \ -Vec512 = __builtin_ia32_select
r284963 - [X86][AVX512][Clang][Intrinsics][reduce] Adding missing reduce (Operators: +, *, &&, ||) intrinsics to Clang
Author: mzuckerm Date: Mon Oct 24 05:53:20 2016 New Revision: 284963 URL: http://llvm.org/viewvc/llvm-project?rev=284963&view=rev Log: [X86][AVX512][Clang][Intrinsics][reduce] Adding missing reduce (Operators: +,*,&&,||) intrinsics to Clang Committed after LGTM and check-all Vector-reduction arithmetic accepts vectors as inputs and produces scalars as outputs. This class of vector operation forms the basis of many scientific computations. In vector-reduction arithmetic, the evaluation off is independent of the order of the input elements of V. Used bisection method. At each step, we partition the vector with previous step in half, and the operation is performed on its two halves. This takes log2(n) steps where n is the number of elements in the vector. Differential Revision: https://reviews.llvm.org/D25527 Added: cfe/trunk/test/CodeGen/avx512-reduceIntrin.c Modified: cfe/trunk/lib/Headers/avx512fintrin.h Modified: cfe/trunk/lib/Headers/avx512fintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=284963&r1=284962&r2=284963&view=diff == --- cfe/trunk/lib/Headers/avx512fintrin.h (original) +++ cfe/trunk/lib/Headers/avx512fintrin.h Mon Oct 24 05:53:20 2016 @@ -9658,6 +9658,243 @@ _mm512_mask_abs_pd(__m512d __W, __mmask8 return (__m512d)_mm512_mask_and_epi64((__v8di)__W, __K, _mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A); } +// Vector-reduction arithmetic accepts vectors as inputs and produces scalars as +// outputs. This class of vector operation forms the basis of many scientific +// computations. In vector-reduction arithmetic, the evaluation off is +// independent of the order of the input elements of V. + +// Used bisection method. At each step, we partition the vector with previous +// step in half, and the operation is performed on its two halves. +// This takes log2(n) steps where n is the number of elements in the vector. + +// Vec512 - Vector with size 512. 
+// Operator - Can be one of following: +,*,&&,|| +// T2 - Can get 'i' for int and 'f' for float. +// T1 - Can get 'i' for int and 'd' for double. + +#define _mm512_reduce_operator_64bit(Vec512, Operator, T2, T1) \ + __extension__({ \ +__m256##T1 Vec256 = __builtin_shufflevector( \ +(__v8d##T2)Vec512, \ +(__v8d##T2)Vec512, \ +0, 1, 2, 3)\ +Operator \ +__builtin_shufflevector( \ +(__v8d##T2)Vec512, \ +(__v8d##T2)Vec512, \ +4, 5, 6, 7); \ +__m128##T1 Vec128 = __builtin_shufflevector( \ +(__v4d##T2)Vec256, \ +(__v4d##T2)Vec256, \ +0, 1) \ +Operator \ +__builtin_shufflevector( \ +(__v4d##T2)Vec256, \ +(__v4d##T2)Vec256, \ +2, 3); \ +Vec128 = __builtin_shufflevector((__v2d##T2)Vec128,\ + (__v2d##T2)Vec128, 0, -1) \ + Operator \ + __builtin_shufflevector((__v2d##T2)Vec128,\ + (__v2d##T2)Vec128, 1, -1);\ +return Vec128[0]; \ + }) + +static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_add_epi64(__m512i __W) { + _mm512_reduce_operator_64bit(__W, +, i, i); +} + +static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_mul_epi64(__m512i __W) { + _mm512_reduce_operator_64bit(__W, *, i, i); +} + +static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_and_epi64(__m512i __W) { + _mm512_reduce_operator_64bit(__W, &, i, i); +} + +static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_or_epi64(__m512i __W) { + _mm512_reduce_operator_64bit(__W, |, i, i); +} + +static __inline__ double __DEFAULT_FN_ATTRS _mm512_reduce_add_pd(__m512d __W) { + _mm512_reduce_operator_64bit(__W, +, f, d); +} + +static __inline__ double __DEFAULT_FN_ATTRS _mm512_reduce_mul_pd(__m512d __W) { + _mm512_reduce_operator_64bit(__W, *, f, d); +} + +// Vec512 - Vector with size 512. +// Operator -
r284213 - [x86][ms-inline-asm] use of "jmp short" in asm is not supported
Author: mzuckerm Date: Fri Oct 14 03:13:27 2016 New Revision: 284213 URL: http://llvm.org/viewvc/llvm-project?rev=284213&view=rev Log: [x86][ms-inline-asm] use of "jmp short" in asm is not supported Test linked to: https://reviews.llvm.org/D24957 Committing in the name of Ziv Izhar: After check-all and LGTM . Differential Revision: https://reviews.llvm.org/D24958 Modified: cfe/trunk/test/CodeGen/ms-inline-asm.c Modified: cfe/trunk/test/CodeGen/ms-inline-asm.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/ms-inline-asm.c?rev=284213&r1=284212&r2=284213&view=diff == --- cfe/trunk/test/CodeGen/ms-inline-asm.c (original) +++ cfe/trunk/test/CodeGen/ms-inline-asm.c Fri Oct 14 03:13:27 2016 @@ -634,6 +634,15 @@ void label5() { // CHECK: call void asm sideeffect inteldialect "jmp {{.*}}__MSASMLABEL_.5__dollar_label$$\0A\09{{.*}}__MSASMLABEL_.5__dollar_label$$:", "~{dirflag},~{fpsr},~{flags}"() } +void label6(){ + __asm { + jmp short label +label: + } + // CHECK-LABEL: define void @label6 + // CHECK: call void asm sideeffect inteldialect "jmp {{.*}}__MSASMLABEL_.6__label\0A\09{{.*}}__MSASMLABEL_.6__label:", "~{dirflag},~{fpsr},~{flags}"() +} + typedef union _LARGE_INTEGER { struct { unsigned int LowPart; ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r283716 - [x86][inline-asm][clang] accept 'v' constraint
Author: mzuckerm Date: Mon Oct 10 00:45:54 2016 New Revision: 283716 URL: http://llvm.org/viewvc/llvm-project?rev=283716&view=rev Log: [x86][inline-asm][clang] accept 'v' constraint Commit in the name of: Coby Tayree 1.'v' constraint for (x86) non-avx arch imitates the already implemented 'x' constraint, i.e. allows XMM{0-15} & YMM{0-15} depending on the apparent arch & mode (32/64). 2.for the avx512 arch it allows [X,Y,Z]MM{0-31} (mode dependent) This patch applies the needed changes to clang LLVM patch: https://reviews.llvm.org/D25005 Differential Revision: D25004 Added: cfe/trunk/test/CodeGen/x86-inline-asm-v-constraint.c (with props) Modified: cfe/trunk/lib/Basic/Targets.cpp Modified: cfe/trunk/lib/Basic/Targets.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets.cpp?rev=283716&r1=283715&r2=283716&view=diff == --- cfe/trunk/lib/Basic/Targets.cpp (original) +++ cfe/trunk/lib/Basic/Targets.cpp Mon Oct 10 00:45:54 2016 @@ -4005,6 +4005,7 @@ X86TargetInfo::validateAsmConstraint(con case 'u': // Second from top of floating point stack. case 'q': // Any register accessible as [r]l: a, b, c, and d. case 'y': // Any MMX register. + case 'v': // Any {X,Y,Z}MM register (Arch & context dependent) case 'x': // Any SSE register. case 'Q': // Any register accessible as [r]h: a, b, c, and d. case 'R': // "Legacy" registers: ax, bx, cx, dx, di, si, sp, bp. @@ -4045,6 +4046,7 @@ bool X86TargetInfo::validateOperandSize( case 't': case 'u': return Size <= 128; + case 'v': case 'x': if (SSELevel >= AVX512F) // 512-bit zmm registers can be used if target supports AVX512F. 
Added: cfe/trunk/test/CodeGen/x86-inline-asm-v-constraint.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/x86-inline-asm-v-constraint.c?rev=283716&view=auto == --- cfe/trunk/test/CodeGen/x86-inline-asm-v-constraint.c (added) +++ cfe/trunk/test/CodeGen/x86-inline-asm-v-constraint.c Mon Oct 10 00:45:54 2016 @@ -0,0 +1,30 @@ +// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -emit-llvm -target-cpu x86-64 -o - | FileCheck %s --check-prefix SSE +// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -emit-llvm -target-cpu skylake -D AVX -o - | FileCheck %s --check-prefixes AVX,SSE +// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -emit-llvm -target-cpu skylake-avx512 -D AVX512 -D AVX -o - | FileCheck %s --check-prefixes AVX512,AVX,SSE +// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -emit-llvm -target-cpu knl -D AVX -D AVX512 -o - | FileCheck %s --check-prefixes AVX512,AVX,SSE + +typedef float __m128 __attribute__ ((vector_size (16))); +typedef float __m256 __attribute__ ((vector_size (32))); +typedef float __m512 __attribute__ ((vector_size (64))); + +// SSE: call <4 x float> asm "vmovhlps $1, $2, $0", "=v,v,v,~{dirflag},~{fpsr},~{flags}"(i64 %0, <4 x float> %1) +__m128 testXMM(__m128 _xmm0, long _l) { + __asm__("vmovhlps %1, %2, %0" :"=v"(_xmm0) : "v"(_l), "v"(_xmm0)); + return _xmm0; +} + +// AVX: call <8 x float> asm "vmovsldup $1, $0", "=v,v,~{dirflag},~{fpsr},~{flags}"(<8 x float> %0) +__m256 testYMM(__m256 _ymm0) { +#ifdef AVX + __asm__("vmovsldup %1, %0" :"=v"(_ymm0) : "v"(_ymm0)); +#endif + return _ymm0; +} + +// AVX512: call <16 x float> asm "vpternlogd $$0, $1, $2, $0", "=v,v,v,~{dirflag},~{fpsr},~{flags}"(<16 x float> %0, <16 x float> %1) +__m512 testZMM(__m512 _zmm0, __m512 _zmm1) { +#ifdef AVX512 + __asm__("vpternlogd $0, %1, %2, %0" :"=v"(_zmm0) : "v"(_zmm1), "v"(_zmm0)); +#endif + return _zmm0; +} Propchange: cfe/trunk/test/CodeGen/x86-inline-asm-v-constraint.c -- svn:eol-style = native Propchange: 
cfe/trunk/test/CodeGen/x86-inline-asm-v-constraint.c -- svn:keywords = Author Date Id Rev URL Propchange: cfe/trunk/test/CodeGen/x86-inline-asm-v-constraint.c -- svn:mime-type = text/plain ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r283314 - [Clang][AVX512][BuiltIn]Adding missing intrinsics move_{sd|ss} to clang
Author: mzuckerm Date: Wed Oct 5 07:56:06 2016 New Revision: 283314 URL: http://llvm.org/viewvc/llvm-project?rev=283314&view=rev Log: [Clang][AVX512][BuiltIn]Adding missing intrinsics move_{sd|ss} to clang Differential Revision: http://reviews.llvm.org/D21021 Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def cfe/trunk/lib/Headers/avx512fintrin.h cfe/trunk/test/CodeGen/avx512f-builtins.c Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=283314&r1=283313&r2=283314&view=diff == --- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original) +++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Wed Oct 5 07:56:06 2016 @@ -2021,6 +2021,8 @@ TARGET_BUILTIN(__builtin_ia32_expandload TARGET_BUILTIN(__builtin_ia32_expandsf512_mask, "V16fV16fV16fUs","","avx512f") TARGET_BUILTIN(__builtin_ia32_expandsi512_mask, "V16iV16iV16iUs","","avx512f") TARGET_BUILTIN(__builtin_ia32_cvtps2pd512_mask, "V8dV8fV8dUcIi","","avx512f") +TARGET_BUILTIN(__builtin_ia32_movss_mask, "V4fV4fV4fV4fUc","","avx512f") +TARGET_BUILTIN(__builtin_ia32_movsd_mask, "V2dV2dV2dV2dUc","","avx512f") TARGET_BUILTIN(__builtin_ia32_compressstoredf512_mask, "vV8d*V8dUc","","avx512f") TARGET_BUILTIN(__builtin_ia32_compressstoredi512_mask, "vV8LLi*V8LLiUc","","avx512f") TARGET_BUILTIN(__builtin_ia32_compressstoresf512_mask, "vV16f*V16fUs","","avx512f") Modified: cfe/trunk/lib/Headers/avx512fintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=283314&r1=283313&r2=283314&view=diff == --- cfe/trunk/lib/Headers/avx512fintrin.h (original) +++ cfe/trunk/lib/Headers/avx512fintrin.h Wed Oct 5 07:56:06 2016 @@ -9140,6 +9140,40 @@ _mm512_maskz_moveldup_ps (__mmask16 __U, (__v16sf)_mm512_setzero_ps()); } +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_mask_move_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_movss_mask ((__v4sf) __A, (__v4sf) __B, + 
(__v4sf) __W, + (__mmask8) __U); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_maskz_move_ss (__mmask8 __U, __m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_movss_mask ((__v4sf) __A, (__v4sf) __B, + (__v4sf) + _mm_setzero_si128(), + (__mmask8) __U); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_mask_move_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) +{ + return (__m128d) __builtin_ia32_movsd_mask ((__v2df) __A, (__v2df) __B, + (__v2df) __W, + (__mmask8) __U); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_maskz_move_sd (__mmask8 __U, __m128d __A, __m128d __B) +{ + return (__m128d) __builtin_ia32_movsd_mask ((__v2df) __A, (__v2df) __B, + (__v2df) + _mm_setzero_pd (), + (__mmask8) __U); +} + #define _mm512_shuffle_epi32(A, I) __extension__ ({ \ (__m512i)__builtin_shufflevector((__v16si)(__m512i)(A), \ (__v16si)_mm512_undefined_epi32(), \ Modified: cfe/trunk/test/CodeGen/avx512f-builtins.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512f-builtins.c?rev=283314&r1=283313&r2=283314&view=diff == --- cfe/trunk/test/CodeGen/avx512f-builtins.c (original) +++ cfe/trunk/test/CodeGen/avx512f-builtins.c Wed Oct 5 07:56:06 2016 @@ -7863,6 +7863,34 @@ __m512d test_mm512_setzero_pd() return _mm512_setzero_pd(); } +__m128 test_mm_mask_move_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) +{ + // CHECK-LABEL: @test_mm_mask_move_ss + // CHECK: @llvm.x86.avx512.mask.move.ss + return _mm_mask_move_ss ( __W, __U, __A, __B); +} + +__m128 test_mm_maskz_move_ss (__mmask8 __U, __m128 __A, __m128 __B) +{ + // CHECK-LABEL: @test_mm_maskz_move_ss + // CHECK: @llvm.x86.avx512.mask.move.ss + return _mm_maskz_move_ss (__U, __A, __B); +} + +__m128d test_mm_mask_move_sd (__m128 __W, __mmask8 __U, __m128d __A, __m128d __B) +{ + // CHECK-LABEL: @test_mm_mask_move_sd + // CHECK: @llvm.x86.avx512.mask.move.sd + return _mm_mask_move_sd ( __W, __U, __A, __B); +} + +__m128d test_mm_maskz_move_sd (__mmask8 __U, __m128d __A, 
__m128d __B) +{ + // CHECK-LABEL: @test_mm_maskz_move_sd + // CHECK: @llvm.x86.avx512.mask.move.sd + return _mm_maskz_move_sd (__U, __A, __B); +} + __m512d test_mm512_abs_pd(__m512d a){ // CHECK-LABEL: @test_mm512_abs_pd // CHECK: and <8 x i64> ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D21505: [Clang][AVX512][Intrinsics]Adding intrinsics for mov{ss|sd} instruction set
m_zuckerman updated this revision to Diff 70383. https://reviews.llvm.org/D21505 Files: lib/Headers/avx512fintrin.h test/CodeGen/avx512f-builtins.c Index: test/CodeGen/avx512f-builtins.c === --- test/CodeGen/avx512f-builtins.c +++ test/CodeGen/avx512f-builtins.c @@ -241,6 +241,20 @@ _mm512_mask_store_pd(p, m, a); } +void test_mm_mask_store_ss (float * __W, __mmask8 __U, __m128 __A) +{ + // CHECK-LABEL: @test_mm_mask_store_ss + // CHECK: store float {{.*}}, float* {{.*}} + return _mm_mask_store_ss (__W, __U, __A); +} + +void test_mm_mask_store_sd (double * __W, __mmask8 __U, __m128d __A) +{ + // CHECK-LABEL: @test_mm_mask_store_sd + // CHECK: store double {{.*}}, double* {{.*}} + return _mm_mask_store_sd ( __W, __U, __A); +} + void test_mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A) { // CHECK-LABEL: @test_mm512_mask_storeu_epi32 // CHECK: @llvm.masked.store.v16i32.p0v16i32(<16 x i32> %{{.*}}, <16 x i32>* %{{.*}}, i32 1, <16 x i1> %{{.*}}) @@ -371,6 +385,46 @@ return _mm512_maskz_load_pd(__U, __P); } +__m128 test_mm_mask_load_ss (__m128 __W, __mmask8 __U, float const* __A) +{ + // CHECK-LABEL: @test_mm_mask_load_ss + // CHECK:cond.true.i + // CHECK: store <4 x float> {{.*}}, <4 x float>* {{.*}} + // CHECK:cond.false.i + // CHECK: store <4 x float> {{.*}}, <4 x float>* {{.*}} + return _mm_mask_load_ss ( __W, __U, __A); +} + +__m128 test_mm_maskz_load_ss (__mmask8 __U, float const* __A) +{ + // CHECK-LABEL: @test_mm_maskz_load_ss + // CHECK:cond.true.i + // CHECK: store <4 x float> {{.*}}, <4 x float>* {{.*}} + // CHECK:cond.false.i + // CHECK: store <4 x float> zeroinitializer, <4 x float>* {{.*}} + return _mm_maskz_load_ss (__U, __A); +} + +__m128d test_mm_mask_load_sd (__m128 __W, __mmask8 __U, double const* __A) +{ + // CHECK-LABEL: test_mm_mask_load_sd + // CHECK:cond.true.i + // CHECK: store <2 x double> {{.*}}, <2 x double>* {{.*}} + // CHECK:cond.false.i + // CHECK: store <2 x double> {{.*}}, <2 x double>* {{.*}} + return _mm_mask_load_sd ( 
__W, __U, __A); +} + +__m128d test_mm_maskz_load_sd (__mmask8 __U, double const* __A) +{ + // CHECK-LABEL: test_mm_maskz_load_sd + // CHECK:cond.true.i + // CHECK: store <2 x double> {{.*}}, <2 x double>* {{.*}} + // CHECK:cond.false.i + // CHECK: store <2 x double> zeroinitializer, <2 x double>* {{.*}} + return _mm_maskz_load_sd (__U, __A); +} + __m512d test_mm512_set1_pd(double d) { // CHECK-LABEL: @test_mm512_set1_pd @@ -6199,6 +6253,54 @@ return _mm512_maskz_mov_ps(__U, __A); } +__m128 test_mm_mask_move_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) +{ + // CHECK-LABEL: @test_mm_mask_move_ss + // CHECK: cond.true.i + // CHECK: %6 = load <4 x float>, <4 x float>* %__B.addr.i, align 16 + // CHECK: cond.false.i + // CHECK: %7 = load <4 x float>, <4 x float>* %__W.addr.i, align 16 + // CHECK: %8 = load <4 x float>, <4 x float>* %res.i, align 16 + // CHECK: %vecins.i = insertelement <4 x float> %8, float %cond.i, i32 0 + return _mm_mask_move_ss ( __W, __U, __A, __B); +} + +__m128 test_mm_maskz_move_ss (__mmask8 __U, __m128 __A, __m128 __B) +{ + // CHECK-LABEL: @test_mm_maskz_move_ss + // CHECK: cond.true.i + // CHECK: %5 = load <4 x float>, <4 x float>* %__B.addr.i + // CHECK: cond.false.i + // CHECK: br label %_mm_maskz_move_ss.exit + // CHECK: %6 = load <4 x float>, <4 x float>* %res.i, align 16 + // CHECK: %vecins.i = insertelement <4 x float> %6, float %cond.i, i32 0 + return _mm_maskz_move_ss (__U, __A, __B); +} + +__m128d test_mm_mask_move_sd (__m128 __W, __mmask8 __U, __m128d __A, __m128d __B) +{ + // CHECK-LABEL: @test_mm_mask_move_sd + // CHECK: cond.true.i + // CHECK: %7 = load <2 x double>, <2 x double>* %__B.addr.i, align 16 + // CHECK: cond.false.i + // CHECK: %8 = load <2 x double>, <2 x double>* %__W.addr.i, align 16 + // CHECK: %9 = load <2 x double>, <2 x double>* %res.i, align 16 + // CHECK: %vecins.i = insertelement <2 x double> %9, double %cond.i, i32 0 + return _mm_mask_move_sd ( __W, __U, __A, __B); +} + +__m128d test_mm_maskz_move_sd 
(__mmask8 __U, __m128d __A, __m128d __B) +{ + // CHECK-LABEL: @test_mm_maskz_move_sd + // CHECK: cond.true.i + // CHECK: %5 = load <2 x double>, <2 x double>* %__B.addr.i, align 16 + // CHECK: cond.false.i + // CHECK: br label %_mm_maskz_move_sd.exit + // CHECK: %6 = load <2 x double>, <2 x double>* %res.i, align 16 + // CHECK: %vecins.i = insertelement <2 x double> %6, double %cond.i, i32 0 + return _mm_maskz_move_sd (__U, __A, __B); +} + void test_mm512_mask_compressstoreu_pd(void *__P, __mmask8 __U, __m512d __A) { // CHECK-LABEL: @test_mm512_mask_compressstoreu_pd // CHECK: @llvm.x86.avx512.mask.compress.store.pd.512 Index: lib/Headers/avx512fintrin.h === --- lib/Headers/avx512fintrin.h +++ lib/Headers/avx512fintrin.h @@ -4558,6 +4558,30 @@ r
Re: [PATCH] D21505: [Clang][AVX512][Intrinsics]Adding intrinsics for mov{ss|sd} instruction set
m_zuckerman updated this revision to Diff 70318. https://reviews.llvm.org/D21505 Files: lib/Headers/avx512fintrin.h test/CodeGen/avx512f-builtins.c Index: test/CodeGen/avx512f-builtins.c === --- test/CodeGen/avx512f-builtins.c +++ test/CodeGen/avx512f-builtins.c @@ -241,6 +241,21 @@ _mm512_mask_store_pd(p, m, a); } +void test_mm_mask_store_ss (float * __W, __mmask8 __U, __m128 __A) +{ + // CHECK-LABEL: @test_mm_mask_store_ss + // CHECK: store float {{.*}}, float* {{.*}} + return _mm_mask_store_ss (__W, __U, __A); +} + +void test_mm_mask_store_sd (double * __W, __mmask8 __U, __m128d __A) +{ + // CHECK-LABEL: @test_mm_mask_store_sd + // CHECK: store double {{.*}}, double* {{.*}} + return _mm_mask_store_sd ( __W, __U, __A); +} + + void test_mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A) { // CHECK-LABEL: @test_mm512_mask_storeu_epi32 // CHECK: @llvm.masked.store.v16i32.p0v16i32(<16 x i32> %{{.*}}, <16 x i32>* %{{.*}}, i32 1, <16 x i1> %{{.*}}) @@ -371,6 +386,38 @@ return _mm512_maskz_load_pd(__U, __P); } +__m128 test_mm_mask_load_ss (__m128 __W, __mmask8 __U, float const* __A) +{ + // CHECK-LABEL: @test_mm_mask_load_ss + // CHECK: select <4 x i1> {{.*}}, <4 x float> {{.*}}, <4 x float> {{.*}} + // CHECK: load <4 x float>, <4 x float>* {{.*}} + return _mm_mask_load_ss ( __W, __U, __A); +} + +__m128 test_mm_maskz_load_ss (__mmask8 __U, float const* __A) +{ + // CHECK-LABEL: @test_mm_maskz_load_ss + // CHECK: select <4 x i1> {{.*}}, <4 x float> {{.*}}, <4 x float> {{.*}} + // CHECK: load <4 x float>, <4 x float>* {{.*}} + return _mm_maskz_load_ss (__U, __A); +} + +__m128d test_mm_mask_load_sd (__m128 __W, __mmask8 __U, double const* __A) +{ + // CHECK-LABEL: @test_mm_mask_load_sd + // CHECK: select <2 x i1>{{.*}}, <2 x double>{{.*}}, <2 x double>{{.*}} + // CHECK: load <2 x double>, <2 x double>* {{.*}} + return _mm_mask_load_sd ( __W, __U, __A); +} + +__m128d test_mm_maskz_load_sd (__mmask8 __U, double const* __A) +{ + // CHECK-LABEL: 
@test_mm_maskz_load_sd + // CHECK: select <2 x i1> {{.*}}, <2 x double> {{.*}}, <2 x double> {{.*}} + // CHECK: load <2 x double>, <2 x double>* {{.*}} + return _mm_maskz_load_sd (__U, __A); +} + __m512d test_mm512_set1_pd(double d) { // CHECK-LABEL: @test_mm512_set1_pd @@ -6199,6 +6246,38 @@ return _mm512_maskz_mov_ps(__U, __A); } +__m128 test_mm_mask_move_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) +{ + // CHECK-LABEL: @test_mm_mask_move_ss + // CHECK: select <4 x i1>{{.*}}, <4 x float> {{.*}}, <4 x float>{{.*}} + // CHECK: load <4 x float>, <4 x float>* {{.*}} + return _mm_mask_move_ss ( __W, __U, __A, __B); +} + +__m128 test_mm_maskz_move_ss (__mmask8 __U, __m128 __A, __m128 __B) +{ + // CHECK-LABEL: @test_mm_maskz_move_ss + // CHECK: select <4 x i1>{{.*}}, <4 x float> {{.*}}, <4 x float>{{.*}} + // CHECK: load <4 x float>, <4 x float>* {{.*}} + return _mm_maskz_move_ss (__U, __A, __B); +} + +__m128d test_mm_mask_move_sd (__m128 __W, __mmask8 __U, __m128d __A, __m128d __B) +{ + // CHECK-LABEL: @test_mm_mask_move_sd + // CHECK: select <2 x i1>{{.*}}, <2 x double>{{.*}}, <2 x double>{{.*}} + // CHECK: load <2 x double>, <2 x double>* {{.*}} + return _mm_mask_move_sd ( __W, __U, __A, __B); +} + +__m128d test_mm_maskz_move_sd (__mmask8 __U, __m128d __A, __m128d __B) +{ + // CHECK-LABEL: @test_mm_maskz_move_sd + // CHECK: select <2 x i1> {{.*}}, <2 x double> {{.*}}, <2 x double> {{.*}} + // CHECK: load <2 x double>, <2 x double>* {{.*}} + return _mm_maskz_move_sd (__U, __A, __B); +} + void test_mm512_mask_compressstoreu_pd(void *__P, __mmask8 __U, __m512d __A) { // CHECK-LABEL: @test_mm512_mask_compressstoreu_pd // CHECK: @llvm.x86.avx512.mask.compress.store.pd.512 Index: lib/Headers/avx512fintrin.h === --- lib/Headers/avx512fintrin.h +++ lib/Headers/avx512fintrin.h @@ -4558,6 +4558,30 @@ return *(__m512i *) __P; } +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_mask_load_ss (__m128 __W, __mmask8 __U, const float* __A) +{ + return (__U & 1) ? 
_mm_load_ss(__A) : (__m128) { __W[0], 0, 0, 0}; +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_maskz_load_ss (__mmask8 __U, const float* __A) +{ + return (__U & 1) ? _mm_load_ss(__A) : (__m128) { 0, 0, 0, 0}; +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_mask_load_sd (__m128d __W, __mmask8 __U, const double* __A) +{ + return (__U & 1) ? _mm_load_sd(__A) :(__m128d) { __W[0], 0}; +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_maskz_load_sd (__mmask8 __U, const double* __A) +{ + return (__U & 1) ? _mm_load_sd(__A) :(__m128d) { 0, 0}; +} + /* SIMD store ops */ static __inline void __DEFAULT_FN_ATTRS @@ -4649,6 +4673,20 @@ *(__m512i *) __P = __A; } +static __inline__ void __DEFAULT_FN_ATTRS +_mm_mask_store_ss (float * __W, __mmask8 __U, __m128 __
Re: [PATCH] D21505: [Clang][AVX512][Intrinsics]Adding intrinsics for mov{ss|sd} instruction set
m_zuckerman marked an inline comment as done. m_zuckerman added a comment. https://reviews.llvm.org/D21505 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r275169 - [Clang][AVX512] Making cosmetic changes
Author: mzuckerm Date: Tue Jul 12 07:42:27 2016 New Revision: 275169 URL: http://llvm.org/viewvc/llvm-project?rev=275169&view=rev Log: [Clang][AVX512] Making cosmetic changes Modified: cfe/trunk/lib/Headers/avx512vbmiintrin.h Modified: cfe/trunk/lib/Headers/avx512vbmiintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512vbmiintrin.h?rev=275169&r1=275168&r2=275169&view=diff == --- cfe/trunk/lib/Headers/avx512vbmiintrin.h (original) +++ cfe/trunk/lib/Headers/avx512vbmiintrin.h Tue Jul 12 07:42:27 2016 @@ -40,8 +40,7 @@ _mm512_mask2_permutex2var_epi8 (__m512i (__v64qi) __I /* idx */ , (__v64qi) __B, - (__mmask64) - __U); + (__mmask64) __U); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -51,8 +50,7 @@ _mm512_permutex2var_epi8 (__m512i __A, _ /* idx */ , (__v64qi) __A, (__v64qi) __B, - (__mmask64) - - 1); + (__mmask64) -1); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -63,8 +61,7 @@ _mm512_mask_permutex2var_epi8 (__m512i _ /* idx */ , (__v64qi) __A, (__v64qi) __B, - (__mmask64) - __U); + (__mmask64) __U); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -75,8 +72,7 @@ _mm512_maskz_permutex2var_epi8 (__mmask6 /* idx */ , (__v64qi) __A, (__v64qi) __B, - (__mmask64) - __U); + (__mmask64) __U); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -122,8 +118,7 @@ _mm512_maskz_multishift_epi64_epi8 (__mm { return (__m512i) __builtin_ia32_vpmultishiftqb512_mask ((__v64qi) __X, (__v64qi) __Y, -(__v64qi) -_mm512_setzero_si512 (), +(__v64qi) _mm512_setzero_si512 (), (__mmask64) __M); } @@ -132,8 +127,7 @@ _mm512_multishift_epi64_epi8 (__m512i __ { return (__m512i) __builtin_ia32_vpmultishiftqb512_mask ((__v64qi) __X, (__v64qi) __Y, -(__v64qi) -_mm512_undefined_epi32 (), +(__v64qi) _mm512_undefined_epi32 (), (__mmask64) -1); } ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D21505: [Clang][AVX512][Intrinsics]Adding intrinsics for mov{ss|sd} instruction set
m_zuckerman updated this revision to Diff 62768. http://reviews.llvm.org/D21505 Files: lib/Headers/avx512fintrin.h test/CodeGen/avx512f-builtins.c Index: test/CodeGen/avx512f-builtins.c === --- test/CodeGen/avx512f-builtins.c +++ test/CodeGen/avx512f-builtins.c @@ -241,6 +241,23 @@ _mm512_mask_store_pd(p, m, a); } +void test_mm_mask_store_ss (float * __W, __mmask8 __U, __m128 __A) +{ + // CHECK-LABEL: @test_mm_mask_store_ss + // CHECK: store float {{.*}}, float* {{.*}} + // CHECK: load <4 x float>, <4 x float>* {{.*}} + return _mm_mask_store_ss (__W, __U, __A); +} + +void test_mm_mask_store_sd (double * __W, __mmask8 __U, __m128d __A) +{ + // CHECK-LABEL: @test_mm_mask_store_sd + // CHECK: store double {{.*}}, double* {{.*}} + // CHECK: load <2 x double>, <2 x double>* {{.*}} + return _mm_mask_store_sd ( __W, __U, __A); +} + + void test_mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A) { // CHECK-LABEL: @test_mm512_mask_storeu_epi32 // CHECK: @llvm.masked.store.v16i32.p0v16i32(<16 x i32> %{{.*}}, <16 x i32>* %{{.*}}, i32 1, <16 x i1> %{{.*}}) @@ -371,6 +388,38 @@ return _mm512_maskz_load_pd(__U, __P); } +__m128 test_mm_mask_load_ss (__m128 __W, __mmask8 __U, float const* __A) +{ + // CHECK-LABEL: @test_mm_mask_load_ss + // CHECK: select <4 x i1> {{.*}}, <4 x float> {{.*}}, <4 x float> {{.*}} + // CHECK: load <4 x float>, <4 x float>* {{.*}} + return _mm_mask_load_ss ( __W, __U, __A); +} + +__m128 test_mm_maskz_load_ss (__mmask8 __U, float const* __A) +{ + // CHECK-LABEL: @test_mm_maskz_load_ss + // CHECK: select <4 x i1> {{.*}}, <4 x float> {{.*}}, <4 x float> {{.*}} + // CHECK: load <4 x float>, <4 x float>* {{.*}} + return _mm_maskz_load_ss (__U, __A); +} + +__m128d test_mm_mask_load_sd (__m128 __W, __mmask8 __U, double const* __A) +{ + // CHECK-LABEL: @test_mm_mask_load_sd + // CHECK: select <2 x i1>{{.*}}, <2 x double>{{.*}}, <2 x double>{{.*}} + // CHECK: load <2 x double>, <2 x double>* {{.*}} + return _mm_mask_load_sd ( __W, __U, __A); +} + 
+__m128d test_mm_maskz_load_sd (__mmask8 __U, double const* __A) +{ + // CHECK-LABEL: @test_mm_maskz_load_sd + // CHECK: select <2 x i1> {{.*}}, <2 x double> {{.*}}, <2 x double> {{.*}} + // CHECK: load <2 x double>, <2 x double>* {{.*}} + return _mm_maskz_load_sd (__U, __A); +} + __m512d test_mm512_set1_pd(double d) { // CHECK-LABEL: @test_mm512_set1_pd @@ -6199,6 +6248,38 @@ return _mm512_maskz_mov_ps(__U, __A); } +__m128 test_mm_mask_move_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) +{ + // CHECK-LABEL: @test_mm_mask_move_ss + // CHECK: select <4 x i1>{{.*}}, <4 x float> {{.*}}, <4 x float>{{.*}} + // CHECK: load <4 x float>, <4 x float>* {{.*}} + return _mm_mask_move_ss ( __W, __U, __A, __B); +} + +__m128 test_mm_maskz_move_ss (__mmask8 __U, __m128 __A, __m128 __B) +{ + // CHECK-LABEL: @test_mm_maskz_move_ss + // CHECK: select <4 x i1>{{.*}}, <4 x float> {{.*}}, <4 x float>{{.*}} + // CHECK: load <4 x float>, <4 x float>* {{.*}} + return _mm_maskz_move_ss (__U, __A, __B); +} + +__m128d test_mm_mask_move_sd (__m128 __W, __mmask8 __U, __m128d __A, __m128d __B) +{ + // CHECK-LABEL: @test_mm_mask_move_sd + // CHECK: select <2 x i1>{{.*}}, <2 x double>{{.*}}, <2 x double>{{.*}} + // CHECK: load <2 x double>, <2 x double>* {{.*}} + return _mm_mask_move_sd ( __W, __U, __A, __B); +} + +__m128d test_mm_maskz_move_sd (__mmask8 __U, __m128d __A, __m128d __B) +{ + // CHECK-LABEL: @test_mm_maskz_move_sd + // CHECK: select <2 x i1> {{.*}}, <2 x double> {{.*}}, <2 x double> {{.*}} + // CHECK: load <2 x double>, <2 x double>* {{.*}} + return _mm_maskz_move_sd (__U, __A, __B); +} + void test_mm512_mask_compressstoreu_pd(void *__P, __mmask8 __U, __m512d __A) { // CHECK-LABEL: @test_mm512_mask_compressstoreu_pd // CHECK: @llvm.x86.avx512.mask.compress.store.pd.512 Index: lib/Headers/avx512fintrin.h === --- lib/Headers/avx512fintrin.h +++ lib/Headers/avx512fintrin.h @@ -4558,6 +4558,30 @@ return *(__m512i *) __P; } +static __inline__ __m128 __DEFAULT_FN_ATTRS 
+_mm_mask_load_ss (__m128 __W, __mmask8 __U, const float* __A) +{ + return (__U & 1) ? _mm_load_ss(__A) : (__m128) { __W[0], 0, 0, 0}; +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_maskz_load_ss (__mmask8 __U, const float* __A) +{ + return (__U & 1) ? _mm_load_ss(__A) : (__m128) { 0, 0, 0, 0}; +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_mask_load_sd (__m128d __W, __mmask8 __U, const double* __A) +{ + return (__U & 1) ? _mm_load_sd(__A) :(__m128d) { __W[0], 0}; +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_maskz_load_sd (__mmask8 __U, const double* __A) +{ + return (__U & 1) ? _mm_load_sd(__A) :(__m128d) { 0, 0}; +} + /* SIMD store ops */ static __inline void __DEFAULT_FN_ATTRS @@ -4649,6 +4673,20 @@ *(__m512i *) __P = __A;
Re: [PATCH] D21505: [Clang][AVX512][Intrinsics]Adding intrinsics for mov{ss|sd} instruction set
m_zuckerman updated this revision to Diff 62766. http://reviews.llvm.org/D21505 Files: lib/Headers/avx512fintrin.h test/CodeGen/avx512f-builtins.c Index: test/CodeGen/avx512f-builtins.c === --- test/CodeGen/avx512f-builtins.c +++ test/CodeGen/avx512f-builtins.c @@ -241,6 +241,23 @@ _mm512_mask_store_pd(p, m, a); } +__m128 test_mm_mask_store_ss (float * __W, __mmask8 __U, __m128 __A) +{ + // CHECK-LABEL: @test_mm_mask_store_ss + // CHECK: store float {{.*}}, float* {{.*}} + // CHECK: load <4 x float>, <4 x float>* {{.*}} + return _mm_mask_store_ss (__W, __U, __A); +} + +__m128d test_mm_mask_store_sd (double * __W, __mmask8 __U, __m128d __A) +{ + // CHECK-LABEL: @test_mm_mask_store_sd + // CHECK: store double {{.*}}, double* {{.*}} + // CHECK: load <2 x double>, <2 x double>* {{.*}} + return _mm_mask_store_sd ( __W, __U, __A); +} + + void test_mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A) { // CHECK-LABEL: @test_mm512_mask_storeu_epi32 // CHECK: @llvm.masked.store.v16i32.p0v16i32(<16 x i32> %{{.*}}, <16 x i32>* %{{.*}}, i32 1, <16 x i1> %{{.*}}) @@ -371,6 +388,38 @@ return _mm512_maskz_load_pd(__U, __P); } +__m128 test_mm_mask_load_ss (__m128 __W, __mmask8 __U, float const* __A) +{ + // CHECK-LABEL: @test_mm_mask_load_ss + // CHECK: select <4 x i1> {{.*}}, <4 x float> {{.*}}, <4 x float> {{.*}} + // CHECK: load <4 x float>, <4 x float>* {{.*}} + return _mm_mask_load_ss ( __W, __U, __A); +} + +__m128 test_mm_maskz_load_ss (__mmask8 __U, float const* __A) +{ + // CHECK-LABEL: @test_mm_maskz_load_ss + // CHECK: select <4 x i1> {{.*}}, <4 x float> {{.*}}, <4 x float> {{.*}} + // CHECK: load <4 x float>, <4 x float>* {{.*}} + return _mm_maskz_load_ss (__U, __A); +} + +__m128d test_mm_mask_load_sd (__m128 __W, __mmask8 __U, double const* __A) +{ + // CHECK-LABEL: @test_mm_mask_load_sd + // CHECK: select <2 x i1>{{.*}}, <2 x double>{{.*}}, <2 x double>{{.*}} + // CHECK: load <2 x double>, <2 x double>* {{.*}} + return _mm_mask_load_sd ( __W, __U, __A); +} + 
+__m128d test_mm_maskz_load_sd (__mmask8 __U, double const* __A) +{ + // CHECK-LABEL: @test_mm_maskz_load_sd + // CHECK: select <2 x i1> {{.*}}, <2 x double> {{.*}}, <2 x double> {{.*}} + // CHECK: load <2 x double>, <2 x double>* {{.*}} + return _mm_maskz_load_sd (__U, __A); +} + __m512d test_mm512_set1_pd(double d) { // CHECK-LABEL: @test_mm512_set1_pd @@ -6199,6 +6248,38 @@ return _mm512_maskz_mov_ps(__U, __A); } +__m128 test_mm_mask_move_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) +{ + // CHECK-LABEL: @test_mm_mask_move_ss + // CHECK: select <4 x i1>{{.*}}, <4 x float> {{.*}}, <4 x float>{{.*}} + // CHECK: load <4 x float>, <4 x float>* {{.*}} + return _mm_mask_move_ss ( __W, __U, __A, __B); +} + +__m128 test_mm_maskz_move_ss (__mmask8 __U, __m128 __A, __m128 __B) +{ + // CHECK-LABEL: @test_mm_maskz_move_ss + // CHECK: select <4 x i1>{{.*}}, <4 x float> {{.*}}, <4 x float>{{.*}} + // CHECK: load <4 x float>, <4 x float>* {{.*}} + return _mm_maskz_move_ss (__U, __A, __B); +} + +__m128d test_mm_mask_move_sd (__m128 __W, __mmask8 __U, __m128d __A, __m128d __B) +{ + // CHECK-LABEL: @test_mm_mask_move_sd + // CHECK: select <2 x i1>{{.*}}, <2 x double>{{.*}}, <2 x double>{{.*}} + // CHECK: load <2 x double>, <2 x double>* {{.*}} + return _mm_mask_move_sd ( __W, __U, __A, __B); +} + +__m128d test_mm_maskz_move_sd (__mmask8 __U, __m128d __A, __m128d __B) +{ + // CHECK-LABEL: @test_mm_maskz_move_sd + // CHECK: select <2 x i1> {{.*}}, <2 x double> {{.*}}, <2 x double> {{.*}} + // CHECK: load <2 x double>, <2 x double>* {{.*}} + return _mm_maskz_move_sd (__U, __A, __B); +} + void test_mm512_mask_compressstoreu_pd(void *__P, __mmask8 __U, __m512d __A) { // CHECK-LABEL: @test_mm512_mask_compressstoreu_pd // CHECK: @llvm.x86.avx512.mask.compress.store.pd.512 Index: lib/Headers/avx512fintrin.h === --- lib/Headers/avx512fintrin.h +++ lib/Headers/avx512fintrin.h @@ -4558,6 +4558,30 @@ return *(__m512i *) __P; } +static __inline__ __m128 __DEFAULT_FN_ATTRS 
+_mm_mask_load_ss (__m128 __W, __mmask8 __U, const float* __A) +{ + return (__U & 1) ? _mm_load_ss(__A) : (__m128) { __W[0], 0, 0, 0}; +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_maskz_load_ss (__mmask8 __U, const float* __A) +{ + return (__U & 1) ? _mm_load_ss(__A) : (__m128) { 0, 0, 0, 0}; +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_mask_load_sd (__m128d __W, __mmask8 __U, const double* __A) +{ + return (__U & 1) ? _mm_load_sd(__A) :(__m128d) { __W[0], 0}; +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_maskz_load_sd (__mmask8 __U, const double* __A) +{ + return (__U & 1) ? _mm_load_sd(__A) :(__m128d) { 0, 0}; +} + /* SIMD store ops */ static __inline void __DEFAULT_FN_ATTRS @@ -4649,6 +4673,20 @@ *(__m512i *) __P = _
r274559 - [Clang][Feature] Adding CLFLUSHOPT feature and intrinsic to clang
Author: mzuckerm Date: Tue Jul 5 10:56:03 2016 New Revision: 274559 URL: http://llvm.org/viewvc/llvm-project?rev=274559&view=rev Log: [Clang][Feature] Adding CLFLUSHOPT feature and intrinsic to clang Differential Revision: http://reviews.llvm.org/D21792 Added: cfe/trunk/lib/Headers/clflushoptintrin.h cfe/trunk/test/CodeGen/builtin-clflushopt.c Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def cfe/trunk/lib/Headers/CMakeLists.txt cfe/trunk/lib/Headers/immintrin.h Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=274559&r1=274558&r2=274559&view=diff == --- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original) +++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Tue Jul 5 10:56:03 2016 @@ -652,6 +652,9 @@ TARGET_BUILTIN(__builtin_ia32_xsavec64, TARGET_BUILTIN(__builtin_ia32_xsaves, "vv*ULLi", "", "xsaves") TARGET_BUILTIN(__builtin_ia32_xsaves64, "vv*ULLi", "", "xsaves") +//CLFLUSHOPT +TARGET_BUILTIN(__builtin_ia32_clflushopt, "vc*", "", "clflushopt") + // ADX TARGET_BUILTIN(__builtin_ia32_addcarryx_u32, "UcUcUiUiUi*", "", "adx") TARGET_BUILTIN(__builtin_ia32_addcarryx_u64, "UcUcULLiULLiULLi*", "", "adx") Modified: cfe/trunk/lib/Headers/CMakeLists.txt URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/CMakeLists.txt?rev=274559&r1=274558&r2=274559&view=diff == --- cfe/trunk/lib/Headers/CMakeLists.txt (original) +++ cfe/trunk/lib/Headers/CMakeLists.txt Tue Jul 5 10:56:03 2016 @@ -27,6 +27,7 @@ set(files __clang_cuda_runtime_wrapper.h cpuid.h cuda_builtin_vars.h + clflushoptintrin.h emmintrin.h f16cintrin.h float.h Added: cfe/trunk/lib/Headers/clflushoptintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/clflushoptintrin.h?rev=274559&view=auto == --- cfe/trunk/lib/Headers/clflushoptintrin.h (added) +++ cfe/trunk/lib/Headers/clflushoptintrin.h Tue Jul 5 10:56:03 2016 @@ -0,0 +1,41 @@ +/*=== clflushoptintrin.h - CLFLUSHOPT intrinsic === + * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===---=== + */ + +#ifndef __IMMINTRIN_H +#error "Never use <clflushoptintrin.h> directly; include <immintrin.h> instead." +#endif + +#ifndef __CLFLUSHOPTINTRIN_H +#define __CLFLUSHOPTINTRIN_H + +/* Define the default attributes for the functions in this file. 
*/ +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("clflushopt"))) + +static __inline__ void __DEFAULT_FN_ATTRS +_mm_clflushopt(char * __m) { + __builtin_ia32_clflushopt(__m); +} + +#undef __DEFAULT_FN_ATTRS + +#endif Modified: cfe/trunk/lib/Headers/immintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/immintrin.h?rev=274559&r1=274558&r2=274559&view=diff == --- cfe/trunk/lib/Headers/immintrin.h (original) +++ cfe/trunk/lib/Headers/immintrin.h Tue Jul 5 10:56:03 2016 @@ -54,6 +54,10 @@ #include <wmmintrin.h> #endif +#if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLFLUSHOPT__) +#include <clflushoptintrin.h> +#endif + #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX__) #include <avxintrin.h> #endif Added: cfe/trunk/test/CodeGen/builtin-clflushopt.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/builtin-clflushopt.c?rev=274559&view=auto == --- cfe/trunk/test/CodeGen/builtin-clflushopt.c (added) +++ cfe/trunk/test/CodeGen/builtin-clflushopt.c Tue Jul 5 10:56:03 2016 @@ -0,0 +1,9 @@ +// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -tar
r274539 - Intrinsic _mm256_permutexvar_epi64 doesn't accept three parameters, as specified below.
Author: mzuckerm Date: Tue Jul 5 06:30:31 2016 New Revision: 274539 URL: http://llvm.org/viewvc/llvm-project?rev=274539&view=rev Log: Intrinsic _mm256_permutexvar_epi64 doesn't accept three parameters, as specified below. I deleted the extra mask parameter. __m256i _mm256_permutexvar_epi64 (__m256i idx, __m256i a) #include "immintrin.h" Instruction: vpermq CPUID Flags: AVX512VL + AVX512F Description Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst. Operation FOR j := 0 to 3 i := j*64 id := idx[i+1:i]*64 dst[i+63:i] := a[id+63:id] ENDFOR dst[MAX:256] := 0 dst[MAX:256] := 0 (From: Intel intrinsics guide) Modified: cfe/trunk/lib/Headers/avx512vlintrin.h Modified: cfe/trunk/lib/Headers/avx512vlintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512vlintrin.h?rev=274539&r1=274538&r2=274539&view=diff == --- cfe/trunk/lib/Headers/avx512vlintrin.h (original) +++ cfe/trunk/lib/Headers/avx512vlintrin.h Tue Jul 5 06:30:31 2016 @@ -8870,7 +8870,7 @@ _mm256_maskz_permutexvar_epi64 (__mmask8 } static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_permutexvar_epi64 (__mmask8 __M, __m256i __X, __m256i __Y) +_mm256_permutexvar_epi64 ( __m256i __X, __m256i __Y) { return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y, (__v4di) __X, ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r274532 - [Clang][BuiltIn][AVX512] adding _mm{|256|512}_mask_cvt{s|us|}epi16_storeu_epi8 intrinsics
Author: mzuckerm Date: Tue Jul 5 03:08:01 2016 New Revision: 274532 URL: http://llvm.org/viewvc/llvm-project?rev=274532&view=rev Log: [Clang][BuiltIn][AVX512] adding _mm{|256|512}_mask_cvt{s|us|}epi16_storeu_epi8 intrinsics Differential Revision: http://reviews.llvm.org/D21729 Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def cfe/trunk/lib/Headers/avx512bwintrin.h cfe/trunk/lib/Headers/avx512vlbwintrin.h cfe/trunk/test/CodeGen/avx512bw-builtins.c cfe/trunk/test/CodeGen/avx512vlbw-builtins.c Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=274532&r1=274531&r2=274532&view=diff == --- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original) +++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Tue Jul 5 03:08:01 2016 @@ -1945,6 +1945,7 @@ TARGET_BUILTIN(__builtin_ia32_pbroadcast TARGET_BUILTIN(__builtin_ia32_pbroadcastw128_gpr_mask, "V8ssV8sUc","","avx512bw,avx512vl") TARGET_BUILTIN(__builtin_ia32_pmovsdb512_mask, "V16cV16iV16cUs","","avx512f") TARGET_BUILTIN(__builtin_ia32_pmovsdb512mem_mask, "vV16c*V16iUs","","avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovswb512mem_mask, "vV32c*V32sUi","","avx512bw") TARGET_BUILTIN(__builtin_ia32_pmovsdw512_mask, "V16sV16iV16sUs","","avx512f") TARGET_BUILTIN(__builtin_ia32_pmovsdw512mem_mask, "vV16s*V16iUs","","avx512f") TARGET_BUILTIN(__builtin_ia32_pmovsqb512_mask, "V16cV8LLiV16cUc","","avx512f") @@ -1955,8 +1956,10 @@ TARGET_BUILTIN(__builtin_ia32_pmovsqw512 TARGET_BUILTIN(__builtin_ia32_pmovsqw512mem_mask, "vV8s*V8LLiUc","","avx512f") TARGET_BUILTIN(__builtin_ia32_pmovsdb128_mask, "V16cV4iV16cUc","","avx512vl") TARGET_BUILTIN(__builtin_ia32_pmovsdb128mem_mask, "vV16c*V4iUc","","avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovswb128mem_mask, "vV16c*V8sUc","","avx512vl,avx512bw") TARGET_BUILTIN(__builtin_ia32_pmovsdb256_mask, "V16cV8iV16cUc","","avx512vl") TARGET_BUILTIN(__builtin_ia32_pmovsdb256mem_mask, "vV16c*V8iUc","","avx512vl") 
+TARGET_BUILTIN(__builtin_ia32_pmovswb256mem_mask, "vV16c*V16sUs","","avx512vl,avx512bw") TARGET_BUILTIN(__builtin_ia32_pmovsdw128_mask, "V8sV4iV8sUc","","avx512vl") TARGET_BUILTIN(__builtin_ia32_pmovsdw128mem_mask, "vV8s*V4iUc","","avx512vl") TARGET_BUILTIN(__builtin_ia32_pmovsdw256_mask, "V8sV8iV8sUc","","avx512vl") @@ -1975,6 +1978,7 @@ TARGET_BUILTIN(__builtin_ia32_pmovsqw256 TARGET_BUILTIN(__builtin_ia32_pmovsqw256mem_mask, "vV8s*V4LLiUc","","avx512vl") TARGET_BUILTIN(__builtin_ia32_pmovusdb512_mask, "V16cV16iV16cUs","","avx512f") TARGET_BUILTIN(__builtin_ia32_pmovusdb512mem_mask, "vV16c*V16iUs","","avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovuswb512mem_mask, "vV32c*V32sUi","","avx512bw") TARGET_BUILTIN(__builtin_ia32_pmovusdw512_mask, "V16sV16iV16sUs","","avx512f") TARGET_BUILTIN(__builtin_ia32_pmovusdw512mem_mask, "vV16s*V16iUs","","avx512f") TARGET_BUILTIN(__builtin_ia32_pmovusqb512_mask, "V16cV8LLiV16cUc","","avx512f") @@ -1985,8 +1989,10 @@ TARGET_BUILTIN(__builtin_ia32_pmovusqw51 TARGET_BUILTIN(__builtin_ia32_pmovusqw512mem_mask, "vV8s*V8LLiUc","","avx512f") TARGET_BUILTIN(__builtin_ia32_pmovusdb128_mask, "V16cV4iV16cUc","","avx512vl") TARGET_BUILTIN(__builtin_ia32_pmovusdb128mem_mask, "vV16c*V4iUc","","avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovuswb128mem_mask, "vV16c*V8sUc","","avx512vl,avx512bw") TARGET_BUILTIN(__builtin_ia32_pmovusdb256_mask, "V16cV8iV16cUc","","avx512vl") TARGET_BUILTIN(__builtin_ia32_pmovusdb256mem_mask, "vV16c*V8iUc","","avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovuswb256mem_mask, "vV16c*V16sUs","","avx512vl,avx512bw") TARGET_BUILTIN(__builtin_ia32_pmovusdw128_mask, "V8sV4iV8sUc","","avx512vl") TARGET_BUILTIN(__builtin_ia32_pmovusdw128mem_mask, "vV8s*V4iUc","","avx512vl") TARGET_BUILTIN(__builtin_ia32_pmovusdw256_mask, "V8sV8iV8sUc","","avx512vl") @@ -2005,6 +2011,7 @@ TARGET_BUILTIN(__builtin_ia32_pmovusqw25 TARGET_BUILTIN(__builtin_ia32_pmovusqw256mem_mask, "vV8s*V4LLiUc","","avx512vl") 
TARGET_BUILTIN(__builtin_ia32_pmovdb512_mask, "V16cV16iV16cUs","","avx512f") TARGET_BUILTIN(__builtin_ia32_pmovdb512mem_mask, "vV16c*V16iUs","","avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovwb512mem_mask, "vV32c*V32sUi","","avx512bw") TARGET_BUILTIN(__builtin_ia32_pmovdw512_mask, "V16sV16iV16sUs","","avx512f") TARGET_BUILTIN(__builtin_ia32_pmovdw512mem_mask, "vV16s*V16iUs","","avx512f") TARGET_BUILTIN(__builtin_ia32_pmovqb512_mask, "V16cV8LLiV16cUc","","avx512f") @@ -2014,9 +2021,11 @@ TARGET_BUILTIN(__builtin_ia32_pmovqd512m TARGET_BUILTIN(__builtin_ia32_pmovqw512_mask, "V8sV8LLiV8sUc","","avx512f") TARGET_BUILTIN(__builtin_ia32_pmovqw512mem_mask, "vV8s*V8LLiUc","","avx512f") TARGET_BUILTIN(__builtin_ia32_pmovdb128_mask, "V16cV4iV16cUc","","avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovwb128mem_mask, "vV16c*V8sUc","","avx512vl,avx512bw") TARGET_BUILTIN(__builtin_ia32_pmovdb1
r274224 - [AVX512][BUILTIN][vpermilps][intrinsics] Fixing two incorrect IMM check.
Author: mzuckerm Date: Thu Jun 30 07:12:20 2016 New Revision: 274224 URL: http://llvm.org/viewvc/llvm-project?rev=274224&view=rev Log: [AVX512][BUILTIN][vpermilps][intrinsics] Fixing two incorrect IMM check. Differential Revision: http://reviews.llvm.org/D21836 Modified: cfe/trunk/lib/Sema/SemaChecking.cpp Modified: cfe/trunk/lib/Sema/SemaChecking.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaChecking.cpp?rev=274224&r1=274223&r2=274224&view=diff == --- cfe/trunk/lib/Sema/SemaChecking.cpp (original) +++ cfe/trunk/lib/Sema/SemaChecking.cpp Thu Jun 30 07:12:20 2016 @@ -1389,7 +1389,6 @@ bool Sema::CheckX86BuiltinFunctionCall(u case X86::BI__builtin_ia32_extractf32x4_mask: case X86::BI__builtin_ia32_extracti32x4_mask: case X86::BI__builtin_ia32_vpermilpd_mask: - case X86::BI__builtin_ia32_vpermilps_mask: case X86::BI__builtin_ia32_extractf64x2_512_mask: case X86::BI__builtin_ia32_extracti64x2_512_mask: i = 1; l = 0; u = 3; @@ -1461,7 +1460,6 @@ bool Sema::CheckX86BuiltinFunctionCall(u case X86::BI__builtin_ia32_roundps256: case X86::BI__builtin_ia32_roundpd256: case X86::BI__builtin_ia32_vpermilpd256_mask: - case X86::BI__builtin_ia32_vpermilps256_mask: i = 1; l = 0; u = 15; break; case X86::BI__builtin_ia32_roundss: @@ -1541,7 +1539,6 @@ bool Sema::CheckX86BuiltinFunctionCall(u case X86::BI__builtin_ia32_psrlwi128_mask: case X86::BI__builtin_ia32_psrlwi256_mask: case X86::BI__builtin_ia32_vpermilpd512_mask: - case X86::BI__builtin_ia32_vpermilps512_mask: case X86::BI__builtin_ia32_psradi128_mask: case X86::BI__builtin_ia32_psradi256_mask: case X86::BI__builtin_ia32_psradi512_mask: @@ -1566,6 +1563,9 @@ bool Sema::CheckX86BuiltinFunctionCall(u case X86::BI__builtin_ia32_fpclasspd512_mask: case X86::BI__builtin_ia32_fpclasssd_mask: case X86::BI__builtin_ia32_fpcla_mask: + case X86::BI__builtin_ia32_vpermilps_mask: + case X86::BI__builtin_ia32_vpermilps256_mask: + case X86::BI__builtin_ia32_vpermilps512_mask: i = 1; l = 0; u = 255; break; case 
X86::BI__builtin_ia32_palignr: ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r274218 - [Clang][Intrinsics][AVX512][BuiltIn] adding intrinsics for vrangesd instruction set
Author: mzuckerm Date: Thu Jun 30 03:05:46 2016 New Revision: 274218 URL: http://llvm.org/viewvc/llvm-project?rev=274218&view=rev Log: [Clang][Intrinsics][AVX512][BuiltIn] adding intrinsics for vrangesd instruction set Differential Revision: http://reviews.llvm.org/D21734 Modified: cfe/trunk/lib/Headers/avx512dqintrin.h cfe/trunk/test/CodeGen/avx512dq-builtins.c Modified: cfe/trunk/lib/Headers/avx512dqintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512dqintrin.h?rev=274218&r1=274217&r2=274218&view=diff == --- cfe/trunk/lib/Headers/avx512dqintrin.h (original) +++ cfe/trunk/lib/Headers/avx512dqintrin.h Thu Jun 30 03:05:46 2016 @@ -792,6 +792,8 @@ _mm512_maskz_cvtepu64_ps (__mmask8 __U, (__mmask8) -1, (int)(C),\ (int)(R)); }) +#define _mm_range_ss(A ,B , C) _mm_range_round_ss(A, B, C ,_MM_FROUND_CUR_DIRECTION) + #define _mm_mask_range_round_ss(W, U, A, B, C, R) __extension__ ({ \ (__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ @@ -799,6 +801,8 @@ _mm512_maskz_cvtepu64_ps (__mmask8 __U, (__mmask8)(U), (int)(C),\ (int)(R)); }) +#define _mm_mask_range_ss(W , U, A, B, C) _mm_mask_range_round_ss(W, U, A, B, C , _MM_FROUND_CUR_DIRECTION) + #define _mm_maskz_range_round_ss(U, A, B, C, R) __extension__ ({ \ (__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ @@ -806,6 +810,8 @@ _mm512_maskz_cvtepu64_ps (__mmask8 __U, (__mmask8)(U), (int)(C),\ (int)(R)); }) +#define _mm_maskz_range_ss(U, A ,B , C) _mm_maskz_range_round_ss(U, A, B, C ,_MM_FROUND_CUR_DIRECTION) + #define _mm_range_round_sd(A, B, C, R) __extension__ ({ \ (__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ @@ -813,6 +819,8 @@ _mm512_maskz_cvtepu64_ps (__mmask8 __U, (__mmask8) -1, (int)(C),\ (int)(R)); }) +#define _mm_range_sd(A ,B , C) _mm_range_round_sd(A, B, C ,_MM_FROUND_CUR_DIRECTION) + #define _mm_mask_range_round_sd(W, U, A, B, C, R) __extension__ ({ \ 
(__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ @@ -820,6 +828,8 @@ _mm512_maskz_cvtepu64_ps (__mmask8 __U, (__mmask8)(U), (int)(C),\ (int)(R)); }) +#define _mm_mask_range_sd(W, U, A, B, C) _mm_mask_range_round_sd(W, U, A, B, C ,_MM_FROUND_CUR_DIRECTION) + #define _mm_maskz_range_round_sd(U, A, B, C, R) __extension__ ({ \ (__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ @@ -827,6 +837,8 @@ _mm512_maskz_cvtepu64_ps (__mmask8 __U, (__mmask8)(U), (int)(C),\ (int)(R)); }) +#define _mm_maskz_range_sd(U, A, B, C) _mm_maskz_range_round_sd(U, A, B, C ,_MM_FROUND_CUR_DIRECTION) + #define _mm512_reduce_pd(A, B) __extension__ ({ \ (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \ (__v8df)_mm512_setzero_pd(), \ Modified: cfe/trunk/test/CodeGen/avx512dq-builtins.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512dq-builtins.c?rev=274218&r1=274217&r2=274218&view=diff == --- cfe/trunk/test/CodeGen/avx512dq-builtins.c (original) +++ cfe/trunk/test/CodeGen/avx512dq-builtins.c Thu Jun 30 03:05:46 2016 @@ -671,6 +671,42 @@ __m128 test_mm512_maskz_range_round_ss(_ return _mm_maskz_range_round_ss(__U, __A, __B, 4, 8); } +__m128d test_mm_range_sd(__m128d __A, __m128d __B) { + // CHECK-LABEL: @test_mm_range_sd + // CHECK: @llvm.x86.avx512.mask.range.sd + return _mm_range_sd(__A, __B, 4); +} + +__m128d test_mm_mask_range_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { + // CHECK-LABEL: test_mm_mask_range_sd + // CHECK: @llvm.x86.avx512.mask.range.sd + return _mm_mask_range_sd(__W, __U, __A, __B, 4); +} + +__m128d test_mm_maskz_range_sd(__mmask8 __U, __m128d __A, __m128d __B) { + // CHECK-LABEL: @test_mm_maskz_range_sd + // CHECK: @llvm.x86.avx512.mask.range.sd + return _mm_maskz_range_sd(__U, __A, __B, 4); +} + +__m128d test_mm_range
Re: [PATCH] D21836: [AVX512][BUILTIN][vpermilps][intrinsics] Fixing two incorrect IMM check.
m_zuckerman added a comment. No problem http://reviews.llvm.org/D21836 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D21836: [AVX512][BUILTIN] Deleting two incorrect lines, conflicting SPEC
m_zuckerman created this revision. m_zuckerman added reviewers: AsafBadouh, igorb, delena, craig.topper. m_zuckerman added a subscriber: cfe-commits. According to the spec, the vpermilps instruction accepts an immediate larger than 15. As shown below, the immediate can be any value from 0 to 255 inclusive. SELECT4(src, control){ CASE(control[1:0]) 0: tmp[31:0] := src[31:0] 1: tmp[31:0] := src[63:32] 2: tmp[31:0] := src[95:64] 3: tmp[31:0] := src[127:96] ESAC RETURN tmp[31:0] } tmp_dst[31:0] := SELECT4(a[127:0], imm8[1:0]) tmp_dst[63:32] := SELECT4(a[127:0], imm8[3:2]) tmp_dst[95:64] := SELECT4(a[127:0], imm8[5:4]) tmp_dst[127:96] := SELECT4(a[127:0], imm8[7:6]) FOR j := 0 to 3 i := j*32 IF k[j] dst[i+31:i] := tmp_dst[i+31:i] ELSE dst[i+31:i] := src[i+31:i] FI ENDFOR dst[MAX:128] := 0 http://reviews.llvm.org/D21836 Files: lib/Sema/SemaChecking.cpp Index: lib/Sema/SemaChecking.cpp === --- lib/Sema/SemaChecking.cpp +++ lib/Sema/SemaChecking.cpp @@ -1389,7 +1389,6 @@ case X86::BI__builtin_ia32_extractf32x4_mask: case X86::BI__builtin_ia32_extracti32x4_mask: case X86::BI__builtin_ia32_vpermilpd_mask: - case X86::BI__builtin_ia32_vpermilps_mask: case X86::BI__builtin_ia32_extractf64x2_512_mask: case X86::BI__builtin_ia32_extracti64x2_512_mask: i = 1; l = 0; u = 3; @@ -1461,7 +1460,6 @@ case X86::BI__builtin_ia32_roundps256: case X86::BI__builtin_ia32_roundpd256: case X86::BI__builtin_ia32_vpermilpd256_mask: - case X86::BI__builtin_ia32_vpermilps256_mask: i = 1; l = 0; u = 15; break; case X86::BI__builtin_ia32_roundss: @@ -1566,6 +1564,8 @@ case X86::BI__builtin_ia32_fpclasspd512_mask: case X86::BI__builtin_ia32_fpclasssd_mask: case X86::BI__builtin_ia32_fpcla_mask: + case X86::BI__builtin_ia32_vpermilps_mask: + case X86::BI__builtin_ia32_vpermilps256_mask: i = 1; l = 0; u = 255; break; case X86::BI__builtin_ia32_palignr: Index: lib/Sema/SemaChecking.cpp === --- lib/Sema/SemaChecking.cpp +++ lib/Sema/SemaChecking.cpp @@ -1389,7 +1389,6 @@ case X86::BI__builtin_ia32_extractf32x4_mask: case 
X86::BI__builtin_ia32_extracti32x4_mask: case X86::BI__builtin_ia32_vpermilpd_mask: - case X86::BI__builtin_ia32_vpermilps_mask: case X86::BI__builtin_ia32_extractf64x2_512_mask: case X86::BI__builtin_ia32_extracti64x2_512_mask: i = 1; l = 0; u = 3; @@ -1461,7 +1460,6 @@ case X86::BI__builtin_ia32_roundps256: case X86::BI__builtin_ia32_roundpd256: case X86::BI__builtin_ia32_vpermilpd256_mask: - case X86::BI__builtin_ia32_vpermilps256_mask: i = 1; l = 0; u = 15; break; case X86::BI__builtin_ia32_roundss: @@ -1566,6 +1564,8 @@ case X86::BI__builtin_ia32_fpclasspd512_mask: case X86::BI__builtin_ia32_fpclasssd_mask: case X86::BI__builtin_ia32_fpcla_mask: + case X86::BI__builtin_ia32_vpermilps_mask: + case X86::BI__builtin_ia32_vpermilps256_mask: i = 1; l = 0; u = 255; break; case X86::BI__builtin_ia32_palignr: ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D21792: [Clang][Feature] Adding CLFLUSHOPT feature and intrinsic to clang
m_zuckerman created this revision. m_zuckerman added reviewers: AsafBadouh, igorb, delena. m_zuckerman added a subscriber: cfe-commits. http://reviews.llvm.org/D21792 Files: include/clang/Basic/BuiltinsX86.def lib/Headers/CMakeLists.txt lib/Headers/clflushoptintrin.h lib/Headers/immintrin.h test/CodeGen/builtin-clflushopt.c Index: test/CodeGen/builtin-clflushopt.c === --- test/CodeGen/builtin-clflushopt.c +++ test/CodeGen/builtin-clflushopt.c @@ -0,0 +1,9 @@ +// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +clflushopt -emit-llvm -o - -Werror | FileCheck %s +#define __MM_MALLOC_H + +#include +void test_mm_clflushopt(char * __m) { + //CHECK-LABLE: @test_mm_clflushopt + //CHECK: @llvm.x86.clflushopt + _mm_clflushopt(__m); +} Index: lib/Headers/immintrin.h === --- lib/Headers/immintrin.h +++ lib/Headers/immintrin.h @@ -54,6 +54,10 @@ #include #endif +#if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLFLUSHOPT__) +#include +#endif + #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX__) #include #endif Index: lib/Headers/clflushoptintrin.h === --- lib/Headers/clflushoptintrin.h +++ lib/Headers/clflushoptintrin.h @@ -0,0 +1,41 @@ +/*=== clflushoptintrin.h - CLFLUSHOPT intrinsic === + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===---=== + */ + +#ifndef __IMMINTRIN_H +#error "Never use directly; include instead." +#endif + +#ifndef __CLFLUSHOPTINTRIN_H +#define __CLFLUSHOPTINTRIN_H + +/* Define the default attributes for the functions in this file. */ +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("clflushopt"))) + +static __inline__ void __DEFAULT_FN_ATTRS +_mm_clflushopt(char * __m) { + __builtin_ia32_clflushopt(__m); +} + +#undef __DEFAULT_FN_ATTRS + +#endif Index: lib/Headers/CMakeLists.txt === --- lib/Headers/CMakeLists.txt +++ lib/Headers/CMakeLists.txt @@ -27,6 +27,7 @@ __clang_cuda_runtime_wrapper.h cpuid.h cuda_builtin_vars.h + clflushoptintrin.h emmintrin.h f16cintrin.h float.h Index: include/clang/Basic/BuiltinsX86.def === --- include/clang/Basic/BuiltinsX86.def +++ include/clang/Basic/BuiltinsX86.def @@ -652,6 +652,9 @@ TARGET_BUILTIN(__builtin_ia32_xsaves, "vv*ULLi", "", "xsaves") TARGET_BUILTIN(__builtin_ia32_xsaves64, "vv*ULLi", "", "xsaves") +//CLFLUSHOPT +TARGET_BUILTIN(__builtin_ia32_clflushopt, "vc*", "", "clflushopt") + // ADX TARGET_BUILTIN(__builtin_ia32_addcarryx_u32, "UcUcUiUiUi*", "", "adx") TARGET_BUILTIN(__builtin_ia32_addcarryx_u64, "UcUcULLiULLiULLi*", "", "adx") ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D21790: [Clang][Feature] Adding CLFLUSHOPT feature to clang
m_zuckerman created this revision. m_zuckerman added reviewers: AsafBadouh, igorb, delena. m_zuckerman added a subscriber: cfe-commits. http://reviews.llvm.org/D21790 Files: include/clang/Basic/BuiltinsX86.def lib/Headers/CMakeLists.txt lib/Headers/immintrin.h Index: lib/Headers/immintrin.h === --- lib/Headers/immintrin.h +++ lib/Headers/immintrin.h @@ -54,6 +54,10 @@ #include #endif +#if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLFLUSHOPT__) +#include +#endif + #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX__) #include #endif Index: lib/Headers/CMakeLists.txt === --- lib/Headers/CMakeLists.txt +++ lib/Headers/CMakeLists.txt @@ -27,6 +27,7 @@ __clang_cuda_runtime_wrapper.h cpuid.h cuda_builtin_vars.h + clflushoptintrin.h emmintrin.h f16cintrin.h float.h Index: include/clang/Basic/BuiltinsX86.def === --- include/clang/Basic/BuiltinsX86.def +++ include/clang/Basic/BuiltinsX86.def @@ -652,6 +652,9 @@ TARGET_BUILTIN(__builtin_ia32_xsaves, "vv*ULLi", "", "xsaves") TARGET_BUILTIN(__builtin_ia32_xsaves64, "vv*ULLi", "", "xsaves") +//CLFLUSHOPT +TARGET_BUILTIN(__builtin_ia32_clflushopt, "vc*", "", "clflushopt") + // ADX TARGET_BUILTIN(__builtin_ia32_addcarryx_u32, "UcUcUiUiUi*", "", "adx") TARGET_BUILTIN(__builtin_ia32_addcarryx_u64, "UcUcULLiULLiULLi*", "", "adx") Index: lib/Headers/immintrin.h === --- lib/Headers/immintrin.h +++ lib/Headers/immintrin.h @@ -54,6 +54,10 @@ #include #endif +#if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLFLUSHOPT__) +#include +#endif + #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX__) #include #endif Index: lib/Headers/CMakeLists.txt === --- lib/Headers/CMakeLists.txt +++ lib/Headers/CMakeLists.txt @@ -27,6 +27,7 @@ __clang_cuda_runtime_wrapper.h cpuid.h cuda_builtin_vars.h + clflushoptintrin.h emmintrin.h f16cintrin.h float.h Index: include/clang/Basic/BuiltinsX86.def === --- include/clang/Basic/BuiltinsX86.def +++ include/clang/Basic/BuiltinsX86.def @@ -652,6 +652,9 @@ 
TARGET_BUILTIN(__builtin_ia32_xsaves, "vv*ULLi", "", "xsaves") TARGET_BUILTIN(__builtin_ia32_xsaves64, "vv*ULLi", "", "xsaves") +//CLFLUSHOPT +TARGET_BUILTIN(__builtin_ia32_clflushopt, "vc*", "", "clflushopt") + // ADX TARGET_BUILTIN(__builtin_ia32_addcarryx_u32, "UcUcUiUiUi*", "", "adx") TARGET_BUILTIN(__builtin_ia32_addcarryx_u64, "UcUcULLiULLiULLi*", "", "adx") ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D21734: [Intrinsics][AVX512][BuiltIn] adding intrinsics for vrangesd instruction set
m_zuckerman created this revision. m_zuckerman added reviewers: AsafBadouh, igorb, delena. m_zuckerman added a subscriber: cfe-commits. http://reviews.llvm.org/D21734 Files: lib/Headers/avx512dqintrin.h test/CodeGen/avx512dq-builtins.c Index: test/CodeGen/avx512dq-builtins.c === --- test/CodeGen/avx512dq-builtins.c +++ test/CodeGen/avx512dq-builtins.c @@ -671,6 +671,42 @@ return _mm_maskz_range_round_ss(__U, __A, __B, 4, 8); } +__m128d test_mm_range_sd(__m128d __A, __m128d __B) { + // CHECK-LABEL: @test_mm_range_sd + // CHECK: @llvm.x86.avx512.mask.range.sd + return _mm_range_sd(__A, __B, 4); +} + +__m128d test_mm_mask_range_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { + // CHECK-LABEL: test_mm_mask_range_sd + // CHECK: @llvm.x86.avx512.mask.range.sd + return _mm_mask_range_sd(__W, __U, __A, __B, 4); +} + +__m128d test_mm_maskz_range_sd(__mmask8 __U, __m128d __A, __m128d __B) { + // CHECK-LABEL: @test_mm_maskz_range_sd + // CHECK: @llvm.x86.avx512.mask.range.sd + return _mm_maskz_range_sd(__U, __A, __B, 4); +} + +__m128d test_mm_range_ss(__m128d __A, __m128d __B) { + // CHECK-LABEL: @test_mm_range_ss + // CHECK: @llvm.x86.avx512.mask.range.ss + return _mm_range_ss(__A, __B, 4); +} + +__m128d test_mm_mask_range_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { + // CHECK-LABEL: @test_mm_mask_range_ss + // CHECK: @llvm.x86.avx512.mask.range.ss + return _mm_mask_range_ss(__W, __U, __A, __B, 4); +} + +__m128 test_mm_maskz_range_ss(__mmask8 __U, __m128 __A, __m128 __B) { + // CHECK-LABEL: @test_mm_maskz_range_ss + // CHECK: @llvm.x86.avx512.mask.range.ss + return _mm_maskz_range_ss(__U, __A, __B, 4); +} + __m512 test_mm512_range_ps(__m512 __A, __m512 __B) { // CHECK-LABEL: @test_mm512_range_ps // CHECK: @llvm.x86.avx512.mask.range.ps.512 Index: lib/Headers/avx512dqintrin.h === --- lib/Headers/avx512dqintrin.h +++ lib/Headers/avx512dqintrin.h @@ -792,41 +792,53 @@ (__mmask8) -1, (int)(C),\ (int)(R)); }) +#define _mm_range_ss(A ,B , C) 
_mm_range_round_ss(A, B, C ,_MM_FROUND_CUR_DIRECTION) + #define _mm_mask_range_round_ss(W, U, A, B, C, R) __extension__ ({ \ (__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)(__m128)(W),\ (__mmask8)(U), (int)(C),\ (int)(R)); }) +#define _mm_mask_range_ss(W , U, A, B, C) _mm_mask_range_round_ss(W, U, A, B, C , _MM_FROUND_CUR_DIRECTION) + #define _mm_maskz_range_round_ss(U, A, B, C, R) __extension__ ({ \ (__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)(U), (int)(C),\ (int)(R)); }) +#define _mm_maskz_range_ss(U, A ,B , C) _mm_maskz_range_round_ss(U, A, B, C ,_MM_FROUND_CUR_DIRECTION) + #define _mm_range_round_sd(A, B, C, R) __extension__ ({ \ (__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ (__mmask8) -1, (int)(C),\ (int)(R)); }) +#define _mm_range_sd(A ,B , C) _mm_range_round_sd(A, B, C ,_MM_FROUND_CUR_DIRECTION) + #define _mm_mask_range_round_sd(W, U, A, B, C, R) __extension__ ({ \ (__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)(__m128d)(W),\ (__mmask8)(U), (int)(C),\ (int)(R)); }) +#define _mm_mask_range_sd(W, U, A, B, C) _mm_mask_range_round_sd(W, U, A, B, C ,_MM_FROUND_CUR_DIRECTION) + #define _mm_maskz_range_round_sd(U, A, B, C, R) __extension__ ({ \ (__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)(U), (int)(C),\ (int)(R)); }) +#define _mm_maskz_range_sd(U, A, B, C) _mm_maskz_range_round_sd(U, A, B, C ,_MM_FROUND_CUR_DIRECTION) + #define _mm512_reduce_pd(A, B) __ext
[PATCH] D21729: [Clang][BuiltIn][AVX512] adding _mm{|256|512}_mask_cvt{s|us|}epi16_storeu_epi8 intrinsics
m_zuckerman created this revision. m_zuckerman added reviewers: AsafBadouh, igorb, delena. m_zuckerman added a subscriber: cfe-commits. http://reviews.llvm.org/D21729 Files: include/clang/Basic/BuiltinsX86.def lib/Headers/avx512bwintrin.h lib/Headers/avx512vlbwintrin.h test/CodeGen/avx512bw-builtins.c test/CodeGen/avx512vlbw-builtins.c Index: test/CodeGen/avx512vlbw-builtins.c === --- test/CodeGen/avx512vlbw-builtins.c +++ test/CodeGen/avx512vlbw-builtins.c @@ -2446,3 +2446,45 @@ // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} return _mm256_maskz_shufflelo_epi16(__U, __A, 5); } + +void test_mm_mask_cvtepi16_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) +{ + // CHECK-LABEL:@test_mm_mask_cvtepi16_storeu_epi8 + // CHECK: @llvm.x86.avx512.mask.pmov.wb.mem.128 + _mm_mask_cvtepi16_storeu_epi8 (__P, __M, __A); +} + +void test_mm_mask_cvtsepi16_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) +{ + // CHECK-LABEL:@test_mm_mask_cvtsepi16_storeu_epi8 + // CHECK: @llvm.x86.avx512.mask.pmovs.wb.mem.128 + _mm_mask_cvtsepi16_storeu_epi8 ( __P, __M, __A); +} + +void test_mm_mask_cvtusepi16_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) +{ + // CHECK-LABEL:@test_mm_mask_cvtusepi16_storeu_epi8 + // CHECK: @llvm.x86.avx512.mask.pmovus.wb.mem.128 + _mm_mask_cvtusepi16_storeu_epi8 (__P, __M, __A); +} + +void test_mm256_mask_cvtusepi16_storeu_epi8 (void * __P, __mmask16 __M, __m256i __A) +{ + // CHECK-LABEL:@test_mm256_mask_cvtusepi16_storeu_epi8 + // CHECK: @llvm.x86.avx512.mask.pmovus.wb.mem.256 + _mm256_mask_cvtusepi16_storeu_epi8 ( __P, __M, __A); +} + +void test_mm256_mask_cvtepi16_storeu_epi8 (void * __P, __mmask16 __M, __m256i __A) +{ + // CHECK-LABEL:@test_mm256_mask_cvtepi16_storeu_epi8 + // CHECK: @llvm.x86.avx512.mask.pmov.wb.mem.256 + _mm256_mask_cvtepi16_storeu_epi8 ( __P, __M, __A); +} + +void test_mm256_mask_cvtsepi16_storeu_epi8 (void * __P, __mmask16 __M, __m256i __A) +{ + // CHECK-LABEL:@test_mm256_mask_cvtsepi16_storeu_epi8 + 
// CHECK: @llvm.x86.avx512.mask.pmovs.wb.mem.256 + _mm256_mask_cvtsepi16_storeu_epi8 ( __P, __M, __A); +} Index: test/CodeGen/avx512bw-builtins.c === --- test/CodeGen/avx512bw-builtins.c +++ test/CodeGen/avx512bw-builtins.c @@ -1554,3 +1554,23 @@ return _mm512_movepi16_mask(__A); } +void test_mm512_mask_cvtepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A) +{ + // CHECK-LABEL: @test_mm512_mask_cvtepi16_storeu_epi8 + // CHECK: @llvm.x86.avx512.mask.pmov.wb.mem.512 + __builtin_ia32_pmovwb512mem_mask ( __P, __A, __M); +} + +void test_mm512_mask_cvtsepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A) +{ + // CHECK-LABEL: @test_mm512_mask_cvtsepi16_storeu_epi8 + // CHECK: @llvm.x86.avx512.mask.pmovs.wb.mem.512 + __builtin_ia32_pmovswb512mem_mask ( __P, __A, __M); +} + +void test_mm512_mask_cvtusepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A) +{ + // CHECK-LABEL: @test_mm512_mask_cvtusepi16_storeu_epi8 + // CHECK: @llvm.x86.avx512.mask.pmovus.wb.mem.512 + __builtin_ia32_pmovuswb512mem_mask ( __P, __A, __M); +} Index: lib/Headers/avx512vlbwintrin.h === --- lib/Headers/avx512vlbwintrin.h +++ lib/Headers/avx512vlbwintrin.h @@ -1999,6 +1999,25 @@ __M); } +static __inline__ void __DEFAULT_FN_ATTRS +_mm_mask_cvtepi16_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) +{ + __builtin_ia32_pmovwb128mem_mask ((__v16qi *) __P, (__v8hi) __A, __M); +} + + +static __inline__ void __DEFAULT_FN_ATTRS +_mm_mask_cvtsepi16_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) +{ + __builtin_ia32_pmovswb128mem_mask ((__v16qi *) __P, (__v8hi) __A, __M); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm_mask_cvtusepi16_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) +{ + __builtin_ia32_pmovuswb128mem_mask ((__v16qi *) __P, (__v8hi) __A, __M); +} + static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_cvtepi16_epi8 (__m256i __A) { return (__m128i) __builtin_ia32_pmovwb256_mask ((__v16hi) __A, @@ -2020,6 +2039,23 @@ __M); } +static __inline__ void 
__DEFAULT_FN_ATTRS +_mm256_mask_cvtepi16_storeu_epi8 (void * __P, __mmask16 __M, __m256i __A) +{ + __builtin_ia32_pmovwb256mem_mask ((__v16qi *) __P, (__v16hi) __A, __M); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm256_mask_cvtsepi16_storeu_epi8 (void * __P, __mmask16 __M, __m256i __A) +{ + __builtin_ia32_pmovswb256mem_mask ((__v16qi *) __P, (__v16hi) __A, __M); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm256_mask_cvtusepi16_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) +{ + __builtin_ia32_pmovuswb256mem_mask ((__v16qi*) __P, (__v16hi) __A, __M); +} static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_mulhrs_epi16 (__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) { return (__m128i) __builtin_ia32_pmul
r273401 - [Clang][bmi][intrinsics] Adding _mm_tzcnt_64 _mm_tzcnt_32 intrinsics to clang.
Author: mzuckerm Date: Wed Jun 22 07:32:43 2016 New Revision: 273401 URL: http://llvm.org/viewvc/llvm-project?rev=273401&view=rev Log: [Clang][bmi][intrinsics] Adding _mm_tzcnt_64 _mm_tzcnt_32 intrinsics to clang. Differential Revision: http://reviews.llvm.org/D21373 Modified: cfe/trunk/lib/Headers/bmiintrin.h cfe/trunk/test/CodeGen/bmi-builtins.c Modified: cfe/trunk/lib/Headers/bmiintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/bmiintrin.h?rev=273401&r1=273400&r2=273401&view=diff == --- cfe/trunk/lib/Headers/bmiintrin.h (original) +++ cfe/trunk/lib/Headers/bmiintrin.h Wed Jun 22 07:32:43 2016 @@ -287,6 +287,22 @@ __tzcnt_u32(unsigned int __X) return __X ? __builtin_ctz(__X) : 32; } +/// \brief Counts the number of trailing zero bits in the operand. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c TZCNT instruction. +/// +/// \param __X +///An unsigned 32-bit integer whose trailing zeros are to be counted. +/// \returns An 32-bit integer containing the number of trailing zero +///bits in the operand. +static __inline__ int __RELAXED_FN_ATTRS +_mm_tzcnt_32(unsigned int __X) +{ + return __X ? __builtin_ctz(__X) : 32; +} + #ifdef __x86_64__ /// \brief Performs a bitwise AND of the second operand with the one's @@ -507,6 +523,22 @@ __tzcnt_u64(unsigned long long __X) { return __X ? __builtin_ctzll(__X) : 64; } + +/// \brief Counts the number of trailing zero bits in the operand. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c TZCNT instruction. +/// +/// \param __X +///An unsigned 64-bit integer whose trailing zeros are to be counted. +/// \returns An 64-bit integer containing the number of trailing zero +///bits in the operand. +static __inline__ long long __RELAXED_FN_ATTRS +_mm_tzcnt_64(unsigned long long __X) +{ + return __X ? 
__builtin_ctzll(__X) : 64; +} #endif /* __x86_64__ */ Modified: cfe/trunk/test/CodeGen/bmi-builtins.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/bmi-builtins.c?rev=273401&r1=273400&r2=273401&view=diff == --- cfe/trunk/test/CodeGen/bmi-builtins.c (original) +++ cfe/trunk/test/CodeGen/bmi-builtins.c Wed Jun 22 07:32:43 2016 @@ -64,6 +64,13 @@ unsigned int test__tzcnt_u32(unsigned in return __tzcnt_u32(__X); } +int test_mm_tzcnt_32(unsigned int __X) { + // CHECK-LABEL: test_mm_tzcnt_32 + // CHECK: icmp ne i32 %{{.*}}, 0 + // CHECK: i32 @llvm.cttz.i32(i32 %{{.*}}, i1 true) + return _mm_tzcnt_32(__X); +} + unsigned long long test__andn_u64(unsigned long __X, unsigned long __Y) { // CHECK-LABEL: test__andn_u64 // CHECK: xor i64 %{{.*}}, -1 @@ -105,6 +112,13 @@ unsigned long long test__tzcnt_u64(unsig return __tzcnt_u64(__X); } +long long test_mm_tzcnt_64(unsigned long long __X) { + // CHECK-LABEL: test_mm_tzcnt_64 + // CHECK: icmp ne i64 %{{.*}}, 0 + // CHECK: i64 @llvm.cttz.i64(i64 %{{.*}}, i1 true) + return _mm_tzcnt_64(__X); +} + // Intel intrinsics unsigned short test_tzcnt_u16(unsigned short __X) { ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D21373: [Clang][bmi][intrinsics] Adding _mm_tzcnt_64 _mm_tzcnt_32 intrinsics to clang.
m_zuckerman added inline comments. Comment at: lib/Headers/bmiintrin.h:296 @@ +295,3 @@ +/// This intrinsic corresponds to the \c TZCNT instruction. +/// +/// \param __X We can't use #define here. The _mm_tzcnt_32(a) intrinsic differs from __tzcnt_u32 in its return type: _mm_tzcnt_32 returns a signed int, while __tzcnt_u32 returns an unsigned value. http://reviews.llvm.org/D21373 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D21373: [Clang][bmi][intrinsics] Adding _mm_tzcnt_64 _mm_tzcnt_32 intrinsics to clang.
m_zuckerman updated this revision to Diff 61351. http://reviews.llvm.org/D21373 Files: lib/Headers/bmiintrin.h test/CodeGen/bmi-builtins.c Index: test/CodeGen/bmi-builtins.c === --- test/CodeGen/bmi-builtins.c +++ test/CodeGen/bmi-builtins.c @@ -64,6 +64,13 @@ return __tzcnt_u32(__X); } +int test_mm_tzcnt_32(unsigned int __X) { + // CHECK-LABEL: test_mm_tzcnt_32 + // CHECK: icmp ne i32 %{{.*}}, 0 + // CHECK: i32 @llvm.cttz.i32(i32 %{{.*}}, i1 true) + return _mm_tzcnt_32(__X); +} + unsigned long long test__andn_u64(unsigned long __X, unsigned long __Y) { // CHECK-LABEL: test__andn_u64 // CHECK: xor i64 %{{.*}}, -1 @@ -105,6 +112,13 @@ return __tzcnt_u64(__X); } +long long test_mm_tzcnt_64(unsigned long long __X) { + // CHECK-LABEL: test_mm_tzcnt_64 + // CHECK: icmp ne i64 %{{.*}}, 0 + // CHECK: i64 @llvm.cttz.i64(i64 %{{.*}}, i1 true) + return _mm_tzcnt_64(__X); +} + // Intel intrinsics unsigned short test_tzcnt_u16(unsigned short __X) { Index: lib/Headers/bmiintrin.h === --- lib/Headers/bmiintrin.h +++ lib/Headers/bmiintrin.h @@ -281,12 +281,30 @@ ///An unsigned 32-bit integer whose trailing zeros are to be counted. /// \returns An unsigned 32-bit integer containing the number of trailing zero ///bits in the operand. + static __inline__ unsigned int __RELAXED_FN_ATTRS __tzcnt_u32(unsigned int __X) { return __X ? __builtin_ctz(__X) : 32; } +/// \brief Counts the number of trailing zero bits in the operand. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c TZCNT instruction. +/// +/// \param __X +///An unsigned 32-bit integer whose trailing zeros are to be counted. +/// \returns An 32-bit integer containing the number of trailing zero +///bits in the operand. + +static __inline__ int __RELAXED_FN_ATTRS +_mm_tzcnt_32(unsigned int __X) +{ + return __X ? 
__builtin_ctz(__X) : 32; +} + #ifdef __x86_64__ /// \brief Performs a bitwise AND of the second operand with the one's @@ -502,12 +520,30 @@ ///An unsigned 64-bit integer whose trailing zeros are to be counted. /// \returns An unsigned 64-bit integer containing the number of trailing zero ///bits in the operand. + static __inline__ unsigned long long __RELAXED_FN_ATTRS __tzcnt_u64(unsigned long long __X) { return __X ? __builtin_ctzll(__X) : 64; } +/// \brief Counts the number of trailing zero bits in the operand. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c TZCNT instruction. +/// +/// \param __X +///An unsigned 64-bit integer whose trailing zeros are to be counted. +/// \returns An 64-bit integer containing the number of trailing zero +///bits in the operand. + +static __inline__ long long __RELAXED_FN_ATTRS +_mm_tzcnt_64(unsigned long long __X) +{ + return __X ? __builtin_ctzll(__X) : 64; +} + #endif /* __x86_64__ */ #undef __DEFAULT_FN_ATTRS Index: test/CodeGen/bmi-builtins.c === --- test/CodeGen/bmi-builtins.c +++ test/CodeGen/bmi-builtins.c @@ -64,6 +64,13 @@ return __tzcnt_u32(__X); } +int test_mm_tzcnt_32(unsigned int __X) { + // CHECK-LABEL: test_mm_tzcnt_32 + // CHECK: icmp ne i32 %{{.*}}, 0 + // CHECK: i32 @llvm.cttz.i32(i32 %{{.*}}, i1 true) + return _mm_tzcnt_32(__X); +} + unsigned long long test__andn_u64(unsigned long __X, unsigned long __Y) { // CHECK-LABEL: test__andn_u64 // CHECK: xor i64 %{{.*}}, -1 @@ -105,6 +112,13 @@ return __tzcnt_u64(__X); } +long long test_mm_tzcnt_64(unsigned long long __X) { + // CHECK-LABEL: test_mm_tzcnt_64 + // CHECK: icmp ne i64 %{{.*}}, 0 + // CHECK: i64 @llvm.cttz.i64(i64 %{{.*}}, i1 true) + return _mm_tzcnt_64(__X); +} + // Intel intrinsics unsigned short test_tzcnt_u16(unsigned short __X) { Index: lib/Headers/bmiintrin.h === --- lib/Headers/bmiintrin.h +++ lib/Headers/bmiintrin.h @@ -281,12 +281,30 @@ ///An unsigned 32-bit integer whose trailing zeros are to be counted. 
/// \returns An unsigned 32-bit integer containing the number of trailing zero ///bits in the operand. + static __inline__ unsigned int __RELAXED_FN_ATTRS __tzcnt_u32(unsigned int __X) { return __X ? __builtin_ctz(__X) : 32; } +/// \brief Counts the number of trailing zero bits in the operand. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c TZCNT instruction. +/// +/// \param __X +///An unsigned 32-bit integer whose trailing zeros are to be counted. +/// \returns An 32-bit integer containing the number of trailing zero +///bits in the operand. + +static __inline__ int __RELAXED_FN_ATTRS +_mm_tzcnt_32(unsigned int __X) +{ + return __X ? __builtin_ctz(__X) : 32; +} + #ifdef __x86_64__ /// \brief Performs a bitwise AND of the second operand with the one's @@ -502,
Re: [PATCH] D21505: [Clang][AVX512][Intrinsics]Adding intrinsics for mov{ss|sd} instruction set
m_zuckerman updated this revision to Diff 61250. http://reviews.llvm.org/D21505 Files: lib/Headers/avx512fintrin.h test/CodeGen/avx512f-builtins.c Index: test/CodeGen/avx512f-builtins.c === --- test/CodeGen/avx512f-builtins.c +++ test/CodeGen/avx512f-builtins.c @@ -241,6 +241,23 @@ _mm512_mask_store_pd(p, m, a); } +__m128 test_mm_mask_store_ss (float * __W, __mmask8 __U, __m128 __A) +{ + // CHECK-LABEL: @test_mm_mask_store_ss + // CHECK: store float {{.*}}, float* {{.*}} + // CHECK: load <4 x float>, <4 x float>* {{.*}} + return _mm_mask_store_ss (__W, __U, __A); +} + +__m128d test_mm_mask_store_sd (double * __W, __mmask8 __U, __m128d __A) +{ + // CHECK-LABEL: @test_mm_mask_store_sd + // CHECK: store double {{.*}}, double* {{.*}} + // CHECK: load <2 x double>, <2 x double>* {{.*}} + return _mm_mask_store_sd ( __W, __U, __A); +} + + void test_mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A) { // CHECK-LABEL: @test_mm512_mask_storeu_epi32 // CHECK: @llvm.masked.store.v16i32(<16 x i32> %{{.*}}, <16 x i32>* %{{.*}}, i32 1, <16 x i1> %{{.*}}) @@ -371,6 +388,38 @@ return _mm512_maskz_load_pd(__U, __P); } +__m128 test_mm_mask_load_ss (__m128 __W, __mmask8 __U, float const* __A) +{ + // CHECK-LABEL: @test_mm_mask_load_ss + // CHECK: select <4 x i1> {{.*}}, <4 x float> {{.*}}, <4 x float> {{.*}} + // CHECK: load <4 x float>, <4 x float>* {{.*}} + return _mm_mask_load_ss ( __W, __U, __A); +} + +__m128 test_mm_maskz_load_ss (__mmask8 __U, float const* __A) +{ + // CHECK-LABEL: @test_mm_maskz_load_ss + // CHECK: select <4 x i1> {{.*}}, <4 x float> {{.*}}, <4 x float> {{.*}} + // CHECK: load <4 x float>, <4 x float>* {{.*}} + return _mm_maskz_load_ss (__U, __A); +} + +__m128d test_mm_mask_load_sd (__m128 __W, __mmask8 __U, double const* __A) +{ + // CHECK-LABEL: @test_mm_mask_load_sd + // CHECK: select <2 x i1>{{.*}}, <2 x double>{{.*}}, <2 x double>{{.*}} + // CHECK: load <2 x double>, <2 x double>* {{.*}} + return _mm_mask_load_sd ( __W, __U, __A); +} + +__m128d 
test_mm_maskz_load_sd (__mmask8 __U, double const* __A) +{ + // CHECK-LABEL: @test_mm_maskz_load_sd + // CHECK: select <2 x i1> {{.*}}, <2 x double> {{.*}}, <2 x double> {{.*}} + // CHECK: load <2 x double>, <2 x double>* {{.*}} + return _mm_maskz_load_sd (__U, __A); +} + __m512d test_mm512_set1_pd(double d) { // CHECK-LABEL: @test_mm512_set1_pd @@ -6125,6 +6174,38 @@ return _mm512_maskz_mov_ps(__U, __A); } +__m128 test_mm_mask_move_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) +{ + // CHECK-LABEL: @test_mm_mask_move_ss + // CHECK: select <4 x i1>{{.*}}, <4 x float> {{.*}}, <4 x float>{{.*}} + // CHECK: load <4 x float>, <4 x float>* {{.*}} + return _mm_mask_move_ss ( __W, __U, __A, __B); +} + +__m128 test_mm_maskz_move_ss (__mmask8 __U, __m128 __A, __m128 __B) +{ + // CHECK-LABEL: @test_mm_maskz_move_ss + // CHECK: select <4 x i1>{{.*}}, <4 x float> {{.*}}, <4 x float>{{.*}} + // CHECK: load <4 x float>, <4 x float>* {{.*}} + return _mm_maskz_move_ss (__U, __A, __B); +} + +__m128d test_mm_mask_move_sd (__m128 __W, __mmask8 __U, __m128d __A, __m128d __B) +{ + // CHECK-LABEL: @test_mm_mask_move_sd + // CHECK: select <2 x i1>{{.*}}, <2 x double>{{.*}}, <2 x double>{{.*}} + // CHECK: load <2 x double>, <2 x double>* {{.*}} + return _mm_mask_move_sd ( __W, __U, __A, __B); +} + +__m128d test_mm_maskz_move_sd (__mmask8 __U, __m128d __A, __m128d __B) +{ + // CHECK-LABEL: @test_mm_maskz_move_sd + // CHECK: select <2 x i1> {{.*}}, <2 x double> {{.*}}, <2 x double> {{.*}} + // CHECK: load <2 x double>, <2 x double>* {{.*}} + return _mm_maskz_move_sd (__U, __A, __B); +} + void test_mm512_mask_compressstoreu_pd(void *__P, __mmask8 __U, __m512d __A) { // CHECK-LABEL: @test_mm512_mask_compressstoreu_pd // CHECK: @llvm.x86.avx512.mask.compress.store.pd.512 Index: lib/Headers/avx512fintrin.h === --- lib/Headers/avx512fintrin.h +++ lib/Headers/avx512fintrin.h @@ -4400,6 +4400,30 @@ return *(__m512i *) __P; } +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_mask_load_ss 
(__m128 __W, __mmask8 __U, const float* __A) +{ + return (__U & 1) ? _mm_load_ss(__A) : (__m128) { __W[0], 0, 0, 0}; +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_maskz_load_ss (__mmask8 __U, const float* __A) +{ + return (__U & 1) ? _mm_load_ss(__A) : (__m128) { 0, 0, 0, 0}; +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_mask_load_sd (__m128d __W, __mmask8 __U, const double* __A) +{ + return (__U & 1) ? _mm_load_sd(__A) :(__m128d) { __W[0], 0}; +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_maskz_load_sd (__mmask8 __U, const double* __A) +{ + return (__U & 1) ? _mm_load_sd(__A) :(__m128d) { 0, 0}; +} + /* SIMD store ops */ static __inline void __DEFAULT_FN_ATTRS @@ -4491,6 +4515,20 @@ *(__m512i *) __P = __A; }
[PATCH] D21505: [Clang][AVX512][Intrinsics]Adding intrinsics for mov{ss|sd} instruction set
m_zuckerman created this revision. m_zuckerman added reviewers: AsafBadouh, igorb, delena. m_zuckerman added a subscriber: cfe-commits. http://reviews.llvm.org/D21505 Files: lib/Headers/avx512fintrin.h test/CodeGen/avx512f-builtins.c Index: test/CodeGen/avx512f-builtins.c === --- test/CodeGen/avx512f-builtins.c +++ test/CodeGen/avx512f-builtins.c @@ -241,6 +241,23 @@ _mm512_mask_store_pd(p, m, a); } +__m128 test_mm_mask_store_ss (float * __W, __mmask8 __U, __m128 __A) +{ + // CHECK-LABEL: @test_mm_mask_store_ss + // CHECK: store float {{.*}}, float* {{.*}} + // CHECK: load <4 x float>, <4 x float>* {{.*}} + return _mm_mask_store_ss (__W, __U, __A); +} + +__m128d test_mm_mask_store_sd (double * __W, __mmask8 __U, __m128d __A) +{ + // CHECK-LABEL: @test_mm_mask_store_sd + // CHECK: store double {{.*}}, double* {{.*}} + // CHECK: load <2 x double>, <2 x double>* {{.*}} + return _mm_mask_store_sd ( __W, __U, __A); +} + + void test_mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A) { // CHECK-LABEL: @test_mm512_mask_storeu_epi32 // CHECK: @llvm.masked.store.v16i32(<16 x i32> %{{.*}}, <16 x i32>* %{{.*}}, i32 1, <16 x i1> %{{.*}}) @@ -371,6 +388,38 @@ return _mm512_maskz_load_pd(__U, __P); } +__m128 test_mm_mask_load_ss (__m128 __W, __mmask8 __U, float const* __A) +{ + // CHECK-LABEL: @test_mm_mask_load_ss + // CHECK: select <4 x i1> {{.*}}, <4 x float> {{.*}}, <4 x float> {{.*}} + // CHECK: load <4 x float>, <4 x float>* {{.*}} + return _mm_mask_load_ss ( __W, __U, __A); +} + +__m128 test_mm_maskz_load_ss (__mmask8 __U, float const* __A) +{ + // CHECK-LABEL: @test_mm_maskz_load_ss + // CHECK: select <4 x i1> {{.*}}, <4 x float> {{.*}}, <4 x float> {{.*}} + // CHECK: load <4 x float>, <4 x float>* {{.*}} + return _mm_maskz_load_ss (__U, __A); +} + +__m128d test_mm_mask_load_sd (__m128 __W, __mmask8 __U, double const* __A) +{ + // CHECK-LABEL: @test_mm_mask_load_sd + // CHECK: select <2 x i1>{{.*}}, <2 x double>{{.*}}, <2 x double>{{.*}} + // CHECK: load <2 x 
double>, <2 x double>* {{.*}} + return _mm_mask_load_sd ( __W, __U, __A); +} + +__m128d test_mm_maskz_load_sd (__mmask8 __U, double const* __A) +{ + // CHECK-LABEL: @test_mm_maskz_load_sd + // CHECK: select <2 x i1> {{.*}}, <2 x double> {{.*}}, <2 x double> {{.*}} + // CHECK: load <2 x double>, <2 x double>* {{.*}} + return _mm_maskz_load_sd (__U, __A); +} + __m512d test_mm512_set1_pd(double d) { // CHECK-LABEL: @test_mm512_set1_pd @@ -6125,6 +6174,38 @@ return _mm512_maskz_mov_ps(__U, __A); } +__m128 test_mm_mask_move_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) +{ + // CHECK-LABEL: @test_mm_mask_move_ss + // CHECK: select <4 x i1>{{.*}}, <4 x float> {{.*}}, <4 x float>{{.*}} + // CHECK: load <4 x float>, <4 x float>* {{.*}} + return _mm_mask_move_ss ( __W, __U, __A, __B); +} + +__m128 test_mm_maskz_move_ss (__mmask8 __U, __m128 __A, __m128 __B) +{ + // CHECK-LABEL: @test_mm_maskz_move_ss + // CHECK: select <4 x i1>{{.*}}, <4 x float> {{.*}}, <4 x float>{{.*}} + // CHECK: load <4 x float>, <4 x float>* {{.*}} + return _mm_maskz_move_ss (__U, __A, __B); +} + +__m128d test_mm_mask_move_sd (__m128 __W, __mmask8 __U, __m128d __A, __m128d __B) +{ + // CHECK-LABEL: @test_mm_mask_move_sd + // CHECK: select <2 x i1>{{.*}}, <2 x double>{{.*}}, <2 x double>{{.*}} + // CHECK: load <2 x double>, <2 x double>* {{.*}} + return _mm_mask_move_sd ( __W, __U, __A, __B); +} + +__m128d test_mm_maskz_move_sd (__mmask8 __U, __m128d __A, __m128d __B) +{ + // CHECK-LABEL: @test_mm_maskz_move_sd + // CHECK: select <2 x i1> {{.*}}, <2 x double> {{.*}}, <2 x double> {{.*}} + // CHECK: load <2 x double>, <2 x double>* {{.*}} + return _mm_maskz_move_sd (__U, __A, __B); +} + void test_mm512_mask_compressstoreu_pd(void *__P, __mmask8 __U, __m512d __A) { // CHECK-LABEL: @test_mm512_mask_compressstoreu_pd // CHECK: @llvm.x86.avx512.mask.compress.store.pd.512 Index: lib/Headers/avx512fintrin.h === --- lib/Headers/avx512fintrin.h +++ lib/Headers/avx512fintrin.h @@ -4400,6 +4400,42 @@ 
return *(__m512i *) __P; } +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_mask_load_ss (__m128 __W, __mmask8 __U, const float* __A) +{ + __m128 temp = __builtin_ia32_selectps_128 ((__mmask8) __U, + (__v4sf) _mm_load_ss(__A), + (__v4sf) __W); + return (__m128) { temp[0], 0, 0, 0}; +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_maskz_load_ss (__mmask8 __U, const float* __A) +{ + __m128 temp = __builtin_ia32_selectps_128 ((__mmask8) __U, + (__v4sf) _mm_load_ss(__A), + (__v4sf) _mm_setzero_si128()); + return (__m128) {temp[0], 0, 0, 0}; +} + +static __inline__ __m128d __DEFAULT_F
[PATCH] D21373: [Clang][bmi][intrinsics] Adding _mm_tzcnt_64 _mm_tzcnt_32 intrinsics to clang.
m_zuckerman created this revision. m_zuckerman added reviewers: AsafBadouh, igorb, delena. m_zuckerman added a subscriber: cfe-commits. http://reviews.llvm.org/D21373 Files: lib/Headers/bmiintrin.h test/CodeGen/bmi-builtins.c Index: test/CodeGen/bmi-builtins.c === --- test/CodeGen/bmi-builtins.c +++ test/CodeGen/bmi-builtins.c @@ -64,6 +64,13 @@ return __tzcnt_u32(__X); } +int test_mm_tzcnt_32(unsigned int __X) { + // CHECK-LABEL: test_mm_tzcnt_32 + // CHECK: icmp ne i32 %{{.*}}, 0 + // CHECK: i32 @llvm.cttz.i32(i32 %{{.*}}, i1 true) + return _mm_tzcnt_32(__X); +} + unsigned long long test__andn_u64(unsigned long __X, unsigned long __Y) { // CHECK-LABEL: test__andn_u64 // CHECK: xor i64 %{{.*}}, -1 @@ -105,6 +112,13 @@ return __tzcnt_u64(__X); } +long long test_mm_tzcnt_64(unsigned long long __X) { + // CHECK-LABEL: test_mm_tzcnt_64 + // CHECK: icmp ne i64 %{{.*}}, 0 + // CHECK: i64 @llvm.cttz.i64(i64 %{{.*}}, i1 true) + return _mm_tzcnt_64(__X); +} + // Intel intrinsics unsigned short test_tzcnt_u16(unsigned short __X) { Index: lib/Headers/bmiintrin.h === --- lib/Headers/bmiintrin.h +++ lib/Headers/bmiintrin.h @@ -281,12 +281,19 @@ ///An unsigned 32-bit integer whose trailing zeros are to be counted. /// \returns An unsigned 32-bit integer containing the number of trailing zero ///bits in the operand. + static __inline__ unsigned int __RELAXED_FN_ATTRS __tzcnt_u32(unsigned int __X) { return __X ? __builtin_ctz(__X) : 32; } +static __inline__ int __RELAXED_FN_ATTRS +_mm_tzcnt_32(unsigned int __X) +{ + return __X ? __builtin_ctz(__X) : 32; +} + #ifdef __x86_64__ /// \brief Performs a bitwise AND of the second operand with the one's @@ -502,12 +509,19 @@ ///An unsigned 64-bit integer whose trailing zeros are to be counted. /// \returns An unsigned 64-bit integer containing the number of trailing zero ///bits in the operand. + static __inline__ unsigned long long __RELAXED_FN_ATTRS __tzcnt_u64(unsigned long long __X) { return __X ? 
__builtin_ctzll(__X) : 64; } +static __inline__ long long __RELAXED_FN_ATTRS +_mm_tzcnt_64(unsigned long long __X) +{ + return __X ? __builtin_ctzll(__X) : 64; +} + #endif /* __x86_64__ */ #undef __DEFAULT_FN_ATTRS Index: test/CodeGen/bmi-builtins.c === --- test/CodeGen/bmi-builtins.c +++ test/CodeGen/bmi-builtins.c @@ -64,6 +64,13 @@ return __tzcnt_u32(__X); } +int test_mm_tzcnt_32(unsigned int __X) { + // CHECK-LABEL: test_mm_tzcnt_32 + // CHECK: icmp ne i32 %{{.*}}, 0 + // CHECK: i32 @llvm.cttz.i32(i32 %{{.*}}, i1 true) + return _mm_tzcnt_32(__X); +} + unsigned long long test__andn_u64(unsigned long __X, unsigned long __Y) { // CHECK-LABEL: test__andn_u64 // CHECK: xor i64 %{{.*}}, -1 @@ -105,6 +112,13 @@ return __tzcnt_u64(__X); } +long long test_mm_tzcnt_64(unsigned long long __X) { + // CHECK-LABEL: test_mm_tzcnt_64 + // CHECK: icmp ne i64 %{{.*}}, 0 + // CHECK: i64 @llvm.cttz.i64(i64 %{{.*}}, i1 true) + return _mm_tzcnt_64(__X); +} + // Intel intrinsics unsigned short test_tzcnt_u16(unsigned short __X) { Index: lib/Headers/bmiintrin.h === --- lib/Headers/bmiintrin.h +++ lib/Headers/bmiintrin.h @@ -281,12 +281,19 @@ ///An unsigned 32-bit integer whose trailing zeros are to be counted. /// \returns An unsigned 32-bit integer containing the number of trailing zero ///bits in the operand. + static __inline__ unsigned int __RELAXED_FN_ATTRS __tzcnt_u32(unsigned int __X) { return __X ? __builtin_ctz(__X) : 32; } +static __inline__ int __RELAXED_FN_ATTRS +_mm_tzcnt_32(unsigned int __X) +{ + return __X ? __builtin_ctz(__X) : 32; +} + #ifdef __x86_64__ /// \brief Performs a bitwise AND of the second operand with the one's @@ -502,12 +509,19 @@ ///An unsigned 64-bit integer whose trailing zeros are to be counted. /// \returns An unsigned 64-bit integer containing the number of trailing zero ///bits in the operand. + static __inline__ unsigned long long __RELAXED_FN_ATTRS __tzcnt_u64(unsigned long long __X) { return __X ? 
__builtin_ctzll(__X) : 64; } +static __inline__ long long __RELAXED_FN_ATTRS +_mm_tzcnt_64(unsigned long long __X) +{ + return __X ? __builtin_ctzll(__X) : 64; +} + #endif /* __x86_64__ */ #undef __DEFAULT_FN_ATTRS ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r272667 - [Clang][avx512][Intrinsics] adding prefetch gather intrinsics
Author: mzuckerm Date: Tue Jun 14 08:45:17 2016 New Revision: 272667 URL: http://llvm.org/viewvc/llvm-project?rev=272667&view=rev Log: [Clang][avx512][Intrinsics] adding prefetch gather intrinsics Differential Revision: http://reviews.llvm.org/D21322 Modified: cfe/trunk/lib/Headers/avx512pfintrin.h cfe/trunk/test/CodeGen/avx512pf-builtins.c Modified: cfe/trunk/lib/Headers/avx512pfintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512pfintrin.h?rev=272667&r1=272666&r2=272667&view=diff == --- cfe/trunk/lib/Headers/avx512pfintrin.h (original) +++ cfe/trunk/lib/Headers/avx512pfintrin.h Tue Jun 14 08:45:17 2016 @@ -35,21 +35,40 @@ __builtin_ia32_gatherpfdpd((__mmask8)(mask), (__v8si)(__m256i)(index), \ (long long const *)(addr), (int)(scale), \ (int)(hint)); }) + +#define _mm512_prefetch_i32gather_pd(index, addr, scale, hint) __extension__ ({\ + __builtin_ia32_gatherpfdpd((__mmask8) -1, (__v8si)(__m256i)(index), \ + (long long const *)(addr), (int)(scale), \ + (int)(hint)); }) #define _mm512_mask_prefetch_i32gather_ps(index, mask, addr, scale, hint) ({\ __builtin_ia32_gatherpfdps((__mmask16)(mask), \ (__v16si)(__m512i)(index), (int const *)(addr), \ (int)(scale), (int)(hint)); }) +#define _mm512_prefetch_i32gather_ps(index, addr, scale, hint) ({\ + __builtin_ia32_gatherpfdps((__mmask16) -1, \ + (__v16si)(__m512i)(index), (int const *)(addr), \ + (int)(scale), (int)(hint)); }) + #define _mm512_mask_prefetch_i64gather_pd(index, mask, addr, scale, hint) __extension__ ({\ __builtin_ia32_gatherpfqpd((__mmask8)(mask), (__v8di)(__m512i)(index), \ (long long const *)(addr), (int)(scale), \ (int)(hint)); }) +#define _mm512_prefetch_i64gather_pd(index, addr, scale, hint) __extension__ ({\ + __builtin_ia32_gatherpfqpd((__mmask8) -1, (__v8di)(__m512i)(index), \ + (long long const *)(addr), (int)(scale), \ + (int)(hint)); }) + #define _mm512_mask_prefetch_i64gather_ps(index, mask, addr, scale, hint) ({\ __builtin_ia32_gatherpfqps((__mmask8)(mask), 
(__v8di)(__m512i)(index), \ (int const *)(addr), (int)(scale), (int)(hint)); }) +#define _mm512_prefetch_i64gather_ps(index, addr, scale, hint) ({\ + __builtin_ia32_gatherpfqps((__mmask8) -1, (__v8di)(__m512i)(index), \ + (int const *)(addr), (int)(scale), (int)(hint)); }) + #define _mm512_prefetch_i32scatter_pd(addr, index, scale, hint) __extension__ ({\ __builtin_ia32_scatterpfdpd((__mmask8)-1, (__v8si)(__m256i)(index), \ (long long *)(addr), (int)(scale), \ Modified: cfe/trunk/test/CodeGen/avx512pf-builtins.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512pf-builtins.c?rev=272667&r1=272666&r2=272667&view=diff == --- cfe/trunk/test/CodeGen/avx512pf-builtins.c (original) +++ cfe/trunk/test/CodeGen/avx512pf-builtins.c Tue Jun 14 08:45:17 2016 @@ -11,24 +11,48 @@ void test_mm512_mask_prefetch_i32gather_ return _mm512_mask_prefetch_i32gather_pd(index, mask, addr, 2, 1); } +void test_mm512_prefetch_i32gather_pd(__m256i index, void const *addr, int hint) { + // CHECK-LABEL: @test_mm512_prefetch_i32gather_pd + // CHECK: @llvm.x86.avx512.gatherpf.dpd + return _mm512_prefetch_i32gather_pd(index, addr, 2, 1); +} + void test_mm512_mask_prefetch_i32gather_ps(__m512i index, __mmask16 mask, void const *addr, int hint) { // CHECK-LABEL: @test_mm512_mask_prefetch_i32gather_ps // CHECK: @llvm.x86.avx512.gatherpf.dps return _mm512_mask_prefetch_i32gather_ps(index, mask, addr, 2, 1); } +void test_mm512_prefetch_i32gather_ps(__m512i index, void const *addr, int hint) { + // CHECK-LABEL: @test_mm512_prefetch_i32gather_ps + // CHECK: @llvm.x86.avx512.gatherpf.dps + return _mm512_prefetch_i32gather_ps(index, addr, 2, 1); +} + void test_mm512_mask_prefetch_i64gather_pd(__m512i index, __mmask8 mask, void const *addr, int hint) { // CHECK-LABEL: @test_mm512_mask_prefetch_i64gather_pd // CHECK: @llvm.x86.avx512.gatherpf.qpd return _mm512_mask_prefetch_i64gather_pd(index, mask, addr, 2, 1); } +void test_mm512_prefetch_i64gather_pd(__m512i index, void const *addr, int 
hint) { + // CHECK-LABEL: @test_mm512_prefetch_i64gather_pd + // CHECK: @llvm.x86.avx512.gatherpf.qpd + return _mm512_prefetch_i64gather_pd(index, addr, 2, 1); +} + void test_mm512_mask_prefetch_i64gather_ps(__m512i index, __mmask8 mask, void const *addr, int hint) { // CHECK-LABEL: @
r272658 - [Clang][AVX512][intrinsics] Adding missing intrinsics div_pd and div_ps
Author: mzuckerm Date: Tue Jun 14 07:38:58 2016 New Revision: 272658 URL: http://llvm.org/viewvc/llvm-project?rev=272658&view=rev Log: [Clang][AVX512][intrinsics] Adding missing intrinsics div_pd and div_ps Differential Revision: http://reviews.llvm.org/D20626 Modified: cfe/trunk/lib/Headers/avx512fintrin.h cfe/trunk/test/CodeGen/avx512f-builtins.c Modified: cfe/trunk/lib/Headers/avx512fintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=272658&r1=272657&r2=272658&view=diff == --- cfe/trunk/lib/Headers/avx512fintrin.h (original) +++ cfe/trunk/lib/Headers/avx512fintrin.h Tue Jun 14 07:38:58 2016 @@ -2446,6 +2446,12 @@ _mm_maskz_div_sd(__mmask8 __U,__m128d __ (__v2df)_mm_setzero_pd(), \ (__mmask8)(U), (int)(R)); }) +static __inline __m512d __DEFAULT_FN_ATTRS +_mm512_div_pd(__m512d __a, __m512d __b) +{ + return (__m512d)((__v8df)__a/(__v8df)__b); +} + static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __A, @@ -2465,6 +2471,12 @@ _mm512_maskz_div_pd(__mmask8 __U, __m512 _MM_FROUND_CUR_DIRECTION); } +static __inline __m512 __DEFAULT_FN_ATTRS +_mm512_div_ps(__m512 __a, __m512 __b) +{ + return (__m512)((__v16sf)__a/(__v16sf)__b); +} + static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A, Modified: cfe/trunk/test/CodeGen/avx512f-builtins.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512f-builtins.c?rev=272658&r1=272657&r2=272658&view=diff == --- cfe/trunk/test/CodeGen/avx512f-builtins.c (original) +++ cfe/trunk/test/CodeGen/avx512f-builtins.c Tue Jun 14 07:38:58 2016 @@ -1972,10 +1972,15 @@ __m512d test_mm512_maskz_div_round_pd(__ // CHECK: @llvm.x86.avx512.mask.div.pd.512 return _mm512_maskz_div_round_pd(__U,__A,__B,_MM_FROUND_TO_NEAREST_INT); } -__m512d 
test_mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { - // CHECK-LABEL: @test_mm512_mask_div_pd +__m512d test_mm512_div_pd(__m512d __a, __m512d __b) { + // CHECK-LABLE: @test_mm512_div_pd + // CHECK: fdiv <8 x double> + return _mm512_div_pd(__a,__b); +} +__m512d test_mm512_mask_div_pd(__m512d __w, __mmask8 __u, __m512d __a, __m512d __b) { + // CHECK-LABLE: @test_mm512_mask_div_pd // CHECK: @llvm.x86.avx512.mask.div.pd.512 - return _mm512_mask_div_pd(__W,__U,__A,__B); + return _mm512_mask_div_pd(__w,__u,__a,__b); } __m512d test_mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B) { // CHECK-LABEL: @test_mm512_maskz_div_pd @@ -1997,6 +2002,11 @@ __m512 test_mm512_maskz_div_round_ps(__m // CHECK: @llvm.x86.avx512.mask.div.ps.512 return _mm512_maskz_div_round_ps(__U,__A,__B,_MM_FROUND_TO_NEAREST_INT); } +__m512 test_mm512_div_ps(__m512 __A, __m512 __B) { + // CHECK-LABEL: @test_mm512_div_ps + // CHECK: fdiv <16 x float> + return _mm512_div_ps(__A,__B); +} __m512 test_mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { // CHECK-LABEL: @test_mm512_mask_div_ps // CHECK: @llvm.x86.avx512.mask.div.ps.512 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D21322: [Clang][avx512][Intrinsics] adding prefetch gather intrinsics
m_zuckerman created this revision. m_zuckerman added reviewers: AsafBadouh, igorb, delena. m_zuckerman added a subscriber: cfe-commits. _mm512_prefetch_i64gather_ps _mm512_prefetch_i32gather_pd _mm512_prefetch_i64gather_pd _mm512_prefetch_i32gather_ps http://reviews.llvm.org/D21322 Files: lib/Headers/avx512pfintrin.h test/CodeGen/avx512pf-builtins.c Index: lib/Headers/avx512pfintrin.h === --- lib/Headers/avx512pfintrin.h +++ lib/Headers/avx512pfintrin.h @@ -35,21 +35,40 @@ __builtin_ia32_gatherpfdpd((__mmask8)(mask), (__v8si)(__m256i)(index), \ (long long const *)(addr), (int)(scale), \ (int)(hint)); }) + +#define _mm512_prefetch_i32gather_pd(index, addr, scale, hint) __extension__ ({\ + __builtin_ia32_gatherpfdpd((__mmask8) -1, (__v8si)(__m256i)(index), \ + (long long const *)(addr), (int)(scale), \ + (int)(hint)); }) #define _mm512_mask_prefetch_i32gather_ps(index, mask, addr, scale, hint) ({\ __builtin_ia32_gatherpfdps((__mmask16)(mask), \ (__v16si)(__m512i)(index), (int const *)(addr), \ (int)(scale), (int)(hint)); }) +#define _mm512_prefetch_i32gather_ps(index, addr, scale, hint) ({\ + __builtin_ia32_gatherpfdps((__mmask16) -1, \ + (__v16si)(__m512i)(index), (int const *)(addr), \ + (int)(scale), (int)(hint)); }) + #define _mm512_mask_prefetch_i64gather_pd(index, mask, addr, scale, hint) __extension__ ({\ __builtin_ia32_gatherpfqpd((__mmask8)(mask), (__v8di)(__m512i)(index), \ (long long const *)(addr), (int)(scale), \ (int)(hint)); }) +#define _mm512_prefetch_i64gather_pd(index, addr, scale, hint) __extension__ ({\ + __builtin_ia32_gatherpfqpd((__mmask8) -1, (__v8di)(__m512i)(index), \ + (long long const *)(addr), (int)(scale), \ + (int)(hint)); }) + #define _mm512_mask_prefetch_i64gather_ps(index, mask, addr, scale, hint) ({\ __builtin_ia32_gatherpfqps((__mmask8)(mask), (__v8di)(__m512i)(index), \ (int const *)(addr), (int)(scale), (int)(hint)); }) +#define _mm512_prefetch_i64gather_ps(index, addr, scale, hint) ({\ + __builtin_ia32_gatherpfqps((__mmask8) -1, 
(__v8di)(__m512i)(index), \ + (int const *)(addr), (int)(scale), (int)(hint)); }) + #define _mm512_prefetch_i32scatter_pd(addr, index, scale, hint) __extension__ ({\ __builtin_ia32_scatterpfdpd((__mmask8)-1, (__v8si)(__m256i)(index), \ (long long *)(addr), (int)(scale), \ Index: test/CodeGen/avx512pf-builtins.c === --- test/CodeGen/avx512pf-builtins.c +++ test/CodeGen/avx512pf-builtins.c @@ -11,24 +11,48 @@ return _mm512_mask_prefetch_i32gather_pd(index, mask, addr, 2, 1); } +void test_mm512_prefetch_i32gather_pd(__m256i index, void const *addr, int hint) { + // CHECK-LABEL: @test_mm512_prefetch_i32gather_pd + // CHECK: @llvm.x86.avx512.gatherpf.dpd + return _mm512_prefetch_i32gather_pd(index, addr, 2, 1); +} + void test_mm512_mask_prefetch_i32gather_ps(__m512i index, __mmask16 mask, void const *addr, int hint) { // CHECK-LABEL: @test_mm512_mask_prefetch_i32gather_ps // CHECK: @llvm.x86.avx512.gatherpf.dps return _mm512_mask_prefetch_i32gather_ps(index, mask, addr, 2, 1); } +void test_mm512_prefetch_i32gather_ps(__m512i index, void const *addr, int hint) { + // CHECK-LABEL: @test_mm512_prefetch_i32gather_ps + // CHECK: @llvm.x86.avx512.gatherpf.dps + return _mm512_prefetch_i32gather_ps(index, addr, 2, 1); +} + void test_mm512_mask_prefetch_i64gather_pd(__m512i index, __mmask8 mask, void const *addr, int hint) { // CHECK-LABEL: @test_mm512_mask_prefetch_i64gather_pd // CHECK: @llvm.x86.avx512.gatherpf.qpd return _mm512_mask_prefetch_i64gather_pd(index, mask, addr, 2, 1); } +void test_mm512_prefetch_i64gather_pd(__m512i index, void const *addr, int hint) { + // CHECK-LABEL: @test_mm512_prefetch_i64gather_pd + // CHECK: @llvm.x86.avx512.gatherpf.qpd + return _mm512_prefetch_i64gather_pd(index, addr, 2, 1); +} + void test_mm512_mask_prefetch_i64gather_ps(__m512i index, __mmask8 mask, void const *addr, int hint) { // CHECK-LABEL: @test_mm512_mask_prefetch_i64gather_ps // CHECK: @llvm.x86.avx512.gatherpf.qps return _mm512_mask_prefetch_i64gather_ps(index, mask, addr, 2, 
1); } +void test_mm512_prefetch_i64gather_ps(__m512i index, void const *addr, int hint) { + // CHECK-LABEL: @test_mm512_prefetch_i64gather_ps + // CHECK: @llvm.x86.avx512.gatherpf.qps + return _mm512_prefet
r272123 - [Clang][AVX512][BUILTIN]Adding intrinsics for range_round_{sd|ss}
Author: mzuckerm Date: Wed Jun 8 03:19:27 2016 New Revision: 272123 URL: http://llvm.org/viewvc/llvm-project?rev=272123&view=rev Log: [Clang][AVX512][BUILTIN]Adding intrinsics for range_round_{sd|ss} Differential Revision: http://reviews.llvm.org/D21002 Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def cfe/trunk/lib/Headers/avx512dqintrin.h cfe/trunk/test/CodeGen/avx512dq-builtins.c Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=272123&r1=272122&r2=272123&view=diff == --- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original) +++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Wed Jun 8 03:19:27 2016 @@ -1527,6 +1527,8 @@ TARGET_BUILTIN(__builtin_ia32_rangepd128 TARGET_BUILTIN(__builtin_ia32_rangepd256_mask, "V4dV4dV4dIiV4dUc", "", "avx512vl,avx512dq") TARGET_BUILTIN(__builtin_ia32_rangeps128_mask, "V4fV4fV4fIiV4fUc", "", "avx512vl,avx512dq") TARGET_BUILTIN(__builtin_ia32_rangeps256_mask, "V8fV8fV8fIiV8fUc", "", "avx512vl,avx512dq") +TARGET_BUILTIN(__builtin_ia32_rangesd128_round_mask, "V2dV2dV2dV2dUcIiIi", "", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_rangess128_round_mask, "V4fV4fV4fV4fUcIiIi", "", "avx512dq") TARGET_BUILTIN(__builtin_ia32_reducepd128_mask, "V2dV2dIiV2dUc", "", "avx512vl,avx512dq") TARGET_BUILTIN(__builtin_ia32_reducepd256_mask, "V4dV4dIiV4dUc", "", "avx512vl,avx512dq") TARGET_BUILTIN(__builtin_ia32_reduceps128_mask, "V4fV4fIiV4fUc", "", "avx512vl,avx512dq") Modified: cfe/trunk/lib/Headers/avx512dqintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512dqintrin.h?rev=272123&r1=272122&r2=272123&view=diff == --- cfe/trunk/lib/Headers/avx512dqintrin.h (original) +++ cfe/trunk/lib/Headers/avx512dqintrin.h Wed Jun 8 03:19:27 2016 @@ -785,6 +785,48 @@ _mm512_maskz_cvtepu64_ps (__mmask8 __U, (__v16sf)_mm512_setzero_ps(), \ (__mmask16)(U), (int)(R)); }) +#define _mm_range_round_ss(A, B, C, R) __extension__ ({ \ + 
(__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), \ + (__v4sf)_mm_setzero_ps(), \ + (__mmask8) -1, (int)(C),\ + (int)(R)); }) + +#define _mm_mask_range_round_ss(W, U, A, B, C, R) __extension__ ({ \ + (__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), \ + (__v4sf)(__m128)(W),\ + (__mmask8)(U), (int)(C),\ + (int)(R)); }) + +#define _mm_maskz_range_round_ss(U, A, B, C, R) __extension__ ({ \ + (__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), \ + (__v4sf)_mm_setzero_ps(), \ + (__mmask8)(U), (int)(C),\ + (int)(R)); }) + +#define _mm_range_round_sd(A, B, C, R) __extension__ ({ \ + (__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \ +(__v2df)(__m128d)(B), \ +(__v2df)_mm_setzero_pd(), \ +(__mmask8) -1, (int)(C),\ +(int)(R)); }) + +#define _mm_mask_range_round_sd(W, U, A, B, C, R) __extension__ ({ \ + (__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \ +(__v2df)(__m128d)(B), \ +(__v2df)(__m128d)(W),\ +(__mmask8)(U), (int)(C),\ +(int)(R)); }) + +#define _mm_maskz_range_round_sd(U, A, B, C, R) __extension__ ({ \ + (__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \ +(__v2df)(__m128d)(B), \ +(__v2df)_mm_setzero_pd(), \ +(__mmask8)(U), (int)(C),\ +(int)(R)); }) + #define _mm512_reduce_pd(A, B) __extension__ ({ \ (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \ (__v8df)_mm512_setzero_pd(), \ Modified: cfe/trunk/test
r272012 - [clang][AVX512][Intrinsics] Adding intrinsics reduce_[round]_{ss|sd} to clang
Author: mzuckerm Date: Tue Jun 7 09:00:20 2016 New Revision: 272012 URL: http://llvm.org/viewvc/llvm-project?rev=272012&view=rev Log: [clang][AVX512][Intrinsics] Adding intrinsics reduce_[round]_{ss|sd} to clang Differential Revision: http://reviews.llvm.org/D21014 Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def cfe/trunk/lib/Headers/avx512dqintrin.h cfe/trunk/test/CodeGen/avx512dq-builtins.c Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=272012&r1=272011&r2=272012&view=diff == --- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original) +++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Tue Jun 7 09:00:20 2016 @@ -1531,6 +1531,8 @@ TARGET_BUILTIN(__builtin_ia32_reducepd12 TARGET_BUILTIN(__builtin_ia32_reducepd256_mask, "V4dV4dIiV4dUc", "", "avx512vl,avx512dq") TARGET_BUILTIN(__builtin_ia32_reduceps128_mask, "V4fV4fIiV4fUc", "", "avx512vl,avx512dq") TARGET_BUILTIN(__builtin_ia32_reduceps256_mask, "V8fV8fIiV8fUc", "", "avx512vl,avx512dq") +TARGET_BUILTIN(__builtin_ia32_reducesd_mask, "V2dV2dV2dV2dUcIiIi", "", "avx512vl,avx512dq") +TARGET_BUILTIN(__builtin_ia32_reducess_mask, "V4fV4fV4fV4fUcIiIi", "", "avx512vl,avx512dq") TARGET_BUILTIN(__builtin_ia32_pmaddubsw128_mask, "V8sV16cV16cV8sUc", "", "avx512vl,avx512bw") TARGET_BUILTIN(__builtin_ia32_pmaddubsw256_mask, "V16sV32cV32cV16sUs", "", "avx512vl,avx512bw") TARGET_BUILTIN(__builtin_ia32_pmaddwd128_mask, "V4iV8sV8sV4iUc", "", "avx512vl,avx512bw") Modified: cfe/trunk/lib/Headers/avx512dqintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512dqintrin.h?rev=272012&r1=272011&r2=272012&view=diff == --- cfe/trunk/lib/Headers/avx512dqintrin.h (original) +++ cfe/trunk/lib/Headers/avx512dqintrin.h Tue Jun 7 09:00:20 2016 @@ -851,6 +851,78 @@ _mm512_maskz_cvtepu64_ps (__mmask8 __U, (__v16sf)_mm512_setzero_ps(), \ (__mmask16)(U), (int)(R)); }) +#define _mm_reduce_ss(A, B, C) __extension__ ({ \ + 
(__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \ + (__v4sf)_mm_setzero_ps(), \ + (__mmask8)-1, \ + (int)(C),_MM_FROUND_CUR_DIRECTION); }) + +#define _mm_mask_reduce_ss(W, U, A, B, C) __extension__ ({ \ + (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128 )(A), (__v4sf)(__m128 )(B), \ + (__v4sf)(__m128 )(W), \ + (__mmask8)(U), \ + (int)(C),_MM_FROUND_CUR_DIRECTION); }) + +#define _mm_maskz_reduce_ss(U, A, B, C) __extension__ ({ \ + (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), (__v4sf)(__m128 )(B), \ + (__v4sf)_mm_setzero_ps(), \ + (__mmask8)(U), \ + (int)(C),_MM_FROUND_CUR_DIRECTION); }) + +#define _mm_reduce_round_ss(A, B, C, R) __extension__ ({ \ + (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), (__v4sf)(__m128 )(B), \ + (__v4sf)_mm_setzero_ps(), \ + (__mmask8)-1, \ + (int)(C),(int)(R)); }) + +#define _mm_mask_reduce_round_ss(W, U, A, B, C, R) __extension__ ({ \ + (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128 )(A), (__v4sf)(__m128 )(B), \ + (__v4sf)(__m128 )(W), \ + (__mmask8)(U), \ + (int)(C),(int)(R)); }) + +#define _mm_maskz_reduce_round_ss(U, A, B, C, R) __extension__ ({ \ + (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), (__v4sf)(__m128 )(B), \ + (__v4sf)_mm_setzero_ps(), \ + (__mmask8)(U), \ + (int)(C),(int)(R)); }) + +#define _mm_reduce_sd(A, B, C) __extension__ ({ \ + (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128)(A), (__v2df)(__m128)(B), \ + (__v2df)_mm_setzero_pd(), \ + (__mmask8)-1, \ + (int)(C),_MM_FROUND_CUR_DIRECTION); }) + +#define _mm_mask_reduce_sd(W, U, A, B, C) __extension__ ({ \ + (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128 )(A), (__v2df)(__m128)(B), \ + (__v2df)(__m128 )(W), \ +
[PATCH] D21058: [Clang][AVX512][BUILTIN]Adding missing intrinsics srl and sll
m_zuckerman created this revision. m_zuckerman added reviewers: AsafBadouh, igorb, delena. m_zuckerman added a subscriber: cfe-commits. http://reviews.llvm.org/D21058 Files: include/clang/Basic/BuiltinsX86.def lib/Headers/avx512bwintrin.h test/CodeGen/avx512bw-builtins.c Index: test/CodeGen/avx512bw-builtins.c === --- test/CodeGen/avx512bw-builtins.c +++ test/CodeGen/avx512bw-builtins.c @@ -1505,8 +1505,6 @@ return _mm512_maskz_alignr_epi8(__U, __A, __B, 2); } - - __m512i test_mm512_mm_dbsad_epu8(__m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_mm_dbsad_epu8 // CHECK: @llvm.x86.avx512.mask.dbpsadbw.512 @@ -1537,3 +1535,15 @@ return _mm512_movepi16_mask(__A); } +__m512i test_mm512_bslli_epi128 (__m512i __A){ + // CHECK-LABEL: @test_mm512_bslli_epi128 + // CHECK: @llvm.x86.avx512.psll.dq.512( + return (__m512i) __builtin_ia32_pslldq512 (__A, 4); +} + +__m512i test_mm512_bsrli_epi128 (__m512i __A){ + // CHECK-LABEL: @test_mm512_bsrli_epi128 + // CHECK: @llvm.x86.avx512.psrl.dq.512( + return (__m512i) __builtin_ia32_psrldq512 (__A, 4); +} + Index: lib/Headers/avx512bwintrin.h === --- lib/Headers/avx512bwintrin.h +++ lib/Headers/avx512bwintrin.h @@ -2186,6 +2186,14 @@ (__v64qi) __B); } +#define _mm512_bslli_epi128 (__A,__N)({\ + return (__m512i) __builtin_ia32_pslldq512 ((__m512i)__A,(int) __N);\ +}) + +#define _mm512_bsrli_epi128 ( __A, __N)({\ + return (__m512i) __builtin_ia32_psrldq512 ((__m512i)__A, (int)__N);\ +}) + #undef __DEFAULT_FN_ATTRS Index: include/clang/Basic/BuiltinsX86.def === --- include/clang/Basic/BuiltinsX86.def +++ include/clang/Basic/BuiltinsX86.def @@ -2255,6 +2255,8 @@ TARGET_BUILTIN(__builtin_ia32_vpmultishiftqb512_mask, "V64cV64cV64cV64cULLi","","avx512vbmi") TARGET_BUILTIN(__builtin_ia32_vpmultishiftqb128_mask, "V16cV16cV16cV16cUs","","avx512vbmi,avx512vl") TARGET_BUILTIN(__builtin_ia32_vpmultishiftqb256_mask, "V32cV32cV32cV32cUi","","avx512vbmi,avx512vl") +TARGET_BUILTIN(__builtin_ia32_pslldq512, "V8dV8dIi","","avx512bw") 
+TARGET_BUILTIN(__builtin_ia32_psrldq512, "V8dV8dIi","","avx512bw") // MONITORX/MWAITX TARGET_BUILTIN(__builtin_ia32_monitorx, "vv*UiUi", "", "mwaitx") Index: test/CodeGen/avx512bw-builtins.c === --- test/CodeGen/avx512bw-builtins.c +++ test/CodeGen/avx512bw-builtins.c @@ -1505,8 +1505,6 @@ return _mm512_maskz_alignr_epi8(__U, __A, __B, 2); } - - __m512i test_mm512_mm_dbsad_epu8(__m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_mm_dbsad_epu8 // CHECK: @llvm.x86.avx512.mask.dbpsadbw.512 @@ -1537,3 +1535,15 @@ return _mm512_movepi16_mask(__A); } +__m512i test_mm512_bslli_epi128 (__m512i __A){ + // CHECK-LABEL: @test_mm512_bslli_epi128 + // CHECK: @llvm.x86.avx512.psll.dq.512( + return (__m512i) __builtin_ia32_pslldq512 (__A, 4); +} + +__m512i test_mm512_bsrli_epi128 (__m512i __A){ + // CHECK-LABEL: @test_mm512_bsrli_epi128 + // CHECK: @llvm.x86.avx512.psrl.dq.512( + return (__m512i) __builtin_ia32_psrldq512 (__A, 4); +} + Index: lib/Headers/avx512bwintrin.h === --- lib/Headers/avx512bwintrin.h +++ lib/Headers/avx512bwintrin.h @@ -2186,6 +2186,14 @@ (__v64qi) __B); } +#define _mm512_bslli_epi128 (__A,__N)({\ + return (__m512i) __builtin_ia32_pslldq512 ((__m512i)__A,(int) __N);\ +}) + +#define _mm512_bsrli_epi128 ( __A, __N)({\ + return (__m512i) __builtin_ia32_psrldq512 ((__m512i)__A, (int)__N);\ +}) + #undef __DEFAULT_FN_ATTRS Index: include/clang/Basic/BuiltinsX86.def === --- include/clang/Basic/BuiltinsX86.def +++ include/clang/Basic/BuiltinsX86.def @@ -2255,6 +2255,8 @@ TARGET_BUILTIN(__builtin_ia32_vpmultishiftqb512_mask, "V64cV64cV64cV64cULLi","","avx512vbmi") TARGET_BUILTIN(__builtin_ia32_vpmultishiftqb128_mask, "V16cV16cV16cV16cUs","","avx512vbmi,avx512vl") TARGET_BUILTIN(__builtin_ia32_vpmultishiftqb256_mask, "V32cV32cV32cV32cUi","","avx512vbmi,avx512vl") +TARGET_BUILTIN(__builtin_ia32_pslldq512, "V8dV8dIi","","avx512bw") +TARGET_BUILTIN(__builtin_ia32_psrldq512, "V8dV8dIi","","avx512bw") // MONITORX/MWAITX TARGET_BUILTIN(__builtin_ia32_monitorx, 
"vv*UiUi", "", "mwaitx") ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D21021: [Clang][AVX512][BuiltIn]Adding intrinsics move_{sd|ss} to clang
m_zuckerman created this revision. m_zuckerman added reviewers: AsafBadouh, igorb, delena. m_zuckerman added a subscriber: cfe-commits. http://reviews.llvm.org/D21021 Files: include/clang/Basic/BuiltinsX86.def lib/Headers/avx512fintrin.h test/CodeGen/avx512f-builtins.c Index: test/CodeGen/avx512f-builtins.c === --- test/CodeGen/avx512f-builtins.c +++ test/CodeGen/avx512f-builtins.c @@ -7433,4 +7433,32 @@ // CHECK-LABEL: @test_mm512_setzero_pd // CHECK: zeroinitializer return _mm512_setzero_pd(); -} \ No newline at end of file +} + +__m128 test_mm_mask_move_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) +{ + // CHECK-LABEL: @test_mm_mask_move_ss + // CHECK: @llvm.x86.avx512.mask.move.ss + return _mm_mask_move_ss ( __W, __U, __A, __B); +} + +__m128 test_mm_maskz_move_ss (__mmask8 __U, __m128 __A, __m128 __B) +{ + // CHECK-LABEL: @test_mm_maskz_move_ss + // CHECK: @llvm.x86.avx512.mask.move.ss + return _mm_maskz_move_ss (__U, __A, __B); +} + +__m128d test_mm_mask_move_sd (__m128 __W, __mmask8 __U, __m128d __A, __m128d __B) +{ + // CHECK-LABEL: @test_mm_mask_move_sd + // CHECK: @llvm.x86.avx512.mask.move.sd + return _mm_mask_move_sd ( __W, __U, __A, __B); +} + +__m128d test_mm_maskz_move_sd (__mmask8 __U, __m128d __A, __m128d __B) +{ + // CHECK-LABEL: @test_mm_maskz_move_sd + // CHECK: @llvm.x86.avx512.mask.move.sd + return _mm_maskz_move_sd (__U, __A, __B); +} Index: lib/Headers/avx512fintrin.h === --- lib/Headers/avx512fintrin.h +++ lib/Headers/avx512fintrin.h @@ -9118,6 +9118,40 @@ (__mmask16) __U); } +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_mask_move_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_movss_mask ((__v4sf) __A, (__v4sf) __B, + (__v4sf) __W, + (__mmask8) __U); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_maskz_move_ss (__mmask8 __U, __m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_movss_mask ((__v4sf) __A, (__v4sf) __B, + (__v4sf) + _mm_setzero_si128(), + (__mmask8) __U); +} + 
+static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_mask_move_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) +{ + return (__m128d) __builtin_ia32_movsd_mask ((__v2df) __A, (__v2df) __B, + (__v2df) __W, + (__mmask8) __U); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_maskz_move_sd (__mmask8 __U, __m128d __A, __m128d __B) +{ + return (__m128d) __builtin_ia32_movsd_mask ((__v2df) __A, (__v2df) __B, + (__v2di) + _mm_setzero_pd (), + (__mmask8) __U); +} + #define _mm512_shuffle_epi32(A, I) __extension__ ({ \ (__m512i)__builtin_ia32_pshufd512_mask((__v16si)(__m512i)(A), (int)(I), \ (__v16si)_mm512_undefined_epi32(), \ Index: include/clang/Basic/BuiltinsX86.def === --- include/clang/Basic/BuiltinsX86.def +++ include/clang/Basic/BuiltinsX86.def @@ -2232,6 +2232,8 @@ TARGET_BUILTIN(__builtin_ia32_movapd256_mask, "V4dV4dV4dUc","","avx512vl") TARGET_BUILTIN(__builtin_ia32_movaps128_mask, "V4fV4fV4fUc","","avx512vl") TARGET_BUILTIN(__builtin_ia32_movaps256_mask, "V8fV8fV8fUc","","avx512vl") +TARGET_BUILTIN(__builtin_ia32_movss_mask, "V4fV4fV4fV4fUc","","avx512f") +TARGET_BUILTIN(__builtin_ia32_movsd_mask, "V2dV2dV2dV2dUc","","avx512f") TARGET_BUILTIN(__builtin_ia32_compressstoredf512_mask, "vV8d*V8dUc","","avx512f") TARGET_BUILTIN(__builtin_ia32_compressstoredi512_mask, "vV8LLi*V8LLiUc","","avx512f") TARGET_BUILTIN(__builtin_ia32_compressstoresf512_mask, "vV16f*V16fUs","","avx512f") Index: test/CodeGen/avx512f-builtins.c === --- test/CodeGen/avx512f-builtins.c +++ test/CodeGen/avx512f-builtins.c @@ -7433,4 +7433,32 @@ // CHECK-LABEL: @test_mm512_setzero_pd // CHECK: zeroinitializer return _mm512_setzero_pd(); -} \ No newline at end of file +} + +__m128 test_mm_mask_move_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) +{ + // CHECK-LABEL: @test_mm_mask_move_ss + // CHECK: @llvm.x86.avx512.mask.move.ss + return _mm_mask_move_ss ( __W, __U, __A, __B); +} + +__m128 test_mm_maskz_move_ss (__mmask8 __U, __m128 __A, __m128 __B) +{ + // CHECK-LABEL: 
@test_mm_maskz_move_ss + // CHECK: @llvm.x86.avx512.mask.move.ss + return _mm_maskz_move_ss (__U, __A, __B); +} + +__m128d test_mm_mask_move_sd (__m128 __W, __mmask8 __U, __m128d __A, __m128d __B) +{ + // CHECK-LABEL: @test_mm_mask_move_sd + // CHECK: @llvm.x86.avx512.mask.move.sd + return _mm_mask_move_sd ( __W, __U, __A, __B); +} + +__m128d test_mm_maskz_move_sd (__mmask8 __U, __m128d __A, __m128d __B) +{ + // CHECK-LABEL: @test_mm_maskz_move_sd + // CHECK: @llvm.x8
[PATCH] D21014: [Clang][AVX512][Intrinsics] Adding intrinsics reduce_[round]_{ss|sd} to clang
m_zuckerman created this revision. m_zuckerman added reviewers: AsafBadouh, igorb, delena. m_zuckerman added a subscriber: cfe-commits. http://reviews.llvm.org/D21014 Files: include/clang/Basic/BuiltinsX86.def lib/Headers/avx512dqintrin.h test/CodeGen/avx512dq-builtins.c Index: test/CodeGen/avx512dq-builtins.c === --- test/CodeGen/avx512dq-builtins.c +++ test/CodeGen/avx512dq-builtins.c @@ -743,6 +743,78 @@ return _mm512_maskz_reduce_round_ps(__U, __A, 4, 8); } +__m128 test_mm_reduce_ss(__m128 __A, __m128 __B) { + // CHECK-LABEL: @test_mm_reduce_ss + // CHECK: @llvm.x86.avx512.mask.reduce.ss + return _mm_reduce_ss(__A, __B, 4); +} + +__m128 test_mm_mask_reduce_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { + // CHECK-LABEL: @test_mm_mask_reduce_ss + // CHECK: @llvm.x86.avx512.mask.reduce.ss + return _mm_mask_reduce_ss(__W, __U, __A, __B, 4); +} + +__m128 test_mm_maskz_reduce_ss(__mmask8 __U, __m128 __A, __m128 __B) { + // CHECK-LABEL: @test_mm_maskz_reduce_ss + // CHECK: @llvm.x86.avx512.mask.reduce.ss + return _mm_maskz_reduce_ss(__U, __A, __B, 4); +} + +__m128 test_mm_reduce_round_ss(__m128 __A, __m128 __B) { + // CHECK-LABEL: @test_mm_reduce_round_ss + // CHECK: @llvm.x86.avx512.mask.reduce.ss + return _mm_reduce_round_ss(__A, __B, 4, 8); +} + +__m128 test_mm_mask_reduce_round_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { + // CHECK-LABEL: @test_mm_mask_reduce_round_ss + // CHECK: @llvm.x86.avx512.mask.reduce.ss + return _mm_mask_reduce_round_ss(__W, __U, __A, __B, 4, 8); +} + +__m128 test_mm_maskz_reduce_round_ss(__mmask8 __U, __m128 __A, __m128 __B) { + // CHECK-LABEL: @test_mm_maskz_reduce_round_ss + // CHECK: @llvm.x86.avx512.mask.reduce.ss + return _mm_maskz_reduce_round_ss(__U, __A, __B, 4, 8); +} + +__m128d test_mm_reduce_sd(__m128d __A, __m128d __B) { + // CHECK-LABEL: @test_mm_reduce_sd + // CHECK: @llvm.x86.avx512.mask.reduce.sd + return _mm_reduce_sd(__A, __B, 4); +} + +__m128d test_mm_mask_reduce_sd(__m128d __W, __mmask8 __U, __m128d 
__A, __m128d __B) { + // CHECK-LABEL: @test_mm_mask_reduce_sd + // CHECK: @llvm.x86.avx512.mask.reduce.sd + return _mm_mask_reduce_sd(__W, __U, __A, __B, 4); +} + +__m128d test_mm_maskz_reduce_sd(__mmask8 __U, __m128d __A, __m128d __B) { + // CHECK-LABEL: @test_mm_maskz_reduce_sd + // CHECK: @llvm.x86.avx512.mask.reduce.sd + return _mm_maskz_reduce_sd(__U, __A, __B, 4); +} + +__m128d test_mm_reduce_round_sd(__m128d __A, __m128d __B) { + // CHECK-LABEL: @test_mm_reduce_round_sd + // CHECK: @llvm.x86.avx512.mask.reduce.sd + return _mm_reduce_round_sd(__A, __B, 4, 8); +} + +__m128d test_mm_mask_reduce_round_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { + // CHECK-LABEL: @test_mm_mask_reduce_round_sd + // CHECK: @llvm.x86.avx512.mask.reduce.sd + return _mm_mask_reduce_round_sd(__W, __U, __A, __B, 4, 8); +} + +__m128d test_mm_maskz_reduce_round_sd(__mmask8 __U, __m128d __A, __m128d __B) { + // CHECK-LABEL: @test_mm_maskz_reduce_round_sd + // CHECK: @llvm.x86.avx512.mask.reduce.sd + return _mm_maskz_reduce_round_sd(__U, __A, __B, 4, 8); +} + __mmask16 test_mm512_movepi32_mask(__m512i __A) { // CHECK-LABEL: @test_mm512_movepi32_mask // CHECK: @llvm.x86.avx512.cvtd2mask.512 Index: lib/Headers/avx512dqintrin.h === --- lib/Headers/avx512dqintrin.h +++ lib/Headers/avx512dqintrin.h @@ -851,6 +851,78 @@ (__v16sf)_mm512_setzero_ps(), \ (__mmask16)(U), (int)(R)); }) +#define _mm_reduce_ss(A, B, C) __extension__ ({ \ + (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \ + (__v4sf)_mm_setzero_ps(), \ + (__mmask8)-1, \ + (int)(C),_MM_FROUND_CUR_DIRECTION); }) + +#define _mm_mask_reduce_ss(W, U, A, B, C) __extension__ ({ \ + (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128 )(A), (__v4sf)(__m128 )(B), \ + (__v4sf)(__m128 )(W), \ + (__mmask8)(U), \ + (int)(C),_MM_FROUND_CUR_DIRECTION); }) + +#define _mm_maskz_reduce_ss(U, A, B, C) __extension__ ({ \ + (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), (__v4sf)(__m128 )(B), \ + 
(__v4sf)_mm_setzero_ps(), \ + (__mmask8)(U), \ + (int)(C),_MM_FROUND_CUR_DIRECTION); }) + +#define _mm_reduce_round_ss(A, B, C, R) __extension__ ({ \ + (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), (__v4sf)(__m128 )(B), \ + (__v4sf)_mm_setzero_ps(), \ +
r271836 - Fixing problem with rsqrt28_sd
Author: mzuckerm Date: Sun Jun 5 10:57:49 2016 New Revision: 271836 URL: http://llvm.org/viewvc/llvm-project?rev=271836&view=rev Log: Fixing problem with rsqrt28_sd: _mm_maskz_rsqrt28_sd was mapped to _mm_mask_rsqrt28_round_sd instead of _mm_maskz_rsqrt28_round_sd. Modified: cfe/trunk/lib/Headers/avx512erintrin.h Modified: cfe/trunk/lib/Headers/avx512erintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512erintrin.h?rev=271836&r1=271835&r2=271836&view=diff == --- cfe/trunk/lib/Headers/avx512erintrin.h (original) +++ cfe/trunk/lib/Headers/avx512erintrin.h Sun Jun 5 10:57:49 2016 @@ -177,7 +177,7 @@ _mm_mask_rsqrt28_round_sd((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION) #define _mm_maskz_rsqrt28_sd(M, A, B) \ - _mm_mask_rsqrt28_round_sd((M), (A), (B), _MM_FROUND_CUR_DIRECTION) + _mm_maskz_rsqrt28_round_sd((M), (A), (B), _MM_FROUND_CUR_DIRECTION) // rcp28 #define _mm512_rcp28_round_pd(A, R) __extension__ ({ \ ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r271835 - [Clang][AVX512]Adding set4 intrinsics
Author: mzuckerm Date: Sun Jun 5 10:43:30 2016 New Revision: 271835 URL: http://llvm.org/viewvc/llvm-project?rev=271835&view=rev Log: [Clang][AVX512]Adding set4 intrinsics Differential Revision: http://reviews.llvm.org/D20866 Modified: cfe/trunk/lib/Headers/avx512fintrin.h cfe/trunk/test/CodeGen/avx512f-builtins.c Modified: cfe/trunk/lib/Headers/avx512fintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=271835&r1=271834&r2=271835&view=diff == --- cfe/trunk/lib/Headers/avx512fintrin.h (original) +++ cfe/trunk/lib/Headers/avx512fintrin.h Sun Jun 5 10:43:30 2016 @@ -343,6 +343,49 @@ _mm512_broadcastss_ps(__m128 __X) __f, __f, __f, __f }; } +static __inline __m512i __DEFAULT_FN_ATTRS +_mm512_set4_epi32 (int __A, int __B, int __C, int __D) +{ + return (__m512i)(__v16si) + { __D, __C, __B, __A, __D, __C, __B, __A, + __D, __C, __B, __A, __D, __C, __B, __A }; +} + +static __inline __m512i __DEFAULT_FN_ATTRS +_mm512_set4_epi64 (long long __A, long long __B, long long __C, + long long __D) +{ + return (__m512i) (__v8di) + { __D, __C, __B, __A, __D, __C, __B, __A }; +} + +static __inline __m512d __DEFAULT_FN_ATTRS +_mm512_set4_pd (double __A, double __B, double __C, double __D) +{ + return (__m512d) + { __D, __C, __B, __A, __D, __C, __B, __A }; +} + +static __inline __m512 __DEFAULT_FN_ATTRS +_mm512_set4_ps (float __A, float __B, float __C, float __D) +{ + return (__m512) + { __D, __C, __B, __A, __D, __C, __B, __A, + __D, __C, __B, __A, __D, __C, __B, __A }; +} + +#define _mm512_setr4_epi32(e0,e1,e2,e3) \ + _mm512_set4_epi32(e3,e2,e1,e0) + +#define _mm512_setr4_epi64(e0,e1,e2,e3) \ + _mm512_set4_epi64(e3,e2,e1,e0) + +#define _mm512_setr4_pd(e0,e1,e2,e3)\ + _mm512_set4_pd(e3,e2,e1,e0) + +#define _mm512_setr4_ps(e0,e1,e2,e3)\ + _mm512_set4_ps(e3,e2,e1,e0) + static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_broadcastsd_pd(__m128d __X) { Modified: cfe/trunk/test/CodeGen/avx512f-builtins.c URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512f-builtins.c?rev=271835&r1=271834&r2=271835&view=diff == --- cfe/trunk/test/CodeGen/avx512f-builtins.c (original) +++ cfe/trunk/test/CodeGen/avx512f-builtins.c Sun Jun 5 10:43:30 2016 @@ -6236,6 +6236,62 @@ __m512d test_mm512_set1_epi16(short d) return _mm512_set1_epi16(d); } +__m512i test_mm512_set4_epi32 (int __A, int __B, int __C, int __D) +{ + // CHECK-LABEL: @test_mm512_set4_epi32 + // CHECK: insertelement <16 x i32> {{.*}}, i32 15 + return _mm512_set4_epi32 (__A,__B,__C,__D); +} + +__m512i test_mm512_set4_epi64 (long long __A, long long __B, long long __C, long long __D) +{ + // CHECK-LABEL: @test_mm512_set4_epi64 + // CHECK: insertelement <8 x i64> {{.*}}, i32 7 + return _mm512_set4_epi64 (__A,__B,__C,__D); +} + +__m512d test_mm512_set4_pd (double __A, double __B, double __C, double __D) +{ + // CHECK-LABEL: @test_mm512_set4_pd + // CHECK: insertelement <8 x double> {{.*}}, i32 7 + return _mm512_set4_pd (__A,__B,__C,__D); +} + +__m512 test_mm512_set4_ps (float __A, float __B, float __C, float __D) +{ + // CHECK-LABEL: @test_mm512_set4_ps + // CHECK: insertelement <16 x float> {{.*}}, i32 15 + return _mm512_set4_ps (__A,__B,__C,__D); +} + +__m512i test_mm512_setr4_epi32(int e0, int e1, int e2, int e3) +{ + // CHECK-LABEL: @test_mm512_setr4_epi32 + // CHECK: insertelement <16 x i32> {{.*}}, i32 15 + return _mm512_setr4_epi32(e0, e1, e2, e3); +} + + __m512i test_mm512_setr4_epi64(long long e0, long long e1, long long e2, long long e3) +{ + // CHECK-LABEL: @test_mm512_setr4_epi64 + // CHECK: insertelement <8 x i64> {{.*}}, i32 7 + return _mm512_setr4_epi64(e0, e1, e2, e3); +} + +__m512i test_mm512_setr4_pd(double e0, double e1, double e2, double e3) +{ + // CHECK-LABEL: @test_mm512_setr4_pd + // CHECK: insertelement <8 x double> {{.*}}, i32 7 + return _mm512_setr4_pd(e0,e1,e2,e3); +} + + __m512i test_mm512_setr4_ps(float e0, float e1, float e2, float e3) +{ + // CHECK-LABEL: @test_mm512_setr4_ps + 
// CHECK: insertelement <16 x float> {{.*}}, i32 15 + return _mm512_setr4_ps(e0,e1,e2,e3); +} + __m512d test_mm512_castpd256_pd512(__m256d a) { // CHECK-LABEL: @test_mm512_castpd256_pd512 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r271832 - [Clang][AVX512][Intrinsics] Adding two definitions _mm512_setzero and _mm512_setzero_epi32
Author: mzuckerm Date: Sun Jun 5 10:12:52 2016 New Revision: 271832 URL: http://llvm.org/viewvc/llvm-project?rev=271832&view=rev Log: [Clang][AVX512][Intrinsics] Adding two definitions _mm512_setzero and _mm512_setzero_epi32 Differential Revision: http://reviews.llvm.org/D20871 Modified: cfe/trunk/lib/Headers/avx512fintrin.h cfe/trunk/test/CodeGen/avx512f-builtins.c Modified: cfe/trunk/lib/Headers/avx512fintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=271832&r1=271831&r2=271832&view=diff == --- cfe/trunk/lib/Headers/avx512fintrin.h (original) +++ cfe/trunk/lib/Headers/avx512fintrin.h Sun Jun 5 10:12:52 2016 @@ -170,6 +170,8 @@ _mm512_setzero_si512(void) return (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 }; } +#define _mm512_setzero_epi32 _mm512_setzero_si512 + static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_undefined_pd() { @@ -274,6 +276,9 @@ _mm512_setzero_ps(void) return (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }; } + +#define _mm512_setzero _mm512_setzero_ps + static __inline __m512d __DEFAULT_FN_ATTRS _mm512_setzero_pd(void) { Modified: cfe/trunk/test/CodeGen/avx512f-builtins.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512f-builtins.c?rev=271832&r1=271831&r2=271832&view=diff == --- cfe/trunk/test/CodeGen/avx512f-builtins.c (original) +++ cfe/trunk/test/CodeGen/avx512f-builtins.c Sun Jun 5 10:12:52 2016 @@ -7343,3 +7343,38 @@ __m128 test_mm_maskz_cvtsd_ss(__mmask8 _ return _mm_maskz_cvtsd_ss(__U, __A, __B); } + +__m512i test_mm512_setzero_epi32() +{ + // CHECK-LABEL: @test_mm512_setzero_epi32 + // CHECK: zeroinitializer + return _mm512_setzero_epi32(); +} + +__m512i test_mm512_setzero() +{ + // CHECK-LABEL: @test_mm512_setzero + // CHECK: zeroinitializer + return _mm512_setzero(); +} + +__m512i test_mm512_setzero_si512() +{ + // CHECK-LABEL: @test_mm512_setzero_si512 + // CHECK: zeroinitializer + return _mm512_setzero_si512(); +} + +__m512i 
test_mm512_setzero_ps() +{ + // CHECK-LABEL: @test_mm512_setzero_ps + // CHECK: zeroinitializer + return _mm512_setzero_ps(); +} + +__m512d test_mm512_setzero_pd() +{ + // CHECK-LABEL: @test_mm512_setzero_pd + // CHECK: zeroinitializer + return _mm512_setzero_pd(); +} \ No newline at end of file ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D21002: [Clang][AVX512][BUILTIN]Adding intrinsics for range_round_{sd|ss}
m_zuckerman created this revision. m_zuckerman added reviewers: AsafBadouh, igorb, delena. m_zuckerman added a subscriber: cfe-commits. http://reviews.llvm.org/D21002 Files: include/clang/Basic/BuiltinsX86.def lib/Headers/avx512dqintrin.h test/CodeGen/avx512dq-builtins.c Index: test/CodeGen/avx512dq-builtins.c === --- test/CodeGen/avx512dq-builtins.c +++ test/CodeGen/avx512dq-builtins.c @@ -635,6 +635,42 @@ return _mm512_maskz_range_round_pd(__U, __A, __B, 4, 8); } +__m128d test_mm512_range_round_sd(__m128d __A, __m128d __B) { + // CHECK-LABEL: @test_mm512_range_round_sd + // CHECK: @llvm.x86.avx512.mask.range.sd + return _mm_range_round_sd(__A, __B, 4, 8); +} + +__m128d test_mm512_mask_range_round_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { + // CHECK-LABEL: test_mm512_mask_range_round_sd + // CHECK: @llvm.x86.avx512.mask.range.sd + return _mm_mask_range_round_sd(__W, __U, __A, __B, 4, 8); +} + +__m128d test_mm512_maskz_range_round_sd(__mmask8 __U, __m128d __A, __m128d __B) { + // CHECK-LABEL: @test_mm512_maskz_range_round_sd + // CHECK: @llvm.x86.avx512.mask.range.sd + return _mm_maskz_range_round_sd(__U, __A, __B, 4, 8); +} + +__m128d test_mm512_range_round_ss(__m128d __A, __m128d __B) { + // CHECK-LABEL: @test_mm512_range_round_ss + // CHECK: @llvm.x86.avx512.mask.range.ss + return _mm_range_round_ss(__A, __B, 4, 8); +} + +__m128d test_mm512_mask_range_round_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { + // CHECK-LABEL: @test_mm512_mask_range_round_ss + // CHECK: @llvm.x86.avx512.mask.range.ss + return _mm_mask_range_round_ss(__W, __U, __A, __B, 4, 8); +} + +__m128 test_mm512_maskz_range_round_ss(__mmask8 __U, __m128 __A, __m128 __B) { + // CHECK-LABEL: @test_mm512_maskz_range_round_ss + // CHECK: @llvm.x86.avx512.mask.range.ss + return _mm_maskz_range_round_ss(__U, __A, __B, 4, 8); +} + __m512 test_mm512_range_ps(__m512 __A, __m512 __B) { // CHECK-LABEL: @test_mm512_range_ps // CHECK: @llvm.x86.avx512.mask.range.ps.512 Index: 
lib/Headers/avx512dqintrin.h === --- lib/Headers/avx512dqintrin.h +++ lib/Headers/avx512dqintrin.h @@ -785,6 +785,42 @@ (__v16sf)_mm512_setzero_ps(), \ (__mmask16)(U), (int)(R)); }) +#define _mm_range_round_ss(A, B, C, R) __extension__ ({ \ + (__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), \ + (__v4sf)_mm_setzero_ps(), \ + (__mmask8) -1,(int)(C), (int)(R));}) + +#define _mm_mask_range_round_ss(W, U, A, B, C, R) __extension__ ({ \ + (__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), \ + (__v4sf)(__m128)(W),\ + (__mmask8)(U), (int)(C), (int)(R)); }) + +#define _mm_maskz_range_round_ss(U, A, B, C, R) __extension__ ({ \ + (__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), \ + (__v4sf)_mm_setzero_ps(), \ + (__mmask8)(U),(int)(C), (int)(R));}) + +#define _mm_range_round_sd(A, B, C, R) __extension__ ({ \ + (__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128)(A), \ + (__v2df)(__m128)(B), \ + (__v2df)_mm_setzero_pd(), \ + (__mmask8) -1, (int)(C), (int)(R));}) + +#define _mm_mask_range_round_sd(W, U, A, B, C, R) __extension__ ({ \ + (__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), \ + (__v2df)(__m128d)(W), (__mmask8)(U), \ + (int)(C), (int)(R));}) + +#define _mm_maskz_range_round_sd(U, A, B, C, R) __extension__ ({ \ + (__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), \ + (__v2df)_mm_setzero_pd(), \ + (__mmask8)(U),(int)(C), (int)(R));}) + #define _mm512_reduce_pd(A, B) __extension__ ({ \ (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \ (__v8df)_mm512_setzero_pd(), \ Index: include/clang/Basic/BuiltinsX86.def === --- include/clang/Basic/BuiltinsX86.def +++ include/clang/Basic/BuiltinsX86.def @@ -1
Re: [PATCH] D20626: [Clang][AVX512][intrinsics] Adding missing intrinsics div_pd and div_ps
m_zuckerman updated this revision to Diff 59663. http://reviews.llvm.org/D20626 Files: lib/Headers/avx512fintrin.h test/CodeGen/avx512f-builtins.c Index: test/CodeGen/avx512f-builtins.c === --- test/CodeGen/avx512f-builtins.c +++ test/CodeGen/avx512f-builtins.c @@ -1972,10 +1972,15 @@ // CHECK: @llvm.x86.avx512.mask.div.pd.512 return _mm512_maskz_div_round_pd(__U,__A,__B,_MM_FROUND_TO_NEAREST_INT); } -__m512d test_mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { - // CHECK-LABEL: @test_mm512_mask_div_pd +__m512d test_mm512_div_pd(__m512d __a, __m512d __b) { + // CHECK-LABLE: @test_mm512_div_pd + // CHECK: fdiv <8 x double> + return _mm512_div_pd(__a,__b); +} +__m512d test_mm512_mask_div_pd(__m512d __w, __mmask8 __u, __m512d __a, __m512d __b) { + // CHECK-LABLE: @test_mm512_mask_div_pd // CHECK: @llvm.x86.avx512.mask.div.pd.512 - return _mm512_mask_div_pd(__W,__U,__A,__B); + return _mm512_mask_div_pd(__w,__u,__a,__b); } __m512d test_mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B) { // CHECK-LABEL: @test_mm512_maskz_div_pd @@ -1997,6 +2002,11 @@ // CHECK: @llvm.x86.avx512.mask.div.ps.512 return _mm512_maskz_div_round_ps(__U,__A,__B,_MM_FROUND_TO_NEAREST_INT); } +__m512 test_mm512_div_ps(__m512 __A, __m512 __B) { + // CHECK-LABEL: @test_mm512_div_ps + // CHECK: fdiv <16 x float> + return _mm512_div_ps(__A,__B); +} __m512 test_mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { // CHECK-LABEL: @test_mm512_mask_div_ps // CHECK: @llvm.x86.avx512.mask.div.ps.512 Index: lib/Headers/avx512fintrin.h === --- lib/Headers/avx512fintrin.h +++ lib/Headers/avx512fintrin.h @@ -2422,6 +2422,12 @@ (__v2df)_mm_setzero_pd(), \ (__mmask8)(U), (int)(R)); }) +static __inline __m512d __DEFAULT_FN_ATTRS +_mm512_div_pd(__m512d __a, __m512d __b) +{ + return (__m512d)((__v8df)__a/(__v8df)__b); +} + static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { return (__m512d) 
__builtin_ia32_divpd512_mask ((__v8df) __A, @@ -2441,6 +2447,12 @@ _MM_FROUND_CUR_DIRECTION); } +static __inline __m512 __DEFAULT_FN_ATTRS +_mm512_div_ps(__m512 __a, __m512 __b) +{ + return (__m512)((__v16sf)__a/(__v16sf)__b); +} + static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A, Index: test/CodeGen/avx512f-builtins.c === --- test/CodeGen/avx512f-builtins.c +++ test/CodeGen/avx512f-builtins.c @@ -1972,10 +1972,15 @@ // CHECK: @llvm.x86.avx512.mask.div.pd.512 return _mm512_maskz_div_round_pd(__U,__A,__B,_MM_FROUND_TO_NEAREST_INT); } -__m512d test_mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { - // CHECK-LABEL: @test_mm512_mask_div_pd +__m512d test_mm512_div_pd(__m512d __a, __m512d __b) { + // CHECK-LABLE: @test_mm512_div_pd + // CHECK: fdiv <8 x double> + return _mm512_div_pd(__a,__b); +} +__m512d test_mm512_mask_div_pd(__m512d __w, __mmask8 __u, __m512d __a, __m512d __b) { + // CHECK-LABLE: @test_mm512_mask_div_pd // CHECK: @llvm.x86.avx512.mask.div.pd.512 - return _mm512_mask_div_pd(__W,__U,__A,__B); + return _mm512_mask_div_pd(__w,__u,__a,__b); } __m512d test_mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B) { // CHECK-LABEL: @test_mm512_maskz_div_pd @@ -1997,6 +2002,11 @@ // CHECK: @llvm.x86.avx512.mask.div.ps.512 return _mm512_maskz_div_round_ps(__U,__A,__B,_MM_FROUND_TO_NEAREST_INT); } +__m512 test_mm512_div_ps(__m512 __A, __m512 __B) { + // CHECK-LABEL: @test_mm512_div_ps + // CHECK: fdiv <16 x float> + return _mm512_div_ps(__A,__B); +} __m512 test_mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { // CHECK-LABEL: @test_mm512_mask_div_ps // CHECK: @llvm.x86.avx512.mask.div.ps.512 Index: lib/Headers/avx512fintrin.h === --- lib/Headers/avx512fintrin.h +++ lib/Headers/avx512fintrin.h @@ -2422,6 +2422,12 @@ (__v2df)_mm_setzero_pd(), \ (__mmask8)(U), (int)(R)); }) +static __inline 
__m512d __DEFAULT_FN_ATTRS +_mm512_div_pd(__m512d __a, __m512d __b) +{ + return (__m512d)((__v8df)__a/(__v8df)__b); +} + static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __A, @@ -2441,6 +2447,12 @@ _MM_FROUND_CUR_DIRECTION); } +static __inline __m512 __DEFAULT_FN_ATTRS +_mm512_div_ps(__m512 __a,
Re: [PATCH] D20871: [Clang][AVX512][Intrinsics] Adding two definitions _mm512_setzero and _mm512_setzero_epi32
m_zuckerman updated this revision to Diff 59659. http://reviews.llvm.org/D20871 Files: lib/Headers/avx512fintrin.h test/CodeGen/avx512f-builtins.c Index: test/CodeGen/avx512f-builtins.c === --- test/CodeGen/avx512f-builtins.c +++ test/CodeGen/avx512f-builtins.c @@ -7266,3 +7266,31 @@ return _mm512_setr_ps( __A, __B, __C, __D, __E, __F, __G, __H, __I, __J, __K, __L, __M, __N, __O, __P); } + +__m512i test_mm512_setzero_epi32() +{ + // CHECK-LABEL: @test_mm512_setzero_epi32 + // CHECK: zeroinitializer + return _mm512_setzero_epi32(); +} + +__m512i test_mm512_setzero() +{ + // CHECK-LABEL: @test_mm512_setzero + // CHECK: zeroinitializer + return _mm512_setzero(); +} + +__m512i test_mm512_setzero_si512() +{ + // CHECK-LABEL: @test_mm512_setzero_si512 + // CHECK: zeroinitializer + return _mm512_setzero_si512(); +} + +__m512i test_mm512_setzero_ps() +{ + // CHECK-LABEL: @test_mm512_setzero_ps + // CHECK: zeroinitializer + return _mm512_setzero_ps(); +} Index: lib/Headers/avx512fintrin.h === --- lib/Headers/avx512fintrin.h +++ lib/Headers/avx512fintrin.h @@ -164,6 +164,8 @@ return (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 }; } +#define _mm512_setzero_epi32 _mm512_setzero_si512 + static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_undefined_pd() { @@ -268,6 +270,9 @@ return (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }; } + +#define _mm512_setzero _mm512_setzero_ps + static __inline __m512d __DEFAULT_FN_ATTRS _mm512_setzero_pd(void) { Index: test/CodeGen/avx512f-builtins.c === --- test/CodeGen/avx512f-builtins.c +++ test/CodeGen/avx512f-builtins.c @@ -7266,3 +7266,31 @@ return _mm512_setr_ps( __A, __B, __C, __D, __E, __F, __G, __H, __I, __J, __K, __L, __M, __N, __O, __P); } + +__m512i test_mm512_setzero_epi32() +{ + // CHECK-LABEL: @test_mm512_setzero_epi32 + // CHECK: zeroinitializer + return _mm512_setzero_epi32(); +} + +__m512i test_mm512_setzero() +{ + // CHECK-LABEL: @test_mm512_setzero + // CHECK: zeroinitializer + return 
_mm512_setzero(); +} + +__m512i test_mm512_setzero_si512() +{ + // CHECK-LABEL: @test_mm512_setzero_si512 + // CHECK: zeroinitializer + return _mm512_setzero_si512(); +} + +__m512i test_mm512_setzero_ps() +{ + // CHECK-LABEL: @test_mm512_setzero_ps + // CHECK: zeroinitializer + return _mm512_setzero_ps(); +} Index: lib/Headers/avx512fintrin.h === --- lib/Headers/avx512fintrin.h +++ lib/Headers/avx512fintrin.h @@ -164,6 +164,8 @@ return (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 }; } +#define _mm512_setzero_epi32 _mm512_setzero_si512 + static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_undefined_pd() { @@ -268,6 +270,9 @@ return (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }; } + +#define _mm512_setzero _mm512_setzero_ps + static __inline __m512d __DEFAULT_FN_ATTRS _mm512_setzero_pd(void) { ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D20871: [Clang][AVX512][Intrinsics] Adding two definitions _mm512_setzero and _mm512_setzero_epi32
m_zuckerman updated this revision to Diff 59389. http://reviews.llvm.org/D20871 Files: lib/Headers/avx512fintrin.h test/CodeGen/avx512f-builtins.c Index: test/CodeGen/avx512f-builtins.c === --- test/CodeGen/avx512f-builtins.c +++ test/CodeGen/avx512f-builtins.c @@ -7266,3 +7266,17 @@ return _mm512_setr_ps( __A, __B, __C, __D, __E, __F, __G, __H, __I, __J, __K, __L, __M, __N, __O, __P); } + +__m512i test_mm512_setzero_epi32() +{ + // CHECK-LABEL: @test_mm512_setzero_epi32 + // CHECK: zeroinitializer + return _mm512_setzero_epi32(); +} + +__m512i test_mm512_setzero() +{ + // CHECK-LABEL: @test_mm512_setzero + // CHECK: zeroinitializer + return _mm512_setzero(); +} \ No newline at end of file Index: lib/Headers/avx512fintrin.h === --- lib/Headers/avx512fintrin.h +++ lib/Headers/avx512fintrin.h @@ -164,6 +164,8 @@ return (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 }; } +#define _mm512_setzero_epi32 _mm512_setzero_si512 + static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_undefined_pd() { @@ -268,6 +270,9 @@ return (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }; } + +#define _mm512_setzero _mm512_setzero_ps + static __inline __m512d __DEFAULT_FN_ATTRS _mm512_setzero_pd(void) { Index: test/CodeGen/avx512f-builtins.c === --- test/CodeGen/avx512f-builtins.c +++ test/CodeGen/avx512f-builtins.c @@ -7266,3 +7266,17 @@ return _mm512_setr_ps( __A, __B, __C, __D, __E, __F, __G, __H, __I, __J, __K, __L, __M, __N, __O, __P); } + +__m512i test_mm512_setzero_epi32() +{ + // CHECK-LABEL: @test_mm512_setzero_epi32 + // CHECK: zeroinitializer + return _mm512_setzero_epi32(); +} + +__m512i test_mm512_setzero() +{ + // CHECK-LABEL: @test_mm512_setzero + // CHECK: zeroinitializer + return _mm512_setzero(); +} \ No newline at end of file Index: lib/Headers/avx512fintrin.h === --- lib/Headers/avx512fintrin.h +++ lib/Headers/avx512fintrin.h @@ -164,6 +164,8 @@ return (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 }; } +#define _mm512_setzero_epi32 
_mm512_setzero_si512 + static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_undefined_pd() { @@ -268,6 +270,9 @@ return (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }; } + +#define _mm512_setzero _mm512_setzero_ps + static __inline __m512d __DEFAULT_FN_ATTRS _mm512_setzero_pd(void) { ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D20871: [Clang][AVX512][Intrinsics] Adding two definitions _mm512_setzero and _mm512_setzero_epi32
m_zuckerman added a comment. Yes, it's API. http://reviews.llvm.org/D20871 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D20866: [Clang][AVX512]Adding set4 intrinsics
m_zuckerman updated this revision to Diff 59379. http://reviews.llvm.org/D20866 Files: lib/Headers/avx512fintrin.h test/CodeGen/avx512f-builtins.c Index: test/CodeGen/avx512f-builtins.c === --- test/CodeGen/avx512f-builtins.c +++ test/CodeGen/avx512f-builtins.c @@ -6236,6 +6236,62 @@ return _mm512_set1_epi16(d); } +__m512i test_mm512_set4_epi32 (int __A, int __B, int __C, int __D) +{ + // CHECK-LABEL: @test_mm512_set4_epi32 + // CHECK: insertelement <16 x i32> {{.*}}, i32 15 + return _mm512_set4_epi32 (__A,__B,__C,__D); +} + +__m512i test_mm512_set4_epi64 (long long __A, long long __B, long long __C, long long __D) +{ + // CHECK-LABEL: @test_mm512_set4_epi64 + // CHECK: insertelement <8 x i64> {{.*}}, i32 7 + return _mm512_set4_epi64 (__A,__B,__C,__D); +} + +__m512d test_mm512_set4_pd (double __A, double __B, double __C, double __D) +{ + // CHECK-LABEL: @test_mm512_set4_pd + // CHECK: insertelement <8 x double> {{.*}}, i32 7 + return _mm512_set4_pd (__A,__B,__C,__D); +} + +__m512 test_mm512_set4_ps (float __A, float __B, float __C, float __D) +{ + // CHECK-LABEL: @test_mm512_set4_ps + // CHECK: insertelement <16 x float> {{.*}}, i32 15 + return _mm512_set4_ps (__A,__B,__C,__D); +} + +__m512i test_mm512_setr4_epi32(int e0, int e1, int e2, int e3) +{ + // CHECK-LABEL: @test_mm512_setr4_epi32 + // CHECK: insertelement <16 x i32> {{.*}}, i32 15 + return _mm512_setr4_epi32(e0, e1, e2, e3); +} + + __m512i test_mm512_setr4_epi64(long long e0, long long e1, long long e2, long long e3) +{ + // CHECK-LABEL: @test_mm512_setr4_epi64 + // CHECK: insertelement <8 x i64> {{.*}}, i32 7 + return _mm512_setr4_epi64(e0, e1, e2, e3); +} + +__m512i test_mm512_setr4_pd(double e0, double e1, double e2, double e3) +{ + // CHECK-LABEL: @test_mm512_setr4_pd + // CHECK: insertelement <8 x double> {{.*}}, i32 7 + return _mm512_setr4_pd(e0,e1,e2,e3); +} + + __m512i test_mm512_setr4_ps(float e0, float e1, float e2, float e3) +{ + // CHECK-LABEL: @test_mm512_setr4_ps + // CHECK: insertelement 
<16 x float> {{.*}}, i32 15 + return _mm512_setr4_ps(e0,e1,e2,e3); +} + __m512d test_mm512_castpd256_pd512(__m256d a) { // CHECK-LABEL: @test_mm512_castpd256_pd512 Index: lib/Headers/avx512fintrin.h === --- lib/Headers/avx512fintrin.h +++ lib/Headers/avx512fintrin.h @@ -332,6 +332,49 @@ __f, __f, __f, __f }; } +static __inline __m512i __DEFAULT_FN_ATTRS +_mm512_set4_epi32 (int __A, int __B, int __C, int __D) +{ + return (__m512i)(__v16si) + { __D, __C, __B, __A, __D, __C, __B, __A, + __D, __C, __B, __A, __D, __C, __B, __A }; +} + +static __inline __m512i __DEFAULT_FN_ATTRS +_mm512_set4_epi64 (long long __A, long long __B, long long __C, + long long __D) +{ + return (__m512i) (__v8di) + { __D, __C, __B, __A, __D, __C, __B, __A }; +} + +static __inline __m512d __DEFAULT_FN_ATTRS +_mm512_set4_pd (double __A, double __B, double __C, double __D) +{ + return (__m512d) + { __D, __C, __B, __A, __D, __C, __B, __A }; +} + +static __inline __m512 __DEFAULT_FN_ATTRS +_mm512_set4_ps (float __A, float __B, float __C, float __D) +{ + return (__m512) + { __D, __C, __B, __A, __D, __C, __B, __A, + __D, __C, __B, __A, __D, __C, __B, __A }; +} + +#define _mm512_setr4_epi32(e0,e1,e2,e3) \ + _mm512_set4_epi32(e3,e2,e1,e0) + +#define _mm512_setr4_epi64(e0,e1,e2,e3) \ + _mm512_set4_epi64(e3,e2,e1,e0) + +#define _mm512_setr4_pd(e0,e1,e2,e3)\ + _mm512_set4_pd(e3,e2,e1,e0) + +#define _mm512_setr4_ps(e0,e1,e2,e3)\ + _mm512_set4_ps(e3,e2,e1,e0) + static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_broadcastsd_pd(__m128d __X) { ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r271498 - [Clang][AVX512][INTRINSICS] adding round cvt and fix regular cvtps_ph
Author: mzuckerm Date: Thu Jun 2 02:44:08 2016 New Revision: 271498 URL: http://llvm.org/viewvc/llvm-project?rev=271498&view=rev Log: [Clang][AVX512][INTRINSICS] adding round cvt and fix regular cvtps_ph Differential Revision: http://reviews.llvm.org/D20870 Modified: cfe/trunk/lib/Headers/avx512vlintrin.h cfe/trunk/test/CodeGen/avx512vl-builtins.c Modified: cfe/trunk/lib/Headers/avx512vlintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512vlintrin.h?rev=271498&r1=271497&r2=271498&view=diff == --- cfe/trunk/lib/Headers/avx512vlintrin.h (original) +++ cfe/trunk/lib/Headers/avx512vlintrin.h Thu Jun 2 02:44:08 2016 @@ -9383,26 +9383,58 @@ _mm256_maskz_cvtph_ps (__mmask8 __U, __m (__mmask8) __U); } -#define _mm_mask_cvtps_ph(W, U, A, I) __extension__ ({ \ +static __inline __m128i __DEFAULT_FN_ATTRS +_mm_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m128 __A) +{ + return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, _MM_FROUND_CUR_DIRECTION, + (__v8hi) __W, + (__mmask8) __U); +} + +static __inline __m128i __DEFAULT_FN_ATTRS +_mm_maskz_cvtps_ph (__mmask8 __U, __m128 __A) +{ + return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, _MM_FROUND_CUR_DIRECTION, + (__v8hi) _mm_setzero_si128 (), + (__mmask8) __U); +} + +#define _mm_mask_cvt_roundps_ph(W, U, A, I) __extension__ ({ \ (__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \ (__v8hi)(__m128i)(W), \ (__mmask8)(U)); }) -#define _mm_maskz_cvtps_ph(U, A, I) __extension__ ({ \ +#define _mm_maskz_cvt_roundps_ph(U, A, I) __extension__ ({ \ (__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \ (__v8hi)_mm_setzero_si128(), \ (__mmask8)(U)); }) -#define _mm256_mask_cvtps_ph(W, U, A, I) __extension__ ({ \ +static __inline __m128i __DEFAULT_FN_ATTRS +_mm256_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m256 __A) +{ + return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, _MM_FROUND_CUR_DIRECTION, + (__v8hi) __W, + (__mmask8) __U); +} + +static 
__inline __m128i __DEFAULT_FN_ATTRS +_mm256_maskz_cvtps_ph ( __mmask8 __U, __m256 __A) +{ + return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, _MM_FROUND_CUR_DIRECTION, + (__v8hi) _mm_setzero_si128(), + (__mmask8) __U); +} +#define _mm256_mask_cvt_roundps_ph(W, U, A, I) __extension__ ({ \ (__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \ (__v8hi)(__m128i)(W), \ (__mmask8)(U)); }) -#define _mm256_maskz_cvtps_ph(U, A, I) __extension__ ({ \ +#define _mm256_maskz_cvt_roundps_ph(U, A, I) __extension__ ({ \ (__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \ (__v8hi)_mm_setzero_si128(), \ (__mmask8)(U)); }) + #undef __DEFAULT_FN_ATTRS #undef __DEFAULT_FN_ATTRS_BOTH Modified: cfe/trunk/test/CodeGen/avx512vl-builtins.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512vl-builtins.c?rev=271498&r1=271497&r2=271498&view=diff == --- cfe/trunk/test/CodeGen/avx512vl-builtins.c (original) +++ cfe/trunk/test/CodeGen/avx512vl-builtins.c Thu Jun 2 02:44:08 2016 @@ -6726,24 +6726,47 @@ __m256 test_mm256_maskz_cvtph_ps(__mmask __m128i test_mm_mask_cvtps_ph(__m128i __W, __mmask8 __U, __m128 __A) { // CHECK-LABEL: @test_mm_mask_cvtps_ph // CHECK: @llvm.x86.avx512.mask.vcvtps2ph.128 - return _mm_mask_cvtps_ph(__W, __U, __A, _MM_FROUND_CUR_DIRECTION); + return _mm_mask_cvtps_ph(__W, __U, __A); } __m128i test_mm_maskz_cvtps_ph(__mmask8 __U, __m128 __A) { // CHECK-LABEL: @test_mm_maskz_cvtps_ph // CHECK: @llvm.x86.avx512.mask.vcvtps2ph.128 - return _mm_maskz_cvtps_ph(__U, __A, _MM_FROUND_CUR_DIRECTION); + return _mm_maskz_cvtps_ph(__U, __A); } __m128i test_mm256_mask_cvtps_ph(__m128i __W, __mmask8 __U, __m256 __A) { // CHECK-LABEL: @test_mm256_mask_cvtps_ph // CHECK: @llvm.x86.avx512.mask.vcvtps2ph.256 - return _mm256_mask_cvtps_ph(__W, __U, __A, _MM_FROUND_CUR_DIRECTION); + return _mm256_mask_cvtps_ph(__W, __U, __A); } __m128i test_mm256_maskz_cvtps_ph(__mmask8 __U, __m256 __A) { // CHECK-
[PATCH] D20871: [Clang][AVX512][Intrinsics] Adding two definitions _mm512_setzero and _mm512_setzero_epi32
m_zuckerman created this revision. m_zuckerman added reviewers: AsafBadouh, igorb, delena. m_zuckerman added a subscriber: cfe-commits. http://reviews.llvm.org/D20871 Files: lib/Headers/avx512fintrin.h Index: lib/Headers/avx512fintrin.h === --- lib/Headers/avx512fintrin.h +++ lib/Headers/avx512fintrin.h @@ -164,6 +164,8 @@ return (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 }; } +#define _mm512_setzero_epi32 _mm512_setzero_si512 + static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_undefined_pd() { @@ -268,6 +270,9 @@ return (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }; } + +#define _mm512_setzero _mm512_setzero_ps + static __inline __m512d __DEFAULT_FN_ATTRS _mm512_setzero_pd(void) { Index: lib/Headers/avx512fintrin.h === --- lib/Headers/avx512fintrin.h +++ lib/Headers/avx512fintrin.h @@ -164,6 +164,8 @@ return (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 }; } +#define _mm512_setzero_epi32 _mm512_setzero_si512 + static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_undefined_pd() { @@ -268,6 +270,9 @@ return (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }; } + +#define _mm512_setzero _mm512_setzero_ps + static __inline __m512d __DEFAULT_FN_ATTRS _mm512_setzero_pd(void) { ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D20870: [Clang][AVX512][INTRINSICS] adding round cvt and fix regular cvtps_ph
m_zuckerman created this revision. m_zuckerman added reviewers: AsafBadouh, igorb, delena. m_zuckerman added a subscriber: cfe-commits. http://reviews.llvm.org/D20870 Files: lib/Headers/avx512vlintrin.h test/CodeGen/avx512vl-builtins.c Index: test/CodeGen/avx512vl-builtins.c === --- test/CodeGen/avx512vl-builtins.c +++ test/CodeGen/avx512vl-builtins.c @@ -6726,24 +6726,47 @@ __m128i test_mm_mask_cvtps_ph(__m128i __W, __mmask8 __U, __m128 __A) { // CHECK-LABEL: @test_mm_mask_cvtps_ph // CHECK: @llvm.x86.avx512.mask.vcvtps2ph.128 - return _mm_mask_cvtps_ph(__W, __U, __A, _MM_FROUND_CUR_DIRECTION); + return _mm_mask_cvtps_ph(__W, __U, __A); } __m128i test_mm_maskz_cvtps_ph(__mmask8 __U, __m128 __A) { // CHECK-LABEL: @test_mm_maskz_cvtps_ph // CHECK: @llvm.x86.avx512.mask.vcvtps2ph.128 - return _mm_maskz_cvtps_ph(__U, __A, _MM_FROUND_CUR_DIRECTION); + return _mm_maskz_cvtps_ph(__U, __A); } __m128i test_mm256_mask_cvtps_ph(__m128i __W, __mmask8 __U, __m256 __A) { // CHECK-LABEL: @test_mm256_mask_cvtps_ph // CHECK: @llvm.x86.avx512.mask.vcvtps2ph.256 - return _mm256_mask_cvtps_ph(__W, __U, __A, _MM_FROUND_CUR_DIRECTION); + return _mm256_mask_cvtps_ph(__W, __U, __A); } __m128i test_mm256_maskz_cvtps_ph(__mmask8 __U, __m256 __A) { // CHECK-LABEL: @test_mm256_maskz_cvtps_ph // CHECK: @llvm.x86.avx512.mask.vcvtps2ph.256 - return _mm256_maskz_cvtps_ph(__U, __A, _MM_FROUND_CUR_DIRECTION); + return _mm256_maskz_cvtps_ph(__U, __A); } +__m128i test_mm_mask_cvt_roundps_ph(__m128i __W, __mmask8 __U, __m128 __A) { + // CHECK-LABEL: @test_mm_mask_cvt_roundps_ph + // CHECK: @llvm.x86.avx512.mask.vcvtps2ph.128 + return _mm_mask_cvt_roundps_ph(__W, __U, __A, _MM_FROUND_CUR_DIRECTION); +} + +__m128i test_mm_maskz_cvt_roundps_ph(__mmask8 __U, __m128 __A) { + // CHECK-LABEL: @test_mm_maskz_cvt_roundps_ph + // CHECK: @llvm.x86.avx512.mask.vcvtps2ph.128 + return _mm_maskz_cvt_roundps_ph(__U, __A, _MM_FROUND_CUR_DIRECTION); +} + +__m128i test_mm256_mask_cvt_roundps_ph(__m128i __W, __mmask8 
__U, __m256 __A) { + // CHECK-LABEL: @test_mm256_mask_cvt_roundps_ph + // CHECK: @llvm.x86.avx512.mask.vcvtps2ph.256 + return _mm256_mask_cvt_roundps_ph(__W, __U, __A, _MM_FROUND_CUR_DIRECTION); +} + +__m128i test_mm256_maskz_cvt_roundps_ph(__mmask8 __U, __m256 __A) { + // CHECK-LABEL: @test_mm256_maskz_cvt_roundps_ph + // CHECK: @llvm.x86.avx512.mask.vcvtps2ph.256 + return _mm256_maskz_cvt_roundps_ph(__U, __A, _MM_FROUND_CUR_DIRECTION); +} Index: lib/Headers/avx512vlintrin.h === --- lib/Headers/avx512vlintrin.h +++ lib/Headers/avx512vlintrin.h @@ -9383,26 +9383,58 @@ (__mmask8) __U); } -#define _mm_mask_cvtps_ph(W, U, A, I) __extension__ ({ \ +static __inline __m128i __DEFAULT_FN_ATTRS +_mm_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m128 __A) +{ + return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, _MM_FROUND_CUR_DIRECTION, + (__v8hi) __W, + (__mmask8) __U); +} + +static __inline __m128i __DEFAULT_FN_ATTRS +_mm_maskz_cvtps_ph (__mmask8 __U, __m128 __A) +{ + return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, _MM_FROUND_CUR_DIRECTION, + (__v8hi) _mm_setzero_si128 (), + (__mmask8) __U); +} + +#define _mm_mask_cvt_roundps_ph(W, U, A, I) __extension__ ({ \ (__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \ (__v8hi)(__m128i)(W), \ (__mmask8)(U)); }) -#define _mm_maskz_cvtps_ph(U, A, I) __extension__ ({ \ +#define _mm_maskz_cvt_roundps_ph(U, A, I) __extension__ ({ \ (__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \ (__v8hi)_mm_setzero_si128(), \ (__mmask8)(U)); }) -#define _mm256_mask_cvtps_ph(W, U, A, I) __extension__ ({ \ +static __inline __m128i __DEFAULT_FN_ATTRS +_mm256_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m256 __A) +{ + return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, _MM_FROUND_CUR_DIRECTION, + (__v8hi) __W, + (__mmask8) __U); +} + +static __inline __m128i __DEFAULT_FN_ATTRS +_mm256_maskz_cvtps_ph ( __mmask8 __U, __m256 __A) +{ + return (__m128i) 
__builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, _MM_FROUND_CUR_DIRECTION, + (__v8hi) _mm_setzero_si128(), + (__mmask8) __U); +} +#define _mm256_mask_cvt_roundps_ph(W, U, A, I) __extension__ ({ \ (__m128i)__builtin_i
r271398 - [Clang][Intrinsics][avx512] Continue Adding round cvt to clang
Author: mzuckerm Date: Wed Jun 1 09:41:41 2016 New Revision: 271398 URL: http://llvm.org/viewvc/llvm-project?rev=271398&view=rev Log: [Clang][Intrinsics][avx512] Continue Adding round cvt to clang And remove trailing spaces in intrinsic f test Differential Revision: http://reviews.llvm.org/D20810 Modified: cfe/trunk/lib/Headers/avx512fintrin.h cfe/trunk/test/CodeGen/avx512f-builtins.c Modified: cfe/trunk/lib/Headers/avx512fintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=271398&r1=271397&r2=271398&view=diff == --- cfe/trunk/lib/Headers/avx512fintrin.h (original) +++ cfe/trunk/lib/Headers/avx512fintrin.h Wed Jun 1 09:41:41 2016 @@ -3585,6 +3585,27 @@ _mm512_mask_blend_epi32(__mmask16 __U, _ /* Conversion */ +#define _mm512_cvtt_roundps_epu32( __A, __R) __extension__ ({ \ +__builtin_ia32_cvttps2udq512_mask ((__v16sf)( __A),\ + (__v16si)\ + _mm512_undefined_epi32 (),\ + (__mmask16) -1,( __R));\ +}) + +#define _mm512_mask_cvtt_roundps_epu32( __W, __U, __A, __R) __extension__ ({ \ +__builtin_ia32_cvttps2udq512_mask ((__v16sf)( __A),\ + (__v16si)( __W),\ + (__mmask16)( __U),( __R));\ +}) + +#define _mm512_maskz_cvtt_roundps_epu32( __U, __A, __R) __extension__ ({ \ +__builtin_ia32_cvttps2udq512_mask ((__v16sf)( __A),\ + (__v16si)\ + _mm512_setzero_si512 (),\ + (__mmask16)( __U),( __R));\ +}) + + static __inline __m512i __DEFAULT_FN_ATTRS _mm512_cvttps_epu32(__m512 __A) { @@ -3795,6 +3816,28 @@ _mm512_maskz_cvtpd_ps (__mmask8 __U, __m _MM_FROUND_CUR_DIRECTION); } +#define _mm512_cvt_roundps_ph( __A, __I) __extension__ ({ \ + (__m256i)__builtin_ia32_vcvtps2ph512_mask ((__v16sf)( __A),\ + (__I),\ + (__v16hi)_mm256_undefined_si256 (),\ + (__mmask16) -1);\ +}) + +#define _mm512_mask_cvt_roundps_ph( __U, __W, __A, __I) __extension__ ({ \ + (__m256i)__builtin_ia32_vcvtps2ph512_mask ((__v16sf)( __A),\ + (__I),\ + (__v16hi)( __U),\ + (__mmask16)( __W));\ +}) + +#define _mm512_maskz_cvt_roundps_ph( __W, __A, __I) __extension__ ({ \ + 
(__m256i)__builtin_ia32_vcvtps2ph512_mask ((__v16sf)( __A),\ + (__I),\ + (__v16hi)\ + _mm256_setzero_si256 (),\ + (__mmask16)( __W));\ +}) + #define _mm512_cvtps_ph(A, I) __extension__ ({ \ (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \ (__v16hi)_mm256_setzero_si256(), \ @@ -3809,7 +3852,28 @@ _mm512_maskz_cvtpd_ps (__mmask8 __U, __m (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \ (__v16hi)_mm256_setzero_si256(), \ (__mmask16)(W)); }) - + +#define _mm512_cvt_roundph_ps( __A, __R) __extension__ ({ \ +__builtin_ia32_vcvtph2ps512_mask ((__v16hi)( __A),\ + (__v16sf)\ + _mm512_undefined_ps (),\ + (__mmask16) -1,( __R));\ +}) + +#define _mm512_mask_cvt_roundph_ps( __W, __U, __A, __R) __extension__ ({ \ +__builtin_ia32_vcvtph2ps512_mask ((__v16hi)( __A),\ + (__v16sf)( __W),\ + (__mmask16)( __U),( __R));\ +}) + +#define _mm512_maskz_cvt_roundph_ps( __U, __A, __R) __extension__ ({ \ +__builtin_ia32_vcvtph2ps512_mask ((__v16hi)( __A),\ + (__v16sf)\ + _mm512_setzero_ps (),\ + (__mmask16)( __U),( __R));\ +}) + + static __inline __m512 __DEFAULT_FN_ATTRS _mm512_cvtph_ps(__m256i __A) { Modified: cfe/trunk/test/CodeGen/avx512f-builtins.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512f-builtins.c?rev=271398&r1=271397&r2=271398&view=diff == --- cfe/trunk/test/CodeGen/avx512f-builtins.c (original) +++ cfe/trunk/test/CodeGen/avx512f-builtins.c Wed Jun 1 09:41:41 2016 @@ -2948,19 +2948,19 @@ __m512 test_mm512_maskz_unpacklo_ps(__mm int test_mm_cvt_roundsd_si32(__m128d __A) { // CHECK-LABEL: @test_mm_cvt_roundsd_si32 // CHECK: @llvm.x86.avx512.vcvtsd2si32 - return _mm_cvt_roundsd_si32(__A, _MM_FROUND_CUR_DIRECTION); + return _mm_cvt_roundsd_si32(__A, _MM_FROUND_CUR_DIRECTION); } int test_mm_cvt_roundsd_i32(__m128d __A) { // CHECK-LABEL: @test_mm_cvt_roundsd_i32 // CHECK: @llvm.x86.avx512.vcvtsd2si32 - re
[PATCH] D20866: [Clang][AVX512] Adding set4 intrinsics
m_zuckerman created this revision. m_zuckerman added reviewers: delena, AsafBadouh, igorb. m_zuckerman added a subscriber: cfe-commits. http://reviews.llvm.org/D20866 Files: lib/Headers/avx512fintrin.h test/CodeGen/avx512f-builtins.c Index: test/CodeGen/avx512f-builtins.c === --- test/CodeGen/avx512f-builtins.c +++ test/CodeGen/avx512f-builtins.c @@ -6130,6 +6130,62 @@ return _mm512_set1_epi16(d); } +__m512i test_mm512_set4_epi32 (int __A, int __B, int __C, int __D) +{ + // CHECK-LABEL: @test_mm512_set4_epi32 + // CHECK: insertelement <16 x i32> {{.*}}, i32 15 + return _mm512_set4_epi32 (__A,__B,__C,__D); +} + +__m512i test_mm512_set4_epi64 (long long __A, long long __B, long long __C, long long __D) +{ + // CHECK-LABEL: @test_mm512_set4_epi64 + // CHECK: insertelement <8 x i64> {{.*}}, i32 7 + return _mm512_set4_epi64 (__A,__B,__C,__D); +} + +__m512d test_mm512_set4_pd (double __A, double __B, double __C, double __D) +{ + // CHECK-LABEL: @test_mm512_set4_pd + // CHECK: insertelement <8 x double> {{.*}}, i32 7 + return _mm512_set4_pd (__A,__B,__C,__D); +} + +__m512 test_mm512_set4_ps (float __A, float __B, float __C, float __D) +{ + // CHECK-LABEL: @test_mm512_set4_ps + // CHECK: insertelement <16 x float> {{.*}}, i32 15 + return _mm512_set4_ps (__A,__B,__C,__D); +} + +__m512i test_mm512_setr4_epi32(e0,e1,e2,e3) +{ + // CHECK-LABEL: @test_mm512_setr4_epi32 + // CHECK: insertelement <16 x i32> {{.*}}, i32 15 + return _mm512_setr4_epi32(e0,e1,e2,e3); +} + + __m512i test_mm512_setr4_epi64(e0,e1,e2,e3) +{ + // CHECK-LABEL: @test_mm512_setr4_epi64 + // CHECK: insertelement <8 x i64> {{.*}}, i32 7 + return _mm512_setr4_epi64(e0,e1,e2,e3); +} + +__m512i test_mm512_setr4_pd(e0,e1,e2,e3) +{ + // CHECK-LABEL: @test_mm512_setr4_pd + // CHECK: insertelement <8 x double> {{.*}}, i32 7 + return _mm512_setr4_pd(e0,e1,e2,e3); +} + + __m512i test_mm512_setr4_ps(e0,e1,e2,e3) +{ + // CHECK-LABEL: @test_mm512_setr4_ps + // CHECK: insertelement <16 x float> {{.*}}, i32 15 + return 
_mm512_setr4_ps(e0,e1,e2,e3); +} + __m512d test_mm512_castpd256_pd512(__m256d a) { // CHECK-LABEL: @test_mm512_castpd256_pd512 Index: lib/Headers/avx512fintrin.h === --- lib/Headers/avx512fintrin.h +++ lib/Headers/avx512fintrin.h @@ -332,6 +332,49 @@ __f, __f, __f, __f }; } +static __inline __m512i __DEFAULT_FN_ATTRS +_mm512_set4_epi32 (int __A, int __B, int __C, int __D) +{ + return (__m512i)(__v16si) + { __D, __C, __B, __A, __D, __C, __B, __A, + __D, __C, __B, __A, __D, __C, __B, __A }; +} + +static __inline __m512i __DEFAULT_FN_ATTRS +_mm512_set4_epi64 (long long __A, long long __B, long long __C, + long long __D) +{ + return (__m512i) (__v8di) + { __D, __C, __B, __A, __D, __C, __B, __A }; +} + +static __inline __m512d __DEFAULT_FN_ATTRS +_mm512_set4_pd (double __A, double __B, double __C, double __D) +{ + return (__m512d) + { __D, __C, __B, __A, __D, __C, __B, __A }; +} + +static __inline __m512 __DEFAULT_FN_ATTRS +_mm512_set4_ps (float __A, float __B, float __C, float __D) +{ + return (__m512) + { __D, __C, __B, __A, __D, __C, __B, __A, + __D, __C, __B, __A, __D, __C, __B, __A }; +} + +#define _mm512_setr4_epi32(e0,e1,e2,e3) \ + _mm512_set4_epi32(e3,e2,e1,e0) + +#define _mm512_setr4_epi64(e0,e1,e2,e3) \ + _mm512_set4_epi64(e3,e2,e1,e0) + +#define _mm512_setr4_pd(e0,e1,e2,e3)\ + _mm512_set4_pd(e3,e2,e1,e0) + +#define _mm512_setr4_ps(e0,e1,e2,e3)\ + _mm512_set4_ps(e3,e2,e1,e0) + static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_broadcastsd_pd(__m128d __X) { ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r271387 - Adding front-end support to several intrinsics (bit scanning, conversion and state reading intrinsics)
Author: mzuckerm Date: Wed Jun 1 07:21:00 2016 New Revision: 271387 URL: http://llvm.org/viewvc/llvm-project?rev=271387&view=rev Log: Adding front-end support to several intrinsics (bit scanning, conversion and state reading intrinsics) Adding LLVM front-end support to two intrinsics dealing with bit scan: _bit_scan_forward and _bit_scan_reverse. Their functionality is as described in Intel intrinsics guide: https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_bit_scan_forward&expand=371,370 https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_bit_scan_reverse&expand=371,370 Furthermore, adding clang front-end support to these conversion intrinsics: _mm256_cvtsd_f64, _mm256_cvtsi256_si32 and _mm256_cvtss_f32. Finally, adding tests to all of the above, as well as to the state reading intrinsics _rdpmc and _rdtsc. Their functionality is also specified in the Intel intrinsics guide. Commit on behalf of Omer Paparo Bivas Added: cfe/trunk/test/CodeGen/bitscan-builtins.c cfe/trunk/test/CodeGen/rd-builtins.c Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def cfe/trunk/lib/Headers/avxintrin.h cfe/trunk/lib/Headers/ia32intrin.h cfe/trunk/lib/Headers/immintrin.h cfe/trunk/test/CodeGen/avx-builtins.c Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=271387&r1=271386&r2=271387&view=diff == --- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original) +++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Wed Jun 1 07:21:00 2016 @@ -35,6 +35,10 @@ BUILTIN(__builtin_ms_va_start, "vc*&.", BUILTIN(__builtin_ms_va_end, "vc*&", "n") BUILTIN(__builtin_ms_va_copy, "vc*&c*&", "n") +// Bit scan +TARGET_BUILTIN(__builtin_ia32_bit_scan_forward, "ii", "", "") +TARGET_BUILTIN(__builtin_ia32_bit_scan_reverse, "ii", "", "") + // Undefined Values // TARGET_BUILTIN(__builtin_ia32_undef128, "V2d", "nc", "") Modified: cfe/trunk/lib/Headers/avxintrin.h URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avxintrin.h?rev=271387&r1=271386&r2=271387&view=diff == --- cfe/trunk/lib/Headers/avxintrin.h (original) +++ cfe/trunk/lib/Headers/avxintrin.h Wed Jun 1 07:21:00 2016 @@ -2123,6 +2123,25 @@ _mm256_cvttps_epi32(__m256 __a) return (__m256i)__builtin_ia32_cvttps2dq256((__v8sf) __a); } +static __inline double __DEFAULT_FN_ATTRS +_mm256_cvtsd_f64(__m256d __a) +{ + return __a[0]; +} + +static __inline int __DEFAULT_FN_ATTRS +_mm256_cvtsi256_si32(__m256i __a) +{ + __v8si __b = (__v8si)__a; + return __b[0]; +} + +static __inline float __DEFAULT_FN_ATTRS +_mm256_cvtss_f32(__m256 __a) +{ + return __a[0]; +} + /* Vector replicate */ static __inline __m256 __DEFAULT_FN_ATTRS _mm256_movehdup_ps(__m256 __a) Modified: cfe/trunk/lib/Headers/ia32intrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/ia32intrin.h?rev=271387&r1=271386&r2=271387&view=diff == --- cfe/trunk/lib/Headers/ia32intrin.h (original) +++ cfe/trunk/lib/Headers/ia32intrin.h Wed Jun 1 07:21:00 2016 @@ -74,4 +74,6 @@ __rdtscp(unsigned int *__A) { #define _rdtsc() __rdtsc() +#define _rdpmc(A) __rdpmc(A) + #endif /* __IA32INTRIN_H */ Modified: cfe/trunk/lib/Headers/immintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/immintrin.h?rev=271387&r1=271386&r2=271387&view=diff == --- cfe/trunk/lib/Headers/immintrin.h (original) +++ cfe/trunk/lib/Headers/immintrin.h Wed Jun 1 07:21:00 2016 @@ -169,6 +169,18 @@ _rdrand32_step(unsigned int *__p) return __builtin_ia32_rdrand32_step(__p); } +/* __bit_scan_forward */ +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_bit_scan_forward(int __A) { + return __builtin_ia32_bit_scan_forward(__A); +} + +/* __bit_scan_reverse */ +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_bit_scan_reverse(int __A) { + return __builtin_ia32_bit_scan_reverse(__A); +} + #ifdef __x86_64__ static __inline__ int __attribute__((__always_inline__, __nodebug__, 
__target__("rdrnd"))) _rdrand64_step(unsigned long long *__p) @@ -227,6 +239,7 @@ _writegsbase_u64(unsigned long long __V) { return __builtin_ia32_wrgsbase64(__V); } + #endif #endif /* __FSGSBASE__ */ Modified: cfe/trunk/test/CodeGen/avx-builtins.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx-builtins.c?rev=271387&r1=271386&r2=271387&view=diff == --- cfe/trunk/test/CodeGen/avx-builtins.c (original) +++ cfe/trunk/test/CodeGen/avx-builtins.c Wed Jun 1 07:21:00 2016 @@ -1385,3 +1385,24 @@ void test_mm256_zeroupp
r271373 - [Clang][Intrinsics][avx512] Adding round intrinsics for max/min/sqrt instruction set to clang
Author: mzuckerm Date: Wed Jun 1 03:34:03 2016 New Revision: 271373 URL: http://llvm.org/viewvc/llvm-project?rev=271373&view=rev Log: [Clang][Intrinsics][avx512] Adding round intrinsics fot max/min/sqrt instruction set to clang Differential Revision: http://reviews.llvm.org/D20812 Modified: cfe/trunk/lib/Headers/avx512fintrin.h cfe/trunk/test/CodeGen/avx512f-builtins.c Modified: cfe/trunk/lib/Headers/avx512fintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=271373&r1=271372&r2=271373&view=diff == --- cfe/trunk/lib/Headers/avx512fintrin.h (original) +++ cfe/trunk/lib/Headers/avx512fintrin.h Wed Jun 1 03:34:03 2016 @@ -813,6 +813,29 @@ _mm512_maskz_sub_epi32 (__mmask16 __U, _ (__mmask16) __U); } +#define _mm512_mask_max_round_pd( __W, __U, __A, __B, __R) __extension__ ({ \ +__builtin_ia32_maxpd512_mask ((__v8df)( __A),\ +(__v8df)( __B),\ +(__v8df)( __W),\ +(__mmask8)( __U),( __R));\ +}) + +#define _mm512_maskz_max_round_pd( __U, __A, __B, __R) __extension__ ({ \ +__builtin_ia32_maxpd512_mask ((__v8df)( __A),\ +(__v8df)( __B),\ +(__v8df)\ +_mm512_setzero_pd (),\ +(__mmask8)( __U),( __R));\ +}) + +#define _mm512_max_round_pd( __A, __B, __R) __extension__ ({ \ +__builtin_ia32_maxpd512_mask ((__v8df)( __A),\ +(__v8df)( __B),\ +(__v8df)\ +_mm512_undefined_pd (),\ +(__mmask8) -1,( __R));\ +}) + static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_max_pd(__m512d __A, __m512d __B) { @@ -845,6 +868,29 @@ _mm512_maskz_max_pd (__mmask8 __U, __m51 _MM_FROUND_CUR_DIRECTION); } +#define _mm512_mask_max_round_ps( __W, __U, __A, __B, __R) __extension__ ({ \ +__builtin_ia32_maxps512_mask ((__v16sf)( __A),\ + (__v16sf)( __B),\ + (__v16sf)( __W),\ + (__mmask16)( __U),( __R));\ +}) + +#define _mm512_maskz_max_round_ps( __U, __A, __B, __R) __extension__ ({ \ +__builtin_ia32_maxps512_mask ((__v16sf)( __A),\ + (__v16sf)( __B),\ + (__v16sf)\ + _mm512_setzero_ps (),\ + (__mmask16)( __U),( __R));\ +}) + +#define _mm512_max_round_ps( __A, __B, __R) 
__extension__ ({ \ +__builtin_ia32_maxps512_mask ((__v16sf)( __A),\ + (__v16sf)( __B),\ + (__v16sf)\ + _mm512_undefined_ps (),\ + (__mmask16) -1,( __R));\ +}) + static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_max_ps(__m512 __A, __m512 __B) { @@ -1062,6 +1108,29 @@ _mm512_maskz_max_epu64 (__mmask8 __M, __ __M); } +#define _mm512_mask_min_round_pd( __W, __U, __A, __B, __R) __extension__ ({ \ +__builtin_ia32_minpd512_mask ((__v8df)( __A),\ +(__v8df)( __B),\ +(__v8df)( __W),\ +(__mmask8)( __U),( __R));\ +}) + +#define _mm512_maskz_min_round_pd( __U, __A, __B, __R) __extension__ ({ \ +__builtin_ia32_minpd512_mask ((__v8df)( __A),\ +(__v8df)( __B),\ +(__v8df)\ +_mm512_setzero_pd (),\ +(__mmask8)( __U),( __R));\ +}) + +#define _mm512_min_round_pd( __A, __B, __R) __extension__ ({ \ +__builtin_ia32_minpd512_mask ((__v8df)( __A),\ +(__v8df)( __B),\ +(__v8df)\ +_mm512_undefined_pd (),\ +(__mmask8) -1,( __R));\ +}) + static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_min_pd(__m512d __A, __m512d __B) { @@ -1083,6 +1152,29 @@ _mm512_mask_min_pd (__m512d __W, __mmask _MM_FROUND_CUR_DIRECTION); } +#define _mm512_mask_min_round_ps( __W, __U, __A, __B, __R) __extension__ ({ \ +__builtin_ia32_minps512_mask ((__v16sf)( __A),\ + (__v16sf)( __B),\ + (__v16sf)( __W),\ + (__mmask16)( __U),( __R));\ +}) + +#define _mm512_maskz_min_round_ps( __U, __A, __B, __R) __extension__ ({ \ +__builtin_ia32_minps512_mask ((__v16sf)( __A),\ + (__v16sf)( __B),\ + (__v16sf)\ + _mm512_setzero_ps (),\ + (__mmask16)( __U),( __R));\ +}) + +#define _mm512_min_round_ps( __A, __B, __R) __extension__ ({ \ +__builtin_ia32_minps512_mask ((__v16sf)( __A),\ + (__v16sf)( __B),\ + (__v16sf)\ + _mm512_undefined_ps (),\ + (__mmask16) -1,( __R));\ +}) + static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B) { @@ -1391,6 +1483,26 @@ _mm512_mask_mullo_epi32 (__m512i __W, __ (__v16si) __W, __M); } +#define _mm512_mask_sqrt_round_pd( __W, __U, __A, __R) __extension__ ({ \ 
+__builtin_ia32_sqrtpd512_mask ((__v8df)( __A),\ + (__v8df)( __W),\ + (__mmask8)( __U),( __R));\ +}) + +#define _mm512_maskz_sqrt_round_pd( __U, __A, __R) __extension__ ({ \ +__builtin_ia32_sqrtpd512_mask ((__v8df)( __A),\ + (__v8df)\ + _mm512_setzero_pd (),\ +
r271368 - [Clang][Intrinsics][avx512] Adding round roundscale to clang
Author: mzuckerm Date: Wed Jun 1 02:35:44 2016 New Revision: 271368 URL: http://llvm.org/viewvc/llvm-project?rev=271368&view=rev Log: [Clang][Intrinsics][avx512] Adding round roundscale to clang Differential Revision: http://reviews.llvm.org/D20815 Modified: cfe/trunk/lib/Headers/avx512fintrin.h cfe/trunk/test/CodeGen/avx512f-builtins.c Modified: cfe/trunk/lib/Headers/avx512fintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=271368&r1=271367&r2=271368&view=diff == --- cfe/trunk/lib/Headers/avx512fintrin.h (original) +++ cfe/trunk/lib/Headers/avx512fintrin.h Wed Jun 1 02:35:44 2016 @@ -2382,6 +2382,24 @@ _mm512_maskz_div_ps(__mmask16 __U, __m51 (__mmask16)(A), \ _MM_FROUND_CUR_DIRECTION); }) +#define _mm512_mask_roundscale_round_ps( __A, __B, __C, __imm, __R) __extension__ ({ \ + (__m512)__builtin_ia32_rndscaleps_mask ((__v16sf)( __C), (int)__imm,\ + (__v16sf)( __A),\ + (__mmask16)( __B),(int) __R);\ +}) + +#define _mm512_maskz_roundscale_round_ps( __A, __B, __imm,__R) __extension__ ({ \ + (__m512)__builtin_ia32_rndscaleps_mask ((__v16sf)( __B), (int)__imm,\ + (__v16sf)_mm512_setzero_ps (),\ + (__mmask16)( __A),(int) __R);\ +}) + +#define _mm512_roundscale_round_ps( __A, __imm, __R) __extension__ ({ \ + (__m512)__builtin_ia32_rndscaleps_mask ((__v16sf)( __A),(int) __imm,\ + (__v16sf) _mm512_undefined_ps (),\ + (__mmask16) -1,(int) __R);\ +}) + #define _mm512_roundscale_pd(A, B) __extension__ ({ \ (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(B), \ (__v8df)(__m512d)(A), (__mmask8)-1, \ @@ -2398,6 +2416,24 @@ _mm512_maskz_div_ps(__mmask16 __U, __m51 (__mmask8)(A), \ _MM_FROUND_CUR_DIRECTION); }) +#define _mm512_mask_roundscale_round_pd( __A, __B, __C, __imm ,__R) __extension__ ({ \ + (__m512d)__builtin_ia32_rndscalepd_mask ((__v8df)( __C),(int)__imm,\ + (__v8df)( __A),\ + (__mmask8)( __B),(int)__R);\ +}) + +#define _mm512_maskz_roundscale_round_pd( __A, __B, __imm, __R) __extension__ ({ \ + 
(__m512d)__builtin_ia32_rndscalepd_mask ((__v8df)( __B),(int)__imm,\ +(__v8df)_mm512_setzero_pd (),\ +(__mmask8)( __A),(int) __R);\ +}) + +#define _mm512_roundscale_round_pd( __A, __imm , __R) __extension__ ({ \ + (__m512d)__builtin_ia32_rndscalepd_mask ((__v8df)( __A),(int) __imm,\ +(__v8df)_mm512_undefined_pd (),\ +(__mmask8) -1,(int) __R);\ +}) + #define _mm512_fmadd_round_pd(A, B, C, R) __extension__ ({ \ (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ Modified: cfe/trunk/test/CodeGen/avx512f-builtins.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512f-builtins.c?rev=271368&r1=271367&r2=271368&view=diff == --- cfe/trunk/test/CodeGen/avx512f-builtins.c (original) +++ cfe/trunk/test/CodeGen/avx512f-builtins.c Wed Jun 1 02:35:44 2016 @@ -6653,6 +6653,27 @@ __m512 test_mm512_maskz_roundscale_ps(__ return _mm512_maskz_roundscale_ps(__U,__A, 1); } +__m512 test_mm512_mask_roundscale_round_ps(__m512 __A,__mmask16 __U,__m512 __C) +{ + // CHECK-LABEL: @test_mm512_mask_roundscale_round_ps + // CHECK: @llvm.x86.avx512.mask.rndscale.ps.512 + return _mm512_mask_roundscale_round_ps(__A,__U,__C,3,_MM_FROUND_CUR_DIRECTION); +} + +__m512 test_mm512_maskz_roundscale_round_ps(__m512 __A,__mmask16 __U) +{ + // CHECK-LABEL: @test_mm512_maskz_roundscale_round_ps + // CHECK: @llvm.x86.avx512.mask.rndscale.ps.512 + return _mm512_maskz_roundscale_round_ps(__U,__A,3,_MM_FROUND_CUR_DIRECTION); +} + +__m512 test_mm512_roundscale_round_ps(__m512 __A) +{ + // CHECK-LABEL: @test_mm512_roundscale_round_ps + // CHECK: @llvm.x86.avx512.mask.rndscale.ps.512 + return _mm512_roundscale_round_ps(__A,3,_MM_FROUND_CUR_DIRECTION); +} + __m512d test_mm512_mask_roundscale_pd(__m512d __W, __mmask8 __U, __m512d __A) { // CHECK-LABEL: @test_mm512_mask_roundscale_pd @@ -6667,6 +6688,27 @@ __m512d test_mm512_maskz_roundscale_pd(_ return _mm512_maskz_roundscale_pd(__U,__A, 1); } +__m512d test_mm512_mask_roundscale_round_pd(__m512d __A,__mmask8 
__U,__m512d __C) +{ + // CHECK-LAB
[PATCH] D20810: [Clang][Intrinsics][avx512] Continue Adding round cvt to clang
m_zuckerman created this revision. m_zuckerman added reviewers: AsafBadouh, igorb, delena. m_zuckerman added a subscriber: cfe-commits. http://reviews.llvm.org/D20810 Files: lib/Headers/avx512fintrin.h test/CodeGen/avx512f-builtins.c Index: test/CodeGen/avx512f-builtins.c === --- test/CodeGen/avx512f-builtins.c +++ test/CodeGen/avx512f-builtins.c @@ -3107,6 +3107,71 @@ return _mm_cvttss_u64(__A); } +__m512i test_mm512_cvtt_roundps_epu32(__m512 __A) +{ +// CHECK-LABEL: @test_mm512_cvtt_roundps_epu32 +// CHECK: @llvm.x86.avx512.mask.cvttps2udq.512 +return _mm512_cvtt_roundps_epu32(__A, _MM_FROUND_CUR_DIRECTION); +} + +__m512i test_mm512_mask_cvtt_roundps_epu32(__m512i __W, __mmask16 __U, __m512 __A) +{ +// CHECK-LABEL: @test_mm512_mask_cvtt_roundps_epu32 +// CHECK: @llvm.x86.avx512.mask.cvttps2udq.512 + +return _mm512_mask_cvtt_roundps_epu32(__W, __U, __A, _MM_FROUND_CUR_DIRECTION); +} + +__m512i test_mm512_maskz_cvtt_roundps_epu32( __mmask16 __U, __m512 __A) +{ +// CHECK-LABEL: @test_mm512_maskz_cvtt_roundps_epu32 +// CHECK: @llvm.x86.avx512.mask.cvttps2udq.512 + +return _mm512_maskz_cvtt_roundps_epu32(__U, __A, _MM_FROUND_CUR_DIRECTION); +} + +__m256i test_mm512_cvt_roundps_ph(__m512 __A) +{ +// CHECK-LABEL: @test_mm512_cvt_roundps_ph +// CHECK: @llvm.x86.avx512.mask.vcvtps2ph.512 +return _mm512_cvt_roundps_ph(__A, _MM_FROUND_CUR_DIRECTION); +} + +__m256i test_mm512_mask_cvt_roundps_ph(__m256i __W , __mmask16 __U, __m512 __A) +{ +// CHECK-LABEL: @test_mm512_mask_cvt_roundps_ph +// CHECK: @llvm.x86.avx512.mask.vcvtps2ph.512 +return _mm512_mask_cvt_roundps_ph(__W, __U, __A, _MM_FROUND_CUR_DIRECTION); +} + +__m256i test_mm512_maskz_cvt_roundps_ph(__mmask16 __U, __m512 __A) +{ +// CHECK-LABEL: @test_mm512_maskz_cvt_roundps_ph +// CHECK: @llvm.x86.avx512.mask.vcvtps2ph.512 +return _mm512_maskz_cvt_roundps_ph(__U, __A, _MM_FROUND_CUR_DIRECTION); +} + +__m512 test_mm512_cvt_roundph_ps(__m256i __A) +{ +// CHECK-LABEL: @test_mm512_cvt_roundph_ps +// CHECK: 
@llvm.x86.avx512.mask.vcvtph2ps.512 +return _mm512_cvt_roundph_ps(__A, _MM_FROUND_CUR_DIRECTION); +} + +__m512 test_mm512_mask_cvt_roundph_ps(__m512 __W, __mmask16 __U, __m256i __A) +{ +// CHECK-LABEL: @test_mm512_mask_cvt_roundph_ps +// CHECK: @llvm.x86.avx512.mask.vcvtph2ps.512 +return _mm512_mask_cvt_roundph_ps(__W, __U, __A, _MM_FROUND_CUR_DIRECTION); +} + +__m512 test_mm512_maskz_cvt_roundph_ps(__mmask16 __U, __m256i __A) +{ +// CHECK-LABEL: @test_mm512_maskz_cvt_roundph_ps +// CHECK: @llvm.x86.avx512.mask.vcvtph2ps.512 +return _mm512_maskz_cvt_roundph_ps(__U, __A, _MM_FROUND_CUR_DIRECTION); +} + __m512 test_mm512_mask_cvt_roundepi32_ps(__m512 __W, __mmask16 __U, __m512i __A) { // CHECK-LABEL: @test_mm512_mask_cvt_roundepi32_ps Index: lib/Headers/avx512fintrin.h === --- lib/Headers/avx512fintrin.h +++ lib/Headers/avx512fintrin.h @@ -3419,6 +3419,27 @@ /* Conversion */ +#define _mm512_cvtt_roundps_epu32( __A, __R) __extension__ ({ \ +__builtin_ia32_cvttps2udq512_mask ((__v16sf)( __A),\ + (__v16si)\ + _mm512_undefined_epi32 (),\ + (__mmask16) -1,( __R));\ +}) + +#define _mm512_mask_cvtt_roundps_epu32( __W, __U, __A, __R) __extension__ ({ \ +__builtin_ia32_cvttps2udq512_mask ((__v16sf)( __A),\ + (__v16si)( __W),\ + (__mmask16)( __U),( __R));\ +}) + +#define _mm512_maskz_cvtt_roundps_epu32( __U, __A, __R) __extension__ ({ \ +__builtin_ia32_cvttps2udq512_mask ((__v16sf)( __A),\ + (__v16si)\ + _mm512_setzero_si512 (),\ + (__mmask16)( __U),( __R));\ +}) + + static __inline __m512i __DEFAULT_FN_ATTRS _mm512_cvttps_epu32(__m512 __A) { @@ -3629,6 +3650,29 @@ _MM_FROUND_CUR_DIRECTION); } +#define _mm512_cvt_roundps_ph( __A, __I) __extension__ ({ \ +__builtin_ia32_vcvtps2ph512_mask ((__v16sf)( __A),\ + ( __I),\ + (__v16hi)\ + _mm256_undefined_si256 (),\ + -1);\ +}) + +#define _mm512_mask_cvt_roundps_ph( __U, __W, __A, __I) __extension__ ({ \ +__builtin_ia32_vcvtps2ph512_mask ((__v16sf)( __A),\ + ( __I),\ + (__v16hi)( __U),\ + (__mmask16)( __W));\ +}) + +#define 
_mm512_maskz_cvt_roundps_ph( __W, __A, __I) __extension__ ({ \ +__builtin_ia32_vcvtps2ph512_mask ((__v16sf)( __A),\ + ( __I),\ + (__v16hi)\ + _mm256_setzero_si256 (),\ + (__mmask16)( __W));\ +}) + #define _mm512_cvtps_ph(A, I) __extension__ ({ \ (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \ (__v16hi)_mm256_setzero_si256(), \ @@ -3644,7 +3688,27 @@ (__v16hi)_mm256_setzero_si256(), \
r271265 - [Clang][Intrinsics][avx512] Adding round cvt to clang
Author: mzuckerm Date: Tue May 31 06:27:34 2016 New Revision: 271265 URL: http://llvm.org/viewvc/llvm-project?rev=271265&view=rev Log: [Clang][Intrinsics][avx512] Adding round cvt to clang Differential Revision: http://reviews.llvm.org/D20790 Modified: cfe/trunk/lib/Headers/avx512fintrin.h cfe/trunk/test/CodeGen/avx512f-builtins.c Modified: cfe/trunk/lib/Headers/avx512fintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=271265&r1=271264&r2=271265&view=diff == --- cfe/trunk/lib/Headers/avx512fintrin.h (original) +++ cfe/trunk/lib/Headers/avx512fintrin.h Tue May 31 06:27:34 2016 @@ -3452,11 +3452,37 @@ _mm512_maskz_cvttps_epu32 (__mmask16 __U (__v16sf)_mm512_setzero_ps(), \ (__mmask16)-1, (int)(R)); }) +#define _mm512_mask_cvt_roundepi32_ps( __W, __U, __A, __R) __extension__ ({ \ +__builtin_ia32_cvtdq2ps512_mask ((__v16si)( __A),\ + (__v16sf)( __W),\ + (__mmask16)( __U),( __R));\ +}) + +#define _mm512_maskz_cvt_roundepi32_ps( __U, __A, __R) __extension__ ({ \ +__builtin_ia32_cvtdq2ps512_mask ((__v16si)( __A),\ + (__v16sf)\ + _mm512_setzero_ps (),\ + (__mmask16)( __U),( __R));\ +}) + #define _mm512_cvt_roundepu32_ps(A, R) __extension__ ({ \ (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \ (__v16sf)_mm512_setzero_ps(), \ (__mmask16)-1, (int)(R)); }) +#define _mm512_mask_cvt_roundepu32_ps( __W, __U, __A, __R) __extension__ ({ \ +__builtin_ia32_cvtudq2ps512_mask ((__v16si)( __A),\ + (__v16sf)( __W),\ + (__mmask16)( __U),( __R));\ +}) + +#define _mm512_maskz_cvt_roundepu32_ps( __U, __A, __R) __extension__ ({ \ +__builtin_ia32_cvtudq2ps512_mask ((__v16si)( __A),\ + (__v16sf)\ + _mm512_setzero_ps (),\ + (__mmask16)( __U),( __R));\ +}) + static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_cvtepu32_ps (__m512i __A) { @@ -3566,6 +3592,16 @@ _mm512_maskz_cvtepu32_pd (__mmask8 __U, (__v8sf)_mm256_setzero_ps(), \ (__mmask8)-1, (int)(R)); }) +#define _mm512_mask_cvt_roundpd_ps(W, U, A, R) __extension__ ({ \ + 
(__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \ + (__v8sf)(W), \ + (__mmask8)(U), (int)(R)); }) + +#define _mm512_maskz_cvt_roundpd_ps(U, A, R) __extension__ ({ \ + (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \ + (__v8sf)_mm256_setzero_ps(), \ + (__mmask8)(U), (int)(R)); }) + static __inline__ __m256 __DEFAULT_FN_ATTRS _mm512_cvtpd_ps (__m512d __A) { @@ -3637,6 +3673,21 @@ _mm512_maskz_cvtph_ps (__mmask16 __U, __ _MM_FROUND_CUR_DIRECTION); } +#define _mm512_cvtt_roundpd_epi32(A, R) __extension__ ({ \ + (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \ +(__v8si)_mm256_setzero_si256(), \ +(__mmask8)-1, (int)(R)); }) + +#define _mm512_mask_cvtt_roundpd_epi32(W, U, A, R) __extension__ ({ \ + (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \ +(__v8si)(W), \ +(__mmask8)(U), (int)(R)); }) + +#define _mm512_maskz_cvtt_roundpd_epi32( U, A, R) __extension__ ({ \ + (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \ +(__v8si)_mm256_setzero_si256(), \ +(__mmask8)(U), (int)(R)); }) + static __inline __m256i __DEFAULT_FN_ATTRS _mm512_cvttpd_epi32(__m512d __a) { @@ -3664,16 +3715,21 @@ _mm512_maskz_cvttpd_epi32 (__mmask8 __U, _MM_FROUND_CUR_DIRECTION); } -#define _mm512_cvtt_roundpd_epi32(A, R) __extension__ ({ \ - (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \ -(__v8si)_mm256_setzero_si256(), \ -(__mmask8)-1, (int)(R)); }) - #define _mm512_cvtt_roundps_epi32(A, R) __extension__ ({ \ (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \ (__v16si)_mm512_setzero_si512(), \ (__mmask16)-1, (int)(R)); }) +#define _mm512_mask_cvtt_roundps_epi32( W, U, A, R) __extension__ ({ \ + (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \ +(__v16si)(W), \ +
Re: [PATCH] D20790: [Clang][Intrinsics][avx512] Adding round cvt to clang
m_zuckerman updated this revision to Diff 58957. http://reviews.llvm.org/D20790 Files: lib/Headers/avx512fintrin.h test/CodeGen/avx512f-builtins.c Index: test/CodeGen/avx512f-builtins.c === --- test/CodeGen/avx512f-builtins.c +++ test/CodeGen/avx512f-builtins.c @@ -3103,6 +3103,140 @@ // CHECK: @llvm.x86.avx512.cvttss2usi64 return _mm_cvttss_u64(__A); } + +__m512 test_mm512_mask_cvt_roundepi32_ps(__m512 __W, __mmask16 __U, __m512i __A) +{ + // CHECK-LABEL: @test_mm512_mask_cvt_roundepi32_ps + // CHECK: @llvm.x86.avx512.mask.cvtdq2ps.512 + return _mm512_mask_cvt_roundepi32_ps(__W, __U, __A, 4); +} + +__m512 test_mm512_maskz_cvt_roundepi32_ps(__mmask16 __U, __m512i __A) +{ + // CHECK-LABEL: @test_mm512_maskz_cvt_roundepi32_ps + // CHECK: @llvm.x86.avx512.mask.cvtdq2ps.512 + return _mm512_maskz_cvt_roundepi32_ps(__U, __A, 4); +} + +__m512 test_mm512_mask_cvt_roundepu32_ps(__m512 __W, __mmask16 __U, __m512i __A) +{ + // CHECK-LABEL: @test_mm512_mask_cvt_roundepu32_ps + // CHECK: @llvm.x86.avx512.mask.cvtudq2ps.512 + return _mm512_mask_cvt_roundepu32_ps(__W, __U, __A, 4); +} + +__m512 test_mm512_maskz_cvt_roundepu32_ps(__mmask16 __U, __m512i __A) +{ + // CHECK-LABEL: @test_mm512_maskz_cvt_roundepu32_ps + // CHECK: @llvm.x86.avx512.mask.cvtudq2ps.512 + return _mm512_maskz_cvt_roundepu32_ps(__U , __A, 4); +} + +__m256 test_mm512_mask_cvt_roundpd_ps(__m256 W, __mmask8 U, __m512d A) +{ + // CHECK-LABEL: @test_mm512_mask_cvt_roundpd_ps + // CHECK: @llvm.x86.avx512.mask.cvtpd2ps.512 + return _mm512_mask_cvt_roundpd_ps(W, U, A, 4); +} + +__m256 test_mm512_maskz_cvt_roundpd_ps(__mmask8 U, __m512d A) +{ + // CHECK-LABEL: @test_mm512_maskz_cvt_roundpd_ps + // CHECK: @llvm.x86.avx512.mask.cvtpd2ps.512 + return _mm512_maskz_cvt_roundpd_ps(U, A, 4); +} + +__m256i test_mm512_cvtt_roundpd_epi32(__m512d A) +{ + // CHECK-LABEL: @test_mm512_cvtt_roundpd_epi32 + // CHECK: @llvm.x86.avx512.mask.cvttpd2dq.512 + return _mm512_cvtt_roundpd_epi32(A, 4); +} + +__m256i 
test_mm512_mask_cvtt_roundpd_epi32(__m256i W, __mmask8 U, __m512d A) +{ + // CHECK-LABEL: @test_mm512_mask_cvtt_roundpd_epi32 + // CHECK: @llvm.x86.avx512.mask.cvttpd2dq.512 + return _mm512_mask_cvtt_roundpd_epi32(W, U, A, 4); +} + +__m256i test_mm512_maskz_cvtt_roundpd_epi32(__mmask8 U, __m512d A) +{ + // CHECK-LABEL: @test_mm512_maskz_cvtt_roundpd_epi32 + // CHECK: @llvm.x86.avx512.mask.cvttpd2dq.512 + return _mm512_maskz_cvtt_roundpd_epi32(U, A, 4); +} + +__m512i test_mm512_mask_cvtt_roundps_epi32(__m512i W, __mmask16 U, __m512 A) +{ + // CHECK-LABEL: @test_mm512_mask_cvtt_roundps_epi32 + // CHECK: @llvm.x86.avx512.mask.cvttps2dq.512 + return _mm512_mask_cvtt_roundps_epi32(W, U, A, 4); +} + +__m512i test_mm512_maskz_cvtt_roundps_epi32(__mmask16 U, __m512 A) +{ + // CHECK-LABEL: @test_mm512_maskz_cvtt_roundps_epi32 + // CHECK: @llvm.x86.avx512.mask.cvttps2dq.512 + return _mm512_maskz_cvtt_roundps_epi32(U, A, 4); +} + +__m512i test_mm512_mask_cvt_roundps_epi32(__m512i __W, __mmask16 __U, __m512 __A) +{ + // CHECK-LABEL: @test_mm512_mask_cvt_roundps_epi32 + // CHECK: @llvm.x86.avx512.mask.cvtps2dq.512 + return _mm512_mask_cvt_roundps_epi32(__W, __U, __A, 4); +} + +__m512i test_mm512_maskz_cvt_roundps_epi32(__mmask16 __U, __m512 __A) +{ + // CHECK-LABEL: @test_mm512_maskz_cvt_roundps_epi32 + // CHECK: @llvm.x86.avx512.mask.cvtps2dq.512 + return _mm512_maskz_cvt_roundps_epi32(__U, __A, 4); +} + +__m256i test_mm512_mask_cvt_roundpd_epi32(__m256i W, __mmask8 U, __m512d A) +{ + // CHECK-LABEL: @test_mm512_mask_cvt_roundpd_epi32 + // CHECK: @llvm.x86.avx512.mask.cvtpd2dq.512 + return _mm512_mask_cvt_roundpd_epi32(W, U, A, 4); +} + +__m256i test_mm512_maskz_cvt_roundpd_epi32(__mmask8 U, __m512d A) +{ + // CHECK-LABEL: @test_mm512_maskz_cvt_roundpd_epi32 + // CHECK: @llvm.x86.avx512.mask.cvtpd2dq.512 + return _mm512_maskz_cvt_roundpd_epi32(U, A, 4); +} + +__m512i test_mm512_mask_cvt_roundps_epu32(__m512i __W, __mmask16 __U, __m512 __A) +{ + // CHECK-LABEL: 
@test_mm512_mask_cvt_roundps_epu32 + // CHECK: @llvm.x86.avx512.mask.cvtps2udq.512 + return _mm512_mask_cvt_roundps_epu32(__W, __U, __A, 4); +} + +__m512i test_mm512_maskz_cvt_roundps_epu32(__mmask16 __U, __m512 __A) +{ + // CHECK-LABEL: @test_mm512_maskz_cvt_roundps_epu32 + // CHECK: @llvm.x86.avx512.mask.cvtps2udq.512 + return _mm512_maskz_cvt_roundps_epu32(__U, __A, 4); +} + +__m256i test_mm512_mask_cvt_roundpd_epu32(__m256i W, __mmask8 U, __m512d A) +{ + // CHECK-LABEL: @test_mm512_mask_cvt_roundpd_epu32 + // CHECK: @llvm.x86.avx512.mask.cvtpd2udq.512 + return _mm512_mask_cvt_roundpd_epu32(W, U, A, 4); +} + +__m256i test_mm512_maskz_cvt_roundpd_epu32(__mmask8 U, __m512d A) +{ + // CHECK-LABEL: @test_mm512_maskz_cvt_roundpd_epu32 + // CHECK: @llvm.x86.avx512.mask.cvtpd2udq.512 + return _mm512_maskz_cvt_roundpd_epu32(U, A, 4); +} + __m512 test_mm512_mask2_permutex2var_ps(__m512 __A, __m512i
[PATCH] D20790: [Clang][Intrinsics][avx512] Adding round cvt to clang
m_zuckerman created this revision. m_zuckerman added reviewers: AsafBadouh, igorb, delena. m_zuckerman added a subscriber: cfe-commits. http://reviews.llvm.org/D20790 Files: lib/Headers/avx512fintrin.h test/CodeGen/avx512f-builtins.c Index: test/CodeGen/avx512f-builtins.c === --- test/CodeGen/avx512f-builtins.c +++ test/CodeGen/avx512f-builtins.c @@ -3103,6 +3103,140 @@ // CHECK: @llvm.x86.avx512.cvttss2usi64 return _mm_cvttss_u64(__A); } + +__m512 test_mm512_mask_cvt_roundepi32_ps(__m512 __W, __mmask16 __U, __m512i __A) +{ + // CHECK-LABEL: @test_mm512_mask_cvt_roundepi32_ps + // CHECK: @llvm.x86.avx512.mask.cvtdq2ps.512 + return _mm512_mask_cvt_roundepi32_ps(__W,__U,__A,4); +} + +__m512 test_mm512_maskz_cvt_roundepi32_ps(__mmask16 __U, __m512i __A) +{ + // CHECK-LABEL: @test_mm512_maskz_cvt_roundepi32_ps + // CHECK: @llvm.x86.avx512.mask.cvtdq2ps.512 + return _mm512_maskz_cvt_roundepi32_ps(__U,__A,4); +} + +__m512 test_mm512_mask_cvt_roundepu32_ps(__m512 __W, __mmask16 __U,__m512i __A) +{ + // CHECK-LABEL: @test_mm512_mask_cvt_roundepu32_ps + // CHECK: @llvm.x86.avx512.mask.cvtudq2ps.512 + return _mm512_mask_cvt_roundepu32_ps(__W,__U,__A,4); +} + +__m512 test_mm512_maskz_cvt_roundepu32_ps(__mmask16 __U,__m512i __A) +{ + // CHECK-LABEL: @test_mm512_maskz_cvt_roundepu32_ps + // CHECK: @llvm.x86.avx512.mask.cvtudq2ps.512 + return _mm512_maskz_cvt_roundepu32_ps(__U,__A,4); +} + +__m256 test_mm512_mask_cvt_roundpd_ps(__m256 W, __mmask8 U,__m512d A) +{ + // CHECK-LABEL: @test_mm512_mask_cvt_roundpd_ps + // CHECK: @llvm.x86.avx512.mask.cvtpd2ps.512 + return _mm512_mask_cvt_roundpd_ps(W,U,A,4); +} + +__m256 test_mm512_maskz_cvt_roundpd_ps(__mmask8 U, __m512d A) +{ + // CHECK-LABEL: @test_mm512_maskz_cvt_roundpd_ps + // CHECK: @llvm.x86.avx512.mask.cvtpd2ps.512 + return _mm512_maskz_cvt_roundpd_ps(U,A,4); +} + +__m256i test_mm512_cvtt_roundpd_epi32(__m512d A) +{ + // CHECK-LABEL: @test_mm512_cvtt_roundpd_epi32 + // CHECK: @llvm.x86.avx512.mask.cvttpd2dq.512 + return 
_mm512_cvtt_roundpd_epi32(A,4); +} + +__m256i test_mm512_mask_cvtt_roundpd_epi32(__m256i W, __mmask8 U, __m512d A) +{ + // CHECK-LABEL: @test_mm512_mask_cvtt_roundpd_epi32 + // CHECK: @llvm.x86.avx512.mask.cvttpd2dq.512 + return _mm512_mask_cvtt_roundpd_epi32(W,U,A,4); +} + +__m256i test_mm512_maskz_cvtt_roundpd_epi32(__mmask8 U, __m512d A) +{ + // CHECK-LABEL: @test_mm512_maskz_cvtt_roundpd_epi32 + // CHECK: @llvm.x86.avx512.mask.cvttpd2dq.512 + return _mm512_maskz_cvtt_roundpd_epi32(U,A,4); +} + +__m512i test_mm512_mask_cvtt_roundps_epi32(__m512i W,__mmask16 U, __m512 A) +{ + // CHECK-LABEL: @test_mm512_mask_cvtt_roundps_epi32 + // CHECK: @llvm.x86.avx512.mask.cvttps2dq.512 + return _mm512_mask_cvtt_roundps_epi32(W,U,A,4); +} + +__m512i test_mm512_maskz_cvtt_roundps_epi32(__mmask16 U, __m512 A) +{ + // CHECK-LABEL: @test_mm512_maskz_cvtt_roundps_epi32 + // CHECK: @llvm.x86.avx512.mask.cvttps2dq.512 + return _mm512_maskz_cvtt_roundps_epi32(U,A,4); +} + +__m512i test_mm512_mask_cvt_roundps_epi32(__m512i __W,__mmask16 __U,__m512 __A) +{ + // CHECK-LABEL: @test_mm512_mask_cvt_roundps_epi32 + // CHECK: @llvm.x86.avx512.mask.cvtps2dq.512 + return _mm512_mask_cvt_roundps_epi32(__W,__U,__A,4); +} + +__m512i test_mm512_maskz_cvt_roundps_epi32(__mmask16 __U, __m512 __A) +{ + // CHECK-LABEL: @test_mm512_maskz_cvt_roundps_epi32 + // CHECK: @llvm.x86.avx512.mask.cvtps2dq.512 + return _mm512_maskz_cvt_roundps_epi32(__U,__A,4); +} + +__m256i test_mm512_mask_cvt_roundpd_epi32(__m256i W,__mmask8 U,__m512d A) +{ + // CHECK-LABEL: @test_mm512_mask_cvt_roundpd_epi32 + // CHECK: @llvm.x86.avx512.mask.cvtpd2dq.512 + return _mm512_mask_cvt_roundpd_epi32(W,U,A,4); +} + +__m256i test_mm512_maskz_cvt_roundpd_epi32(__mmask8 U, __m512d A) +{ + // CHECK-LABEL: @test_mm512_maskz_cvt_roundpd_epi32 + // CHECK: @llvm.x86.avx512.mask.cvtpd2dq.512 + return _mm512_maskz_cvt_roundpd_epi32(U,A,4); +} + +__m512i test_mm512_mask_cvt_roundps_epu32(__m512i __W,__mmask16 __U,__m512 __A) +{ + // 
CHECK-LABEL: @test_mm512_mask_cvt_roundps_epu32 + // CHECK: @llvm.x86.avx512.mask.cvtps2udq.512 + return _mm512_mask_cvt_roundps_epu32(__W,__U,__A,4); +} + +__m512i test_mm512_maskz_cvt_roundps_epu32(__mmask16 __U,__m512 __A) +{ + // CHECK-LABEL: @test_mm512_maskz_cvt_roundps_epu32 + // CHECK: @llvm.x86.avx512.mask.cvtps2udq.512 + return _mm512_maskz_cvt_roundps_epu32(__U,__A, 4); +} + +__m256i test_mm512_mask_cvt_roundpd_epu32(__m256i W, __mmask8 U, __m512d A) +{ + // CHECK-LABEL: @test_mm512_mask_cvt_roundpd_epu32 + // CHECK: @llvm.x86.avx512.mask.cvtpd2udq.512 + return _mm512_mask_cvt_roundpd_epu32(W,U,A,4); +} + +__m256i test_mm512_maskz_cvt_roundpd_epu32(__mmask8 U, __m512d A) +{ + // CHECK-LABEL: @test_mm512_maskz_cvt_roundpd_epu32 + // CHECK: @llvm.x86.avx512.mask.cvtpd2udq.512 + return _mm512_maskz_cvt_roundpd_epu32(U, A, 4); +} + __m512 test_mm512_mask2_per
r271205 - [Clang][avx512][builtin] Adding missing intrinsics for cvt
Author: mzuckerm Date: Mon May 30 08:22:12 2016 New Revision: 271205 URL: http://llvm.org/viewvc/llvm-project?rev=271205&view=rev Log: [Clang][avx512][builtin] Adding missing intrinsics for cvt Differential Revision: http://reviews.llvm.org/D20618 Modified: cfe/trunk/lib/Headers/avx512fintrin.h cfe/trunk/test/CodeGen/avx512f-builtins.c Modified: cfe/trunk/lib/Headers/avx512fintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=271205&r1=271204&r2=271205&view=diff == --- cfe/trunk/lib/Headers/avx512fintrin.h (original) +++ cfe/trunk/lib/Headers/avx512fintrin.h Mon May 30 08:22:12 2016 @@ -3792,6 +3792,16 @@ _mm512_mask_cvtps_epu32 (__m512i __W, __ _MM_FROUND_CUR_DIRECTION); } +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_cvtps_epu32 ( __mmask16 __U, __m512 __A) +{ + return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U , + _MM_FROUND_CUR_DIRECTION); +} + #define _mm512_cvt_roundpd_epu32(A, R) __extension__ ({ \ (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \ (__v8si)_mm256_setzero_si256(), \ Modified: cfe/trunk/test/CodeGen/avx512f-builtins.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512f-builtins.c?rev=271205&r1=271204&r2=271205&view=diff == --- cfe/trunk/test/CodeGen/avx512f-builtins.c (original) +++ cfe/trunk/test/CodeGen/avx512f-builtins.c Mon May 30 08:22:12 2016 @@ -6411,6 +6411,12 @@ __m512i test_mm512_mask_cvtps_epu32 (__m // CHECK: @llvm.x86.avx512.mask.cvtps2udq.512 return _mm512_mask_cvtps_epu32( __W, __U, __A); } +__m512i test_mm512_maskz_cvtps_epu32 (__mmask16 __U, __m512 __A) +{ + // CHECK-LABEL: @test_mm512_maskz_cvtps_epu32 + // CHECK: @llvm.x86.avx512.mask.cvtps2udq.512 + return _mm512_maskz_cvtps_epu32( __U, __A); +} __m512d test_mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { ___ cfe-commits mailing list cfe-commits@lists.llvm.org 
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D20626: [Clang][AVX512][intrinsics] Adding missing intrinsics div_pd and div_ps
m_zuckerman added inline comments. Comment at: test/CodeGen/avx512f-builtins.c:1927 @@ +1926,3 @@ + // check-label: @test_mm512_div_pd + // check: @llvm.x86.avx512.mask.div.pd.512 + return _mm512_div_pd(__a,__b); craig.topper wrote: > delena wrote: > > I don't understand how do you receive intrinsic if you issue IR. > The word "check" being in lowercase causes filecheck to ignore it so it isn't > being checked. You are right. Thanks, I didn't see it. I will fix it. http://reviews.llvm.org/D20626 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D20614: Remove trailing spaces in x86 intrinsic headers
m_zuckerman accepted this revision. m_zuckerman added a comment. This revision is now accepted and ready to land. LGTM Repository: rL LLVM http://reviews.llvm.org/D20614 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r270851 - Adding missing _mm512_castsi512_si256 intrinsic.
Author: mzuckerm Date: Thu May 26 09:32:11 2016 New Revision: 270851 URL: http://llvm.org/viewvc/llvm-project?rev=270851&view=rev Log: Adding missing _mm512_castsi512_si256 intrinsic. Modified: cfe/trunk/lib/Headers/avx512fintrin.h cfe/trunk/test/CodeGen/avx512f-builtins.c Modified: cfe/trunk/lib/Headers/avx512fintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=270851&r1=270850&r2=270851&view=diff == --- cfe/trunk/lib/Headers/avx512fintrin.h (original) +++ cfe/trunk/lib/Headers/avx512fintrin.h Thu May 26 09:32:11 2016 @@ -445,6 +445,12 @@ _mm512_castsi512_si128 (__m512i __A) return (__m128i)__builtin_shufflevector(__A, __A , 0, 1); } +static __inline __m256i __DEFAULT_FN_ATTRS +_mm512_castsi512_si256 (__m512i __A) +{ + return (__m256i)__builtin_shufflevector(__A, __A , 0, 1, 2, 3); +} + /* Bitwise operators */ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_and_epi32(__m512i __a, __m512i __b) Modified: cfe/trunk/test/CodeGen/avx512f-builtins.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512f-builtins.c?rev=270851&r1=270850&r2=270851&view=diff == --- cfe/trunk/test/CodeGen/avx512f-builtins.c (original) +++ cfe/trunk/test/CodeGen/avx512f-builtins.c Thu May 26 09:32:11 2016 @@ -6041,6 +6041,13 @@ __m128i test_mm512_castsi512_si128 (__m5 return _mm512_castsi512_si128 (__A); } +__m256i test_mm512_castsi512_si256 (__m512i __A) +{ + // CHECK-LABEL: @test_mm512_castsi512_si256 + // CHECK: shufflevector <8 x i64> %{{.}}, <8 x i64> %{{.}}, <4 x i32> + return _mm512_castsi512_si256 (__A); +} + __m128 test_mm_cvt_roundsd_ss(__m128 __A, __m128d __B) { // CHECK-LABEL: @test_mm_cvt_roundsd_ss // CHECK: @llvm.x86.avx512.mask.cvtsd2ss.round ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r270830 - Fix intrinsics names:
Author: mzuckerm Date: Thu May 26 03:10:12 2016 New Revision: 270830 URL: http://llvm.org/viewvc/llvm-project?rev=270830&view=rev Log: Fix instrinsics names: _mm128_cmp_ps_mask-->_mm_cmp_ps_mask _mm128_mask_cmp_ps_mask-->_mm_mask_cmp_ps_mask _mm128_cmp_pd_mask-->_mm_cmp_pd_mask _mm128_mask_cmp_pd_mask-->_mm_mask_cmp_pd_mask Modified: cfe/trunk/lib/Headers/avx512vlintrin.h cfe/trunk/test/CodeGen/avx512vl-builtins.c Modified: cfe/trunk/lib/Headers/avx512vlintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512vlintrin.h?rev=270830&r1=270829&r2=270830&view=diff == --- cfe/trunk/lib/Headers/avx512vlintrin.h (original) +++ cfe/trunk/lib/Headers/avx512vlintrin.h Thu May 26 03:10:12 2016 @@ -1302,22 +1302,22 @@ _mm_maskz_xor_epi64 (__mmask8 __U, __m12 (__v4df)(__m256d)(b), (int)(p), \ (__mmask8)(m)); }) -#define _mm128_cmp_ps_mask(a, b, p) __extension__ ({ \ +#define _mm_cmp_ps_mask(a, b, p) __extension__ ({ \ (__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \ (__v4sf)(__m128)(b), (int)(p), \ (__mmask8)-1); }) -#define _mm128_mask_cmp_ps_mask(m, a, b, p) __extension__ ({ \ +#define _mm_mask_cmp_ps_mask(m, a, b, p) __extension__ ({ \ (__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \ (__v4sf)(__m128)(b), (int)(p), \ (__mmask8)(m)); }) -#define _mm128_cmp_pd_mask(a, b, p) __extension__ ({ \ +#define _mm_cmp_pd_mask(a, b, p) __extension__ ({ \ (__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128d)(a), \ (__v2df)(__m128d)(b), (int)(p), \ (__mmask8)-1); }) -#define _mm128_mask_cmp_pd_mask(m, a, b, p) __extension__ ({ \ +#define _mm_mask_cmp_pd_mask(m, a, b, p) __extension__ ({ \ (__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128d)(a), \ (__v2df)(__m128d)(b), (int)(p), \ (__mmask8)(m)); }) Modified: cfe/trunk/test/CodeGen/avx512vl-builtins.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512vl-builtins.c?rev=270830&r1=270829&r2=270830&view=diff == --- cfe/trunk/test/CodeGen/avx512vl-builtins.c (original) +++ 
cfe/trunk/test/CodeGen/avx512vl-builtins.c Thu May 26 03:10:12 2016 @@ -993,16 +993,16 @@ __mmask8 test_mm256_mask_cmp_ps_mask(__m return _mm256_mask_cmp_ps_mask(m, __A, __B, 0); } -__mmask8 test_mm128_cmp_ps_mask(__m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm128_cmp_ps_mask +__mmask8 test_mm_cmp_ps_mask(__m128 __A, __m128 __B) { + // CHECK-LABEL: @test_mm_cmp_ps_mask // CHECK: @llvm.x86.avx512.mask.cmp.ps.128 - return (__mmask8)_mm128_cmp_ps_mask(__A, __B, 0); + return (__mmask8)_mm_cmp_ps_mask(__A, __B, 0); } -__mmask8 test_mm128_mask_cmp_ps_mask(__mmask8 m, __m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm128_mask_cmp_ps_mask +__mmask8 test_mm_mask_cmp_ps_mask(__mmask8 m, __m128 __A, __m128 __B) { + // CHECK-LABEL: @test_mm_mask_cmp_ps_mask // CHECK: @llvm.x86.avx512.mask.cmp.ps.128 - return _mm128_mask_cmp_ps_mask(m, __A, __B, 0); + return _mm_mask_cmp_ps_mask(m, __A, __B, 0); } __mmask8 test_mm256_cmp_pd_mask(__m256d __A, __m256d __B) { @@ -1017,21 +1017,18 @@ __mmask8 test_mm256_mask_cmp_pd_mask(__m return _mm256_mask_cmp_pd_mask(m, __A, __B, 0); } -__mmask8 test_mm128_cmp_pd_mask(__m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm128_cmp_pd_mask +__mmask8 test_mm_cmp_pd_mask(__m128d __A, __m128d __B) { + // CHECK-LABEL: @test_mm_cmp_pd_mask // CHECK: @llvm.x86.avx512.mask.cmp.pd.128 - return (__mmask8)_mm128_cmp_pd_mask(__A, __B, 0); + return (__mmask8)_mm_cmp_pd_mask(__A, __B, 0); } -__mmask8 test_mm128_mask_cmp_pd_mask(__mmask8 m, __m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm128_mask_cmp_pd_mask +__mmask8 test_mm_mask_cmp_pd_mask(__mmask8 m, __m128d __A, __m128d __B) { + // CHECK-LABEL: @test_mm_mask_cmp_pd_mask // CHECK: @llvm.x86.avx512.mask.cmp.pd.128 - return _mm128_mask_cmp_pd_mask(m, __A, __B, 0); + return _mm_mask_cmp_pd_mask(m, __A, __B, 0); } - -//igorb - __m128d test_mm_mask_fmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) { // CHECK-LABEL: @test_mm_mask_fmadd_pd // CHECK: @llvm.x86.avx512.mask.vfmadd.pd.128 ___ 
cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r270825 - [Clang][AVX512][BUILTIN] Adding intrinsics for set1
Author: mzuckerm Date: Thu May 26 01:54:52 2016 New Revision: 270825 URL: http://llvm.org/viewvc/llvm-project?rev=270825&view=rev Log: [Clang][AVX512][BUILTIN] Adding intrinsics for set1 Differential Revision: http://reviews.llvm.org/D20562 Modified: cfe/trunk/lib/Headers/avx512fintrin.h cfe/trunk/test/CodeGen/avx512f-builtins.c Modified: cfe/trunk/lib/Headers/avx512fintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=270825&r1=270824&r2=270825&view=diff == --- cfe/trunk/lib/Headers/avx512fintrin.h (original) +++ cfe/trunk/lib/Headers/avx512fintrin.h Thu May 26 01:54:52 2016 @@ -27,6 +27,8 @@ #ifndef __AVX512FINTRIN_H #define __AVX512FINTRIN_H +typedef char __v64qi __attribute__((__vector_size__(64))); +typedef short __v32hi __attribute__((__vector_size__(64))); typedef double __v8df __attribute__((__vector_size__(64))); typedef float __v16sf __attribute__((__vector_size__(64))); typedef long long __v8di __attribute__((__vector_size__(64))); @@ -286,6 +288,28 @@ _mm512_set1_pd(double __w) } static __inline __m512i __DEFAULT_FN_ATTRS +_mm512_set1_epi8(char __w) +{ + return (__m512i)(__v64qi){ __w, __w, __w, __w, __w, __w, __w, __w, + __w, __w, __w, __w, __w, __w, __w, __w, + __w, __w, __w, __w, __w, __w, __w, __w, + __w, __w, __w, __w, __w, __w, __w, __w, + __w, __w, __w, __w, __w, __w, __w, __w, + __w, __w, __w, __w, __w, __w, __w, __w, + __w, __w, __w, __w, __w, __w, __w, __w, + __w, __w, __w, __w, __w, __w, __w, __w }; +} + +static __inline __m512i __DEFAULT_FN_ATTRS +_mm512_set1_epi16(short __w) +{ + return (__m512i)(__v32hi){ __w, __w, __w, __w, __w, __w, __w, __w, + __w, __w, __w, __w, __w, __w, __w, __w, + __w, __w, __w, __w, __w, __w, __w, __w, + __w, __w, __w, __w, __w, __w, __w, __w }; +} + +static __inline __m512i __DEFAULT_FN_ATTRS _mm512_set1_epi32(int __s) { return (__m512i)(__v16si){ __s, __s, __s, __s, __s, __s, __s, __s, Modified: cfe/trunk/test/CodeGen/avx512f-builtins.c URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512f-builtins.c?rev=270825&r1=270824&r2=270825&view=diff == --- cfe/trunk/test/CodeGen/avx512f-builtins.c (original) +++ cfe/trunk/test/CodeGen/avx512f-builtins.c Thu May 26 01:54:52 2016 @@ -5951,6 +5951,36 @@ __m512d test_mm512_castpd128_pd512(__m12 return _mm512_castpd128_pd512(__A); } +__m512d test_mm512_set1_epi8(char d) +{ + // CHECK-LABEL: @test_mm512_set1_epi8 + // CHECK: insertelement <64 x i8> {{.*}}, i32 0 + // CHECK: insertelement <64 x i8> {{.*}}, i32 1 + // CHECK: insertelement <64 x i8> {{.*}}, i32 2 + // CHECK: insertelement <64 x i8> {{.*}}, i32 3 + // CHECK: insertelement <64 x i8> {{.*}}, i32 4 + // CHECK: insertelement <64 x i8> {{.*}}, i32 5 + // CHECK: insertelement <64 x i8> {{.*}}, i32 6 + // CHECK: insertelement <64 x i8> {{.*}}, i32 7 + // CHECK: insertelement <64 x i8> {{.*}}, i32 63 + return _mm512_set1_epi8(d); +} + +__m512d test_mm512_set1_epi16(short d) +{ + // CHECK-LABEL: @test_mm512_set1_epi16 + // CHECK: insertelement <32 x i16> {{.*}}, i32 0 + // CHECK: insertelement <32 x i16> {{.*}}, i32 1 + // CHECK: insertelement <32 x i16> {{.*}}, i32 2 + // CHECK: insertelement <32 x i16> {{.*}}, i32 3 + // CHECK: insertelement <32 x i16> {{.*}}, i32 4 + // CHECK: insertelement <32 x i16> {{.*}}, i32 5 + // CHECK: insertelement <32 x i16> {{.*}}, i32 6 + // CHECK: insertelement <32 x i16> {{.*}}, i32 7 + // CHECK: insertelement <32 x i16> {{.*}}, i32 31 + return _mm512_set1_epi16(d); +} + __m512d test_mm512_castpd256_pd512(__m256d a) { // CHECK-LABEL: @test_mm512_castpd256_pd512 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D20626: [Clang][AVX512][intrinsics] Adding missing intrinsics div_pd and div_ps
m_zuckerman created this revision. m_zuckerman added reviewers: AsafBadouh, igorb, delena. m_zuckerman added a subscriber: cfe-commits. http://reviews.llvm.org/D20626 Files: lib/Headers/avx512fintrin.h test/CodeGen/avx512f-builtins.c Index: lib/Headers/avx512fintrin.h === --- lib/Headers/avx512fintrin.h +++ lib/Headers/avx512fintrin.h @@ -2209,6 +2209,12 @@ (__v2df)_mm_setzero_pd(), \ (__mmask8)(U), (int)(R)); }) +static __inline __m512d __DEFAULT_FN_ATTRS +_mm512_div_pd(__m512d __a, __m512d __b) +{ + return (__m512d)((__v8df)__a/(__v8df)__b); +} + static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __A, @@ -2228,6 +2234,12 @@ _MM_FROUND_CUR_DIRECTION); } +static __inline __m512 __DEFAULT_FN_ATTRS +_mm512_div_ps(__m512 __a, __m512 __b) +{ + return (__m512)((__v16sf)__a/(__v16sf)__b); +} + static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A, Index: test/CodeGen/avx512f-builtins.c === --- test/CodeGen/avx512f-builtins.c +++ test/CodeGen/avx512f-builtins.c @@ -1922,10 +1922,15 @@ // CHECK: @llvm.x86.avx512.mask.div.pd.512 return _mm512_maskz_div_round_pd(__U,__A,__B,_MM_FROUND_TO_NEAREST_INT); } -__m512d test_mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { - // CHECK-LABEL: @test_mm512_mask_div_pd - // CHECK: @llvm.x86.avx512.mask.div.pd.512 - return _mm512_mask_div_pd(__W,__U,__A,__B); +__m512d test_mm512_div_pd(__m512d __a, __m512d __b) { + // check-label: @test_mm512_div_pd + // check: @llvm.x86.avx512.mask.div.pd.512 + return _mm512_div_pd(__a,__b); +} +__m512d test_mm512_mask_div_pd(__m512d __w, __mmask8 __u, __m512d __a, __m512d __b) { + // check-label: @test_mm512_mask_div_pd + // check: @llvm.x86.avx512.mask.div.pd.512 + return _mm512_mask_div_pd(__w,__u,__a,__b); } __m512d 
test_mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B) { // CHECK-LABEL: @test_mm512_maskz_div_pd @@ -1947,6 +1952,11 @@ // CHECK: @llvm.x86.avx512.mask.div.ps.512 return _mm512_maskz_div_round_ps(__U,__A,__B,_MM_FROUND_TO_NEAREST_INT); } +__m512 test_mm512_div_ps(__m512 __A, __m512 __B) { + // CHECK-LABEL: @test_mm512_div_ps + // CHECK: @llvm.x86.avx512.mask.div.ps.512 + return _mm512_div_ps(__W,__U,__A,__B); +} __m512 test_mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { // CHECK-LABEL: @test_mm512_mask_div_ps // CHECK: @llvm.x86.avx512.mask.div.ps.512 Index: lib/Headers/avx512fintrin.h === --- lib/Headers/avx512fintrin.h +++ lib/Headers/avx512fintrin.h @@ -2209,6 +2209,12 @@ (__v2df)_mm_setzero_pd(), \ (__mmask8)(U), (int)(R)); }) +static __inline __m512d __DEFAULT_FN_ATTRS +_mm512_div_pd(__m512d __a, __m512d __b) +{ + return (__m512d)((__v8df)__a/(__v8df)__b); +} + static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __A, @@ -2228,6 +2234,12 @@ _MM_FROUND_CUR_DIRECTION); } +static __inline __m512 __DEFAULT_FN_ATTRS +_mm512_div_ps(__m512 __a, __m512 __b) +{ + return (__m512)((__v16sf)__a/(__v16sf)__b); +} + static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A, Index: test/CodeGen/avx512f-builtins.c === --- test/CodeGen/avx512f-builtins.c +++ test/CodeGen/avx512f-builtins.c @@ -1922,10 +1922,15 @@ // CHECK: @llvm.x86.avx512.mask.div.pd.512 return _mm512_maskz_div_round_pd(__U,__A,__B,_MM_FROUND_TO_NEAREST_INT); } -__m512d test_mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { - // CHECK-LABEL: @test_mm512_mask_div_pd - // CHECK: @llvm.x86.avx512.mask.div.pd.512 - return _mm512_mask_div_pd(__W,__U,__A,__B); +__m512d test_mm512_div_pd(__m512d __a, __m512d __b) { + // 
check-label: @test_mm512_div_pd + // check: @llvm.x86.avx512.mask.div.pd.512 + return _mm512_div_pd(__a,__b); +} +__m512d test_mm512_mask_div_pd(__m512d __w, __mmask8 __u, __m512d __a, __m512d __b) { + // check-label: @test_mm512_mask_div_pd + // check: @llvm.x86.avx512.mask.div.pd.512 + return _mm512_mask_div_pd(__w,__u,__a,__b); } __m512d test_mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B) { // CHECK-LABEL: @test_mm512_maskz_div_pd
r270707 - [Clang][AVX512][Builtin] Fix palignr intrinsics header
Author: mzuckerm Date: Wed May 25 10:05:03 2016 New Revision: 270707 URL: http://llvm.org/viewvc/llvm-project?rev=270707&view=rev Log: [Clang][AVX512][Builtin] Fix palignr intrinsics header Differential Revision: http://reviews.llvm.org/D20620 Modified: cfe/trunk/lib/Headers/avx512bwintrin.h Modified: cfe/trunk/lib/Headers/avx512bwintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512bwintrin.h?rev=270707&r1=270706&r2=270707&view=diff == --- cfe/trunk/lib/Headers/avx512bwintrin.h (original) +++ cfe/trunk/lib/Headers/avx512bwintrin.h Wed May 25 10:05:03 2016 @@ -2145,19 +2145,19 @@ _mm512_mask_permutexvar_epi16 (__m512i _ #define _mm512_alignr_epi8(A, B, N) __extension__ ({\ (__m512i)__builtin_ia32_palignr512_mask((__v64qi)(__m512i)(A), \ - (__v64qi)(__m512i)(B), (int)(N) * 8, \ + (__v64qi)(__m512i)(B), (int)(N), \ (__v64qi)_mm512_undefined_pd(), \ (__mmask64)-1); }) #define _mm512_mask_alignr_epi8(W, U, A, B, N) __extension__({\ (__m512i)__builtin_ia32_palignr512_mask((__v64qi)(__m512i)(A), \ - (__v64qi)(__m512i)(B), (int)(N) * 8, \ + (__v64qi)(__m512i)(B), (int)(N), \ (__v64qi)(__m512i)(W), \ (__mmask64)(U)); }) #define _mm512_maskz_alignr_epi8(U, A, B, N) __extension__({\ (__m512i)__builtin_ia32_palignr512_mask((__v64qi)(__m512i)(A), \ - (__v64qi)(__m512i)(B), (int)(N) * 8, \ + (__v64qi)(__m512i)(B), (int)(N), \ (__v64qi)_mm512_setzero_si512(), \ (__mmask64)(U)); }) ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r270699 - [Clang][AVX512][BUILTIN] Add missing intrinsics for cast
Author: mzuckerm Date: Wed May 25 09:04:21 2016 New Revision: 270699 URL: http://llvm.org/viewvc/llvm-project?rev=270699&view=rev Log: [Clang][AVX512][BUILTIN] Add missing intrinsics for cast Differential Revision: http://reviews.llvm.org/D20523 Modified: cfe/trunk/lib/Headers/avx512fintrin.h cfe/trunk/test/CodeGen/avx512f-builtins.c Modified: cfe/trunk/lib/Headers/avx512fintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=270699&r1=270698&r2=270699&view=diff == --- cfe/trunk/lib/Headers/avx512fintrin.h (original) +++ cfe/trunk/lib/Headers/avx512fintrin.h Wed May 25 09:04:21 2016 @@ -337,12 +337,35 @@ _mm512_castpd512_pd128(__m512d __a) return __builtin_shufflevector(__a, __a, 0, 1); } +static __inline __m256d __DEFAULT_FN_ATTRS +_mm512_castpd512_pd256 (__m512d __A) +{ + return __builtin_shufflevector(__A, __A, 0, 1, 2, 3); +} + static __inline __m128 __DEFAULT_FN_ATTRS _mm512_castps512_ps128(__m512 __a) { return __builtin_shufflevector(__a, __a, 0, 1, 2, 3); } +static __inline __m256 __DEFAULT_FN_ATTRS +_mm512_castps512_ps256 (__m512 __A) +{ + return __builtin_shufflevector(__A, __A, 0, 1, 2, 3, 4, 5, 6, 7); +} + +static __inline __m512 __DEFAULT_FN_ATTRS +_mm512_castpd_ps (__m512d __A) +{ + return (__m512) (__A); +} + +static __inline __m512i __DEFAULT_FN_ATTRS +_mm512_castpd_si512 (__m512d __A) +{ + return (__m512i) (__A); +} static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_castpd128_pd512 (__m128d __A) @@ -350,6 +373,18 @@ _mm512_castpd128_pd512 (__m128d __A) return __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1); } +static __inline __m512d __DEFAULT_FN_ATTRS +_mm512_castps_pd (__m512 __A) +{ + return (__m512d) (__A); +} + +static __inline __m512i __DEFAULT_FN_ATTRS +_mm512_castps_si512 (__m512 __A) +{ + return (__m512i) (__A); +} + static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_castps128_ps512 (__m128 __A) { @@ -368,6 +403,24 @@ _mm512_castsi256_si512 (__m256i __A) return __builtin_shufflevector( 
__A, __A, 0, 1, 2, 3, -1, -1, -1, -1); } +static __inline __m512 __DEFAULT_FN_ATTRS +_mm512_castsi512_ps (__m512i __A) +{ + return (__m512) (__A); +} + +static __inline __m512d __DEFAULT_FN_ATTRS +_mm512_castsi512_pd (__m512i __A) +{ + return (__m512d) (__A); +} + +static __inline __m128i __DEFAULT_FN_ATTRS +_mm512_castsi512_si128 (__m512i __A) +{ + return (__m128i)__builtin_shufflevector(__A, __A , 0, 1); +} + /* Bitwise operators */ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_and_epi32(__m512i __a, __m512i __b) Modified: cfe/trunk/test/CodeGen/avx512f-builtins.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512f-builtins.c?rev=270699&r1=270698&r2=270699&view=diff == --- cfe/trunk/test/CodeGen/avx512f-builtins.c (original) +++ cfe/trunk/test/CodeGen/avx512f-builtins.c Wed May 25 09:04:21 2016 @@ -328,13 +328,6 @@ __m512d test_mm512_set1_pd(double d) return _mm512_set1_pd(d); } -__m512d test_mm512_castpd256_pd512(__m256d a) -{ - // CHECK-LABEL: @test_mm512_castpd256_pd512 - // CHECK: shufflevector <4 x double> {{.*}} - return _mm512_castpd256_pd512(a); -} - __mmask16 test_mm512_knot(__mmask16 a) { // CHECK-LABEL: @test_mm512_knot @@ -5925,10 +5918,25 @@ __m256i test_mm512_maskz_cvttpd_epu32(__ return _mm512_maskz_cvttpd_epu32(__U, __A); } -__m512d test_mm512_castpd128_pd512(__m128d __A) { - // CHECK-LABEL: @test_mm512_castpd128_pd512 - // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <8 x i32> - return _mm512_castpd128_pd512(__A); +__m512 test_mm512_castpd_ps (__m512d __A) +{ + // CHECK-LABEL: @test_mm512_castpd_ps + // CHECK: bitcast <8 x double> %1 to <16 x float> + return _mm512_castpd_ps (__A); +} + +__m512d test_mm512_castps_pd (__m512 __A) +{ + // CHECK-LABEL: @test_mm512_castps_pd + // CHECK: bitcast <16 x float> %1 to <8 x double> + return _mm512_castps_pd (__A); +} + +__m512i test_mm512_castpd_si512 (__m512d __A) +{ + // CHECK-LABEL: @test_mm512_castpd_si512 + // CHECK: bitcast <8 x double> %1 to <8 x i64> + 
return _mm512_castpd_si512 (__A); } __m512 test_mm512_castps128_ps512(__m128 __A) { @@ -5937,6 +5945,39 @@ __m512 test_mm512_castps128_ps512(__m128 return _mm512_castps128_ps512(__A); } +__m512d test_mm512_castpd128_pd512(__m128d __A) { + // CHECK-LABEL: @test_mm512_castpd128_pd512 + // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <8 x i32> + return _mm512_castpd128_pd512(__A); +} + +__m512d test_mm512_castpd256_pd512(__m256d a) +{ + // CHECK-LABEL: @test_mm512_castpd256_pd512 + // CHECK: shufflevector <4 x double> {{.*}} + return _mm512_castpd256_pd512(a); +} + +__m256d test_mm512_castpd512_pd256 (__m512d __A) +{ + // CHECK-LABEL: @test_mm512_castpd512_pd256 + // CHECK: shufflevector <8 x do
[PATCH] D20620: [Clang][AVX512][Builtin] Fix palignr intrinsics header
m_zuckerman created this revision. m_zuckerman added reviewers: AsafBadouh, igorb, delena. m_zuckerman added a subscriber: cfe-commits. We don't need to multiply by eight the IMM. The instruction is doing that. http://reviews.llvm.org/D20620 Files: lib/Headers/avx512bwintrin.h Index: lib/Headers/avx512bwintrin.h === --- lib/Headers/avx512bwintrin.h +++ lib/Headers/avx512bwintrin.h @@ -2145,19 +2145,19 @@ #define _mm512_alignr_epi8(A, B, N) __extension__ ({\ (__m512i)__builtin_ia32_palignr512_mask((__v64qi)(__m512i)(A), \ - (__v64qi)(__m512i)(B), (int)(N) * 8, \ + (__v64qi)(__m512i)(B), (int)(N), \ (__v64qi)_mm512_undefined_pd(), \ (__mmask64)-1); }) #define _mm512_mask_alignr_epi8(W, U, A, B, N) __extension__({\ (__m512i)__builtin_ia32_palignr512_mask((__v64qi)(__m512i)(A), \ - (__v64qi)(__m512i)(B), (int)(N) * 8, \ + (__v64qi)(__m512i)(B), (int)(N), \ (__v64qi)(__m512i)(W), \ (__mmask64)(U)); }) #define _mm512_maskz_alignr_epi8(U, A, B, N) __extension__({\ (__m512i)__builtin_ia32_palignr512_mask((__v64qi)(__m512i)(A), \ - (__v64qi)(__m512i)(B), (int)(N) * 8, \ + (__v64qi)(__m512i)(B), (int)(N), \ (__v64qi)_mm512_setzero_si512(), \ (__mmask64)(U)); }) Index: lib/Headers/avx512bwintrin.h === --- lib/Headers/avx512bwintrin.h +++ lib/Headers/avx512bwintrin.h @@ -2145,19 +2145,19 @@ #define _mm512_alignr_epi8(A, B, N) __extension__ ({\ (__m512i)__builtin_ia32_palignr512_mask((__v64qi)(__m512i)(A), \ - (__v64qi)(__m512i)(B), (int)(N) * 8, \ + (__v64qi)(__m512i)(B), (int)(N), \ (__v64qi)_mm512_undefined_pd(), \ (__mmask64)-1); }) #define _mm512_mask_alignr_epi8(W, U, A, B, N) __extension__({\ (__m512i)__builtin_ia32_palignr512_mask((__v64qi)(__m512i)(A), \ - (__v64qi)(__m512i)(B), (int)(N) * 8, \ + (__v64qi)(__m512i)(B), (int)(N), \ (__v64qi)(__m512i)(W), \ (__mmask64)(U)); }) #define _mm512_maskz_alignr_epi8(U, A, B, N) __extension__({\ (__m512i)__builtin_ia32_palignr512_mask((__v64qi)(__m512i)(A), \ - (__v64qi)(__m512i)(B), (int)(N) * 8, \ + (__v64qi)(__m512i)(B), 
(int)(N), \ (__v64qi)_mm512_setzero_si512(), \ (__mmask64)(U)); }) ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D20618: [Clang][avx512][builtin] Adding missing intrinsics for cvt
m_zuckerman created this revision. m_zuckerman added reviewers: AsafBadouh, igorb, delena. m_zuckerman added a subscriber: cfe-commits. http://reviews.llvm.org/D20618 Files: lib/Headers/avx512fintrin.h test/CodeGen/avx512f-builtins.c Index: lib/Headers/avx512fintrin.h === --- lib/Headers/avx512fintrin.h +++ lib/Headers/avx512fintrin.h @@ -3709,6 +3709,17 @@ _MM_FROUND_CUR_DIRECTION); } +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_cvtps_epu32 ( __mmask16 __U, __m512 __A) +{ + return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U , + _MM_FROUND_CUR_DIRECTION); +} + + #define _mm512_cvt_roundpd_epu32(A, R) __extension__ ({ \ (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \ (__v8si)_mm256_setzero_si256(), \ Index: test/CodeGen/avx512f-builtins.c === --- test/CodeGen/avx512f-builtins.c +++ test/CodeGen/avx512f-builtins.c @@ -6302,6 +6302,13 @@ return _mm512_mask_cvtps_epu32( __W, __U, __A); } +__m512i test_mm512_maskz_cvtps_epu32 (__mmask16 __U, __m512 __A) +{ + // CHECK-LABEL: @test_mm512_mask_cvtps_epu32 + // CHECK: @llvm.x86.avx512.mask.cvtps2udq.512 + return _mm512_mask_cvtps_epu32( __U, __A); +} + __m512d test_mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { // CHECK-LABEL: @test_mm512_mask_max_pd Index: lib/Headers/avx512fintrin.h === --- lib/Headers/avx512fintrin.h +++ lib/Headers/avx512fintrin.h @@ -3709,6 +3709,17 @@ _MM_FROUND_CUR_DIRECTION); } +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_cvtps_epu32 ( __mmask16 __U, __m512 __A) +{ + return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U , + _MM_FROUND_CUR_DIRECTION); +} + + #define _mm512_cvt_roundpd_epu32(A, R) __extension__ ({ \ (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \ (__v8si)_mm256_setzero_si256(), \ Index: test/CodeGen/avx512f-builtins.c === --- 
test/CodeGen/avx512f-builtins.c +++ test/CodeGen/avx512f-builtins.c @@ -6302,6 +6302,13 @@ return _mm512_mask_cvtps_epu32( __W, __U, __A); } +__m512i test_mm512_maskz_cvtps_epu32 (__mmask16 __U, __m512 __A) +{ + // CHECK-LABEL: @test_mm512_mask_cvtps_epu32 + // CHECK: @llvm.x86.avx512.mask.cvtps2udq.512 + return _mm512_mask_cvtps_epu32( __U, __A); +} + __m512d test_mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { // CHECK-LABEL: @test_mm512_mask_max_pd ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D20614: Remove trailing spaces in x86 intrinsic headers
m_zuckerman added a comment. First, thanks. I don't see any problem with the patch, but if you can, please add a full-context svn diff from clang: svn diff --diff-cmd=diff -x -U99 > x.patch Repository: rL LLVM http://reviews.llvm.org/D20614 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D20562: [Clang][AVX512][BUILTIN] Adding intrinsics for set1
m_zuckerman created this revision. m_zuckerman added reviewers: AsafBadouh, igorb, delena. m_zuckerman added a subscriber: cfe-commits. http://reviews.llvm.org/D20562 Files: lib/Headers/avx512fintrin.h test/CodeGen/avx512f-builtins.c Index: test/CodeGen/avx512f-builtins.c === --- test/CodeGen/avx512f-builtins.c +++ test/CodeGen/avx512f-builtins.c @@ -328,6 +328,34 @@ return _mm512_set1_pd(d); } +__m512d test_mm512_set1_epi8(char d) +{ + // CHECK-LABEL: @test_mm512_set1_epi8 + // CHECK: insertelement <64 x i8> {{.*}}, i32 0 + // CHECK: insertelement <64 x i8> {{.*}}, i32 1 + // CHECK: insertelement <64 x i8> {{.*}}, i32 2 + // CHECK: insertelement <64 x i8> {{.*}}, i32 3 + // CHECK: insertelement <64 x i8> {{.*}}, i32 4 + // CHECK: insertelement <64 x i8> {{.*}}, i32 5 + // CHECK: insertelement <64 x i8> {{.*}}, i32 6 + // CHECK: insertelement <64 x i8> {{.*}}, i32 7 + return _mm512_set1_epi8(d); +} + +__m512d test_mm512_set1_epi16(short d) +{ + // CHECK-LABEL: @test_mm512_set1_epi16 + // CHECK: insertelement <32 x i16> {{.*}}, i32 0 + // CHECK: insertelement <32 x i16> {{.*}}, i32 1 + // CHECK: insertelement <32 x i16> {{.*}}, i32 2 + // CHECK: insertelement <32 x i16> {{.*}}, i32 3 + // CHECK: insertelement <32 x i16> {{.*}}, i32 4 + // CHECK: insertelement <32 x i16> {{.*}}, i32 5 + // CHECK: insertelement <32 x i16> {{.*}}, i32 6 + // CHECK: insertelement <32 x i16> {{.*}}, i32 7 + return _mm512_set1_epi16(d); +} + __m512d test_mm512_castpd256_pd512(__m256d a) { // CHECK-LABEL: @test_mm512_castpd256_pd512 Index: lib/Headers/avx512fintrin.h === --- lib/Headers/avx512fintrin.h +++ lib/Headers/avx512fintrin.h @@ -27,6 +27,8 @@ #ifndef __AVX512FINTRIN_H #define __AVX512FINTRIN_H +typedef char __v64qi __attribute__((__vector_size__(64))); +typedef short __v32hi __attribute__((__vector_size__(64))); typedef double __v8df __attribute__((__vector_size__(64))); typedef float __v16sf __attribute__((__vector_size__(64))); typedef long long __v8di 
__attribute__((__vector_size__(64))); @@ -286,6 +288,28 @@ } static __inline __m512i __DEFAULT_FN_ATTRS +_mm512_set1_epi8(char __w) +{ + return (__m512i)(__v64qi){ __w, __w, __w, __w, __w, __w, __w, __w, + __w, __w, __w, __w, __w, __w, __w, __w, + __w, __w, __w, __w, __w, __w, __w, __w, + __w, __w, __w, __w, __w, __w, __w, __w, + __w, __w, __w, __w, __w, __w, __w, __w, + __w, __w, __w, __w, __w, __w, __w, __w, + __w, __w, __w, __w, __w, __w, __w, __w, + __w, __w, __w, __w, __w, __w, __w, __w }; +} + +static __inline __m512i __DEFAULT_FN_ATTRS +_mm512_set1_epi16(short __w) +{ + return (__m512i)(__v32hi){ __w, __w, __w, __w, __w, __w, __w, __w, + __w, __w, __w, __w, __w, __w, __w, __w, + __w, __w, __w, __w, __w, __w, __w, __w, + __w, __w, __w, __w, __w, __w, __w, __w }; +} + +static __inline __m512i __DEFAULT_FN_ATTRS _mm512_set1_epi32(int __s) { return (__m512i)(__v16si){ __s, __s, __s, __s, __s, __s, __s, __s, Index: test/CodeGen/avx512f-builtins.c === --- test/CodeGen/avx512f-builtins.c +++ test/CodeGen/avx512f-builtins.c @@ -328,6 +328,34 @@ return _mm512_set1_pd(d); } +__m512d test_mm512_set1_epi8(char d) +{ + // CHECK-LABEL: @test_mm512_set1_epi8 + // CHECK: insertelement <64 x i8> {{.*}}, i32 0 + // CHECK: insertelement <64 x i8> {{.*}}, i32 1 + // CHECK: insertelement <64 x i8> {{.*}}, i32 2 + // CHECK: insertelement <64 x i8> {{.*}}, i32 3 + // CHECK: insertelement <64 x i8> {{.*}}, i32 4 + // CHECK: insertelement <64 x i8> {{.*}}, i32 5 + // CHECK: insertelement <64 x i8> {{.*}}, i32 6 + // CHECK: insertelement <64 x i8> {{.*}}, i32 7 + return _mm512_set1_epi8(d); +} + +__m512d test_mm512_set1_epi16(short d) +{ + // CHECK-LABEL: @test_mm512_set1_epi16 + // CHECK: insertelement <32 x i16> {{.*}}, i32 0 + // CHECK: insertelement <32 x i16> {{.*}}, i32 1 + // CHECK: insertelement <32 x i16> {{.*}}, i32 2 + // CHECK: insertelement <32 x i16> {{.*}}, i32 3 + // CHECK: insertelement <32 x i16> {{.*}}, i32 4 + // CHECK: insertelement <32 x i16> {{.*}}, i32 5 + // 
CHECK: insertelement <32 x i16> {{.*}}, i32 6 + // CHECK: insertelement <32 x i16> {{.*}}, i32 7 + return _mm512_set1_epi16(d); +} + __m512d test_mm512_castpd256_pd512(__m256d a) { // CHECK-LABEL: @test_mm512_castpd256_pd512 Index: lib/Headers/avx512fintrin.h === --- lib/Headers/avx512fintrin.h +++ lib/Headers/avx512fintrin.h @@ -27,6 +27,8 @@ #ifndef __AVX512FINTRIN_H #define __AVX512FINTRIN_H
Re: [PATCH] D20359: [LLVM][AVX512][Intrinsics] Convert AVX non-temporal store builtins to LLVM-native IR.
m_zuckerman added a comment. Yes, we can delete it. We don't need it anymore. http://reviews.llvm.org/D20359 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D20359: [LLVM][AVX512][Intrinsics] Convert AVX non-temporal store builtins to LLVM-native IR.
m_zuckerman updated this revision to Diff 58201. http://reviews.llvm.org/D20359 Files: include/llvm/IR/IntrinsicsX86.td lib/Target/X86/X86ISelLowering.cpp lib/Target/X86/X86IntrinsicsInfo.h test/CodeGen/X86/avx512-intrinsics.ll Index: lib/Target/X86/X86IntrinsicsInfo.h === --- lib/Target/X86/X86IntrinsicsInfo.h +++ lib/Target/X86/X86IntrinsicsInfo.h @@ -33,7 +33,7 @@ INTR_TYPE_SCALAR_MASK_RM, INTR_TYPE_3OP_SCALAR_MASK_RM, COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM, BRCST_SUBVEC_TO_VEC, TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32, - EXPAND_FROM_MEM, LOADA, LOADU, STOREA, STOREU, STOREANT, BLEND, INSERT_SUBVEC, + EXPAND_FROM_MEM, LOADA, LOADU, STOREA, STOREU, BLEND, INSERT_SUBVEC, TERLOG_OP_MASK, TERLOG_OP_MASKZ, BROADCASTM, KUNPCK, FIXUPIMM, FIXUPIMM_MASKZ, FIXUPIMMS, FIXUPIMMS_MASKZ, CONVERT_MASK_TO_VEC, CONVERT_TO_MASK }; @@ -276,9 +276,6 @@ X86_INTRINSIC_DATA(avx512_scattersiv4_si, SCATTER, X86::VPSCATTERDDZ128mr, 0), X86_INTRINSIC_DATA(avx512_scattersiv8_sf, SCATTER, X86::VSCATTERDPSZ256mr, 0), X86_INTRINSIC_DATA(avx512_scattersiv8_si, SCATTER, X86::VPSCATTERDDZ256mr, 0), - X86_INTRINSIC_DATA(avx512_storent_pd_512, STOREANT, ISD::DELETED_NODE, 0), - X86_INTRINSIC_DATA(avx512_storent_ps_512, STOREANT, ISD::DELETED_NODE, 0), - X86_INTRINSIC_DATA(avx512_storent_q_512, STOREANT, ISD::DELETED_NODE, 0), X86_INTRINSIC_DATA(rdpmc, RDPMC, X86ISD::RDPMC_DAG, 0), X86_INTRINSIC_DATA(rdrand_16, RDRAND, X86ISD::RDRAND, 0), X86_INTRINSIC_DATA(rdrand_32, RDRAND, X86ISD::RDRAND, 0), Index: lib/Target/X86/X86ISelLowering.cpp === --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -4072,7 +4072,6 @@ break; } case STOREA: - case STOREANT: case STOREU: { Info.ptrVal = I.getArgOperand(0); Info.memVT = MVT::getVT(I.getArgOperand(1)->getType()); @@ -18256,20 +18255,6 @@ return DAG.getMaskedStore(Chain, dl, Data, Addr, VMask, VT, MemIntr->getMemOperand(), false); } - case STOREANT: { -// Store (MOVNTPD, MOVNTPS, MOVNTDQ) using non-temporal 
hint intrinsic implementation. -SDValue Data = Op.getOperand(3); -SDValue Addr = Op.getOperand(2); -SDValue Chain = Op.getOperand(0); - -MemIntrinsicSDNode *MemIntr = dyn_cast(Op); -assert(MemIntr && "Expected MemIntrinsicSDNode!"); -MachineMemOperand *MMO = MemIntr->getMemOperand(); - -MMO->setFlags(MachineMemOperand::MONonTemporal); - -return DAG.getStore(Chain, dl, Data, Addr, MMO); - } } } Index: include/llvm/IR/IntrinsicsX86.td === --- include/llvm/IR/IntrinsicsX86.td +++ include/llvm/IR/IntrinsicsX86.td @@ -2234,18 +2234,6 @@ [IntrArgMemOnly]>; } -// Store ops using non-temporal hint -let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". - def int_x86_avx512_storent_q_512 : -GCCBuiltin<"__builtin_ia32_movntdq512">, -Intrinsic<[], [llvm_ptr_ty, llvm_v8i64_ty], [IntrArgMemOnly]>; - def int_x86_avx512_storent_pd_512 : -GCCBuiltin<"__builtin_ia32_movntpd512">, -Intrinsic<[], [llvm_ptr_ty, llvm_v8f64_ty], [IntrArgMemOnly]>; - def int_x86_avx512_storent_ps_512 : -GCCBuiltin<"__builtin_ia32_movntps512">, -Intrinsic<[], [llvm_ptr_ty, llvm_v16f32_ty], [IntrArgMemOnly]>; -} //===--===// // AVX2 Index: test/CodeGen/X86/avx512-intrinsics.ll === --- test/CodeGen/X86/avx512-intrinsics.ll +++ test/CodeGen/X86/avx512-intrinsics.ll @@ -7413,39 +7413,6 @@ ret <2 x double> %res4 } -declare void @llvm.x86.avx512.storent.q.512(i8*, <8 x i64>) - -define void@test_storent_q_512(<8 x i64> %data, i8* %ptr) { -; CHECK-LABEL: test_storent_q_512: -; CHECK: ## BB#0: -; CHECK-NEXT:vmovntdq %zmm0, (%rdi) -; CHECK-NEXT:retq - call void @llvm.x86.avx512.storent.q.512(i8* %ptr, <8 x i64> %data) - ret void -} - -declare void @llvm.x86.avx512.storent.pd.512(i8*, <8 x double>) - -define void @test_storent_pd_512(<8 x double> %data, i8* %ptr) { -; CHECK-LABEL: test_storent_pd_512: -; CHECK: ## BB#0: -; CHECK-NEXT:vmovntpd %zmm0, (%rdi) -; CHECK-NEXT:retq - call void @llvm.x86.avx512.storent.pd.512(i8* %ptr, <8 x double> %data) - ret void -} - -declare void 
@llvm.x86.avx512.storent.ps.512(i8*, <16 x float>) - -define void @test_storent_ps_512(<16 x float> %data, i8* %ptr) { -; CHECK-LABEL: test_storent_ps_512: -; CHECK: ## BB#0: -; CHECK-NEXT:vmovntps %zmm0, (%rdi) -; CHECK-NEXT:retq - call void @llvm.x86.avx512.storent.ps.512(i8* %ptr, <16 x float> %data) - ret void -} - declare i16 @llvm.x86.avx512.ptestnm.d.512(<16 x i32>, <16 x i32>, i16 %x2) def
r270441 - [clang][AVX512][Builtin] adding missing intrinsics for vpmultishiftqb{128|256|512} instruction set .
Author: mzuckerm Date: Mon May 23 10:04:39 2016 New Revision: 270441 URL: http://llvm.org/viewvc/llvm-project?rev=270441&view=rev Log: [clang][AVX512][Builtin] adding missing intrinsics for vpmultishiftqb{128|256|512} instruction set . Differential Revision: http://reviews.llvm.org/D20521 Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def cfe/trunk/lib/Headers/avx512vbmiintrin.h cfe/trunk/lib/Headers/avx512vbmivlintrin.h cfe/trunk/test/CodeGen/avx512vbmi-builtins.c cfe/trunk/test/CodeGen/avx512vbmivl-builtin.c Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=270441&r1=270440&r2=270441&view=diff == --- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original) +++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Mon May 23 10:04:39 2016 @@ -2276,6 +2276,9 @@ TARGET_BUILTIN(__builtin_ia32_cvtusi2sd3 TARGET_BUILTIN(__builtin_ia32_cvtusi2sd64, "V2dV2dULLiIi","","avx512f") TARGET_BUILTIN(__builtin_ia32_cvtusi2ss32, "V4fV4fUiIi","","avx512f") TARGET_BUILTIN(__builtin_ia32_cvtusi2ss64, "V4fV4fULLiIi","","avx512f") +TARGET_BUILTIN(__builtin_ia32_vpmultishiftqb512_mask, "V64cV64cV64cV64cULLi","","avx512vbmi") +TARGET_BUILTIN(__builtin_ia32_vpmultishiftqb128_mask, "V16cV16cV16cV16cUs","","avx512vbmi,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vpmultishiftqb256_mask, "V32cV32cV32cV32cUi","","avx512vbmi,avx512vl") // MONITORX/MWAITX TARGET_BUILTIN(__builtin_ia32_monitorx, "vv*UiUi", "", "mwaitx") Modified: cfe/trunk/lib/Headers/avx512vbmiintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512vbmiintrin.h?rev=270441&r1=270440&r2=270441&view=diff == --- cfe/trunk/lib/Headers/avx512vbmiintrin.h (original) +++ cfe/trunk/lib/Headers/avx512vbmiintrin.h Mon May 23 10:04:39 2016 @@ -108,6 +108,36 @@ _mm512_mask_permutexvar_epi8 (__m512i __ (__mmask64) __M); } +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_multishift_epi64_epi8 (__m512i __W, __mmask64 __M, 
__m512i __X, __m512i __Y) +{ + return (__m512i) __builtin_ia32_vpmultishiftqb512_mask ((__v64qi) __X, +(__v64qi) __Y, +(__v64qi) __W, +(__mmask64) __M); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_multishift_epi64_epi8 (__mmask64 __M, __m512i __X, __m512i __Y) +{ + return (__m512i) __builtin_ia32_vpmultishiftqb512_mask ((__v64qi) __X, +(__v64qi) __Y, +(__v64qi) +_mm512_setzero_si512 (), +(__mmask64) __M); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_multishift_epi64_epi8 (__m512i __X, __m512i __Y) +{ + return (__m512i) __builtin_ia32_vpmultishiftqb512_mask ((__v64qi) __X, +(__v64qi) __Y, +(__v64qi) +_mm512_undefined_epi32 (), +(__mmask64) -1); +} + + #undef __DEFAULT_FN_ATTRS #endif Modified: cfe/trunk/lib/Headers/avx512vbmivlintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512vbmivlintrin.h?rev=270441&r1=270440&r2=270441&view=diff == --- cfe/trunk/lib/Headers/avx512vbmivlintrin.h (original) +++ cfe/trunk/lib/Headers/avx512vbmivlintrin.h Mon May 23 10:04:39 2016 @@ -29,7 +29,7 @@ #define __VBMIVLINTRIN_H /* Define the default attributes for the functions in this file. 
*/ -#define __DEFAULT_FN_ATTRS __attribute__(( __nodebug__, __target__("avx512vbmi,avx512vl"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi,avx512vl"))) static __inline__ __m128i __DEFAULT_FN_ATTRS @@ -183,6 +183,65 @@ _mm256_mask_permutexvar_epi8 (__m256i __ (__mmask32) __M); } +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_multishift_epi64_epi8 (__m128i __W, __mmask16 __M, __m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X, +(__v16qi) __Y, +(__v16qi) __W, +(__mmask16) __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_multishift_epi64_epi8 (__mmask16 __M, __m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X, +(__v16qi) __Y, +(__v16qi) +_mm_setzero_si128 (), +(__mmask16) __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_multishift_epi64_epi8 (__m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X, +(__v16qi) __Y, +(__v16qi) +_mm_undefined_si128 (), +(__mmask16) -1); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +
[PATCH] D20523: [Clang][AVX512][BUILTIN] Add missing intrinsics for cast .
m_zuckerman created this revision. m_zuckerman added reviewers: AsafBadouh, igorb, delena. m_zuckerman added a subscriber: cfe-commits. http://reviews.llvm.org/D20523 Files: lib/Headers/avx512fintrin.h test/CodeGen/avx512f-builtins.c Index: test/CodeGen/avx512f-builtins.c === --- test/CodeGen/avx512f-builtins.c +++ test/CodeGen/avx512f-builtins.c @@ -328,13 +328,6 @@ return _mm512_set1_pd(d); } -__m512d test_mm512_castpd256_pd512(__m256d a) -{ - // CHECK-LABEL: @test_mm512_castpd256_pd512 - // CHECK: shufflevector <4 x double> {{.*}} - return _mm512_castpd256_pd512(a); -} - __mmask16 test_mm512_knot(__mmask16 a) { // CHECK-LABEL: @test_mm512_knot @@ -5925,18 +5918,66 @@ return _mm512_maskz_cvttpd_epu32(__U, __A); } -__m512d test_mm512_castpd128_pd512(__m128d __A) { - // CHECK-LABEL: @test_mm512_castpd128_pd512 - // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <8 x i32> - return _mm512_castpd128_pd512(__A); +__m512 test_mm512_castpd_ps (__m512d __A) +{ + // CHECK-LABEL: @test_mm512_castpd_ps + // CHECK: bitcast <8 x double> %1 to <16 x float> + return _mm512_castpd_ps (__A); +} + +__m512d test_mm512_castps_pd (__m512 __A) +{ + // CHECK-LABEL: @test_mm512_castps_pd + // CHECK: bitcast <16 x float> %1 to <8 x double> + return _mm512_castps_pd (__A); +} + +__m512i test_mm512_castpd_si512 (__m512d __A) +{ + // CHECK-LABEL: @test_mm512_castpd_si512 + // CHECK: bitcast <8 x double> %1 to <8 x i64> + return _mm512_castpd_si512 (__A); } __m512 test_mm512_castps128_ps512(__m128 __A) { // CHECK-LABEL: @test_mm512_castps128_ps512 // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <16 x i32> return _mm512_castps128_ps512(__A); } +__m512d test_mm512_castpd128_pd512(__m128d __A) { + // CHECK-LABEL: @test_mm512_castpd128_pd512 + // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <8 x i32> + return _mm512_castpd128_pd512(__A); +} + +__m512d test_mm512_castpd256_pd512(__m256d a) +{ + // CHECK-LABEL: @test_mm512_castpd256_pd512 + // 
CHECK: shufflevector <4 x double> {{.*}} + return _mm512_castpd256_pd512(a); +} + +__m256d test_mm512_castpd512_pd256 (__m512d __A) +{ + // CHECK-LABEL: @test_mm512_castpd512_pd256 + // CHECK: shufflevector <8 x double> %1, <8 x double> %2, <4 x i32> + return _mm512_castpd512_pd256 (__A); +} + +__m256 test_mm512_castps512_ps256 (__m512 __A) +{ + // CHECK-LABEL: @test_mm512_castps512_ps256 + // CHECK: shufflevector <16 x float> %1, <16 x float> %2, <8 x i32> + return _mm512_castps512_ps256 (__A); +} + +__m512i test_mm512_castps_si512 (__m512 __A) +{ + // CHECK-LABEL: @test_mm512_castps_si512 + // CHECK: bitcast <16 x float> %1 to <8 x i64> + return _mm512_castps_si512 (__A); +} __m512i test_mm512_castsi128_si512(__m128i __A) { // CHECK-LABEL: @test_mm512_castsi128_si512 // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <8 x i32> @@ -5949,6 +5990,26 @@ return _mm512_castsi256_si512(__A); } +__m512 test_mm512_castsi512_ps (__m512i __A) +{ + // CHECK-LABEL: @test_mm512_castsi512_ps + // CHECK: bitcast <8 x i64> %1 to <16 x float> + return _mm512_castsi512_ps (__A); +} + +__m512d test_mm512_castsi512_pd (__m512i __A) +{ + // CHECK-LABEL: @test_mm512_castsi512_pd + // CHECK: bitcast <8 x i64> %1 to <8 x double> + return _mm512_castsi512_pd (__A); +} + +__m128i test_mm512_castsi512_si128 (__m512i __A) +{ + // CHECK-LABEL: @test_mm512_castsi512_si128 + // CHECK: shufflevector <8 x i64> %1, <8 x i64> %2, <2 x i32> + return _mm512_castsi512_si128 (__A); +} __m128 test_mm_cvt_roundsd_ss(__m128 __A, __m128d __B) { // CHECK-LABEL: @test_mm_cvt_roundsd_ss Index: lib/Headers/avx512fintrin.h === --- lib/Headers/avx512fintrin.h +++ lib/Headers/avx512fintrin.h @@ -337,19 +337,54 @@ return __builtin_shufflevector(__a, __a, 0, 1); } +static __inline __m256d __DEFAULT_FN_ATTRS +_mm512_castpd512_pd256 (__m512d __A) +{ + return __builtin_shufflevector(__A, __A, 0, 1, 2, 3); +} + static __inline __m128 __DEFAULT_FN_ATTRS _mm512_castps512_ps128(__m512 __a) { return 
__builtin_shufflevector(__a, __a, 0, 1, 2, 3); } +static __inline __m256 __DEFAULT_FN_ATTRS +_mm512_castps512_ps256 (__m512 __A) +{ + return __builtin_shufflevector(__A, __A, 0, 1, 2, 3, 4, 5, 6, 7); +} + +static __inline __m512 __DEFAULT_FN_ATTRS +_mm512_castpd_ps (__m512d __A) +{ + return (__m512) (__A); +} + +static __inline __m512i __DEFAULT_FN_ATTRS +_mm512_castpd_si512 (__m512d __A) +{ + return (__m512i) (__A); +} static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_castpd128_pd512 (__m128d __A) { return __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1); } +static __inline __m512d __DEFAULT_FN_ATTRS +_mm512_castps_pd (__m512 __A) +{ + return (__m512d) (__A); +} + +static __inline __m512i __DEFAULT_FN_AT
[PATCH] D20521: [Clang][AVX512][Builtin] adding missing intrinsics for vpmultishiftqb{128|256|512} instruction set
m_zuckerman created this revision. m_zuckerman added reviewers: AsafBadouh, igorb, delena. m_zuckerman added a subscriber: cfe-commits. http://reviews.llvm.org/D20521 Files: include/clang/Basic/BuiltinsX86.def lib/Headers/avx512vbmiintrin.h lib/Headers/avx512vbmivlintrin.h test/CodeGen/avx512vbmi-builtins.c test/CodeGen/avx512vbmivl-builtin.c Index: test/CodeGen/avx512vbmivl-builtin.c === --- test/CodeGen/avx512vbmivl-builtin.c +++ test/CodeGen/avx512vbmivl-builtin.c @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature avx512vbmi -target-feature avx512vl -target-feature avx2 -emit-llvm -o - -Werror | FileCheck %s +// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +avx512vbmi -target-feature +avx512vl -target-feature +avx512bw -emit-llvm -o - -Werror | FileCheck %s // Don't include mm_malloc.h, it's system specific. #define __MM_MALLOC_H @@ -85,6 +85,43 @@ __m256i test_mm256_maskz_permutex2var_epi8(__mmask32 __U, __m256i __A, __m256i __I, __m256i __B) { // CHECK-LABEL: @test_mm256_maskz_permutex2var_epi8 - // CHECK: @llvm.x86.avx512.mask.vpermt2var.qi.256 + // CHECK: @llvm.x86.avx512.maskz.vpermt2var.qi.256 return _mm256_maskz_permutex2var_epi8(__U, __A, __I, __B); -} \ No newline at end of file +} + +__m128i test_mm_mask_multishift_epi64_epi8(__m128i __W, __mmask16 __M, __m128i __X, __m128i __Y) { + // CHECK-LABEL: @test_mm_mask_multishift_epi64_epi8 + // CHECK: @llvm.x86.avx512.mask.pmultishift.qb.128 + return _mm_mask_multishift_epi64_epi8(__W, __M, __X, __Y); +} + +__m128i test_mm_maskz_multishift_epi64_epi8(__mmask16 __M, __m128i __X, __m128i __Y) { + // CHECK-LABEL: @test_mm_maskz_multishift_epi64_epi8 + // CHECK: @llvm.x86.avx512.mask.pmultishift.qb.128 + return _mm_maskz_multishift_epi64_epi8(__M, __X, __Y); +} + +__m128i test_mm_multishift_epi64_epi8(__m128i __X, __m128i __Y) { + // CHECK-LABEL: @test_mm_multishift_epi64_epi8 + // CHECK: @llvm.x86.avx512.mask.pmultishift.qb.128 + return 
_mm_multishift_epi64_epi8(__X, __Y); +} + +__m256i test_mm256_mask_multishift_epi64_epi8(__m256i __W, __mmask32 __M, __m256i __X, __m256i __Y) { + // CHECK-LABEL: @test_mm256_mask_multishift_epi64_epi8 + // CHECK: @llvm.x86.avx512.mask.pmultishift.qb.256 + return _mm256_mask_multishift_epi64_epi8(__W, __M, __X, __Y); +} + +__m256i test_mm256_maskz_multishift_epi64_epi8(__mmask32 __M, __m256i __X, __m256i __Y) { + // CHECK-LABEL: @test_mm256_maskz_multishift_epi64_epi8 + // CHECK: @llvm.x86.avx512.mask.pmultishift.qb.256 + return _mm256_maskz_multishift_epi64_epi8(__M, __X, __Y); +} + +__m256i test_mm256_multishift_epi64_epi8(__m256i __X, __m256i __Y) { + // CHECK-LABEL: @test_mm256_multishift_epi64_epi8 + // CHECK: @llvm.x86.avx512.mask.pmultishift.qb.256 + return _mm256_multishift_epi64_epi8(__X, __Y); +} + Index: test/CodeGen/avx512vbmi-builtins.c === --- test/CodeGen/avx512vbmi-builtins.c +++ test/CodeGen/avx512vbmi-builtins.c @@ -46,3 +46,21 @@ // CHECK: @llvm.x86.avx512.mask.permvar.qi.512 return _mm512_mask_permutexvar_epi8(__W, __M, __A, __B); } + +__m512i test_mm512_mask_multishift_epi64_epi8(__m512i __W, __mmask64 __M, __m512i __X, __m512i __Y) { + // CHECK-LABEL: @test_mm512_mask_multishift_epi64_epi8 + // CHECK: @llvm.x86.avx512.mask.pmultishift.qb.512 + return _mm512_mask_multishift_epi64_epi8(__W, __M, __X, __Y); +} + +__m512i test_mm512_maskz_multishift_epi64_epi8(__mmask64 __M, __m512i __X, __m512i __Y) { + // CHECK-LABEL: @test_mm512_maskz_multishift_epi64_epi8 + // CHECK: @llvm.x86.avx512.mask.pmultishift.qb.512 + return _mm512_maskz_multishift_epi64_epi8(__M, __X, __Y); +} + +__m512i test_mm512_multishift_epi64_epi8(__m512i __X, __m512i __Y) { + // CHECK-LABEL: @test_mm512_multishift_epi64_epi8 + // CHECK: @llvm.x86.avx512.mask.pmultishift.qb.512 + return _mm512_multishift_epi64_epi8(__X, __Y); +} Index: lib/Headers/avx512vbmivlintrin.h === --- lib/Headers/avx512vbmivlintrin.h +++ lib/Headers/avx512vbmivlintrin.h @@ -29,7 +29,7 @@ #define 
__VBMIVLINTRIN_H /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__(( __nodebug__, __target__("avx512vbmi,avx512vl"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi,avx512vl"))) static __inline__ __m128i __DEFAULT_FN_ATTRS @@ -183,6 +183,65 @@ (__mmask32) __M); } +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_multishift_epi64_epi8 (__m128i __W, __mmask16 __M, __m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X, +(__v16qi) __Y, +(__v16qi) __W, +(__mmask16) __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm
r270401 - [Clang][AVX512][BUILTIN]adding missing intrinsics for movdaq instruction set
Author: mzuckerm Date: Mon May 23 03:01:48 2016 New Revision: 270401 URL: http://llvm.org/viewvc/llvm-project?rev=270401&view=rev Log: [Clang][AVX512][BUILTIN]adding missing intrinsics for movdaq instruction set Differential Revision: http://reviews.llvm.org/D20514 Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def cfe/trunk/lib/Headers/avx512fintrin.h cfe/trunk/lib/Headers/avx512vlintrin.h cfe/trunk/test/CodeGen/avx512f-builtins.c cfe/trunk/test/CodeGen/avx512vl-builtins.c Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=270401&r1=270400&r2=270401&view=diff == --- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original) +++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Mon May 23 03:01:48 2016 @@ -1723,6 +1723,11 @@ TARGET_BUILTIN(__builtin_ia32_psrlw128_m TARGET_BUILTIN(__builtin_ia32_psrlw256_mask, "V16sV16sV8sV16sUs","","avx512bw,avx512vl") TARGET_BUILTIN(__builtin_ia32_psrlwi128_mask, "V8sV8sIiV8sUc","","avx512bw,avx512vl") TARGET_BUILTIN(__builtin_ia32_psrlwi256_mask, "V16sV16sIiV16sUs","","avx512bw,avx512vl") +TARGET_BUILTIN(__builtin_ia32_movdqa32_128_mask, "V4iV4iV4iUc","","avx512vl") +TARGET_BUILTIN(__builtin_ia32_movdqa32_256_mask, "V8iV8iV8iUc","","avx512vl") +TARGET_BUILTIN(__builtin_ia32_movdqa32_512_mask, "V16iV16iV16iUs","","avx512f") +TARGET_BUILTIN(__builtin_ia32_movdqa32load128_mask, "V4iV4i*V4iUc","","avx512f") +TARGET_BUILTIN(__builtin_ia32_movdqa32load256_mask, "V8iV8i*V8iUc","","avx512f") TARGET_BUILTIN(__builtin_ia32_movdqa32load512_mask, "V16iV16iC*V16iUs","","avx512f") TARGET_BUILTIN(__builtin_ia32_movdqa32store512_mask, "vV16i*V16iUs","","avx512f") TARGET_BUILTIN(__builtin_ia32_movdqa64_512_mask, "V8LLiV8LLiV8LLiUc","","avx512f") Modified: cfe/trunk/lib/Headers/avx512fintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=270401&r1=270400&r2=270401&view=diff == --- 
cfe/trunk/lib/Headers/avx512fintrin.h (original) +++ cfe/trunk/lib/Headers/avx512fintrin.h Mon May 23 03:01:48 2016 @@ -4926,6 +4926,23 @@ _mm512_mask_store_epi32 (void *__P, __mm } static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A) +{ + return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A, + (__v16si) __W, + (__mmask16) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A) +{ + return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A) { return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A, Modified: cfe/trunk/lib/Headers/avx512vlintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512vlintrin.h?rev=270401&r1=270400&r2=270401&view=diff == --- cfe/trunk/lib/Headers/avx512vlintrin.h (original) +++ cfe/trunk/lib/Headers/avx512vlintrin.h Mon May 23 03:01:48 2016 @@ -5834,7 +5834,78 @@ _mm256_maskz_srav_epi64 (__mmask8 __U, _ (__mmask8) __U); } +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_mov_epi32 (__m128i __W, __mmask8 __U, __m128i __A) +{ + return (__m128i) __builtin_ia32_movdqa32_128_mask ((__v4si) __A, + (__v4si) __W, + (__mmask8) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_mov_epi32 (__mmask8 __U, __m128i __A) +{ + return (__m128i) __builtin_ia32_movdqa32_128_mask ((__v4si) __A, + (__v4si) + _mm_setzero_si128 (), + (__mmask8) __U); +} + + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_mov_epi32 (__m256i __W, __mmask8 __U, __m256i __A) +{ + return (__m256i) __builtin_ia32_movdqa32_256_mask ((__v8si) __A, + (__v8si) __W, + (__mmask8) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_mov_epi32 (__mmask8 __U, __m256i __A) +{ + return (__m256i) 
__builtin_ia32_movdqa32_256_mask ((__v8si) __A, + (__v8si) + _mm256_setzero_si256 (), + (__mmask8) __U); +} +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_load_epi32 (__m128i __W, __mmask8 __U, void const *__P) +{ + return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P, + (__v4si) __W, + (__mmask8) + __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_load_epi32 (__mmask8 __U, void const *__P) +{ + return (__m128i) __builtin_ia3
[PATCH] D20514: [Clang][AVX512][BUILTIN]adding missing intrinsics for movdaq instruction set
m_zuckerman created this revision. m_zuckerman added reviewers: AsafBadouh, igorb, delena. m_zuckerman added a subscriber: cfe-commits. http://reviews.llvm.org/D20514 Files: include/clang/Basic/BuiltinsX86.def lib/Headers/avx512fintrin.h lib/Headers/avx512vlintrin.h test/CodeGen/avx512f-builtins.c test/CodeGen/avx512vl-builtins.c Index: test/CodeGen/avx512vl-builtins.c === --- test/CodeGen/avx512vl-builtins.c +++ test/CodeGen/avx512vl-builtins.c @@ -3948,6 +3948,30 @@ return _mm256_mask_store_epi32(__P, __U, __A); } +__m128i test_mm_mask_mov_epi32(__m128i __W, __mmask8 __U, __m128i __A) { + // CHECK-LABEL: @test_mm_mask_mov_epi32 + // CHECK: @llvm.x86.avx512.mask.mov + return _mm_mask_mov_epi32(__W, __U, __A); +} + +__m128i test_mm_maskz_mov_epi32(__mmask8 __U, __m128i __A) { + // CHECK-LABEL: @test_mm_maskz_mov_epi32 + // CHECK: @llvm.x86.avx512.mask.mov + return _mm_maskz_mov_epi32(__U, __A); +} + +__m256i test_mm256_mask_mov_epi32(__m256i __W, __mmask8 __U, __m256i __A) { + // CHECK-LABEL: @test_mm256_mask_mov_epi32 + // CHECK: @llvm.x86.avx512.mask.mov + return _mm256_mask_mov_epi32(__W, __U, __A); +} + +__m256i test_mm256_maskz_mov_epi32(__mmask8 __U, __m256i __A) { + // CHECK-LABEL: @test_mm256_maskz_mov_epi32 + // CHECK: @llvm.x86.avx512.mask.mov + return _mm256_maskz_mov_epi32(__U, __A); +} + __m128i test_mm_mask_mov_epi64(__m128i __W, __mmask8 __U, __m128i __A) { // CHECK-LABEL: @test_mm_mask_mov_epi64 // CHECK: @llvm.x86.avx512.mask.mov @@ -3972,6 +3996,30 @@ return _mm256_maskz_mov_epi64(__U, __A); } +__m128i test_mm_mask_load_epi32(__m128i __W, __mmask8 __U, void const *__P) { + // CHECK-LABEL: @test_mm_mask_load_epi32 + // CHECK: @llvm.x86.avx512.mask.load.d.128 + return _mm_mask_load_epi32(__W, __U, __P); +} + +__m128i test_mm_maskz_load_epi32(__mmask8 __U, void const *__P) { + // CHECK-LABEL: @test_mm_maskz_load_epi32 + // CHECK: @llvm.x86.avx512.mask.load.d.128 + return _mm_maskz_load_epi32(__U, __P); +} + +__m256i test_mm256_mask_load_epi32(__m256i 
__W, __mmask8 __U, void const *__P) { + // CHECK-LABEL: @test_mm256_mask_load_epi32 + // CHECK: @llvm.x86.avx512.mask.load.d.256 + return _mm256_mask_load_epi32(__W, __U, __P); +} + +__m256i test_mm256_maskz_load_epi32(__mmask8 __U, void const *__P) { + // CHECK-LABEL: @test_mm256_maskz_load_epi32 + // CHECK: @llvm.x86.avx512.mask.load.d.256 + return _mm256_maskz_load_epi32(__U, __P); +} + __m128i test_mm_mask_load_epi64(__m128i __W, __mmask8 __U, void const *__P) { // CHECK-LABEL: @test_mm_mask_load_epi64 // CHECK: @llvm.x86.avx512.mask.load.q.128 Index: test/CodeGen/avx512f-builtins.c === --- test/CodeGen/avx512f-builtins.c +++ test/CodeGen/avx512f-builtins.c @@ -2542,6 +2542,18 @@ return _mm512_maskz_load_epi32(__U, __P); } +__m512i test_mm512_mask_mov_epi32(__m512i __W, __mmask16 __U, __m512i __A) { + // CHECK-LABEL: @test_mm512_mask_mov_epi32 + // CHECK: @llvm.x86.avx512.mask.mov + return _mm512_mask_mov_epi32(__W, __U, __A); +} + +__m512i test_mm512_maskz_mov_epi32(__mmask16 __U, __m512i __A) { + // CHECK-LABEL: @test_mm512_maskz_mov_epi32 + // CHECK: @llvm.x86.avx512.mask.mov + return _mm512_maskz_mov_epi32(__U, __A); +} + __m512i test_mm512_mask_mov_epi64(__m512i __W, __mmask8 __U, __m512i __A) { // CHECK-LABEL: @test_mm512_mask_mov_epi64 // CHECK: @llvm.x86.avx512.mask.mov Index: lib/Headers/avx512vlintrin.h === --- lib/Headers/avx512vlintrin.h +++ lib/Headers/avx512vlintrin.h @@ -5834,7 +5834,78 @@ (__mmask8) __U); } +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_mov_epi32 (__m128i __W, __mmask8 __U, __m128i __A) +{ + return (__m128i) __builtin_ia32_movdqa32_128_mask ((__v4si) __A, + (__v4si) __W, + (__mmask8) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_mov_epi32 (__mmask8 __U, __m128i __A) +{ + return (__m128i) __builtin_ia32_movdqa32_128_mask ((__v4si) __A, + (__v4si) + _mm_setzero_si128 (), + (__mmask8) __U); +} + + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_mov_epi32 (__m256i __W, __mmask8 __U, __m256i 
__A) +{ + return (__m256i) __builtin_ia32_movdqa32_256_mask ((__v8si) __A, + (__v8si) __W, + (__mmask8) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_mov_epi32 (__mmask8 __U, __m256i __A) +{ + return (__m256i) __builtin_ia32_movdqa32_256_mask ((__v8si) __A, + (__v8si) + _mm256_setzero_si256 (), + (__mmask8) __U); +} +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_load_epi32 (__m128i __W, __mmask8 __U, void const *__P) +{ + return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P
r270047 - [Clang][AVX512][intrinsics] continue completing missing set intrinsics
Author: mzuckerm Date: Thu May 19 07:07:49 2016 New Revision: 270047 URL: http://llvm.org/viewvc/llvm-project?rev=270047&view=rev Log: [Clang][AVX512][intrinsics] continue completing missing set intrinsics Differential Revision: http://reviews.llvm.org/D20160 Modified: cfe/trunk/lib/Headers/avx512fintrin.h cfe/trunk/test/CodeGen/avx512f-builtins.c Modified: cfe/trunk/lib/Headers/avx512fintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=270047&r1=270046&r2=270047&view=diff == --- cfe/trunk/lib/Headers/avx512fintrin.h (original) +++ cfe/trunk/lib/Headers/avx512fintrin.h Thu May 19 07:07:49 2016 @@ -8983,6 +8983,21 @@ _mm512_mask_set1_epi64 (__m512i __O, __m __M); } +static __inline __m512i __DEFAULT_FN_ATTRS +_mm512_set_epi32 (int __A, int __B, int __C, int __D, + int __E, int __F, int __G, int __H, + int __I, int __J, int __K, int __L, + int __M, int __N, int __O, int __P) +{ + return __extension__ (__m512i)(__v16si) + { __P, __O, __N, __M, __L, __K, __J, __I, +__H, __G, __F, __E, __D, __C, __B, __A }; +} + +#define _mm512_setr_epi32(e0,e1,e2,e3,e4,e5,e6,e7, \ + e8,e9,e10,e11,e12,e13,e14,e15) \ + _mm512_set_epi32(e15,e14,e13,e12,e11,e10,e9,e8,e7,e6,e5,e4,e3,e2,e1,e0) + static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_set_epi64 (long long __A, long long __B, long long __C, long long __D, long long __E, long long __F, @@ -8992,6 +9007,9 @@ _mm512_set_epi64 (long long __A, long lo { __H, __G, __F, __E, __D, __C, __B, __A }; } +#define _mm512_setr_epi64(e0,e1,e2,e3,e4,e5,e6,e7) \ + _mm512_set_epi64(e7,e6,e5,e4,e3,e2,e1,e0) + static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_set_pd (double __A, double __B, double __C, double __D, double __E, double __F, double __G, double __H) @@ -9000,6 +9018,9 @@ _mm512_set_pd (double __A, double __B, d { __H, __G, __F, __E, __D, __C, __B, __A }; } +#define _mm512_setr_pd(e0,e1,e2,e3,e4,e5,e6,e7) \ + _mm512_set_pd(e7,e6,e5,e4,e3,e2,e1,e0) + static __inline__ __m512 __DEFAULT_FN_ATTRS 
_mm512_set_ps (float __A, float __B, float __C, float __D, float __E, float __F, float __G, float __H, @@ -9011,6 +9032,9 @@ _mm512_set_ps (float __A, float __B, flo __H, __G, __F, __E, __D, __C, __B, __A }; } +#define _mm512_setr_ps(e0,e1,e2,e3,e4,e5,e6,e7,e8,e9,e10,e11,e12,e13,e14,e15) \ + _mm512_set_ps(e15,e14,e13,e12,e11,e10,e9,e8,e7,e6,e5,e4,e3,e2,e1,e0) + #undef __DEFAULT_FN_ATTRS #endif // __AVX512FINTRIN_H Modified: cfe/trunk/test/CodeGen/avx512f-builtins.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512f-builtins.c?rev=270047&r1=270046&r2=270047&view=diff == --- cfe/trunk/test/CodeGen/avx512f-builtins.c (original) +++ cfe/trunk/test/CodeGen/avx512f-builtins.c Thu May 19 07:07:49 2016 @@ -6521,6 +6521,74 @@ __m512i test_mm512_mask_set1_epi32 (__m5 return _mm512_mask_set1_epi32 ( __O, __M, __A); } +__m512i test_mm512_set_epi32 (int __A, int __B, int __C, int __D, + int __E, int __F, int __G, int __H, + int __I, int __J, int __K, int __L, + int __M, int __N, int __O, int __P) +{ + //CHECK-LABLE: @test_mm512_set_epi32 + //CHECK: insertelement{{.*}}i32 0 +//CHECK: insertelement{{.*}}i32 1 +//CHECK: insertelement{{.*}}i32 2 +//CHECK: insertelement{{.*}}i32 3 +//CHECK: insertelement{{.*}}i32 4 +//CHECK: insertelement{{.*}}i32 5 +//CHECK: insertelement{{.*}}i32 6 +//CHECK: insertelement{{.*}}i32 7 +//CHECK: insertelement{{.*}}i32 8 +//CHECK: insertelement{{.*}}i32 9 +//CHECK: insertelement{{.*}}i32 10 +//CHECK: insertelement{{.*}}i32 11 +//CHECK: insertelement{{.*}}i32 12 +//CHECK: insertelement{{.*}}i32 13 +//CHECK: insertelement{{.*}}i32 14 +//CHECK: insertelement{{.*}}i32 15 + return _mm512_set_epi32( __A, __B, __C, __D,__E, __F, __G, __H, + __I, __J, __K, __L,__M, __N, __O, __P); +} + +__m512i test_mm512_setr_epi32 (int __A, int __B, int __C, int __D, + int __E, int __F, int __G, int __H, + int __I, int __J, int __K, int __L, + int __M, int __N, int __O, int __P) +{ +//CHECK-LABLE: @test_mm512_setr_epi32 + //CHECK: %0 = 
load{{.*}}%__P.addr, align 4 + //CHECK: %1 = load{{.*}}%__O.addr, align 4 + //CHECK: %2 = load{{.*}}%__N.addr, align 4 + //CHECK: %3 = load{{.*}}%__M.addr, align 4 + //CHECK: %4 = load{{.*}}%__L.addr, align 4 + //CHECK: %5 = load{{.*}}%__K.addr, align 4 + //CHECK: %6 = load{{.*}}%__J.addr, align 4 + //CHECK: %7 = load{{.*}}%__I.addr, align 4 + //CHECK: %8 = load{{.*}}%__H.addr, align 4 + //CHECK: %9 = load{{.*}}%__G.addr, align 4 + //CHECK: %10 = load{{.*}}%__F.addr, align 4 + //CHECK: %11 = load{{.*}}%__E.addr, align 4 + //CHECK: %12 =
Re: [PATCH] D20321: [Clang][AVX512][intrinsics] Fix vperm intrinsics.
m_zuckerman added a comment. Thanks, you are right! Repository: rL LLVM http://reviews.llvm.org/D20321 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r269939 - [Clang][AVX512] completing missing intrinsics [pandnd].
Author: mzuckerm Date: Wed May 18 10:25:53 2016 New Revision: 269939 URL: http://llvm.org/viewvc/llvm-project?rev=269939&view=rev Log: [Clang][AVX512] completing missing intrinsics [pandnd]. Differential Revision: http://reviews.llvm.org/D20101 Modified: cfe/trunk/lib/Headers/avx512fintrin.h cfe/trunk/test/CodeGen/avx512f-builtins.c Modified: cfe/trunk/lib/Headers/avx512fintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=269939&r1=269938&r2=269939&view=diff == --- cfe/trunk/lib/Headers/avx512fintrin.h (original) +++ cfe/trunk/lib/Headers/avx512fintrin.h Wed May 18 10:25:53 2016 @@ -418,6 +418,12 @@ _mm512_maskz_and_epi64(__mmask8 __k, __m } static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_andnot_si512 (__m512i __A, __m512i __B) +{ + return (__m512i)(~(__A) & __B); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_andnot_epi32 (__m512i __A, __m512i __B) { return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A, Modified: cfe/trunk/test/CodeGen/avx512f-builtins.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512f-builtins.c?rev=269939&r1=269938&r2=269939&view=diff == --- cfe/trunk/test/CodeGen/avx512f-builtins.c (original) +++ cfe/trunk/test/CodeGen/avx512f-builtins.c Wed May 18 10:25:53 2016 @@ -1451,6 +1451,17 @@ __m512i test_mm512_mask_andnot_epi32 (__ return _mm512_mask_andnot_epi32(__src,__k,__A,__B); } +__m512i test_mm512_andnot_si512(__m512i __A, __m512i __B) +{ + //CHECK-LABLE: @test_mm512_andnot_si512 + //CHECK: load {{.*}}%__A.addr.i, align 64 + //CHECK: %neg.i = xor{{.*}}, + //CHECK: load {{.*}}%__B.addr.i, align 64 + //CHECK: and <8 x i64> %neg.i,{{.*}} + + return _mm512_andnot_si512(__A, __B); +} + __m512i test_mm512_andnot_epi32(__m512i __A, __m512i __B) { //CHECK-LABEL: @test_mm512_andnot_epi32 //CHECK: @llvm.x86.avx512.mask.pandn.d.512 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D20321: [Clang][AVX512][intrinsics] Fix vperm intrinsics.
m_zuckerman accepted this revision. m_zuckerman added a comment. This revision is now accepted and ready to land. lgtm Repository: rL LLVM http://reviews.llvm.org/D20321 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D20359: Convert AVX non-temporal store builtins to LLVM-native IR. This was previously done for SSE builtins.
m_zuckerman created this revision. m_zuckerman added a reviewer: craig.topper. m_zuckerman added subscribers: delena, cfe-commits, AsafBadouh, igorb. http://reviews.llvm.org/D20359 Files: include/llvm/IR/IntrinsicsX86.td lib/Target/X86/X86IntrinsicsInfo.h test/CodeGen/X86/avx512-intrinsics.ll Index: lib/Target/X86/X86IntrinsicsInfo.h === --- lib/Target/X86/X86IntrinsicsInfo.h +++ lib/Target/X86/X86IntrinsicsInfo.h @@ -276,9 +276,6 @@ X86_INTRINSIC_DATA(avx512_scattersiv4_si, SCATTER, X86::VPSCATTERDDZ128mr, 0), X86_INTRINSIC_DATA(avx512_scattersiv8_sf, SCATTER, X86::VSCATTERDPSZ256mr, 0), X86_INTRINSIC_DATA(avx512_scattersiv8_si, SCATTER, X86::VPSCATTERDDZ256mr, 0), - X86_INTRINSIC_DATA(avx512_storent_pd_512, STOREANT, ISD::DELETED_NODE, 0), - X86_INTRINSIC_DATA(avx512_storent_ps_512, STOREANT, ISD::DELETED_NODE, 0), - X86_INTRINSIC_DATA(avx512_storent_q_512, STOREANT, ISD::DELETED_NODE, 0), X86_INTRINSIC_DATA(rdpmc, RDPMC, X86ISD::RDPMC_DAG, 0), X86_INTRINSIC_DATA(rdrand_16, RDRAND, X86ISD::RDRAND, 0), X86_INTRINSIC_DATA(rdrand_32, RDRAND, X86ISD::RDRAND, 0), Index: include/llvm/IR/IntrinsicsX86.td === --- include/llvm/IR/IntrinsicsX86.td +++ include/llvm/IR/IntrinsicsX86.td @@ -2234,18 +2234,6 @@ [IntrArgMemOnly]>; } -// Store ops using non-temporal hint -let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
- def int_x86_avx512_storent_q_512 : -GCCBuiltin<"__builtin_ia32_movntdq512">, -Intrinsic<[], [llvm_ptr_ty, llvm_v8i64_ty], [IntrArgMemOnly]>; - def int_x86_avx512_storent_pd_512 : -GCCBuiltin<"__builtin_ia32_movntpd512">, -Intrinsic<[], [llvm_ptr_ty, llvm_v8f64_ty], [IntrArgMemOnly]>; - def int_x86_avx512_storent_ps_512 : -GCCBuiltin<"__builtin_ia32_movntps512">, -Intrinsic<[], [llvm_ptr_ty, llvm_v16f32_ty], [IntrArgMemOnly]>; -} //===--===// // AVX2 Index: test/CodeGen/X86/avx512-intrinsics.ll === --- test/CodeGen/X86/avx512-intrinsics.ll +++ test/CodeGen/X86/avx512-intrinsics.ll @@ -7413,39 +7413,6 @@ ret <2 x double> %res4 } -declare void @llvm.x86.avx512.storent.q.512(i8*, <8 x i64>) - -define void@test_storent_q_512(<8 x i64> %data, i8* %ptr) { -; CHECK-LABEL: test_storent_q_512: -; CHECK: ## BB#0: -; CHECK-NEXT:vmovntdq %zmm0, (%rdi) -; CHECK-NEXT:retq - call void @llvm.x86.avx512.storent.q.512(i8* %ptr, <8 x i64> %data) - ret void -} - -declare void @llvm.x86.avx512.storent.pd.512(i8*, <8 x double>) - -define void @test_storent_pd_512(<8 x double> %data, i8* %ptr) { -; CHECK-LABEL: test_storent_pd_512: -; CHECK: ## BB#0: -; CHECK-NEXT:vmovntpd %zmm0, (%rdi) -; CHECK-NEXT:retq - call void @llvm.x86.avx512.storent.pd.512(i8* %ptr, <8 x double> %data) - ret void -} - -declare void @llvm.x86.avx512.storent.ps.512(i8*, <16 x float>) - -define void @test_storent_ps_512(<16 x float> %data, i8* %ptr) { -; CHECK-LABEL: test_storent_ps_512: -; CHECK: ## BB#0: -; CHECK-NEXT:vmovntps %zmm0, (%rdi) -; CHECK-NEXT:retq - call void @llvm.x86.avx512.storent.ps.512(i8* %ptr, <16 x float> %data) - ret void -} - declare i16 @llvm.x86.avx512.ptestnm.d.512(<16 x i32>, <16 x i32>, i16 %x2) define i16@test_int_x86_avx512_ptestnm_d_512(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) { Index: lib/Target/X86/X86IntrinsicsInfo.h === --- lib/Target/X86/X86IntrinsicsInfo.h +++ lib/Target/X86/X86IntrinsicsInfo.h @@ -276,9 +276,6 @@ X86_INTRINSIC_DATA(avx512_scattersiv4_si, SCATTER, 
X86::VPSCATTERDDZ128mr, 0), X86_INTRINSIC_DATA(avx512_scattersiv8_sf, SCATTER, X86::VSCATTERDPSZ256mr, 0), X86_INTRINSIC_DATA(avx512_scattersiv8_si, SCATTER, X86::VPSCATTERDDZ256mr, 0), - X86_INTRINSIC_DATA(avx512_storent_pd_512, STOREANT, ISD::DELETED_NODE, 0), - X86_INTRINSIC_DATA(avx512_storent_ps_512, STOREANT, ISD::DELETED_NODE, 0), - X86_INTRINSIC_DATA(avx512_storent_q_512, STOREANT, ISD::DELETED_NODE, 0), X86_INTRINSIC_DATA(rdpmc, RDPMC, X86ISD::RDPMC_DAG, 0), X86_INTRINSIC_DATA(rdrand_16, RDRAND, X86ISD::RDRAND, 0), X86_INTRINSIC_DATA(rdrand_32, RDRAND, X86ISD::RDRAND, 0), Index: include/llvm/IR/IntrinsicsX86.td === --- include/llvm/IR/IntrinsicsX86.td +++ include/llvm/IR/IntrinsicsX86.td @@ -2234,18 +2234,6 @@ [IntrArgMemOnly]>; } -// Store ops using non-temporal hint -let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". - def int_x86_avx512_storent_q_512 : -GCCBuiltin<"__builtin_ia32_movntdq512">, -Intrinsic<[], [llvm_ptr_ty, llvm_v8i64_ty], [IntrArgMemOnly]>; - def int_x86_avx512_st