Re: [PATCH] D20321: [Clang][AVX512][intrinsics] Fix vperm intrinsics.
igorb added a comment. In http://reviews.llvm.org/D20321#436494, @craig.topper wrote: > Looking at this again. This doesn't match the gcc implementation of the > builtins. Unless their header file is also wrong. Can you clarify? Thanks, you are correct. I implemented all the changes in code-gen ( http://reviews.llvm.org/D20515 ). If there are no objections, I will abandon this review. Repository: rL LLVM http://reviews.llvm.org/D20321 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D20321: [Clang][AVX512][intrinsics] Fix vperm intrinsics.
craig.topper added a comment. Looking at this again. This doesn't match the gcc implementation of the builtins. Unless their header file is also wrong. Can you clarify? Repository: rL LLVM http://reviews.llvm.org/D20321 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D20321: [Clang][AVX512][intrinsics] Fix vperm intrinsics.
igorb updated this revision to Diff 58055. igorb added a comment. Update patch according to comments. Thanks for the review, Craig! Repository: rL LLVM http://reviews.llvm.org/D20321 Files: include/clang/Basic/BuiltinsX86.def lib/Headers/avx512bwintrin.h lib/Headers/avx512fintrin.h lib/Headers/avx512vlbwintrin.h lib/Headers/avx512vlintrin.h Index: lib/Headers/avx512vlintrin.h === --- lib/Headers/avx512vlintrin.h +++ lib/Headers/avx512vlintrin.h @@ -8940,111 +8940,111 @@ static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_permutexvar_pd (__m256i __X, __m256d __Y) { - return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y, - (__v4di) __X, + return (__m256d) __builtin_ia32_permvardf256_mask ((__v4di) __X /* idx */, + (__v4df) __Y, (__v4df) _mm256_undefined_si256 (), (__mmask8) -1); } static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_permutexvar_pd (__m256d __W, __mmask8 __U, __m256i __X, __m256d __Y) { - return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y, - (__v4di) __X, + return (__m256d) __builtin_ia32_permvardf256_mask ((__v4di) __X /* idx */, + (__v4df) __Y, (__v4df) __W, (__mmask8) __U); } static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_permutexvar_pd (__mmask8 __U, __m256i __X, __m256d __Y) { - return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y, - (__v4di) __X, + return (__m256d) __builtin_ia32_permvardf256_mask ((__v4di) __X /* idx */, + (__v4df) __Y, (__v4df) _mm256_setzero_pd (), (__mmask8) __U); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_permutexvar_epi64 (__mmask8 __M, __m256i __X, __m256i __Y) { - return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y, - (__v4di) __X, + return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __X /* idx */, + (__v4di) __Y, (__v4di) _mm256_setzero_si256 (), (__mmask8) __M); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_permutexvar_epi64 (__mmask8 __M, __m256i __X, __m256i __Y) { - return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) 
__Y, - (__v4di) __X, + return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __X /* idx */, + (__v4di) __Y, (__v4di) _mm256_undefined_si256 (), (__mmask8) -1); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_permutexvar_epi64 (__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) { - return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y, - (__v4di) __X, + return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __X /* idx */, + (__v4di) __Y, (__v4di) __W, __M); } static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_permutexvar_ps (__m256 __W, __mmask8 __U, __m256i __X, __m256 __Y) { - return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y, -(__v8si) __X, + return (__m256) __builtin_ia32_permvarsf256_mask ((__v8si) __X /* idx */, +(__v8sf) __Y, (__v8sf) __W, (__mmask8) __U); } static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_permutexvar_ps (__mmask8 __U, __m256i __X, __m256 __Y) { - return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y, -(__v8si) __X, + return (__m256) __builtin_ia32_permvarsf256_mask ((__v8si) __X /* idx */, +(__v8sf) __Y, (__v8sf) _mm256_setzero_ps (), (__mmask8) __U); } static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_permutexvar_ps (__m256i __X, __m256 __Y) { - return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y, -(__v8si) __X, + return (__m256) __builtin_ia32_permvarsf256_mask ((__v8si) __X /* idx */, +(__v8sf) __Y, (__v8sf) _mm256_undefined_si256 (), (__mmask8) -1); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_permutexvar_epi32 (__mmask8 __M, __m256i __X, __m256i __Y) { - return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y, - (__v8si) __X, + return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __X /* idx */, + (__v8si) __Y, (__v8si) _mm256_setzero_si256 (), __M); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_permutexvar_epi32 (__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) { - return (__m256i) 
__builtin_ia32_permvarsi256_mask ((__v8si) __Y, - (__v8si) __X, + return (__m256i) __builtin_ia32_permvarsi256_mask
Re: [PATCH] D20321: [Clang][AVX512][intrinsics] Fix vperm intrinsics.
m_zuckerman added a comment. Thanks, you are right ! Repository: rL LLVM http://reviews.llvm.org/D20321 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D20321: [Clang][AVX512][intrinsics] Fix vperm intrinsics.
craig.topper added a subscriber: craig.topper. craig.topper requested changes to this revision. craig.topper added a reviewer: craig.topper. craig.topper added a comment. This revision now requires changes to proceed. Don't the type casts need to be changed too? For example permutexvar_pd has the index as __v8df. Repository: rL LLVM http://reviews.llvm.org/D20321 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D20321: [Clang][AVX512][intrinsics] Fix vperm intrinsics.
m_zuckerman accepted this revision. m_zuckerman added a comment. This revision is now accepted and ready to land. lgtm Repository: rL LLVM http://reviews.llvm.org/D20321 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D20321: [Clang][AVX512][intrinsics] Fix vperm intrinsics.
igorb created this revision. igorb added reviewers: m_zuckerman, AsafBadouh, delena. igorb added a subscriber: cfe-commits. igorb set the repository for this revision to rL LLVM. [Clang][AVX512][intrinsics] Fix vperm{w|d|q|ps|pd} intrinsics. The index is the first argument to the builtin function. Repository: rL LLVM http://reviews.llvm.org/D20321 Files: lib/Headers/avx512bwintrin.h lib/Headers/avx512fintrin.h lib/Headers/avx512vlbwintrin.h lib/Headers/avx512vlintrin.h Index: lib/Headers/avx512vlintrin.h === --- lib/Headers/avx512vlintrin.h +++ lib/Headers/avx512vlintrin.h @@ -9100,111 +9100,111 @@ static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_permutexvar_pd (__m256i __X, __m256d __Y) { - return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y, - (__v4di) __X, + return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __X /* idx */, + (__v4di) __Y, (__v4df) _mm256_undefined_si256 (), (__mmask8) -1); } static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_permutexvar_pd (__m256d __W, __mmask8 __U, __m256i __X, __m256d __Y) { - return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y, - (__v4di) __X, + return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __X /* idx */, + (__v4di) __Y, (__v4df) __W, (__mmask8) __U); } static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_permutexvar_pd (__mmask8 __U, __m256i __X, __m256d __Y) { - return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y, - (__v4di) __X, + return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __X /* idx */, + (__v4di) __Y, (__v4df) _mm256_setzero_pd (), (__mmask8) __U); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_permutexvar_epi64 (__mmask8 __M, __m256i __X, __m256i __Y) { - return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y, - (__v4di) __X, + return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __X /* idx */, + (__v4di) __Y, (__v4di) _mm256_setzero_si256 (), (__mmask8) __M); } static __inline__ __m256i __DEFAULT_FN_ATTRS 
_mm256_permutexvar_epi64 (__mmask8 __M, __m256i __X, __m256i __Y) { - return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y, - (__v4di) __X, + return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __X /* idx */, + (__v4di) __Y, (__v4di) _mm256_undefined_si256 (), (__mmask8) -1); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_permutexvar_epi64 (__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) { - return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y, - (__v4di) __X, + return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __X /* idx */, + (__v4di) __Y, (__v4di) __W, __M); } static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_permutexvar_ps (__m256 __W, __mmask8 __U, __m256i __X, __m256 __Y) { - return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y, -(__v8si) __X, + return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __X /* idx */, +(__v8si) __Y, (__v8sf) __W, (__mmask8) __U); } static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_permutexvar_ps (__mmask8 __U, __m256i __X, __m256 __Y) { - return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y, -(__v8si) __X, + return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __X /* idx */, +(__v8si) __Y, (__v8sf) _mm256_setzero_ps (), (__mmask8) __U); } static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_permutexvar_ps (__m256i __X, __m256 __Y) { - return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y, -(__v8si) __X, + return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __X /* idx */, +(__v8si) __Y, (__v8sf) _mm256_undefined_si256 (), (__mmask8) -1); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_permutexvar_epi32 (__mmask8 __M, __m256i __X, __m256i __Y) { - return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y, - (__v8si) __X, + return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __X /* idx */, + (__v8si) __Y, (__v8si) _mm256_setzero_si256 (), __M); } static __inline__ __m256i __DEFAULT_FN_ATTRS 
_mm256_mask_permutexvar_epi32 (__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) { - return (__m256i)