coby created this revision.
coby added a reviewer: craig.topper.
Herald added a subscriber: mgorny.

added intrinsics support for vpclmulqdq instructions, matching a similar work 
on the backend (https://reviews.llvm.org/D40101)


Repository:
  rC Clang

https://reviews.llvm.org/D41573

Files:
  include/clang/Basic/BuiltinsX86.def
  include/clang/Driver/Options.td
  lib/Basic/Targets/X86.cpp
  lib/Basic/Targets/X86.h
  lib/Headers/CMakeLists.txt
  lib/Headers/immintrin.h
  lib/Headers/vpclmulqdqintrin.h
  test/CodeGen/attr-target-x86.c
  test/CodeGen/vpclmulqdq-builtins.c
  test/Driver/x86-target-features.c
  test/Preprocessor/predefined-arch-macros.c
  test/Preprocessor/x86_target_features.c

Index: lib/Headers/CMakeLists.txt
===================================================================
--- lib/Headers/CMakeLists.txt
+++ lib/Headers/CMakeLists.txt
@@ -84,6 +84,7 @@
   vadefs.h
   varargs.h
   vecintrin.h
+  vpclmulqdqintrin.h
   wmmintrin.h
   __wmmintrin_aes.h
   __wmmintrin_pclmul.h
Index: lib/Headers/immintrin.h
===================================================================
--- lib/Headers/immintrin.h
+++ lib/Headers/immintrin.h
@@ -118,6 +118,10 @@
 }
 #endif /* __AVX2__ */
 
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__VPCLMULQDQ__)
+#include <vpclmulqdqintrin.h>
+#endif
+
 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI__)
 #include <bmiintrin.h>
 #endif
Index: lib/Headers/vpclmulqdqintrin.h
===================================================================
--- lib/Headers/vpclmulqdqintrin.h
+++ lib/Headers/vpclmulqdqintrin.h
@@ -0,0 +1,48 @@
+/*===------------ vpclmulqdqintrin.h - VPCLMULQDQ intrinsics ---------------===
+ *
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error "Never use <vpclmulqdqintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef __VPCLMULQDQINTRIN_H
+#define __VPCLMULQDQINTRIN_H
+
+#if defined(__VPCLMULQDQ__)
+
+#define _mm256_clmulepi64_epi128(A, B, I) __extension__ ({    \
+  (__m256i)__builtin_ia32_pclmulqdq256((__v4di)(__m256i)(A),  \
+                                       (__v4di)(__m256i)(B),  \
+                                       (char)(I)); })
+
+#if defined(__AVX512F__)
+
+#define _mm512_clmulepi64_epi128(A, B, I) __extension__ ({    \
+  (__m512i)__builtin_ia32_pclmulqdq512((__v8di)(__m512i)(A),  \
+                                       (__v8di)(__m512i)(B),  \
+                                       (char)(I)); })
+
+#endif // defined(__AVX512__)
+#endif // defined(__VPCLMULQDQ__)
+#endif // __VPCLMULQDQINTRIN_H
+
Index: lib/Basic/Targets/X86.cpp
===================================================================
--- lib/Basic/Targets/X86.cpp
+++ lib/Basic/Targets/X86.cpp
@@ -132,6 +132,7 @@
     break;
 
   case CK_Icelake:
+    setFeatureEnabledImpl(Features, "vpclmulqdq", true);
     // TODO: Add icelake features here.
     LLVM_FALLTHROUGH;
   case CK_Cannonlake:
@@ -460,7 +461,7 @@
     LLVM_FALLTHROUGH;
   case AVX:
     Features["fma"] = Features["avx"] = Features["f16c"] = Features["xsave"] =
-        Features["xsaveopt"] = false;
+        Features["xsaveopt"] = Features["vpclmulqdq"] = false;
     setXOPLevel(Features, FMA4, false);
     LLVM_FALLTHROUGH;
   case AVX2:
@@ -575,6 +576,11 @@
   } else if (Name == "pclmul") {
     if (Enabled)
       setSSELevel(Features, SSE2, Enabled);
+  } else if (Name == "vpclmulqdq") {
+    if (Enabled) {
+      setSSELevel(Features, AVX, Enabled);
+      Features["pclmul"] = true;
+    }
   } else if (Name == "avx") {
     setSSELevel(Features, AVX, Enabled);
   } else if (Name == "avx2") {
@@ -638,6 +644,8 @@
       HasAES = true;
     } else if (Feature == "+pclmul") {
       HasPCLMUL = true;
+    } else if (Feature == "+vpclmulqdq") {
+      HasVPCLMULQDQ = true;
     } else if (Feature == "+lzcnt") {
       HasLZCNT = true;
     } else if (Feature == "+rdrnd") {
@@ -937,6 +945,9 @@
   if (HasPCLMUL)
     Builder.defineMacro("__PCLMUL__");
 
+  if (HasVPCLMULQDQ)
+    Builder.defineMacro("__VPCLMULQDQ__");
+
   if (HasLZCNT)
     Builder.defineMacro("__LZCNT__");
 
@@ -1185,6 +1196,7 @@
       .Case("sse4.2", true)
       .Case("sse4a", true)
       .Case("tbm", true)
+      .Case("vpclmulqdq", true)
       .Case("x87", true)
       .Case("xop", true)
       .Case("xsave", true)
@@ -1249,6 +1261,7 @@
       .Case("sse4.2", SSELevel >= SSE42)
       .Case("sse4a", XOPLevel >= SSE4A)
       .Case("tbm", HasTBM)
+      .Case("vpclmulqdq", HasVPCLMULQDQ)
       .Case("x86", true)
       .Case("x86_32", getTriple().getArch() == llvm::Triple::x86)
       .Case("x86_64", getTriple().getArch() == llvm::Triple::x86_64)
Index: lib/Basic/Targets/X86.h
===================================================================
--- lib/Basic/Targets/X86.h
+++ lib/Basic/Targets/X86.h
@@ -49,6 +49,7 @@
 
   bool HasAES = false;
   bool HasPCLMUL = false;
+  bool HasVPCLMULQDQ = false;
   bool HasLZCNT = false;
   bool HasRDRND = false;
   bool HasFSGSBASE = false;
Index: include/clang/Driver/Options.td
===================================================================
--- include/clang/Driver/Options.td
+++ include/clang/Driver/Options.td
@@ -2543,6 +2543,8 @@
 def mno_sha : Flag<["-"], "mno-sha">, Group<m_x86_Features_Group>;
 def mtbm : Flag<["-"], "mtbm">, Group<m_x86_Features_Group>;
 def mno_tbm : Flag<["-"], "mno-tbm">, Group<m_x86_Features_Group>;
+def mvpclmulqdq : Flag<["-"], "mvpclmulqdq">, Group<m_x86_Features_Group>;
+def mno_vpclmulqdq : Flag<["-"], "mno-vpclmulqdq">, Group<m_x86_Features_Group>;
 def mxop : Flag<["-"], "mxop">, Group<m_x86_Features_Group>;
 def mno_xop : Flag<["-"], "mno-xop">, Group<m_x86_Features_Group>;
 def mxsave : Flag<["-"], "mxsave">, Group<m_x86_Features_Group>;
Index: include/clang/Basic/BuiltinsX86.def
===================================================================
--- include/clang/Basic/BuiltinsX86.def
+++ include/clang/Basic/BuiltinsX86.def
@@ -432,6 +432,10 @@
 // CLMUL
 TARGET_BUILTIN(__builtin_ia32_pclmulqdq128, "V2LLiV2LLiV2LLiIc", "", "pclmul")
 
+// VPCLMULQDQ
+TARGET_BUILTIN(__builtin_ia32_pclmulqdq256, "V4LLiV4LLiV4LLiIc", "", "vpclmulqdq")
+TARGET_BUILTIN(__builtin_ia32_pclmulqdq512, "V8LLiV8LLiV8LLiIc", "", "avx512f,vpclmulqdq")
+
 // AVX
 TARGET_BUILTIN(__builtin_ia32_addsubpd256, "V4dV4dV4d", "", "avx")
 TARGET_BUILTIN(__builtin_ia32_addsubps256, "V8fV8fV8f", "", "avx")
Index: test/CodeGen/vpclmulqdq-builtins.c
===================================================================
--- test/CodeGen/vpclmulqdq-builtins.c
+++ test/CodeGen/vpclmulqdq-builtins.c
@@ -0,0 +1,17 @@
+// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +vpclmulqdq -emit-llvm -o - | FileCheck %s --check-prefix AVX
+// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -DAVX512 -target-feature +vpclmulqdq -target-feature +avx512f -emit-llvm -o - | FileCheck %s --check-prefixes AVX,AVX512
+
+#include <immintrin.h>
+
+__m256i test_mm256_clmulepi64_epi128(__m256i A, __m256i B) {
+  // AVX: @llvm.x86.pclmulqdq.256
+  return _mm256_clmulepi64_epi128(A, B, 0);
+}
+
+#ifdef AVX512
+__m512i test_mm512_clmulepi64_epi128(__m512i A, __m512i B) {
+  // AVX512: @llvm.x86.pclmulqdq.512
+  return _mm512_clmulepi64_epi128(A, B, 0);
+}
+#endif
+
Index: test/CodeGen/attr-target-x86.c
===================================================================
--- test/CodeGen/attr-target-x86.c
+++ test/CodeGen/attr-target-x86.c
@@ -38,9 +38,9 @@
 // CHECK: lake{{.*}} #7
 // CHECK: #0 = {{.*}}"target-cpu"="i686" "target-features"="+x87"
 // CHECK: #1 = {{.*}}"target-cpu"="ivybridge" "target-features"="+aes,+avx,+cx16,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt"
-// CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+x87,-aes,-avx,-avx2,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vl,-avx512vpopcntdq,-f16c,-fma,-fma4,-pclmul,-sha,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-xop,-xsave,-xsaveopt"
+// CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+x87,-aes,-avx,-avx2,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vl,-avx512vpopcntdq,-f16c,-fma,-fma4,-pclmul,-sha,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vpclmulqdq,-xop,-xsave,-xsaveopt"
 // CHECK: #3 = {{.*}}"target-cpu"="i686" "target-features"="+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87"
-// CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+x87,-avx,-avx2,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vl,-avx512vpopcntdq,-f16c,-fma,-fma4,-sse4.1,-sse4.2,-xop,-xsave,-xsaveopt"
+// CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+x87,-avx,-avx2,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vl,-avx512vpopcntdq,-f16c,-fma,-fma4,-sse4.1,-sse4.2,-vpclmulqdq,-xop,-xsave,-xsaveopt"
 // CHECK: #5 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cx16,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-aes"
 // CHECK: #6 = {{.*}}"target-cpu"="i686" "target-features"="+x87,-3dnow,-3dnowa,-mmx"
 // CHECK: #7 = {{.*}}"target-cpu"="lakemont" "target-features"="+mmx"
Index: test/Driver/x86-target-features.c
===================================================================
--- test/Driver/x86-target-features.c
+++ test/Driver/x86-target-features.c
@@ -94,3 +94,8 @@
 // RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-clzero %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-CLZERO %s
 // CLZERO: "-target-feature" "+clzero"
 // NO-CLZERO: "-target-feature" "-clzero"
+
+// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mvpclmulqdq %s -### -o %t.o 2>&1 | FileCheck -check-prefix=VPCLMULQDQ %s
+// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-vpclmulqdq %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-VPCLMULQDQ %s
+// VPCLMULQDQ: "-target-feature" "+vpclmulqdq"
+// NO-VPCLMULQDQ: "-target-feature" "-vpclmulqdq"
Index: test/Preprocessor/predefined-arch-macros.c
===================================================================
--- test/Preprocessor/predefined-arch-macros.c
+++ test/Preprocessor/predefined-arch-macros.c
@@ -1079,6 +1079,7 @@
 // CHECK_ICL_M32: #define __SSE4_2__ 1
 // CHECK_ICL_M32: #define __SSE__ 1
 // CHECK_ICL_M32: #define __SSSE3__ 1
+// CHECK_ICL_M32: #define __VPCLMULQDQ__ 1
 // CHECK_ICL_M32: #define __XSAVEC__ 1
 // CHECK_ICL_M32: #define __XSAVEOPT__ 1
 // CHECK_ICL_M32: #define __XSAVES__ 1
@@ -1124,6 +1125,7 @@
 // CHECK_ICL_M64: #define __SSE4_2__ 1
 // CHECK_ICL_M64: #define __SSE__ 1
 // CHECK_ICL_M64: #define __SSSE3__ 1
+// CHECK_ICL_M64: #define __VPCLMULQDQ__ 1
 // CHECK_ICL_M64: #define __XSAVEC__ 1
 // CHECK_ICL_M64: #define __XSAVEOPT__ 1
 // CHECK_ICL_M64: #define __XSAVES__ 1
Index: test/Preprocessor/x86_target_features.c
===================================================================
--- test/Preprocessor/x86_target_features.c
+++ test/Preprocessor/x86_target_features.c
@@ -368,3 +368,10 @@
 // RUN: %clang -target i386-unknown-unknown -march=atom -mclflushopt -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=CLFLUSHOPT %s
 
 // CLFLUSHOPT: #define __CLFLUSHOPT__ 1
+
+// RUN: %clang -target i386-unknown-unknown -march=atom -mvpclmulqdq -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=VPCLMULQDQ %s
+
+// VPCLMULQDQ: #define __PCLMUL__ 1
+// VPCLMULQDQ: #define __VPCLMULQDQ__ 1
+
+
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to