[GitHub] [tvm] areusch commented on a diff in pull request #11505: Fix max concurrency number in runtime for Intel processors

GitBox Wed, 01 Jun 2022 13:18:07 -0700


areusch commented on code in PR #11505:
URL: https://github.com/apache/tvm/pull/11505#discussion_r887262357



##########
src/runtime/threading_backend.cc:
##########
@@ -40,11 +40,158 @@
 #define HEXAGON_STACK_ALIGNMENT 32
 #endif
 #include <algorithm>
+#include <string>
 #include <thread>
 #define CURRENT_THREAD_HANDLE (static_cast<std::thread::native_handle_type>(0))
 namespace tvm {
 namespace runtime {
 namespace threading {
+#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || 
defined(_M_X64)
+#ifdef _MSC_VER
+#if (_MSC_VER < 1400)
+// Fallback __cpuid for MSVC versions older than 1400, written as a naked
+// function in inline assembly. The first argument (int[4]) receives the result
+// registers as { eax, ebx, ecx, edx }; the second argument is loaded into EAX
+// before CPUID executes.
+// NOTE(review): the stack offsets below assume both arguments are passed on
+// the stack (32-bit cdecl-style call) -- confirm for the targeted compilers.
+static inline __declspec(naked) void __cpuid(int[4], int) {
+  __asm {
+        push  ebx
+        push  esi
+        // "4 * 2" skips the two registers pushed above.
+        mov   eax, dword ptr [esp + 4 * 2 + 8]  // eaxIn
+        cpuid
+        mov   esi, dword ptr [esp + 4 * 2 + 4]  // data
+        // Store the four result registers into data[0..3].
+        mov   dword ptr [esi], eax
+        mov   dword ptr [esi + 4], ebx
+        mov   dword ptr [esi + 8], ecx
+        mov   dword ptr [esi + 12], edx
+        pop   esi
+        pop   ebx
+        ret
+  }
+}
+#else
+#include <intrin.h>  // for __cpuid
+#endif
+#else
+#ifndef __GNUC_PREREQ
+#define __GNUC_PREREQ(major, minor) \
+  ((((__GNUC__) << 16) + (__GNUC_MINOR__)) >= (((major) << 16) + (minor)))
+#endif
+#if __GNUC_PREREQ(4, 3) && !defined(__APPLE__)
+#include <cpuid.h>
+#else
+// avoid err on Apple: can't find a register in class `BREG' while reloading 
`asm'
+#if defined(__APPLE__) && defined(_M_IX86)
+#define __cpuid(eaxIn, a, b, c, d)                                         \
+  __asm__ __volatile__("pushl %%ebx\ncpuid\nmovl %%ebp, %%esi\npopl %%ebx" \
+                       : "=a"(a), "=S"(b), "=c"(c), "=d"(d)                \
+                       : "0"(eaxIn))
+#define __cpuid_count(eaxIn, ecxIn, a, b, c, d)                            \
+  __asm__ __volatile__("pushl %%ebx\ncpuid\nmovl %%ebp, %%esi\npopl %%ebx" \
+                       : "=a"(a), "=S"(b), "=c"(c), "=d"(d)                \
+                       : "0"(eaxIn), "2"(ecxIn))
+#else
+#define __cpuid(eaxIn, a, b, c, d) \
+  __asm__ __volatile__("cpuid\n" : "=a"(a), "=b"(b), "=c"(c), "=d"(d) : 
"0"(eaxIn))
+#define __cpuid_count(eaxIn, ecxIn, a, b, c, d) \
+  __asm__ __volatile__("cpuid\n" : "=a"(a), "=b"(b), "=c"(c), "=d"(d) : 
"0"(eaxIn), "2"(ecxIn))
+#endif
+#endif
+#endif
+
+/*!
+ * \brief Return the count recorded for the requested topology level.
+ *
+ * For SmtLevel the stored entry is returned as is. For CoreLevel the stored
+ * CoreLevel entry is divided by the stored SmtLevel entry.
+ *
+ * \param level Topology level to query; only SmtLevel and CoreLevel are handled.
+ * \return The count for the requested level.
+ * \throws std::string if x2APIC support was not detected, or if \p level is
+ *         neither SmtLevel nor CoreLevel.
+ */
+unsigned int Cpu::get_num_cores(IntelCpuTopologyLevel level) const {
+  if (!x2apic_supported_) throw std::string("x2APIC is not supported");
+  switch (level) {
+    case SmtLevel:
+      return num_cores_[level - 1];
+    case CoreLevel:
+      return num_cores_[level - 1] / num_cores_[SmtLevel - 1];
+    default:
+      // Reaching here means the caller passed a level this class does not track.
+      throw std::string("unsupported CPU topology level");
+  }
+}
+
+/*!
+ * \brief Execute CPUID with the given EAX input and store the result.
+ * \param eaxIn Value passed to CPUID in EAX.
+ * \param data Output array, filled as { eax, ebx, ecx, edx }.
+ */
+void Cpu::get_cpuid(unsigned int eaxIn, unsigned int data[4]) {
+#ifndef _MSC_VER
+  __cpuid(eaxIn, data[0], data[1], data[2], data[3]);
+#else
+  __cpuid(reinterpret_cast<int*>(data), eaxIn);
+#endif
+}
+/*!
+ * \brief Execute CPUID with the given EAX and ECX inputs and store the result.
+ * \param eaxIn Value passed to CPUID in EAX.
+ * \param ecxIn Value passed to CPUID in ECX.
+ * \param data Output array, filled as { eax, ebx, ecx, edx }.
+ */
+void Cpu::get_cpuid_ex(unsigned int eaxIn, unsigned int ecxIn, unsigned int data[4]) {
+#ifndef _MSC_VER
+  __cpuid_count(eaxIn, ecxIn, data[0], data[1], data[2], data[3]);
+#else
+  __cpuidex(reinterpret_cast<int*>(data), eaxIn, ecxIn);
+#endif
+}
+
+// Bit flags stored in Cpu::type_ to record the detected vendor.
+using Type = uint64_t;
+static const Type NONE = 0;
+static const Type tINTEL = static_cast<Type>(1) << 24;
+static const Type tAMD = static_cast<Type>(1) << 25;
+
+/*!
+ * \brief Detect the CPU vendor from CPUID leaf 0, then query the core counts.
+ *
+ * Only the ECX word of the leaf-0 result is inspected; it is compared against
+ * the four-character tags "cAMD" and "ntel".
+ */
+Cpu::Cpu() : type_(NONE), x2apic_supported_(false), num_cores_() {
+  unsigned int regs[4] = {};
+  get_cpuid(0, regs);
+  const unsigned int vendor_word = regs[2];  // ecx
+  static const char kIntelTag[] = "ntel";
+  static const char kAmdTag[] = "cAMD";
+  if (vendor_word == get32bit_ss_be(kAmdTag)) type_ |= tAMD;
+  if (vendor_word == get32bit_ss_be(kIntelTag)) type_ |= tINTEL;
+  set_num_cores();
+}
+/*! \brief Whether the tINTEL flag is set in type_. */
+bool Cpu::is_intel() {
+  const bool intel_flag_set = (type_ & tINTEL) != 0;
+  return intel_flag_set;
+}
+
+/*! \brief Whether the tAMD flag is set in type_. */
+bool Cpu::is_amd() {
+  const bool amd_flag_set = (type_ & tAMD) != 0;
+  return amd_flag_set;
+}
+
+/*!
+ * \brief Pack the first four characters of \p x into a 32-bit word.
+ *
+ * x[0] ends up in the least significant byte and x[3] in the most significant
+ * byte. Each character is converted through unsigned char first so that bytes
+ * >= 0x80 are neither sign-extended nor shifted as negative ints.
+ *
+ * \param x Pointer to at least four readable characters.
+ * \return The packed 32-bit value.
+ */
+unsigned int Cpu::get32bit_ss_be(const char* x) const {
+  unsigned int packed = 0;
+  for (int i = 3; i >= 0; --i) {
+    packed = (packed << 8) | static_cast<unsigned char>(x[i]);
+  }
+  return packed;
+}
+
+/*!
+ * \brief Extract the bit field [base, end) from \p val.
+ *
+ * The range is half-open: \p end is exclusive, so extract_bit(v, 8, 15) yields
+ * the seven bits 8..14. A caller that wants the inclusive field 15:8 must pass
+ * end = 16.
+ *
+ * \param val Source value.
+ * \param base Index of the lowest bit to extract.
+ * \param end Index one past the highest bit to extract.
+ * \return The extracted field shifted down to bit 0; 0 for an empty or
+ *         out-of-range request.
+ */
+unsigned int Cpu::extract_bit(unsigned int val, unsigned int base, unsigned int end) {
+  // Shifting a 32-bit value by 32 or more is undefined, so reject those cases.
+  if (end <= base || base >= 32) return 0;
+  const unsigned int width = end - base;
+  const unsigned int mask = (width >= 32) ? ~0u : ((1u << width) - 1);
+  return (val >> base) & mask;
+}
+
+void Cpu::set_num_cores() {
+  if ((type_ & tINTEL) == 0) return;
+
+  unsigned int data[4] = {};
+
+  /* CAUTION: These numbers are configuration as shipped by Intel. */
+  get_cpuid_ex(0x0, 0, data);
+  if (data[0] >= 0xB) {
+    /*
+      if leaf 11 exists(x2APIC is supported),
+      we use it to get the number of smt cores and cores on socket
+
+      leaf 0xB can be zeroed-out by a hypervisor
+    */
+    x2apic_supported_ = true;
+    for (unsigned int i = 0; i < max_topology_levels; i++) {
+      get_cpuid_ex(0xB, i, data);
+      IntelCpuTopologyLevel level = 
(IntelCpuTopologyLevel)extract_bit(data[2], 8, 15);
+      if (level == SmtLevel || level == CoreLevel) {
+        num_cores_[level - 1] = extract_bit(data[1], 0, 15);
+      }
+    }
+    /*
+      Fallback values in case a hypervisor has 0xB leaf zeroed-out.
+    */
+    num_cores_[SmtLevel - 1] = (std::max)(1u, num_cores_[SmtLevel - 1]);

Review Comment:
   @elvin-n For any third-party code, we need to consult
https://www.apache.org/legal/resolved.html. If the code can be accepted, then
we need to place it under 3rdparty/ (a submodule is not needed if the original
repo is documented) and update LICENSE. My guess is that including this code in
the apache/tvm repo might be tricky given the third-party policy above, but let
me know if you see an avenue there.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@tvm.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

[GitHub] [tvm] areusch commented on a diff in pull request #11505: Fix max concurrency number in runtime for Intel processors

Reply via email to