[v8-dev] Re: ARM64 patches for review

sirishrp Wed, 16 Jan 2019 07:55:13 -0800


On Wednesday, January 16, 2019 at 9:53:13 AM UTC-6, [email protected] wrote:
>
> Hi all, 
>
> Following the directions from https://v8.dev/docs/contribute, I am 
> putting my first two patches (for review) for ARM64 that I would like to 
> contribute to V8.
>
> The first patch is very simple - it adds a default -march for ARM64, 
> since 64-bit support in ARM started with ARM version 8. 
> The second patch implements CPU feature probing for ARM64 and probes for 
> the crc32 feature. This feature is later used, if present, in the hashing 
> algorithm. This patch improves Speedometer performance by about half a 
> percent on current Samsung and Pixel devices.
>
> Please review these patches, and let me know what I need to do next to get 
> these patches committed. 
>
> Sirish Pande
> Samsung Austin R&D Center
>


-- 
-- 
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
--- 
You received this message because you are subscribed to the Google Groups 
"v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
For more options, visit https://groups.google.com/d/optout.

>From 19ab9299ef3a490f075b87ee367b626644f1d30d Mon Sep 17 00:00:00 2001
From: Sirish Pande <[email protected]>
Date: Mon, 14 Jan 2019 15:57:01 -0600
Subject: [PATCH 1/2] [ARM64] ARM64 support started from armv8.

  --ARM64 started with arm_version 8. Make that default for building.
  --Enable crc feature on arm64. Most arm64 CPUs support that feature.
  --At runtime probe for crc on ARM64, we will find out whether we
    can execute code path with crc32 or not.
---
 BUILD.gn | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/BUILD.gn b/BUILD.gn
index a00dcd73dd..144c89fea8 100644
--- a/BUILD.gn
+++ b/BUILD.gn
@@ -449,6 +449,9 @@ config("toolchain") {
   }
   if (v8_current_cpu == "arm64") {
     defines += [ "V8_TARGET_ARCH_ARM64" ]
+    if (current_cpu == "arm64") {
+      cflags += [ "-march=armv8+crc" ]
+    }
   }
 
   # Mips64el/mipsel simulators.
-- 
2.20.1

>From 28859858e9fbd94d654de23e51f46e4c3a25a777 Mon Sep 17 00:00:00 2001
From: Sirish Pande <[email protected]>
Date: Thu, 10 Jan 2019 12:00:32 -0600
Subject: [PATCH 2/2] [ARM64] Detect CRC32 support for ARM64, and use crc32 for
 hashing.

Currently, we don't probe ARM64 for any supported CpuFeatures.
  -- Add HWCAPS flag from uapi/asm/hwcap.h
  -- Add crc32 bit for ARM64.
  -- Add support for ProbeImpl for crc32.
  -- Add crc32 supported Hash for ARM64 (by Kasi @ Samsung)
---
 src/arm64/assembler-arm64.cc | 12 +++++--
 src/base/cpu.cc              | 52 ++++++++++++++++++++++-----
 src/base/cpu.h               |  5 +++
 src/base/functional.cc       | 69 ++++++++++++++++++++++++++++++++++++
 src/cpu-features.h           |  2 ++
 5 files changed, 128 insertions(+), 12 deletions(-)

diff --git a/src/arm64/assembler-arm64.cc b/src/arm64/assembler-arm64.cc
index a494f87d4e..823b9e5c33 100644
--- a/src/arm64/assembler-arm64.cc
+++ b/src/arm64/assembler-arm64.cc
@@ -44,20 +44,26 @@ namespace internal {
 // CpuFeatures implementation.
 
 void CpuFeatures::ProbeImpl(bool cross_compile) {
-  // AArch64 has no configuration options, no further probing is required.
   supported_ = 0;
 
   // Only use statically determined features for cross compile (snapshot).
   if (cross_compile) return;
 
+  // Runtime checks for certain CPU features of ARM64.
+  base::CPU cpu;
+  if (cpu.has_crc32())
+    supported_ |=  1u << CRC32;
+
   // We used to probe for coherent cache support, but on older CPUs it
   // causes crashes (crbug.com/524337), and newer CPUs don't even have
   // the feature any more.
 }
 
 void CpuFeatures::PrintTarget() { }
-void CpuFeatures::PrintFeatures() {}
-
+void CpuFeatures::PrintFeatures() {
+  printf("CRC32=%d\n",
+          CpuFeatures::IsSupported(CRC32));
+}
 // -----------------------------------------------------------------------------
 // CPURegList utilities.
 
diff --git a/src/base/cpu.cc b/src/base/cpu.cc
index 6ab0ffee29..47bb16c4a9 100644
--- a/src/base/cpu.cc
+++ b/src/base/cpu.cc
@@ -75,10 +75,31 @@ static V8_INLINE void __cpuid(int cpu_info[4], int info_type) {
 
 #endif  // !V8_LIBC_MSVCRT
 
-#elif V8_HOST_ARCH_ARM || V8_HOST_ARCH_MIPS || V8_HOST_ARCH_MIPS64
+#elif V8_HOST_ARCH_ARM || V8_HOST_ARCH_ARM64 || \
+      V8_HOST_ARCH_MIPS || V8_HOST_ARCH_MIPS64
 
 #if V8_OS_LINUX
 
+#if V8_HOST_ARCH_ARM64
+
+// see <arch/arm64/include/uapi/asm/hwcap.h> kernel header.
+/*
+ * HWCAP flags - for elf_hwcap (in kernel) and AT_HWCAP
+ */
+#define HWCAP_FP       (1 << 0)
+#define HWCAP_ASIMD    (1 << 1)
+#define HWCAP_EVTSTRM  (1 << 2)
+#define HWCAP_AES      (1 << 3)
+#define HWCAP_PMULL    (1 << 4)
+#define HWCAP_SHA1     (1 << 5)
+#define HWCAP_SHA2     (1 << 6)
+#define HWCAP_CRC32    (1 << 7)
+#define HWCAP_ATOMICS  (1 << 8)
+#define HWCAP_FPHP     (1 << 9)
+#define HWCAP_ASIMDHP  (1 << 10)
+
+#endif  // V8_HOST_ARCH_ARM64
+
 #if V8_HOST_ARCH_ARM
 
 // See <uapi/asm/hwcap.h> kernel header.
@@ -108,9 +129,14 @@ static V8_INLINE void __cpuid(int cpu_info[4], int info_type) {
 #define HWCAP_IDIV  (HWCAP_IDIVA | HWCAP_IDIVT)
 #define HWCAP_LPAE  (1 << 20)
 
+#endif
+
+
+#if V8_HOST_ARCH_ARM || V8_HOST_ARCH_ARM64
+
 static uint32_t ReadELFHWCaps() {
   uint32_t result = 0;
-#if V8_GLIBC_PREREQ(2, 16)
+#if V8_HOST_ARCH_ARM && V8_GLIBC_PREREQ(2, 16)
   result = static_cast<uint32_t>(getauxval(AT_HWCAP));
 #else
   // Read the ELF HWCAP flags by parsing /proc/self/auxv.
@@ -133,7 +159,7 @@ static uint32_t ReadELFHWCaps() {
   return result;
 }
 
-#endif  // V8_HOST_ARCH_ARM
+#endif  // V8_HOST_ARCH_ARM || V8_HOST_ARCH_ARM64
 
 #if V8_HOST_ARCH_MIPS
 int __detect_fp64_mode(void) {
@@ -336,7 +362,8 @@ CPU::CPU()
       has_vfp3_d32_(false),
       is_fp64_mode_(false),
       has_non_stop_time_stamp_counter_(false),
-      has_msa_(false) {
+      has_msa_(false),
+      has_crc32_(false) {
   memcpy(vendor_, "Unknown", 8);
 #if V8_HOST_ARCH_IA32 || V8_HOST_ARCH_X64
   int cpu_info[4];
@@ -420,17 +447,19 @@ CPU::CPU()
     has_non_stop_time_stamp_counter_ = (cpu_info[3] & (1 << 8)) != 0;
   }
 
-#elif V8_HOST_ARCH_ARM
+#elif V8_HOST_ARCH_ARM || V8_HOST_ARCH_ARM64
 
 #if V8_OS_LINUX
 
   CPUInfo cpu_info;
 
   // Extract implementor from the "CPU implementer" field.
+  // todo: This section needs to rewritten for ARM64 as most
+  // ARM64 implementations have BigLittle, and even BigMediumLittle
   char* implementer = cpu_info.ExtractField("CPU implementer");
   if (implementer != nullptr) {
     char* end;
-    implementer_ = strtol(implementer, &end, 0);
+    implementer_ = (int)strtol(implementer, &end, 0);
     if (end == implementer) {
       implementer_ = 0;
     }
@@ -440,7 +469,7 @@ CPU::CPU()
   char* variant = cpu_info.ExtractField("CPU variant");
   if (variant != nullptr) {
     char* end;
-    variant_ = strtol(variant, &end, 0);
+    variant_ = (int)strtol(variant, &end, 0);
     if (end == variant) {
       variant_ = -1;
     }
@@ -451,7 +480,7 @@ CPU::CPU()
   char* part = cpu_info.ExtractField("CPU part");
   if (part != nullptr) {
     char* end;
-    part_ = strtol(part, &end, 0);
+    part_ = (int)strtol(part, &end, 0);
     if (end == part) {
       part_ = 0;
     }
@@ -467,7 +496,7 @@ CPU::CPU()
   char* architecture = cpu_info.ExtractField("CPU architecture");
   if (architecture != nullptr) {
     char* end;
-    architecture_ = strtol(architecture, &end, 10);
+    architecture_ = (int)strtol(architecture, &end, 10);
     if (end == architecture) {
       // Kernels older than 3.18 report "CPU architecture: AArch64" on ARMv8.
       if (strcmp(architecture, "AArch64") == 0) {
@@ -509,12 +538,16 @@ CPU::CPU()
   // Try to extract the list of CPU features from ELF hwcaps.
   uint32_t hwcaps = ReadELFHWCaps();
   if (hwcaps != 0) {
+#if V8_HOST_ARCH_ARM
     has_idiva_ = (hwcaps & HWCAP_IDIVA) != 0;
     has_neon_ = (hwcaps & HWCAP_NEON) != 0;
     has_vfp_ = (hwcaps & HWCAP_VFP) != 0;
     has_vfp3_ = (hwcaps & (HWCAP_VFPv3 | HWCAP_VFPv3D16 | HWCAP_VFPv4)) != 0;
     has_vfp3_d32_ = (has_vfp3_ && ((hwcaps & HWCAP_VFPv3D16) == 0 ||
                                    (hwcaps & HWCAP_VFPD32) != 0));
+#elif V8_HOST_ARCH_ARM64
+    has_crc32_ = (hwcaps & HWCAP_CRC32) != 0;
+#endif
   } else {
     // Try to fallback to "Features" CPUInfo field.
     char* features = cpu_info.ExtractField("Features");
@@ -528,6 +561,7 @@ CPU::CPU()
       has_vfp3_ = true;
       has_vfp3_d32_ = true;
     }
+    has_crc32_ = HasListItem(features, "crc32");
     delete[] features;
   }
 
diff --git a/src/base/cpu.h b/src/base/cpu.h
index 4b4becfa20..7a181abc2b 100644
--- a/src/base/cpu.h
+++ b/src/base/cpu.h
@@ -47,6 +47,7 @@ class V8_BASE_EXPORT CPU final {
   static const int ARM = 0x41;
   static const int NVIDIA = 0x4e;
   static const int QUALCOMM = 0x51;
+  static const int SAMSUNG = 0x53;
   int architecture() const { return architecture_; }
   int variant() const { return variant_; }
   static const int NVIDIA_DENVER = 0x0;
@@ -111,6 +112,9 @@ class V8_BASE_EXPORT CPU final {
   bool has_vfp3() const { return has_vfp3_; }
   bool has_vfp3_d32() const { return has_vfp3_d32_; }
 
+  // arm64 features
+  bool has_crc32() const { return has_crc32_; }
+
   // mips features
   bool is_fp64_mode() const { return is_fp64_mode_; }
   bool has_msa() const { return has_msa_; }
@@ -156,6 +160,7 @@ class V8_BASE_EXPORT CPU final {
   bool is_fp64_mode_;
   bool has_non_stop_time_stamp_counter_;
   bool has_msa_;
+  bool has_crc32_;
 };
 
 }  // namespace base
diff --git a/src/base/functional.cc b/src/base/functional.cc
index dffb91f3cc..ec44c34d15 100644
--- a/src/base/functional.cc
+++ b/src/base/functional.cc
@@ -12,6 +12,14 @@
 
 #include <limits>
 
+#if V8_HOST_ARCH_ARM64
+#include "src/cpu-features.h"
+// arm_acle.h is in usr/lib/gcc/arm-linux-gnueabihf/6/include/arm_acle.h
+// TODO: Current build system does not get to that header.
+// So, instead of using builtin intrinsics: v = __crc32w(hash, v);
+// I am using inline-asm.
+#endif
+
 #include "src/base/bits.h"
 
 namespace v8 {
@@ -23,8 +31,26 @@ namespace {
 // https://gist.github.com/badboy/6267743
 template <typename T>
 V8_INLINE size_t hash_value_unsigned(T v) {
+
   switch (sizeof(T)) {
     case 4: {
+#if V8_HOST_ARCH_ARM64
+      if (v8::internal::CpuFeatures::IsSupported(v8::internal::CRC32)) {
+        uint32_t hash = 0;
+        __asm ( "crc32w %w[v],%w[hash],%w[v]"
+            : [hash] "+r" (hash), [v] "+r" (v)
+            :
+          );
+      } else {
+        // "32 bit Mix Functions"
+        v = ~v + (v << 15);  // v = (v << 15) - v - 1;
+        v = v ^ (v >> 12);
+        v = v + (v << 2);
+        v = v ^ (v >> 4);
+        v = v * 2057;  // v = (v + (v << 3)) + (v << 11);
+        v = v ^ (v >> 16);
+      }
+#else
       // "32 bit Mix Functions"
       v = ~v + (v << 15);  // v = (v << 15) - v - 1;
       v = v ^ (v >> 12);
@@ -32,11 +58,29 @@ V8_INLINE size_t hash_value_unsigned(T v) {
       v = v ^ (v >> 4);
       v = v * 2057;  // v = (v + (v << 3)) + (v << 11);
       v = v ^ (v >> 16);
+#endif
       return static_cast<size_t>(v);
     }
     case 8: {
       switch (sizeof(size_t)) {
         case 4: {
+#if V8_HOST_ARCH_ARM64
+          if (v8::internal::CpuFeatures::IsSupported(v8::internal::CRC32)) {
+            uint32_t hash = 0;
+            __asm ( "crc32x %w[v],%w[hash],%x[v]"
+                : [hash] "+r" (hash), [v] "+r" (v)
+                :
+              );
+          } else {
+            // "64 bit to 32 bit Hash Functions"
+            v = ~v + (v << 18);  // v = (v << 18) - v - 1;
+            v = v ^ (v >> 31);
+            v = v * 21;  // v = (v + (v << 2)) + (v << 4);
+            v = v ^ (v >> 11);
+            v = v + (v << 6);
+            v = v ^ (v >> 22);
+          }
+#else
           // "64 bit to 32 bit Hash Functions"
           v = ~v + (v << 18);  // v = (v << 18) - v - 1;
           v = v ^ (v >> 31);
@@ -44,9 +88,33 @@ V8_INLINE size_t hash_value_unsigned(T v) {
           v = v ^ (v >> 11);
           v = v + (v << 6);
           v = v ^ (v >> 22);
+#endif
           return static_cast<size_t>(v);
         }
         case 8: {
+#if V8_HOST_ARCH_ARM64
+          if (v8::internal::CpuFeatures::IsSupported(v8::internal::CRC32)) {
+            uint64_t hash1 = uint64_t{0x0000000000000000};
+            uint64_t hash2 = hash1;
+            uint64_t upper32 = 0;
+            __asm ( "lsr      %x[upper32], %x[v],#32            \n\t"
+                    "crc32w   %w[hash1],%w[hash1],%w[v]         \n\t"
+                    "crc32w   %w[hash2],%w[hash2],%w[upper32]   \n\t"
+                    "orr      %x[v],%x[hash1],%x[hash2],lsl #32 \n\t"
+                    : [hash1] "+r" (hash1), [hash2] "+r" (hash2), [upper32] "+r" (upper32), [v] "+r" (v)
+                    :
+                  );
+          } else {
+            // "64 bit Mix Functions"
+            v = ~v + (v << 21);  // v = (v << 21) - v - 1;
+            v = v ^ (v >> 24);
+            v = (v + (v << 3)) + (v << 8);  // v * 265
+            v = v ^ (v >> 14);
+            v = (v + (v << 2)) + (v << 4);  // v * 21
+            v = v ^ (v >> 28);
+            v = v + (v << 31);
+          }
+#else
           // "64 bit Mix Functions"
           v = ~v + (v << 21);  // v = (v << 21) - v - 1;
           v = v ^ (v >> 24);
@@ -55,6 +123,7 @@ V8_INLINE size_t hash_value_unsigned(T v) {
           v = (v + (v << 2)) + (v << 4);  // v * 21
           v = v ^ (v >> 28);
           v = v + (v << 31);
+#endif
           return static_cast<size_t>(v);
         }
       }
diff --git a/src/cpu-features.h b/src/cpu-features.h
index 310fafe272..ee64dff9b6 100644
--- a/src/cpu-features.h
+++ b/src/cpu-features.h
@@ -49,6 +49,8 @@ enum CpuFeature {
   FLOATING_POINT_EXT,
   VECTOR_FACILITY,
   MISC_INSTR_EXT2,
+  // ARM64
+  CRC32,
 
   NUMBER_OF_CPU_FEATURES,
 
-- 
2.20.1

[v8-dev] Re: ARM64 patches for review

Reply via email to