This is an automated email from the ASF dual-hosted git repository.

jamesge pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-brpc.git


The following commit(s) were added to refs/heads/master by this push:
     new efa9989e Support apple silicon
efa9989e is described below

commit efa9989e99dce0bb5f8e26818574748c7ef45059
Author: James Ge <jge...@gmail.com>
AuthorDate: Wed Jun 8 16:49:33 2022 +0800

    Support apple silicon
---
 Makefile                    | 10 +++--
 config_brpc.sh              | 21 ++++++----
 docs/cn/getting_started.md  | 33 +++++++--------
 docs/en/getting_started.md  | 25 ++++++++----
 src/brpc/span.h             |  2 +-
 src/bthread/context.cpp     | 97 +++++++++++++++++++++++++++++++++++++++++++++
 src/bthread/context.h       | 19 +++++----
 src/bthread/key.cpp         |  2 +-
 src/bthread/task_group.cpp  |  2 +-
 src/butil/time.h            | 26 ++++++++++++
 test/{BUILD => BUILD.bazel} |  0
 test/CMakeLists.txt         |  2 +-
 test/Makefile               |  4 +-
 test/endpoint_unittest.cpp  |  2 +
 14 files changed, 196 insertions(+), 49 deletions(-)

diff --git a/Makefile b/Makefile
index 3538e47c..e2e1a6b2 100644
--- a/Makefile
+++ b/Makefile
@@ -41,13 +41,15 @@ endif
 
 #required by butil/crc32.cc to boost performance for 10x
 ifeq ($(shell test $(GCC_VERSION) -ge 40400; echo $$?),0)
-       CXXFLAGS+=-msse4 -msse4.2
+  ifeq ($(shell uname -p),i386)  #note: i386 is processor family type, not the 32-bit x86 arch
+    CXXFLAGS+=-msse4 -msse4.2
+  endif
 endif
 #not solved yet
 ifeq ($(CC),gcc)
- ifeq ($(shell test $(GCC_VERSION) -ge 70000; echo $$?),0)
-       CXXFLAGS+=-Wno-aligned-new
- endif
+  ifeq ($(shell test $(GCC_VERSION) -ge 70000; echo $$?),0)
+    CXXFLAGS+=-Wno-aligned-new
+  endif
 endif
 
 BUTIL_SOURCES = \
diff --git a/config_brpc.sh b/config_brpc.sh
index f52911db..842f100f 100755
--- a/config_brpc.sh
+++ b/config_brpc.sh
@@ -154,16 +154,21 @@ find_dir_of_header_or_die() {
 }
 
 if [ "$SYSTEM" = "Darwin" ]; then
-    OPENSSL_LIB="/usr/local/opt/openssl/lib"
-    OPENSSL_HDR="/usr/local/opt/openssl/include"
-else
-    # User specified path of openssl, if not given it's empty
-    OPENSSL_LIB=$(find_dir_of_lib ssl)
-    # Inconvenient to check these headers in baidu-internal
-    #PTHREAD_HDR=$(find_dir_of_header_or_die pthread.h)
-    OPENSSL_HDR=$(find_dir_of_header_or_die openssl/ssl.h mesalink/openssl/ssl.h)
+    if [ -d "/usr/local/opt/openssl" ]; then
+        LIBS_IN="/usr/local/opt/openssl/lib $LIBS_IN"
+        HDRS_IN="/usr/local/opt/openssl/include $HDRS_IN"
+    elif [ -d "/opt/homebrew/Cellar" ]; then
+        LIBS_IN="/opt/homebrew/Cellar $LIBS_IN"
+        HDRS_IN="/opt/homebrew/Cellar $HDRS_IN"
+    fi
 fi
 
+# User specified path of openssl, if not given it's empty
+OPENSSL_LIB=$(find_dir_of_lib ssl)
+# Inconvenient to check these headers in baidu-internal
+#PTHREAD_HDR=$(find_dir_of_header_or_die pthread.h)
+OPENSSL_HDR=$(find_dir_of_header_or_die openssl/ssl.h mesalink/openssl/ssl.h)
+
 if [ $WITH_MESALINK != 0 ]; then
     MESALINK_HDR=$(find_dir_of_header_or_die mesalink/openssl/ssl.h)
     OPENSSL_HDR="$OPENSSL_HDR\n$MESALINK_HDR"
diff --git a/docs/cn/getting_started.md b/docs/cn/getting_started.md
index 53ef0f50..b4af85a5 100644
--- a/docs/cn/getting_started.md
+++ b/docs/cn/getting_started.md
@@ -20,7 +20,7 @@ brpc有如下依赖:
 ## Ubuntu/LinuxMint/WSL
 ### 依赖准备
 
-安装通用依赖,[gflags](https://github.com/gflags/gflags), [protobuf](https://github.com/google/protobuf), [leveldb](https://github.com/google/leveldb):
+安装依赖:
 ```shell
 sudo apt-get install -y git g++ make libssl-dev libgflags-dev libprotobuf-dev libprotoc-dev protobuf-compiler libleveldb-dev
 ```
@@ -113,14 +113,9 @@ CentOS一般需要安装EPEL,否则很多包都默认不可用。
 sudo yum install epel-release
 ```
 
-安装通用依赖:
+安装依赖:
 ```shell
-sudo yum install git gcc-c++ make openssl-devel
-```
-
-安装 [gflags](https://github.com/gflags/gflags), [protobuf](https://github.com/google/protobuf), [leveldb](https://github.com/google/leveldb):
-```shell
-sudo yum install gflags-devel protobuf-devel protobuf-compiler leveldb-devel
+sudo yum install git gcc-c++ make openssl-devel gflags-devel protobuf-devel protobuf-compiler leveldb-devel
 ```
 
 如果你要在样例中启用cpu/heap的profiler:
@@ -216,18 +211,13 @@ $ make
 
 ## MacOS
 
-注意:在相同运行环境下,当前Mac版brpc的性能比Linux版差2.5倍。如果你的服务是性能敏感的,请不要使用MacOs作为你的生产环境。
+注意:在相同硬件条件下,MacOS版brpc的性能可能明显差于Linux版。如果你的服务是性能敏感的,请不要使用MacOS作为你的生产环境。
 
 ### 依赖准备
 
-安装通用依赖:
-```shell
-brew install openssl git gnu-getopt coreutils
-```
-
-安装[gflags](https://github.com/gflags/gflags),[protobuf](https://github.com/google/protobuf),[leveldb](https://github.com/google/leveldb):
+安装依赖:
 ```shell
-brew install gflags protobuf leveldb
+brew install openssl git gnu-getopt coreutils gflags protobuf leveldb
 ```
 
 如果你要在样例中启用cpu/heap的profiler:
@@ -241,6 +231,17 @@ git clone https://github.com/google/googletest -b release-1.10.0 && cd googletes
 ```
 在编译完成后,复制include/和lib/目录到/usr/local/include和/usr/local/lib目录中,以便于让所有应用都能使用gtest。
 
+### Monterey
+Monterey中openssl的安装位置可能不再位于`/usr/local/opt/openssl`,很可能会在`/opt/homebrew/Cellar`目录下,如果编译时报告找不到openssl,可考虑设置软链如下:
+```shell
+sudo ln -s /opt/homebrew/Cellar/openssl@3/3.0.3 /usr/local/opt/openssl
+```
+请注意上述命令中openssl的目录可能随环境变化而变化,你可以通过`brew info openssl`查看。
+
+### Apple Silicon
+
+master HEAD已支持M1系列芯片,M2未测试过。欢迎通过issues向我们报告遗留的warning/error。
+
 ### 使用config_brpc.sh编译brpc
 git克隆brpc,进入到项目目录然后运行:
 ```shell
diff --git a/docs/en/getting_started.md b/docs/en/getting_started.md
index 4d24f2d7..03e34cf6 100644
--- a/docs/en/getting_started.md
+++ b/docs/en/getting_started.md
@@ -215,18 +215,13 @@ Same with [here](#compile-brpc-with-cmake)
 
 ## MacOS
 
-Note: In the same running environment, the performance of the current Mac version is about 2.5 times worse than the Linux version. If your service is performance-critical, do not use MacOS as your production environment.
+Note: With same environment, the performance of the MacOS version is worse than the Linux version. If your service is performance-critical, do not use MacOS as your production environment.
 
 ### Prepare deps
 
-Install common deps:
+Install dependencies:
 ```shell
-brew install openssl git gnu-getopt coreutils
-```
-
-Install [gflags](https://github.com/gflags/gflags), [protobuf](https://github.com/google/protobuf), [leveldb](https://github.com/google/leveldb):
-```shell
-brew install gflags protobuf leveldb
+brew install openssl git gnu-getopt coreutils gflags protobuf leveldb
 ```
 
 If you need to enable cpu/heap profilers in examples:
@@ -240,6 +235,20 @@ git clone https://github.com/google/googletest -b release-1.10.0 && cd googletes
 ```
 After the compilation, copy include/ and lib/ into /usr/local/include and /usr/local/lib respectively to expose gtest to all apps
 
+### Monterey
+
+openssl installed in Monterey may not be found at `/usr/local/opt/openssl`, instead it's probably put under `/opt/homebrew/Cellar`. If the compiler cannot find openssl, consider making a soft link like below:
+
+```shell
+sudo ln -s /opt/homebrew/Cellar/openssl@3/3.0.3 /usr/local/opt/openssl
+```
+
+Please note that the directory of installed openssl in above command may be different in different environments, you could check it  out by running `brew info openssl`.
+
+### Apple Silicon
+
+The code at master HEAD already supports M1 series chips. M2 series are not tested yet. Please feel free to report remaining warnings/errors to us by issues.
+
 ### Compile brpc with config_brpc.sh
 git clone brpc, cd into the repo and run
 ```shell
diff --git a/src/brpc/span.h b/src/brpc/span.h
index 07771649..43ede3d5 100644
--- a/src/brpc/span.h
+++ b/src/brpc/span.h
@@ -34,7 +34,7 @@
 #include "brpc/span.pb.h"
 
 namespace bthread {
-extern thread_local bthread::LocalStorage tls_bls;
+extern __thread bthread::LocalStorage tls_bls;
 }
 
 
diff --git a/src/bthread/context.cpp b/src/bthread/context.cpp
index 5098bc39..b89d432a 100644
--- a/src/bthread/context.cpp
+++ b/src/bthread/context.cpp
@@ -713,3 +713,100 @@ __asm (
 );
 
 #endif
+
+
+#if defined(BTHREAD_CONTEXT_PLATFORM_apple_arm64) && defined(BTHREAD_CONTEXT_COMPILER_gcc)
+__asm (
+".text\n"
+".globl _bthread_jump_fcontext\n"
+".balign 16\n"
+"_bthread_jump_fcontext:\n"
+"    ; prepare stack for GP + FPU\n"
+"    sub  sp, sp, #0xb0\n"
+"#if (defined(__VFP_FP__) && !defined(__SOFTFP__))\n"
+"    ; test if fpu env should be preserved\n"
+"    cmp  w3, #0\n"
+"    b.eq  1f\n"
+"    ; save d8 - d15\n"
+"    stp  d8,  d9,  [sp, #0x00]\n"
+"    stp  d10, d11, [sp, #0x10]\n"
+"    stp  d12, d13, [sp, #0x20]\n"
+"    stp  d14, d15, [sp, #0x30]\n"
+"1:\n"
+"#endif\n"
+"    ; save x19-x30\n"
+"    stp  x19, x20, [sp, #0x40]\n"
+"    stp  x21, x22, [sp, #0x50]\n"
+"    stp  x23, x24, [sp, #0x60]\n"
+"    stp  x25, x26, [sp, #0x70]\n"
+"    stp  x27, x28, [sp, #0x80]\n"
+"    stp  fp,  lr,  [sp, #0x90]\n"
+"    ; save LR as PC\n"
+"    str  lr, [sp, #0xa0]\n"
+"    ; store RSP (pointing to context-data) in first argument (x0).\n"
+"    ; STR cannot have sp as a target register\n"
+"    mov  x4, sp\n"
+"    str  x4, [x0]\n"
+"    ; restore RSP (pointing to context-data) from A2 (x1)\n"
+"    mov  sp, x1\n"
+"#if (defined(__VFP_FP__) && !defined(__SOFTFP__))\n"
+"    ; test if fpu env should be preserved\n"
+"    cmp  w3, #0\n"
+"    b.eq  2f\n"
+"    ; load d8 - d15\n"
+"    ldp  d8,  d9,  [sp, #0x00]\n"
+"    ldp  d10, d11, [sp, #0x10]\n"
+"    ldp  d12, d13, [sp, #0x20]\n"
+"    ldp  d14, d15, [sp, #0x30]\n"
+"2:\n"
+"#endif\n"
+"    ; load x19-x30\n"
+"    ldp  x19, x20, [sp, #0x40]\n"
+"    ldp  x21, x22, [sp, #0x50]\n"
+"    ldp  x23, x24, [sp, #0x60]\n"
+"    ldp  x25, x26, [sp, #0x70]\n"
+"    ldp  x27, x28, [sp, #0x80]\n"
+"    ldp  fp,  lr,  [sp, #0x90]\n"
+"    ; use third arg as return value after jump\n"
+"    ; and as first arg in context function\n"
+"    mov  x0, x2\n"
+"    ; load pc\n"
+"    ldr  x4, [sp, #0xa0]\n"
+"    ; restore stack from GP + FPU\n"
+"    add  sp, sp, #0xb0\n"
+"    ret x4\n"
+);
+
+#endif
+
+#if defined(BTHREAD_CONTEXT_PLATFORM_apple_arm64) && defined(BTHREAD_CONTEXT_COMPILER_gcc)
+__asm (
+".text\n"
+".globl _bthread_make_fcontext\n"
+".balign 16\n"
+"_bthread_make_fcontext:\n"
+"    ; shift address in x0 (allocated stack) to lower 16 byte boundary\n"
+"    and x0, x0, ~0xF\n"
+"    ; reserve space for context-data on context-stack\n"
+"    sub  x0, x0, #0xb0\n"
+"    ; third arg of make_fcontext() == address of context-function\n"
+"    ; store address as a PC to jump in\n"
+"    str  x2, [x0, #0xa0]\n"
+"    ; compute abs address of label finish\n"
+"    ; 0x0c = 3 instructions * size (4) before label 'finish'\n"
+"    ; TODO: Numeric offset since llvm still does not support labels in ADR. Fix:\n"
+"    ;       http:\n"
+"    adr  x1, 0x0c\n"
+"    ; save address of finish as return-address for context-function\n"
+"    ; will be entered after context-function returns (LR register)\n"
+"    str  x1, [x0, #0x98]\n"
+"    ret  lr ; return pointer to context-data (x0)\n"
+"finish:\n"
+"    ; exit code is zero\n"
+"    mov  x0, #0\n"
+"    ; exit application\n"
+"    bl  __exit\n"
+);
+
+#endif
+
diff --git a/src/bthread/context.h b/src/bthread/context.h
index 8ff027df..ef98e458 100644
--- a/src/bthread/context.h
+++ b/src/bthread/context.h
@@ -23,9 +23,9 @@
 
 #if defined(__GNUC__) || defined(__APPLE__)
 
-    #define BTHREAD_CONTEXT_COMPILER_gcc
+  #define BTHREAD_CONTEXT_COMPILER_gcc
 
-    #if defined(__linux__)
+  #if defined(__linux__)
        #ifdef __x86_64__
            #define BTHREAD_CONTEXT_PLATFORM_linux_x86_64
            #define BTHREAD_CONTEXT_CALL_CONVENTION
@@ -41,27 +41,30 @@
            #define BTHREAD_CONTEXT_CALL_CONVENTION
        #endif
 
-    #elif defined(__MINGW32__) || defined (__MINGW64__)
+  #elif defined(__MINGW32__) || defined (__MINGW64__)
        #if defined(__x86_64__)
            #define BTHREAD_CONTEXT_COMPILER_gcc
            #define BTHREAD_CONTEXT_PLATFORM_windows_x86_64
            #define BTHREAD_CONTEXT_CALL_CONVENTION
-       #endif
-
-       #if defined(__i386__)
+       #elif defined(__i386__)
            #define BTHREAD_CONTEXT_COMPILER_gcc
            #define BTHREAD_CONTEXT_PLATFORM_windows_i386
            #define BTHREAD_CONTEXT_CALL_CONVENTION __cdecl
        #endif
-    #elif defined(__APPLE__) && defined(__MACH__)
+
+  #elif defined(__APPLE__) && defined(__MACH__)
        #if defined (__i386__)
            #define BTHREAD_CONTEXT_PLATFORM_apple_i386
            #define BTHREAD_CONTEXT_CALL_CONVENTION
        #elif defined (__x86_64__)
            #define BTHREAD_CONTEXT_PLATFORM_apple_x86_64
            #define BTHREAD_CONTEXT_CALL_CONVENTION
-       #endif
+       #elif defined (__aarch64__)
+           #define BTHREAD_CONTEXT_PLATFORM_apple_arm64
+           #define BTHREAD_CONTEXT_CALL_CONVENTION
     #endif
+  #endif
+
 #endif
 
 #if defined(_WIN32_WCE)
diff --git a/src/bthread/key.cpp b/src/bthread/key.cpp
index 53f7594f..a4a08407 100644
--- a/src/bthread/key.cpp
+++ b/src/bthread/key.cpp
@@ -34,7 +34,7 @@ class KeyTable;
 
 // defined in task_group.cpp
 extern __thread TaskGroup* tls_task_group;
-extern thread_local LocalStorage tls_bls;
+extern __thread LocalStorage tls_bls;
 static __thread bool tls_ever_created_keytable = false;
 
 // We keep thread specific data in a two-level array. The top-level array
diff --git a/src/bthread/task_group.cpp b/src/bthread/task_group.cpp
index 9f6d42fb..b8ead163 100644
--- a/src/bthread/task_group.cpp
+++ b/src/bthread/task_group.cpp
@@ -61,7 +61,7 @@ __thread TaskGroup* tls_task_group = NULL;
 // Sync with TaskMeta::local_storage when a bthread is created or destroyed.
 // During running, the two fields may be inconsistent, use tls_bls as the
 // groundtruth.
-thread_local LocalStorage tls_bls = BTHREAD_LOCAL_STORAGE_INITIALIZER;
+__thread LocalStorage tls_bls = BTHREAD_LOCAL_STORAGE_INITIALIZER;
 
 // defined in bthread/key.cpp
 extern void return_keytable(bthread_keytable_pool_t*, KeyTable*);
diff --git a/src/butil/time.h b/src/butil/time.h
index b288d8e4..00949f8e 100644
--- a/src/butil/time.h
+++ b/src/butil/time.h
@@ -215,6 +215,7 @@ inline int64_t monotonic_time_s() {
 
 namespace detail {
 inline uint64_t clock_cycles() {
+#if defined(__x86_64__) || defined(__amd64__)
     unsigned int lo = 0;
     unsigned int hi = 0;
     // We cannot use "=A", since this would use %rax on x86_64
@@ -223,6 +224,31 @@ inline uint64_t clock_cycles() {
         : "=a" (lo), "=d" (hi)
         );
     return ((uint64_t)hi << 32) | lo;
+#elif defined(__aarch64__)
+    uint64_t virtual_timer_value;
+    asm volatile("mrs %0, cntvct_el0" : "=r"(virtual_timer_value));
+    return virtual_timer_value;
+#elif defined(__ARM_ARCH)
+  #if (__ARM_ARCH >= 6)
+    unsigned int pmccntr;
+    unsigned int pmuseren;
+    unsigned int pmcntenset;
+    // Read the user mode perf monitor counter access permissions.
+    asm volatile ("mrc p15, 0, %0, c9, c14, 0" : "=r" (pmuseren));
+    if (pmuseren & 1) {  // Allows reading perfmon counters for user mode code.
+        asm volatile ("mrc p15, 0, %0, c9, c12, 1" : "=r" (pmcntenset));
+        if (pmcntenset & 0x80000000ul) {  // Is it counting?
+            asm volatile ("mrc p15, 0, %0, c9, c13, 0" : "=r" (pmccntr));
+            // The counter is set up to count every 64th cycle
+            return static_cast<uint64_t>(pmccntr) * 64;  // Should optimize to << 6
+        }
+    }
+  #else
+    #error "unsupported arm_arch"
+  #endif
+#else
+  #error "unsupported arch"
+#endif
 }
 extern int64_t read_invariant_cpu_frequency();
 // Be positive iff:
diff --git a/test/BUILD b/test/BUILD.bazel
similarity index 100%
rename from test/BUILD
rename to test/BUILD.bazel
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 7c274ef5..f3e0c9bb 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -58,7 +58,7 @@ use_cxx11()
 
 if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
     #required by butil/crc32.cc to boost performance for 10x
-    if(NOT (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.4))
+    if((CMAKE_SYSTEM_PROCESSOR MATCHES "(x86)|(X86)|(amd64)|(AMD64)") AND NOT (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.4))
         set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4 -msse4.2")
     endif()
     if(NOT (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 7.0))
diff --git a/test/Makefile b/test/Makefile
index a6e035f4..99f7b4f7 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -23,7 +23,9 @@ CXXFLAGS=$(CPPFLAGS) -pipe -Wall -W -fPIC -fstrict-aliasing -Wno-invalid-offseto
 
 #required by butil/crc32.cc to boost performance for 10x
 ifeq ($(shell test $(GCC_VERSION) -ge 40400; echo $$?),0)
-       CXXFLAGS+=-msse4 -msse4.2
+  ifeq ($(shell uname -p),i386)  #note: i386 is processor family type, not the 32-bit x86 arch
+    CXXFLAGS+=-msse4 -msse4.2
+  endif
 endif
 #not solved yet
 ifeq ($(CC),gcc)
diff --git a/test/endpoint_unittest.cpp b/test/endpoint_unittest.cpp
index ba9a4ee6..8b515ef3 100644
--- a/test/endpoint_unittest.cpp
+++ b/test/endpoint_unittest.cpp
@@ -384,6 +384,8 @@ TEST(EndPointTest, endpoint_sockaddr_conv_ipv6) {
     in6_addr expect_in6_addr;
     bzero(&expect_in6_addr, sizeof(expect_in6_addr));
     expect_in6_addr.__in6_u.__u6_addr8[15] = 1;
+    // jge: mac monterey上应该这样,但准确判定条件不明
+    //expect_in6_addr.__u6_addr.__u6_addr8[15] = 1;
 
     sockaddr_storage ss;
     const sockaddr_in6* sa6 = (sockaddr_in6*) &ss;


---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscr...@brpc.apache.org
For additional commands, e-mail: dev-h...@brpc.apache.org

Reply via email to