This is an automated email from the ASF dual-hosted git repository. jamesge pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-brpc.git
The following commit(s) were added to refs/heads/master by this push: new efa9989e Support apple silicon efa9989e is described below commit efa9989e99dce0bb5f8e26818574748c7ef45059 Author: James Ge <jge...@gmail.com> AuthorDate: Wed Jun 8 16:49:33 2022 +0800 Support apple silicon --- Makefile | 10 +++-- config_brpc.sh | 21 ++++++---- docs/cn/getting_started.md | 33 +++++++-------- docs/en/getting_started.md | 25 ++++++++---- src/brpc/span.h | 2 +- src/bthread/context.cpp | 97 +++++++++++++++++++++++++++++++++++++++++++++ src/bthread/context.h | 19 +++++---- src/bthread/key.cpp | 2 +- src/bthread/task_group.cpp | 2 +- src/butil/time.h | 26 ++++++++++++ test/{BUILD => BUILD.bazel} | 0 test/CMakeLists.txt | 2 +- test/Makefile | 4 +- test/endpoint_unittest.cpp | 2 + 14 files changed, 196 insertions(+), 49 deletions(-) diff --git a/Makefile b/Makefile index 3538e47c..e2e1a6b2 100644 --- a/Makefile +++ b/Makefile @@ -41,13 +41,15 @@ endif #required by butil/crc32.cc to boost performance for 10x ifeq ($(shell test $(GCC_VERSION) -ge 40400; echo $$?),0) - CXXFLAGS+=-msse4 -msse4.2 + ifeq ($(shell uname -p),i386) #note: i386 is processor family type, not the 32-bit x86 arch + CXXFLAGS+=-msse4 -msse4.2 + endif endif #not solved yet ifeq ($(CC),gcc) - ifeq ($(shell test $(GCC_VERSION) -ge 70000; echo $$?),0) - CXXFLAGS+=-Wno-aligned-new - endif + ifeq ($(shell test $(GCC_VERSION) -ge 70000; echo $$?),0) + CXXFLAGS+=-Wno-aligned-new + endif endif BUTIL_SOURCES = \ diff --git a/config_brpc.sh b/config_brpc.sh index f52911db..842f100f 100755 --- a/config_brpc.sh +++ b/config_brpc.sh @@ -154,16 +154,21 @@ find_dir_of_header_or_die() { } if [ "$SYSTEM" = "Darwin" ]; then - OPENSSL_LIB="/usr/local/opt/openssl/lib" - OPENSSL_HDR="/usr/local/opt/openssl/include" -else - # User specified path of openssl, if not given it's empty - OPENSSL_LIB=$(find_dir_of_lib ssl) - # Inconvenient to check these headers in baidu-internal - #PTHREAD_HDR=$(find_dir_of_header_or_die pthread.h) - OPENSSL_HDR=$(find_dir_of_header_or_die openssl/ssl.h mesalink/openssl/ssl.h) + if [ -d "/usr/local/opt/openssl" ]; then + LIBS_IN="/usr/local/opt/openssl/lib $LIBS_IN" + HDRS_IN="/usr/local/opt/openssl/include $HDRS_IN" + elif [ -d "/opt/homebrew/Cellar" ]; then + LIBS_IN="/opt/homebrew/Cellar $LIBS_IN" + HDRS_IN="/opt/homebrew/Cellar $HDRS_IN" + fi fi +# User specified path of openssl, if not given it's empty +OPENSSL_LIB=$(find_dir_of_lib ssl) +# Inconvenient to check these headers in baidu-internal +#PTHREAD_HDR=$(find_dir_of_header_or_die pthread.h) +OPENSSL_HDR=$(find_dir_of_header_or_die openssl/ssl.h mesalink/openssl/ssl.h) + if [ $WITH_MESALINK != 0 ]; then MESALINK_HDR=$(find_dir_of_header_or_die mesalink/openssl/ssl.h) OPENSSL_HDR="$OPENSSL_HDR\n$MESALINK_HDR" diff --git a/docs/cn/getting_started.md b/docs/cn/getting_started.md index 53ef0f50..b4af85a5 100644 --- a/docs/cn/getting_started.md +++ b/docs/cn/getting_started.md @@ -20,7 +20,7 @@ brpc有如下依赖: ## Ubuntu/LinuxMint/WSL ### 依赖准备 -安装通用依赖,[gflags](https://github.com/gflags/gflags), [protobuf](https://github.com/google/protobuf), [leveldb](https://github.com/google/leveldb): +安装依赖: ```shell sudo apt-get install -y git g++ make libssl-dev libgflags-dev libprotobuf-dev libprotoc-dev protobuf-compiler libleveldb-dev ``` @@ -113,14 +113,9 @@ CentOS一般需要安装EPEL,否则很多包都默认不可用。 sudo yum install epel-release ``` -安装通用依赖: +安装依赖: ```shell -sudo yum install git gcc-c++ make openssl-devel -``` - -安装 [gflags](https://github.com/gflags/gflags), [protobuf](https://github.com/google/protobuf), [leveldb](https://github.com/google/leveldb): -```shell -sudo yum install gflags-devel protobuf-devel protobuf-compiler leveldb-devel +sudo yum install git gcc-c++ make openssl-devel gflags-devel protobuf-devel protobuf-compiler leveldb-devel ``` 如果你要在样例中启用cpu/heap的profiler: @@ -216,18 +211,13 @@ $ make ## MacOS -注意:在相同运行环境下,当前Mac版brpc的性能比Linux版差2.5倍。如果你的服务是性能敏感的,请不要使用MacOs作为你的生产环境。 +注意:在相同硬件条件下,MacOS版brpc的性能可能明显差于Linux版。如果你的服务是性能敏感的,请不要使用MacOS作为你的生产环境。 ### 依赖准备 -安装通用依赖: -```shell -brew install openssl git gnu-getopt coreutils -``` - -安装[gflags](https://github.com/gflags/gflags),[protobuf](https://github.com/google/protobuf),[leveldb](https://github.com/google/leveldb): +安装依赖: ```shell -brew install gflags protobuf leveldb +brew install openssl git gnu-getopt coreutils gflags protobuf leveldb ``` 如果你要在样例中启用cpu/heap的profiler: @@ -241,6 +231,17 @@ git clone https://github.com/google/googletest -b release-1.10.0 && cd googletes ``` 在编译完成后,复制include/和lib/目录到/usr/local/include和/usr/local/lib目录中,以便于让所有应用都能使用gtest。 +### Monterey +Monterey中openssl的安装位置可能不再位于`/usr/local/opt/openssl`,很可能会在`/opt/homebrew/Cellar`目录下,如果编译时报告找不到openssl,可考虑设置软链如下: +```shell +sudo ln -s /opt/homebrew/Cellar/openssl@3/3.0.3 /usr/local/opt/openssl +``` +请注意上述命令中openssl的目录可能随环境变化而变化,你可以通过`brew info openssl`查看。 + +### Apple Silicon + +master HEAD已支持M1系列芯片,M2未测试过。欢迎通过issues向我们报告遗留的warning/error。 + ### 使用config_brpc.sh编译brpc git克隆brpc,进入到项目目录然后运行: ```shell diff --git a/docs/en/getting_started.md b/docs/en/getting_started.md index 4d24f2d7..03e34cf6 100644 --- a/docs/en/getting_started.md +++ b/docs/en/getting_started.md @@ -215,18 +215,13 @@ Same with [here](#compile-brpc-with-cmake) ## MacOS -Note: In the same running environment, the performance of the current Mac version is about 2.5 times worse than the Linux version. If your service is performance-critical, do not use MacOS as your production environment. +Note: With same environment, the performance of the MacOS version is worse than the Linux version. If your service is performance-critical, do not use MacOS as your production environment. ### Prepare deps -Install common deps: +Install dependencies: ```shell -brew install openssl git gnu-getopt coreutils -``` - -Install [gflags](https://github.com/gflags/gflags), [protobuf](https://github.com/google/protobuf), [leveldb](https://github.com/google/leveldb): -```shell -brew install gflags protobuf leveldb +brew install openssl git gnu-getopt coreutils gflags protobuf leveldb ``` If you need to enable cpu/heap profilers in examples: @@ -240,6 +235,20 @@ git clone https://github.com/google/googletest -b release-1.10.0 && cd googletes ``` After the compilation, copy include/ and lib/ into /usr/local/include and /usr/local/lib respectively to expose gtest to all apps +### Monterey + +openssl installed in Monterey may not be found at `/usr/local/opt/openssl`, instead it's probably put under `/opt/homebrew/Cellar`. If the compiler cannot find openssl, consider making a soft link like below: + +```shell +sudo ln -s /opt/homebrew/Cellar/openssl@3/3.0.3 /usr/local/opt/openssl +``` + +Please note that the directory of installed openssl in above command may be different in different environments, you could check it out by running `brew info openssl`. + +### Apple Silicon + +The code at master HEAD already supports M1 series chips. M2 series are not tested yet. Please feel free to report remaining warnings/errors to us by issues. + ### Compile brpc with config_brpc.sh git clone brpc, cd into the repo and run ```shell diff --git a/src/brpc/span.h b/src/brpc/span.h index 07771649..43ede3d5 100644 --- a/src/brpc/span.h +++ b/src/brpc/span.h @@ -34,7 +34,7 @@ #include "brpc/span.pb.h" namespace bthread { -extern thread_local bthread::LocalStorage tls_bls; +extern __thread bthread::LocalStorage tls_bls; } diff --git a/src/bthread/context.cpp b/src/bthread/context.cpp index 5098bc39..b89d432a 100644 --- a/src/bthread/context.cpp +++ b/src/bthread/context.cpp @@ -713,3 +713,100 @@ __asm ( ); #endif + + +#if defined(BTHREAD_CONTEXT_PLATFORM_apple_arm64) && defined(BTHREAD_CONTEXT_COMPILER_gcc) +__asm ( +".text\n" +".globl _bthread_jump_fcontext\n" +".balign 16\n" +"_bthread_jump_fcontext:\n" +" ; prepare stack for GP + FPU\n" +" sub sp, sp, #0xb0\n" +"#if (defined(__VFP_FP__) && !defined(__SOFTFP__))\n" +" ; test if fpu env should be preserved\n" +" cmp w3, #0\n" +" b.eq 1f\n" +" ; save d8 - d15\n" +" stp d8, d9, [sp, #0x00]\n" +" stp d10, d11, [sp, #0x10]\n" +" stp d12, d13, [sp, #0x20]\n" +" stp d14, d15, [sp, #0x30]\n" +"1:\n" +"#endif\n" +" ; save x19-x30\n" +" stp x19, x20, [sp, #0x40]\n" +" stp x21, x22, [sp, #0x50]\n" +" stp x23, x24, [sp, #0x60]\n" +" stp x25, x26, [sp, #0x70]\n" +" stp x27, x28, [sp, #0x80]\n" +" stp fp, lr, [sp, #0x90]\n" +" ; save LR as PC\n" +" str lr, [sp, #0xa0]\n" +" ; store RSP (pointing to context-data) in first argument (x0).\n" +" ; STR cannot have sp as a target register\n" +" mov x4, sp\n" +" str x4, [x0]\n" +" ; restore RSP (pointing to context-data) from A2 (x1)\n" +" mov sp, x1\n" +"#if (defined(__VFP_FP__) && !defined(__SOFTFP__))\n" +" ; test if fpu env should be preserved\n" +" cmp w3, #0\n" +" b.eq 2f\n" +" ; load d8 - d15\n" +" ldp d8, d9, [sp, #0x00]\n" +" ldp d10, d11, [sp, #0x10]\n" +" ldp d12, d13, [sp, #0x20]\n" +" ldp d14, d15, [sp, #0x30]\n" +"2:\n" +"#endif\n" +" ; load x19-x30\n" +" ldp x19, x20, [sp, #0x40]\n" +" ldp x21, x22, [sp, #0x50]\n" +" ldp x23, x24, [sp, #0x60]\n" +" ldp x25, x26, [sp, #0x70]\n" +" ldp x27, x28, [sp, #0x80]\n" +" ldp fp, lr, [sp, #0x90]\n" +" ; use third arg as return value after jump\n" +" ; and as first arg in context function\n" +" mov x0, x2\n" +" ; load pc\n" +" ldr x4, [sp, #0xa0]\n" +" ; restore stack from GP + FPU\n" +" add sp, sp, #0xb0\n" +" ret x4\n" +); + +#endif + +#if defined(BTHREAD_CONTEXT_PLATFORM_apple_arm64) && defined(BTHREAD_CONTEXT_COMPILER_gcc) +__asm ( +".text\n" +".globl _bthread_make_fcontext\n" +".balign 16\n" +"_bthread_make_fcontext:\n" +" ; shift address in x0 (allocated stack) to lower 16 byte boundary\n" +" and x0, x0, ~0xF\n" +" ; reserve space for context-data on context-stack\n" +" sub x0, x0, #0xb0\n" +" ; third arg of make_fcontext() == address of context-function\n" +" ; store address as a PC to jump in\n" +" str x2, [x0, #0xa0]\n" +" ; compute abs address of label finish\n" +" ; 0x0c = 3 instructions * size (4) before label 'finish'\n" +" ; TODO: Numeric offset since llvm still does not support labels in ADR. Fix:\n" +" ; http:\n" +" adr x1, 0x0c\n" +" ; save address of finish as return-address for context-function\n" +" ; will be entered after context-function returns (LR register)\n" +" str x1, [x0, #0x98]\n" +" ret lr ; return pointer to context-data (x0)\n" +"finish:\n" +" ; exit code is zero\n" +" mov x0, #0\n" +" ; exit application\n" +" bl __exit\n" +); + +#endif + diff --git a/src/bthread/context.h b/src/bthread/context.h index 8ff027df..ef98e458 100644 --- a/src/bthread/context.h +++ b/src/bthread/context.h @@ -23,9 +23,9 @@ #if defined(__GNUC__) || defined(__APPLE__) - #define BTHREAD_CONTEXT_COMPILER_gcc + #define BTHREAD_CONTEXT_COMPILER_gcc - #if defined(__linux__) + #if defined(__linux__) #ifdef __x86_64__ #define BTHREAD_CONTEXT_PLATFORM_linux_x86_64 #define BTHREAD_CONTEXT_CALL_CONVENTION @@ -41,27 +41,30 @@ #define BTHREAD_CONTEXT_CALL_CONVENTION #endif - #elif defined(__MINGW32__) || defined (__MINGW64__) + #elif defined(__MINGW32__) || defined (__MINGW64__) #if defined(__x86_64__) #define BTHREAD_CONTEXT_COMPILER_gcc #define BTHREAD_CONTEXT_PLATFORM_windows_x86_64 #define BTHREAD_CONTEXT_CALL_CONVENTION - #endif - - #if defined(__i386__) + #elif defined(__i386__) #define BTHREAD_CONTEXT_COMPILER_gcc #define BTHREAD_CONTEXT_PLATFORM_windows_i386 #define BTHREAD_CONTEXT_CALL_CONVENTION __cdecl #endif - #elif defined(__APPLE__) && defined(__MACH__) + + #elif defined(__APPLE__) && defined(__MACH__) #if defined (__i386__) #define BTHREAD_CONTEXT_PLATFORM_apple_i386 #define BTHREAD_CONTEXT_CALL_CONVENTION #elif defined (__x86_64__) #define BTHREAD_CONTEXT_PLATFORM_apple_x86_64 #define BTHREAD_CONTEXT_CALL_CONVENTION - #endif + #elif defined (__aarch64__) + #define BTHREAD_CONTEXT_PLATFORM_apple_arm64 + #define BTHREAD_CONTEXT_CALL_CONVENTION #endif + #endif + #endif #if defined(_WIN32_WCE) diff --git a/src/bthread/key.cpp b/src/bthread/key.cpp index 53f7594f..a4a08407 100644 --- a/src/bthread/key.cpp +++ b/src/bthread/key.cpp @@ -34,7 +34,7 @@ class KeyTable; // defined in task_group.cpp extern __thread TaskGroup* tls_task_group; -extern thread_local LocalStorage tls_bls; +extern __thread LocalStorage tls_bls; static __thread bool tls_ever_created_keytable = false; // We keep thread specific data in a two-level array. The top-level array diff --git a/src/bthread/task_group.cpp b/src/bthread/task_group.cpp index 9f6d42fb..b8ead163 100644 --- a/src/bthread/task_group.cpp +++ b/src/bthread/task_group.cpp @@ -61,7 +61,7 @@ __thread TaskGroup* tls_task_group = NULL; // Sync with TaskMeta::local_storage when a bthread is created or destroyed. // During running, the two fields may be inconsistent, use tls_bls as the // groundtruth. -thread_local LocalStorage tls_bls = BTHREAD_LOCAL_STORAGE_INITIALIZER; +__thread LocalStorage tls_bls = BTHREAD_LOCAL_STORAGE_INITIALIZER; // defined in bthread/key.cpp extern void return_keytable(bthread_keytable_pool_t*, KeyTable*); diff --git a/src/butil/time.h b/src/butil/time.h index b288d8e4..00949f8e 100644 --- a/src/butil/time.h +++ b/src/butil/time.h @@ -215,6 +215,7 @@ inline int64_t monotonic_time_s() { namespace detail { inline uint64_t clock_cycles() { +#if defined(__x86_64__) || defined(__amd64__) unsigned int lo = 0; unsigned int hi = 0; // We cannot use "=A", since this would use %rax on x86_64 @@ -223,6 +224,31 @@ inline uint64_t clock_cycles() { : "=a" (lo), "=d" (hi) ); return ((uint64_t)hi << 32) | lo; +#elif defined(__aarch64__) + uint64_t virtual_timer_value; + asm volatile("mrs %0, cntvct_el0" : "=r"(virtual_timer_value)); + return virtual_timer_value; +#elif defined(__ARM_ARCH) + #if (__ARM_ARCH >= 6) + unsigned int pmccntr; + unsigned int pmuseren; + unsigned int pmcntenset; + // Read the user mode perf monitor counter access permissions. + asm volatile ("mrc p15, 0, %0, c9, c14, 0" : "=r" (pmuseren)); + if (pmuseren & 1) { // Allows reading perfmon counters for user mode code. + asm volatile ("mrc p15, 0, %0, c9, c12, 1" : "=r" (pmcntenset)); + if (pmcntenset & 0x80000000ul) { // Is it counting? + asm volatile ("mrc p15, 0, %0, c9, c13, 0" : "=r" (pmccntr)); + // The counter is set up to count every 64th cycle + return static_cast<uint64_t>(pmccntr) * 64; // Should optimize to << 6 + } + } + #else + #error "unsupported arm_arch" + #endif +#else + #error "unsupported arch" +#endif } extern int64_t read_invariant_cpu_frequency(); // Be positive iff: diff --git a/test/BUILD b/test/BUILD.bazel similarity index 100% rename from test/BUILD rename to test/BUILD.bazel diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 7c274ef5..f3e0c9bb 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -58,7 +58,7 @@ use_cxx11() if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") #required by butil/crc32.cc to boost performance for 10x - if(NOT (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.4)) + if((CMAKE_SYSTEM_PROCESSOR MATCHES "(x86)|(X86)|(amd64)|(AMD64)") AND NOT (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.4)) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4 -msse4.2") endif() if(NOT (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 7.0)) diff --git a/test/Makefile b/test/Makefile index a6e035f4..99f7b4f7 100644 --- a/test/Makefile +++ b/test/Makefile @@ -23,7 +23,9 @@ CXXFLAGS=$(CPPFLAGS) -pipe -Wall -W -fPIC -fstrict-aliasing -Wno-invalid-offseto #required by butil/crc32.cc to boost performance for 10x ifeq ($(shell test $(GCC_VERSION) -ge 40400; echo $$?),0) - CXXFLAGS+=-msse4 -msse4.2 + ifeq ($(shell uname -p),i386) #note: i386 is processor family type, not the 32-bit x86 arch + CXXFLAGS+=-msse4 -msse4.2 + endif endif #not solved yet ifeq ($(CC),gcc) diff --git a/test/endpoint_unittest.cpp b/test/endpoint_unittest.cpp index ba9a4ee6..8b515ef3 100644 --- a/test/endpoint_unittest.cpp +++ b/test/endpoint_unittest.cpp @@ -384,6 +384,8 @@ TEST(EndPointTest, endpoint_sockaddr_conv_ipv6) { in6_addr expect_in6_addr; bzero(&expect_in6_addr, sizeof(expect_in6_addr)); expect_in6_addr.__in6_u.__u6_addr8[15] = 1; + // jge: mac monterey上应该这样,但准确判定条件不明 + //expect_in6_addr.__u6_addr.__u6_addr8[15] = 1; sockaddr_storage ss; const sockaddr_in6* sa6 = (sockaddr_in6*) &ss; --------------------------------------------------------------------- To unsubscribe, e-mail: dev-unsubscr...@brpc.apache.org For additional commands, e-mail: dev-h...@brpc.apache.org