Hi Changsheng,
Thanks for the patch, it looks good to me. Some comments: + set(RISCV64_ARGS -O3) default to -O3? +uint32_t cpu_detect(bool benableavx512) + int flags = 0; +static inline int riscv64_cpu_detect() uint32_t? Regards, Chen At 2025-09-30 15:37:31, [email protected] wrote: From 32232389434ac0834deb07dba60a10500ccc4226 Mon Sep 17 00:00:00 2001 From: Changsheng Wu <[email protected]> Date: Wed, 6 Aug 2025 01:18:13 +0800 Subject: [PATCH] RISCV64:supports RISCV compile This patch is a basic enablement that supports RISC-V compile. We are working on adding vector optimizations with RISC-V RVV extensions, and will push the implementations later. --- source/CMakeLists.txt | 98 ++++++++++++++++++++++-- source/common/CMakeLists.txt | 21 +++++ source/common/cpu.cpp | 18 +++++ source/common/param.cpp | 2 +- source/common/primitives.cpp | 4 +- source/common/riscv64/asm-primitives.cpp | 59 ++++++++++++++ source/common/riscv64/cpu.h | 70 +++++++++++++++++ source/test/testbench.cpp | 8 +- source/test/testharness.h | 4 +- source/x265.h | 3 + 10 files changed, 274 insertions(+), 13 deletions(-) create mode 100644 source/common/riscv64/asm-primitives.cpp create mode 100644 source/common/riscv64/cpu.h diff --git a/source/CMakeLists.txt b/source/CMakeLists.txt index 4160514b9..b6d360dd6 100755 --- a/source/CMakeLists.txt +++ b/source/CMakeLists.txt @@ -44,11 +44,13 @@ endif() set(X86_ALIASES x86 i386 i686 x86_64 amd64) set(ARM_ALIASES armv6l armv7l) set(ARM64_ALIASES arm64 arm64e aarch64) +set(RISCV64_ALIASES riscv64) list(FIND X86_ALIASES "${SYSPROC}" X86MATCH) list(FIND ARM_ALIASES "${SYSPROC}" ARMMATCH) list(FIND ARM64_ALIASES "${SYSPROC}" ARM64MATCH) set(POWER_ALIASES powerpc64 powerpc64le ppc64 ppc64le) list(FIND POWER_ALIASES "${SYSPROC}" POWERMATCH) +list(FIND RISCV64_ALIASES "${SYSPROC}" RISCV64MATCH) if(X86MATCH GREATER "-1") set(X86 1) add_definitions(-DX265_ARCH_X86=1) @@ -108,6 +110,13 @@ elseif(ARM64MATCH GREATER "-1") set(AARCH64_SVE2_FLAG "-march=armv9-a+i8mm+sve2") # SVE2 
BitPerm implies +dotprod, +sve, and +sve2. set(AARCH64_SVE2_BITPERM_FLAG "-march=armv9-a+i8mm+sve2-bitperm") +elseif(RISCV64MATCH GREATER "-1") + message(STATUS "Detected RISCV64 target processor") + set(RISCV64 1) + + option(RISCV64_RUNTIME_CPU_DETECT "Enable RISCV64 run-time CPU feature detection" ON) + + option(ENABLE_RVV "Enable RVV" ON) else() message(STATUS "CMAKE_SYSTEM_PROCESSOR value `${CMAKE_SYSTEM_PROCESSOR}` is unknown") message(STATUS "Please add this value near ${CMAKE_CURRENT_LIST_FILE}:${CMAKE_CURRENT_LIST_LINE}") @@ -171,7 +180,7 @@ if(UNIX) endif(X64) endif(UNIX) -if((X64 AND NOT WIN32) OR ARM64 OR PPC64) +if((X64 AND NOT WIN32) OR ARM64 OR PPC64 OR RISCV64) option(ENABLE_PIC "Enable Position Independent Code" ON) else() option(ENABLE_PIC "Enable Position Independent Code" OFF) @@ -440,6 +449,68 @@ int main() { return 0; }") set(ARM_ARGS ${ARM_ARGS} -flax-vector-conversions=none) endif() endif() + if(RISCV64) + add_definitions(-DX265_ARCH_RISCV64=1) + + if (RISCV64_RUNTIME_CPU_DETECT) + add_definitions(-DRISCV64_RUNTIME_CPU_DETECT=1) + message(STATUS "Configuring build for run-time CPU feature detection") + endif() + + if(RISCV64_RUNTIME_CPU_DETECT OR CROSS_COMPILE_RISCV64) + # Add all extensions when compiling for run-time CPU feature detection or cross compiling. + set(CPU_HAS_RVV 1) + else() + if(CMAKE_SYSTEM_NAME MATCHES "Linux|Darwin") + find_package(RVV) + else() + message(STATUS "Compile-time CPU feature detection unsupported on this platform") + endif() + endif() + + if(ENABLE_RVV) + set(OLD_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS}) + set(OLD_CMAKE_TRY_COMPILE_TARGET_TYPE ${CMAKE_TRY_COMPILE_TARGET_TYPE}) + set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) + # When compilation target is a STATIC_LIBRARY, the LINK_OPTIONS are + # passed to the archiver, so we must backup, clear and restore these. 
+ # https://gitlab.kitware.com/cmake/cmake/-/issues/23454 + set(OLD_CMAKE_REQUIRED_LINK_OPTIONS ${CMAKE_REQUIRED_LINK_OPTIONS}) + set(CMAKE_REQUIRED_LINK_OPTIONS "") + + # Check whether the compiler can compile RVV functions that require + # backup/restore of RVV registers according to AAPCS. + # https://github.com/llvm/llvm-project/issues/80009. + set(RVV_COMPILATION_TEST [[ +int main() { + asm volatile(".option arch, +v; vsetvli t0, a0, e32, m1, ta, ma"); + return 0; +}]]) + + check_c_source_compiles("${RVV_COMPILATION_TEST}" RVV_COMPILATION_C_TEST_COMPILED) + check_cxx_source_compiles("${RVV_COMPILATION_TEST}" RVV_COMPILATION_CXX_TEST_COMPILED) + + set(CMAKE_REQUIRED_FLAGS ${OLD_CMAKE_REQUIRED_FLAGS}) + set(CMAKE_TRY_COMPILE_TARGET_TYPE ${OLD_CMAKE_TRY_COMPILE_TARGET_TYPE}) + set(CMAKE_REQUIRED_LINK_OPTIONS ${OLD_CMAKE_REQUIRED_LINK_OPTIONS}) + if (NOT(RVV_COMPILATION_C_TEST_COMPILED AND RVV_COMPILATION_CXX_TEST_COMPILED)) + set(ENABLE_RVV OFF CACHE BOOL "" FORCE) + message(STATUS "Disabling RVV") + endif() + endif() + + if(NOT ENABLE_RVV) + message(STATUS "Disabling RVV") + set(CPU_HAS_RVV 0) + endif() + + if(CPU_HAS_RVV) + message(STATUS "Found RVV") + add_definitions(-DHAVE_RVV=1) + endif() + + set(RISCV64_ARGS -O3) + endif() if(ENABLE_PIC) list(APPEND ARM_ARGS -DPIC -fPIC) endif() @@ -492,7 +563,7 @@ int main() { return 0; }") if (CC_HAS_FAST_MATH) add_definitions(-ffast-math) endif() - if (NOT (ARM64 OR CROSS_COMPILE_ARM64)) + if (NOT (ARM64 OR CROSS_COMPILE_ARM64 OR RISCV64 OR CROSS_COMPILE_RISCV64)) check_cxx_compiler_flag(-mstackrealign CC_HAS_STACK_REALIGN) if (CC_HAS_STACK_REALIGN) add_definitions(-mstackrealign) @@ -548,6 +619,8 @@ if(ARM OR CROSS_COMPILE_ARM OR ARM64 OR CROSS_COMPILE_ARM64) if (NOT ENABLE_NEON) set(ENABLE_ASSEMBLY OFF CACHE BOOL "" FORCE) endif() +elseif((RISCV64 OR CROSS_COMPILE_RISCV64) AND ENABLE_RVV) + option(ENABLE_ASSEMBLY "Enable use of assembly coded primitives" ON) elseif(NASM_FOUND AND X86) if (NASM_VERSION_STRING 
VERSION_LESS "2.13.0") message(STATUS "Nasm version ${NASM_VERSION_STRING} is too old. 2.13.0 or later required") @@ -584,7 +657,7 @@ if(EXTRA_LIB) endif(EXTRA_LIB) mark_as_advanced(EXTRA_LIB EXTRA_LINK_FLAGS) -if(X64 OR ARM64 OR PPC64) +if(X64 OR ARM64 OR PPC64 OR RISCV64) # NOTE: We only officially support high-bit-depth compiles of x265 # on 64bit architectures. Main10 plus large resolution plus slow # preset plus 32bit address space usually means malloc failure. You @@ -593,7 +666,7 @@ if(X64 OR ARM64 OR PPC64) # license" so to speak. If it breaks you get to keep both halves. # You will need to disable assembly manually. option(HIGH_BIT_DEPTH "Store pixel samples as 16bit values (Main10/Main12)" OFF) -endif(X64 OR ARM64 OR PPC64) +endif(X64 OR ARM64 OR PPC64 OR RISCV64) if(HIGH_BIT_DEPTH) option(MAIN12 "Support Main12 instead of Main10" OFF) if(MAIN12) @@ -640,7 +713,7 @@ else() endif() add_definitions(-DX265_NS=${X265_NS}) -if(ARM64) +if(ARM64 OR RISCV64) if(HIGH_BIT_DEPTH) if(MAIN12) list(APPEND ASM_FLAGS -DHIGH_BIT_DEPTH=1 -DBIT_DEPTH=12 -DX265_NS=${X265_NS}) @@ -650,7 +723,7 @@ if(ARM64) else() list(APPEND ASM_FLAGS -DHIGH_BIT_DEPTH=0 -DBIT_DEPTH=8 -DX265_NS=${X265_NS}) endif() -endif(ARM64) +endif(ARM64 OR RISCV64) option(WARNINGS_AS_ERRORS "Stop compiles on first warning" OFF) if(WARNINGS_AS_ERRORS) @@ -847,6 +920,19 @@ if((MSVC_IDE OR XCODE OR GCC) AND ENABLE_ASSEMBLY) DEPENDS ${ASM_SRC}) endforeach() endif() + elseif(RISCV64 OR CROSS_COMPILE_RISCV64) + # compile RISCV64 arch asm files here + enable_language(ASM) + foreach(ASM ${RISCV64_ASMS}) + set(ASM_SRC ${CMAKE_CURRENT_SOURCE_DIR}/common/riscv64/${ASM}) + list(APPEND ASM_SRCS ${ASM_SRC}) + list(APPEND ASM_OBJS ${ASM}.${SUFFIX}) + add_custom_command( + OUTPUT ${ASM}.${SUFFIX} + COMMAND ${CMAKE_CXX_COMPILER} + ARGS ${RISCV64_ARGS} ${ASM_FLAGS} -c ${ASM_SRC} -o ${ASM}.${SUFFIX} + DEPENDS ${ASM_SRC}) + endforeach() elseif(X86) # compile X86 arch asm files here foreach(ASM ${MSVC_ASMS}) diff --git 
a/source/common/CMakeLists.txt b/source/common/CMakeLists.txt index 2de780931..d88c00e61 100644 --- a/source/common/CMakeLists.txt +++ b/source/common/CMakeLists.txt @@ -164,6 +164,27 @@ if(ENABLE_ASSEMBLY AND (ARM64 OR CROSS_COMPILE_ARM64)) endif() endif(ENABLE_ASSEMBLY AND (ARM64 OR CROSS_COMPILE_ARM64)) +if(ENABLE_ASSEMBLY AND (RISCV64 OR CROSS_COMPILE_RISCV64)) + if(GCC AND (CMAKE_CXX_FLAGS_RELEASE MATCHES "-O3")) + message(STATUS "Detected CXX compiler using -O3 optimization level") + add_definitions(-DAUTO_VECTORIZE=1) + endif() + + # Add riscv64 intrinsics files here. + set(C_SRCS_RVV asm-primitives.cpp) + enable_language(ASM) + + foreach(SRC ${C_SRCS_RVV}) + set(ASM_PRIMITIVES ${ASM_PRIMITIVES} riscv64/${SRC}) + endforeach() + + source_group(Assembly FILES ${ASM_PRIMITIVES}) + + if(RISCV64_WARNINGS_AS_ERRORS) + set_source_files_properties(${ASM_PRIMITIVES} PROPERTIES COMPILE_FLAGS -Werror) + endif() +endif(ENABLE_ASSEMBLY AND (RISCV64 OR CROSS_COMPILE_RISCV64)) + if(POWER) set_source_files_properties(version.cpp PROPERTIES COMPILE_FLAGS -DX265_VERSION=${X265_VERSION}) if(ENABLE_ALTIVEC) diff --git a/source/common/cpu.cpp b/source/common/cpu.cpp index 00defd837..c7ce9f3eb 100644 --- a/source/common/cpu.cpp +++ b/source/common/cpu.cpp @@ -129,6 +129,9 @@ const cpu_name_t cpu_names[] = #elif X265_ARCH_POWER8 { "Altivec", X265_CPU_ALTIVEC }, +#elif X265_ARCH_RISCV64 + { "RVV", X265_CPU_RVV }, + #endif // if X265_ARCH_X86 { "", 0 }, }; @@ -409,6 +412,21 @@ uint32_t cpu_detect(bool benableavx512) return flags; } +#elif X265_ARCH_RISCV64 +#include "riscv64/cpu.h" + +uint32_t cpu_detect(bool benableavx512) +{ + (void)benableavx512; + int flags = 0; + +#ifdef ENABLE_ASSEMBLY + flags = riscv64_cpu_detect(); +#endif + + return flags; +} + #elif X265_ARCH_POWER8 uint32_t cpu_detect(bool benableavx512) diff --git a/source/common/param.cpp b/source/common/param.cpp index c06862b9e..b6ed8553c 100755 --- a/source/common/param.cpp +++ b/source/common/param.cpp @@ -1955,7 
+1955,7 @@ int x265_check_params(x265_param* param) CHECK(param->hmeRange[level] < 0 || param->hmeRange[level] >= 32768, "Search Range for HME levels must be between 0 and 32768"); } -#if !X86_64 && !X265_ARCH_ARM64 +#if !X86_64 && !X265_ARCH_ARM64 && !X265_ARCH_RISCV64 CHECK(param->searchMethod == X265_SEA && (param->sourceWidth > 840 || param->sourceHeight > 480), "SEA motion search does not support resolutions greater than 480p in 32 bit build"); #endif diff --git a/source/common/primitives.cpp b/source/common/primitives.cpp index 83ebc455e..55f702c7b 100644 --- a/source/common/primitives.cpp +++ b/source/common/primitives.cpp @@ -91,7 +91,7 @@ void setupAliasPrimitives(EncoderPrimitives &p) /* at HIGH_BIT_DEPTH, pixel == short so we can alias many primitives */ for (int i = 0; i < NUM_CU_SIZES; i++) { -#if !defined(X265_ARCH_ARM64) +#if !defined(X265_ARCH_ARM64) && !defined(X265_ARCH_RISCV64) p.cu[i].sse_pp = (pixel_sse_t)p.cu[i].sse_ss; #endif @@ -260,7 +260,7 @@ void x265_setup_primitives(x265_param *param) primitives.cu[i].intra_pred_allangs = NULL; #if ENABLE_ASSEMBLY -#if defined(X265_ARCH_X86) || defined(X265_ARCH_ARM64) +#if defined(X265_ARCH_X86) || defined(X265_ARCH_ARM64) || defined(X265_ARCH_RISCV64) setupIntrinsicPrimitives(primitives, param->cpuid); #endif setupAssemblyPrimitives(primitives, param->cpuid); diff --git a/source/common/riscv64/asm-primitives.cpp b/source/common/riscv64/asm-primitives.cpp new file mode 100644 index 000000000..edc04cbcd --- /dev/null +++ b/source/common/riscv64/asm-primitives.cpp @@ -0,0 +1,59 @@ +/***************************************************************************** + * Copyright (C) 2020 MulticoreWare, Inc + * + * Authors: Changsheng Wu <[email protected]> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later 
version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. + * + * This program is also available under a commercial proprietary license. + * For more information, contact us at license @ x265.com. + *****************************************************************************/ + + +#include "common.h" +#include "primitives.h" +#include "x265.h" +#include "cpu.h" + +#if defined(__GNUC__) +#define GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) +#endif + +namespace X265_NS +{ +// private x265 namespace + + +void setupRVVPrimitives(EncoderPrimitives &p) +{ +} + +void setupAssemblyPrimitives(EncoderPrimitives &p, int cpuMask) +{ + if (cpuMask & X265_CPU_RVV) + { + setupRVVPrimitives(p); + } +} + +void setupIntrinsicPrimitives(EncoderPrimitives &p, int cpuMask) +{ + (void)p; + if (cpuMask & X265_CPU_RVV) + { + } +} + +} // namespace X265_NS diff --git a/source/common/riscv64/cpu.h b/source/common/riscv64/cpu.h new file mode 100644 index 000000000..2f591c404 --- /dev/null +++ b/source/common/riscv64/cpu.h @@ -0,0 +1,70 @@ +/***************************************************************************** + * Copyright (C) 2025 MulticoreWare, Inc + * + * Authors: Changsheng Wu <[email protected]> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. + * + * This program is also available under a commercial proprietary license. + * For more information, contact us at license @ x265.com. + *****************************************************************************/ + +#ifndef X265_COMMON_RISCV64_CPU_H +#define X265_COMMON_RISCV64_CPU_H + +#include "x265.h" + +#if RISCV64_RUNTIME_CPU_DETECT + +#if defined(__linux__) + +#include <sys/auxv.h> + +#define HWCAP_RISCV64_RVV (1 << ('V' - 'A')) + +static inline int riscv64_cpu_detect() +{ + int flags = 0; + + unsigned long hwcap = getauxval(AT_HWCAP); + + if (hwcap & HWCAP_RISCV64_RVV) + flags |= X265_CPU_RVV; + + return flags; +} + +#else // defined(__linux__) +#error \ + "Run-time CPU feature detection selected, but no detection method" \ + "available for your platform. Rerun cmake configure with" \ + "-DRISCV64_RUNTIME_CPU_DETECT=OFF." 
+#endif // defined(__linux__) + +#else // if AARCH64_RUNTIME_CPU_DETECT + +static inline int riscv64_cpu_detect() +{ + int flags = 0; + +#if HAVE_RVV + flags |= X265_CPU_RVV; +#endif + return flags; +} + +#endif // if RISCV64_RUNTIME_CPU_DETECT + +#endif // ifndef X265_COMMON_RISCV64_CPU_H diff --git a/source/test/testbench.cpp b/source/test/testbench.cpp index fb5b4252f..a93f131c0 100644 --- a/source/test/testbench.cpp +++ b/source/test/testbench.cpp @@ -96,7 +96,7 @@ struct test_arch_t { "AVX2", X265_CPU_AVX2 }, { "BMI2", X265_CPU_AVX2 | X265_CPU_BMI1 | X265_CPU_BMI2 }, { "AVX512", X265_CPU_AVX512 }, -#else +#elif X265_ARCH_ARM64 || X265_ARCH_ARM { "ARMv6", X265_CPU_ARMV6 }, { "NEON", X265_CPU_NEON }, { "Neon_DotProd", X265_CPU_NEON_DOTPROD }, @@ -105,6 +105,8 @@ struct test_arch_t { "SVE2", X265_CPU_SVE2 }, { "SVE2_BitPerm", X265_CPU_SVE2_BITPERM }, { "FastNeonMRC", X265_CPU_FAST_NEON_MRC }, +#elif X265_ARCH_RISCV64 + { "RVV", X265_CPU_RVV}, #endif { "", 0 }, }; @@ -226,7 +228,7 @@ int main(int argc, char *argv[]) printf("Testing primitives: %s\n", testArch[i].name); fflush(stdout); -#if defined(X265_ARCH_X86) || defined(X265_ARCH_ARM64) +#if defined(X265_ARCH_X86) || defined(X265_ARCH_ARM64) || defined(X265_ARCH_RISCV64) EncoderPrimitives vecprim; memset(&vecprim, 0, sizeof(vecprim)); setupIntrinsicPrimitives(vecprim, testArch[i].flag); @@ -268,7 +270,7 @@ int main(int argc, char *argv[]) { EncoderPrimitives optprim; memset(&optprim, 0, sizeof(optprim)); -#if defined(X265_ARCH_X86) || defined(X265_ARCH_ARM64) +#if defined(X265_ARCH_X86) || defined(X265_ARCH_ARM64) || defined(X265_ARCH_RISCV64) setupIntrinsicPrimitives(optprim, cpuid); #endif diff --git a/source/test/testharness.h b/source/test/testharness.h index 368c865ee..e54d05a8e 100644 --- a/source/test/testharness.h +++ b/source/test/testharness.h @@ -90,6 +90,8 @@ static inline uint32_t __rdtsc(void) #elif X265_ARCH_ARM64 asm volatile("isb" : : : "memory"); asm volatile("mrs %x0, cntvct_el0" : "=r"(a)); 
+#elif X265_ARCH_RISCV64 + asm volatile("rdtime %0" : "=r"(a)); #endif return a; } @@ -139,7 +141,7 @@ int PFX(stack_pagealign)(int (*func)(), int align); * needs an explicit asm check because it only sometimes crashes in normal use. */ intptr_t PFX(checkasm_call)(intptr_t (*func)(), int *ok, ...); float PFX(checkasm_call_float)(float (*func)(), int *ok, ...); -#elif (X265_ARCH_ARM == 0 && X265_ARCH_ARM64 == 0) +#elif (X265_ARCH_ARM == 0 && X265_ARCH_ARM64 == 0 && X265_ARCH_RISCV64 == 0) #define PFX(stack_pagealign)(func, align) func() #endif diff --git a/source/x265.h b/source/x265.h index 31cd760e4..afd629d61 100644 --- a/source/x265.h +++ b/source/x265.h @@ -560,6 +560,9 @@ typedef enum #define X265_CPU_NEON_I8MM (1 << 6) /* AArch64 Neon I8MM */ #define X265_CPU_SVE2_BITPERM (1 << 7) /* AArch64 SVE2 BitPerm */ +/* RISCV */ +#define X265_CPU_RVV (1 << 0) /* RISCV vector */ + /* IBM Power8 */ #define X265_CPU_ALTIVEC 0x0000001 -- 2.43.0
_______________________________________________ x265-devel mailing list [email protected] https://mailman.videolan.org/listinfo/x265-devel
