Commit: e2ff730fd9830ef399924cf04b2dac5235097dde
Author: Sayak Biswas
Date: Thu Oct 21 20:57:17 2021 +0200
Branches: cycles-hip-binaries
https://developer.blender.org/rBe2ff730fd9830ef399924cf04b2dac5235097dde
Cycles: patch for precompiled HIP binaries and additional fixes Committing to a branch to test it on the buildbot. Ref T92393, D12958 =================================================================== M CMakeLists.txt M extern/hipew/include/hipew.h M extern/hipew/src/hipew.c M intern/cycles/blender/addon/properties.py M intern/cycles/device/hip/device.cpp M intern/cycles/device/hip/device_impl.cpp M intern/cycles/device/hip/device_impl.h M intern/cycles/kernel/CMakeLists.txt M intern/cycles/kernel/device/hip/globals.h =================================================================== diff --git a/CMakeLists.txt b/CMakeLists.txt index 94a5ff27491..715e9dd01d3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -420,7 +420,9 @@ mark_as_advanced(WITH_CYCLES_CUDA_BUILD_SERIAL) set(CYCLES_TEST_DEVICES CPU CACHE STRING "Run regression tests on the specified device types (CPU CUDA OPTIX)" ) set(CYCLES_CUDA_BINARIES_ARCH sm_30 sm_35 sm_37 sm_50 sm_52 sm_60 sm_61 sm_70 sm_75 sm_86 compute_75 CACHE STRING "CUDA architectures to build binaries for") mark_as_advanced(CYCLES_CUDA_BINARIES_ARCH) -option(WITH_CYCLES_HIP_BINARIES "Build Cycles HIP binaries" OFF) +option(WITH_CYCLES_HIP_BINARIES "Build Cycles HIP binaries" ON) +set(CYCLES_HIP_BINARIES_ARCH gfx1030 gfx1031 gfx1032 gfx1034 CACHE STRING "HIP architectures to build binaries for") +mark_as_advanced(CYCLES_HIP_BINARIES_ARCH) unset(PLATFORM_DEFAULT) option(WITH_CYCLES_LOGGING "Build Cycles with logging support" ON) option(WITH_CYCLES_DEBUG_NAN "Build Cycles with additional asserts for detecting NaNs and invalid values" OFF) diff --git a/extern/hipew/include/hipew.h b/extern/hipew/include/hipew.h index aa42fdf8ecd..d18cf67524d 100644 --- a/extern/hipew/include/hipew.h +++ b/extern/hipew/include/hipew.h @@ -425,6 +425,105 @@ typedef struct HIPdevprop_st { int textureAlign; } HIPdevprop; +typedef struct { + // 32-bit Atomics + unsigned hasGlobalInt32Atomics : 1; ///< 32-bit integer atomics for global memory. 
+ unsigned hasGlobalFloatAtomicExch : 1; ///< 32-bit float atomic exch for global memory. + unsigned hasSharedInt32Atomics : 1; ///< 32-bit integer atomics for shared memory. + unsigned hasSharedFloatAtomicExch : 1; ///< 32-bit float atomic exch for shared memory. + unsigned hasFloatAtomicAdd : 1; ///< 32-bit float atomic add in global and shared memory. + + // 64-bit Atomics + unsigned hasGlobalInt64Atomics : 1; ///< 64-bit integer atomics for global memory. + unsigned hasSharedInt64Atomics : 1; ///< 64-bit integer atomics for shared memory. + + // Doubles + unsigned hasDoubles : 1; ///< Double-precision floating point. + + // Warp cross-lane operations + unsigned hasWarpVote : 1; ///< Warp vote instructions (__any, __all). + unsigned hasWarpBallot : 1; ///< Warp ballot instructions (__ballot). + unsigned hasWarpShuffle : 1; ///< Warp shuffle operations. (__shfl_*). + unsigned hasFunnelShift : 1; ///< Funnel two words into one with shift&mask caps. + + // Sync + unsigned hasThreadFenceSystem : 1; ///< __threadfence_system. + unsigned hasSyncThreadsExt : 1; ///< __syncthreads_count, syncthreads_and, syncthreads_or. + + // Misc + unsigned hasSurfaceFuncs : 1; ///< Surface functions. + unsigned has3dGrid : 1; ///< Grid and group dims are 3D (rather than 2D). + unsigned hasDynamicParallelism : 1; ///< Dynamic parallelism. +} hipDeviceArch_t; + +typedef struct hipDeviceProp_t { + char name[256]; ///< Device name. + size_t totalGlobalMem; ///< Size of global memory region (in bytes). + size_t sharedMemPerBlock; ///< Size of shared memory region (in bytes). + int regsPerBlock; ///< Registers per block. + int warpSize; ///< Warp size. + int maxThreadsPerBlock; ///< Max work items per work group or workgroup max size. + int maxThreadsDim[3]; ///< Max number of threads in each dimension (XYZ) of a block. + int maxGridSize[3]; ///< Max grid dimensions (XYZ). + int clockRate; ///< Max clock frequency of the multiProcessors in khz. 
+ int memoryClockRate; ///< Max global memory clock frequency in khz. + int memoryBusWidth; ///< Global memory bus width in bits. + size_t totalConstMem; ///< Size of shared memory region (in bytes). + int major; ///< Major compute capability. On HCC, this is an approximation and features may + ///< differ from CUDA CC. See the arch feature flags for portable ways to query + ///< feature caps. + int minor; ///< Minor compute capability. On HCC, this is an approximation and features may + ///< differ from CUDA CC. See the arch feature flags for portable ways to query + ///< feature caps. + int multiProcessorCount; ///< Number of multi-processors (compute units). + int l2CacheSize; ///< L2 cache size. + int maxThreadsPerMultiProcessor; ///< Maximum resident threads per multi-processor. + int computeMode; ///< Compute mode. + int clockInstructionRate; ///< Frequency in khz of the timer used by the device-side "clock*" + ///< instructions. New for HIP. + hipDeviceArch_t arch; ///< Architectural feature flags. New for HIP. + int concurrentKernels; ///< Device can possibly execute multiple kernels concurrently. + int pciDomainID; ///< PCI Domain ID + int pciBusID; ///< PCI Bus ID. + int pciDeviceID; ///< PCI Device ID. + size_t maxSharedMemoryPerMultiProcessor; ///< Maximum Shared Memory Per Multiprocessor. + int isMultiGpuBoard; ///< 1 if device is on a multi-GPU board, 0 if not. + int canMapHostMemory; ///< Check whether HIP can map host memory + int gcnArch; ///< DEPRECATED: use gcnArchName instead + char gcnArchName[256]; ///< AMD GCN Arch Name. 
+ int integrated; ///< APU vs dGPU + int cooperativeLaunch; ///< HIP device supports cooperative launch + int cooperativeMultiDeviceLaunch; ///< HIP device supports cooperative launch on multiple devices + int maxTexture1DLinear; ///< Maximum size for 1D textures bound to linear memory + int maxTexture1D; ///< Maximum number of elements in 1D images + int maxTexture2D[2]; ///< Maximum dimensions (width, height) of 2D images, in image elements + int maxTexture3D[3]; ///< Maximum dimensions (width, height, depth) of 3D images, in image elements + unsigned int* hdpMemFlushCntl; ///< Addres of HDP_MEM_COHERENCY_FLUSH_CNTL register + unsigned int* hdpRegFlushCntl; ///< Addres of HDP_REG_COHERENCY_FLUSH_CNTL register + size_t memPitch; ///<Maximum pitch in bytes allowed by memory copies + size_t textureAlignment; ///<Alignment requirement for textures + size_t texturePitchAlignment; ///<Pitch alignment requirement for texture references bound to pitched memory + int kernelExecTimeoutEnabled; ///<Run time limit for kernels executed on the device + int ECCEnabled; ///<Device has ECC support enabled + int tccDriver; ///< 1:If device is Tesla device using TCC driver, else 0 + int cooperativeMultiDeviceUnmatchedFunc; ///< HIP device supports cooperative launch on multiple + ///devices with unmatched functions + int cooperativeMultiDeviceUnmatchedGridDim; ///< HIP device supports cooperative launch on multiple + ///devices with unmatched grid dimensions + int cooperativeMultiDeviceUnmatchedBlockDim; ///< HIP device supports cooperative launch on multiple + ///devices with unmatched block dimensions + int cooperativeMultiDeviceUnmatchedSharedMem; ///< HIP device supports cooperative launch on multiple + ///devices with unmatched shared memories + int isLargeBar; ///< 1: if it is a large PCI bar device, else 0 + int asicRevision; ///< Revision of the GPU in this device + int managedMemory; ///< Device supports allocating managed memory on this system + int 
directManagedMemAccessFromHost; ///< Host can directly access managed memory on the device without migration + int concurrentManagedAccess; ///< Device can coherently access managed memory concurrently with the CPU + int pageableMemoryAccess; ///< Device supports coherently accessing pageable memory + ///< without calling hipHostRegister on it + int pageableMemoryAccessUsesHostPageTables; ///< Device accesses pageable memory via the host's page tables +} hipDeviceProp_t; + typedef enum HIPpointer_attribute_enum { HIP_POINTER_ATTRIBUTE_CONTEXT = 1, HIP_POINTER_ATTRIBUTE_MEMORY_TYPE = 2, @@ -951,6 +1050,25 @@ typedef enum HIPGLmap_flags_enum { HIP_GL_MAP_RESOURCE_FLAGS_WRITE_DISCARD = 0x02, } HIPGLmap_flags; +/** +* hipRTC related +*/ +typedef struct _hiprtcProgram* hiprtcProgram; + +typedef enum hiprtcResult { + HIPRTC_SUCCESS = 0, + HIPRTC_ERROR_OUT_OF_MEMORY = 1, + HIPRTC_ERROR_PROGRAM_CREATION_FAILURE = 2, + HIPRTC_ERROR_INVALID_INPUT = 3, + HIPRTC_ERROR_INVALID_PROGRAM = 4, + HIPRTC_ERROR_INVALID_OPTION = 5, + HIPRTC_ERROR_COMPILATION = 6, + HIPRTC_ERROR_BUILTIN_OPERATION_FAILURE = 7, + HIPRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION = 8, + HIPRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION = 9, + HIPRTC_ERROR_NAME_EXPRESSION_NOT_VALID = 10, + HIPRTC_ERROR_INTERNAL_ERROR = 11 +} hiprtcResult; /* Function types. 
*/ typedef hipError_t HIPAPI thipGetErrorName(hipError_t error, const char** pStr); @@ -958,6 +1076,7 @@ typedef hipError_t HIPAPI thipInit(unsigned int Flags); typedef hipError_t HIPAPI thipDriverGetVersion(int* driverVersion); typedef hipError_t HIPAPI thipGetDevice(hipDevice_t* device, int ordinal); typedef hipError_t HIPAPI thipGetDeviceCount(int* count); +typedef hipError_t HIPAPI thipGetDeviceProperties(hipDeviceProp_t* props, int deviceId); typedef hipError_t HIPAPI thipDeviceGetName(char* name, int len, hipDevice_t dev); typedef hipError_t HIPAPI thipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attrib, hipDevice_t dev); typedef hipError_t HIPAPI thipDeviceComputeCapability(int* major, int* minor, hipDevice_t dev); @@ -1071,6 +1190,16 @@ typedef hipError_t HIPAPI thipGraphicsMapResources(unsigned int count, hipGraphi typedef @@ Diff output truncated at 10240 characters. @@ _______________________________________________ Bf-blender-cvs mailing list Bf-blender-cvs@blender.org List details, subscription details or unsubscribe: https://lists.blender.org/mailman/listinfo/bf-blender-cvs