Commit: 3d5dbc1c44907c73d2e6e57a146cbadaea9623bd
Author: Patrick Mours
Date: Mon Dec 6 14:58:35 2021 +0100
Branches: blender-v3.0-release
https://developer.blender.org/rB3d5dbc1c44907c73d2e6e57a146cbadaea9623bd
Cycles: Reintroduce device-only memory handling that got lost in Cycles X merge Somehow only a part of rBf4f8b6dde32b0438e0b97a6d8ebeb89802987127 ended up in Cycles X, causing the issue that commit fixed, "OPTIX_ERROR_INVALID_VALUE" when the system is out of memory, to show up again. This adds the missing changes to fix that problem. Maniphest Tasks: T93620 Differential Revision: https://developer.blender.org/D13488 =================================================================== M intern/cycles/device/cpu/device_impl.cpp M intern/cycles/device/cuda/device_impl.cpp M intern/cycles/device/optix/device_impl.cpp =================================================================== diff --git a/intern/cycles/device/cpu/device_impl.cpp b/intern/cycles/device/cpu/device_impl.cpp index 68dec7f0af2..5db89d1e4fb 100644 --- a/intern/cycles/device/cpu/device_impl.cpp +++ b/intern/cycles/device/cpu/device_impl.cpp @@ -134,8 +134,7 @@ void CPUDevice::mem_alloc(device_memory &mem) << string_human_readable_size(mem.memory_size()) << ")"; } - if (mem.type == MEM_DEVICE_ONLY) { - assert(!mem.host_pointer); + if (mem.type == MEM_DEVICE_ONLY || !mem.host_pointer) { size_t alignment = MIN_ALIGNMENT_CPU_DATA_TYPES; void *data = util_aligned_malloc(mem.memory_size(), alignment); mem.device_pointer = (device_ptr)data; @@ -194,7 +193,7 @@ void CPUDevice::mem_free(device_memory &mem) tex_free((device_texture &)mem); } else if (mem.device_pointer) { - if (mem.type == MEM_DEVICE_ONLY) { + if (mem.type == MEM_DEVICE_ONLY || !mem.host_pointer) { util_aligned_free((void *)mem.device_pointer); } mem.device_pointer = 0; diff --git a/intern/cycles/device/cuda/device_impl.cpp b/intern/cycles/device/cuda/device_impl.cpp index 20945796a2d..8c5779f4a72 100644 --- a/intern/cycles/device/cuda/device_impl.cpp +++ b/intern/cycles/device/cuda/device_impl.cpp @@ -680,7 +680,7 @@ CUDADevice::CUDAMem *CUDADevice::generic_alloc(device_memory &mem, size_t pitch_ void *shared_pointer = 0; - if (mem_alloc_result 
!= CUDA_SUCCESS && can_map_host) { + if (mem_alloc_result != CUDA_SUCCESS && can_map_host && mem.type != MEM_DEVICE_ONLY) { if (mem.shared_pointer) { /* Another device already allocated host memory. */ mem_alloc_result = CUDA_SUCCESS; @@ -703,8 +703,14 @@ CUDADevice::CUDAMem *CUDADevice::generic_alloc(device_memory &mem, size_t pitch_ } if (mem_alloc_result != CUDA_SUCCESS) { - status = " failed, out of device and host memory"; - set_error("System is out of GPU and shared host memory"); + if (mem.type == MEM_DEVICE_ONLY) { + status = " failed, out of device memory"; + set_error("System is out of GPU memory"); + } + else { + status = " failed, out of device and host memory"; + set_error("System is out of GPU and shared host memory"); + } } if (mem.name) { diff --git a/intern/cycles/device/optix/device_impl.cpp b/intern/cycles/device/optix/device_impl.cpp index f230f865f60..b33b5e21eee 100644 --- a/intern/cycles/device/optix/device_impl.cpp +++ b/intern/cycles/device/optix/device_impl.cpp @@ -44,14 +44,14 @@ CCL_NAMESPACE_BEGIN OptiXDevice::Denoiser::Denoiser(OptiXDevice *device) - : device(device), queue(device), state(device, "__denoiser_state") + : device(device), queue(device), state(device, "__denoiser_state", true) { } OptiXDevice::OptiXDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler) : CUDADevice(info, stats, profiler), sbt_data(this, "__sbt", MEM_READ_ONLY), - launch_params(this, "__params"), + launch_params(this, "__params", false), denoiser_(this) { /* Make the CUDA context current. 
*/ @@ -507,7 +507,7 @@ class OptiXDevice::DenoiseContext { : denoise_params(task.params), render_buffers(task.render_buffers), buffer_params(task.buffer_params), - guiding_buffer(device, "denoiser guiding passes buffer"), + guiding_buffer(device, "denoiser guiding passes buffer", true), num_samples(task.num_samples) { num_input_passes = 1; @@ -1001,6 +1001,13 @@ bool OptiXDevice::build_optix_bvh(BVHOptiX *bvh, const OptixBuildInput &build_input, uint16_t num_motion_steps) { + /* Allocate and build acceleration structures only one at a time, to prevent parallel builds + * from running out of memory (since both original and compacted acceleration structure memory + * may be allocated at the same time for the duration of this function). The builds would + * otherwise happen on the same CUDA stream anyway. */ + static thread_mutex mutex; + thread_scoped_lock lock(mutex); + const CUDAContextScope scope(this); const bool use_fast_trace_bvh = (bvh->params.bvh_type == BVH_TYPE_STATIC); @@ -1026,13 +1033,14 @@ bool OptiXDevice::build_optix_bvh(BVHOptiX *bvh, optix_assert(optixAccelComputeMemoryUsage(context, &options, &build_input, 1, &sizes)); /* Allocate required output buffers. */ - device_only_memory<char> temp_mem(this, "optix temp as build mem"); + device_only_memory<char> temp_mem(this, "optix temp as build mem", true); temp_mem.alloc_to_device(align_up(sizes.tempSizeInBytes, 8) + 8); if (!temp_mem.device_pointer) { /* Make sure temporary memory allocation succeeded. */ return false; } + /* Acceleration structure memory has to be allocated on the device (not allowed on the host). */ device_only_memory<char> &out_data = *bvh->as_data; if (operation == OPTIX_BUILD_OPERATION_BUILD) { assert(out_data.device == this); @@ -1081,12 +1089,13 @@ bool OptiXDevice::build_optix_bvh(BVHOptiX *bvh, /* There is no point compacting if the size does not change. 
*/ if (compacted_size < sizes.outputSizeInBytes) { - device_only_memory<char> compacted_data(this, "optix compacted as"); + device_only_memory<char> compacted_data(this, "optix compacted as", false); compacted_data.alloc_to_device(compacted_size); - if (!compacted_data.device_pointer) + if (!compacted_data.device_pointer) { /* Do not compact if memory allocation for compacted acceleration structure fails. * Can just use the uncompacted one then, so succeed here regardless. */ return !have_error(); + } optix_assert(optixAccelCompact( context, NULL, out_handle, compacted_data.device_pointer, compacted_size, &out_handle)); @@ -1097,6 +1106,8 @@ bool OptiXDevice::build_optix_bvh(BVHOptiX *bvh, std::swap(out_data.device_size, compacted_data.device_size); std::swap(out_data.device_pointer, compacted_data.device_pointer); + /* Original acceleration structure memory is freed when 'compacted_data' goes out of scope. + */ } } @@ -1185,7 +1196,7 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit) const float4 pw = make_float4( curve_radius[ka], curve_radius[k0], curve_radius[k1], curve_radius[kb]); - /* Convert Catmull-Rom data to Bezier spline. */ + /* Convert Catmull-Rom data to B-spline. */ static const float4 cr2bsp0 = make_float4(+7, -4, +5, -2) / 6.f; static const float4 cr2bsp1 = make_float4(-2, 11, -4, +1) / 6.f; static const float4 cr2bsp2 = make_float4(+1, -4, 11, -2) / 6.f; _______________________________________________ Bf-blender-cvs mailing list Bf-blender-cvs@blender.org List details, subscription details or unsubscribe: https://lists.blender.org/mailman/listinfo/bf-blender-cvs