Commit: e14f8c2dd765a5f20d652899434174daa039804b Author: Patrick Mours Date: Mon Dec 6 14:58:35 2021 +0100 Branches: master https://developer.blender.org/rBe14f8c2dd765a5f20d652899434174daa039804b
Cycles: Reintroduce device-only memory handling that got lost in Cycles X merge Somehow only a part of rBf4f8b6dde32b0438e0b97a6d8ebeb89802987127 ended up in Cycles X, so the issue that commit fixed ("OPTIX_ERROR_INVALID_VALUE" when the system is out of memory) showed up again. This adds the missing changes to fix that problem. Maniphest Tasks: T93620 Differential Revision: https://developer.blender.org/D13488 =================================================================== M intern/cycles/device/cpu/device_impl.cpp M intern/cycles/device/cuda/device_impl.cpp M intern/cycles/device/optix/device_impl.cpp =================================================================== diff --git a/intern/cycles/device/cpu/device_impl.cpp b/intern/cycles/device/cpu/device_impl.cpp index 62b9cc93dae..6f3c8b42124 100644 --- a/intern/cycles/device/cpu/device_impl.cpp +++ b/intern/cycles/device/cpu/device_impl.cpp @@ -129,8 +129,7 @@ void CPUDevice::mem_alloc(device_memory &mem) << string_human_readable_size(mem.memory_size()) << ")"; } - if (mem.type == MEM_DEVICE_ONLY) { - assert(!mem.host_pointer); + if (mem.type == MEM_DEVICE_ONLY || !mem.host_pointer) { size_t alignment = MIN_ALIGNMENT_CPU_DATA_TYPES; void *data = util_aligned_malloc(mem.memory_size(), alignment); mem.device_pointer = (device_ptr)data; @@ -189,7 +188,7 @@ void CPUDevice::mem_free(device_memory &mem) tex_free((device_texture &)mem); } else if (mem.device_pointer) { - if (mem.type == MEM_DEVICE_ONLY) { + if (mem.type == MEM_DEVICE_ONLY || !mem.host_pointer) { util_aligned_free((void *)mem.device_pointer); } mem.device_pointer = 0; diff --git a/intern/cycles/device/cuda/device_impl.cpp b/intern/cycles/device/cuda/device_impl.cpp index ee55e6dc632..8d022040414 100644 --- a/intern/cycles/device/cuda/device_impl.cpp +++ b/intern/cycles/device/cuda/device_impl.cpp @@ -678,7 +678,7 @@ CUDADevice::CUDAMem *CUDADevice::generic_alloc(device_memory &mem, size_t pitch_ void *shared_pointer = 0; - if (mem_alloc_result 
!= CUDA_SUCCESS && can_map_host) { + if (mem_alloc_result != CUDA_SUCCESS && can_map_host && mem.type != MEM_DEVICE_ONLY) { if (mem.shared_pointer) { /* Another device already allocated host memory. */ mem_alloc_result = CUDA_SUCCESS; @@ -701,8 +701,14 @@ CUDADevice::CUDAMem *CUDADevice::generic_alloc(device_memory &mem, size_t pitch_ } if (mem_alloc_result != CUDA_SUCCESS) { - status = " failed, out of device and host memory"; - set_error("System is out of GPU and shared host memory"); + if (mem.type == MEM_DEVICE_ONLY) { + status = " failed, out of device memory"; + set_error("System is out of GPU memory"); + } + else { + status = " failed, out of device and host memory"; + set_error("System is out of GPU and shared host memory"); + } } if (mem.name) { diff --git a/intern/cycles/device/optix/device_impl.cpp b/intern/cycles/device/optix/device_impl.cpp index a0c748fb6cd..da3c1ac57d1 100644 --- a/intern/cycles/device/optix/device_impl.cpp +++ b/intern/cycles/device/optix/device_impl.cpp @@ -46,14 +46,14 @@ CCL_NAMESPACE_BEGIN OptiXDevice::Denoiser::Denoiser(OptiXDevice *device) - : device(device), queue(device), state(device, "__denoiser_state") + : device(device), queue(device), state(device, "__denoiser_state", true) { } OptiXDevice::OptiXDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler) : CUDADevice(info, stats, profiler), sbt_data(this, "__sbt", MEM_READ_ONLY), - launch_params(this, "__params"), + launch_params(this, "__params", false), denoiser_(this) { /* Make the CUDA context current. 
*/ @@ -523,7 +523,7 @@ class OptiXDevice::DenoiseContext { : denoise_params(task.params), render_buffers(task.render_buffers), buffer_params(task.buffer_params), - guiding_buffer(device, "denoiser guiding passes buffer"), + guiding_buffer(device, "denoiser guiding passes buffer", true), num_samples(task.num_samples) { num_input_passes = 1; @@ -1015,6 +1015,13 @@ bool OptiXDevice::build_optix_bvh(BVHOptiX *bvh, const OptixBuildInput &build_input, uint16_t num_motion_steps) { + /* Allocate and build acceleration structures only one at a time, to prevent parallel builds + * from running out of memory (since both original and compacted acceleration structure memory + * may be allocated at the same time for the duration of this function). The builds would + * otherwise happen on the same CUDA stream anyway. */ + static thread_mutex mutex; + thread_scoped_lock lock(mutex); + const CUDAContextScope scope(this); const bool use_fast_trace_bvh = (bvh->params.bvh_type == BVH_TYPE_STATIC); @@ -1040,13 +1047,14 @@ bool OptiXDevice::build_optix_bvh(BVHOptiX *bvh, optix_assert(optixAccelComputeMemoryUsage(context, &options, &build_input, 1, &sizes)); /* Allocate required output buffers. */ - device_only_memory<char> temp_mem(this, "optix temp as build mem"); + device_only_memory<char> temp_mem(this, "optix temp as build mem", true); temp_mem.alloc_to_device(align_up(sizes.tempSizeInBytes, 8) + 8); if (!temp_mem.device_pointer) { /* Make sure temporary memory allocation succeeded. */ return false; } + /* Acceleration structure memory has to be allocated on the device (not allowed on the host). */ device_only_memory<char> &out_data = *bvh->as_data; if (operation == OPTIX_BUILD_OPERATION_BUILD) { assert(out_data.device == this); @@ -1095,12 +1103,13 @@ bool OptiXDevice::build_optix_bvh(BVHOptiX *bvh, /* There is no point compacting if the size does not change. 
*/ if (compacted_size < sizes.outputSizeInBytes) { - device_only_memory<char> compacted_data(this, "optix compacted as"); + device_only_memory<char> compacted_data(this, "optix compacted as", false); compacted_data.alloc_to_device(compacted_size); - if (!compacted_data.device_pointer) + if (!compacted_data.device_pointer) { /* Do not compact if memory allocation for compacted acceleration structure fails. * Can just use the uncompacted one then, so succeed here regardless. */ return !have_error(); + } optix_assert(optixAccelCompact( context, NULL, out_handle, compacted_data.device_pointer, compacted_size, &out_handle)); @@ -1111,6 +1120,8 @@ bool OptiXDevice::build_optix_bvh(BVHOptiX *bvh, std::swap(out_data.device_size, compacted_data.device_size); std::swap(out_data.device_pointer, compacted_data.device_pointer); + /* Original acceleration structure memory is freed when 'compacted_data' goes out of scope. + */ } } @@ -1208,7 +1219,7 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit) const float4 pw = make_float4( curve_radius[ka], curve_radius[k0], curve_radius[k1], curve_radius[kb]); - /* Convert Catmull-Rom data to Bezier spline. */ + /* Convert Catmull-Rom data to B-spline. */ static const float4 cr2bsp0 = make_float4(+7, -4, +5, -2) / 6.f; static const float4 cr2bsp1 = make_float4(-2, 11, -4, +1) / 6.f; static const float4 cr2bsp2 = make_float4(+1, -4, 11, -2) / 6.f; _______________________________________________ Bf-blender-cvs mailing list Bf-blender-cvs@blender.org List details, subscription details or unsubscribe: https://lists.blender.org/mailman/listinfo/bf-blender-cvs