[Bf-blender-cvs] [f2538c71739] master: Fix T104335: MNEE + OptiX OSL results in illegal address error
Commit: f2538c71739a19baa506201d80d1c48d73f4d504 Author: Patrick Mours Date: Mon Feb 6 15:06:52 2023 +0100 Branches: master https://developer.blender.org/rBf2538c71739a19baa506201d80d1c48d73f4d504 Fix T104335: MNEE + OptiX OSL results in illegal address error The OptiX pipeline created for OSL was missing sufficient continuation stack to handle the MNEE ray generation program. === M intern/cycles/device/optix/device_impl.cpp === diff --git a/intern/cycles/device/optix/device_impl.cpp b/intern/cycles/device/optix/device_impl.cpp index 23e7bbfa7bb..06589140ad9 100644 --- a/intern/cycles/device/optix/device_impl.cpp +++ b/intern/cycles/device/optix/device_impl.cpp @@ -854,12 +854,14 @@ bool OptiXDevice::load_osl_kernels() context, group_descs, 2, _options, nullptr, 0, _groups[i * 2])); } + OptixStackSizes stack_size[NUM_PROGRAM_GROUPS] = {}; vector osl_stack_size(osl_groups.size()); /* Update SBT with new entries. */ sbt_data.alloc(NUM_PROGRAM_GROUPS + osl_groups.size()); for (int i = 0; i < NUM_PROGRAM_GROUPS; ++i) { optix_assert(optixSbtRecordPackHeader(groups[i], _data[i])); +optix_assert(optixProgramGroupGetStackSize(groups[i], _size[i])); } for (size_t i = 0; i < osl_groups.size(); ++i) { if (osl_groups[i] != NULL) { @@ -907,13 +909,15 @@ bool OptiXDevice::load_osl_kernels() 0, [PIP_SHADE])); +const unsigned int css = std::max(stack_size[PG_RGEN_SHADE_SURFACE_RAYTRACE].cssRG, + stack_size[PG_RGEN_SHADE_SURFACE_MNEE].cssRG); unsigned int dss = 0; for (unsigned int i = 0; i < osl_stack_size.size(); ++i) { dss = std::max(dss, osl_stack_size[i].dssDC); } optix_assert(optixPipelineSetStackSize( -pipelines[PIP_SHADE], 0, dss, 0, pipeline_options.usesMotionBlur ? 3 : 2)); +pipelines[PIP_SHADE], 0, dss, css, pipeline_options.usesMotionBlur ? 3 : 2)); } return !have_error(); ___ Bf-blender-cvs mailing list Bf-blender-cvs@blender.org List details, subscription details or unsubscribe: https://lists.blender.org/mailman/listinfo/bf-blender-cvs
[Bf-blender-cvs] [fa9fc59b560] master: Fix T104240: OptiX OSL texture loading broken with displacement
Commit: fa9fc59b560a9743b1cbe7d46e0d5de98a2f3567 Author: Patrick Mours Date: Tue Jan 31 16:35:47 2023 +0100 Branches: master https://developer.blender.org/rBfa9fc59b560a9743b1cbe7d46e0d5de98a2f3567 Fix T104240: OptiX OSL texture loading broken with displacement By default, the image manager that is used to handle OSL textures on the GPU loads images after displacement is evaluated. This is a problem when the displacement shader uses any textures, which is why the geometry manager already makes the image manager load any images used in the displacement shader graph early (`GeometryManager::device_update_displacement_images`). However, this only handled Cycles image nodes, not OSL nodes, so if any `texture` calls were made in OSL those would be missed and would therefore crash when accessed on the GPU. Unfortunately it is not simple to determine which textures referenced by OSL are needed for displacement, so the solution for now is to simply load all of them early if true displacement is used. This patch also fixes the result of the displacement shader not being used properly in OptiX. 
Maniphest Tasks: T104240 Differential Revision: https://developer.blender.org/D17162 === M intern/cycles/kernel/osl/osl.h M intern/cycles/scene/geometry.cpp === diff --git a/intern/cycles/kernel/osl/osl.h b/intern/cycles/kernel/osl/osl.h index ffaf87b7048..18288d202b5 100644 --- a/intern/cycles/kernel/osl/osl.h +++ b/intern/cycles/kernel/osl/osl.h @@ -161,7 +161,10 @@ ccl_device_inline void osl_eval_nodes(KernelGlobals kg, /* shadeindex = */ 0); # endif - if (globals.Ci) { + if constexpr (type == SHADER_TYPE_DISPLACEMENT) { +sd->P = globals.P; + } + else if (globals.Ci) { flatten_closure_tree(kg, sd, path_flag, globals.Ci); } } diff --git a/intern/cycles/scene/geometry.cpp b/intern/cycles/scene/geometry.cpp index a1df24878c9..4c5013b5a9f 100644 --- a/intern/cycles/scene/geometry.cpp +++ b/intern/cycles/scene/geometry.cpp @@ -23,7 +23,10 @@ #include "subd/patch_table.h" #include "subd/split.h" -#include "kernel/osl/globals.h" +#ifdef WITH_OSL +# include "kernel/osl/globals.h" +# include "kernel/osl/services.h" +#endif #include "util/foreach.h" #include "util/log.h" @@ -1671,6 +1674,7 @@ void GeometryManager::device_update_displacement_images(Device *device, TaskPool pool; ImageManager *image_manager = scene->image_manager; set bump_images; + bool has_osl_node = false; foreach (Geometry *geom, scene->geometry) { if (geom->is_modified()) { /* Geometry-level check for hair shadow transparency. @@ -1690,6 +1694,9 @@ void GeometryManager::device_update_displacement_images(Device *device, continue; } foreach (ShaderNode *node, shader->graph->nodes) { + if (node->special_type == SHADER_SPECIAL_TYPE_OSL) { +has_osl_node = true; + } if (node->special_type != SHADER_SPECIAL_TYPE_IMAGE_SLOT) { continue; } @@ -1705,6 +1712,28 @@ void GeometryManager::device_update_displacement_images(Device *device, } } } + +#ifdef WITH_OSL + /* If any OSL node is used for displacement, it may reference a texture. But it's + * unknown which ones, so have to load them all. 
*/ + if (has_osl_node) { +set services_shared; +device->foreach_device([_shared](Device *sub_device) { + OSLGlobals *og = (OSLGlobals *)sub_device->get_cpu_osl_memory(); + services_shared.insert(og->services); +}); + +for (OSLRenderServices *services : services_shared) { + for (auto it = services->textures.begin(); it != services->textures.end(); ++it) { +if (it->second->handle.get_manager() == image_manager) { + const int slot = it->second->handle.svm_slot(); + bump_images.insert(slot); +} + } +} + } +#endif + foreach (int slot, bump_images) { pool.push(function_bind( ::device_update_slot, image_manager, device, scene, slot, )); ___ Bf-blender-cvs mailing list Bf-blender-cvs@blender.org List details, subscription details or unsubscribe: https://lists.blender.org/mailman/listinfo/bf-blender-cvs
[Bf-blender-cvs] [129093fbce0] master: Cycles: Fix crash when rendering with OSL on multiple GPUs
Commit: 129093fbce0b73219fa8c270072ba5400120033d Author: Patrick Mours Date: Mon Jan 30 19:40:02 2023 +0100 Branches: master https://developer.blender.org/rB129093fbce0b73219fa8c270072ba5400120033d Cycles: Fix crash when rendering with OSL on multiple GPUs The `MultiDevice` implementation of `get_cpu_osl_memory` returns a nullptr when there is no CPU device in the mix. As such, dereferencing that null pointer crashed in `update_osl_globals`. However, that function only updates maps that are not currently used on the GPU anyway, so the update can simply be skipped when the CPU is not used for rendering. Maniphest Tasks: T104216 === M intern/cycles/scene/geometry.cpp === diff --git a/intern/cycles/scene/geometry.cpp b/intern/cycles/scene/geometry.cpp index 8e831187477..a1df24878c9 100644 --- a/intern/cycles/scene/geometry.cpp +++ b/intern/cycles/scene/geometry.cpp @@ -306,6 +306,11 @@ void GeometryManager::update_osl_globals(Device *device, Scene *scene) { #ifdef WITH_OSL OSLGlobals *og = (OSLGlobals *)device->get_cpu_osl_memory(); + if (og == nullptr) { +/* Can happen when rendering with multiple GPUs, but no CPU (in which case the name maps filled + * below are not used anyway) */ +return; + } og->object_name_map.clear(); og->object_names.clear(); ___ Bf-blender-cvs mailing list Bf-blender-cvs@blender.org List details, subscription details or unsubscribe: https://lists.blender.org/mailman/listinfo/bf-blender-cvs
[Bf-blender-cvs] [4635dd6aed4] master: Fix T104157: Deleting an active OSL node causes issues
Commit: 4635dd6aed4c97ea234508e774db991926a7b3cf Author: Patrick Mours Date: Fri Jan 27 15:58:03 2023 +0100 Branches: master https://developer.blender.org/rB4635dd6aed4c97ea234508e774db991926a7b3cf Fix T104157: Deleting an active OSL node causes issues Removing all OSL script nodes from the shader graph would cause that graph to no longer report it using `KERNEL_FEATURE_SHADER_RAYTRACE` via `ShaderManager::get_graph_kernel_features`, but the shader object itself still would have the `has_surface_raytrace` field set. This caused kernels to be reloaded without shader raytracing support, but later the `DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE` kernel would still be invoked since the shader continued to report it requiring that through the `SD_HAS_RAYTRACE` flag set because of `has_surface_raytrace`. Fix that by ensuring `has_surface_raytrace` is reset on every shader update, so that when all OSL script nodes are deleted it is set to false, and only stays true when there are still OSL script nodes (or other nodes using it). Maniphest Tasks: T104157 Differential Revision: https://developer.blender.org/D17140 === M intern/cycles/scene/osl.cpp === diff --git a/intern/cycles/scene/osl.cpp b/intern/cycles/scene/osl.cpp index 73a8553c5d5..53e993b8135 100644 --- a/intern/cycles/scene/osl.cpp +++ b/intern/cycles/scene/osl.cpp @@ -1241,6 +1241,7 @@ void OSLCompiler::compile(OSLGlobals *og, Shader *shader) shader->has_surface = false; shader->has_surface_transparent = false; +shader->has_surface_raytrace = false; shader->has_surface_bssrdf = false; shader->has_bump = has_bump; shader->has_bssrdf_bump = has_bump; ___ Bf-blender-cvs mailing list Bf-blender-cvs@blender.org List details, subscription details or unsubscribe: https://lists.blender.org/mailman/listinfo/bf-blender-cvs
[Bf-blender-cvs] [9066f2e0437] master: Cycles: Add support for OSL texture intrinsic on the GPU
Commit: 9066f2e0437a45d66f3b6a4bb0de7acf5ec40030 Author: Patrick Mours Date: Wed Jan 18 17:28:03 2023 +0100 Branches: master https://developer.blender.org/rB9066f2e0437a45d66f3b6a4bb0de7acf5ec40030 Cycles: Add support for OSL texture intrinsic on the GPU This makes it possible to use `texture` and `texture3d` in custom OSL shaders with a constant image file name as argument on the GPU, where previously texturing was only possible through Cycles nodes. For constant file name arguments, OSL calls `OSL::RendererServices::get_texture_handle()` with the file name string to convert it into an opaque handle for use on the GPU. That is now used to load the respective image file using the Cycles image manager and generate a SVM handle that can be used on the GPU. Some care is necessary as the renderer services class is shared across multiple Cycles instances, whereas the Cycles image manager is local to each. Maniphest Tasks: T101222 Differential Revision: https://developer.blender.org/D17032 === M intern/cycles/kernel/osl/services.cpp M intern/cycles/kernel/osl/services.h M intern/cycles/kernel/osl/services_gpu.h M intern/cycles/kernel/osl/types.h M intern/cycles/kernel/svm/ies.h M intern/cycles/scene/image.cpp M intern/cycles/scene/image.h M intern/cycles/scene/osl.cpp === diff --git a/intern/cycles/kernel/osl/services.cpp b/intern/cycles/kernel/osl/services.cpp index 95d58875b91..92708df3162 100644 --- a/intern/cycles/kernel/osl/services.cpp +++ b/intern/cycles/kernel/osl/services.cpp @@ -20,6 +20,7 @@ #include "kernel/osl/globals.h" #include "kernel/osl/services.h" +#include "kernel/osl/types.h" #include "util/foreach.h" #include "util/log.h" @@ -119,6 +120,8 @@ ustring OSLRenderServices::u_u("u"); ustring OSLRenderServices::u_v("v"); ustring OSLRenderServices::u_empty; +ImageManager *OSLRenderServices::image_manager = nullptr; + OSLRenderServices::OSLRenderServices(OSL::TextureSystem *texture_system, int device_type) : OSL::RendererServices(texture_system), 
device_type_(device_type) { @@ -1154,7 +1157,7 @@ TextureSystem::TextureHandle *OSLRenderServices::get_texture_handle(ustring file /* For non-OIIO textures, just return a pointer to our own OSLTextureHandle. */ if (it != textures.end()) { if (it->second->type != OSLTextureHandle::OIIO) { -return (TextureSystem::TextureHandle *)it->second.get(); +return reinterpret_cast(it->second.get()); } } @@ -1173,16 +1176,53 @@ TextureSystem::TextureHandle *OSLRenderServices::get_texture_handle(ustring file /* Assign OIIO texture handle and return. */ it->second->oiio_handle = handle; -return (TextureSystem::TextureHandle *)it->second.get(); +return reinterpret_cast(it->second.get()); } else { -if (it != textures.end() && it->second->type == OSLTextureHandle::SVM && -it->second->svm_slots[0].w == -1) { - return reinterpret_cast( - static_cast(it->second->svm_slots[0].y + 1)); +/* Construct GPU texture handle for existing textures. */ +if (it != textures.end()) { + switch (it->second->type) { +case OSLTextureHandle::OIIO: + return NULL; +case OSLTextureHandle::SVM: + if (!it->second->handle.empty() && it->second->handle.get_manager() != image_manager) { +it.clear(); +break; + } + return reinterpret_cast(OSL_TEXTURE_HANDLE_TYPE_SVM | + it->second->svm_slots[0].y); +case OSLTextureHandle::IES: + if (!it->second->handle.empty() && it->second->handle.get_manager() != image_manager) { +it.clear(); +break; + } + return reinterpret_cast(OSL_TEXTURE_HANDLE_TYPE_IES | + it->second->svm_slots[0].y); +case OSLTextureHandle::AO: + return reinterpret_cast( + OSL_TEXTURE_HANDLE_TYPE_AO_OR_BEVEL | 1); +case OSLTextureHandle::BEVEL: + return reinterpret_cast( + OSL_TEXTURE_HANDLE_TYPE_AO_OR_BEVEL | 2); + } +} + +if (!image_manager) { + return NULL; +} + +/* Load new textures using SVM image manager. 
*/ +ImageHandle handle = image_manager->add_image(filename.string(), ImageParams()); +if (handle.empty()) { + return NULL; +} + +if (!textures.insert(filename, new OSLTextureHandle(handle))) { + return NULL; } -return NULL; +return reinterpret_cast(OSL_TEXTURE_HANDLE_TYPE_SVM | +
[Bf-blender-cvs] [153e1dc31a5] master: Fix T103513: Images lose their alpha channel when OSL is enabled for GPU Compute
Commit: 153e1dc31a518fc307c7d87bb5bd700688e438f9 Author: Patrick Mours Date: Mon Jan 2 13:52:59 2023 +0100 Branches: master https://developer.blender.org/rB153e1dc31a518fc307c7d87bb5bd700688e438f9 Fix T103513: Images lose their alpha channel when OSL is enabled for GPU Compute The "osl_texture" intrinsic was not implemented correctly. It should handle alpha separately from color, the number of channels input parameter only counts color channels. === M intern/cycles/kernel/osl/services_gpu.h === diff --git a/intern/cycles/kernel/osl/services_gpu.h b/intern/cycles/kernel/osl/services_gpu.h index 744c7103b28..b9ffd959f1a 100644 --- a/intern/cycles/kernel/osl/services_gpu.h +++ b/intern/cycles/kernel/osl/services_gpu.h @@ -1532,7 +1532,7 @@ ccl_device_extern void osl_texture_set_missingcolor_alpha(ccl_private OSLTexture ccl_device_extern bool osl_texture(ccl_private ShaderGlobals *sg, DeviceString filename, ccl_private void *texture_handle, - OSLTextureOptions *opt, + ccl_private OSLTextureOptions *opt, float s, float t, float dsdx, @@ -1557,13 +1557,14 @@ ccl_device_extern bool osl_texture(ccl_private ShaderGlobals *sg, const float4 rgba = kernel_tex_image_interp(nullptr, id, s, 1.0f - t); - result[0] = rgba.x; + if (nchannels > 0) +result[0] = rgba.x; if (nchannels > 1) result[1] = rgba.y; if (nchannels > 2) result[2] = rgba.z; - if (nchannels > 3) -result[3] = rgba.w; + if (alpha) +*alpha = rgba.w; return true; } @@ -1571,7 +1572,7 @@ ccl_device_extern bool osl_texture(ccl_private ShaderGlobals *sg, ccl_device_extern bool osl_texture3d(ccl_private ShaderGlobals *sg, DeviceString filename, ccl_private void *texture_handle, - OSLTextureOptions *opt, + ccl_private OSLTextureOptions *opt, ccl_private const float3 *P, ccl_private const float3 *dPdx, ccl_private const float3 *dPdy, @@ -1594,13 +1595,14 @@ ccl_device_extern bool osl_texture3d(ccl_private ShaderGlobals *sg, const float4 rgba = kernel_tex_image_interp_3d(nullptr, id, *P, INTERPOLATION_NONE); - result[0] = 
rgba.x; + if (nchannels > 0) +result[0] = rgba.x; if (nchannels > 1) result[1] = rgba.y; if (nchannels > 2) result[2] = rgba.z; - if (nchannels > 3) -result[3] = rgba.w; + if (alpha) +*alpha = rgba.w; return true; } @@ -1608,7 +1610,7 @@ ccl_device_extern bool osl_texture3d(ccl_private ShaderGlobals *sg, ccl_device_extern bool osl_environment(ccl_private ShaderGlobals *sg, DeviceString filename, ccl_private void *texture_handle, - OSLTextureOptions *opt, + ccl_private OSLTextureOptions *opt, ccl_private const float3 *R, ccl_private const float3 *dRdx, ccl_private const float3 *dRdy, @@ -1621,13 +1623,14 @@ ccl_device_extern bool osl_environment(ccl_private ShaderGlobals *sg, ccl_private float *dalphay, ccl_private void *errormessage) { - result[0] = 1.0f; + if (nchannels > 0) +result[0] = 1.0f; if (nchannels > 1) result[1] = 0.0f; if (nchannels > 2) result[2] = 1.0f; - if (nchannels > 3) -result[3] = 1.0f; + if (alpha) +*alpha = 1.0f; return false; } ___ Bf-blender-cvs mailing list Bf-blender-cvs@blender.org List details, subscription details or unsubscribe: https://lists.blender.org/mailman/listinfo/bf-blender-cvs
[Bf-blender-cvs] [6bd6d7aec75] geometry-nodes-simulation: Fix T103258: Deleting a shader with OptiX OSL results in an illegal address error
Commit: 6bd6d7aec754e4e4f3061bb2de34b73c76a60944 Author: Patrick Mours Date: Fri Dec 16 15:41:21 2022 +0100 Branches: geometry-nodes-simulation https://developer.blender.org/rB6bd6d7aec754e4e4f3061bb2de34b73c76a60944 Fix T103258: Deleting a shader with OptiX OSL results in an illegal address error Materials without connections to the output node would crash with OSL in OptiX, since the Cycles `OSLCompiler` generates an empty shader group reference for them, which resulted in the OptiX device implementation setting an empty SBT entry for the corresponding direct callables, which then crashed when calling those direct callables was attempted in `osl_eval_nodes`. This fixes that by setting the SBT entries for empty shader groups to a dummy direct callable that does nothing. === M intern/cycles/device/optix/device_impl.cpp === diff --git a/intern/cycles/device/optix/device_impl.cpp b/intern/cycles/device/optix/device_impl.cpp index 601e1193e26..23e7bbfa7bb 100644 --- a/intern/cycles/device/optix/device_impl.cpp +++ b/intern/cycles/device/optix/device_impl.cpp @@ -866,6 +866,11 @@ bool OptiXDevice::load_osl_kernels() optix_assert(optixSbtRecordPackHeader(osl_groups[i], _data[NUM_PROGRAM_GROUPS + i])); optix_assert(optixProgramGroupGetStackSize(osl_groups[i], _stack_size[i])); } +else { + /* Default to "__direct_callable__dummy_services", so that OSL evaluation for empty + * materials has direct callables to call and does not crash. */ + optix_assert(optixSbtRecordPackHeader(osl_groups.back(), _data[NUM_PROGRAM_GROUPS + i])); +} } sbt_data.copy_to_device(); /* Upload updated SBT to device. */ ___ Bf-blender-cvs mailing list Bf-blender-cvs@blender.org List details, subscription details or unsubscribe: https://lists.blender.org/mailman/listinfo/bf-blender-cvs
[Bf-blender-cvs] [cfb77c54b06] geometry-nodes-simulation: Fix T103257: Enabling or disabling viewport denoising while using OptiX OSL results in an error
Commit: cfb77c54b060e835ffc209a1d8d994faa63427b0 Author: Patrick Mours Date: Fri Dec 16 14:01:51 2022 +0100 Branches: geometry-nodes-simulation https://developer.blender.org/rBcfb77c54b060e835ffc209a1d8d994faa63427b0 Fix T103257: Enabling or disabling viewport denoising while using OptiX OSL results in an error Switching viewport denoising causes kernels to be reloaded with a new feature mask, which would destroy the existing OptiX pipelines. But OSL kernels were not reloaded as well, leaving the shading pipeline uninitialized and therefore causing an error when it is later attempted to execute it. This fixes that by ensuring OSL kernels are always reloaded when the normal kernels are too. === M intern/cycles/device/optix/device_impl.cpp === diff --git a/intern/cycles/device/optix/device_impl.cpp b/intern/cycles/device/optix/device_impl.cpp index f4d1969f3f3..601e1193e26 100644 --- a/intern/cycles/device/optix/device_impl.cpp +++ b/intern/cycles/device/optix/device_impl.cpp @@ -579,7 +579,11 @@ bool OptiXDevice::load_kernels(const uint kernel_features) link_options.maxTraceDepth = 1; link_options.debugLevel = module_options.debugLevel; - if (kernel_features & (KERNEL_FEATURE_NODE_RAYTRACE | KERNEL_FEATURE_MNEE) && !use_osl) { + if (use_osl) { +/* Re-create OSL pipeline in case kernels are reloaded after it has been created before. */ +load_osl_kernels(); + } + else if (kernel_features & (KERNEL_FEATURE_NODE_RAYTRACE | KERNEL_FEATURE_MNEE)) { /* Create shader raytracing and MNEE pipeline. */ vector pipeline_groups; pipeline_groups.reserve(NUM_PROGRAM_GROUPS); @@ -743,6 +747,11 @@ bool OptiXDevice::load_osl_kernels() } } + if (osl_kernels.empty()) { +/* No OSL shader groups, so no need to create a pipeline. */ +return true; + } + OptixProgramGroupOptions group_options = {}; /* There are no options currently. 
*/ OptixModuleCompileOptions module_options = {}; module_options.optLevel = OPTIX_COMPILE_OPTIMIZATION_LEVEL_3; ___ Bf-blender-cvs mailing list Bf-blender-cvs@blender.org List details, subscription details or unsubscribe: https://lists.blender.org/mailman/listinfo/bf-blender-cvs
[Bf-blender-cvs] [a8530d31c29] master: Fix T103258: Deleting a shader with OptiX OSL results in an illegal address error
Commit: a8530d31c2971756df7f2b440a0de3d6fcfc3061 Author: Patrick Mours Date: Fri Dec 16 15:41:21 2022 +0100 Branches: master https://developer.blender.org/rBa8530d31c2971756df7f2b440a0de3d6fcfc3061 Fix T103258: Deleting a shader with OptiX OSL results in an illegal address error Materials without connections to the output node would crash with OSL in OptiX, since the Cycles `OSLCompiler` generates an empty shader group reference for them, which resulted in the OptiX device implementation setting an empty SBT entry for the corresponding direct callables, which then crashed when calling those direct callables was attempted in `osl_eval_nodes`. This fixes that by setting the SBT entries for empty shader groups to a dummy direct callable that does nothing. === M intern/cycles/device/optix/device_impl.cpp === diff --git a/intern/cycles/device/optix/device_impl.cpp b/intern/cycles/device/optix/device_impl.cpp index 601e1193e26..23e7bbfa7bb 100644 --- a/intern/cycles/device/optix/device_impl.cpp +++ b/intern/cycles/device/optix/device_impl.cpp @@ -866,6 +866,11 @@ bool OptiXDevice::load_osl_kernels() optix_assert(optixSbtRecordPackHeader(osl_groups[i], _data[NUM_PROGRAM_GROUPS + i])); optix_assert(optixProgramGroupGetStackSize(osl_groups[i], _stack_size[i])); } +else { + /* Default to "__direct_callable__dummy_services", so that OSL evaluation for empty + * materials has direct callables to call and does not crash. */ + optix_assert(optixSbtRecordPackHeader(osl_groups.back(), _data[NUM_PROGRAM_GROUPS + i])); +} } sbt_data.copy_to_device(); /* Upload updated SBT to device. */ ___ Bf-blender-cvs mailing list Bf-blender-cvs@blender.org List details, subscription details or unsubscribe: https://lists.blender.org/mailman/listinfo/bf-blender-cvs
[Bf-blender-cvs] [c9eb5834607] master: Fix T103257: Enabling or disabling viewport denoising while using OptiX OSL results in an error
Commit: c9eb5834607804eec1ead46289bb66a968ff2a1c Author: Patrick Mours Date: Fri Dec 16 14:01:51 2022 +0100 Branches: master https://developer.blender.org/rBc9eb5834607804eec1ead46289bb66a968ff2a1c Fix T103257: Enabling or disabling viewport denoising while using OptiX OSL results in an error Switching viewport denoising causes kernels to be reloaded with a new feature mask, which would destroy the existing OptiX pipelines. But OSL kernels were not reloaded as well, leaving the shading pipeline uninitialized and therefore causing an error when it is later attempted to execute it. This fixes that by ensuring OSL kernels are always reloaded when the normal kernels are too. === M intern/cycles/device/optix/device_impl.cpp === diff --git a/intern/cycles/device/optix/device_impl.cpp b/intern/cycles/device/optix/device_impl.cpp index f4d1969f3f3..601e1193e26 100644 --- a/intern/cycles/device/optix/device_impl.cpp +++ b/intern/cycles/device/optix/device_impl.cpp @@ -579,7 +579,11 @@ bool OptiXDevice::load_kernels(const uint kernel_features) link_options.maxTraceDepth = 1; link_options.debugLevel = module_options.debugLevel; - if (kernel_features & (KERNEL_FEATURE_NODE_RAYTRACE | KERNEL_FEATURE_MNEE) && !use_osl) { + if (use_osl) { +/* Re-create OSL pipeline in case kernels are reloaded after it has been created before. */ +load_osl_kernels(); + } + else if (kernel_features & (KERNEL_FEATURE_NODE_RAYTRACE | KERNEL_FEATURE_MNEE)) { /* Create shader raytracing and MNEE pipeline. */ vector pipeline_groups; pipeline_groups.reserve(NUM_PROGRAM_GROUPS); @@ -743,6 +747,11 @@ bool OptiXDevice::load_osl_kernels() } } + if (osl_kernels.empty()) { +/* No OSL shader groups, so no need to create a pipeline. */ +return true; + } + OptixProgramGroupOptions group_options = {}; /* There are no options currently. 
*/ OptixModuleCompileOptions module_options = {}; module_options.optLevel = OPTIX_COMPILE_OPTIMIZATION_LEVEL_3; ___ Bf-blender-cvs mailing list Bf-blender-cvs@blender.org List details, subscription details or unsubscribe: https://lists.blender.org/mailman/listinfo/bf-blender-cvs
[Bf-blender-cvs] [c30fdb9cf52] master: Fix mismatching PTX function declarations for OSL intrinsics with string parameters
Commit: c30fdb9cf52d62dcd25fbc93f29c2983cc90a447 Author: Patrick Mours Date: Wed Dec 14 15:21:39 2022 +0100 Branches: master https://developer.blender.org/rBc30fdb9cf52d62dcd25fbc93f29c2983cc90a447 Fix mismatching PTX function declarations for OSL intrinsics with string parameters The use of a struct for device strings caused the CUDA compiler to generate byte arrays as the argument type, whereas OSL generated primitive integer types (for the hash). Fix that by using a typedef instead so that the CUDA compiler too will use an integer type in the PTX it generates. Maniphest Tasks: T101222 === M intern/cycles/kernel/osl/services_gpu.h M intern/cycles/kernel/osl/types.h === diff --git a/intern/cycles/kernel/osl/services_gpu.h b/intern/cycles/kernel/osl/services_gpu.h index 75cf39919a0..744c7103b28 100644 --- a/intern/cycles/kernel/osl/services_gpu.h +++ b/intern/cycles/kernel/osl/services_gpu.h @@ -14,111 +14,111 @@ namespace DeviceStrings { /* "" */ -ccl_device_constant DeviceString _emptystring_ = {0ull}; +ccl_device_constant DeviceString _emptystring_ = 0ull; /* "common" */ -ccl_device_constant DeviceString u_common = {14645198576927606093ull}; +ccl_device_constant DeviceString u_common = 14645198576927606093ull; /* "world" */ -ccl_device_constant DeviceString u_world = {16436542438370751598ull}; +ccl_device_constant DeviceString u_world = 16436542438370751598ull; /* "shader" */ -ccl_device_constant DeviceString u_shader = {4279676006089868ull}; +ccl_device_constant DeviceString u_shader = 4279676006089868ull; /* "object" */ -ccl_device_constant DeviceString u_object = {973692718279674627ull}; +ccl_device_constant DeviceString u_object = 973692718279674627ull; /* "NDC" */ -ccl_device_constant DeviceString u_ndc = {5148305047403260775ull}; +ccl_device_constant DeviceString u_ndc = 5148305047403260775ull; /* "screen" */ -ccl_device_constant DeviceString u_screen = {14159088609039777114ull}; +ccl_device_constant DeviceString u_screen = 14159088609039777114ull; /* 
"camera" */ -ccl_device_constant DeviceString u_camera = {2159505832145726196ull}; +ccl_device_constant DeviceString u_camera = 2159505832145726196ull; /* "raster" */ -ccl_device_constant DeviceString u_raster = {7759263238610201778ull}; +ccl_device_constant DeviceString u_raster = 7759263238610201778ull; /* "hsv" */ -ccl_device_constant DeviceString u_hsv = {2177035556331879497ull}; +ccl_device_constant DeviceString u_hsv = 2177035556331879497ull; /* "hsl" */ -ccl_device_constant DeviceString u_hsl = {7749766809258288148ull}; +ccl_device_constant DeviceString u_hsl = 7749766809258288148ull; /* "XYZ" */ -ccl_device_constant DeviceString u_xyz = {4957977063494975483ull}; +ccl_device_constant DeviceString u_xyz = 4957977063494975483ull; /* "xyY" */ -ccl_device_constant DeviceString u_xyy = {5138822319725660255ull}; +ccl_device_constant DeviceString u_xyy = 5138822319725660255ull; /* "sRGB" */ -ccl_device_constant DeviceString u_srgb = {15368599878474175032ull}; +ccl_device_constant DeviceString u_srgb = 15368599878474175032ull; /* "object:location" */ -ccl_device_constant DeviceString u_object_location = {7846190347358762897ull}; +ccl_device_constant DeviceString u_object_location = 7846190347358762897ull; /* "object:color" */ -ccl_device_constant DeviceString u_object_color = {12695623857059169556ull}; +ccl_device_constant DeviceString u_object_color = 12695623857059169556ull; /* "object:alpha" */ -ccl_device_constant DeviceString u_object_alpha = {11165053919428293151ull}; +ccl_device_constant DeviceString u_object_alpha = 11165053919428293151ull; /* "object:index" */ -ccl_device_constant DeviceString u_object_index = {6588325838217472556ull}; +ccl_device_constant DeviceString u_object_index = 6588325838217472556ull; /* "geom:dupli_generated" */ -ccl_device_constant DeviceString u_geom_dupli_generated = {6715607178003388908ull}; +ccl_device_constant DeviceString u_geom_dupli_generated = 6715607178003388908ull; /* "geom:dupli_uv" */ -ccl_device_constant DeviceString 
u_geom_dupli_uv = {1294253317490155849ull}; +ccl_device_constant DeviceString u_geom_dupli_uv = 1294253317490155849ull; /* "material:index" */ -ccl_device_constant DeviceString u_material_index = {741770758159634623ull}; +ccl_device_constant DeviceString u_material_index = 741770758159634623ull; /* "object:random" */ -ccl_device_constant DeviceString u_object_random = {15789063994977955884ull}; +ccl_device_constant DeviceString u_object_random = 15789063994977955884ull; /* "particle:index" */ -ccl_device_constant DeviceString u_particle_index = {9489711748229903784ull}; +ccl
[Bf-blender-cvs] [41a3de878f6] blender-v3.4-release: Fix part of T102450: Cycles OSL render issues for with normals in shader nodes
Commit: 41a3de878f64ae19e4f80c58102cc64e583d3a5f Author: Patrick Mours Date: Fri Nov 11 16:42:49 2022 +0100 Branches: blender-v3.4-release https://developer.blender.org/rB41a3de878f64ae19e4f80c58102cc64e583d3a5f Fix part of T102450: Cycles OSL render issues for with normals in shader nodes Commit c8dd33f5a37b6a6db0b6950d24f9a7cff5ceb799 in OSL changed behavior of parameters that reference each other and are also overwritten with an instance value. This is causing the "NormalIn" parameter of a few OSL nodes in Cycles to be set to zero somehow, which should instead have received the value from a "node_geometry" node Cycles generates and connects automatically. I am not entirely sure why that is happening, but these parameters are superfluous anyway, since OSL already provides the necessary data in the global variable "N". So this patch simply removes those parameters (which mimics SVM, where these parameters do not exist either), which also fixes the rendering artifacts that occurred with recent OSL. While this fixes built-in shader nodes, custom OSL scripts can still have this problem. 
Ref T101222 Differential Revision: https://developer.blender.org/D16470 === M intern/cycles/kernel/osl/shaders/node_geometry.osl M intern/cycles/kernel/osl/shaders/node_normal_map.osl M intern/cycles/kernel/osl/shaders/node_tangent.osl M intern/cycles/kernel/osl/shaders/node_texture_coordinate.osl M intern/cycles/scene/shader_nodes.cpp M intern/cycles/scene/shader_nodes.h === diff --git a/intern/cycles/kernel/osl/shaders/node_geometry.osl b/intern/cycles/kernel/osl/shaders/node_geometry.osl index cc891abd6e3..5d9284deac2 100644 --- a/intern/cycles/kernel/osl/shaders/node_geometry.osl +++ b/intern/cycles/kernel/osl/shaders/node_geometry.osl @@ -3,8 +3,7 @@ #include "stdcycles.h" -shader node_geometry(normal NormalIn = N, - string bump_offset = "center", +shader node_geometry(string bump_offset = "center", output point Position = point(0.0, 0.0, 0.0), output normal Normal = normal(0.0, 0.0, 0.0), @@ -17,7 +16,7 @@ shader node_geometry(normal NormalIn = N, output float RandomPerIsland = 0.0) { Position = P; - Normal = NormalIn; + Normal = N; TrueNormal = Ng; Incoming = I; Parametric = point(1.0 - u - v, u, 0.0); diff --git a/intern/cycles/kernel/osl/shaders/node_normal_map.osl b/intern/cycles/kernel/osl/shaders/node_normal_map.osl index 3cda485c686..7e41bbf1720 100644 --- a/intern/cycles/kernel/osl/shaders/node_normal_map.osl +++ b/intern/cycles/kernel/osl/shaders/node_normal_map.osl @@ -3,13 +3,12 @@ #include "stdcycles.h" -shader node_normal_map(normal NormalIn = N, - float Strength = 1.0, +shader node_normal_map(float Strength = 1.0, color Color = color(0.5, 0.5, 1.0), string space = "tangent", string attr_name = "geom:tangent", string attr_sign_name = "geom:tangent_sign", - output normal Normal = NormalIn) + output normal Normal = N) { color mcolor = 2.0 * color(Color[0] - 0.5, Color[1] - 0.5, Color[2] - 0.5); int is_backfacing = backfacing(); @@ -71,5 +70,5 @@ shader node_normal_map(normal NormalIn = N, } if (Strength != 1.0) -Normal = normalize(NormalIn + 
(Normal - NormalIn) * max(Strength, 0.0)); +Normal = normalize(N + (Normal - N) * max(Strength, 0.0)); } diff --git a/intern/cycles/kernel/osl/shaders/node_tangent.osl b/intern/cycles/kernel/osl/shaders/node_tangent.osl index a302c001f08..b3808778b2f 100644 --- a/intern/cycles/kernel/osl/shaders/node_tangent.osl +++ b/intern/cycles/kernel/osl/shaders/node_tangent.osl @@ -3,8 +3,7 @@ #include "stdcycles.h" -shader node_tangent(normal NormalIn = N, -string attr_name = "geom:tangent", +shader node_tangent(string attr_name = "geom:tangent", string direction_type = "radial", string axis = "z", output normal Tangent = normalize(dPdu)) @@ -29,5 +28,5 @@ shader node_tangent(normal NormalIn = N, } T = transform("object", "world", T); - Tangent = cross(NormalIn, normalize(cross(T, NormalIn))); + Tangent = cross(N, normalize(cross(T, N))); } diff --git a/intern/cycles/kernel/osl/shaders/node_texture_coordinate.osl b/intern/cycles/kernel/osl/shaders/node_texture_coordinate.osl index 24875ce140a..cd2fdae3cb3 100644 --- a/intern/cycles/kernel/osl/shaders/node_texture_coordinate.osl +++ b/intern/cycles/kernel/osl/shaders/node_texture_coordinate.osl @@
[Bf-blender-cvs] [a859837cdea] master: Cleanup: Move OptiX denoiser code from device into denoiser class
Commit: a859837cdea0c34268c870da25b038e3826aecc2 Author: Patrick Mours Date: Tue Nov 15 13:05:23 2022 +0100 Branches: master https://developer.blender.org/rBa859837cdea0c34268c870da25b038e3826aecc2 Cleanup: Move OptiX denoiser code from device into denoiser class Cycles already treats denoising fairly separately in its code, with a dedicated `Denoiser` base class used to describe denoising behavior. That class has been fully implemented for OIDN (`denoiser_oidn.cpp`), but for OptiX was mostly empty (`denoiser_optix.cpp`) and denoising was instead implemented in the OptiX device. That meant denoising code was split over various files and directories, making it a bit awkward to work with. This patch moves the OptiX denoising implementation into the existing `OptiXDenoiser` class, so that everything is in one place. There are no functional changes, code has been mostly moved as-is. To retain support for potential other denoiser implementations based on a GPU device in the future, the `DeviceDenoiser` base class was kept and slightly extended (and its file renamed to `denoiser_gpu.cpp` to follow similar naming rules as `path_trace_work_*.cpp`). 
Differential Revision: https://developer.blender.org/D16502 === M intern/cycles/CMakeLists.txt M intern/cycles/device/CMakeLists.txt M intern/cycles/device/denoise.h M intern/cycles/device/device.h M intern/cycles/device/optix/device_impl.cpp M intern/cycles/device/optix/device_impl.h M intern/cycles/integrator/CMakeLists.txt M intern/cycles/integrator/denoiser.cpp D intern/cycles/integrator/denoiser_device.h R077intern/cycles/integrator/denoiser_device.cpp intern/cycles/integrator/denoiser_gpu.cpp A intern/cycles/integrator/denoiser_gpu.h M intern/cycles/integrator/denoiser_optix.cpp M intern/cycles/integrator/denoiser_optix.h === diff --git a/intern/cycles/CMakeLists.txt b/intern/cycles/CMakeLists.txt index 329aa3990f6..c6590a07ee4 100644 --- a/intern/cycles/CMakeLists.txt +++ b/intern/cycles/CMakeLists.txt @@ -253,6 +253,33 @@ if(WITH_CYCLES_OSL) ) endif() +if(WITH_CYCLES_DEVICE_CUDA OR WITH_CYCLES_DEVICE_OPTIX) + add_definitions(-DWITH_CUDA) + + if(WITH_CUDA_DYNLOAD) +include_directories( + ../../extern/cuew/include +) +add_definitions(-DWITH_CUDA_DYNLOAD) + else() +include_directories( + SYSTEM + ${CUDA_TOOLKIT_INCLUDE} +) + endif() +endif() + +if(WITH_CYCLES_DEVICE_HIP) + add_definitions(-DWITH_HIP) + + if(WITH_HIP_DYNLOAD) +include_directories( + ../../extern/hipew/include +) +add_definitions(-DWITH_HIP_DYNLOAD) + endif() +endif() + if(WITH_CYCLES_DEVICE_OPTIX) find_package(OptiX 7.3.0) @@ -261,12 +288,16 @@ if(WITH_CYCLES_DEVICE_OPTIX) include_directories( SYSTEM ${OPTIX_INCLUDE_DIR} - ) +) else() set_and_warn_library_found("OptiX" OPTIX_FOUND WITH_CYCLES_DEVICE_OPTIX) endif() endif() +if(WITH_CYCLES_DEVICE_METAL) + add_definitions(-DWITH_METAL) +endif() + if (WITH_CYCLES_DEVICE_ONEAPI) add_definitions(-DWITH_ONEAPI) endif() diff --git a/intern/cycles/device/CMakeLists.txt b/intern/cycles/device/CMakeLists.txt index bfca3ab6aea..6808d8c04d7 100644 --- a/intern/cycles/device/CMakeLists.txt +++ b/intern/cycles/device/CMakeLists.txt @@ -8,28 +8,13 @@ set(INC 
set(INC_SYS ) if(WITH_CYCLES_DEVICE_OPTIX OR WITH_CYCLES_DEVICE_CUDA) - if(WITH_CUDA_DYNLOAD) -list(APPEND INC - ../../../extern/cuew/include -) -add_definitions(-DWITH_CUDA_DYNLOAD) - else() -list(APPEND INC_SYS - ${CUDA_TOOLKIT_INCLUDE} -) + if(NOT WITH_CUDA_DYNLOAD) add_definitions(-DCYCLES_CUDA_NVCC_EXECUTABLE="${CUDA_NVCC_EXECUTABLE}") endif() add_definitions(-DCYCLES_RUNTIME_OPTIX_ROOT_DIR="${CYCLES_RUNTIME_OPTIX_ROOT_DIR}") endif() -if(WITH_CYCLES_DEVICE_HIP AND WITH_HIP_DYNLOAD) - list(APPEND INC -../../../extern/hipew/include - ) - add_definitions(-DWITH_HIP_DYNLOAD) -endif() - set(SRC_BASE device.cpp denoise.cpp @@ -168,24 +153,15 @@ if(WITH_CYCLES_DEVICE_HIP AND WITH_HIP_DYNLOAD) ) endif() -if(WITH_CYCLES_DEVICE_CUDA) - add_definitions(-DWITH_CUDA) -endif() -if(WITH_CYCLES_DEVICE_HIP) - add_definitions(-DWITH_HIP) -endif() -if(WITH_CYCLES_DEVICE_OPTIX) - add_definitions(-DWITH_OPTIX) -endif() if(WITH_CYCLES_DEVICE_METAL) list(APPEND LIB ${METAL_LIBRARY} ) - add_definitions(-DWITH_METAL) list(APPEND SRC ${SRC_METAL} ) endif() + if (WITH_CYCLES_DEVICE_ONEAPI) if(WITH_CYCLES_ONEAPI_BINARIES) set(cycles_kernel_oneapi_lib_suffix "_aot") @@ -203,7 +179,6 @@ if (WITH_CYCLES_DEVICE_ONEAPI) else() list(APPEND LIB ${SYCL_LIBRARY}) endif() - add_definitions(-DWITH_ONEAPI) list(APPEND SRC ${SRC_ONEAPI} ) diff
[Bf-blender-cvs] [9d827a1834a] master: Fix OSL object matrix with Cycles on the GPU
Commit: 9d827a1834ab0e2211488251dc9133e7164652dd Author: Patrick Mours Date: Fri Nov 11 20:20:47 2022 +0100 Branches: master https://developer.blender.org/rB9d827a1834ab0e2211488251dc9133e7164652dd Fix OSL object matrix with Cycles on the GPU The OSL GPU services implementation of "osl_get_matrix" and "osl_get_inverse_matrix" was missing support for the "common", "shader" and "object" matrices and thus any matrix operations in OSL shaders using these would not work. This patch adds the proper implementation copied from the OSL CPU services. Maniphest Tasks: T101222 === M intern/cycles/kernel/osl/services_gpu.h === diff --git a/intern/cycles/kernel/osl/services_gpu.h b/intern/cycles/kernel/osl/services_gpu.h index f762c7258df..75cf39919a0 100644 --- a/intern/cycles/kernel/osl/services_gpu.h +++ b/intern/cycles/kernel/osl/services_gpu.h @@ -15,6 +15,14 @@ namespace DeviceStrings { /* "" */ ccl_device_constant DeviceString _emptystring_ = {0ull}; +/* "common" */ +ccl_device_constant DeviceString u_common = {14645198576927606093ull}; +/* "world" */ +ccl_device_constant DeviceString u_world = {16436542438370751598ull}; +/* "shader" */ +ccl_device_constant DeviceString u_shader = {4279676006089868ull}; +/* "object" */ +ccl_device_constant DeviceString u_object = {973692718279674627ull}; /* "NDC" */ ccl_device_constant DeviceString u_ndc = {5148305047403260775ull}; /* "screen" */ @@ -23,10 +31,6 @@ ccl_device_constant DeviceString u_screen = {14159088609039777114ull}; ccl_device_constant DeviceString u_camera = {2159505832145726196ull}; /* "raster" */ ccl_device_constant DeviceString u_raster = {7759263238610201778ull}; -/* "world" */ -ccl_device_constant DeviceString u_world = {16436542438370751598ull}; -/* "common" */ -ccl_device_constant DeviceString u_common = {14645198576927606093ull}; /* "hsv" */ ccl_device_constant DeviceString u_hsv = {2177035556331879497ull}; /* "hsl" */ @@ -425,6 +429,7 @@ ccl_device_extern bool osl_transformc(ccl_private ShaderGlobals *sg, /* 
Matrix Utilities */ +#include "kernel/geom/object.h" #include "util/transform.h" ccl_device_forceinline void copy_matrix(ccl_private float *res, const Transform ) @@ -465,24 +470,24 @@ ccl_device_forceinline void copy_matrix(ccl_private float *res, const Projection res[14] = tfm.z.w; res[15] = tfm.w.w; } -ccl_device_forceinline void copy_identity_matrix(ccl_private float *res) +ccl_device_forceinline void copy_identity_matrix(ccl_private float *res, float value = 1.0f) { - res[0] = 1.0f; + res[0] = value; res[1] = 0.0f; res[2] = 0.0f; res[3] = 0.0f; res[4] = 0.0f; - res[5] = 1.0f; + res[5] = value; res[6] = 0.0f; res[7] = 0.0f; res[8] = 0.0f; res[9] = 0.0f; - res[10] = 1.0f; + res[10] = value; res[11] = 0.0f; res[12] = 0.0f; res[13] = 0.0f; res[14] = 0.0f; - res[15] = 1.0f; + res[15] = value; } ccl_device_forceinline Transform convert_transform(ccl_private const float *m) { @@ -534,22 +539,7 @@ ccl_device_extern void osl_div_mfm(ccl_private float *res, float a, ccl_private ccl_device_extern void osl_div_m_ff(ccl_private float *res, float a, float b) { float f = (b == 0) ? 
0.0f : (a / b); - res[0] = f; - res[1] = 0.0f; - res[2] = 0.0f; - res[3] = 0.0f; - res[4] = 0.0f; - res[5] = f; - res[6] = 0.0f; - res[7] = 0.0f; - res[8] = 0.0f; - res[9] = 0.0f; - res[10] = f; - res[11] = 0.0f; - res[12] = 0.0f; - res[13] = 0.0f; - res[14] = 0.0f; - res[15] = f; + copy_identity_matrix(res, f); } ccl_device_extern void osl_transform_vmv(ccl_private float3 *res, @@ -607,27 +597,43 @@ ccl_device_extern void osl_transformn_dvmdv(ccl_private float3 *res, } ccl_device_extern bool osl_get_matrix(ccl_private ShaderGlobals *sg, - ccl_private float *result, + ccl_private float *res, DeviceString from) { - if (from == DeviceStrings::u_ndc) { -copy_matrix(result, kernel_data.cam.ndctoworld); + if (from == DeviceStrings::u_common || from == DeviceStrings::u_world) { +copy_identity_matrix(res); return true; } - if (from == DeviceStrings::u_raster) { -copy_matrix(result, kernel_data.cam.rastertoworld); + if (from == DeviceStrings::u_shader || from == DeviceStrings::u_object) { +KernelGlobals kg = nullptr; +ccl_private ShaderData *const sd = static_cast(sg->renderstate); +int object = sd->object; + +if (object != OBJECT_NONE) { + const Transform tfm = object_get_transform(kg, sd); + copy_matrix(res, tfm); + return true; +} +else if (sd->t
[Bf-blender-cvs] [097a13f5be1] master: Fix broken Cycles rendering with recent OSL versions
Commit: 097a13f5be143bd37bfd635cbf31515d531d7a8a Author: Patrick Mours Date: Fri Nov 11 16:42:49 2022 +0100 Branches: master https://developer.blender.org/rB097a13f5be143bd37bfd635cbf31515d531d7a8a Fix broken Cycles rendering with recent OSL versions Commit c8dd33f5a37b6a6db0b6950d24f9a7cff5ceb799 in OSL changed behavior of shader parameters that reference each other and are also overwritten with an instance value. This is causing the "NormalIn" parameter of a few OSL nodes in Cycles to be set to zero somehow, which should instead have received the value from a "node_geometry" node Cycles generates and connects automatically. I am not entirely sure why that is happening, but these parameters are superfluous anyway, since OSL already provides the necessary data in the global variable "N". So this patch simply removes those parameters (which mimics SVM, where these parameters do not exist either), which also fixes the rendering artifacts that occurred with recent OSL. Maniphest Tasks: T101222 Differential Revision: https://developer.blender.org/D16470 === M intern/cycles/kernel/osl/services_gpu.h M intern/cycles/kernel/osl/shaders/node_geometry.osl M intern/cycles/kernel/osl/shaders/node_normal_map.osl M intern/cycles/kernel/osl/shaders/node_tangent.osl M intern/cycles/kernel/osl/shaders/node_texture_coordinate.osl M intern/cycles/scene/shader_nodes.cpp M intern/cycles/scene/shader_nodes.h === diff --git a/intern/cycles/kernel/osl/services_gpu.h b/intern/cycles/kernel/osl/services_gpu.h index e6e19b8c484..f762c7258df 100644 --- a/intern/cycles/kernel/osl/services_gpu.h +++ b/intern/cycles/kernel/osl/services_gpu.h @@ -419,6 +419,8 @@ ccl_device_extern bool osl_transformc(ccl_private ShaderGlobals *sg, c_out[i] = rgb; } } + + return true; } /* Matrix Utilities */ diff --git a/intern/cycles/kernel/osl/shaders/node_geometry.osl b/intern/cycles/kernel/osl/shaders/node_geometry.osl index cc891abd6e3..5d9284deac2 100644 --- 
a/intern/cycles/kernel/osl/shaders/node_geometry.osl +++ b/intern/cycles/kernel/osl/shaders/node_geometry.osl @@ -3,8 +3,7 @@ #include "stdcycles.h" -shader node_geometry(normal NormalIn = N, - string bump_offset = "center", +shader node_geometry(string bump_offset = "center", output point Position = point(0.0, 0.0, 0.0), output normal Normal = normal(0.0, 0.0, 0.0), @@ -17,7 +16,7 @@ shader node_geometry(normal NormalIn = N, output float RandomPerIsland = 0.0) { Position = P; - Normal = NormalIn; + Normal = N; TrueNormal = Ng; Incoming = I; Parametric = point(1.0 - u - v, u, 0.0); diff --git a/intern/cycles/kernel/osl/shaders/node_normal_map.osl b/intern/cycles/kernel/osl/shaders/node_normal_map.osl index 3cda485c686..7e41bbf1720 100644 --- a/intern/cycles/kernel/osl/shaders/node_normal_map.osl +++ b/intern/cycles/kernel/osl/shaders/node_normal_map.osl @@ -3,13 +3,12 @@ #include "stdcycles.h" -shader node_normal_map(normal NormalIn = N, - float Strength = 1.0, +shader node_normal_map(float Strength = 1.0, color Color = color(0.5, 0.5, 1.0), string space = "tangent", string attr_name = "geom:tangent", string attr_sign_name = "geom:tangent_sign", - output normal Normal = NormalIn) + output normal Normal = N) { color mcolor = 2.0 * color(Color[0] - 0.5, Color[1] - 0.5, Color[2] - 0.5); int is_backfacing = backfacing(); @@ -71,5 +70,5 @@ shader node_normal_map(normal NormalIn = N, } if (Strength != 1.0) -Normal = normalize(NormalIn + (Normal - NormalIn) * max(Strength, 0.0)); +Normal = normalize(N + (Normal - N) * max(Strength, 0.0)); } diff --git a/intern/cycles/kernel/osl/shaders/node_tangent.osl b/intern/cycles/kernel/osl/shaders/node_tangent.osl index a302c001f08..b3808778b2f 100644 --- a/intern/cycles/kernel/osl/shaders/node_tangent.osl +++ b/intern/cycles/kernel/osl/shaders/node_tangent.osl @@ -3,8 +3,7 @@ #include "stdcycles.h" -shader node_tangent(normal NormalIn = N, -string attr_name = "geom:tangent", +shader node_tangent(string attr_name = "geom:tangent", 
string direction_type = "radial", string axis = "z", output normal Tangent = normalize(dPdu)) @@ -29,5 +28,5 @@ shader node_tangent(normal NormalIn = N, } T = transform("object", "world", T); - Tangent = cross(NormalIn, normalize(cross(T, NormalIn))); + Tangent = cross(N, n
[Bf-blender-cvs] [6a8ce5ec1c5] master: Fix abort when rendering with OSL and OptiX in Cycles
Commit: 6a8ce5ec1c550cbcaf2fbb8e05c0743b1bda40d2 Author: Patrick Mours Date: Thu Nov 10 19:27:07 2022 +0100 Branches: master https://developer.blender.org/rB6a8ce5ec1c550cbcaf2fbb8e05c0743b1bda40d2 Fix abort when rendering with OSL and OptiX in Cycles LLVM could kill the process during OSL PTX code generation, because generated symbols contained invalid characters in their names. Those names are generated by Cycles and were not properly filtered: - If the locale was set to something other than the minimal locale (when Blender was built with WITH_INTERNATIONAL), pointers may be printed with grouping characters, like commas or dots, added to them. - Material names from Blender may contain the full range of UTF8 characters. This fixes those cases by forcing the locale used in the symbol name generation to the minimal locale and using the material name hash instead of the actual material name string. === M intern/cycles/scene/osl.cpp === diff --git a/intern/cycles/scene/osl.cpp b/intern/cycles/scene/osl.cpp index 3ea406b6935..4dc5fb4edf7 100644 --- a/intern/cycles/scene/osl.cpp +++ b/intern/cycles/scene/osl.cpp @@ -641,6 +641,8 @@ string OSLCompiler::id(ShaderNode *node) { /* assign layer unique name based on pointer address + bump mode */ stringstream stream; + stream.imbue(std::locale("C")); /* Ensure that no grouping characters (e.g. commas with en_US + locale) are added to the pointer string */ stream << "node_" << node->type->name << "_" << node; return stream.str(); @@ -1132,12 +1134,12 @@ OSL::ShaderGroupRef OSLCompiler::compile_type(Shader *shader, ShaderGraph *graph { current_type = type; - string name = shader->name.string(); - /* Replace invalid characters. 
*/ - for (size_t i; (i = name.find_first_of(" .,:;+-*/#")) != string::npos;) -name.replace(i, 1, "_"); + /* Use name hash to identify shader group to avoid issues with non-alphanumeric characters */ + stringstream name; + name.imbue(std::locale("C")); + name << "shader_" << shader->name.hash(); - OSL::ShaderGroupRef group = ss->ShaderGroupBegin(name); + OSL::ShaderGroupRef group = ss->ShaderGroupBegin(name.str()); ShaderNode *output = graph->output(); ShaderNodeSet dependencies; ___ Bf-blender-cvs mailing list Bf-blender-cvs@blender.org List details, subscription details or unsubscribe: https://lists.blender.org/mailman/listinfo/bf-blender-cvs
[Bf-blender-cvs] [e6b38deb9db] master: Cycles: Add basic support for using OSL with OptiX
Commit: e6b38deb9dbb58118f6ee644409ce52f06eac5e5 Author: Patrick Mours Date: Wed Nov 9 14:25:32 2022 +0100 Branches: master https://developer.blender.org/rBe6b38deb9dbb58118f6ee644409ce52f06eac5e5 Cycles: Add basic support for using OSL with OptiX This patch generalizes the OSL support in Cycles to include GPU device types and adds an implementation for that in the OptiX device. There are some caveats still, including simplified texturing due to lack of OIIO on the GPU and a few missing OSL intrinsics. Note that this is incomplete and missing an update to the OSL library before being enabled! The implementation is already committed now to simplify further development. Maniphest Tasks: T101222 Differential Revision: https://developer.blender.org/D15902 === M build_files/cmake/platform/platform_win32.cmake M intern/cycles/blender/addon/__init__.py M intern/cycles/blender/addon/engine.py M intern/cycles/blender/addon/properties.py M intern/cycles/blender/addon/ui.py M intern/cycles/device/device.h M intern/cycles/device/kernel.cpp M intern/cycles/device/kernel.h M intern/cycles/device/multi/device.cpp M intern/cycles/device/optix/device.cpp M intern/cycles/device/optix/device_impl.cpp M intern/cycles/device/optix/device_impl.h M intern/cycles/device/optix/queue.cpp M intern/cycles/kernel/CMakeLists.txt M intern/cycles/kernel/closure/bsdf.h M intern/cycles/kernel/device/cuda/compat.h M intern/cycles/kernel/device/hip/compat.h M intern/cycles/kernel/device/metal/compat.h M intern/cycles/kernel/device/oneapi/compat.h M intern/cycles/kernel/device/optix/compat.h M intern/cycles/kernel/device/optix/globals.h A intern/cycles/kernel/device/optix/kernel_osl.cu M intern/cycles/kernel/integrator/displacement_shader.h M intern/cycles/kernel/integrator/surface_shader.h M intern/cycles/kernel/integrator/volume_shader.h M intern/cycles/kernel/osl/closures.cpp M intern/cycles/kernel/osl/closures_setup.h M intern/cycles/kernel/osl/closures_template.h M intern/cycles/kernel/osl/osl.h 
M intern/cycles/kernel/osl/services.cpp M intern/cycles/kernel/osl/services.h A intern/cycles/kernel/osl/services_gpu.h A intern/cycles/kernel/osl/services_optix.cu M intern/cycles/kernel/osl/types.h M intern/cycles/kernel/types.h M intern/cycles/scene/osl.cpp M intern/cycles/scene/osl.h M intern/cycles/scene/scene.cpp M intern/cycles/scene/shader.cpp M intern/cycles/scene/shader.h M intern/cycles/scene/shader_nodes.h M intern/cycles/util/defines.h M intern/cycles/util/transform.h === diff --git a/build_files/cmake/platform/platform_win32.cmake b/build_files/cmake/platform/platform_win32.cmake index 7a2d3ad948a..47673794652 100644 --- a/build_files/cmake/platform/platform_win32.cmake +++ b/build_files/cmake/platform/platform_win32.cmake @@ -419,7 +419,7 @@ if(WITH_IMAGE_OPENEXR) warn_hardcoded_paths(OpenEXR) set(OPENEXR ${LIBDIR}/openexr) set(OPENEXR_INCLUDE_DIR ${OPENEXR}/include) -set(OPENEXR_INCLUDE_DIRS ${OPENEXR_INCLUDE_DIR} ${IMATH_INCLUDE_DIRS} ${OPENEXR}/include/OpenEXR) +set(OPENEXR_INCLUDE_DIRS ${OPENEXR_INCLUDE_DIR} ${IMATH_INCLUDE_DIRS} ${OPENEXR_INCLUDE_DIR}/OpenEXR) set(OPENEXR_LIBPATH ${OPENEXR}/lib) # Check if the 3.x library name exists # if not assume this is a 2.x library folder @@ -568,7 +568,8 @@ if(WITH_OPENIMAGEIO) if(NOT OpenImageIO_FOUND) set(OPENIMAGEIO ${LIBDIR}/OpenImageIO) set(OPENIMAGEIO_LIBPATH ${OPENIMAGEIO}/lib) -set(OPENIMAGEIO_INCLUDE_DIRS ${OPENIMAGEIO}/include) +set(OPENIMAGEIO_INCLUDE_DIR ${OPENIMAGEIO}/include) +set(OPENIMAGEIO_INCLUDE_DIRS ${OPENIMAGEIO_INCLUDE_DIR}) set(OIIO_OPTIMIZED optimized ${OPENIMAGEIO_LIBPATH}/OpenImageIO.lib optimized ${OPENIMAGEIO_LIBPATH}/OpenImageIO_Util.lib) set(OIIO_DEBUG debug ${OPENIMAGEIO_LIBPATH}/OpenImageIO_d.lib debug ${OPENIMAGEIO_LIBPATH}/OpenImageIO_Util_d.lib) set(OPENIMAGEIO_LIBRARIES ${OIIO_OPTIMIZED} ${OIIO_DEBUG}) @@ -785,6 +786,14 @@ if(WITH_CYCLES AND WITH_CYCLES_OSL) endif() find_path(OSL_INCLUDE_DIR OSL/oslclosure.h PATHS ${CYCLES_OSL}/include) find_program(OSL_COMPILER NAMES 
oslc PATHS ${CYCLES_OSL}/bin) + file(STRINGS "${OSL_INCLUDE_DIR}/OSL/oslversion.h" OSL_LIBRARY_VERSION_MAJOR + REGEX "^[ \t]*#define[ \t]+OSL_LIBRARY_VERSION_MAJOR[ \t]+[0-9]+.*$") + file(STRINGS "${OSL_INCLUDE_DIR}/OSL/oslversion.h" OSL_LIBRARY_VERSION_MINOR + REGEX "^[ \t]*#define[ \t]+OSL_LIBRARY_VERSION_MINOR[ \t]+[0-9]+.*$") + string(REGEX REPLACE ".*#define[ \t]+OSL_LIBRARY_VERSION_MAJOR[ \t]+([.0-9]+).*" + "\\1
[Bf-blender-cvs] [a45c36efae0] master: Cycles: Make OSL implementation independent from SVM
Commit: a45c36efae07f22dd1da1ebac728324aeafce85e Author: Patrick Mours Date: Mon Sep 12 18:46:20 2022 +0200 Branches: master https://developer.blender.org/rBa45c36efae07f22dd1da1ebac728324aeafce85e Cycles: Make OSL implementation independent from SVM Cleans up the file structure to be more similar to that of the SVM and also makes it possible to build kernels with OSL support, but without having to include SVM support. This patch was split from D15902. Differential Revision: https://developer.blender.org/D15949 === M intern/cycles/device/cpu/device_impl.cpp M intern/cycles/device/cpu/device_impl.h M intern/cycles/device/cpu/kernel_thread_globals.cpp M intern/cycles/kernel/CMakeLists.txt M intern/cycles/kernel/closure/bsdf.h M intern/cycles/kernel/film/data_passes.h M intern/cycles/kernel/geom/shader_data.h M intern/cycles/kernel/integrator/displacement_shader.h M intern/cycles/kernel/integrator/intersect_closest.h M intern/cycles/kernel/integrator/surface_shader.h M intern/cycles/kernel/integrator/volume_shader.h M intern/cycles/kernel/osl/CMakeLists.txt M intern/cycles/kernel/osl/closures.cpp A intern/cycles/kernel/osl/globals.cpp M intern/cycles/kernel/osl/globals.h R073intern/cycles/kernel/osl/shader.h intern/cycles/kernel/osl/osl.h M intern/cycles/kernel/osl/services.cpp M intern/cycles/kernel/osl/services.h D intern/cycles/kernel/osl/shader.cpp M intern/cycles/scene/osl.cpp === diff --git a/intern/cycles/device/cpu/device_impl.cpp b/intern/cycles/device/cpu/device_impl.cpp index 1e4b9baa0c0..a2b8d1cbbfa 100644 --- a/intern/cycles/device/cpu/device_impl.cpp +++ b/intern/cycles/device/cpu/device_impl.cpp @@ -28,7 +28,6 @@ #include "kernel/device/cpu/kernel.h" #include "kernel/types.h" -#include "kernel/osl/shader.h" #include "kernel/osl/globals.h" // clang-format on diff --git a/intern/cycles/device/cpu/device_impl.h b/intern/cycles/device/cpu/device_impl.h index 5c1f3cc6ce5..e7e77f18194 100644 --- a/intern/cycles/device/cpu/device_impl.h +++ 
b/intern/cycles/device/cpu/device_impl.h @@ -23,7 +23,6 @@ #include "kernel/device/cpu/kernel.h" #include "kernel/device/cpu/globals.h" -#include "kernel/osl/shader.h" #include "kernel/osl/globals.h" // clang-format on diff --git a/intern/cycles/device/cpu/kernel_thread_globals.cpp b/intern/cycles/device/cpu/kernel_thread_globals.cpp index 89545399602..99af1525d92 100644 --- a/intern/cycles/device/cpu/kernel_thread_globals.cpp +++ b/intern/cycles/device/cpu/kernel_thread_globals.cpp @@ -3,10 +3,7 @@ #include "device/cpu/kernel_thread_globals.h" -// clang-format off -#include "kernel/osl/shader.h" #include "kernel/osl/globals.h" -// clang-format on #include "util/profiling.h" @@ -20,7 +17,7 @@ CPUKernelThreadGlobals::CPUKernelThreadGlobals(const KernelGlobalsCPU _gl reset_runtime_memory(); #ifdef WITH_OSL - OSLShader::thread_init(this, reinterpret_cast(osl_globals_memory)); + OSLGlobals::thread_init(this, static_cast(osl_globals_memory)); #else (void)osl_globals_memory; #endif @@ -35,7 +32,7 @@ CPUKernelThreadGlobals::CPUKernelThreadGlobals(CPUKernelThreadGlobals &) n CPUKernelThreadGlobals::~CPUKernelThreadGlobals() { #ifdef WITH_OSL - OSLShader::thread_free(this); + OSLGlobals::thread_free(this); #endif } diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index aa31335393f..a89c5679b27 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -544,8 +544,6 @@ if(WITH_CYCLES_CUDA_BINARIES) cycles_set_solution_folder(cycles_kernel_cuda) endif() -### START - # HIP module if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIP) @@ -620,7 +618,6 @@ if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIP) cycles_set_solution_folder(cycles_kernel_hip) endif() -### END # OptiX PTX modules if(WITH_CYCLES_DEVICE_OPTIX AND WITH_CYCLES_CUDA_BINARIES) @@ -712,6 +709,8 @@ if(WITH_CYCLES_DEVICE_OPTIX AND WITH_CYCLES_CUDA_BINARIES) cycles_set_solution_folder(cycles_kernel_optix) endif() +# oneAPI module + 
if(WITH_CYCLES_DEVICE_ONEAPI) if(WIN32) set(cycles_kernel_oneapi_lib ${CMAKE_CURRENT_BINARY_DIR}/cycles_kernel_oneapi.dll) diff --git a/intern/cycles/kernel/closure/bsdf.h b/intern/cycles/kernel/closure/bsdf.h index 02cf8bfe3e2..f0b28ff77c4 100644 --- a/intern/cycles/kernel/closure/bsdf.h +++ b/intern/cycles/kernel/closure/bsdf.h @@ -116,7 +116,7 @@ ccl_device_inline int bsdf_sample(KernelGlobals kg, case CLOSURE_BSDF_DIFF
[Bf-blender-cvs] [8611c37f975] master: Cycles: Generate OSL closures using macros and a template file
Commit: 8611c37f975737efe0d159822edfc21733268f51 Author: Patrick Mours Date: Thu Sep 8 19:31:44 2022 +0200 Branches: master https://developer.blender.org/rB8611c37f975737efe0d159822edfc21733268f51 Cycles: Generate OSL closures using macros and a template file This has the advantage of being able to use information about the existing OSL closures in various places without code duplication. In addition, the setup code for all closures was moved to standalone functions to avoid usage of virtual function calls in preparation for GPU support. This patch was split from D15902. Differential Revision: https://developer.blender.org/D15917 === M .clang-format M intern/cycles/kernel/closure/alloc.h M intern/cycles/kernel/osl/CMakeLists.txt D intern/cycles/kernel/osl/background.cpp D intern/cycles/kernel/osl/bsdf_diffuse_ramp.cpp D intern/cycles/kernel/osl/bsdf_phong_ramp.cpp D intern/cycles/kernel/osl/bssrdf.cpp M intern/cycles/kernel/osl/closures.cpp D intern/cycles/kernel/osl/closures.h A intern/cycles/kernel/osl/closures_setup.h A intern/cycles/kernel/osl/closures_template.h D intern/cycles/kernel/osl/emissive.cpp M intern/cycles/kernel/osl/services.cpp M intern/cycles/kernel/osl/services.h M intern/cycles/kernel/osl/shader.cpp M intern/cycles/kernel/osl/shader.h A intern/cycles/kernel/osl/types.h M intern/cycles/scene/osl.cpp === diff --git a/.clang-format b/.clang-format index 7e88e6d1cb1..72add4594a4 100644 --- a/.clang-format +++ b/.clang-format @@ -273,5 +273,5 @@ StatementMacros: - PyObject_VAR_HEAD - ccl_gpu_kernel_postfix -MacroBlockBegin: "^BSDF_CLOSURE_CLASS_BEGIN$" -MacroBlockEnd: "^BSDF_CLOSURE_CLASS_END$" +MacroBlockBegin: "^OSL_CLOSURE_STRUCT_BEGIN$" +MacroBlockEnd: "^OSL_CLOSURE_STRUCT_END$" diff --git a/intern/cycles/kernel/closure/alloc.h b/intern/cycles/kernel/closure/alloc.h index 9847898ee89..1cf06614f3b 100644 --- a/intern/cycles/kernel/closure/alloc.h +++ b/intern/cycles/kernel/closure/alloc.h @@ -59,39 +59,10 @@ ccl_device_inline ccl_private 
ShaderClosure *bsdf_alloc(ccl_private ShaderData * * we will not allocate new closure. */ if (sample_weight >= CLOSURE_WEIGHT_CUTOFF) { ccl_private ShaderClosure *sc = closure_alloc(sd, size, CLOSURE_NONE_ID, weight); -if (sc == NULL) { - return NULL; -} - -sc->sample_weight = sample_weight; - -return sc; - } - - return NULL; -} - -#ifdef __OSL__ -ccl_device_inline ShaderClosure *bsdf_alloc_osl(ShaderData *sd, -int size, -Spectrum weight, -void *data) -{ - kernel_assert(isfinite_safe(weight)); - - const float sample_weight = fabsf(average(weight)); - - /* Use comparison this way to help dealing with non-finite weight: if the average is not finite - * we will not allocate new closure. */ - if (sample_weight >= CLOSURE_WEIGHT_CUTOFF) { -ShaderClosure *sc = closure_alloc(sd, size, CLOSURE_NONE_ID, weight); if (!sc) { return NULL; } -memcpy((void *)sc, data, size); - -sc->weight = weight; sc->sample_weight = sample_weight; return sc; @@ -99,6 +70,5 @@ ccl_device_inline ShaderClosure *bsdf_alloc_osl(ShaderData *sd, return NULL; } -#endif CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/osl/CMakeLists.txt b/intern/cycles/kernel/osl/CMakeLists.txt index 7570490be7c..b27bcb066fd 100644 --- a/intern/cycles/kernel/osl/CMakeLists.txt +++ b/intern/cycles/kernel/osl/CMakeLists.txt @@ -10,18 +10,14 @@ set(INC_SYS ) set(SRC - background.cpp - bsdf_diffuse_ramp.cpp - bsdf_phong_ramp.cpp - emissive.cpp - bssrdf.cpp closures.cpp services.cpp shader.cpp ) set(HEADER_SRC - closures.h + closures_setup.h + closures_template.h globals.h services.h shader.h diff --git a/intern/cycles/kernel/osl/background.cpp b/intern/cycles/kernel/osl/background.cpp deleted file mode 100644 index 4b5a2686117..000 --- a/intern/cycles/kernel/osl/background.cpp +++ /dev/null @@ -1,77 +0,0 @@ -/* SPDX-License-Identifier: BSD-3-Clause - * - * Adapted from Open Shading Language - * Copyright (c) 2009-2010 Sony Pictures Imageworks Inc., et al. - * All Rights Reserved. 
- * - * Modifications Copyright 2011-2022 Blender Foundation. */ - -#include - -#include - -#include "kernel/osl/closures.h" - -// clang-format off -#include "kernel/device/cpu/compat.h" -#include "kernel/device/cpu/globals.h" - -#include "kernel/closure/alloc.h" -#include "kernel/closure/emissive.h" - -#include "kernel/util/color.h" -// clang-format on - -CCL_NAMESPACE_BEGIN -
[Bf-blender-cvs] [ef7c9e793ec] master: Cycles: Remove separate OSL attribute map and instead always use SVM attribute map
Commit: ef7c9e793ec5331ac694eec9336565bd2254c406 Author: Patrick Mours Date: Fri Sep 9 11:55:35 2022 +0200 Branches: master https://developer.blender.org/rBef7c9e793ec5331ac694eec9336565bd2254c406 Cycles: Remove separate OSL attribute map and instead always use SVM attribute map The SVM attribute map is always generated and uses a simple linear search to look up by an opaque ID, so we can reuse that for OSL as well and simply use the attribute name hash as ID instead of generating a unique value separately. This works for both object and geometry attributes since the SVM attribute map already stores both. Simplifies code somewhat and reduces memory usage slightly. This patch was split from D15902. Differential Revision: https://developer.blender.org/D15918 === M intern/cycles/kernel/geom/attribute.h M intern/cycles/kernel/geom/primitive.h M intern/cycles/kernel/geom/subd_triangle.h M intern/cycles/kernel/geom/volume.h M intern/cycles/kernel/osl/globals.h M intern/cycles/kernel/osl/services.cpp M intern/cycles/kernel/osl/shader.cpp M intern/cycles/kernel/osl/shader.h M intern/cycles/kernel/types.h M intern/cycles/scene/geometry.cpp M intern/cycles/scene/geometry.h M intern/cycles/scene/osl.cpp M intern/cycles/scene/osl.h M intern/cycles/scene/shader.cpp M intern/cycles/scene/shader.h === diff --git a/intern/cycles/kernel/geom/attribute.h b/intern/cycles/kernel/geom/attribute.h index 31a9e39d528..3a0ee1b09d1 100644 --- a/intern/cycles/kernel/geom/attribute.h +++ b/intern/cycles/kernel/geom/attribute.h @@ -16,14 +16,14 @@ CCL_NAMESPACE_BEGIN /* Patch index for triangle, -1 if not subdivision triangle */ -ccl_device_inline uint subd_triangle_patch(KernelGlobals kg, ccl_private const ShaderData *sd) +ccl_device_inline uint subd_triangle_patch(KernelGlobals kg, int prim) { - return (sd->prim != PRIM_NONE) ? kernel_data_fetch(tri_patch, sd->prim) : ~0; + return (prim != PRIM_NONE) ? 
kernel_data_fetch(tri_patch, prim) : ~0; } -ccl_device_inline uint attribute_primitive_type(KernelGlobals kg, ccl_private const ShaderData *sd) +ccl_device_inline uint attribute_primitive_type(KernelGlobals kg, int prim, int type) { - if ((sd->type & PRIMITIVE_TRIANGLE) && subd_triangle_patch(kg, sd) != ~0) { + if ((type & PRIMITIVE_TRIANGLE) && subd_triangle_patch(kg, prim) != ~0) { return ATTR_PRIM_SUBD; } else { @@ -45,17 +45,16 @@ ccl_device_inline uint object_attribute_map_offset(KernelGlobals kg, int object) return kernel_data_fetch(objects, object).attribute_map_offset; } -ccl_device_inline AttributeDescriptor find_attribute(KernelGlobals kg, - ccl_private const ShaderData *sd, - uint id) +ccl_device_inline AttributeDescriptor +find_attribute(KernelGlobals kg, int object, int prim, int type, uint64_t id) { - if (sd->object == OBJECT_NONE) { + if (object == OBJECT_NONE) { return attribute_not_found(); } /* for SVM, find attribute by unique id */ - uint attr_offset = object_attribute_map_offset(kg, sd->object); - attr_offset += attribute_primitive_type(kg, sd); + uint attr_offset = object_attribute_map_offset(kg, object); + attr_offset += attribute_primitive_type(kg, prim, type); AttributeMap attr_map = kernel_data_fetch(attributes_map, attr_offset); while (attr_map.id != id) { @@ -77,7 +76,7 @@ ccl_device_inline AttributeDescriptor find_attribute(KernelGlobals kg, AttributeDescriptor desc; desc.element = (AttributeElement)attr_map.element; - if (sd->prim == PRIM_NONE && desc.element != ATTR_ELEMENT_MESH && + if (prim == PRIM_NONE && desc.element != ATTR_ELEMENT_MESH && desc.element != ATTR_ELEMENT_VOXEL && desc.element != ATTR_ELEMENT_OBJECT) { return attribute_not_found(); } @@ -91,11 +90,16 @@ ccl_device_inline AttributeDescriptor find_attribute(KernelGlobals kg, return desc; } +ccl_device_inline AttributeDescriptor find_attribute(KernelGlobals kg, + ccl_private const ShaderData *sd, + uint64_t id) +{ + return find_attribute(kg, sd->object, sd->prim, 
sd->type, id); +} + /* Transform matrix attribute on meshes */ -ccl_device Transform primitive_attribute_matrix(KernelGlobals kg, -ccl_private const ShaderData *sd, -const AttributeDescriptor desc) +ccl_device Transform primitive_attribute_matrix(KernelGlobals kg, const AttributeDescriptor desc) { Transform tfm; diff --git a/intern/cycles/kernel/geom
[Bf-blender-cvs] [d13ed3c1575] master: Merge branch 'blender-v3.3-release'
Commit: d13ed3c1575c5bc840c322ef7fc86b6b8505d450 Author: Patrick Mours Date: Fri Aug 12 16:14:51 2022 +0200 Branches: master https://developer.blender.org/rBd13ed3c1575c5bc840c322ef7fc86b6b8505d450 Merge branch 'blender-v3.3-release' === === ___ Bf-blender-cvs mailing list Bf-blender-cvs@blender.org List details, subscription details or unsubscribe: https://lists.blender.org/mailman/listinfo/bf-blender-cvs
[Bf-blender-cvs] [515a15f200e] blender-v3.3-release: Fix syntax error introduced in previous commit
Commit: 515a15f200ed3323b7584c2c46d28a4ca562 Author: Patrick Mours Date: Fri Aug 12 16:13:09 2022 +0200 Branches: blender-v3.3-release https://developer.blender.org/rB515a15f200ed3323b7584c2c46d28a4ca562 Fix syntax error introduced in previous commit === M intern/cycles/integrator/path_trace.h === diff --git a/intern/cycles/integrator/path_trace.h b/intern/cycles/integrator/path_trace.h index 9531e4fb186..59382b51d23 100644 --- a/intern/cycles/integrator/path_trace.h +++ b/intern/cycles/integrator/path_trace.h @@ -263,7 +263,7 @@ class PathTrace { unique_ptr denoiser_; /* Denoiser device descriptor which holds the denoised big tile for multi-device workloads. */ - unique_ptr denoiser_buffer_; + unique_ptr big_tile_denoise_work_; /* State which is common for all the steps of the render work. * Is brought up to date in the `render()` call and is accessed from all the steps involved into ___ Bf-blender-cvs mailing list Bf-blender-cvs@blender.org List details, subscription details or unsubscribe: https://lists.blender.org/mailman/listinfo/bf-blender-cvs
[Bf-blender-cvs] [e7908c07904] master: Merge branch 'blender-v3.3-release'
Commit: e7908c079042b8d64ee2a39bf2630e676e89fe6d Author: Patrick Mours Date: Fri Aug 12 16:04:06 2022 +0200 Branches: master https://developer.blender.org/rBe7908c079042b8d64ee2a39bf2630e676e89fe6d Merge branch 'blender-v3.3-release' === === ___ Bf-blender-cvs mailing list Bf-blender-cvs@blender.org List details, subscription details or unsubscribe: https://lists.blender.org/mailman/listinfo/bf-blender-cvs
[Bf-blender-cvs] [79787bf8e1e] blender-v3.3-release: Cycles: Improve denoiser update performance when rendering with multiple GPUs
Commit: 79787bf8e1e1d766e34dc6f8c5eda2efcceaa6cc Author: Patrick Mours Date: Fri Aug 12 15:49:30 2022 +0200 Branches: blender-v3.3-release https://developer.blender.org/rB79787bf8e1e1d766e34dc6f8c5eda2efcceaa6cc Cycles: Improve denoiser update performance when rendering with multiple GPUs This patch causes the render buffers to be copied to the denoiser device only once before denoising, and output/display is then fed from that single buffer on the denoiser device. That way usually all but one copy (from all the render devices to the denoiser device) can be eliminated, provided that the denoiser device is also the display device (in which case interop is used to update the display). As such this patch also adds some logic that tries to ensure the chosen denoiser device is the same as the display device. Differential Revision: https://developer.blender.org/D15657 === M intern/cycles/device/cuda/device_impl.cpp M intern/cycles/device/optix/device_impl.cpp M intern/cycles/integrator/denoiser.cpp M intern/cycles/integrator/path_trace.cpp M intern/cycles/integrator/path_trace.h M intern/cycles/integrator/path_trace_tile.cpp M intern/cycles/integrator/path_trace_tile.h M intern/cycles/session/session.cpp === diff --git a/intern/cycles/device/cuda/device_impl.cpp b/intern/cycles/device/cuda/device_impl.cpp index 00851a8e91c..01c021551f3 100644 --- a/intern/cycles/device/cuda/device_impl.cpp +++ b/intern/cycles/device/cuda/device_impl.cpp @@ -1202,11 +1202,11 @@ bool CUDADevice::should_use_graphics_interop() } vector<CUdevice> gl_devices(num_all_devices); - uint num_gl_devices; + uint num_gl_devices = 0; cuGLGetDevices(&num_gl_devices, gl_devices.data(), num_all_devices, CU_GL_DEVICE_LIST_ALL); - for (CUdevice gl_device : gl_devices) { -if (gl_device == cuDevice) { + for (uint i = 0; i < num_gl_devices; ++i) { +if (gl_devices[i] == cuDevice) { return true; } } diff --git a/intern/cycles/device/optix/device_impl.cpp b/intern/cycles/device/optix/device_impl.cpp index 
151983667c0..94a46acaf18 100644 --- a/intern/cycles/device/optix/device_impl.cpp +++ b/intern/cycles/device/optix/device_impl.cpp @@ -39,6 +39,9 @@ CCL_NAMESPACE_BEGIN // The original code is Copyright NVIDIA Corporation, BSD-3-Clause. namespace { +# if OPTIX_ABI_VERSION >= 60 +using ::optixUtilDenoiserInvokeTiled; +# else static OptixResult optixUtilDenoiserSplitImage(const OptixImage2D , const OptixImage2D , unsigned int overlapWindowSizeInPixels, @@ -215,6 +218,7 @@ static OptixResult optixUtilDenoiserInvokeTiled(OptixDenoiser denoiser, } return OPTIX_SUCCESS; } +# endif # if OPTIX_ABI_VERSION >= 55 static void execute_optix_task(TaskPool , OptixTask task, OptixResult _reason) diff --git a/intern/cycles/integrator/denoiser.cpp b/intern/cycles/integrator/denoiser.cpp index 94991d63e4c..831bd3a4407 100644 --- a/intern/cycles/integrator/denoiser.cpp +++ b/intern/cycles/integrator/denoiser.cpp @@ -101,10 +101,17 @@ static Device *find_best_device(Device *device, DenoiserType type) if ((sub_device->info.denoisers & type) == 0) { return; } + if (!best_device) { best_device = sub_device; } else { + /* Prefer a device that can use graphics interop for faster display update. */ + if (sub_device->should_use_graphics_interop() && + !best_device->should_use_graphics_interop()) { +best_device = sub_device; + } + /* TODO(sergey): Choose fastest device from available ones. Taking into account performance * of the device and data transfer cost. 
*/ } diff --git a/intern/cycles/integrator/path_trace.cpp b/intern/cycles/integrator/path_trace.cpp index ed278821b46..3ec7b601d9f 100644 --- a/intern/cycles/integrator/path_trace.cpp +++ b/intern/cycles/integrator/path_trace.cpp @@ -26,6 +26,7 @@ PathTrace::PathTrace(Device *device, RenderScheduler _scheduler, TileManager _manager) : device_(device), + film_(film), device_scene_(device_scene), render_scheduler_(render_scheduler), tile_manager_(tile_manager) @@ -60,7 +61,17 @@ PathTrace::~PathTrace() void PathTrace::load_kernels() { if (denoiser_) { +/* Activate graphics interop while denoiser device is created, so that it can choose a device + * that supports interop for faster display updates. */ +if (display_ && path_trace_works_.size() > 1) { + display_->graphics_interop_activate(); +} + denoiser_->load_kernels(progress_); + +if (display_ && path_trace_works_.size() > 1) { + display_->graphics_interop_deactivate(); +}
[Bf-blender-cvs] [ef268c78933] master: Build: Fix build of library dependencies on Linux aarch64
Commit: ef268c78933079137288e326704431432adf9ad9 Author: Patrick Mours Date: Thu Jun 30 16:44:38 2022 +0200 Branches: master https://developer.blender.org/rBef268c78933079137288e326704431432adf9ad9 Build: Fix build of library dependencies on Linux aarch64 rBb9c37608a9e959a896f5358d4ab3d3d001a70833 moved evaluation of `versions.cmake` before `options.cmake`, as a result of which `BLENDER_PLATFORM_ARM` was no longer defined in `versions.cmake`, causing it to choose the wrong OpenSSL version for aarch64. This reverts that. Also fixes a compiler crash when building flex with some glibc versions. Differential Revision: https://developer.blender.org/D15319 === M build_files/build_environment/CMakeLists.txt M build_files/build_environment/cmake/flex.cmake M build_files/build_environment/cmake/ispc.cmake A build_files/build_environment/patches/flex.diff === diff --git a/build_files/build_environment/CMakeLists.txt b/build_files/build_environment/CMakeLists.txt index a9ff48b2a9b..e0350901cd0 100644 --- a/build_files/build_environment/CMakeLists.txt +++ b/build_files/build_environment/CMakeLists.txt @@ -29,8 +29,9 @@ cmake_minimum_required(VERSION 3.5) include(ExternalProject) include(cmake/check_software.cmake) -include(cmake/versions.cmake) include(cmake/options.cmake) +# versions.cmake needs to be included after options.cmake due to the BLENDER_PLATFORM_ARM variable being needed. 
+include(cmake/versions.cmake) include(cmake/boost_build_options.cmake) include(cmake/download.cmake) include(cmake/macros.cmake) diff --git a/build_files/build_environment/cmake/flex.cmake b/build_files/build_environment/cmake/flex.cmake index 2b04c8d5d68..99233adbcdc 100644 --- a/build_files/build_environment/cmake/flex.cmake +++ b/build_files/build_environment/cmake/flex.cmake @@ -5,6 +5,8 @@ ExternalProject_Add(external_flex URL_HASH ${FLEX_HASH_TYPE}=${FLEX_HASH} DOWNLOAD_DIR ${DOWNLOAD_DIR} PREFIX ${BUILD_DIR}/flex + # This patch fixes build with some versions of glibc (https://github.com/westes/flex/commit/24fd0551333e7eded87b64dd36062da3df2f6380) + PATCH_COMMAND ${PATCH_CMD} -d ${BUILD_DIR}/flex/src/external_flex < ${PATCH_DIR}/flex.diff CONFIGURE_COMMAND ${CONFIGURE_ENV} && cd ${BUILD_DIR}/flex/src/external_flex/ && ${CONFIGURE_COMMAND} --prefix=${LIBDIR}/flex BUILD_COMMAND ${CONFIGURE_ENV} && cd ${BUILD_DIR}/flex/src/external_flex/ && make -j${MAKE_THREADS} INSTALL_COMMAND ${CONFIGURE_ENV} && cd ${BUILD_DIR}/flex/src/external_flex/ && make install diff --git a/build_files/build_environment/cmake/ispc.cmake b/build_files/build_environment/cmake/ispc.cmake index 86dc1d9efa8..c2dbedca55f 100644 --- a/build_files/build_environment/cmake/ispc.cmake +++ b/build_files/build_environment/cmake/ispc.cmake @@ -28,7 +28,7 @@ elseif(UNIX) set(ISPC_EXTRA_ARGS_UNIX -DCMAKE_C_COMPILER=${LIBDIR}/llvm/bin/clang -DCMAKE_CXX_COMPILER=${LIBDIR}/llvm/bin/clang++ --DARM_ENABLED=Off +-DARM_ENABLED=${BLENDER_PLATFORM_ARM} -DFLEX_EXECUTABLE=${LIBDIR}/flex/bin/flex ) endif() diff --git a/build_files/build_environment/patches/flex.diff b/build_files/build_environment/patches/flex.diff new file mode 100644 index 000..d3f9e8b0a66 --- /dev/null +++ b/build_files/build_environment/patches/flex.diff @@ -0,0 +1,15 @@ +diff --git a/configure.ac b/configure.ac +index c6f12d644..3c977a4e3 100644 +--- a/configure.ac b/configure.ac +@@ -25,8 +25,10 @@ + # autoconf requirements and 
initialization + + AC_INIT([the fast lexical analyser generator],[2.6.4],[flex-h...@lists.sourceforge.net],[flex]) ++AC_PREREQ([2.60]) + AC_CONFIG_SRCDIR([src/scan.l]) + AC_CONFIG_AUX_DIR([build-aux]) ++AC_USE_SYSTEM_EXTENSIONS + LT_INIT + AM_INIT_AUTOMAKE([1.15 -Wno-portability foreign std-options dist-lzip parallel-tests subdir-objects]) + AC_CONFIG_HEADER([src/config.h]) ___ Bf-blender-cvs mailing list Bf-blender-cvs@blender.org List details, subscription details or unsubscribe: https://lists.blender.org/mailman/listinfo/bf-blender-cvs
[Bf-blender-cvs] [34f94a02f37] blender-v3.2-release: Fix use of OpenGL interop breaking in Hydra viewports that do not support it
Commit: 34f94a02f37005210f629f04635c457d98ff5f91 Author: Patrick Mours Date: Wed Jun 1 15:26:55 2022 +0200 Branches: blender-v3.2-release https://developer.blender.org/rB34f94a02f37005210f629f04635c457d98ff5f91 Fix use of OpenGL interop breaking in Hydra viewports that do not support it Rendering directly to a resource using OpenGL interop and Hgi doesn't work in Houdini, since it never uses the resulting resource (it does not call `HdRenderBuffer::GetResource`). But since doing that simultaneously disables mapping (`HdRenderBuffer::Map` is not implemented then), nothing was displayed. To fix this, keep track of whether a Hydra viewport does support displaying a Hgi resource directly, by checking whether `HdRenderBuffer::GetResource` is ever called and only enable use of OpenGL interop if that is the case. Differential Revision: https://developer.blender.org/D15090 === M intern/cycles/hydra/display_driver.cpp M intern/cycles/hydra/display_driver.h M intern/cycles/hydra/output_driver.cpp M intern/cycles/hydra/render_buffer.cpp M intern/cycles/hydra/render_buffer.h === diff --git a/intern/cycles/hydra/display_driver.cpp b/intern/cycles/hydra/display_driver.cpp index a809ace63e2..0c0b577c358 100644 --- a/intern/cycles/hydra/display_driver.cpp +++ b/intern/cycles/hydra/display_driver.cpp @@ -23,10 +23,18 @@ HdCyclesDisplayDriver::HdCyclesDisplayDriver(HdCyclesSession *renderParam, Hgi * HdCyclesDisplayDriver::~HdCyclesDisplayDriver() { - deinit(); + if (texture_) { +_hgi->DestroyTexture(_); + } + + if (gl_pbo_id_) { +glDeleteBuffers(1, _pbo_id_); + } + + gl_context_dispose(); } -void HdCyclesDisplayDriver::init() +void HdCyclesDisplayDriver::gl_context_create() { #ifdef _WIN32 if (!gl_context_) { @@ -64,16 +72,42 @@ void HdCyclesDisplayDriver::init() } } -void HdCyclesDisplayDriver::deinit() +bool HdCyclesDisplayDriver::gl_context_enable() { - if (texture_) { -_hgi->DestroyTexture(_); +#ifdef _WIN32 + if (!hdc_ || !gl_context_) { +return false; } - if (gl_pbo_id_) { 
-glDeleteBuffers(1, _pbo_id_); + mutex_.lock(); + + // Do not change context if this is called in the main thread + if (wglGetCurrentContext() == nullptr) { +if (!TF_VERIFY(wglMakeCurrent((HDC)hdc_, (HGLRC)gl_context_))) { + mutex_.unlock(); + return false; +} + } + + return true; +#else + return false; +#endif +} + +void HdCyclesDisplayDriver::gl_context_disable() +{ +#ifdef _WIN32 + if (wglGetCurrentContext() == gl_context_) { +TF_VERIFY(wglMakeCurrent(nullptr, nullptr)); } + mutex_.unlock(); +#endif +} + +void HdCyclesDisplayDriver::gl_context_dispose() +{ #ifdef _WIN32 if (gl_context_) { TF_VERIFY(wglDeleteContext((HGLRC)gl_context_)); @@ -90,13 +124,9 @@ bool HdCyclesDisplayDriver::update_begin(const Params , int texture_width, int texture_height) { -#ifdef _WIN32 - if (!hdc_ || !gl_context_) { + if (!gl_context_enable()) { return false; } -#endif - - graphics_interop_activate(); if (gl_render_sync_) { glWaitSync((GLsync)gl_render_sync_, 0, GL_TIMEOUT_IGNORED); @@ -121,15 +151,14 @@ bool HdCyclesDisplayDriver::update_begin(const Params , void HdCyclesDisplayDriver::update_end() { gl_upload_sync_ = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); - glFlush(); - graphics_interop_deactivate(); + gl_context_disable(); } void HdCyclesDisplayDriver::flush() { - graphics_interop_activate(); + gl_context_enable(); if (gl_upload_sync_) { glWaitSync((GLsync)gl_upload_sync_, 0, GL_TIMEOUT_IGNORED); @@ -139,7 +168,7 @@ void HdCyclesDisplayDriver::flush() glWaitSync((GLsync)gl_render_sync_, 0, GL_TIMEOUT_IGNORED); } - graphics_interop_deactivate(); + gl_context_disable(); } half4 *HdCyclesDisplayDriver::map_texture_buffer() @@ -179,25 +208,12 @@ DisplayDriver::GraphicsInterop HdCyclesDisplayDriver::graphics_interop_get() void HdCyclesDisplayDriver::graphics_interop_activate() { - mutex_.lock(); - -#ifdef _WIN32 - // Do not change context if this is called in the main thread - if (wglGetCurrentContext() == nullptr) { -TF_VERIFY(wglMakeCurrent((HDC)hdc_, (HGLRC)gl_context_)); 
- } -#endif + gl_context_enable(); } void HdCyclesDisplayDriver::graphics_interop_deactivate() { -#ifdef _WIN32 - if (wglGetCurrentContext() == gl_context_) { -TF_VERIFY(wglMakeCurrent(nullptr, nullptr)); - } -#endif - - mutex_.unlock(); + gl_context_disable(); } void HdCyclesDisplayDriver::clear() @@ -214,7 +230,11 @@ void HdCyclesDisplayDriver::draw(const Params ) return; } - init(); + if (!renderBuffer->IsResourceUsed()) { +return; + } + + gl_context_create(); // Cycles 'DisplayDriver' only supports 'half
[Bf-blender-cvs] [5c6053ccb1c] master: Fix misaligned address error when rendering 3D curves in the viewport with Cycles and OptiX 7.4
Commit: 5c6053ccb1cbbe57d5a9d0aa33eadc6cb3e9dc9a Author: Patrick Mours Date: Fri Jun 3 12:24:13 2022 +0200 Branches: master https://developer.blender.org/rB5c6053ccb1cbbe57d5a9d0aa33eadc6cb3e9dc9a Fix misaligned address error when rendering 3D curves in the viewport with Cycles and OptiX 7.4 Acceleration structures in the viewport default to building with the fast build flag, but the intersection program used for curves was queried with the fast trace flag. The resulting mismatch caused an exception in the intersection kernel. Since it's difficult to predict whether dynamic or static acceleration structures are going to be built at the time of kernel loading, this fixes the mismatch by always using the fast trace flag for curves. === M intern/cycles/device/optix/device_impl.cpp === diff --git a/intern/cycles/device/optix/device_impl.cpp b/intern/cycles/device/optix/device_impl.cpp index 35717c49d1a..9ab9bbb59c5 100644 --- a/intern/cycles/device/optix/device_impl.cpp +++ b/intern/cycles/device/optix/device_impl.cpp @@ -553,7 +553,8 @@ bool OptiXDevice::load_kernels(const uint kernel_features) OptixBuiltinISOptions builtin_options = {}; # if OPTIX_ABI_VERSION >= 55 builtin_options.builtinISModuleType = OPTIX_PRIMITIVE_TYPE_ROUND_CATMULLROM; - builtin_options.buildFlags = OPTIX_BUILD_FLAG_PREFER_FAST_TRACE; + builtin_options.buildFlags = OPTIX_BUILD_FLAG_PREFER_FAST_TRACE | + OPTIX_BUILD_FLAG_ALLOW_COMPACTION; builtin_options.curveEndcapFlags = OPTIX_CURVE_ENDCAP_DEFAULT; /* Disable end-caps. 
*/ # else builtin_options.builtinISModuleType = OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE; @@ -1387,7 +1388,10 @@ bool OptiXDevice::build_optix_bvh(BVHOptiX *bvh, OptixAccelBufferSizes sizes = {}; OptixAccelBuildOptions options = {}; options.operation = operation; - if (use_fast_trace_bvh) { + if (use_fast_trace_bvh || + /* The build flags have to match the ones used to query the built-in curve intersection + program (see optixBuiltinISModuleGet above) */ + build_input.type == OPTIX_BUILD_INPUT_TYPE_CURVES) { VLOG(2) << "Using fast to trace OptiX BVH"; options.buildFlags = OPTIX_BUILD_FLAG_PREFER_FAST_TRACE | OPTIX_BUILD_FLAG_ALLOW_COMPACTION; } ___ Bf-blender-cvs mailing list Bf-blender-cvs@blender.org List details, subscription details or unsubscribe: https://lists.blender.org/mailman/listinfo/bf-blender-cvs
[Bf-blender-cvs] [a8c81ffa831] master: Cycles: Add half precision float support for volumes with NanoVDB
Commit: a8c81ffa83122661b80e842ddd641e366b3d5c04 Author: Patrick Mours Date: Fri May 20 18:01:26 2022 +0200 Branches: master https://developer.blender.org/rBa8c81ffa83122661b80e842ddd641e366b3d5c04 Cycles: Add half precision float support for volumes with NanoVDB This patch makes it possible to change the precision with which to store volume data in the NanoVDB data structure (as float, half, or using variable bit quantization) via the previously unused precision field in the volume data block. It makes it possible to further reduce memory usage during rendering, at a slight cost to the visual detail of a volume. Differential Revision: https://developer.blender.org/D10023 === M intern/cycles/blender/volume.cpp M intern/cycles/device/cuda/device_impl.cpp M intern/cycles/device/hip/device_impl.cpp M intern/cycles/device/memory.cpp M intern/cycles/kernel/device/cpu/image.h M intern/cycles/kernel/device/gpu/image.h M intern/cycles/scene/image.cpp M intern/cycles/scene/image_oiio.cpp M intern/cycles/scene/image_vdb.cpp M intern/cycles/scene/image_vdb.h M intern/cycles/scene/object.cpp M intern/cycles/util/texture.h M release/scripts/startup/bl_ui/properties_data_volume.py M source/blender/makesdna/DNA_volume_defaults.h M source/blender/makesdna/DNA_volume_types.h M source/blender/makesrna/intern/rna_volume.c === diff --git a/intern/cycles/blender/volume.cpp b/intern/cycles/blender/volume.cpp index 8dd2d45c0b6..a9a2c474f40 100644 --- a/intern/cycles/blender/volume.cpp +++ b/intern/cycles/blender/volume.cpp @@ -219,7 +219,10 @@ static void sync_smoke_volume( class BlenderVolumeLoader : public VDBImageLoader { public: - BlenderVolumeLoader(BL::BlendData _data, BL::Volume _volume, const string _name) + BlenderVolumeLoader(BL::BlendData _data, + BL::Volume _volume, + const string _name, + BL::VolumeRender::precision_enum precision_) : VDBImageLoader(grid_name), b_volume(b_volume) { b_volume.grids.load(b_data.ptr.data); @@ -240,6 +243,20 @@ class BlenderVolumeLoader : public 
VDBImageLoader { break; } } +#endif +#ifdef WITH_NANOVDB +switch (precision_) { + case BL::VolumeRender::precision_FULL: +precision = 32; +break; + case BL::VolumeRender::precision_HALF: +precision = 16; +break; + default: + case BL::VolumeRender::precision_VARIABLE: +precision = 0; +break; +} #endif } @@ -318,7 +335,8 @@ static void sync_volume_object(BL::BlendData _data, volume->attributes.add(std) : volume->attributes.add(name, TypeDesc::TypeFloat, ATTR_ELEMENT_VOXEL); - ImageLoader *loader = new BlenderVolumeLoader(b_data, b_volume, name.string()); + ImageLoader *loader = new BlenderVolumeLoader( + b_data, b_volume, name.string(), b_render.precision()); ImageParams params; params.frame = b_volume.grids.frame(); diff --git a/intern/cycles/device/cuda/device_impl.cpp b/intern/cycles/device/cuda/device_impl.cpp index 6908ae5ead3..75177566901 100644 --- a/intern/cycles/device/cuda/device_impl.cpp +++ b/intern/cycles/device/cuda/device_impl.cpp @@ -1084,7 +1084,9 @@ void CUDADevice::tex_alloc(device_texture ) need_texture_info = true; if (mem.info.data_type != IMAGE_DATA_TYPE_NANOVDB_FLOAT && - mem.info.data_type != IMAGE_DATA_TYPE_NANOVDB_FLOAT3) { + mem.info.data_type != IMAGE_DATA_TYPE_NANOVDB_FLOAT3 && + mem.info.data_type != IMAGE_DATA_TYPE_NANOVDB_FPN && + mem.info.data_type != IMAGE_DATA_TYPE_NANOVDB_FP16) { CUDA_RESOURCE_DESC resDesc; memset(, 0, sizeof(resDesc)); diff --git a/intern/cycles/device/hip/device_impl.cpp b/intern/cycles/device/hip/device_impl.cpp index 7159277b325..f8fdb86ca29 100644 --- a/intern/cycles/device/hip/device_impl.cpp +++ b/intern/cycles/device/hip/device_impl.cpp @@ -1042,7 +1042,9 @@ void HIPDevice::tex_alloc(device_texture ) need_texture_info = true; if (mem.info.data_type != IMAGE_DATA_TYPE_NANOVDB_FLOAT && - mem.info.data_type != IMAGE_DATA_TYPE_NANOVDB_FLOAT3) { + mem.info.data_type != IMAGE_DATA_TYPE_NANOVDB_FLOAT3 && + mem.info.data_type != IMAGE_DATA_TYPE_NANOVDB_FPN && + mem.info.data_type != IMAGE_DATA_TYPE_NANOVDB_FP16) { 
/* Bindless textures. */ hipResourceDesc resDesc; memset(, 0, sizeof(resDesc)); diff --git a/intern/cycles/device/memory.cpp b/intern/cycles/device/memory.cpp index 4c068dbdd3e..40cf2573cfb 100644 --- a/intern/cycles/device/memory.cpp +++ b/intern/cycles/device/memory.cpp @@ -165,6 +165,8 @@ device_texture::device_texture(Dev
[Bf-blender-cvs] [6fa5d520b86] master: Cycles: Add support for parallel compilation of OptiX module
Commit: 6fa5d520b861e8d5b96967452c50b459c52e8024 Author: Patrick Mours Date: Thu May 5 11:08:44 2022 +0200 Branches: master https://developer.blender.org/rB6fa5d520b861e8d5b96967452c50b459c52e8024 Cycles: Add support for parallel compilation of OptiX module OptiX 7.4 adds support for splitting the costly creation of an OptiX module into smaller tasks that can be executed in parallel on a thread pool. This is only really relevant for the "shader_raytrace" kernel variant as the main one is small and compiles fast either way. It shaves off a few seconds there (total gain is not massive currently, since it is difficult for the compiler to split up the huge shading entry point that is the primary one taking up time, but it is still measurable). Differential Revision: https://developer.blender.org/D14845 === M intern/cycles/device/optix/device_impl.cpp === diff --git a/intern/cycles/device/optix/device_impl.cpp b/intern/cycles/device/optix/device_impl.cpp index 8830d8c44ac..6329144131e 100644 --- a/intern/cycles/device/optix/device_impl.cpp +++ b/intern/cycles/device/optix/device_impl.cpp @@ -23,6 +23,7 @@ # include "util/md5.h" # include "util/path.h" # include "util/progress.h" +# include "util/task.h" # include "util/time.h" # undef __KERNEL_CPU__ @@ -216,6 +217,24 @@ static OptixResult optixUtilDenoiserInvokeTiled(OptixDenoiser denoiser, return OPTIX_SUCCESS; } +# if OPTIX_ABI_VERSION >= 55 +static void execute_optix_task(TaskPool &pool, OptixTask task, OptixResult &failure_reason) +{ + OptixTask additional_tasks[16]; + unsigned int num_additional_tasks = 0; + + const OptixResult result = optixTaskExecute(task, additional_tasks, 16, &num_additional_tasks); + if (result == OPTIX_SUCCESS) { +for (unsigned int i = 0; i < num_additional_tasks; ++i) { + pool.push(function_bind(&execute_optix_task, std::ref(pool), additional_tasks[i], std::ref(failure_reason))); +} + } + else { +failure_reason = result; + } +} +# endif + } // namespace OptiXDevice::Denoiser::Denoiser(OptiXDevice *device) @@ -453,6 
+472,23 @@ bool OptiXDevice::load_kernels(const uint kernel_features) return false; } +# if OPTIX_ABI_VERSION >= 55 +OptixTask task = nullptr; +OptixResult result = optixModuleCreateFromPTXWithTasks(context, + _options, + _options, + ptx_data.data(), + ptx_data.size(), + nullptr, + nullptr, + _module, + ); +if (result == OPTIX_SUCCESS) { + TaskPool pool; + execute_optix_task(pool, task, result); + pool.wait_work(); +} +# else const OptixResult result = optixModuleCreateFromPTX(context, _options, _options, @@ -461,6 +497,7 @@ bool OptiXDevice::load_kernels(const uint kernel_features) nullptr, 0, _module); +# endif if (result != OPTIX_SUCCESS) { set_error(string_printf("Failed to load OptiX kernel from '%s' (%s)", ptx_filename.c_str(), ___ Bf-blender-cvs mailing list Bf-blender-cvs@blender.org List details, subscription details or unsubscribe: https://lists.blender.org/mailman/listinfo/bf-blender-cvs
[Bf-blender-cvs] [fc2c22e90c2] master: Cycles: Hydra fixes for stageMetersPerUnit and OpenGL context on Windows
Commit: fc2c22e90c252f683a42574d4382f7e3c23940e1 Author: Patrick Mours Date: Wed Apr 20 13:40:48 2022 +0200 Branches: master https://developer.blender.org/rBfc2c22e90c252f683a42574d4382f7e3c23940e1 Cycles: Hydra fixes for stageMetersPerUnit and OpenGL context on Windows Add "stageMetersPerUnit" render setting for USD files that have that set to something other than the default (e.g. exported by Blender). And fix a crash when an application creates a Hydra render pass on a thread that does not have an OpenGL context current. === M intern/cycles/hydra/display_driver.cpp M intern/cycles/hydra/display_driver.h M intern/cycles/hydra/file_reader.cpp M intern/cycles/hydra/render_delegate.cpp M intern/cycles/hydra/render_delegate.h === diff --git a/intern/cycles/hydra/display_driver.cpp b/intern/cycles/hydra/display_driver.cpp index 6f6ca35cd31..a809ace63e2 100644 --- a/intern/cycles/hydra/display_driver.cpp +++ b/intern/cycles/hydra/display_driver.cpp @@ -19,44 +19,66 @@ HDCYCLES_NAMESPACE_OPEN_SCOPE HdCyclesDisplayDriver::HdCyclesDisplayDriver(HdCyclesSession *renderParam, Hgi *hgi) : _renderParam(renderParam), _hgi(hgi) { +} + +HdCyclesDisplayDriver::~HdCyclesDisplayDriver() +{ + deinit(); +} + +void HdCyclesDisplayDriver::init() +{ #ifdef _WIN32 - hdc_ = GetDC(CreateWindowA("STATIC", - "HdCycles", - WS_OVERLAPPEDWINDOW | WS_CLIPSIBLINGS | WS_CLIPCHILDREN, - 0, - 0, - 64, - 64, - NULL, - NULL, - GetModuleHandle(NULL), - NULL)); - - int pixelFormat = GetPixelFormat(wglGetCurrentDC()); - PIXELFORMATDESCRIPTOR pfd = {sizeof(pfd)}; - DescribePixelFormat((HDC)hdc_, pixelFormat, sizeof(pfd), ); - SetPixelFormat((HDC)hdc_, pixelFormat, ); - - TF_VERIFY(gl_context_ = wglCreateContext((HDC)hdc_)); - TF_VERIFY(wglShareLists(wglGetCurrentContext(), (HGLRC)gl_context_)); + if (!gl_context_) { +hdc_ = GetDC(CreateWindowA("STATIC", + "HdCycles", + WS_OVERLAPPEDWINDOW | WS_CLIPSIBLINGS | WS_CLIPCHILDREN, + 0, + 0, + 64, + 64, + NULL, + NULL, + GetModuleHandle(NULL), + NULL)); + +int 
pixelFormat = GetPixelFormat(wglGetCurrentDC()); +PIXELFORMATDESCRIPTOR pfd = {sizeof(pfd)}; +DescribePixelFormat((HDC)hdc_, pixelFormat, sizeof(pfd), ); +SetPixelFormat((HDC)hdc_, pixelFormat, ); + +TF_VERIFY(gl_context_ = wglCreateContext((HDC)hdc_)); +TF_VERIFY(wglShareLists(wglGetCurrentContext(), (HGLRC)gl_context_)); + } + if (!gl_context_) { +return; + } #endif - glewInit(); + if (!gl_pbo_id_) { +if (glewInit() != GLEW_OK) { + return; +} - glGenBuffers(1, _pbo_id_); +glGenBuffers(1, _pbo_id_); + } } -HdCyclesDisplayDriver::~HdCyclesDisplayDriver() +void HdCyclesDisplayDriver::deinit() { if (texture_) { _hgi->DestroyTexture(_); } - glDeleteBuffers(1, _pbo_id_); + if (gl_pbo_id_) { +glDeleteBuffers(1, _pbo_id_); + } #ifdef _WIN32 - TF_VERIFY(wglDeleteContext((HGLRC)gl_context_)); - DestroyWindow(WindowFromDC((HDC)hdc_)); + if (gl_context_) { +TF_VERIFY(wglDeleteContext((HGLRC)gl_context_)); +DestroyWindow(WindowFromDC((HDC)hdc_)); + } #endif } @@ -192,6 +214,8 @@ void HdCyclesDisplayDriver::draw(const Params ) return; } + init(); + // Cycles 'DisplayDriver' only supports 'half4' format TF_VERIFY(renderBuffer->GetFormat() == HdFormatFloat16Vec4); diff --git a/intern/cycles/hydra/display_driver.h b/intern/cycles/hydra/display_driver.h index 668f7d76eed..20086830e6a 100644 --- a/intern/cycles/hydra/display_driver.h +++ b/intern/cycles/hydra/display_driver.h @@ -19,6 +19,9 @@ class HdCyclesDisplayDriver final : public CCL_NS::DisplayDriver { ~HdCyclesDisplayDriver(); private: + void init(); + void deinit(); + void next_tile_begin() override; bool update_begin(const Params , int texture_width, int texture_height) override; diff --git a/intern/cycles/hydra/file_reader.cpp b/intern/cycles/hydra/file_reader.cpp index 329cc959ac3..8925626d8c3 100644 --- a/intern/cycles/hydra/file_reader.cpp +++ b/intern/cycles/hydra/file_reader.cpp @@ -18,6 +18,7 @@ #include #include #include +#include #include HDCYCLES_NAMESPACE_OPEN_SCOPE @@ -69,6 +70,9 @@ void 
HdCyclesFileReader::read(Session *session,
[Bf-blender-cvs] [c31b89e76e0] master: Cycles: Add support for "stageMetersPerUnit" Hydra render setting
Commit: c31b89e76e0d216fc7b8807aa8ccd815b30ed93d Author: Patrick Mours Date: Tue Apr 12 18:08:25 2022 +0200 Branches: master https://developer.blender.org/rBc31b89e76e0d216fc7b8807aa8ccd815b30ed93d Cycles: Add support for "stageMetersPerUnit" Hydra render setting This can be useful to match transforms to what native Cycles would see in Blender, as USD typically uses centimeters, but Blender uses meters. This patch also fixes the hardcoded focal length multiplier, which is now using the same units as everything else. Default of "stageMetersPerUnit" is 0.01 to match the USD default of centimeters. Differential Revision: https://developer.blender.org/D14630 === M intern/cycles/hydra/camera.cpp M intern/cycles/hydra/camera.h M intern/cycles/hydra/geometry.inl M intern/cycles/hydra/light.cpp M intern/cycles/hydra/render_delegate.cpp M intern/cycles/hydra/render_pass.cpp M intern/cycles/hydra/session.h === diff --git a/intern/cycles/hydra/camera.cpp b/intern/cycles/hydra/camera.cpp index c746a107899..62042cbbcd2 100644 --- a/intern/cycles/hydra/camera.cpp +++ b/intern/cycles/hydra/camera.cpp @@ -3,6 +3,7 @@ * Copyright 2022 Blender Foundation */ #include "hydra/camera.h" +#include "hydra/session.h" #include "scene/camera.h" #include @@ -12,6 +13,19 @@ HDCYCLES_NAMESPACE_OPEN_SCOPE extern Transform convert_transform(const GfMatrix4d ); +Transform convert_camera_transform(const GfMatrix4d &matrix, float metersPerUnit) +{ + Transform t = convert_transform(matrix); + // Flip Z axis + t.x.z *= -1.0f; + t.y.z *= -1.0f; + t.z.z *= -1.0f; + // Scale translation + t.x.w *= metersPerUnit; + t.y.w *= metersPerUnit; + t.z.w *= metersPerUnit; + return t; +} #if PXR_VERSION < 2102 // clang-format off @@ -61,13 +75,20 @@ void HdCyclesCamera::Sync(HdSceneDelegate *sceneDelegate, if (*dirtyBits & DirtyBits::DirtyTransform) { sceneDelegate->SampleTransform(id, &_transformSamples); +bool transform_found = false; for (size_t i = 0; i < _transformSamples.count; ++i) { if 
(_transformSamples.times[i] == 0.0f) { _transform = _transformSamples.values[i]; _data.SetTransform(_transform); +transform_found = true; break; } } + +if (!transform_found && _transformSamples.count) { + _transform = _transformSamples.values[0]; + _data.SetTransform(_transform); +} } #else if (*dirtyBits & DirtyBits::DirtyViewMatrix) { @@ -236,18 +257,21 @@ void HdCyclesCamera::Finalize(HdRenderParam *renderParam) HdCamera::Finalize(renderParam); } -void HdCyclesCamera::ApplyCameraSettings(Camera *cam) const +void HdCyclesCamera::ApplyCameraSettings(HdRenderParam *renderParam, Camera *cam) const { - ApplyCameraSettings(_data, _windowPolicy, cam); + ApplyCameraSettings(renderParam, _data, _windowPolicy, cam); + + const float metersPerUnit = static_cast(renderParam)->GetStageMetersPerUnit(); array motion(_transformSamples.count); - for (size_t i = 0; i < _transformSamples.count; ++i) -motion[i] = convert_transform(_transformSamples.values[i]) * -transform_scale(1.0f, 1.0f, -1.0f); + for (size_t i = 0; i < _transformSamples.count; ++i) { +motion[i] = convert_camera_transform(_transformSamples.values[i], metersPerUnit); + } cam->set_motion(motion); } -void HdCyclesCamera::ApplyCameraSettings(const GfCamera , +void HdCyclesCamera::ApplyCameraSettings(HdRenderParam *renderParam, + const GfCamera , CameraUtilConformWindowPolicy windowPolicy, Camera *cam) { @@ -261,20 +285,22 @@ void HdCyclesCamera::ApplyCameraSettings(const GfCamera , GfCamera::Orthographic == CAMERA_ORTHOGRAPHIC); cam->set_camera_type(static_cast(data.GetProjection())); + const float metersPerUnit = static_cast(renderParam)->GetStageMetersPerUnit(); + auto viewplane = data.GetFrustum().GetWindow(); auto focalLength = 1.0f; if (data.GetProjection() == GfCamera::Perspective) { viewplane *= 2.0 / viewplane.GetSize()[1]; // Normalize viewplane -focalLength = data.GetFocalLength() * 1e-3f; +focalLength = data.GetFocalLength() * GfCamera::FOCAL_LENGTH_UNIT * metersPerUnit; 
cam->set_fov(GfDegreesToRadians(data.GetFieldOfView(GfCamera::FOVVertical))); } - cam->set_sensorwidth(data.GetHorizontalAperture() * GfCamera::APERTURE_UNIT); - cam->set_sensorheight(data.GetVerticalAperture() * GfCamera::APERTURE_UNIT); + cam->set_sensorwidth(data.GetHorizontalAperture() * GfCamera::APERTURE_UNIT * metersPerUnit); + cam->set_sensorheight(data.GetVerticalAperture() * GfCamera
[Bf-blender-cvs] [e5136872881] master: Cycles: Fix a few type casting warnings
Commit: e51368728815e3700414a77bf91668425a9965ec Author: Patrick Mours Date: Tue Apr 5 17:30:01 2022 +0200 Branches: master https://developer.blender.org/rBe51368728815e3700414a77bf91668425a9965ec Cycles: Fix a few type casting warnings Stumbled over the `integrate_surface_volume_only_bounce` kernel function not returning the right type. The others too showed up as warnings when building Cycles as a standalone which didn't have those warnings disabled. Differential Revision: https://developer.blender.org/D14558 === M intern/cycles/kernel/integrator/shade_surface.h M intern/cycles/scene/geometry.cpp M intern/cycles/scene/geometry.h M intern/cycles/session/merge.cpp === diff --git a/intern/cycles/kernel/integrator/shade_surface.h b/intern/cycles/kernel/integrator/shade_surface.h index a9bf3b5b432..55bb08044ae 100644 --- a/intern/cycles/kernel/integrator/shade_surface.h +++ b/intern/cycles/kernel/integrator/shade_surface.h @@ -346,8 +346,8 @@ ccl_device_forceinline int integrate_surface_bsdf_bssrdf_bounce( } #ifdef __VOLUME__ -ccl_device_forceinline bool integrate_surface_volume_only_bounce(IntegratorState state, - ccl_private ShaderData *sd) +ccl_device_forceinline int integrate_surface_volume_only_bounce(IntegratorState state, +ccl_private ShaderData *sd) { if (!path_state_volume_next(state)) { return LABEL_NONE; diff --git a/intern/cycles/scene/geometry.cpp b/intern/cycles/scene/geometry.cpp index a2a15416ae6..8152a27046f 100644 --- a/intern/cycles/scene/geometry.cpp +++ b/intern/cycles/scene/geometry.cpp @@ -180,7 +180,7 @@ bool Geometry::has_true_displacement() const } void Geometry::compute_bvh( -Device *device, DeviceScene *dscene, SceneParams *params, Progress *progress, int n, int total) +Device *device, DeviceScene *dscene, SceneParams *params, Progress *progress, size_t n, size_t total) { if (progress->get_cancel()) return; diff --git a/intern/cycles/scene/geometry.h b/intern/cycles/scene/geometry.h index bbb50d5cbfe..0c2e70d483d 100644 --- 
a/intern/cycles/scene/geometry.h +++ b/intern/cycles/scene/geometry.h @@ -110,8 +110,8 @@ class Geometry : public Node { DeviceScene *dscene, SceneParams *params, Progress *progress, - int n, - int total); + size_t n, + size_t total); virtual PrimitiveType primitive_type() const = 0; diff --git a/intern/cycles/session/merge.cpp b/intern/cycles/session/merge.cpp index a88ffee6409..316f56630d6 100644 --- a/intern/cycles/session/merge.cpp +++ b/intern/cycles/session/merge.cpp @@ -531,7 +531,7 @@ static void read_layer_samples(vector , current_layer_samples.total = 0; current_layer_samples.per_pixel.resize(in_spec.width * in_spec.height); std::fill( -current_layer_samples.per_pixel.begin(), current_layer_samples.per_pixel.end(), 0); +current_layer_samples.per_pixel.begin(), current_layer_samples.per_pixel.end(), 0.0f); } if (layer.has_sample_pass) { ___ Bf-blender-cvs mailing list Bf-blender-cvs@blender.org List details, subscription details or unsubscribe: https://lists.blender.org/mailman/listinfo/bf-blender-cvs
[Bf-blender-cvs] [f60cffad38d] master: Cycles: Use USD dependencies when building Hydra render delegate
Commit: f60cffad38d12bdfefe503924e93c33a7c89f671 Author: Patrick Mours Date: Thu Mar 31 19:27:32 2022 +0200 Branches: master https://developer.blender.org/rBf60cffad38d12bdfefe503924e93c33a7c89f671 Cycles: Use USD dependencies when building Hydra render delegate Adds support for linking with some of the dependencies of a USD build instead of the precompiled libraries from Blender, specifically OpenSubdiv, OpenVDB and TBB. Other dependencies keep using the precompiled libraries from Blender, since they are linked statically anyway so it doesn't matter as much. Plus they have interdependencies that are difficult to resolve when only using selected libraries from the USD build and we can't simply assume that USD was built with all of them. This patch also makes building the Hydra render delegate via the standalone repository work and fixes various small issues I ran into in general on Windows (e.g. the use of both fixed paths and `find_package` did not seem to work correctly). Building both the standalone Cycles application and the Hydra render delegate at the same time is supported now as well (the paths in the USD plugin JSON file are updated accordingly). All that needs to be done now to build is to specify a `PXR_ROOT` or `USD_ROOT` CMake variable pointing to the USD installation; everything else is taken care of automatically (CMake targets are loaded from the `pxrTargets.cmake` of USD and linked into the render delegate and OpenSubdiv, OpenVDB and TBB are replaced with those from USD when they exist). 
Differential Revision: https://developer.blender.org/D14523 === M build_files/cmake/platform/platform_win32.cmake M intern/cycles/CMakeLists.txt M intern/cycles/cmake/external_libs.cmake M intern/cycles/cmake/macros.cmake M intern/cycles/hydra/CMakeLists.txt M intern/cycles/hydra/camera.cpp M intern/cycles/hydra/config.h M intern/cycles/hydra/field.cpp M intern/cycles/hydra/material.cpp M intern/cycles/hydra/material.h M intern/cycles/hydra/node_util.cpp === diff --git a/build_files/cmake/platform/platform_win32.cmake b/build_files/cmake/platform/platform_win32.cmake index 8ae38e03fb1..b0dbc0d3264 100644 --- a/build_files/cmake/platform/platform_win32.cmake +++ b/build_files/cmake/platform/platform_win32.cmake @@ -401,7 +401,7 @@ if(WITH_CODEC_FFMPEG) ${LIBDIR}/ffmpeg/include/msvc ) windows_find_package(FFmpeg) - if(NOT FFMPEG_FOUND) + if(NOT FFmpeg_FOUND) warn_hardcoded_paths(FFmpeg) set(FFMPEG_LIBRARIES ${LIBDIR}/ffmpeg/lib/avcodec.lib @@ -415,7 +415,7 @@ endif() if(WITH_IMAGE_OPENEXR) windows_find_package(OpenEXR REQUIRED) - if(NOT OPENEXR_FOUND) + if(NOT OpenEXR_FOUND) set(OPENEXR_ROOT_DIR ${LIBDIR}/openexr) set(OPENEXR_VERSION "2.1") warn_hardcoded_paths(OpenEXR) @@ -531,17 +531,20 @@ if(WITH_BOOST) set(BOOST_LIBRARIES ${Boost_LIBRARIES}) set(BOOST_LIBPATH ${Boost_LIBRARY_DIRS}) endif() + set(BOOST_DEFINITIONS "-DBOOST_ALL_NO_LIB") endif() if(WITH_OPENIMAGEIO) windows_find_package(OpenImageIO) - set(OPENIMAGEIO ${LIBDIR}/OpenImageIO) - set(OPENIMAGEIO_LIBPATH ${OPENIMAGEIO}/lib) - set(OPENIMAGEIO_INCLUDE_DIRS ${OPENIMAGEIO}/include) - set(OIIO_OPTIMIZED optimized ${OPENIMAGEIO_LIBPATH}/OpenImageIO.lib optimized ${OPENIMAGEIO_LIBPATH}/OpenImageIO_Util.lib) - set(OIIO_DEBUG debug ${OPENIMAGEIO_LIBPATH}/OpenImageIO_d.lib debug ${OPENIMAGEIO_LIBPATH}/OpenImageIO_Util_d.lib) - set(OPENIMAGEIO_LIBRARIES ${OIIO_OPTIMIZED} ${OIIO_DEBUG}) + if(NOT OpenImageIO_FOUND) +set(OPENIMAGEIO ${LIBDIR}/OpenImageIO) +set(OPENIMAGEIO_LIBPATH ${OPENIMAGEIO}/lib) 
+set(OPENIMAGEIO_INCLUDE_DIRS ${OPENIMAGEIO}/include) +set(OIIO_OPTIMIZED optimized ${OPENIMAGEIO_LIBPATH}/OpenImageIO.lib optimized ${OPENIMAGEIO_LIBPATH}/OpenImageIO_Util.lib) +set(OIIO_DEBUG debug ${OPENIMAGEIO_LIBPATH}/OpenImageIO_d.lib debug ${OPENIMAGEIO_LIBPATH}/OpenImageIO_Util_d.lib) +set(OPENIMAGEIO_LIBRARIES ${OIIO_OPTIMIZED} ${OIIO_DEBUG}) + endif() set(OPENIMAGEIO_DEFINITIONS "-DUSE_TBB=0") set(OPENIMAGEIO_IDIFF "${OPENIMAGEIO}/bin/idiff.exe") @@ -572,31 +575,38 @@ if(WITH_LLVM) message(WARNING "LLVM debug libs not present on this system. Using release libs for debug builds.") set(LLVM_LIBRARY ${LLVM_LIBRARY_OPTIMIZED}) endif() - endif() if(WITH_OPENCOLORIO) - set(OPENCOLORIO ${LIBDIR}/OpenColorIO) - set(OPENCOLORIO_INCLUDE_DIRS ${OPENCOLORIO}/include) - set(OPENCOLORIO_LIBPATH ${OPENCOLORIO}/lib) - set(OPENCOLORIO_LIBRARIES -optimized ${OPENCOLORIO_LIBPATH}/OpenColorIO.lib -optimized ${OPENCOLORIO_LIBPATH}/libyaml-cpp.lib -optimized ${OPENCOLORIO_LIBPATH}/libexpatMD.lib -optimized ${OPENCOLORIO_LIBPATH}/pystring.lib -debug $
[Bf-blender-cvs] [5d38b13e61f] master: CMake: Rename "USD_LIBRARY_PREFIX" to "PXR_LIB_PREFIX" for consistency
Commit: 5d38b13e61ff04df6b8b4e52541910167225a18e Author: Patrick Mours Date: Wed Mar 23 16:52:02 2022 +0100 Branches: master https://developer.blender.org/rB5d38b13e61ff04df6b8b4e52541910167225a18e CMake: Rename "USD_LIBRARY_PREFIX" to "PXR_LIB_PREFIX" for consistency rBc1909770e7f192574ea62449dd14b4254637e604 introduced "PXR_LIB_PREFIX" for building the dependencies, so it only makes sense to use the same name in the Hydra render delegate CMake too. === M build_files/cmake/Modules/FindUSD.cmake M intern/cycles/hydra/CMakeLists.txt === diff --git a/build_files/cmake/Modules/FindUSD.cmake b/build_files/cmake/Modules/FindUSD.cmake index 75b5df9e196..d8f2ee22e6e 100644 --- a/build_files/cmake/Modules/FindUSD.cmake +++ b/build_files/cmake/Modules/FindUSD.cmake @@ -36,7 +36,8 @@ FIND_PATH(USD_INCLUDE_DIR # See https://github.com/PixarAnimationStudios/USD/blob/release/CHANGELOG.md#2111---2021-11-01 FIND_LIBRARY(USD_LIBRARY NAMES -usd_usd_m usd_usd_ms usd_m usd_ms ${USD_LIBRARY_PREFIX}usd +usd_usd_m usd_usd_ms usd_m usd_ms +${PXR_LIB_PREFIX}usd NAMES_PER_DIR HINTS ${_usd_SEARCH_DIRS} diff --git a/intern/cycles/hydra/CMakeLists.txt b/intern/cycles/hydra/CMakeLists.txt index 4ada4250780..703bd955135 100644 --- a/intern/cycles/hydra/CMakeLists.txt +++ b/intern/cycles/hydra/CMakeLists.txt @@ -92,22 +92,22 @@ target_compile_definitions(hdCyclesStatic target_link_libraries(hdCyclesStatic PRIVATE - ${USD_LIBRARY_DIR}/${USD_LIBRARY_PREFIX}hd${CMAKE_LINK_LIBRARY_SUFFIX} - ${USD_LIBRARY_DIR}/${USD_LIBRARY_PREFIX}plug${CMAKE_LINK_LIBRARY_SUFFIX} - ${USD_LIBRARY_DIR}/${USD_LIBRARY_PREFIX}tf${CMAKE_LINK_LIBRARY_SUFFIX} - ${USD_LIBRARY_DIR}/${USD_LIBRARY_PREFIX}trace${CMAKE_LINK_LIBRARY_SUFFIX} - ${USD_LIBRARY_DIR}/${USD_LIBRARY_PREFIX}vt${CMAKE_LINK_LIBRARY_SUFFIX} - ${USD_LIBRARY_DIR}/${USD_LIBRARY_PREFIX}work${CMAKE_LINK_LIBRARY_SUFFIX} - ${USD_LIBRARY_DIR}/${USD_LIBRARY_PREFIX}sdf${CMAKE_LINK_LIBRARY_SUFFIX} - 
${USD_LIBRARY_DIR}/${USD_LIBRARY_PREFIX}cameraUtil${CMAKE_LINK_LIBRARY_SUFFIX} - ${USD_LIBRARY_DIR}/${USD_LIBRARY_PREFIX}hf${CMAKE_LINK_LIBRARY_SUFFIX} - ${USD_LIBRARY_DIR}/${USD_LIBRARY_PREFIX}pxOsd${CMAKE_LINK_LIBRARY_SUFFIX} - ${USD_LIBRARY_DIR}/${USD_LIBRARY_PREFIX}gf${CMAKE_LINK_LIBRARY_SUFFIX} - ${USD_LIBRARY_DIR}/${USD_LIBRARY_PREFIX}arch${CMAKE_LINK_LIBRARY_SUFFIX} - ${USD_LIBRARY_DIR}/${USD_LIBRARY_PREFIX}hgi${CMAKE_LINK_LIBRARY_SUFFIX} - ${USD_LIBRARY_DIR}/${USD_LIBRARY_PREFIX}glf${CMAKE_LINK_LIBRARY_SUFFIX} - ${USD_LIBRARY_DIR}/${USD_LIBRARY_PREFIX}hdx${CMAKE_LINK_LIBRARY_SUFFIX} - ${USD_LIBRARY_DIR}/${USD_LIBRARY_PREFIX}usdGeom${CMAKE_LINK_LIBRARY_SUFFIX} + ${USD_LIBRARY_DIR}/${PXR_LIB_PREFIX}hd${CMAKE_LINK_LIBRARY_SUFFIX} + ${USD_LIBRARY_DIR}/${PXR_LIB_PREFIX}plug${CMAKE_LINK_LIBRARY_SUFFIX} + ${USD_LIBRARY_DIR}/${PXR_LIB_PREFIX}tf${CMAKE_LINK_LIBRARY_SUFFIX} + ${USD_LIBRARY_DIR}/${PXR_LIB_PREFIX}trace${CMAKE_LINK_LIBRARY_SUFFIX} + ${USD_LIBRARY_DIR}/${PXR_LIB_PREFIX}vt${CMAKE_LINK_LIBRARY_SUFFIX} + ${USD_LIBRARY_DIR}/${PXR_LIB_PREFIX}work${CMAKE_LINK_LIBRARY_SUFFIX} + ${USD_LIBRARY_DIR}/${PXR_LIB_PREFIX}sdf${CMAKE_LINK_LIBRARY_SUFFIX} + ${USD_LIBRARY_DIR}/${PXR_LIB_PREFIX}cameraUtil${CMAKE_LINK_LIBRARY_SUFFIX} + ${USD_LIBRARY_DIR}/${PXR_LIB_PREFIX}hf${CMAKE_LINK_LIBRARY_SUFFIX} + ${USD_LIBRARY_DIR}/${PXR_LIB_PREFIX}pxOsd${CMAKE_LINK_LIBRARY_SUFFIX} + ${USD_LIBRARY_DIR}/${PXR_LIB_PREFIX}gf${CMAKE_LINK_LIBRARY_SUFFIX} + ${USD_LIBRARY_DIR}/${PXR_LIB_PREFIX}arch${CMAKE_LINK_LIBRARY_SUFFIX} + ${USD_LIBRARY_DIR}/${PXR_LIB_PREFIX}hgi${CMAKE_LINK_LIBRARY_SUFFIX} + ${USD_LIBRARY_DIR}/${PXR_LIB_PREFIX}glf${CMAKE_LINK_LIBRARY_SUFFIX} + ${USD_LIBRARY_DIR}/${PXR_LIB_PREFIX}hdx${CMAKE_LINK_LIBRARY_SUFFIX} + ${USD_LIBRARY_DIR}/${PXR_LIB_PREFIX}usdGeom${CMAKE_LINK_LIBRARY_SUFFIX} cycles_scene cycles_session cycles_graph ___ Bf-blender-cvs mailing list Bf-blender-cvs@blender.org List details, subscription details or unsubscribe: 
https://lists.blender.org/mailman/listinfo/bf-blender-cvs
[Bf-blender-cvs] [d350976ba06] master: Cycles: Add Hydra render delegate
Commit: d350976ba06d4ef93aa53fc4cd9da57be46ae924 Author: Patrick Mours Date: Wed Mar 23 16:07:43 2022 +0100 Branches: master https://developer.blender.org/rBd350976ba06d4ef93aa53fc4cd9da57be46ae924 Cycles: Add Hydra render delegate This patch adds a Hydra render delegate to Cycles, allowing Cycles to be used for rendering in applications that provide a Hydra viewport. The implementation was written from scratch against Cycles X, for integration into the Blender repository to make it possible to continue developing it in step with the rest of Cycles. For this purpose it follows the style of the rest of the Cycles code and can be built with a CMake option (`WITH_CYCLES_HYDRA_RENDER_DELEGATE=1`) similar to the existing standalone version of Cycles. Since Hydra render delegates need to be built against the exact same USD version and other dependencies that the target application is using, this is intended to be built separately from Blender (`WITH_BLENDER=0` CMake option) and with support for library versions different from what Blender is using. As such the CMake build scripts for Windows had to be modified slightly, so that the Cycles Hydra render delegate can e.g. be built with MSVC 2017 again even though Blender requires MSVC 2019 now, and it's possible to specify custom paths to the USD SDK etc. The codebase supports building against the latest USD release 22.03 and all the way back to USD 20.08 (with some limitations). 
Reviewed By: brecht, LazyDodo Differential Revision: https://developer.blender.org/D14398 === M CMakeLists.txt M build_files/cmake/Modules/FindUSD.cmake M build_files/cmake/macros.cmake M build_files/cmake/platform/platform_win32.cmake M intern/cycles/CMakeLists.txt M intern/cycles/device/CMakeLists.txt A intern/cycles/hydra/CMakeLists.txt A intern/cycles/hydra/attribute.cpp A intern/cycles/hydra/attribute.h A intern/cycles/hydra/camera.cpp A intern/cycles/hydra/camera.h A intern/cycles/hydra/config.h A intern/cycles/hydra/curves.cpp A intern/cycles/hydra/curves.h A intern/cycles/hydra/display_driver.cpp A intern/cycles/hydra/display_driver.h A intern/cycles/hydra/field.cpp A intern/cycles/hydra/field.h A intern/cycles/hydra/geometry.h A intern/cycles/hydra/geometry.inl A intern/cycles/hydra/instancer.cpp A intern/cycles/hydra/instancer.h A intern/cycles/hydra/light.cpp A intern/cycles/hydra/light.h A intern/cycles/hydra/material.cpp A intern/cycles/hydra/material.h A intern/cycles/hydra/mesh.cpp A intern/cycles/hydra/mesh.h A intern/cycles/hydra/node_util.cpp A intern/cycles/hydra/node_util.h A intern/cycles/hydra/output_driver.cpp A intern/cycles/hydra/output_driver.h A intern/cycles/hydra/plugInfo.json A intern/cycles/hydra/plugin.cpp A intern/cycles/hydra/plugin.h A intern/cycles/hydra/pointcloud.cpp A intern/cycles/hydra/pointcloud.h A intern/cycles/hydra/render_buffer.cpp A intern/cycles/hydra/render_buffer.h A intern/cycles/hydra/render_delegate.cpp A intern/cycles/hydra/render_delegate.h A intern/cycles/hydra/render_pass.cpp A intern/cycles/hydra/render_pass.h A intern/cycles/hydra/resources/plugInfo.json A intern/cycles/hydra/session.cpp A intern/cycles/hydra/session.h A intern/cycles/hydra/volume.cpp A intern/cycles/hydra/volume.h M intern/cycles/integrator/render_scheduler.cpp M intern/cycles/kernel/svm/vertex_color.h M intern/cycles/scene/integrator.cpp M intern/cycles/scene/mesh.cpp M intern/cycles/scene/mesh.h M intern/cycles/session/session.h M 
intern/cycles/util/tbb.h === diff --git a/CMakeLists.txt b/CMakeLists.txt index d31a0c4a63d..bf40347e2ef 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -408,6 +408,8 @@ option(WITH_CYCLES_DEBUG "Build Cycles with options useful for debug option(WITH_CYCLES_STANDALONE"Build Cycles standalone application" OFF) option(WITH_CYCLES_STANDALONE_GUI"Build Cycles standalone with GUI" OFF) +option(WITH_CYCLES_HYDRA_RENDER_DELEGATE "Build Cycles Hydra render delegate" OFF) + option(WITH_CYCLES_DEBUG_NAN "Build Cycles with additional asserts for detecting NaNs and invalid values" OFF) option(WITH_CYCLES_NATIVE_ONLY "Build Cycles with native kernel only (which fits current CPU, use for development only)" OFF) option(WITH_CYCLES_KERNEL_ASAN "Build Cycles kernels with address sanitizer when WITH_COMPILER_ASAN is on, even if it's very slow" OFF) @@ -742,9 +744,10 @@ endif() #- # Check
[Bf-blender-cvs] [08e719910bf] cycles_hydra: Cycles: Add Hydra render delegate
Commit: 08e719910bf2065ef0603cba8cc43ea236b2d090 Author: Patrick Mours Date: Mon Mar 21 10:58:51 2022 +0100 Branches: cycles_hydra https://developer.blender.org/rB08e719910bf2065ef0603cba8cc43ea236b2d090 Cycles: Add Hydra render delegate This patch adds a [Hydra](https://graphics.pixar.com/usd/release/glossary.html#usdglossary-hydra) render delegate to Cycles, allowing Cycles to be used for rendering in applications that provide a Hydra viewport (e.g. USDView or NVIDIA Omniverse Kit). The implementation was written from scratch against Cycles X, for integration into the Blender repository to make it possible to continue developing it in step with the rest of Cycles. For this purpose it follows the style of the rest of the [...] Supported features: - CPU/CUDA/OptiX/HIP/Metal support - Camera Settings - Render Settings (automatically queried from Cycles via node type system) - Basic AOVs (color, depth, normal, primId, instanceId) - Lights (Disk, Distant, Dome, Rect, Sphere) - Meshes - Geom Subsets - Subdivision Surfaces (using native Cycles support) - Custom Primvars (converted to Cycles attributes) - Cycles Materials (can be exported to USD using the [universal-scene-description branch of Blender](https://developer.blender.org/diffusion/B/history/universal-scene-description/)) - USD Preview Surface Materials - Curves - Point Clouds - OpenVDB Volumes Still missing features: - Motion Blur - Custom AOVs - ... Since Hydra render delegates need to be built against the exact same USD version and other dependencies that the target application is using, this is intended to be built separately from Blender (`WITH_BLENDER=0` CMake option) and with support for library versions different from what Blender is using. As such the CMake build scripts for Windows had to be modified slightly, so that the Cycles Hydra render delegate can e.g. be built with MSVC 2017 again even though Blender requires MSVC 2019 now, an [...] 
This also includes an optimization for Hydra viewports that display the result using OpenGL, in which case the texture can be kept entirely on the GPU (see display_driver.cpp). Unfortunately this is a bit difficult since Hydra doesn't give any control over the OpenGL context created by an application, so the only way to make it available to Cycles (which is rendering on a separate thread) without disturbing the target application is to create a second OpenGL context that is sharing resour [...] --- **To build:** 1. [Set up a Blender build environment](https://wiki.blender.org/wiki/Building_Blender) as usual but download and apply this patch to the Git repository (Download Raw Diff on the right via `Save Link As` and then run `git apply patch.diff` with the downloaded file in your local repository after syncing to latest master branch). 2. Set these CMake variables: ``` WITH_BLENDER=0 WITH_CYCLES_HYDRA_RENDER_DELEGATE=1 USD_INCLUDE_DIRS=/include USD_LIBRARY_DIR=/lib USD_LIBRARY_PREFIX= ``` 3. Continue following the usual Blender build instructions. After building the INSTALL target, the output directory contains the `hdCycles` shared library and associated resource files which can be loaded as a USD plugin. **To execute:** 4. Copy `hdCycles.dll`/`hdCycles.a` and the `hdCycles` directory from the output directory to the USD plugin directory of the target application, or point a `PXR_PLUGINPATH_NAME` environment variable to the output directory. 5. Launch the target application, it should now automatically detect the Cycles Hydra render delegate. 
Differential Revision: https://developer.blender.org/D14398 === M CMakeLists.txt M build_files/cmake/Modules/FindUSD.cmake M build_files/cmake/macros.cmake M build_files/cmake/platform/platform_win32.cmake M intern/cycles/CMakeLists.txt M intern/cycles/device/CMakeLists.txt A intern/cycles/hydra/CMakeLists.txt A intern/cycles/hydra/attribute.cpp A intern/cycles/hydra/attribute.h A intern/cycles/hydra/camera.cpp A intern/cycles/hydra/camera.h A intern/cycles/hydra/config.h A intern/cycles/hydra/curves.cpp A intern/cycles/hydra/curves.h A intern/cycles/hydra/display_driver.cpp A intern/cycles/hydra/display_driver.h A intern/cycles/hydra/field.cpp A intern/cycles/hydra/field.h A intern/cycles/hydra/geometry.h A intern/cycles/hydra/geometry.inl A intern/cycles/hydra/instancer.cpp A intern/cycles/hydra/instancer.h A intern/cycles/hydra/light.cpp A intern/cycles/hydra/light.h A intern/cycles/hydra/material.cpp A intern/cycles/hydra/material.h A intern/cycles/hydra/mesh.cpp A intern/cycles/hydra/mesh.h A intern/cycles/hydra/node_util.cpp A intern/cycles/hydra/node_util.h A intern/cycles/hydra/output_driver.cpp A intern/cycles/hydra/output_driver.h A intern/cycles/hydra
[Bf-blender-cvs] [c8b946bc97f] cycles_hydra: Fix Windows build to use existing "WITH_WINDOWS_FIND_MODULES" CMake option
Commit: c8b946bc97f77892226e499821784dea1f7198d0 Author: Patrick Mours Date: Mon Mar 21 16:47:20 2022 +0100 Branches: cycles_hydra https://developer.blender.org/rBc8b946bc97f77892226e499821784dea1f7198d0 Fix Windows build to use existing "WITH_WINDOWS_FIND_MODULES" CMake option === M build_files/cmake/Modules/FindUSD.cmake M build_files/cmake/platform/platform_win32.cmake M intern/cycles/CMakeLists.txt === diff --git a/build_files/cmake/Modules/FindUSD.cmake b/build_files/cmake/Modules/FindUSD.cmake index c8c1f043b63..3d31228bf02 100644 --- a/build_files/cmake/Modules/FindUSD.cmake +++ b/build_files/cmake/Modules/FindUSD.cmake @@ -17,60 +17,51 @@ IF(NOT USD_ROOT_DIR AND NOT $ENV{USD_ROOT_DIR} STREQUAL "") SET(USD_ROOT_DIR $ENV{USD_ROOT_DIR}) ENDIF() -find_package(pxr REQUIRED OFF) - -if (NOT pxr_FOUND) - - SET(_usd_SEARCH_DIRS -${USD_ROOT_DIR} -/opt/lib/usd - ) +SET(_usd_SEARCH_DIRS + ${USD_ROOT_DIR} + /opt/lib/usd +) - FIND_PATH(USD_INCLUDE_DIR -NAMES - pxr/usd/usd/api.h -HINTS - ${_usd_SEARCH_DIRS} -PATH_SUFFIXES - include -DOC "Universal Scene Description (USD) header files" - ) +FIND_PATH(USD_INCLUDE_DIR + NAMES +pxr/usd/usd/api.h + HINTS +${_usd_SEARCH_DIRS} + PATH_SUFFIXES +include + DOC "Universal Scene Description (USD) header files" + NO_CMAKE_PATH +) - FIND_LIBRARY(USD_LIBRARY -NAMES - usd_m usd_ms -NAMES_PER_DIR -HINTS - ${_usd_SEARCH_DIRS} -PATH_SUFFIXES - lib64 lib lib/static -DOC "Universal Scene Description (USD) monolithic library" - ) +FIND_LIBRARY(USD_LIBRARY + NAMES +usd_m usd_ms ${USD_LIBRARY_PREFIX}usd + NAMES_PER_DIR + HINTS +${_usd_SEARCH_DIRS} + PATH_SUFFIXES +lib64 lib lib/static + DOC "Universal Scene Description (USD) library" +) - IF(${USD_LIBRARY_NOTFOUND}) -set(USD_FOUND FALSE) - ELSE() -# handle the QUIETLY and REQUIRED arguments and set USD_FOUND to TRUE if -# all listed variables are TRUE -INCLUDE(FindPackageHandleStandardArgs) -FIND_PACKAGE_HANDLE_STANDARD_ARGS(USD DEFAULT_MSG USD_LIBRARY USD_INCLUDE_DIR) 
+IF(${USD_LIBRARY_NOTFOUND}) + set(USD_FOUND FALSE) +ELSE() + # handle the QUIETLY and REQUIRED arguments and set USD_FOUND to TRUE if + # all listed variables are TRUE + INCLUDE(FindPackageHandleStandardArgs) + FIND_PACKAGE_HANDLE_STANDARD_ARGS(USD DEFAULT_MSG USD_LIBRARY USD_INCLUDE_DIR) -IF(USD_FOUND) - get_filename_component(USD_LIBRARY_DIR ${USD_LIBRARY} DIRECTORY) - SET(USD_INCLUDE_DIRS ${USD_INCLUDE_DIR}) - set(USD_LIBRARIES ${USD_LIBRARY}) -ENDIF() + IF(USD_FOUND) +get_filename_component(USD_LIBRARY_DIR ${USD_LIBRARY} DIRECTORY) +SET(USD_INCLUDE_DIRS ${USD_INCLUDE_DIR}) +set(USD_LIBRARIES ${USD_LIBRARY}) ENDIF() - - UNSET(_usd_SEARCH_DIRS) - -ELSE() -SET(USD_FOUND ON) -SET(USD_INCLUDE_DIR ${PXR_INCLUDE_DIRS}) -SET(USD_LIBRARIES ${PXR_LIBRARIES}) ENDIF() MARK_AS_ADVANCED( USD_INCLUDE_DIR USD_LIBRARY_DIR ) + +UNSET(_usd_SEARCH_DIRS) diff --git a/build_files/cmake/platform/platform_win32.cmake b/build_files/cmake/platform/platform_win32.cmake index ec0c83195e9..edbccee6152 100644 --- a/build_files/cmake/platform/platform_win32.cmake +++ b/build_files/cmake/platform/platform_win32.cmake @@ -255,9 +255,6 @@ if(NOT DEFINED LIBDIR) elseif(MSVC_VERSION GREATER 1909) message(STATUS "Visual Studio 2017 detected.") set(LIBDIR ${CMAKE_SOURCE_DIR}/../lib/${LIBDIR_BASE}_vc15) - elseif(MSVC_VERSION EQUAL 1900) -message(STATUS "Visual Studio 2015 detected.") -set(LIBDIR ${CMAKE_SOURCE_DIR}/../lib/${LIBDIR_BASE}_vc15) endif() else() message(STATUS "Using pre-compiled LIBDIR: ${LIBDIR}") @@ -306,8 +303,8 @@ set(ZLIB_INCLUDE_DIR ${LIBDIR}/zlib/include) set(ZLIB_LIBRARY ${LIBDIR}/zlib/lib/libz_st.lib) set(ZLIB_DIR ${LIBDIR}/zlib) -windows_find_package(zlib) # we want to find before finding things that depend on it like png -windows_find_package(png) +windows_find_package(ZLIB) # we want to find before finding things that depend on it like png +windows_find_package(PNG) if(NOT PNG_FOUND) warn_hardcoded_paths(libpng) @@ -319,9 +316,9 @@ if(NOT PNG_FOUND) endif() set(JPEG_NAMES 
${JPEG_NAMES} libjpeg) -windows_find_package(jpeg REQUIRED) +windows_find_package(JPEG REQUIRED) if(NOT JPEG_FOUND) - warn_hardcoded_paths(jpeg) + warn_hardcoded_paths(JPEG) set(JPEG_INCLUDE_DIR ${LIBDIR}/jpeg/include) set(JPEG_LIBRARIES ${LIBDIR}/jpeg/lib/libjpeg.lib) endif() @@ -339,7 +336,7 @@ set(FREETYPE_LIBRARIES ${LIBDIR}/brotli/lib/brotlidec-static.lib ${LIBDIR}/brotli/lib/brotlicommon-static.lib ) -windows_find_package(freetype REQUIRED) +windows_find_package(Freetype REQUIRED
[Bf-blender-cvs] [3d5dbc1c449] blender-v3.0-release: Cycles: Reintroduce device-only memory handling that got lost in Cycles X merge
Commit: 3d5dbc1c44907c73d2e6e57a146cbadaea9623bd Author: Patrick Mours Date: Mon Dec 6 14:58:35 2021 +0100 Branches: blender-v3.0-release https://developer.blender.org/rB3d5dbc1c44907c73d2e6e57a146cbadaea9623bd Cycles: Reintroduce device-only memory handling that got lost in Cycles X merge Somehow only a part of rBf4f8b6dde32b0438e0b97a6d8ebeb89802987127 ended up in Cycles X, causing the issue that commit fixed, "OPTIX_ERROR_INVALID_VALUE" when the system is out of memory, to show up again. This adds the missing changes to fix that problem. Maniphest Tasks: T93620 Differential Revision: https://developer.blender.org/D13488 === M intern/cycles/device/cpu/device_impl.cpp M intern/cycles/device/cuda/device_impl.cpp M intern/cycles/device/optix/device_impl.cpp === diff --git a/intern/cycles/device/cpu/device_impl.cpp b/intern/cycles/device/cpu/device_impl.cpp index 68dec7f0af2..5db89d1e4fb 100644 --- a/intern/cycles/device/cpu/device_impl.cpp +++ b/intern/cycles/device/cpu/device_impl.cpp @@ -134,8 +134,7 @@ void CPUDevice::mem_alloc(device_memory ) << string_human_readable_size(mem.memory_size()) << ")"; } -if (mem.type == MEM_DEVICE_ONLY) { - assert(!mem.host_pointer); +if (mem.type == MEM_DEVICE_ONLY || !mem.host_pointer) { size_t alignment = MIN_ALIGNMENT_CPU_DATA_TYPES; void *data = util_aligned_malloc(mem.memory_size(), alignment); mem.device_pointer = (device_ptr)data; @@ -194,7 +193,7 @@ void CPUDevice::mem_free(device_memory ) tex_free((device_texture &)mem); } else if (mem.device_pointer) { -if (mem.type == MEM_DEVICE_ONLY) { +if (mem.type == MEM_DEVICE_ONLY || !mem.host_pointer) { util_aligned_free((void *)mem.device_pointer); } mem.device_pointer = 0; diff --git a/intern/cycles/device/cuda/device_impl.cpp b/intern/cycles/device/cuda/device_impl.cpp index 20945796a2d..8c5779f4a72 100644 --- a/intern/cycles/device/cuda/device_impl.cpp +++ b/intern/cycles/device/cuda/device_impl.cpp @@ -680,7 +680,7 @@ CUDADevice::CUDAMem 
*CUDADevice::generic_alloc(device_memory , size_t pitch_ void *shared_pointer = 0; - if (mem_alloc_result != CUDA_SUCCESS && can_map_host) { + if (mem_alloc_result != CUDA_SUCCESS && can_map_host && mem.type != MEM_DEVICE_ONLY) { if (mem.shared_pointer) { /* Another device already allocated host memory. */ mem_alloc_result = CUDA_SUCCESS; @@ -703,8 +703,14 @@ CUDADevice::CUDAMem *CUDADevice::generic_alloc(device_memory , size_t pitch_ } if (mem_alloc_result != CUDA_SUCCESS) { -status = " failed, out of device and host memory"; -set_error("System is out of GPU and shared host memory"); +if (mem.type == MEM_DEVICE_ONLY) { + status = " failed, out of device memory"; + set_error("System is out of GPU memory"); +} +else { + status = " failed, out of device and host memory"; + set_error("System is out of GPU and shared host memory"); +} } if (mem.name) { diff --git a/intern/cycles/device/optix/device_impl.cpp b/intern/cycles/device/optix/device_impl.cpp index f230f865f60..b33b5e21eee 100644 --- a/intern/cycles/device/optix/device_impl.cpp +++ b/intern/cycles/device/optix/device_impl.cpp @@ -44,14 +44,14 @@ CCL_NAMESPACE_BEGIN OptiXDevice::Denoiser::Denoiser(OptiXDevice *device) -: device(device), queue(device), state(device, "__denoiser_state") +: device(device), queue(device), state(device, "__denoiser_state", true) { } OptiXDevice::OptiXDevice(const DeviceInfo , Stats , Profiler ) : CUDADevice(info, stats, profiler), sbt_data(this, "__sbt", MEM_READ_ONLY), - launch_params(this, "__params"), + launch_params(this, "__params", false), denoiser_(this) { /* Make the CUDA context current. 
*/ @@ -507,7 +507,7 @@ class OptiXDevice::DenoiseContext { : denoise_params(task.params), render_buffers(task.render_buffers), buffer_params(task.buffer_params), -guiding_buffer(device, "denoiser guiding passes buffer"), +guiding_buffer(device, "denoiser guiding passes buffer", true), num_samples(task.num_samples) { num_input_passes = 1; @@ -1001,6 +1001,13 @@ bool OptiXDevice::build_optix_bvh(BVHOptiX *bvh, const OptixBuildInput _input, uint16_t num_motion_steps) { + /* Allocate and build acceleration structures only one at a time, to prevent parallel builds + * from running out of memory (since both original and compacted acceleration structure memory + * may be allocated at the same ti
[Bf-blender-cvs] [8393ccd0763] master: Cycles: Add OptiX temporal denoising support
Commit: 8393ccd07634b3152b18d4d527b1460dab9dbe06 Author: Patrick Mours Date: Tue Jan 4 21:39:54 2022 +0100 Branches: master https://developer.blender.org/rB8393ccd07634b3152b18d4d527b1460dab9dbe06 Cycles: Add OptiX temporal denoising support Enables the `bpy.ops.cycles.denoise_animation()` operator again and modifies it to support temporal denoising with OptiX. This requires renders that were done with both the "Vector" and "Denoising Data" passes. Differential Revision: https://developer.blender.org/D11442 === M intern/cycles/blender/python.cpp M intern/cycles/blender/sync.h M intern/cycles/device/denoise.cpp M intern/cycles/device/denoise.h M intern/cycles/device/optix/device_impl.cpp M intern/cycles/device/optix/device_impl.h M intern/cycles/device/queue.h M intern/cycles/kernel/device/gpu/kernel.h M intern/cycles/kernel/types.h M intern/cycles/scene/pass.cpp M intern/cycles/session/denoising.cpp M intern/cycles/session/denoising.h === diff --git a/intern/cycles/blender/python.cpp b/intern/cycles/blender/python.cpp index 024dae306b0..f509d5c2eeb 100644 --- a/intern/cycles/blender/python.cpp +++ b/intern/cycles/blender/python.cpp @@ -735,27 +735,20 @@ static bool image_parse_filepaths(PyObject *pyfilepaths, vector static PyObject *denoise_func(PyObject * /*self*/, PyObject *args, PyObject *keywords) { -#if 1 - (void)args; - (void)keywords; -#else static const char *keyword_list[] = { - "preferences", "scene", "view_layer", "input", "output", "tile_size", "samples", NULL}; + "preferences", "scene", "view_layer", "input", "output", NULL}; PyObject *pypreferences, *pyscene, *pyviewlayer; PyObject *pyinput, *pyoutput = NULL; - int tile_size = 0, samples = 0; if (!PyArg_ParseTupleAndKeywords(args, keywords, - "|Oii", + "|O", (char **)keyword_list, , , , , - , - _size, - )) { + )) { return NULL; } @@ -777,14 +770,10 @@ static PyObject *denoise_func(PyObject * /*self*/, PyObject *args, PyObject *key _ViewLayer, PyLong_AsVoidPtr(pyviewlayer), ); - PointerRNA cviewlayer = 
RNA_pointer_get(, "cycles"); + BL::ViewLayer b_view_layer(viewlayerptr); - DenoiseParams params; - params.radius = get_int(cviewlayer, "denoising_radius"); - params.strength = get_float(cviewlayer, "denoising_strength"); - params.feature_strength = get_float(cviewlayer, "denoising_feature_strength"); - params.relative_pca = get_boolean(cviewlayer, "denoising_relative_pca"); - params.neighbor_frames = get_int(cviewlayer, "denoising_neighbor_frames"); + DenoiseParams params = BlenderSync::get_denoise_params(b_scene, b_view_layer, true); + params.use = true; /* Parse file paths list. */ vector input, output; @@ -812,24 +801,15 @@ static PyObject *denoise_func(PyObject * /*self*/, PyObject *args, PyObject *key } /* Create denoiser. */ - DenoiserPipeline denoiser(device); - denoiser.params = params; + DenoiserPipeline denoiser(device, params); denoiser.input = input; denoiser.output = output; - if (tile_size > 0) { -denoiser.tile_size = make_int2(tile_size, tile_size); - } - if (samples > 0) { -denoiser.samples_override = samples; - } - /* Run denoiser. */ if (!denoiser.run()) { PyErr_SetString(PyExc_ValueError, denoiser.error.c_str()); return NULL; } -#endif Py_RETURN_NONE; } diff --git a/intern/cycles/blender/sync.h b/intern/cycles/blender/sync.h index d074f90bb1b..3722b938863 100644 --- a/intern/cycles/blender/sync.h +++ b/intern/cycles/blender/sync.h @@ -105,11 +105,11 @@ class BlenderSync { static BufferParams get_buffer_params( BL::SpaceView3D _v3d, BL::RegionView3D _rv3d, Camera *cam, int width, int height); - private: static DenoiseParams get_denoise_params(BL::Scene _scene, BL::ViewLayer _view_layer, bool background); + private: /* sync */ void sync_lights(BL::Depsgraph _depsgraph, bool update_all); void sync_materials(BL::Depsgraph _depsgraph, bool update_all); diff --git a/i
[Bf-blender-cvs] [ca143fafa67] master: Cleanup: Silence "integer conversion resulted in a change of sign" warning in Cycles kernel code
Commit: ca143fafa674f5dbec39ded3ecbba4b0abfe93db Author: Patrick Mours Date: Mon Jan 3 16:26:15 2022 +0100 Branches: master https://developer.blender.org/rBca143fafa674f5dbec39ded3ecbba4b0abfe93db Cleanup: Silence "integer conversion resulted in a change of sign" warning in Cycles kernel code Occurred because "PATH_RAY_SHADOW_CATCHER_BACKGROUND" is expressed as an unsigned integer, because it is too large for a signed integer, but the "PathRayFlag" enum type still defaulted to a signed integer. === M intern/cycles/kernel/types.h === diff --git a/intern/cycles/kernel/types.h b/intern/cycles/kernel/types.h index 20abea37649..1d0537f9547 100644 --- a/intern/cycles/kernel/types.h +++ b/intern/cycles/kernel/types.h @@ -202,7 +202,7 @@ enum SamplingPattern { /* These flags values correspond to `raytypes` in `osl.cpp`, so keep them in sync! */ -enum PathRayFlag { +enum PathRayFlag : uint32_t { /* * Ray visibility. * @@ -1559,7 +1559,7 @@ enum { /* Kernel Features */ -enum KernelFeatureFlag : unsigned int { +enum KernelFeatureFlag : uint32_t { /* Shader nodes. */ KERNEL_FEATURE_NODE_BSDF = (1U << 0U), KERNEL_FEATURE_NODE_EMISSION = (1U << 1U), ___ Bf-blender-cvs mailing list Bf-blender-cvs@blender.org List details, subscription details or unsubscribe: https://lists.blender.org/mailman/listinfo/bf-blender-cvs
[Bf-blender-cvs] [e14f8c2dd76] master: Cycles: Reintroduce device-only memory handling that got lost in Cycles X merge
Commit: e14f8c2dd765a5f20d652899434174daa039804b Author: Patrick Mours Date: Mon Dec 6 14:58:35 2021 +0100 Branches: master https://developer.blender.org/rBe14f8c2dd765a5f20d652899434174daa039804b Cycles: Reintroduce device-only memory handling that got lost in Cycles X merge Somehow only a part of rBf4f8b6dde32b0438e0b97a6d8ebeb89802987127 ended up in Cycles X, causing the issue that commit fixed, "OPTIX_ERROR_INVALID_VALUE" when the system is out of memory, to show up again. This adds the missing changes to fix that problem. Maniphest Tasks: T93620 Differential Revision: https://developer.blender.org/D13488 === M intern/cycles/device/cpu/device_impl.cpp M intern/cycles/device/cuda/device_impl.cpp M intern/cycles/device/optix/device_impl.cpp === diff --git a/intern/cycles/device/cpu/device_impl.cpp b/intern/cycles/device/cpu/device_impl.cpp index 62b9cc93dae..6f3c8b42124 100644 --- a/intern/cycles/device/cpu/device_impl.cpp +++ b/intern/cycles/device/cpu/device_impl.cpp @@ -129,8 +129,7 @@ void CPUDevice::mem_alloc(device_memory ) << string_human_readable_size(mem.memory_size()) << ")"; } -if (mem.type == MEM_DEVICE_ONLY) { - assert(!mem.host_pointer); +if (mem.type == MEM_DEVICE_ONLY || !mem.host_pointer) { size_t alignment = MIN_ALIGNMENT_CPU_DATA_TYPES; void *data = util_aligned_malloc(mem.memory_size(), alignment); mem.device_pointer = (device_ptr)data; @@ -189,7 +188,7 @@ void CPUDevice::mem_free(device_memory ) tex_free((device_texture &)mem); } else if (mem.device_pointer) { -if (mem.type == MEM_DEVICE_ONLY) { +if (mem.type == MEM_DEVICE_ONLY || !mem.host_pointer) { util_aligned_free((void *)mem.device_pointer); } mem.device_pointer = 0; diff --git a/intern/cycles/device/cuda/device_impl.cpp b/intern/cycles/device/cuda/device_impl.cpp index ee55e6dc632..8d022040414 100644 --- a/intern/cycles/device/cuda/device_impl.cpp +++ b/intern/cycles/device/cuda/device_impl.cpp @@ -678,7 +678,7 @@ CUDADevice::CUDAMem *CUDADevice::generic_alloc(device_memory , size_t 
pitch_ void *shared_pointer = 0; - if (mem_alloc_result != CUDA_SUCCESS && can_map_host) { + if (mem_alloc_result != CUDA_SUCCESS && can_map_host && mem.type != MEM_DEVICE_ONLY) { if (mem.shared_pointer) { /* Another device already allocated host memory. */ mem_alloc_result = CUDA_SUCCESS; @@ -701,8 +701,14 @@ CUDADevice::CUDAMem *CUDADevice::generic_alloc(device_memory , size_t pitch_ } if (mem_alloc_result != CUDA_SUCCESS) { -status = " failed, out of device and host memory"; -set_error("System is out of GPU and shared host memory"); +if (mem.type == MEM_DEVICE_ONLY) { + status = " failed, out of device memory"; + set_error("System is out of GPU memory"); +} +else { + status = " failed, out of device and host memory"; + set_error("System is out of GPU and shared host memory"); +} } if (mem.name) { diff --git a/intern/cycles/device/optix/device_impl.cpp b/intern/cycles/device/optix/device_impl.cpp index a0c748fb6cd..da3c1ac57d1 100644 --- a/intern/cycles/device/optix/device_impl.cpp +++ b/intern/cycles/device/optix/device_impl.cpp @@ -46,14 +46,14 @@ CCL_NAMESPACE_BEGIN OptiXDevice::Denoiser::Denoiser(OptiXDevice *device) -: device(device), queue(device), state(device, "__denoiser_state") +: device(device), queue(device), state(device, "__denoiser_state", true) { } OptiXDevice::OptiXDevice(const DeviceInfo , Stats , Profiler ) : CUDADevice(info, stats, profiler), sbt_data(this, "__sbt", MEM_READ_ONLY), - launch_params(this, "__params"), + launch_params(this, "__params", false), denoiser_(this) { /* Make the CUDA context current. 
*/ @@ -523,7 +523,7 @@ class OptiXDevice::DenoiseContext { : denoise_params(task.params), render_buffers(task.render_buffers), buffer_params(task.buffer_params), -guiding_buffer(device, "denoiser guiding passes buffer"), +guiding_buffer(device, "denoiser guiding passes buffer", true), num_samples(task.num_samples) { num_input_passes = 1; @@ -1015,6 +1015,13 @@ bool OptiXDevice::build_optix_bvh(BVHOptiX *bvh, const OptixBuildInput _input, uint16_t num_motion_steps) { + /* Allocate and build acceleration structures only one at a time, to prevent parallel builds + * from running out of memory (since both original and compacted acceleration structure memory + * may be allocated at the same time for the duration of
[Bf-blender-cvs] [17665494186] master: Fix T92308: OptiX denoising fails with high resolutions
Commit: 17665494186816cebb9e8304199e40f9ee033990 Author: Patrick Mours Date: Wed Dec 1 11:54:42 2021 +0100 Branches: master https://developer.blender.org/rB17665494186816cebb9e8304199e40f9ee033990 Fix T92308: OptiX denoising fails with high resolutions The OptiX denoiser has an upper limit as to how many pixels it can denoise at once, so this changes the OptiX denoising process to use tiles for high resolution images. The OptiX SDK has a utility function for this purpose, so changes are minor, adjusting the configured tile size and including enough overlap. Maniphest Tasks: T92308 Differential Revision: https://developer.blender.org/D13436 === M intern/cycles/device/optix/device_impl.cpp M intern/cycles/device/optix/device_impl.h === diff --git a/intern/cycles/device/optix/device_impl.cpp b/intern/cycles/device/optix/device_impl.cpp index 7a78504f458..a0c748fb6cd 100644 --- a/intern/cycles/device/optix/device_impl.cpp +++ b/intern/cycles/device/optix/device_impl.cpp @@ -41,6 +41,8 @@ # define __KERNEL_OPTIX__ # include "kernel/device/optix/globals.h" +# include + CCL_NAMESPACE_BEGIN OptiXDevice::Denoiser::Denoiser(OptiXDevice *device) @@ -884,35 +886,33 @@ bool OptiXDevice::denoise_configure_if_needed(DenoiseContext ) bool OptiXDevice::denoise_configure_if_needed(DenoiseContext ) { - if (denoiser_.is_configured && (denoiser_.configured_size.x == context.buffer_params.width && - denoiser_.configured_size.y == context.buffer_params.height)) { + /* Limit maximum tile size denoiser can be invoked with. 
*/ + const int2 tile_size = make_int2(min(context.buffer_params.width, 4096), + min(context.buffer_params.height, 4096)); + + if (denoiser_.is_configured && + (denoiser_.configured_size.x == tile_size.x && denoiser_.configured_size.y == tile_size.y)) { return true; } - const BufferParams _params = context.buffer_params; - - OptixDenoiserSizes sizes = {}; optix_assert(optixDenoiserComputeMemoryResources( - denoiser_.optix_denoiser, buffer_params.width, buffer_params.height, )); - - /* Denoiser is invoked on whole images only, so no overlap needed (would be used for tiling). */ - denoiser_.scratch_size = sizes.withoutOverlapScratchSizeInBytes; - denoiser_.scratch_offset = sizes.stateSizeInBytes; + denoiser_.optix_denoiser, tile_size.x, tile_size.y, _.sizes)); /* Allocate denoiser state if tile size has changed since last setup. */ - denoiser_.state.alloc_to_device(denoiser_.scratch_offset + denoiser_.scratch_size); + denoiser_.state.alloc_to_device(denoiser_.sizes.stateSizeInBytes + + denoiser_.sizes.withOverlapScratchSizeInBytes); /* Initialize denoiser state for the current tile size. 
*/ const OptixResult result = optixDenoiserSetup( denoiser_.optix_denoiser, 0, /* Work around bug in r495 drivers that causes artifacts when denoiser setup is called on a stream that is not the default stream */ - buffer_params.width, - buffer_params.height, + tile_size.x + denoiser_.sizes.overlapWindowSizeInPixels * 2, + tile_size.y + denoiser_.sizes.overlapWindowSizeInPixels * 2, denoiser_.state.device_pointer, - denoiser_.scratch_offset, - denoiser_.state.device_pointer + denoiser_.scratch_offset, - denoiser_.scratch_size); + denoiser_.sizes.stateSizeInBytes, + denoiser_.state.device_pointer + denoiser_.sizes.stateSizeInBytes, + denoiser_.sizes.withOverlapScratchSizeInBytes); if (result != OPTIX_SUCCESS) { set_error("Failed to set up OptiX denoiser"); return false; @@ -921,8 +921,7 @@ bool OptiXDevice::denoise_configure_if_needed(DenoiseContext ) cuda_assert(cuCtxSynchronize()); denoiser_.is_configured = true; - denoiser_.configured_size.x = buffer_params.width; - denoiser_.configured_size.y = buffer_params.height; + denoiser_.configured_size = tile_size; return true; } @@ -993,18 +992,20 @@ bool OptiXDevice::denoise_run(DenoiseContext , const DenoisePass ) guide_layers.albedo = albedo_layer; guide_layers.normal = normal_layer; - optix_assert(optixDenoiserInvoke(denoiser_.optix_denoiser, - denoiser_.queue.stream(), - , - denoiser_.state.device_pointer, - denoiser_.scratch_offset, - _layers, - _layers, - 1, - 0, - 0, - denoiser_.state.device_pointer + denoiser_.scratch_offset, -
[Bf-blender-cvs] [7a97e925fde] master: Cycles: Add support for building with OptiX 7.4 SDK and use built-in catmull-rom curve type
Commit: 7a97e925fde585ffafd7bdfe310d161cb6d51bc1 Author: Patrick Mours Date: Wed Nov 24 15:19:02 2021 +0100 Branches: master https://developer.blender.org/rB7a97e925fde585ffafd7bdfe310d161cb6d51bc1 Cycles: Add support for building with OptiX 7.4 SDK and use built-in catmull-rom curve type Some enum names were changed/removed in OptiX 7.4, so some changes are necessary to make things compile still. In addition, OptiX 7.4 also adds built-in support for catmull-rom curves, so it is no longer necessary to convert the catmull-rom data to cubic bsplines first, and has endcaps disabled by default now, so can remove the special handling via any-hit programs that filtered them out before. Differential Revision: https://developer.blender.org/D13351 === M intern/cycles/device/optix/device_impl.cpp M intern/cycles/kernel/device/optix/kernel.cu === diff --git a/intern/cycles/device/optix/device_impl.cpp b/intern/cycles/device/optix/device_impl.cpp index 6e897e3831f..b82b1281eb8 100644 --- a/intern/cycles/device/optix/device_impl.cpp +++ b/intern/cycles/device/optix/device_impl.cpp @@ -208,11 +208,15 @@ bool OptiXDevice::load_kernels(const uint kernel_features) } else { module_options.optLevel = OPTIX_COMPILE_OPTIMIZATION_LEVEL_3; -module_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_LINEINFO; +module_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_NONE; } module_options.boundValues = nullptr; module_options.numBoundValues = 0; +# if OPTIX_ABI_VERSION >= 55 + module_options.payloadTypes = nullptr; + module_options.numPayloadTypes = 0; +# endif OptixPipelineCompileOptions pipeline_options = {}; /* Default to no motion blur and two-level graph, since it is the fastest option. 
*/ @@ -227,7 +231,11 @@ bool OptiXDevice::load_kernels(const uint kernel_features) pipeline_options.usesPrimitiveTypeFlags = OPTIX_PRIMITIVE_TYPE_FLAGS_TRIANGLE; if (kernel_features & KERNEL_FEATURE_HAIR) { if (kernel_features & KERNEL_FEATURE_HAIR_THICK) { +# if OPTIX_ABI_VERSION >= 55 + pipeline_options.usesPrimitiveTypeFlags |= OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_CATMULLROM; +# else pipeline_options.usesPrimitiveTypeFlags |= OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_CUBIC_BSPLINE; +# endif } else pipeline_options.usesPrimitiveTypeFlags |= OPTIX_PRIMITIVE_TYPE_FLAGS_CUSTOM; @@ -324,7 +332,13 @@ bool OptiXDevice::load_kernels(const uint kernel_features) if (kernel_features & KERNEL_FEATURE_HAIR_THICK) { /* Built-in thick curve intersection. */ OptixBuiltinISOptions builtin_options = {}; +# if OPTIX_ABI_VERSION >= 55 + builtin_options.builtinISModuleType = OPTIX_PRIMITIVE_TYPE_ROUND_CATMULLROM; + builtin_options.buildFlags = OPTIX_BUILD_FLAG_PREFER_FAST_TRACE; + builtin_options.curveEndcapFlags = OPTIX_CURVE_ENDCAP_DEFAULT; /* Disable endcaps. 
*/ +# else builtin_options.builtinISModuleType = OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE; +# endif builtin_options.usesMotionBlur = false; optix_assert(optixBuiltinISModuleGet( @@ -411,7 +425,7 @@ bool OptiXDevice::load_kernels(const uint kernel_features) link_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_FULL; } else { -link_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_LINEINFO; +link_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_NONE; } if (kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) { @@ -1178,6 +1192,15 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress , bool refit) int ka = max(k0 - 1, curve.first_key); int kb = min(k1 + 1, curve.first_key + curve.num_keys - 1); + index_data[i] = i * 4; + float4 *const v = vertex_data.data() + step * num_vertices + index_data[i]; + +# if OPTIX_ABI_VERSION >= 55 + v[0] = make_float4(keys[ka].x, keys[ka].y, keys[ka].z, curve_radius[ka]); + v[1] = make_float4(keys[k0].x, keys[k0].y, keys[k0].z, curve_radius[k0]); + v[2] = make_float4(keys[k1].x, keys[k1].y, keys[k1].z, curve_radius[k1]); + v[3] = make_float4(keys[kb].x, keys[kb].y, keys[kb].z, curve_radius[kb]); +# else const float4 px = make_float4(keys[ka].x, keys[k0].x, keys[k1].x, keys[kb].x); const float4 py = make_float4(keys[ka].y, keys[k0].y, keys[k1].y, keys[kb].y); const float4 pz = make_float4(keys[ka].z, keys[k0].z, keys[k1].z, keys[kb].z); @@ -1190,8 +1213,6 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress , bool refit) static const float4 cr2bsp2 = make_float4(+1, -4, 11, -2) / 6.f; static const float4 cr2bsp3 = make_float4(-2, +5, -4, +7) / 6.f; - index_data[i] = i * 4; - float4 *const v = vertex_data.data() + step *
[Bf-blender-cvs] [809ae823b7c] master: Merge branch 'blender-v3.0-release'
Commit: 809ae823b7cb612fda219c0e277425bba175090f Author: Patrick Mours Date: Fri Nov 12 19:00:23 2021 +0100 Branches: master https://developer.blender.org/rB809ae823b7cb612fda219c0e277425bba175090f Merge branch 'blender-v3.0-release' === === ___ Bf-blender-cvs mailing list Bf-blender-cvs@blender.org List details, subscription details or unsubscribe: https://lists.blender.org/mailman/listinfo/bf-blender-cvs
[Bf-blender-cvs] [9d0d4b8601d] blender-v3.0-release: Fix T93029: OptiX denoising artifacts at high sample counts in specific scenes
Commit: 9d0d4b8601dfb9de335dd7af32562cbfb94238a6 Author: Patrick Mours Date: Fri Nov 12 18:59:50 2021 +0100 Branches: blender-v3.0-release https://developer.blender.org/rB9d0d4b8601dfb9de335dd7af32562cbfb94238a6 Fix T93029: OptiX denoising artifacts at high sample counts in specific scenes Partially reverts commit rB440a3475b8f5410e5c41bfbed5ce82771b41356f because "optixDenoiserComputeIntensity" does not currently support input images that are not packed (the "pixelStrideInBytes" field is not zero). As a result the intensity calculation would take into account data from other passes in the image, some of which was still scaled by the number of samples, and therefore produced wildly incorrect results that then caused artifacts in the denoised image. Maniphest Tasks: T93029 === M intern/cycles/device/optix/device_impl.cpp === diff --git a/intern/cycles/device/optix/device_impl.cpp b/intern/cycles/device/optix/device_impl.cpp index 8e554d0ba2f..bb690551c04 100644 --- a/intern/cycles/device/optix/device_impl.cpp +++ b/intern/cycles/device/optix/device_impl.cpp @@ -886,8 +886,7 @@ bool OptiXDevice::denoise_configure_if_needed(DenoiseContext ) denoiser_.scratch_offset = sizes.stateSizeInBytes; /* Allocate denoiser state if tile size has changed since last setup. */ - denoiser_.state.alloc_to_device(denoiser_.scratch_offset + denoiser_.scratch_size + - sizeof(float)); + denoiser_.state.alloc_to_device(denoiser_.scratch_offset + denoiser_.scratch_size); /* Initialize denoiser state for the current tile size. */ const OptixResult result = optixDenoiserSetup( @@ -971,16 +970,6 @@ bool OptiXDevice::denoise_run(DenoiseContext , const DenoisePass ) /* Finally run denoising. */ OptixDenoiserParams params = {}; /* All parameters are disabled/zero. 
*/ - params.hdrIntensity = denoiser_.state.device_pointer + denoiser_.scratch_offset + -denoiser_.scratch_size; - - optix_assert( - optixDenoiserComputeIntensity(denoiser_.optix_denoiser, -denoiser_.queue.stream(), -_layer, -params.hdrIntensity, -denoiser_.state.device_pointer + denoiser_.scratch_offset, -denoiser_.scratch_size)); OptixDenoiserLayer image_layers = {}; image_layers.input = color_layer; ___ Bf-blender-cvs mailing list Bf-blender-cvs@blender.org List details, subscription details or unsubscribe: https://lists.blender.org/mailman/listinfo/bf-blender-cvs
[Bf-blender-cvs] [f5656204352] master: Fix T92985: CUDA errors with Cycles film convert kernels
Commit: f56562043521a5c160585aea3f28167b4d3bc77d Author: Patrick Mours Date: Wed Nov 10 14:37:15 2021 +0100 Branches: master https://developer.blender.org/rBf56562043521a5c160585aea3f28167b4d3bc77d Fix T92985: CUDA errors with Cycles film convert kernels rB3a4c8f406a3a3bf0627477c6183a594fa707a6e2 changed the macros that create the film convert kernel entry points, but in the process accidentally changed the parameter definition to one of those (which caused CUDA launch and misaligned address errors) and changed the implementation as well. This restores the correct implementation from before. In addition, the `ccl_gpu_kernel_threads` macro did not work as intended and caused the generated launch bounds to end up with an incorrect input for the second parameter (it was set to "thread_num_registers", rather than the result of the block number calculation). I'm not entirely sure why, as the macro definition looked sound to me. Decided to simply go with two separate macros instead, to simplify and solve this. Also changed how state is captured with the `ccl_gpu_kernel_lambda` macro slightly, to avoid a compiler warning (expression has no effect) that otherwise occurred. 
Maniphest Tasks: T92985 Differential Revision: https://developer.blender.org/D13175 === M intern/cycles/kernel/CMakeLists.txt M intern/cycles/kernel/device/cuda/config.h M intern/cycles/kernel/device/gpu/kernel.h M intern/cycles/kernel/device/hip/config.h M intern/cycles/kernel/device/metal/compat.h === diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index f311b0e74bb..39cb886b16e 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -379,7 +379,6 @@ if(WITH_CYCLES_CUDA_BINARIES) ${SRC_KERNEL_HEADERS} ${SRC_KERNEL_DEVICE_GPU_HEADERS} ${SRC_KERNEL_DEVICE_CUDA_HEADERS} -${SRC_KERNEL_DEVICE_METAL_HEADERS} ${SRC_UTIL_HEADERS} ) set(cuda_cubins) diff --git a/intern/cycles/kernel/device/cuda/config.h b/intern/cycles/kernel/device/cuda/config.h index e333fe90332..003881d7912 100644 --- a/intern/cycles/kernel/device/cuda/config.h +++ b/intern/cycles/kernel/device/cuda/config.h @@ -92,25 +92,19 @@ /* Compute number of threads per block and minimum blocks per multiprocessor * given the maximum number of registers per thread. */ - -#define ccl_gpu_kernel_threads(block_num_threads) \ - extern "C" __global__ void __launch_bounds__(block_num_threads) - -#define ccl_gpu_kernel_threads_registers(block_num_threads, thread_num_registers) \ +#define ccl_gpu_kernel(block_num_threads, thread_num_registers) \ extern "C" __global__ void __launch_bounds__(block_num_threads, \ GPU_MULTIPRESSOR_MAX_REGISTERS / \ (block_num_threads * thread_num_registers)) -/* allow ccl_gpu_kernel to accept 1 or 2 parameters */ -#define SELECT_MACRO(_1, _2, NAME, ...) NAME -#define ccl_gpu_kernel(...) \ - SELECT_MACRO(__VA_ARGS__, ccl_gpu_kernel_threads_registers, ccl_gpu_kernel_threads)(__VA_ARGS__) +#define ccl_gpu_kernel_threads(block_num_threads) \ + extern "C" __global__ void __launch_bounds__(block_num_threads) #define ccl_gpu_kernel_signature(name, ...) 
kernel_gpu_##name(__VA_ARGS__) #define ccl_gpu_kernel_call(x) x -/* define a function object where "func" is the lambda body, and additional parameters are used to +/* Define a function object where "func" is the lambda body, and additional parameters are used to * specify captured state */ #define ccl_gpu_kernel_lambda(func, ...) \ struct KernelLambda { \ @@ -119,8 +113,7 @@ { \ return (func); \ } \ - } ccl_gpu_kernel_lambda_pass; \ - ccl_gpu_kernel_lambda_pass + } ccl_gpu_kernel_lambda_pass /* sanity checks */ diff --git a/intern/cycles/kernel/device/gpu/kernel.h b/intern/cycles/kernel/device/gpu/kernel.h index 2ec6a49ec7b..e954178ec63 100644 --- a/intern/cycles/kernel/device/gpu/kernel.h +++ b/intern/cycles/kernel/device/gpu/kernel.h @@ -56,8 +56,7 @@ */ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) -ccl_gpu_kernel_signature(integrator_reset, - int num_states) +ccl_gpu_kernel_signature(integrator_reset, int num_states) { const int state = ccl_gpu_global_id_x(); @@ -265,7 +264,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) } } -ccl_gpu_kernel(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE) +ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE) ccl_gpu_kernel_signature(integrator_queued_paths_array, int num_states, cc
[Bf-blender-cvs] [faeb2cc9005] master: Merge branch 'blender-v3.0-release'
Commit: faeb2cc9005739efd6d58a7ab1e9170bf064b656 Author: Patrick Mours Date: Tue Nov 9 14:49:36 2021 +0100 Branches: master https://developer.blender.org/rBfaeb2cc9005739efd6d58a7ab1e9170bf064b656 Merge branch 'blender-v3.0-release' === === ___ Bf-blender-cvs mailing list Bf-blender-cvs@blender.org List details, subscription details or unsubscribe: https://lists.blender.org/mailman/listinfo/bf-blender-cvs
[Bf-blender-cvs] [440a3475b8f] blender-v3.0-release: Cycles: Improve OptiX denoising with dark images and fix crash when denoiser is destroyed
Commit: 440a3475b8f5410e5c41bfbed5ce82771b41356f Author: Patrick Mours Date: Tue Nov 9 12:17:09 2021 +0100 Branches: blender-v3.0-release https://developer.blender.org/rB440a3475b8f5410e5c41bfbed5ce82771b41356f Cycles: Improve OptiX denoising with dark images and fix crash when denoiser is destroyed Adds a pass before denoising that calculates the intensity of the image, which can be passed into the OptiX denoiser for more optimal results for very dark or very bright images. In addition this also fixes a crash that sometimes occurred on exit. The OptiX denoiser object has to be destroyed before the OptiX device context object (since it references that). But in C++ the destructor function of a class is called before its fields are destructed, so "~OptiXDevice" was always called before "OptiXDevice::~Denoiser" and therefore "optixDeviceContextDestroy" was called before "optixDenoiserDestroy", hence the crash. Differential Revision: https://developer.blender.org/D13160 === M intern/cycles/device/optix/device_impl.cpp M intern/cycles/device/optix/device_impl.h M intern/cycles/kernel/device/gpu/kernel.h === diff --git a/intern/cycles/device/optix/device_impl.cpp b/intern/cycles/device/optix/device_impl.cpp index 7f94212f383..8e554d0ba2f 100644 --- a/intern/cycles/device/optix/device_impl.cpp +++ b/intern/cycles/device/optix/device_impl.cpp @@ -48,14 +48,6 @@ OptiXDevice::Denoiser::Denoiser(OptiXDevice *device) { } -OptiXDevice::Denoiser::~Denoiser() -{ - const CUDAContextScope scope(device); - if (optix_denoiser != nullptr) { -optixDenoiserDestroy(optix_denoiser); - } -} - OptiXDevice::OptiXDevice(const DeviceInfo , Stats , Profiler ) : CUDADevice(info, stats, profiler), sbt_data(this, "__sbt", MEM_READ_ONLY), @@ -133,6 +125,11 @@ OptiXDevice::~OptiXDevice() } } + /* Make sure denoiser is destroyed before device context! 
*/ + if (denoiser_.optix_denoiser != nullptr) { +optixDenoiserDestroy(denoiser_.optix_denoiser); + } + optixDeviceContextDestroy(context); } @@ -884,11 +881,13 @@ bool OptiXDevice::denoise_configure_if_needed(DenoiseContext ) optix_assert(optixDenoiserComputeMemoryResources( denoiser_.optix_denoiser, buffer_params.width, buffer_params.height, )); - denoiser_.scratch_size = sizes.withOverlapScratchSizeInBytes; + /* Denoiser is invoked on whole images only, so no overlap needed (would be used for tiling). */ + denoiser_.scratch_size = sizes.withoutOverlapScratchSizeInBytes; denoiser_.scratch_offset = sizes.stateSizeInBytes; /* Allocate denoiser state if tile size has changed since last setup. */ - denoiser_.state.alloc_to_device(denoiser_.scratch_offset + denoiser_.scratch_size); + denoiser_.state.alloc_to_device(denoiser_.scratch_offset + denoiser_.scratch_size + + sizeof(float)); /* Initialize denoiser state for the current tile size. */ const OptixResult result = optixDenoiserSetup( @@ -942,8 +941,6 @@ bool OptiXDevice::denoise_run(DenoiseContext , const DenoisePass ) color_layer.format = OPTIX_PIXEL_FORMAT_FLOAT3; } - device_vector fake_albedo(this, "fake_albedo", MEM_READ_WRITE); - /* Optional albedo and color passes. */ if (context.num_input_passes > 1) { const device_ptr d_guiding_buffer = context.guiding_params.device_pointer; @@ -974,6 +971,17 @@ bool OptiXDevice::denoise_run(DenoiseContext , const DenoisePass ) /* Finally run denoising. */ OptixDenoiserParams params = {}; /* All parameters are disabled/zero. 
*/ + params.hdrIntensity = denoiser_.state.device_pointer + denoiser_.scratch_offset + +denoiser_.scratch_size; + + optix_assert( + optixDenoiserComputeIntensity(denoiser_.optix_denoiser, +denoiser_.queue.stream(), +_layer, +params.hdrIntensity, +denoiser_.state.device_pointer + denoiser_.scratch_offset, +denoiser_.scratch_size)); + OptixDenoiserLayer image_layers = {}; image_layers.input = color_layer; image_layers.output = output_layer; diff --git a/intern/cycles/device/optix/device_impl.h b/intern/cycles/device/optix/device_impl.h index 3ec98098eb7..5cfc249b430 100644 --- a/intern/cycles/device/optix/device_impl.h +++ b/intern/cycles/device/optix/device_impl.h @@ -82,7 +82,6 @@ class OptiXDevice : public CUDADevice { class Denoiser { public: explicit Denoiser(OptiXDevice *device); -~Denoiser(); OptiXDevice *device; OptiXDeviceQueue queue; diff --git a/intern/cycles/kernel/device/gpu/kernel.h b/intern/cycles/kernel/device/gpu/kernel.h index f86a8c692aa..5848ba5d
[Bf-blender-cvs] [9daf6a69a6a] blender-v3.0-release: Fix T92472: OptiX denoising artifacts with recent GPU driver 495.29.05 or newer on Linux
Commit: 9daf6a69a6acd95f0b46bc45e5f3ae27d0904764 Author: Patrick Mours Date: Tue Nov 9 12:24:54 2021 +0100 Branches: blender-v3.0-release https://developer.blender.org/rB9daf6a69a6acd95f0b46bc45e5f3ae27d0904764 Fix T92472: OptiX denoising artifacts with recent GPU driver 495.29.05 or newer on Linux Adds a workaround for a driver bug in r495 that causes artifacts with OptiX denoising. `optixDenoiserSetup` is not working properly there when called with a stream other than the default stream, so use the default stream for now and force synchronization across the entire context afterwards to ensure the other stream Cycles uses to enqueue the actual denoising command cannot execute before the denoising setup has finished. Maniphest Tasks: T92472 Differential Revision: https://developer.blender.org/D13158 === M intern/cycles/device/optix/device_impl.cpp === diff --git a/intern/cycles/device/optix/device_impl.cpp b/intern/cycles/device/optix/device_impl.cpp index 9b9a5ac0de7..7f94212f383 100644 --- a/intern/cycles/device/optix/device_impl.cpp +++ b/intern/cycles/device/optix/device_impl.cpp @@ -891,20 +891,23 @@ bool OptiXDevice::denoise_configure_if_needed(DenoiseContext ) denoiser_.state.alloc_to_device(denoiser_.scratch_offset + denoiser_.scratch_size); /* Initialize denoiser state for the current tile size. 
*/ - const OptixResult result = optixDenoiserSetup(denoiser_.optix_denoiser, -denoiser_.queue.stream(), -buffer_params.width, -buffer_params.height, -denoiser_.state.device_pointer, -denoiser_.scratch_offset, -denoiser_.state.device_pointer + -denoiser_.scratch_offset, -denoiser_.scratch_size); + const OptixResult result = optixDenoiserSetup( + denoiser_.optix_denoiser, + 0, /* Work around bug in r495 drivers that causes artifacts when denoiser setup is called +on a stream that is not the default stream */ + buffer_params.width, + buffer_params.height, + denoiser_.state.device_pointer, + denoiser_.scratch_offset, + denoiser_.state.device_pointer + denoiser_.scratch_offset, + denoiser_.scratch_size); if (result != OPTIX_SUCCESS) { set_error("Failed to set up OptiX denoiser"); return false; } + cuda_assert(cuCtxSynchronize()); + denoiser_.is_configured = true; denoiser_.configured_size.x = buffer_params.width; denoiser_.configured_size.y = buffer_params.height; ___ Bf-blender-cvs mailing list Bf-blender-cvs@blender.org List details, subscription details or unsubscribe: https://lists.blender.org/mailman/listinfo/bf-blender-cvs
[Bf-blender-cvs] [31dfdb6379c] blender-v2.93-release: Add missing "CUDA_ERROR_UNSUPPORTED_PTX_VERSION" to CUEW
Commit: 31dfdb6379cd42b919ba529eb9cfd3f29eb8de64 Author: Patrick Mours Date: Tue Nov 2 12:30:28 2021 +0100 Branches: blender-v2.93-release https://developer.blender.org/rB31dfdb6379cd42b919ba529eb9cfd3f29eb8de64 Add missing "CUDA_ERROR_UNSUPPORTED_PTX_VERSION" to CUEW This is required for Cycles to report a meaningful error message when it fails to load a PTX module created with a newer CUDA toolkit version than the driver supports. Fix crash when kernel loading failed (T91879) Ref T91879 === M extern/cuew/include/cuew.h M extern/cuew/src/cuew.c M intern/cycles/device/cuda/device_cuda_impl.cpp === diff --git a/extern/cuew/include/cuew.h b/extern/cuew/include/cuew.h index 0fa0f1291fa..85522744ad1 100644 --- a/extern/cuew/include/cuew.h +++ b/extern/cuew/include/cuew.h @@ -609,6 +609,7 @@ typedef enum cudaError_enum { CUDA_ERROR_INVALID_GRAPHICS_CONTEXT = 219, CUDA_ERROR_NVLINK_UNCORRECTABLE = 220, CUDA_ERROR_JIT_COMPILER_NOT_FOUND = 221, + CUDA_ERROR_UNSUPPORTED_PTX_VERSION = 222, CUDA_ERROR_INVALID_SOURCE = 300, CUDA_ERROR_FILE_NOT_FOUND = 301, CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND = 302, diff --git a/extern/cuew/src/cuew.c b/extern/cuew/src/cuew.c index 7a1b0018a24..9eba9306323 100644 --- a/extern/cuew/src/cuew.c +++ b/extern/cuew/src/cuew.c @@ -736,6 +736,7 @@ const char *cuewErrorString(CUresult result) { case CUDA_ERROR_INVALID_GRAPHICS_CONTEXT: return "Invalid graphics context"; case CUDA_ERROR_NVLINK_UNCORRECTABLE: return "Nvlink uncorrectable"; case CUDA_ERROR_JIT_COMPILER_NOT_FOUND: return "Jit compiler not found"; +case CUDA_ERROR_UNSUPPORTED_PTX_VERSION: return "Unsupported PTX version"; case CUDA_ERROR_INVALID_SOURCE: return "Invalid source"; case CUDA_ERROR_FILE_NOT_FOUND: return "File not found"; case CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND: return "Link to a shared object failed to resolve"; diff --git a/intern/cycles/device/cuda/device_cuda_impl.cpp b/intern/cycles/device/cuda/device_cuda_impl.cpp index cebe8ce631e..e9d8dc5a7de 100644 --- 
a/intern/cycles/device/cuda/device_cuda_impl.cpp +++ b/intern/cycles/device/cuda/device_cuda_impl.cpp @@ -583,9 +583,9 @@ bool CUDADevice::load_kernels(const DeviceRequestedFeatures _features) if (result == CUDA_SUCCESS) { reserve_local_memory(requested_features); - } - load_functions(); +load_functions(); + } return (result == CUDA_SUCCESS); } ___ Bf-blender-cvs mailing list Bf-blender-cvs@blender.org List details, subscription details or unsubscribe: https://lists.blender.org/mailman/listinfo/bf-blender-cvs
[Bf-blender-cvs] [b382632665b] blender-v2.83-release: Add missing "CUDA_ERROR_UNSUPPORTED_PTX_VERSION" to CUEW
Commit: b382632665b3552d580a3c65e94dd36857d5fb68 Author: Patrick Mours Date: Fri Oct 29 10:29:25 2021 +0200 Branches: blender-v2.83-release https://developer.blender.org/rBb382632665b3552d580a3c65e94dd36857d5fb68 Add missing "CUDA_ERROR_UNSUPPORTED_PTX_VERSION" to CUEW This is required for Cycles to report a meaningful error message when it fails to load a PTX module created with a newer CUDA toolkit version than the driver supports. Ref T91879 === M extern/cuew/include/cuew.h M extern/cuew/src/cuew.c M intern/cycles/device/cuda/device_cuda_impl.cpp === diff --git a/extern/cuew/include/cuew.h b/extern/cuew/include/cuew.h index 0fa0f1291fa..85522744ad1 100644 --- a/extern/cuew/include/cuew.h +++ b/extern/cuew/include/cuew.h @@ -609,6 +609,7 @@ typedef enum cudaError_enum { CUDA_ERROR_INVALID_GRAPHICS_CONTEXT = 219, CUDA_ERROR_NVLINK_UNCORRECTABLE = 220, CUDA_ERROR_JIT_COMPILER_NOT_FOUND = 221, + CUDA_ERROR_UNSUPPORTED_PTX_VERSION = 222, CUDA_ERROR_INVALID_SOURCE = 300, CUDA_ERROR_FILE_NOT_FOUND = 301, CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND = 302, diff --git a/extern/cuew/src/cuew.c b/extern/cuew/src/cuew.c index f477ec48a18..e5349763197 100644 --- a/extern/cuew/src/cuew.c +++ b/extern/cuew/src/cuew.c @@ -736,6 +736,7 @@ const char *cuewErrorString(CUresult result) { case CUDA_ERROR_INVALID_GRAPHICS_CONTEXT: return "Invalid graphics context"; case CUDA_ERROR_NVLINK_UNCORRECTABLE: return "Nvlink uncorrectable"; case CUDA_ERROR_JIT_COMPILER_NOT_FOUND: return "Jit compiler not found"; +case CUDA_ERROR_UNSUPPORTED_PTX_VERSION: return "Unsupported PTX version"; case CUDA_ERROR_INVALID_SOURCE: return "Invalid source"; case CUDA_ERROR_FILE_NOT_FOUND: return "File not found"; case CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND: return "Link to a shared object failed to resolve"; diff --git a/intern/cycles/device/cuda/device_cuda_impl.cpp b/intern/cycles/device/cuda/device_cuda_impl.cpp index 6196f642f8e..22a136e5ab9 100644 --- a/intern/cycles/device/cuda/device_cuda_impl.cpp 
+++ b/intern/cycles/device/cuda/device_cuda_impl.cpp @@ -545,9 +545,9 @@ bool CUDADevice::load_kernels(const DeviceRequestedFeatures _features) if (result == CUDA_SUCCESS) { reserve_local_memory(requested_features); - } - load_functions(); +load_functions(); + } return (result == CUDA_SUCCESS); } ___ Bf-blender-cvs mailing list Bf-blender-cvs@blender.org List details, subscription details or unsubscribe: https://lists.blender.org/mailman/listinfo/bf-blender-cvs
[Bf-blender-cvs] [3a655711955] master: Fix T90666: Toggling motion blur while persistent data is enabled results in artifacts
Commit: 3a65571195524ea50682611306ab4d103807112a Author: Patrick Mours Date: Fri Oct 8 13:45:34 2021 +0200 Branches: master https://developer.blender.org/rB3a65571195524ea50682611306ab4d103807112a Fix T90666: Toggling motion blur while persistent data is enabled results in artifacts Enabling or disabling motion blur requires rebuilding the BVH of affected geometry and uploading modified vertices to the device (since without motion blur the transform is applied to the vertex positions, whereas with motion blur this is done during traversal). Previously neither was happening when persistent data was enabled, since the relevant node sockets were not tagged as modified after toggling motion blur. The change to blender_object.cpp makes it so `geom->set_use_motion_blur()` is always called (regardless of motion blur being toggled on or off), which will tag the geometry as modified if that value changed and ensures the BVH is updated. The change to hair.cpp/mesh.cpp was necessary since after motion blur is disabled, the transform is applied to the vertex positions of a mesh, but those changes were not uploaded to the device. This is fixed now that they are tagged as modified. 
Maniphest Tasks: T90666 Differential Revision: https://developer.blender.org/D12781 === M intern/cycles/blender/blender_object.cpp M intern/cycles/render/hair.cpp M intern/cycles/render/mesh.cpp === diff --git a/intern/cycles/blender/blender_object.cpp b/intern/cycles/blender/blender_object.cpp index 95da4a2df84..4b1c4edef7e 100644 --- a/intern/cycles/blender/blender_object.cpp +++ b/intern/cycles/blender/blender_object.cpp @@ -104,23 +104,22 @@ void BlenderSync::sync_object_motion_init(BL::Object _parent, BL::Object _ob array motion; object->set_motion(motion); - Scene::MotionType need_motion = scene->need_motion(); - if (need_motion == Scene::MOTION_NONE || !object->get_geometry()) { + Geometry *geom = object->get_geometry(); + if (!geom) { return; } - Geometry *geom = object->get_geometry(); - int motion_steps = 0; bool use_motion_blur = false; + Scene::MotionType need_motion = scene->need_motion(); if (need_motion == Scene::MOTION_BLUR) { motion_steps = object_motion_steps(b_parent, b_ob, Object::MAX_MOTION_STEPS); if (motion_steps && object_use_deform_motion(b_parent, b_ob)) { use_motion_blur = true; } } - else { + else if (need_motion != Scene::MOTION_NONE) { motion_steps = 3; } diff --git a/intern/cycles/render/hair.cpp b/intern/cycles/render/hair.cpp index e104455f7dd..e757e3fd3e0 100644 --- a/intern/cycles/render/hair.cpp +++ b/intern/cycles/render/hair.cpp @@ -441,6 +441,9 @@ void Hair::apply_transform(const Transform , const bool apply_to_motion) curve_radius[i] = radius; } + tag_curve_keys_modified(); + tag_curve_radius_modified(); + if (apply_to_motion) { Attribute *curve_attr = attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); diff --git a/intern/cycles/render/mesh.cpp b/intern/cycles/render/mesh.cpp index 2ecea3101db..9c93f6f881c 100644 --- a/intern/cycles/render/mesh.cpp +++ b/intern/cycles/render/mesh.cpp @@ -508,6 +508,8 @@ void Mesh::apply_transform(const Transform , const bool apply_to_motion) for (size_t i = 0; i < verts.size(); i++) verts[i] = 
transform_point(, verts[i]); + tag_verts_modified(); + if (apply_to_motion) { Attribute *attr = attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); ___ Bf-blender-cvs mailing list Bf-blender-cvs@blender.org List details, subscription details or unsubscribe: https://lists.blender.org/mailman/listinfo/bf-blender-cvs
[Bf-blender-cvs] [c11585a82f9] master: Add missing "CUDA_ERROR_UNSUPPORTED_PTX_VERSION" to CUEW
Commit: c11585a82f97e51c01c4f4f309b85bdf7602ca08 Author: Patrick Mours Date: Tue Oct 5 16:36:33 2021 +0200 Branches: master https://developer.blender.org/rBc11585a82f97e51c01c4f4f309b85bdf7602ca08 Add missing "CUDA_ERROR_UNSUPPORTED_PTX_VERSION" to CUEW This is required for Cycles to report a meaningful error message when it fails to load a PTX module created with a newer CUDA toolkit version than the driver supports. Ref T91879 === M extern/cuew/include/cuew.h M extern/cuew/src/cuew.c === diff --git a/extern/cuew/include/cuew.h b/extern/cuew/include/cuew.h index a2142b8f2ba..5979f48e43d 100644 --- a/extern/cuew/include/cuew.h +++ b/extern/cuew/include/cuew.h @@ -609,6 +609,7 @@ typedef enum cudaError_enum { CUDA_ERROR_INVALID_GRAPHICS_CONTEXT = 219, CUDA_ERROR_NVLINK_UNCORRECTABLE = 220, CUDA_ERROR_JIT_COMPILER_NOT_FOUND = 221, + CUDA_ERROR_UNSUPPORTED_PTX_VERSION = 222, CUDA_ERROR_INVALID_SOURCE = 300, CUDA_ERROR_FILE_NOT_FOUND = 301, CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND = 302, diff --git a/extern/cuew/src/cuew.c b/extern/cuew/src/cuew.c index 7a1b0018a24..9eba9306323 100644 --- a/extern/cuew/src/cuew.c +++ b/extern/cuew/src/cuew.c @@ -736,6 +736,7 @@ const char *cuewErrorString(CUresult result) { case CUDA_ERROR_INVALID_GRAPHICS_CONTEXT: return "Invalid graphics context"; case CUDA_ERROR_NVLINK_UNCORRECTABLE: return "Nvlink uncorrectable"; case CUDA_ERROR_JIT_COMPILER_NOT_FOUND: return "Jit compiler not found"; +case CUDA_ERROR_UNSUPPORTED_PTX_VERSION: return "Unsupported PTX version"; case CUDA_ERROR_INVALID_SOURCE: return "Invalid source"; case CUDA_ERROR_FILE_NOT_FOUND: return "File not found"; case CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND: return "Link to a shared object failed to resolve"; ___ Bf-blender-cvs mailing list Bf-blender-cvs@blender.org List details, subscription details or unsubscribe: https://lists.blender.org/mailman/listinfo/bf-blender-cvs
[Bf-blender-cvs] [2189dfd6e25] master: Cycles: Rework OptiX visibility flags handling
Commit: 2189dfd6e25a7bb6b734116619d87bc2d2a535ff Author: Patrick Mours Date: Wed Sep 22 16:23:08 2021 +0200 Branches: master https://developer.blender.org/rB2189dfd6e25a7bb6b734116619d87bc2d2a535ff Cycles: Rework OptiX visibility flags handling Previously, the visibility test against the visibility flags was performed in an any-hit program in OptiX (called `__anyhit__kernel_optix_visibility_test`), which was using the `__prim_visibility` array. This is not entirely correct, however, since `__prim_visibility` is filled with the merged visibility flags of all objects that reference that primitive, so if one object uses different visibility flags than another object, but they both are instances of the same geometry, they would appear the same way. The reason that the any-hit program was used rather than the OptiX instance visibility mask is that the latter is currently limited to 8 bits only, which is not sufficient to contain all Cycles visibility flags (12 bits). To mostly fix the problem with multiple instances and different visibility flags, I changed things to use the OptiX instance visibility mask for a subset of the Cycles visibility flags (`PATH_RAY_CAMERA` to `PATH_RAY_VOLUME_SCATTER`, which fit into 8 bits) and only fall back to the visibility test any-hit program if that isn't enough (e.g. the ray visibility mask exceeds 8 bits or when using the built-in curves from OptiX, since the any-hit program is then also used to skip the curve endcaps). This may also improve performance in some cases, since by default OptiX can now perform the normal scene intersection trace calls entirely on RT cores without having to jump back to the SM on every hit to execute the any-hit program. 
Fixes T89801 Differential Revision: https://developer.blender.org/D12604 === M intern/cycles/device/optix/device_impl.cpp M intern/cycles/device/optix/device_impl.h M intern/cycles/kernel/bvh/bvh.h M intern/cycles/kernel/device/optix/kernel.cu === diff --git a/intern/cycles/device/optix/device_impl.cpp b/intern/cycles/device/optix/device_impl.cpp index b54d423a183..5f5eff53063 100644 --- a/intern/cycles/device/optix/device_impl.cpp +++ b/intern/cycles/device/optix/device_impl.cpp @@ -315,6 +315,11 @@ bool OptiXDevice::load_kernels(const uint kernel_features) group_descs[PG_HITS].kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP; group_descs[PG_HITS].hitgroup.moduleAH = optix_module; group_descs[PG_HITS].hitgroup.entryFunctionNameAH = "__anyhit__kernel_optix_shadow_all_hit"; + group_descs[PG_HITV].kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP; + group_descs[PG_HITV].hitgroup.moduleCH = optix_module; + group_descs[PG_HITV].hitgroup.entryFunctionNameCH = "__closesthit__kernel_optix_hit"; + group_descs[PG_HITV].hitgroup.moduleAH = optix_module; + group_descs[PG_HITV].hitgroup.entryFunctionNameAH = "__anyhit__kernel_optix_volume_test"; if (kernel_features & KERNEL_FEATURE_HAIR) { if (kernel_features & KERNEL_FEATURE_HAIR_THICK) { @@ -397,6 +402,7 @@ bool OptiXDevice::load_kernels(const uint kernel_features) trace_css = std::max(trace_css, stack_size[PG_HITD].cssIS + stack_size[PG_HITD].cssAH); trace_css = std::max(trace_css, stack_size[PG_HITS].cssIS + stack_size[PG_HITS].cssAH); trace_css = std::max(trace_css, stack_size[PG_HITL].cssIS + stack_size[PG_HITL].cssAH); + trace_css = std::max(trace_css, stack_size[PG_HITV].cssIS + stack_size[PG_HITV].cssAH); trace_css = std::max(trace_css, stack_size[PG_HITD_MOTION].cssIS + stack_size[PG_HITD_MOTION].cssAH); trace_css = std::max(trace_css, @@ -421,6 +427,7 @@ bool OptiXDevice::load_kernels(const uint kernel_features) pipeline_groups.push_back(groups[PG_HITD]); pipeline_groups.push_back(groups[PG_HITS]); 
pipeline_groups.push_back(groups[PG_HITL]); +pipeline_groups.push_back(groups[PG_HITV]); if (motion_blur) { pipeline_groups.push_back(groups[PG_HITD_MOTION]); pipeline_groups.push_back(groups[PG_HITS_MOTION]); @@ -459,6 +466,7 @@ bool OptiXDevice::load_kernels(const uint kernel_features) pipeline_groups.push_back(groups[PG_HITD]); pipeline_groups.push_back(groups[PG_HITS]); pipeline_groups.push_back(groups[PG_HITL]); +pipeline_groups.push_back(groups[PG_HITV]); if (motion_blur) { pipeline_groups.push_back(groups[PG_HITD_MOTION]); pipeline_groups.push_back(groups[PG_HITS_MOTION]); @@ -1390,25 +1398,33 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress , bool refit) /* Set user instance ID to object index (but leave low bit blank). */ instance.instanceId = ob->get_device_index() << 1; - /* Have to have at least one bit in the mask, or else instance would always be culled. */ - instance.visibilityMask = 1; +
[Bf-blender-cvs] [cad00ba01b4] cycles-x: Cycles X: Improve performance of transparent shadows with OptiX
Commit: cad00ba01b4ac7412c51c6d0143e402dc56967d7 Author: Patrick Mours Date: Thu Sep 16 18:16:38 2021 +0200 Branches: cycles-x https://developer.blender.org/rBcad00ba01b4ac7412c51c6d0143e402dc56967d7 Cycles X: Improve performance of transparent shadows with OptiX This changes the shadow record-all any-hit program to accept all hits (return without calling `optixIgnoreIntersection`) beyond the furthest distance recorded after the maximum number of hits that can be recorded was reached. OptiX will not call the any-hit program anymore for hits beyond the distance of the accepted hits and also reduces the current ray length behind the scenes. As a result performance improves drastically in scenes where shadow rays can hit a lot of transparent objects, like the "koro" benchmark scene. With this applied I now get similar performance with both CUDA and OptiX in "koro". Not quite perfect yet, but much better than before. Reviewed By: brecht Differential Revision: https://developer.blender.org/D12524 === M intern/cycles/kernel/device/optix/kernel.cu M intern/cycles/kernel/integrator/integrator_shade_shadow.h === diff --git a/intern/cycles/kernel/device/optix/kernel.cu b/intern/cycles/kernel/device/optix/kernel.cu index 8c68522289f..a4603b53150 100644 --- a/intern/cycles/kernel/device/optix/kernel.cu +++ b/intern/cycles/kernel/device/optix/kernel.cu @@ -169,11 +169,13 @@ extern "C" __global__ void __anyhit__kernel_optix_local_hit() extern "C" __global__ void __anyhit__kernel_optix_shadow_all_hit() { #ifdef __SHADOW_RECORD_ALL__ + bool ignore_intersection = false; + const uint prim = optixGetPrimitiveIndex(); # ifdef __VISIBILITY_FLAG__ const uint visibility = optixGetPayload_4(); if ((kernel_tex_fetch(__prim_visibility, prim) & visibility) == 0) { -return optixIgnoreIntersection(); +ignore_intersection = true; } # endif @@ -190,7 +192,7 @@ extern "C" __global__ void __anyhit__kernel_optix_shadow_all_hit() // Filter out curve endcaps if (u == 0.0f || u == 1.0f) { - return 
optixIgnoreIntersection(); + ignore_intersection = true; } } # endif @@ -199,7 +201,9 @@ extern "C" __global__ void __anyhit__kernel_optix_shadow_all_hit() int record_index = num_hits; const int max_hits = optixGetPayload_3(); - optixSetPayload_2(num_hits + 1); + if (!ignore_intersection) { +optixSetPayload_2(num_hits + 1); + } Intersection *const isect_array = get_payload_ptr_0(); @@ -218,37 +222,37 @@ extern "C" __global__ void __anyhit__kernel_optix_shadow_all_hit() } if (optixGetRayTmax() >= max_recorded_t) { - return optixIgnoreIntersection(); + /* Accept hit, so that OptiX won't consider any more hits beyond it anymore. */ + return; } record_index = max_recorded_hit; } - - /* TODO: is there a way to shorten the ray length when max_hits is reached, so Optix - * can discard triangles beyond it? */ # endif - Intersection *const isect = isect_array + record_index; - isect->u = u; - isect->v = v; - isect->t = optixGetRayTmax(); - isect->prim = prim; - isect->object = get_object_id(); - isect->type = kernel_tex_fetch(__prim_type, prim); + if (!ignore_intersection) { +Intersection *const isect = isect_array + record_index; +isect->u = u; +isect->v = v; +isect->t = optixGetRayTmax(); +isect->prim = prim; +isect->object = get_object_id(); +isect->type = kernel_tex_fetch(__prim_type, prim); # ifdef __TRANSPARENT_SHADOWS__ - // Detect if this surface has a shader with transparent shadows - if (!shader_transparent_shadow(NULL, isect) || max_hits == 0) { +// Detect if this surface has a shader with transparent shadows +if (!shader_transparent_shadow(NULL, isect) || max_hits == 0) { # endif -// If no transparent shadows, all light is blocked and we can stop immediately -optixSetPayload_5(true); -return optixTerminateRay(); + // If no transparent shadows, all light is blocked and we can stop immediately + optixSetPayload_5(true); + return optixTerminateRay(); # ifdef __TRANSPARENT_SHADOWS__ +} +# endif } // Continue tracing optixIgnoreIntersection(); -# endif #endif } diff 
--git a/intern/cycles/kernel/integrator/integrator_shade_shadow.h b/intern/cycles/kernel/integrator/integrator_shade_shadow.h index fb836191c94..fd3c3ae1653 100644 --- a/intern/cycles/kernel/integrator/integrator_shade_shadow.h +++ b/intern/cycles/kernel/integrator/integrator_shade_shadow.h @@ -23,6 +23,11 @@ CCL_NAMESPACE_BEGIN +ccl_device_inline bool shadow_intersections_has_remaining(const int num_hits) +{ + return num_hits >= INTEGRATOR_SHADOW_ISECT_SIZE; +} + #ifdef __TRANSP
[Bf-blender-cvs] [27db38f0a72] cycles-x: Cycles X: Shading performance improvements by changing inlining behavior for SVM
Commit: 27db38f0a729411f13c99a60574d59c70d461be5 Author: Patrick Mours Date: Mon Jul 5 12:58:22 2021 +0200 Branches: cycles-x https://developer.blender.org/rB27db38f0a729411f13c99a60574d59c70d461be5 Cycles X: Shading performance improvements by changing inlining behavior for SVM The shading kernels (shade_surface, ...) are heavily memory-bound. I found several hotspots where execution was stalled waiting for spills to be loaded back into registers. That's something that can be adjusted by changing the inlining logic: For example, the compiler did not inline "kernel_write_denoising_features" (even though it was marked __inline__), which caused it to force synchronization before the function call. Forcing it inline avoided that and got rid of that hotspot. Then there were cubic texture filtering and NanoVDB, which introduced huge code chunks into each texture sampling evaluation (increasing register and instruction cache pressure), even though they are rarely actually used. Making them __noinline__ confines that overhead so that it only occurs when they are actually used. Another case is the SVM. The compiler currently converts the node type switch statement into a binary searched branch sequence. This means that, depending on the SVM node hit, the GPU has to branch over large portions of code, which increases instruction cache pressure immensely (GPU is fetching lots of code even for stuff it immediately jumps away from again, while jumping through the binary searched branches). This can be reduced somewhat by making all the node functions __noinline__, so that the GPU only has to branch over a bunch of call instructions, rather than all the inlined code. The SVM "offset" value is passed by value into the node functions now and returned through the function return value, to make the compiler keep it in a register. 
Otherwise when passed as a pointer, in OptiX the compiler was forced to move it into local memory (since functions are compiled separately there, so the compiler is unaware of how that pointer is used). Differential Revision: https://developer.blender.org/D11816 === M intern/cycles/kernel/device/cuda/image.h M intern/cycles/kernel/kernel_passes.h M intern/cycles/kernel/svm/svm.h M intern/cycles/kernel/svm/svm_ao.h M intern/cycles/kernel/svm/svm_attribute.h M intern/cycles/kernel/svm/svm_bevel.h M intern/cycles/kernel/svm/svm_blackbody.h M intern/cycles/kernel/svm/svm_brick.h M intern/cycles/kernel/svm/svm_brightness.h M intern/cycles/kernel/svm/svm_bump.h M intern/cycles/kernel/svm/svm_camera.h M intern/cycles/kernel/svm/svm_checker.h M intern/cycles/kernel/svm/svm_clamp.h M intern/cycles/kernel/svm/svm_closure.h M intern/cycles/kernel/svm/svm_convert.h M intern/cycles/kernel/svm/svm_displace.h M intern/cycles/kernel/svm/svm_fresnel.h M intern/cycles/kernel/svm/svm_gamma.h M intern/cycles/kernel/svm/svm_geometry.h M intern/cycles/kernel/svm/svm_gradient.h M intern/cycles/kernel/svm/svm_hsv.h M intern/cycles/kernel/svm/svm_ies.h M intern/cycles/kernel/svm/svm_image.h M intern/cycles/kernel/svm/svm_invert.h M intern/cycles/kernel/svm/svm_light_path.h M intern/cycles/kernel/svm/svm_magic.h M intern/cycles/kernel/svm/svm_map_range.h M intern/cycles/kernel/svm/svm_mapping.h M intern/cycles/kernel/svm/svm_math.h M intern/cycles/kernel/svm/svm_mix.h M intern/cycles/kernel/svm/svm_musgrave.h M intern/cycles/kernel/svm/svm_noisetex.h M intern/cycles/kernel/svm/svm_normal.h M intern/cycles/kernel/svm/svm_ramp.h M intern/cycles/kernel/svm/svm_sepcomb_hsv.h M intern/cycles/kernel/svm/svm_sky.h M intern/cycles/kernel/svm/svm_tex_coord.h M intern/cycles/kernel/svm/svm_types.h M intern/cycles/kernel/svm/svm_value.h M intern/cycles/kernel/svm/svm_vector_rotate.h M intern/cycles/kernel/svm/svm_vector_transform.h M intern/cycles/kernel/svm/svm_vertex_color.h M 
intern/cycles/kernel/svm/svm_voronoi.h M intern/cycles/kernel/svm/svm_voxel.h M intern/cycles/kernel/svm/svm_wave.h M intern/cycles/kernel/svm/svm_wavelength.h M intern/cycles/kernel/svm/svm_white_noise.h M intern/cycles/kernel/svm/svm_wireframe.h === diff --git a/intern/cycles/kernel/device/cuda/image.h b/intern/cycles/kernel/device/cuda/image.h index 92a66ecf9a0..e127fe88df3 100644 --- a/intern/cycles/kernel/device/cuda/image.h +++ b/intern/cycles/kernel/device/cuda/image.h @@ -65,7 +65,7 @@ ccl_device float cubic_h1(float a) /* Fast bicubic texture lookup using 4 bilinear lookups, adapted from CUDA samples. */ template -ccl_device T kernel_tex_image_interp_bicubic(const TextureInfo , float x, float y) +c
[Bf-blender-cvs] [45cfa58ee82] cycles-x: Enable built-in OptiX curves by default
Commit: 45cfa58ee82a3ccdeaa5d07cb69e0f672b356e08 Author: Patrick Mours Date: Thu Jun 24 12:53:43 2021 +0200 Branches: cycles-x https://developer.blender.org/rB45cfa58ee82a3ccdeaa5d07cb69e0f672b356e08 Enable built-in OptiX curves by default Starting with OptiX 7.3 curves now behave as expected with Cycles and render the same as the custom intersection implementation, so enable by default. === M intern/cycles/blender/addon/properties.py M intern/cycles/blender/addon/ui.py M intern/cycles/blender/blender_python.cpp M intern/cycles/device/optix/device_impl.cpp M intern/cycles/util/util_debug.cpp M intern/cycles/util/util_debug.h === diff --git a/intern/cycles/blender/addon/properties.py b/intern/cycles/blender/addon/properties.py index 6afce0829f8..4997e9e4381 100644 --- a/intern/cycles/blender/addon/properties.py +++ b/intern/cycles/blender/addon/properties.py @@ -708,11 +708,6 @@ class CyclesRenderSettings(bpy.types.PropertyGroup): debug_use_cuda_adaptive_compile: BoolProperty(name="Adaptive Compile", default=False) -debug_use_optix_curves_api: BoolProperty( -name="Native OptiX Curve Primitive", -description="Use OptiX curves API for hair instead of custom implementation", -default=False -) debug_use_optix_debug: BoolProperty( name="OptiX Module Debug", description="Load OptiX module in debug mode: lower logging verbosity level, enable validations, and lower optimization level", diff --git a/intern/cycles/blender/addon/ui.py b/intern/cycles/blender/addon/ui.py index 4d6418ed84a..8ab8e051ec0 100644 --- a/intern/cycles/blender/addon/ui.py +++ b/intern/cycles/blender/addon/ui.py @@ -1779,7 +1779,6 @@ class CYCLES_RENDER_PT_debug(CyclesDebugButtonsPanel, Panel): col = layout.column() col.label(text="OptiX Flags:") -col.prop(cscene, "debug_use_optix_curves_api") col.prop(cscene, "debug_use_optix_debug") col.separator() diff --git a/intern/cycles/blender/blender_python.cpp b/intern/cycles/blender/blender_python.cpp index fbb6c07bfb6..59826fa3e85 100644 --- 
a/intern/cycles/blender/blender_python.cpp +++ b/intern/cycles/blender/blender_python.cpp @@ -90,7 +90,6 @@ bool debug_flags_sync_from_scene(BL::Scene b_scene) /* Synchronize CUDA flags. */ flags.cuda.adaptive_compile = get_boolean(cscene, "debug_use_cuda_adaptive_compile"); /* Synchronize OptiX flags. */ - flags.optix.use_curves_api = get_boolean(cscene, "debug_use_optix_curves_api"); flags.optix.use_debug = get_boolean(cscene, "debug_use_optix_debug"); /* Synchronize OpenCL device type. */ switch (get_enum(cscene, "debug_opencl_device_type")) { diff --git a/intern/cycles/device/optix/device_impl.cpp b/intern/cycles/device/optix/device_impl.cpp index 2d2b596b95b..44f7964304d 100644 --- a/intern/cycles/device/optix/device_impl.cpp +++ b/intern/cycles/device/optix/device_impl.cpp @@ -232,12 +232,13 @@ bool OptiXDevice::load_kernels(const DeviceRequestedFeatures _features # if OPTIX_ABI_VERSION >= 36 pipeline_options.usesPrimitiveTypeFlags = OPTIX_PRIMITIVE_TYPE_FLAGS_TRIANGLE; if (requested_features.use_hair) { -if (DebugFlags().optix.use_curves_api && requested_features.use_hair_thick) { +#if OPTIX_ABI_VERSION >= 47 +if (requested_features.use_hair_thick) { pipeline_options.usesPrimitiveTypeFlags |= OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_CUBIC_BSPLINE; } -else { +else +#endif pipeline_options.usesPrimitiveTypeFlags |= OPTIX_PRIMITIVE_TYPE_FLAGS_CUSTOM; -} } # endif @@ -337,8 +338,8 @@ bool OptiXDevice::load_kernels(const DeviceRequestedFeatures _features group_descs[PG_HITS].hitgroup.entryFunctionNameIS = "__intersection__curve_ribbon"; } -# if OPTIX_ABI_VERSION >= 36 -if (DebugFlags().optix.use_curves_api && requested_features.use_hair_thick) { +# if OPTIX_ABI_VERSION >= 47 +if (requested_features.use_hair_thick) { OptixBuiltinISOptions builtin_options = {}; builtin_options.builtinISModuleType = OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE; builtin_options.usesMotionBlur = false; @@ -409,7 +410,7 @@ bool OptiXDevice::load_kernels(const DeviceRequestedFeatures _features 
trace_css = std::max(trace_css, stack_size[PG_HITD].cssIS + stack_size[PG_HITD].cssAH); trace_css = std::max(trace_css, stack_size[PG_HITS].cssIS + stack_size[PG_HITS].cssAH); trace_css = std::max(trace_css, stack_size[PG_HITL].cssIS + stack_size[PG_HITL].cssAH); -# if OPTIX_ABI_VERSION >= 36 +# if OPTIX_ABI_VERSION >= 47 trace_css = std::max(trace_css, stack_size[PG_HITD_MOTI
[Bf-blender-cvs] [b046bc536be] master: Fix T88096: Baking with OptiX and displacement fails
Commit: b046bc536bec914013c678b552ce6cef7dd308e6 Author: Patrick Mours Date: Tue May 25 16:56:16 2021 +0200 Branches: master https://developer.blender.org/rBb046bc536bec914013c678b552ce6cef7dd308e6 Fix T88096: Baking with OptiX and displacement fails Using displacement runs the shader eval kernel, but since OptiX modules are not loaded when baking is active, those were not available and therefore failed to launch. This fixes that by falling back to the CUDA kernels. === M intern/cycles/device/device_optix.cpp === diff --git a/intern/cycles/device/device_optix.cpp b/intern/cycles/device/device_optix.cpp index 01de0724cb2..b008dfa376f 100644 --- a/intern/cycles/device/device_optix.cpp +++ b/intern/cycles/device/device_optix.cpp @@ -726,7 +726,11 @@ class OptiXDevice : public CUDADevice { } } else if (task.type == DeviceTask::SHADER) { - launch_shader_eval(task, thread_index); + // CUDA kernels are used when doing baking + if (optix_module == NULL) +CUDADevice::shader(task); + else +launch_shader_eval(task, thread_index); } else if (task.type == DeviceTask::DENOISE_BUFFER) { // Set up a single tile that covers the whole task and denoise it ___ Bf-blender-cvs mailing list Bf-blender-cvs@blender.org https://lists.blender.org/mailman/listinfo/bf-blender-cvs
[Bf-blender-cvs] [ffa70e76901] blender-v2.93-release: Fix missing Cycles CPU name for Arm processors
Commit: ffa70e769010a3b7e6b80be6f80b21dfb8713f13 Author: Patrick Mours Date: Thu Apr 29 15:51:29 2021 +0200 Branches: blender-v2.93-release https://developer.blender.org/rBffa70e769010a3b7e6b80be6f80b21dfb8713f13 Fix missing Cycles CPU name for Arm processors === M intern/cycles/util/util_system.cpp === diff --git a/intern/cycles/util/util_system.cpp b/intern/cycles/util/util_system.cpp index 2c1716ce515..6500a59e42c 100644 --- a/intern/cycles/util/util_system.cpp +++ b/intern/cycles/util/util_system.cpp @@ -166,12 +166,33 @@ static void __cpuid(int data[4], int selector) string system_cpu_brand_string() { +#if !defined(WIN32) && !defined(__x86_64__) && !defined(__i386__) + FILE *cpuinfo = fopen("/proc/cpuinfo", "r"); + if (cpuinfo != nullptr) { +char cpuinfo_buf[513] = ""; +fread(cpuinfo_buf, sizeof(cpuinfo_buf) - 1, 1, cpuinfo); +fclose(cpuinfo); + +char *modelname = strstr(cpuinfo_buf, "model name"); +if (modelname != nullptr) { + modelname = strchr(modelname, ':'); + if (modelname != nullptr) { +modelname += 2; +char *modelname_end = strchr(modelname, '\n'); +if (modelname_end != nullptr) { + *modelname_end = '\0'; + return modelname; +} + } +} + } +#else char buf[49] = {0}; int result[4] = {0}; __cpuid(result, 0x8000); - if (result[0] >= (int)0x8004) { + if (result[0] != 0 && result[0] >= (int)0x8004) { __cpuid((int *)(buf + 0), 0x8002); __cpuid((int *)(buf + 16), 0x8003); __cpuid((int *)(buf + 32), 0x8004); @@ -183,7 +204,7 @@ string system_cpu_brand_string() return brand; } - +#endif return "Unknown CPU"; } ___ Bf-blender-cvs mailing list Bf-blender-cvs@blender.org https://lists.blender.org/mailman/listinfo/bf-blender-cvs
[Bf-blender-cvs] [94960250b53] master: Cycles: Fix build with OptiX 7.3 SDK
Commit: 94960250b539c46315808fcb9bdb9d64c0f71eea Author: Patrick Mours Date: Mon Apr 26 14:55:39 2021 +0200 Branches: master https://developer.blender.org/rB94960250b539c46315808fcb9bdb9d64c0f71eea Cycles: Fix build with OptiX 7.3 SDK === M intern/cycles/device/device_optix.cpp === diff --git a/intern/cycles/device/device_optix.cpp b/intern/cycles/device/device_optix.cpp index fcf8fab9cc4..cce11507fa1 100644 --- a/intern/cycles/device/device_optix.cpp +++ b/intern/cycles/device/device_optix.cpp @@ -955,14 +955,21 @@ class OptiXDevice : public CUDADevice { // Create OptiX denoiser handle on demand when it is first used OptixDenoiserOptions denoiser_options = {}; assert(task.denoising.input_passes >= 1 && task.denoising.input_passes <= 3); +# if OPTIX_ABI_VERSION >= 47 +denoiser_options.guideAlbedo = task.denoising.input_passes >= 2; +denoiser_options.guideNormal = task.denoising.input_passes >= 3; +check_result_optix_ret(optixDenoiserCreate( +context, OPTIX_DENOISER_MODEL_KIND_HDR, _options, )); +# else denoiser_options.inputKind = static_cast( OPTIX_DENOISER_INPUT_RGB + (task.denoising.input_passes - 1)); -# if OPTIX_ABI_VERSION < 28 +#if OPTIX_ABI_VERSION < 28 denoiser_options.pixelFormat = OPTIX_PIXEL_FORMAT_FLOAT3; -# endif +#endif check_result_optix_ret(optixDenoiserCreate(context, _options, )); check_result_optix_ret( optixDenoiserSetModel(denoiser, OPTIX_DENOISER_MODEL_KIND_HDR, NULL, 0)); +# endif // OptiX denoiser handle was created with the requested number of input passes denoiser_input_passes = task.denoising.input_passes; @@ -1032,10 +1039,34 @@ class OptiXDevice : public CUDADevice { # endif output_layers[0].format = OPTIX_PIXEL_FORMAT_FLOAT3; +# if OPTIX_ABI_VERSION >= 47 + OptixDenoiserLayer image_layers = {}; + image_layers.input = input_layers[0]; + image_layers.output = output_layers[0]; + + OptixDenoiserGuideLayer guide_layers = {}; + guide_layers.albedo = input_layers[1]; + guide_layers.normal = input_layers[2]; +# endif + // Finally run 
denonising OptixDenoiserParams params = {}; // All parameters are disabled/zero +# if OPTIX_ABI_VERSION >= 47 check_result_optix_ret(optixDenoiserInvoke(denoiser, - 0, + NULL, + , + denoiser_state.device_pointer, + scratch_offset, + _layers, + _layers, + 1, + overlap_offset.x, + overlap_offset.y, + denoiser_state.device_pointer + scratch_offset, + scratch_size)); +# else + check_result_optix_ret(optixDenoiserInvoke(denoiser, + NULL, , denoiser_state.device_pointer, scratch_offset, @@ -1046,6 +1077,7 @@ class OptiXDevice : public CUDADevice { output_layers, denoiser_state.device_pointer + scratch_offset, scratch_size)); +# endif # if OPTIX_DENOISER_NO_PIXEL_STRIDE void *output_args[] = {_ptr, ___ Bf-blender-cvs mailing list Bf-blender-cvs@blender.org https://lists.blender.org/mailman/listinfo/bf-blender-cvs
[Bf-blender-cvs] [847579b4225] master: Add support for building on Linux aarch64
Commit: 847579b422507917c4252ecc5c777bf5e0fc6f09 Author: Patrick Mours Date: Tue Apr 20 14:00:05 2021 +0200 Branches: master https://developer.blender.org/rB847579b422507917c4252ecc5c777bf5e0fc6f09 Add support for building on Linux aarch64 Differential Revision: https://developer.blender.org/D10958 === M build_files/build_environment/CMakeLists.txt M build_files/build_environment/cmake/boost.cmake M build_files/build_environment/cmake/embree.cmake M build_files/build_environment/cmake/gmp.cmake M build_files/build_environment/cmake/harvest.cmake M build_files/build_environment/cmake/llvm.cmake M build_files/build_environment/cmake/opencolorio.cmake M build_files/build_environment/cmake/options.cmake M build_files/build_environment/cmake/png.cmake M build_files/build_environment/cmake/sse2neon.cmake M build_files/build_environment/cmake/ssl.cmake M build_files/build_environment/cmake/ssl.conf M build_files/build_environment/cmake/tbb.cmake M build_files/build_environment/cmake/versions.cmake M build_files/build_environment/cmake/x264.cmake M build_files/build_environment/install_deps.sh M build_files/build_environment/patches/cmakelists_tbb.txt M build_files/build_environment/patches/tbb.diff M build_files/build_environment/patches/theora.diff M build_files/build_environment/patches/usd.diff M build_files/cmake/Modules/FindEmbree.cmake M intern/cycles/util/util_simd.h M intern/cycles/util/util_sseb.h M intern/cycles/util/util_ssef.h M intern/cycles/util/util_ssei.h M intern/cycles/util/util_system.cpp === diff --git a/build_files/build_environment/CMakeLists.txt b/build_files/build_environment/CMakeLists.txt index a3d694b4bc3..fb79eee62be 100644 --- a/build_files/build_environment/CMakeLists.txt +++ b/build_files/build_environment/CMakeLists.txt @@ -113,7 +113,7 @@ include(cmake/expat.cmake) include(cmake/yamlcpp.cmake) include(cmake/opencolorio.cmake) -if(APPLE AND ("${CMAKE_OSX_ARCHITECTURES}" STREQUAL "arm64")) +if(BLENDER_PLATFORM_ARM) 
include(cmake/sse2neon.cmake) endif() diff --git a/build_files/build_environment/cmake/boost.cmake b/build_files/build_environment/cmake/boost.cmake index 8b36af7dc41..5170a3a123e 100644 --- a/build_files/build_environment/cmake/boost.cmake +++ b/build_files/build_environment/cmake/boost.cmake @@ -18,6 +18,12 @@ set(BOOST_ADDRESS_MODEL 64) +if(BLENDER_PLATFORM_ARM) + set(BOOST_ARCHITECTURE arm) +else() + set(BOOST_ARCHITECTURE x86) +endif() + if(WIN32) set(BOOST_TOOLSET toolset=msvc-14.1) set(BOOST_COMPILER_STRING -vc141) @@ -29,7 +35,6 @@ if(WIN32) if(BUILD_MODE STREQUAL Release) set(BOOST_HARVEST_CMD ${BOOST_HARVEST_CMD} && ${CMAKE_COMMAND} -E copy_directory ${LIBDIR}/boost/include/boost-${BOOST_VERSION_NODOTS_SHORT}/ ${HARVEST_TARGET}/boost/include/) endif() - elseif(APPLE) set(BOOST_CONFIGURE_COMMAND ./bootstrap.sh) set(BOOST_BUILD_COMMAND ./b2) @@ -93,7 +98,7 @@ ExternalProject_Add(external_boost UPDATE_COMMAND "" PATCH_COMMAND ${BOOST_PATCH_COMMAND} CONFIGURE_COMMAND ${BOOST_CONFIGURE_COMMAND} - BUILD_COMMAND ${BOOST_BUILD_COMMAND} ${BOOST_BUILD_OPTIONS} -j${MAKE_THREADS} architecture=x86 address-model=${BOOST_ADDRESS_MODEL} link=static threading=multi ${BOOST_OPTIONS}--prefix=${LIBDIR}/boost install + BUILD_COMMAND ${BOOST_BUILD_COMMAND} ${BOOST_BUILD_OPTIONS} -j${MAKE_THREADS} architecture=${BOOST_ARCHITECTURE} address-model=${BOOST_ADDRESS_MODEL} link=static threading=multi ${BOOST_OPTIONS}--prefix=${LIBDIR}/boost install BUILD_IN_SOURCE 1 INSTALL_COMMAND "${BOOST_HARVEST_CMD}" ) diff --git a/build_files/build_environment/cmake/embree.cmake b/build_files/build_environment/cmake/embree.cmake index 4830630def0..cd693d766dc 100644 --- a/build_files/build_environment/cmake/embree.cmake +++ b/build_files/build_environment/cmake/embree.cmake @@ -47,7 +47,7 @@ else() set(EMBREE_BUILD_DIR) endif() -if(APPLE AND ("${CMAKE_OSX_ARCHITECTURES}" STREQUAL "arm64")) +if(BLENDER_PLATFORM_ARM) ExternalProject_Add(external_embree GIT_REPOSITORY ${EMBREE_ARM_GIT} GIT_TAG 
"blender-arm" diff --git a/build_files/build_environment/cmake/gmp.cmake b/build_files/build_environment/cmake/gmp.cmake index 323630a63aa..6ca81678a32 100644 --- a/build_files/build_environment/cmake/gmp.cmake +++ b/build_files/build_environment/cmake/gmp.cmake @@ -25,19 +25,12 @@ else() set(GMP_OPTIONS --enable-static --disable-shared ) endif() -if(APPLE) - if("${CMAKE_OSX_ARCHITECTURES}" STREQUAL "arm64") -set(GMP_OPTIONS - ${GMP_OPTIONS} - --disable-assembly -) - else() -set(GMP_OPTIONS - ${GMP
[Bf-blender-cvs] [7cbd66d42fb] master: Cycles: Initialize all OptiX structs to zero before use
Commit: 7cbd66d42fb3f43b26f7dbea61f182f00987eafb Author: Patrick Mours Date: Tue Apr 13 13:43:34 2021 +0200 Branches: master https://developer.blender.org/rB7cbd66d42fb3f43b26f7dbea61f182f00987eafb Cycles: Initialize all OptiX structs to zero before use This is done to ensure building with newer OptiX SDK releases that add new struct fields gives deterministic results (no uninitialized fields and therefore random data is passed to OptiX). === M intern/cycles/device/device_optix.cpp === diff --git a/intern/cycles/device/device_optix.cpp b/intern/cycles/device/device_optix.cpp index 51e1a0033ba..fcf8fab9cc4 100644 --- a/intern/cycles/device/device_optix.cpp +++ b/intern/cycles/device/device_optix.cpp @@ -362,7 +362,7 @@ class OptiXDevice : public CUDADevice { } } -OptixModuleCompileOptions module_options; +OptixModuleCompileOptions module_options = {}; module_options.maxRegisterCount = 0; // Do not set an explicit register limit # ifdef WITH_CYCLES_DEBUG module_options.optLevel = OPTIX_COMPILE_OPTIMIZATION_LEVEL_0; @@ -377,7 +377,7 @@ class OptiXDevice : public CUDADevice { module_options.numBoundValues = 0; # endif -OptixPipelineCompileOptions pipeline_options; +OptixPipelineCompileOptions pipeline_options = {}; // Default to no motion blur and two-level graph, since it is the fastest option pipeline_options.usesMotionBlur = false; pipeline_options.traversableGraphFlags = @@ -477,7 +477,7 @@ class OptiXDevice : public CUDADevice { # if OPTIX_ABI_VERSION >= 36 if (DebugFlags().optix.curves_api && requested_features.use_hair_thick) { -OptixBuiltinISOptions builtin_options; +OptixBuiltinISOptions builtin_options = {}; builtin_options.builtinISModuleType = OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE; builtin_options.usesMotionBlur = false; @@ -571,7 +571,7 @@ class OptiXDevice : public CUDADevice { stack_size[PG_HITS_MOTION].cssIS + stack_size[PG_HITS_MOTION].cssAH); # endif -OptixPipelineLinkOptions link_options; +OptixPipelineLinkOptions link_options = {}; 
link_options.maxTraceDepth = 1; # ifdef WITH_CYCLES_DEBUG link_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_FULL; @@ -953,7 +953,7 @@ class OptiXDevice : public CUDADevice { } // Create OptiX denoiser handle on demand when it is first used -OptixDenoiserOptions denoiser_options; +OptixDenoiserOptions denoiser_options = {}; assert(task.denoising.input_passes >= 1 && task.denoising.input_passes <= 3); denoiser_options.inputKind = static_cast( OPTIX_DENOISER_INPUT_RGB + (task.denoising.input_passes - 1)); @@ -1157,7 +1157,7 @@ class OptiXDevice : public CUDADevice { // Compute memory usage OptixAccelBufferSizes sizes = {}; -OptixAccelBuildOptions options; +OptixAccelBuildOptions options = {}; options.operation = operation; if (background) { // Prefer best performance and lowest memory consumption in background @@ -1195,7 +1195,7 @@ class OptiXDevice : public CUDADevice { } // Finally build the acceleration structure -OptixAccelEmitDesc compacted_size_prop; +OptixAccelEmitDesc compacted_size_prop = {}; compacted_size_prop.type = OPTIX_PROPERTY_TYPE_COMPACTED_SIZE; // A tiny space was allocated for this property at the end of the temporary buffer above // Make sure this pointer is 8-byte aligned ___ Bf-blender-cvs mailing list Bf-blender-cvs@blender.org https://lists.blender.org/mailman/listinfo/bf-blender-cvs
[Bf-blender-cvs] [f1fe42d912f] master: Cycles: Do not allocate tile buffers on all devices when peer memory is active and denoising is not
Commit: f1fe42d912f088259bbc82d597121978204e991d Author: Patrick Mours Date: Tue Mar 30 12:59:03 2021 +0200 Branches: master https://developer.blender.org/rBf1fe42d912f088259bbc82d597121978204e991d Cycles: Do not allocate tile buffers on all devices when peer memory is active and denoising is not Separate tile buffers on all devices only need to exist when denoising is active (so any overlap being rendered simultaneously does not write to the same memory region). When denoising is not active they can be distributed like all other memory when peer memory support is available. Reviewed By: brecht Differential Revision: https://developer.blender.org/D10858 === M intern/cycles/device/device_multi.cpp === diff --git a/intern/cycles/device/device_multi.cpp b/intern/cycles/device/device_multi.cpp index b272e59f99d..35faadcbec5 100644 --- a/intern/cycles/device/device_multi.cpp +++ b/intern/cycles/device/device_multi.cpp @@ -46,10 +46,13 @@ class MultiDevice : public Device { list devices, denoising_devices; device_ptr unique_key; vector> peer_islands; + bool use_denoising; bool matching_rendering_and_denoising_devices; MultiDevice(DeviceInfo , Stats , Profiler , bool background_) - : Device(info, stats, profiler, background_), unique_key(1) + : Device(info, stats, profiler, background_), +unique_key(1), +use_denoising(!info.denoising_devices.empty()) { foreach (DeviceInfo , info.multi_devices) { /* Always add CPU devices at the back since GPU devices can change @@ -194,6 +197,7 @@ class MultiDevice : public Device { if (!sub.device->load_kernels(requested_features)) return false; +use_denoising = requested_features.use_denoising; if (requested_features.use_denoising) { /* Only need denoising feature, everything else is unused. 
*/ DeviceRequestedFeatures denoising_features; @@ -400,7 +404,7 @@ class MultiDevice : public Device { size_t existing_size = mem.device_size; /* The tile buffers are allocated on each device (see below), so copy to all of them */ -if (strcmp(mem.name, "RenderBuffers") == 0) { +if (strcmp(mem.name, "RenderBuffers") == 0 && use_denoising) { foreach (SubDevice , devices) { mem.device = sub.device; mem.device_pointer = (existing_key) ? sub.ptr_map[existing_key] : 0; @@ -466,7 +470,7 @@ class MultiDevice : public Device { /* This is a hack to only allocate the tile buffers on denoising devices * Similarly the tile buffers also need to be allocated separately on all devices so any * overlap rendered for denoising does not interfere with each other */ -if (strcmp(mem.name, "RenderBuffers") == 0) { +if (strcmp(mem.name, "RenderBuffers") == 0 && use_denoising) { vector device_pointers; device_pointers.reserve(devices.size()); @@ -518,7 +522,7 @@ class MultiDevice : public Device { size_t existing_size = mem.device_size; /* Free memory that was allocated for all devices (see above) on each device */ -if (strcmp(mem.name, "RenderBuffers") == 0 || mem.type == MEM_PIXELS) { +if (mem.type == MEM_PIXELS || (strcmp(mem.name, "RenderBuffers") == 0 && use_denoising)) { foreach (SubDevice , devices) { mem.device = sub.device; mem.device_pointer = sub.ptr_map[key]; ___ Bf-blender-cvs mailing list Bf-blender-cvs@blender.org https://lists.blender.org/mailman/listinfo/bf-blender-cvs
[Bf-blender-cvs] [f4f8b6dde32] master: Cycles: Change device-only memory to actually only allocate on the device
Commit: f4f8b6dde32b0438e0b97a6d8ebeb89802987127 Author: Patrick Mours Date: Wed Mar 3 14:35:50 2021 +0100 Branches: master https://developer.blender.org/rBf4f8b6dde32b0438e0b97a6d8ebeb89802987127 Cycles: Change device-only memory to actually only allocate on the device This patch changes the `MEM_DEVICE_ONLY` type to only allocate on the device and fail if that is no longer possible because the device is out of memory (since OptiX acceleration structures may not be allocated in host memory). It also fixes high peak memory usage during OptiX acceleration structure building. Reviewed By: brecht Maniphest Tasks: T85985 Differential Revision: https://developer.blender.org/D10535 === M intern/cycles/bvh/bvh_optix.cpp M intern/cycles/device/cuda/device_cuda_impl.cpp M intern/cycles/device/device_cpu.cpp M intern/cycles/device/device_denoising.h M intern/cycles/device/device_memory.h M intern/cycles/device/device_optix.cpp === diff --git a/intern/cycles/bvh/bvh_optix.cpp b/intern/cycles/bvh/bvh_optix.cpp index e094f339ede..d630e8965dc 100644 --- a/intern/cycles/bvh/bvh_optix.cpp +++ b/intern/cycles/bvh/bvh_optix.cpp @@ -27,8 +27,8 @@ BVHOptiX::BVHOptiX(const BVHParams _, Device *device) : BVH(params_, geometry_, objects_), traversable_handle(0), - as_data(device, params_.top_level ? "optix tlas" : "optix blas"), - motion_transform_data(device, "optix motion transform") + as_data(device, params_.top_level ? 
"optix tlas" : "optix blas", false), + motion_transform_data(device, "optix motion transform", false) { } diff --git a/intern/cycles/device/cuda/device_cuda_impl.cpp b/intern/cycles/device/cuda/device_cuda_impl.cpp index 44a51835f4c..5b62292ca55 100644 --- a/intern/cycles/device/cuda/device_cuda_impl.cpp +++ b/intern/cycles/device/cuda/device_cuda_impl.cpp @@ -854,7 +854,7 @@ CUDADevice::CUDAMem *CUDADevice::generic_alloc(device_memory , size_t pitch_ void *shared_pointer = 0; - if (mem_alloc_result != CUDA_SUCCESS && can_map_host) { + if (mem_alloc_result != CUDA_SUCCESS && can_map_host && mem.type != MEM_DEVICE_ONLY) { if (mem.shared_pointer) { /* Another device already allocated host memory. */ mem_alloc_result = CUDA_SUCCESS; @@ -877,8 +877,14 @@ CUDADevice::CUDAMem *CUDADevice::generic_alloc(device_memory , size_t pitch_ } if (mem_alloc_result != CUDA_SUCCESS) { -status = " failed, out of device and host memory"; -set_error("System is out of GPU and shared host memory"); +if (mem.type == MEM_DEVICE_ONLY) { + status = " failed, out of device memory"; + set_error("System is out of GPU memory"); +} +else { + status = " failed, out of device and host memory"; + set_error("System is out of GPU and shared host memory"); +} } if (mem.name) { diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp index fdfd3f83be6..e2f9c7391da 100644 --- a/intern/cycles/device/device_cpu.cpp +++ b/intern/cycles/device/device_cpu.cpp @@ -396,8 +396,7 @@ class CPUDevice : public Device { << string_human_readable_size(mem.memory_size()) << ")"; } - if (mem.type == MEM_DEVICE_ONLY) { -assert(!mem.host_pointer); + if (mem.type == MEM_DEVICE_ONLY || !mem.host_pointer) { size_t alignment = MIN_ALIGNMENT_CPU_DATA_TYPES; void *data = util_aligned_malloc(mem.memory_size(), alignment); mem.device_pointer = (device_ptr)data; @@ -459,7 +458,7 @@ class CPUDevice : public Device { tex_free((device_texture &)mem); } else if (mem.device_pointer) { - if (mem.type == 
MEM_DEVICE_ONLY) { + if (mem.type == MEM_DEVICE_ONLY || !mem.host_pointer) { util_aligned_free((void *)mem.device_pointer); } mem.device_pointer = 0; diff --git a/intern/cycles/device/device_denoising.h b/intern/cycles/device/device_denoising.h index 2c0dc23b44a..bb8bdfdd225 100644 --- a/intern/cycles/device/device_denoising.h +++ b/intern/cycles/device/device_denoising.h @@ -171,7 +171,8 @@ class DenoisingTask { bool gpu_temporary_mem; DenoiseBuffers(Device *device) -: mem(device, "denoising pixel buffer"), temporary_mem(device, "denoising temporary mem") +: mem(device, "denoising pixel buffer"), + temporary_mem(device, "denoising temporary mem", true) { } } buffer; diff --git a/intern/cycles/device/device_memory.h b/intern/cycles/device/device_memory.h index 1f63a152458..97459b9ae6a 100644 --- a/intern/cycles/device/device_memory.h +++ b/intern/cycles/device/devic
[Bf-blender-cvs] [b2e1b13abde] master: Cycles: Add option to change input passes for viewport denoising
Commit: b2e1b13abde787c2aad97d5c317357cf84360bdb Author: Patrick Mours Date: Mon Feb 22 18:09:48 2021 +0100 Branches: master https://developer.blender.org/rBb2e1b13abde787c2aad97d5c317357cf84360bdb Cycles: Add option to change input passes for viewport denoising There are cases where the default input passes of color+albedo do not yield useful results and while it was possible to change that for final frame rendering (in the layer settings), viewport denoising always used a fixed color+albedo. This adds an option to change the input passes for viewport denoising too, so that one can use it in scenes that otherwise wouldn't work well with it. Reviewed By: brecht Differential Revision: https://developer.blender.org/D10404 === M intern/cycles/blender/addon/properties.py M intern/cycles/blender/addon/ui.py M intern/cycles/blender/blender_sync.cpp === diff --git a/intern/cycles/blender/addon/properties.py b/intern/cycles/blender/addon/properties.py index 0708c371a0e..dc4437bdc52 100644 --- a/intern/cycles/blender/addon/properties.py +++ b/intern/cycles/blender/addon/properties.py @@ -646,6 +646,12 @@ class CyclesRenderSettings(bpy.types.PropertyGroup): min=0, max=(1 << 24), default=1, ) +preview_denoising_input_passes: EnumProperty( +name="Viewport Input Passes", +description="Passes used by the denoiser to distinguish noise from shader and geometry detail", +items=enum_denoising_input_passes, +default='RGB_ALBEDO', +) debug_reset_timeout: FloatProperty( name="Reset timeout", @@ -1434,7 +1440,6 @@ class CyclesRenderLayerSettings(bpy.types.PropertyGroup): items=enum_denoising_input_passes, default='RGB_ALBEDO', ) - denoising_openimagedenoise_input_passes: EnumProperty( name="Input Passes", description="Passes used by the denoiser to distinguish noise from shader and geometry detail", diff --git a/intern/cycles/blender/addon/ui.py b/intern/cycles/blender/addon/ui.py index 68f6291b373..c9b4dc25cf2 100644 --- a/intern/cycles/blender/addon/ui.py +++ 
b/intern/cycles/blender/addon/ui.py @@ -275,6 +275,8 @@ class CYCLES_RENDER_PT_sampling_denoising(CyclesButtonsPanel, Panel): sub.prop(cscene, "denoiser", text="") +layout.separator() + heading = layout.column(align=False, heading="Viewport") row = heading.row(align=True) row.prop(cscene, "use_preview_denoising", text="") @@ -285,6 +287,9 @@ class CYCLES_RENDER_PT_sampling_denoising(CyclesButtonsPanel, Panel): sub = heading.row(align=True) sub.active = cscene.use_preview_denoising sub.prop(cscene, "preview_denoising_start_sample", text="Start Sample") +sub = heading.row(align=True) +sub.active = cscene.use_preview_denoising +sub.prop(cscene, "preview_denoising_input_passes", text="Input Passes") class CYCLES_RENDER_PT_sampling_advanced(CyclesButtonsPanel, Panel): diff --git a/intern/cycles/blender/blender_sync.cpp b/intern/cycles/blender/blender_sync.cpp index b6a5f67ec2d..0e61f4f2615 100644 --- a/intern/cycles/blender/blender_sync.cpp +++ b/intern/cycles/blender/blender_sync.cpp @@ -1005,6 +1005,9 @@ DenoiseParams BlenderSync::get_denoise_params(BL::Scene _scene, cscene, "preview_denoiser", DENOISER_NUM, DENOISER_NONE); denoising.start_sample = get_int(cscene, "preview_denoising_start_sample"); +denoising.input_passes = (DenoiserInput)get_enum( +cscene, "preview_denoising_input_passes", DENOISER_INPUT_NUM, (int)denoising.input_passes); + /* Auto select fastest denoiser. */ if (denoising.type == DENOISER_NONE) { if (!Device::available_devices(DEVICE_MASK_OPTIX).empty()) { ___ Bf-blender-cvs mailing list Bf-blender-cvs@blender.org https://lists.blender.org/mailman/listinfo/bf-blender-cvs
[Bf-blender-cvs] [c661515090b] blender-v2.83-release: Cycles: Add CUDA 11 build support
Commit: c661515090b189061c4388a335573ac3d5745925 Author: Patrick Mours Date: Tue Oct 13 12:33:34 2020 +0200 Branches: blender-v2.83-release https://developer.blender.org/rBc661515090b189061c4388a335573ac3d5745925 Cycles: Add CUDA 11 build support With this patch the build system checks whether the "CUDA10_NVCC_EXECUTABLE" CMake variable is set and if so will use that to build sm_30 kernels. Similarly for sm_8x kernels it checks "CUDA11_NVCC_EXECUTABLE". All other kernels are built using the default CUDA toolkit. This makes it possible to use either the CUDA 10 or CUDA 11 toolkit by default and only selectively use the other for the kernels where it's a hard requirement. Reviewed By: brecht Differential Revision: https://developer.blender.org/D9179 === M CMakeLists.txt M build_files/buildbot/worker_compile.py M build_files/cmake/config/blender_release.cmake M intern/cycles/CMakeLists.txt M intern/cycles/kernel/CMakeLists.txt === diff --git a/CMakeLists.txt b/CMakeLists.txt index 6f705ffbe44..b15bbb7486b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -369,7 +369,7 @@ option(WITH_CYCLES_CUDA_BINARIES"Build Cycles CUDA binaries" OFF) option(WITH_CYCLES_CUBIN_COMPILER "Build cubins with nvrtc based compiler instead of nvcc" OFF) option(WITH_CYCLES_CUDA_BUILD_SERIAL "Build cubins one after another (useful on machines with limited RAM)" OFF) mark_as_advanced(WITH_CYCLES_CUDA_BUILD_SERIAL) -set(CYCLES_CUDA_BINARIES_ARCH sm_30 sm_35 sm_37 sm_50 sm_52 sm_60 sm_61 sm_70 sm_75 compute_75 CACHE STRING "CUDA architectures to build binaries for") +set(CYCLES_CUDA_BINARIES_ARCH sm_30 sm_35 sm_37 sm_50 sm_52 sm_60 sm_61 sm_70 sm_75 sm_86 compute_75 CACHE STRING "CUDA architectures to build binaries for") mark_as_advanced(CYCLES_CUDA_BINARIES_ARCH) unset(PLATFORM_DEFAULT) option(WITH_CYCLES_LOGGING "Build Cycles with logging support" ON) diff --git a/build_files/buildbot/worker_compile.py b/build_files/buildbot/worker_compile.py index 340f507df4c..d5482571c11 100644 --- 
a/build_files/buildbot/worker_compile.py +++ b/build_files/buildbot/worker_compile.py @@ -44,13 +44,17 @@ def get_cmake_options(builder): optix_sdk_dir = os.path.join(builder.blender_dir, '..', '..', 'NVIDIA-Optix-SDK') options.append('-DOPTIX_ROOT_DIR:PATH=' + optix_sdk_dir) -# Workers have multiple CUDA versions installed. Select 10.1 for Blender 2.83 releases. +# Workaround to build sm_30 kernels with CUDA 10, since CUDA 11 no longer supports that architecture if builder.platform == 'win': -options.append('-DCUDA_TOOLKIT_ROOT_DIR:PATH=C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1') -options.append('-DCUDA_NVCC_EXECUTABLE:FILEPATH=C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1/bin/nvcc.exe') +options.append('-DCUDA10_TOOLKIT_ROOT_DIR:PATH=C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1') +options.append('-DCUDA10_NVCC_EXECUTABLE:FILEPATH=C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1/bin/nvcc.exe') +options.append('-DCUDA11_TOOLKIT_ROOT_DIR:PATH=C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.1') +options.append('-DCUDA11_NVCC_EXECUTABLE:FILEPATH=C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.1/bin/nvcc.exe') elif builder.platform == 'linux': -options.append('-DCUDA_TOOLKIT_ROOT_DIR:PATH=/usr/local/cuda-10.1') - options.append('-DCUDA_NVCC_EXECUTABLE:FILEPATH=/usr/local/cuda-10.1/bin/nvcc') +options.append('-DCUDA10_TOOLKIT_ROOT_DIR:PATH=/usr/local/cuda-10.1') + options.append('-DCUDA10_NVCC_EXECUTABLE:FILEPATH=/usr/local/cuda-10.1/bin/nvcc') +options.append('-DCUDA11_TOOLKIT_ROOT_DIR:PATH=/usr/local/cuda-11.1') + options.append('-DCUDA11_NVCC_EXECUTABLE:FILEPATH=/usr/local/cuda-11.1/bin/nvcc') options.append("-C" + os.path.join(builder.blender_dir, config_file)) options.append("-DCMAKE_INSTALL_PREFIX=%s" % (builder.install_dir)) diff --git a/build_files/cmake/config/blender_release.cmake b/build_files/cmake/config/blender_release.cmake index 2d52fb22c86..e1b7560e4fc 100644 --- 
a/build_files/cmake/config/blender_release.cmake +++ b/build_files/cmake/config/blender_release.cmake @@ -52,7 +52,7 @@ set(WITH_USD ON CACHE BOOL "" FORCE) set(WITH_MEM_JEMALLOC ON CACHE BOOL "" FORCE) set(WITH_CYCLES_CUDA_BINARIES ON CACHE BOOL "" FORCE) set(WITH_CYCLES_CUBIN_COMPILER OFF CACHE BOOL "" FORCE) -set(CYCLES_CUDA_BINARIES_ARCH sm_30;sm_35;sm_37;sm_50;sm_52;sm_60;sm_61;sm_70;sm_75;compute_75 CACHE STRING "" FORCE) +set(CYCLES_CUDA_BINARIES_ARCH sm_30;sm_35;sm_37;sm_50;sm_52;sm_60;sm_61;s
[Bf-blender-cvs] [f00ba344115] blender-v2.83-release: Cycles: Enable OptiX on first generation Maxwell GPUs again
Commit: f00ba344115ca07f255fdd6088956e4d035714a0 Author: Patrick Mours Date: Mon Jul 27 16:11:00 2020 +0200 Branches: blender-v2.83-release https://developer.blender.org/rBf00ba344115ca07f255fdd6088956e4d035714a0 Cycles: Enable OptiX on first generation Maxwell GPUs again === M intern/cycles/device/device_optix.cpp M intern/cycles/kernel/CMakeLists.txt === diff --git a/intern/cycles/device/device_optix.cpp b/intern/cycles/device/device_optix.cpp index 2b28d1e1dbb..db04c13d083 100644 --- a/intern/cycles/device/device_optix.cpp +++ b/intern/cycles/device/device_optix.cpp @@ -1543,11 +1543,10 @@ void device_optix_info(const vector _devices, vector" --target 52 +-target 50 -ptx -i ${CMAKE_CURRENT_SOURCE_DIR}/${input} ${cuda_flags} @@ -573,7 +573,7 @@ if(WITH_CYCLES_DEVICE_OPTIX AND WITH_CYCLES_CUDA_BINARIES) COMMAND ${CUDA_NVCC_EXECUTABLE} --ptx - -arch=sm_52 + -arch=sm_50 ${cuda_flags} ${input} WORKING_DIRECTORY ___ Bf-blender-cvs mailing list Bf-blender-cvs@blender.org https://lists.blender.org/mailman/listinfo/bf-blender-cvs
[Bf-blender-cvs] [b4bddf2e3bd] blender-v2.83-release: Fix OptiX being shown as available on first generation Maxwell GPUs
Commit: b4bddf2e3bd3e33a9ca7488fc78112f8a5fa0544 Author: Patrick Mours Date: Fri Jul 24 15:36:09 2020 +0200 Branches: blender-v2.83-release https://developer.blender.org/rBb4bddf2e3bd3e33a9ca7488fc78112f8a5fa0544 Fix OptiX being shown as available on first generation Maxwell GPUs The OptiX kernels are compiled for target "compute_sm_52", which is only available on second generation Maxwell GPUs, so disable support for older ones. === M intern/cycles/device/device_optix.cpp === diff --git a/intern/cycles/device/device_optix.cpp b/intern/cycles/device/device_optix.cpp index db04c13d083..2b28d1e1dbb 100644 --- a/intern/cycles/device/device_optix.cpp +++ b/intern/cycles/device/device_optix.cpp @@ -1543,10 +1543,11 @@ void device_optix_info(const vector _devices, vectorhttps://lists.blender.org/mailman/listinfo/bf-blender-cvs
[Bf-blender-cvs] [08aaa07adbd] blender-v2.83-release: Cycles: Use pre-compiled PTX kernel for older generation when no matching one is found
Commit: 08aaa07adbd46e27f4226f29559be156f14a524b Author: Patrick Mours Date: Fri Jul 17 15:06:55 2020 +0200 Branches: blender-v2.83-release https://developer.blender.org/rB08aaa07adbd46e27f4226f29559be156f14a524b Cycles: Use pre-compiled PTX kernel for older generation when no matching one is found This patch changes the discovery of pre-compiled kernels, to look for any PTX, even if it does not match the current architecture version exactly. It works because the driver can JIT-compile PTX generated for architectures less than or equal to the current one. This e.g. makes it possible to render on a new GPU architecture even if no pre-compiled binary kernel was distributed for it as part of the Blender installation. Reviewed By: brecht Differential Revision: https://developer.blender.org/D8332 === M CMakeLists.txt M build_files/cmake/config/blender_release.cmake M intern/cycles/device/cuda/device_cuda_impl.cpp M intern/cycles/kernel/CMakeLists.txt === diff --git a/CMakeLists.txt b/CMakeLists.txt index 83f547eb593..6f705ffbe44 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -369,7 +369,7 @@ option(WITH_CYCLES_CUDA_BINARIES"Build Cycles CUDA binaries" OFF) option(WITH_CYCLES_CUBIN_COMPILER "Build cubins with nvrtc based compiler instead of nvcc" OFF) option(WITH_CYCLES_CUDA_BUILD_SERIAL "Build cubins one after another (useful on machines with limited RAM)" OFF) mark_as_advanced(WITH_CYCLES_CUDA_BUILD_SERIAL) -set(CYCLES_CUDA_BINARIES_ARCH sm_30 sm_35 sm_37 sm_50 sm_52 sm_60 sm_61 sm_70 sm_75 CACHE STRING "CUDA architectures to build binaries for") +set(CYCLES_CUDA_BINARIES_ARCH sm_30 sm_35 sm_37 sm_50 sm_52 sm_60 sm_61 sm_70 sm_75 compute_75 CACHE STRING "CUDA architectures to build binaries for") mark_as_advanced(CYCLES_CUDA_BINARIES_ARCH) unset(PLATFORM_DEFAULT) option(WITH_CYCLES_LOGGING "Build Cycles with logging support" ON) diff --git a/build_files/cmake/config/blender_release.cmake b/build_files/cmake/config/blender_release.cmake index 
01a59e451aa..2d52fb22c86 100644 --- a/build_files/cmake/config/blender_release.cmake +++ b/build_files/cmake/config/blender_release.cmake @@ -52,7 +52,7 @@ set(WITH_USD ON CACHE BOOL "" FORCE) set(WITH_MEM_JEMALLOC ON CACHE BOOL "" FORCE) set(WITH_CYCLES_CUDA_BINARIES ON CACHE BOOL "" FORCE) set(WITH_CYCLES_CUBIN_COMPILER OFF CACHE BOOL "" FORCE) -set(CYCLES_CUDA_BINARIES_ARCH sm_30;sm_35;sm_37;sm_50;sm_52;sm_60;sm_61;sm_70;sm_75 CACHE STRING "" FORCE) +set(CYCLES_CUDA_BINARIES_ARCH sm_30;sm_35;sm_37;sm_50;sm_52;sm_60;sm_61;sm_70;sm_75;compute_75 CACHE STRING "" FORCE) set(WITH_CYCLES_DEVICE_OPTIX ON CACHE BOOL "" FORCE) # platform dependent options diff --git a/intern/cycles/device/cuda/device_cuda_impl.cpp b/intern/cycles/device/cuda/device_cuda_impl.cpp index ba5d479e0e7..870f9f9ecf9 100644 --- a/intern/cycles/device/cuda/device_cuda_impl.cpp +++ b/intern/cycles/device/cuda/device_cuda_impl.cpp @@ -352,11 +352,24 @@ string CUDADevice::compile_kernel(const DeviceRequestedFeatures _featu } } -const string ptx = path_get(string_printf("lib/%s_compute_%d%d.ptx", name, major, minor)); -VLOG(1) << "Testing for pre-compiled kernel " << ptx << "."; -if (path_exists(ptx)) { - VLOG(1) << "Using precompiled kernel."; - return ptx; +/* The driver can JIT-compile PTX generated for older generations, so find the closest one. 
*/ +int ptx_major = major, ptx_minor = minor; +while (ptx_major >= 3) { + const string ptx = path_get( + string_printf("lib/%s_compute_%d%d.ptx", name, ptx_major, ptx_minor)); + VLOG(1) << "Testing for pre-compiled kernel " << ptx << "."; + if (path_exists(ptx)) { +VLOG(1) << "Using precompiled kernel."; +return ptx; + } + + if (ptx_minor > 0) { +ptx_minor--; + } + else { +ptx_major--; +ptx_minor = 9; + } } } diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index 2e839a616e9..6ab0b9d39d2 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -549,7 +549,7 @@ if(WITH_CYCLES_DEVICE_OPTIX AND WITH_CYCLES_CUDA_BINARIES) ${SRC_UTIL_HEADERS} COMMAND ${CUBIN_CC_ENV} "$" --target 30 +-target 52 -ptx -i ${CMAKE_CURRENT_SOURCE_DIR}/${input} ${cuda_flags} @@ -573,7 +573,7 @@ if(WITH_CYCLES_DEVICE_OPTIX AND WITH_CYCLES_CUDA_BINARIES) COMMAND
[Bf-blender-cvs] [510541563ef] blender-v2.83-release: Cycles: Enable OptiX on all Maxwell+ GPUs
Commit: 510541563efa8f34e3ed6632e53aef31c3665a2f Author: Patrick Mours Date: Fri Jun 5 12:33:00 2020 +0200 Branches: blender-v2.83-release https://developer.blender.org/rB510541563efa8f34e3ed6632e53aef31c3665a2f Cycles: Enable OptiX on all Maxwell+ GPUs === M intern/cycles/device/device_optix.cpp === diff --git a/intern/cycles/device/device_optix.cpp b/intern/cycles/device/device_optix.cpp index 441fa35f8af..db04c13d083 100644 --- a/intern/cycles/device/device_optix.cpp +++ b/intern/cycles/device/device_optix.cpp @@ -1537,34 +1537,22 @@ bool device_optix_init() void device_optix_info(const vector _devices, vector ) { + devices.reserve(cuda_devices.size()); + // Simply add all supported CUDA devices as OptiX devices again - for (const DeviceInfo _info : cuda_devices) { -DeviceInfo info = cuda_info; + for (DeviceInfo info : cuda_devices) { assert(info.type == DEVICE_CUDA); -info.type = DEVICE_OPTIX; -info.id += "_OptiX"; -// Figure out RTX support -CUdevice cuda_device = 0; -CUcontext cuda_context = NULL; -unsigned int rtcore_version = 0; -if (cuDeviceGet(_device, info.num) == CUDA_SUCCESS && -cuDevicePrimaryCtxRetain(_context, cuda_device) == CUDA_SUCCESS) { - OptixDeviceContext optix_context = NULL; - if (optixDeviceContextCreate(cuda_context, nullptr, _context) == OPTIX_SUCCESS) { -optixDeviceContextGetProperty(optix_context, - OPTIX_DEVICE_PROPERTY_RTCORE_VERSION, - _version, - sizeof(rtcore_version)); -optixDeviceContextDestroy(optix_context); - } - cuDevicePrimaryCtxRelease(cuda_device); +int major; +cuDeviceGetAttribute(, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, info.num); +if (major < 5) { + continue; // Only Maxwell and up are supported by OptiX } -// Only add devices with RTX support -if (rtcore_version != 0 || getenv("CYCLES_OPTIX_TEST")) { - devices.push_back(info); -} +info.type = DEVICE_OPTIX; +info.id += "_OptiX"; + +devices.push_back(info); } } ___ Bf-blender-cvs mailing list Bf-blender-cvs@blender.org 
https://lists.blender.org/mailman/listinfo/bf-blender-cvs
[Bf-blender-cvs] [510541563ef] tmp-2.83-cycles-rtx3-kernels: Cycles: Enable OptiX on all Maxwell+ GPUs
Commit: 510541563efa8f34e3ed6632e53aef31c3665a2f Author: Patrick Mours Date: Fri Jun 5 12:33:00 2020 +0200 Branches: tmp-2.83-cycles-rtx3-kernels https://developer.blender.org/rB510541563efa8f34e3ed6632e53aef31c3665a2f Cycles: Enable OptiX on all Maxwell+ GPUs === M intern/cycles/device/device_optix.cpp === diff --git a/intern/cycles/device/device_optix.cpp b/intern/cycles/device/device_optix.cpp index 441fa35f8af..db04c13d083 100644 --- a/intern/cycles/device/device_optix.cpp +++ b/intern/cycles/device/device_optix.cpp @@ -1537,34 +1537,22 @@ bool device_optix_init() void device_optix_info(const vector _devices, vector ) { + devices.reserve(cuda_devices.size()); + // Simply add all supported CUDA devices as OptiX devices again - for (const DeviceInfo _info : cuda_devices) { -DeviceInfo info = cuda_info; + for (DeviceInfo info : cuda_devices) { assert(info.type == DEVICE_CUDA); -info.type = DEVICE_OPTIX; -info.id += "_OptiX"; -// Figure out RTX support -CUdevice cuda_device = 0; -CUcontext cuda_context = NULL; -unsigned int rtcore_version = 0; -if (cuDeviceGet(_device, info.num) == CUDA_SUCCESS && -cuDevicePrimaryCtxRetain(_context, cuda_device) == CUDA_SUCCESS) { - OptixDeviceContext optix_context = NULL; - if (optixDeviceContextCreate(cuda_context, nullptr, _context) == OPTIX_SUCCESS) { -optixDeviceContextGetProperty(optix_context, - OPTIX_DEVICE_PROPERTY_RTCORE_VERSION, - _version, - sizeof(rtcore_version)); -optixDeviceContextDestroy(optix_context); - } - cuDevicePrimaryCtxRelease(cuda_device); +int major; +cuDeviceGetAttribute(, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, info.num); +if (major < 5) { + continue; // Only Maxwell and up are supported by OptiX } -// Only add devices with RTX support -if (rtcore_version != 0 || getenv("CYCLES_OPTIX_TEST")) { - devices.push_back(info); -} +info.type = DEVICE_OPTIX; +info.id += "_OptiX"; + +devices.push_back(info); } } ___ Bf-blender-cvs mailing list Bf-blender-cvs@blender.org 
https://lists.blender.org/mailman/listinfo/bf-blender-cvs
[Bf-blender-cvs] [92f79432047] tmp-2.83-cycles-rtx3-kernels: Cycles: Add CUDA 11 build support
Commit: 92f794320477240d1fd84edc8cb7089f4a10fae7 Author: Patrick Mours Date: Tue Oct 13 12:33:34 2020 +0200 Branches: tmp-2.83-cycles-rtx3-kernels https://developer.blender.org/rB92f794320477240d1fd84edc8cb7089f4a10fae7 Cycles: Add CUDA 11 build support With this patch the build system checks whether the "CUDA10_NVCC_EXECUTABLE" CMake variable is set and if so will use that to build sm_30 kernels. Similarly for sm_8x kernels it checks "CUDA11_NVCC_EXECUTABLE". All other kernels are built using the default CUDA toolkit. This makes it possible to use either the CUDA 10 or CUDA 11 toolkit by default and only selectively use the other for the kernels where it's a hard requirement. Reviewed By: brecht Differential Revision: https://developer.blender.org/D9179 === M CMakeLists.txt M build_files/buildbot/worker_compile.py M build_files/cmake/config/blender_release.cmake M intern/cycles/CMakeLists.txt M intern/cycles/kernel/CMakeLists.txt === diff --git a/CMakeLists.txt b/CMakeLists.txt index 6f705ffbe44..b15bbb7486b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -369,7 +369,7 @@ option(WITH_CYCLES_CUDA_BINARIES"Build Cycles CUDA binaries" OFF) option(WITH_CYCLES_CUBIN_COMPILER "Build cubins with nvrtc based compiler instead of nvcc" OFF) option(WITH_CYCLES_CUDA_BUILD_SERIAL "Build cubins one after another (useful on machines with limited RAM)" OFF) mark_as_advanced(WITH_CYCLES_CUDA_BUILD_SERIAL) -set(CYCLES_CUDA_BINARIES_ARCH sm_30 sm_35 sm_37 sm_50 sm_52 sm_60 sm_61 sm_70 sm_75 compute_75 CACHE STRING "CUDA architectures to build binaries for") +set(CYCLES_CUDA_BINARIES_ARCH sm_30 sm_35 sm_37 sm_50 sm_52 sm_60 sm_61 sm_70 sm_75 sm_86 compute_75 CACHE STRING "CUDA architectures to build binaries for") mark_as_advanced(CYCLES_CUDA_BINARIES_ARCH) unset(PLATFORM_DEFAULT) option(WITH_CYCLES_LOGGING "Build Cycles with logging support" ON) diff --git a/build_files/buildbot/worker_compile.py b/build_files/buildbot/worker_compile.py index 340f507df4c..d5482571c11 100644 --- 
a/build_files/buildbot/worker_compile.py +++ b/build_files/buildbot/worker_compile.py @@ -44,13 +44,17 @@ def get_cmake_options(builder): optix_sdk_dir = os.path.join(builder.blender_dir, '..', '..', 'NVIDIA-Optix-SDK') options.append('-DOPTIX_ROOT_DIR:PATH=' + optix_sdk_dir) -# Workers have multiple CUDA versions installed. Select 10.1 for Blender 2.83 releases. +# Workaround to build sm_30 kernels with CUDA 10, since CUDA 11 no longer supports that architecture if builder.platform == 'win': -options.append('-DCUDA_TOOLKIT_ROOT_DIR:PATH=C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1') -options.append('-DCUDA_NVCC_EXECUTABLE:FILEPATH=C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1/bin/nvcc.exe') +options.append('-DCUDA10_TOOLKIT_ROOT_DIR:PATH=C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1') +options.append('-DCUDA10_NVCC_EXECUTABLE:FILEPATH=C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1/bin/nvcc.exe') +options.append('-DCUDA11_TOOLKIT_ROOT_DIR:PATH=C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.1') +options.append('-DCUDA11_NVCC_EXECUTABLE:FILEPATH=C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.1/bin/nvcc.exe') elif builder.platform == 'linux': -options.append('-DCUDA_TOOLKIT_ROOT_DIR:PATH=/usr/local/cuda-10.1') - options.append('-DCUDA_NVCC_EXECUTABLE:FILEPATH=/usr/local/cuda-10.1/bin/nvcc') +options.append('-DCUDA10_TOOLKIT_ROOT_DIR:PATH=/usr/local/cuda-10.1') + options.append('-DCUDA10_NVCC_EXECUTABLE:FILEPATH=/usr/local/cuda-10.1/bin/nvcc') +options.append('-DCUDA11_TOOLKIT_ROOT_DIR:PATH=/usr/local/cuda-11.1') + options.append('-DCUDA11_NVCC_EXECUTABLE:FILEPATH=/usr/local/cuda-11.1/bin/nvcc') options.append("-C" + os.path.join(builder.blender_dir, config_file)) options.append("-DCMAKE_INSTALL_PREFIX=%s" % (builder.install_dir)) diff --git a/build_files/cmake/config/blender_release.cmake b/build_files/cmake/config/blender_release.cmake index 2d52fb22c86..e1b7560e4fc 100644 --- 
a/build_files/cmake/config/blender_release.cmake +++ b/build_files/cmake/config/blender_release.cmake @@ -52,7 +52,7 @@ set(WITH_USD ON CACHE BOOL "" FORCE) set(WITH_MEM_JEMALLOC ON CACHE BOOL "" FORCE) set(WITH_CYCLES_CUDA_BINARIES ON CACHE BOOL "" FORCE) set(WITH_CYCLES_CUBIN_COMPILER OFF CACHE BOOL "" FORCE) -set(CYCLES_CUDA_BINARIES_ARCH sm_30;sm_35;sm_37;sm_50;sm_52;sm_60;sm_61;sm_70;sm_75;compute_75 CACHE STRING "" FORCE) +set(CYCLES_CUDA_BINARIES_ARCH sm_30;sm_35;sm_37;sm_50;sm_52;sm_60;sm_61;s
[Bf-blender-cvs] [b4bddf2e3bd] tmp-2.83-cycles-rtx3-kernels: Fix OptiX being shown as available on first generation Maxwell GPUs
Commit: b4bddf2e3bd3e33a9ca7488fc78112f8a5fa0544 Author: Patrick Mours Date: Fri Jul 24 15:36:09 2020 +0200 Branches: tmp-2.83-cycles-rtx3-kernels https://developer.blender.org/rBb4bddf2e3bd3e33a9ca7488fc78112f8a5fa0544 Fix OptiX being shown as available on first generation Maxwell GPUs The OptiX kernels are compiled for target "compute_sm_52", which is only available on second generation Maxwell GPUs, so disable support for older ones. === M intern/cycles/device/device_optix.cpp === diff --git a/intern/cycles/device/device_optix.cpp b/intern/cycles/device/device_optix.cpp index db04c13d083..2b28d1e1dbb 100644 --- a/intern/cycles/device/device_optix.cpp +++ b/intern/cycles/device/device_optix.cpp @@ -1543,10 +1543,11 @@ void device_optix_info(const vector _devices, vectorhttps://lists.blender.org/mailman/listinfo/bf-blender-cvs
[Bf-blender-cvs] [f00ba344115] tmp-2.83-cycles-rtx3-kernels: Cycles: Enable OptiX on first generation Maxwell GPUs again
Commit: f00ba344115ca07f255fdd6088956e4d035714a0 Author: Patrick Mours Date: Mon Jul 27 16:11:00 2020 +0200 Branches: tmp-2.83-cycles-rtx3-kernels https://developer.blender.org/rBf00ba344115ca07f255fdd6088956e4d035714a0 Cycles: Enable OptiX on first generation Maxwell GPUs again === M intern/cycles/device/device_optix.cpp M intern/cycles/kernel/CMakeLists.txt === diff --git a/intern/cycles/device/device_optix.cpp b/intern/cycles/device/device_optix.cpp index 2b28d1e1dbb..db04c13d083 100644 --- a/intern/cycles/device/device_optix.cpp +++ b/intern/cycles/device/device_optix.cpp @@ -1543,11 +1543,10 @@ void device_optix_info(const vector _devices, vector" --target 52 +-target 50 -ptx -i ${CMAKE_CURRENT_SOURCE_DIR}/${input} ${cuda_flags} @@ -573,7 +573,7 @@ if(WITH_CYCLES_DEVICE_OPTIX AND WITH_CYCLES_CUDA_BINARIES) COMMAND ${CUDA_NVCC_EXECUTABLE} --ptx - -arch=sm_52 + -arch=sm_50 ${cuda_flags} ${input} WORKING_DIRECTORY ___ Bf-blender-cvs mailing list Bf-blender-cvs@blender.org https://lists.blender.org/mailman/listinfo/bf-blender-cvs
[Bf-blender-cvs] [08aaa07adbd] tmp-2.83-cycles-rtx3-kernels: Cycles: Use pre-compiled PTX kernel for older generation when no matching one is found
Commit: 08aaa07adbd46e27f4226f29559be156f14a524b Author: Patrick Mours Date: Fri Jul 17 15:06:55 2020 +0200 Branches: tmp-2.83-cycles-rtx3-kernels https://developer.blender.org/rB08aaa07adbd46e27f4226f29559be156f14a524b Cycles: Use pre-compiled PTX kernel for older generation when no matching one is found This patch changes the discovery of pre-compiled kernels, to look for any PTX, even if it does not match the current architecture version exactly. It works because the driver can JIT-compile PTX generated for architectures less than or equal to the current one. This e.g. makes it possible to render on a new GPU architecture even if no pre-compiled binary kernel was distributed for it as part of the Blender installation. Reviewed By: brecht Differential Revision: https://developer.blender.org/D8332 === M CMakeLists.txt M build_files/cmake/config/blender_release.cmake M intern/cycles/device/cuda/device_cuda_impl.cpp M intern/cycles/kernel/CMakeLists.txt === diff --git a/CMakeLists.txt b/CMakeLists.txt index 83f547eb593..6f705ffbe44 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -369,7 +369,7 @@ option(WITH_CYCLES_CUDA_BINARIES"Build Cycles CUDA binaries" OFF) option(WITH_CYCLES_CUBIN_COMPILER "Build cubins with nvrtc based compiler instead of nvcc" OFF) option(WITH_CYCLES_CUDA_BUILD_SERIAL "Build cubins one after another (useful on machines with limited RAM)" OFF) mark_as_advanced(WITH_CYCLES_CUDA_BUILD_SERIAL) -set(CYCLES_CUDA_BINARIES_ARCH sm_30 sm_35 sm_37 sm_50 sm_52 sm_60 sm_61 sm_70 sm_75 CACHE STRING "CUDA architectures to build binaries for") +set(CYCLES_CUDA_BINARIES_ARCH sm_30 sm_35 sm_37 sm_50 sm_52 sm_60 sm_61 sm_70 sm_75 compute_75 CACHE STRING "CUDA architectures to build binaries for") mark_as_advanced(CYCLES_CUDA_BINARIES_ARCH) unset(PLATFORM_DEFAULT) option(WITH_CYCLES_LOGGING "Build Cycles with logging support" ON) diff --git a/build_files/cmake/config/blender_release.cmake b/build_files/cmake/config/blender_release.cmake index 
01a59e451aa..2d52fb22c86 100644 --- a/build_files/cmake/config/blender_release.cmake +++ b/build_files/cmake/config/blender_release.cmake @@ -52,7 +52,7 @@ set(WITH_USD ON CACHE BOOL "" FORCE) set(WITH_MEM_JEMALLOC ON CACHE BOOL "" FORCE) set(WITH_CYCLES_CUDA_BINARIES ON CACHE BOOL "" FORCE) set(WITH_CYCLES_CUBIN_COMPILER OFF CACHE BOOL "" FORCE) -set(CYCLES_CUDA_BINARIES_ARCH sm_30;sm_35;sm_37;sm_50;sm_52;sm_60;sm_61;sm_70;sm_75 CACHE STRING "" FORCE) +set(CYCLES_CUDA_BINARIES_ARCH sm_30;sm_35;sm_37;sm_50;sm_52;sm_60;sm_61;sm_70;sm_75;compute_75 CACHE STRING "" FORCE) set(WITH_CYCLES_DEVICE_OPTIX ON CACHE BOOL "" FORCE) # platform dependent options diff --git a/intern/cycles/device/cuda/device_cuda_impl.cpp b/intern/cycles/device/cuda/device_cuda_impl.cpp index ba5d479e0e7..870f9f9ecf9 100644 --- a/intern/cycles/device/cuda/device_cuda_impl.cpp +++ b/intern/cycles/device/cuda/device_cuda_impl.cpp @@ -352,11 +352,24 @@ string CUDADevice::compile_kernel(const DeviceRequestedFeatures _featu } } -const string ptx = path_get(string_printf("lib/%s_compute_%d%d.ptx", name, major, minor)); -VLOG(1) << "Testing for pre-compiled kernel " << ptx << "."; -if (path_exists(ptx)) { - VLOG(1) << "Using precompiled kernel."; - return ptx; +/* The driver can JIT-compile PTX generated for older generations, so find the closest one. 
*/ +int ptx_major = major, ptx_minor = minor; +while (ptx_major >= 3) { + const string ptx = path_get( + string_printf("lib/%s_compute_%d%d.ptx", name, ptx_major, ptx_minor)); + VLOG(1) << "Testing for pre-compiled kernel " << ptx << "."; + if (path_exists(ptx)) { +VLOG(1) << "Using precompiled kernel."; +return ptx; + } + + if (ptx_minor > 0) { +ptx_minor--; + } + else { +ptx_major--; +ptx_minor = 9; + } } } diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index 2e839a616e9..6ab0b9d39d2 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -549,7 +549,7 @@ if(WITH_CYCLES_DEVICE_OPTIX AND WITH_CYCLES_CUDA_BINARIES) ${SRC_UTIL_HEADERS} COMMAND ${CUBIN_CC_ENV} "$" --target 30 +-target 52 -ptx -i ${CMAKE_CURRENT_SOURCE_DIR}/${input} ${cuda_flags} @@ -573,7 +573,7 @@ if(WITH_CYCLES_DEVICE_OPTIX AND WITH_CYCLES_CUDA_BINARIES) COMMAND
[Bf-blender-cvs] [9f89166b52b] blender-v2.92-release master: Fix T85148: OptiX viewport denoising regression
Commit: 9f89166b52b1de880c14847a1d0cd830d7c83f5b Author: Patrick Mours Date: Fri Jan 29 13:35:00 2021 +0100 Branches: blender-v2.92-release master https://developer.blender.org/rB9f89166b52b1de880c14847a1d0cd830d7c83f5b Fix T85148: OptiX viewport denoising regression Commit 6e74a8b69f215e63e136cb4c497e738371ac798f changed the denoiser input passes default to include the normal pass. This does not always produce optimal images though, hence why the default was previously set to only include the color and albedo passes. This restores that behavior, so that viewport denoising with OptiX produces the same results as before. === M intern/cycles/device/device_task.h === diff --git a/intern/cycles/device/device_task.h b/intern/cycles/device/device_task.h index f9b47c59e95..a9298a9126c 100644 --- a/intern/cycles/device/device_task.h +++ b/intern/cycles/device/device_task.h @@ -100,7 +100,9 @@ class DenoiseParams { neighbor_frames = 2; clamp_input = true; -input_passes = DENOISER_INPUT_RGB_ALBEDO_NORMAL; +/* Default to color + albedo only, since normal input does not always have the desired effect + * when denoising with OptiX. */ +input_passes = DENOISER_INPUT_RGB_ALBEDO; start_sample = 0; } ___ Bf-blender-cvs mailing list Bf-blender-cvs@blender.org https://lists.blender.org/mailman/listinfo/bf-blender-cvs
[Bf-blender-cvs] [b2e00e8f8e0] master: Merge branch 'blender-v2.92-release'
Commit: b2e00e8f8e03d3ae4ca3ea9f66f90ee0a2d008f9 Author: Patrick Mours Date: Fri Jan 29 13:35:21 2021 +0100 Branches: master https://developer.blender.org/rBb2e00e8f8e03d3ae4ca3ea9f66f90ee0a2d008f9 Merge branch 'blender-v2.92-release' === === ___ Bf-blender-cvs mailing list Bf-blender-cvs@blender.org https://lists.blender.org/mailman/listinfo/bf-blender-cvs
[Bf-blender-cvs] [9b80291412f] master: Merge branch 'blender-v2.92-release'
Commit: 9b80291412feed2a9942eb41d0bd9390035a702c Author: Patrick Mours Date: Wed Jan 27 15:29:39 2021 +0100 Branches: master https://developer.blender.org/rB9b80291412feed2a9942eb41d0bd9390035a702c Merge branch 'blender-v2.92-release' === === ___ Bf-blender-cvs mailing list Bf-blender-cvs@blender.org https://lists.blender.org/mailman/listinfo/bf-blender-cvs
[Bf-blender-cvs] [a92ebab5da3] master: Windows: Add "--debug-cycles" argument to "blender_debug_gpu.bat" batch file
Commit: a92ebab5da3bcbe3ee1b87348e51f6bcb347b881 Author: Patrick Mours Date: Thu Jan 21 16:28:02 2021 +0100 Branches: master https://developer.blender.org/rBa92ebab5da3bcbe3ee1b87348e51f6bcb347b881 Windows: Add "--debug-cycles" argument to "blender_debug_gpu.bat" batch file This extends the "blender_debug_gpu.bat" batch file to also be useful for triaging Cycles problems. OptiX initialization errors or problems while iterating CUDA devices are only logged when the `--debug-cycles` flag is specified, so adding that here. Reviewed By: brecht, LazyDodo Differential Revision: https://developer.blender.org/D10167 === M release/windows/batch/blender_debug_gpu.cmd === diff --git a/release/windows/batch/blender_debug_gpu.cmd b/release/windows/batch/blender_debug_gpu.cmd index 46d126ab621..53d7863ec70 100644 --- a/release/windows/batch/blender_debug_gpu.cmd +++ b/release/windows/batch/blender_debug_gpu.cmd @@ -12,5 +12,5 @@ mkdir "%temp%\blender\debug_logs" > NUL 2>&1 echo. echo Starting blender and waiting for it to exit set PYTHONPATH= -"%~dp0\blender" --debug --debug-gpu --python-expr "import bpy; bpy.ops.wm.sysinfo(filepath=r'%temp%\blender\debug_logs\blender_system_info.txt')" > "%temp%\blender\debug_logs\blender_debug_output.txt" 2>&1 < %0 +"%~dp0\blender" --debug --debug-gpu --debug-cycles --python-expr "import bpy; bpy.ops.wm.sysinfo(filepath=r'%temp%\blender\debug_logs\blender_system_info.txt')" > "%temp%\blender\debug_logs\blender_debug_output.txt" 2>&1 < %0 explorer "%temp%\blender\debug_logs" \ No newline at end of file ___ Bf-blender-cvs mailing list Bf-blender-cvs@blender.org https://lists.blender.org/mailman/listinfo/bf-blender-cvs
[Bf-blender-cvs] [fc5f9a8ac90] master: Merge branch 'blender-v2.92-release'
Commit: fc5f9a8ac900bcc0c712f89d4efca3583b0906ab Author: Patrick Mours Date: Wed Jan 20 14:40:46 2021 +0100 Branches: master https://developer.blender.org/rBfc5f9a8ac900bcc0c712f89d4efca3583b0906ab Merge branch 'blender-v2.92-release' === === ___ Bf-blender-cvs mailing list Bf-blender-cvs@blender.org https://lists.blender.org/mailman/listinfo/bf-blender-cvs
[Bf-blender-cvs] [4a09907eab2] blender-v2.92-release master: Fix T84049: Crash when using Cycles Progressive Refine with OptiX+CPU
Commit: 4a09907eab2a3b6da53b1942aebefdcf58bbd604 Author: Patrick Mours Date: Wed Jan 20 14:12:43 2021 +0100 Branches: blender-v2.92-release master https://developer.blender.org/rB4a09907eab2a3b6da53b1942aebefdcf58bbd604 Fix T84049: Crash when using Cycles Progressive Refine with OptiX+CPU Tile stealing may steal a CPU tile buffer and move it to the GPU, but next time around that tile may be re-used on the CPU again (in progressive refinement mode). The buffer would still be on the GPU then though, so is inaccessible to the CPU. As a result Blender crashed when the CPU tried to write results to that tile buffer. This fixes that by ensuring a stolen tile buffer is moved back to the device it is used on before rendering. === M intern/cycles/render/session.cpp === diff --git a/intern/cycles/render/session.cpp b/intern/cycles/render/session.cpp index 0debc08d911..f3cdae77d47 100644 --- a/intern/cycles/render/session.cpp +++ b/intern/cycles/render/session.cpp @@ -540,6 +540,10 @@ bool Session::acquire_tile(RenderTile , Device *tile_device, uint tile_typ tile->buffers = new RenderBuffers(tile_device); tile->buffers->reset(buffer_params); } + else if (tile->buffers->buffer.device != tile_device) { +/* Move buffer to current tile device again in case it was stolen before. */ +tile->buffers->buffer.move_device(tile_device); + } tile->buffers->map_neighbor_copied = false; ___ Bf-blender-cvs mailing list Bf-blender-cvs@blender.org https://lists.blender.org/mailman/listinfo/bf-blender-cvs
[Bf-blender-cvs] [4fde594fda1] blender-v2.92-release master: Fix T84650: CPU render + OptiX denoiser leaves empty tiles unprocessed
Commit: 4fde594fda13abf98437bc5d0012decc2bd3d5f6 Author: Patrick Mours Date: Mon Jan 18 15:30:25 2021 +0100 Branches: blender-v2.92-release master https://developer.blender.org/rB4fde594fda13abf98437bc5d0012decc2bd3d5f6 Fix T84650: CPU render + OptiX denoiser leaves empty tiles unprocessed The OptiX denoiser is part of the OptiX device, so to the tile manager it looks like a GPU device. As a result the tile stealing implementation erroneously stole CPU tiles and moved them to that OptiX device, even though in this configuration the OptiX device was only set up for denoising and not rendering. Launching the render kernel therefore caused a crash because of a missing AS etc. This fixes that by ensuring tiles can only be stolen by devices that support render tiles. === M intern/cycles/render/session.cpp === diff --git a/intern/cycles/render/session.cpp b/intern/cycles/render/session.cpp index a00f8154148..0debc08d911 100644 --- a/intern/cycles/render/session.cpp +++ b/intern/cycles/render/session.cpp @@ -459,7 +459,11 @@ bool Session::acquire_tile(RenderTile , Device *tile_device, uint tile_typ int device_num = device->device_number(tile_device); while (!tile_manager.next_tile(tile, device_num, tile_types)) { -if (steal_tile(rtile, tile_device, tile_lock)) { +/* Can only steal tiles on devices that support rendering + * This is because denoising tiles cannot be stolen (see below) + */ +if ((tile_types & (RenderTile::PATH_TRACE | RenderTile::BAKE)) && +steal_tile(rtile, tile_device, tile_lock)) { return true; } ___ Bf-blender-cvs mailing list Bf-blender-cvs@blender.org https://lists.blender.org/mailman/listinfo/bf-blender-cvs
[Bf-blender-cvs] [ce0f212498c] master: Merge branch 'blender-v2.92-release'
Commit: ce0f212498c3347f512966bf85d08feefe1d032e Author: Patrick Mours Date: Mon Jan 18 15:30:48 2021 +0100 Branches: master https://developer.blender.org/rBce0f212498c3347f512966bf85d08feefe1d032e Merge branch 'blender-v2.92-release' === === ___ Bf-blender-cvs mailing list Bf-blender-cvs@blender.org https://lists.blender.org/mailman/listinfo/bf-blender-cvs
[Bf-blender-cvs] [c66f00dc26b] master: Fix Cycles rendering with OptiX after instance limit increase when building with old SDK
Commit: c66f00dc26b08d5f7be6aef080c1a0ec2de19cd7 Author: Patrick Mours Date: Fri Jan 8 13:38:26 2021 +0100 Branches: master https://developer.blender.org/rBc66f00dc26b08d5f7be6aef080c1a0ec2de19cd7 Fix Cycles rendering with OptiX after instance limit increase when building with old SDK Commit d259e7dcfbbd37cec5a45fdfb554f24de10d0268 increased the instance limit, but only provided a fall back for the host code for older OptiX SDKs, not for kernel code. This caused a mismatch when an old SDK was used (as is currently the case on buildbot) and subsequent rendering artifacts. This fixes that by moving the bit that is checked to a common location that works with both old and new SDK versions. === M intern/cycles/device/device_optix.cpp M intern/cycles/kernel/kernels/optix/kernel_optix.cu === diff --git a/intern/cycles/device/device_optix.cpp b/intern/cycles/device/device_optix.cpp index de98e3f3594..f19289f966e 100644 --- a/intern/cycles/device/device_optix.cpp +++ b/intern/cycles/device/device_optix.cpp @@ -1514,16 +1514,19 @@ class OptiXDevice : public CUDADevice { } else { unsigned int num_instances = 0; + unsigned int max_num_instances = 0x; bvh_optix->as_data.free(); bvh_optix->traversable_handle = 0; bvh_optix->motion_transform_data.free(); -# if OPTIX_ABI_VERSION < 23 - if (bvh->objects.size() > 0x7F) { -# else - if (bvh->objects.size() > 0x7FF) { -# endif + optixDeviceContextGetProperty(context, + OPTIX_DEVICE_PROPERTY_LIMIT_MAX_INSTANCE_ID, +_num_instances, +sizeof(max_num_instances)); + // Do not count first bit, which is used to distinguish instanced and non-instanced objects + max_num_instances >>= 1; + if (bvh->objects.size() > max_num_instances) { progress.set_error( "Failed to build OptiX acceleration structure because there are too many instances"); return; @@ -1582,8 +1585,8 @@ class OptiXDevice : public CUDADevice { instance.transform[5] = 1.0f; instance.transform[10] = 1.0f; -// Set user instance ID to object index -instance.instanceId = 
ob->get_device_index(); +// Set user instance ID to object index (but leave low bit blank) +instance.instanceId = ob->get_device_index() << 1; // Have to have at least one bit in the mask, or else instance would always be culled instance.visibilityMask = 1; @@ -1689,13 +1692,9 @@ class OptiXDevice : public CUDADevice { else { // Disable instance transform if geometry already has it applied to vertex data instance.flags = OPTIX_INSTANCE_FLAG_DISABLE_TRANSFORM; -// Non-instanced objects read ID from prim_object, so -// distinguish them from instanced objects with high bit set -# if OPTIX_ABI_VERSION < 23 -instance.instanceId |= 0x80; -# else -instance.instanceId |= 0x800; -# endif +// Non-instanced objects read ID from 'prim_object', so distinguish +// them from instanced objects with the low bit set +instance.instanceId |= 1; } } } diff --git a/intern/cycles/kernel/kernels/optix/kernel_optix.cu b/intern/cycles/kernel/kernels/optix/kernel_optix.cu index 0c2c84fdbdf..7f609eab474 100644 --- a/intern/cycles/kernel/kernels/optix/kernel_optix.cu +++ b/intern/cycles/kernel/kernels/optix/kernel_optix.cu @@ -45,13 +45,12 @@ template ccl_device_forceinline uint get_object_id() uint object = optixGetInstanceId(); #endif // Choose between always returning object ID or only for instances - if (always) -// Can just remove the high bit since instance always contains object ID -return object & 0x7FF; // OPTIX_ABI_VERSION >= 23 ? 0x7FF : 0x7F - // Set to OBJECT_NONE if this is not an instanced object - else if (object & 0x800) // OPTIX_ABI_VERSION >= 23 ? 
0x800 : 0x80 -object = OBJECT_NONE; - return object; + if (always || (object & 1) == 0) +// Can just remove the low bit since instance always contains object ID +return object >> 1; + else +// Set to OBJECT_NONE if this is not an instanced object +return OBJECT_NONE; } extern "C" __global__ void __raygen__kernel_optix_path_trace() ___ Bf-blender-cvs mailing list Bf-blender-cvs@blender.org https://lists.blender.org/mailman/listinfo/bf-blender-cvs
[Bf-blender-cvs] [d259e7dcfbb] master: Cycles: Increase instance limit for OptiX acceleration structure building
Commit: d259e7dcfbbd37cec5a45fdfb554f24de10d0268 Author: Patrick Mours Date: Thu Jan 7 18:54:29 2021 +0100 Branches: master https://developer.blender.org/rBd259e7dcfbbd37cec5a45fdfb554f24de10d0268 Cycles: Increase instance limit for OptiX acceleration structure building For a while now OptiX had support for 28-bits of instance IDs, instead of the initial 24-bits (see also value reported by OPTIX_DEVICE_PROPERTY_LIMIT_MAX_INSTANCE_ID). This change makes use of that and also adds an error reported when the number of instances an OptiX acceleration structure is created with goes beyond the limit, to make this clear instead of just rendering an image with artifacts. Manifest Tasks: T81431 === M intern/cycles/device/device_optix.cpp M intern/cycles/kernel/kernels/optix/kernel_optix.cu === diff --git a/intern/cycles/device/device_optix.cpp b/intern/cycles/device/device_optix.cpp index 673fc1752bb..de98e3f3594 100644 --- a/intern/cycles/device/device_optix.cpp +++ b/intern/cycles/device/device_optix.cpp @@ -1519,6 +1519,16 @@ class OptiXDevice : public CUDADevice { bvh_optix->traversable_handle = 0; bvh_optix->motion_transform_data.free(); +# if OPTIX_ABI_VERSION < 23 + if (bvh->objects.size() > 0x7F) { +# else + if (bvh->objects.size() > 0x7FF) { +# endif +progress.set_error( +"Failed to build OptiX acceleration structure because there are too many instances"); +return; + } + // Fill instance descriptions # if OPTIX_ABI_VERSION < 41 device_vector aabbs(this, "optix tlas aabbs", MEM_READ_ONLY); @@ -1681,7 +1691,11 @@ class OptiXDevice : public CUDADevice { instance.flags = OPTIX_INSTANCE_FLAG_DISABLE_TRANSFORM; // Non-instanced objects read ID from prim_object, so // distinguish them from instanced objects with high bit set +# if OPTIX_ABI_VERSION < 23 instance.instanceId |= 0x80; +# else +instance.instanceId |= 0x800; +# endif } } } diff --git a/intern/cycles/kernel/kernels/optix/kernel_optix.cu b/intern/cycles/kernel/kernels/optix/kernel_optix.cu index 
8ccd2555091..0c2c84fdbdf 100644 --- a/intern/cycles/kernel/kernels/optix/kernel_optix.cu +++ b/intern/cycles/kernel/kernels/optix/kernel_optix.cu @@ -47,9 +47,9 @@ template ccl_device_forceinline uint get_object_id() // Choose between always returning object ID or only for instances if (always) // Can just remove the high bit since instance always contains object ID -return object & 0x7F; +return object & 0x7FF; // OPTIX_ABI_VERSION >= 23 ? 0x7FF : 0x7F // Set to OBJECT_NONE if this is not an instanced object - else if (object & 0x80) + else if (object & 0x800) // OPTIX_ABI_VERSION >= 23 ? 0x800 : 0x80 object = OBJECT_NONE; return object; } ___ Bf-blender-cvs mailing list Bf-blender-cvs@blender.org https://lists.blender.org/mailman/listinfo/bf-blender-cvs
[Bf-blender-cvs] [3373d14b1b0] master: Fix T83925: Crash when rendering on the CPU with OptiX denoiser enabled
Commit: 3373d14b1b05b2ee5dc88efff5dc8b1a5fe20f36 Author: Patrick Mours Date: Tue Jan 5 18:37:31 2021 +0100 Branches: master https://developer.blender.org/rB3373d14b1b05b2ee5dc88efff5dc8b1a5fe20f36 Fix T83925: Crash when rendering on the CPU with OptiX denoiser enabled Rendering on the CPU uses the Embree BVH layout, whether the OptiX denoiser is enabled or not. This means the "build_bvh" function gets a "BVHEmbree" object to fill and not a "BVHMulti" as it was assuming before, which caused crashes due to memory getting overwritten incorrectly. This fixes that by redirecting Embree BVH builds to the Embree device. Manifest Tasks: T83925 === M intern/cycles/device/device_multi.cpp === diff --git a/intern/cycles/device/device_multi.cpp b/intern/cycles/device/device_multi.cpp index e5b138917ff..44959577fb5 100644 --- a/intern/cycles/device/device_multi.cpp +++ b/intern/cycles/device/device_multi.cpp @@ -248,11 +248,14 @@ class MultiDevice : public Device { void build_bvh(BVH *bvh, Progress , bool refit) override { /* Try to build and share a single acceleration structure, if possible */ -if (bvh->params.bvh_layout == BVH_LAYOUT_BVH2) { +if (bvh->params.bvh_layout == BVH_LAYOUT_BVH2 || bvh->params.bvh_layout == BVH_LAYOUT_EMBREE) { devices.back().device->build_bvh(bvh, progress, refit); return; } +assert(bvh->params.bvh_layout == BVH_LAYOUT_MULTI_OPTIX || + bvh->params.bvh_layout == BVH_LAYOUT_MULTI_OPTIX_EMBREE); + BVHMulti *const bvh_multi = static_cast(bvh); bvh_multi->sub_bvhs.resize(devices.size()); ___ Bf-blender-cvs mailing list Bf-blender-cvs@blender.org https://lists.blender.org/mailman/listinfo/bf-blender-cvs
[Bf-blender-cvs] [166c0db3f94] master: Fix T83915: Subdivision Surface modifier causes visual artifacts in Cycles rendered viewport - CPU and OptiX
Commit: 166c0db3f9412925b501b7172875cb8ee2eb6958 Author: Patrick Mours Date: Tue Jan 5 14:39:29 2021 +0100 Branches: master https://developer.blender.org/rB166c0db3f9412925b501b7172875cb8ee2eb6958 Fix T83915: Subdivision Surface modifier causes visual artifacts in Cycles rendered viewport - CPU and OptiX Changing the geometry in the current scene caused the primitive offsets for all geometry to change, but the values would not be updated in all bottom-level BVH structures. Rendering artifacts and crashes were the result. This fixes that by ensuring all BVH structures are updated when the primitive offsets change. === M intern/cycles/bvh/bvh_embree.cpp M intern/cycles/render/geometry.cpp M intern/cycles/render/geometry.h === diff --git a/intern/cycles/bvh/bvh_embree.cpp b/intern/cycles/bvh/bvh_embree.cpp index b874bda7186..c082478e5b1 100644 --- a/intern/cycles/bvh/bvh_embree.cpp +++ b/intern/cycles/bvh/bvh_embree.cpp @@ -682,6 +682,7 @@ void BVHEmbree::refit(Progress ) if (mesh->num_triangles() > 0) { RTCGeometry geom = rtcGetGeometry(scene, geom_id); set_tri_vertex_buffer(geom, mesh, true); + rtcSetGeometryUserData(geom, (void *)mesh->optix_prim_offset); rtcCommitGeometry(geom); } } @@ -690,6 +691,7 @@ void BVHEmbree::refit(Progress ) if (hair->num_curves() > 0) { RTCGeometry geom = rtcGetGeometry(scene, geom_id + 1); set_curve_vertex_buffer(geom, hair, true); + rtcSetGeometryUserData(geom, (void *)hair->optix_prim_offset); rtcCommitGeometry(geom); } } diff --git a/intern/cycles/render/geometry.cpp b/intern/cycles/render/geometry.cpp index 64b98a91853..6fc217f2d76 100644 --- a/intern/cycles/render/geometry.cpp +++ b/intern/cycles/render/geometry.cpp @@ -280,6 +280,15 @@ void Geometry::tag_update(Scene *scene, bool rebuild) scene->object_manager->need_update = true; } +void Geometry::tag_bvh_update(bool rebuild) +{ + tag_modified(); + + if (rebuild) { +need_update_rebuild = true; + } +} + /* Geometry Manager */ GeometryManager::GeometryManager() @@ -915,7 +924,7 
@@ void GeometryManager::device_update_attributes(Device *device, scene->object_manager->device_update_mesh_offsets(device, dscene, scene); } -void GeometryManager::mesh_calc_offset(Scene *scene) +void GeometryManager::mesh_calc_offset(Scene *scene, BVHLayout bvh_layout) { size_t vert_size = 0; size_t tri_size = 0; @@ -930,6 +939,14 @@ void GeometryManager::mesh_calc_offset(Scene *scene) size_t optix_prim_size = 0; foreach (Geometry *geom, scene->geometry) { +if (geom->optix_prim_offset != optix_prim_size) { + /* Need to rebuild BVH in OptiX, since refit only allows modified mesh data there */ + const bool has_optix_bvh = bvh_layout == BVH_LAYOUT_OPTIX || + bvh_layout == BVH_LAYOUT_MULTI_OPTIX || + bvh_layout == BVH_LAYOUT_MULTI_OPTIX_EMBREE; + geom->tag_bvh_update(has_optix_bvh); +} + if (geom->geometry_type == Geometry::MESH || geom->geometry_type == Geometry::VOLUME) { Mesh *mesh = static_cast(geom); @@ -1526,7 +1543,9 @@ void GeometryManager::device_update(Device *device, /* Device update. */ device_free(device, dscene); - mesh_calc_offset(scene); + const BVHLayout bvh_layout = BVHParams::best_bvh_layout(scene->params.bvh_layout, + device->get_bvh_layout_mask()); + mesh_calc_offset(scene, bvh_layout); if (true_displacement_used) { scoped_callback_timer timer([scene](double time) { if (scene->update_stats) { @@ -1553,8 +1572,6 @@ void GeometryManager::device_update(Device *device, } /* Update displacement. 
*/ - BVHLayout bvh_layout = BVHParams::best_bvh_layout(scene->params.bvh_layout, - device->get_bvh_layout_mask()); bool displacement_done = false; size_t num_bvh = 0; diff --git a/intern/cycles/render/geometry.h b/intern/cycles/render/geometry.h index d3daf0cc809..b124e950ad2 100644 --- a/intern/cycles/render/geometry.h +++ b/intern/cycles/render/geometry.h @@ -157,6 +157,8 @@ class Geometry : public Node { /* Updates */ void tag_update(Scene *scene, bool rebuild); + + void tag_bvh_update(bool rebuild); }; /* Geometry Manager */ @@ -198,7 +200,7 @@ class GeometryManager { vector _attributes); /* Compute verts/triangles/curves offsets in global arrays. */ - void mesh_calc_offset(Scene *scene); + void mesh_calc_offset(Scene *scene, BVHLayout bvh_layout); void device_update_object(Device *device, DeviceScene *dscene, Scene *scene, Progress ); __
[Bf-blender-cvs] [bfb6fce6594] master: Cycles: Add CPU+GPU rendering support with OptiX
Commit: bfb6fce6594e9cf133bd18aee311c1e5e32dc799 Author: Patrick Mours Date: Thu Dec 10 14:18:25 2020 +0100 Branches: master https://developer.blender.org/rBbfb6fce6594e9cf133bd18aee311c1e5e32dc799 Cycles: Add CPU+GPU rendering support with OptiX Adds support for building multiple BVH types in order to support using both CPU and OptiX devices for rendering simultaneously. Primitive packing for Embree and OptiX is now standalone, so it only needs to be run once and can be shared between the two. Additionally, BVH building was made a device call, so that each device backend can decide how to perform the building. The multi-device for instance creates a special multi-BVH that holds references to several sub-BVHs, one for each sub-device. Reviewed By: brecht, kevindietrich Differential Revision: https://developer.blender.org/D9718 === M intern/cycles/blender/addon/properties.py M intern/cycles/blender/blender_device.cpp M intern/cycles/bvh/CMakeLists.txt M intern/cycles/bvh/bvh.cpp M intern/cycles/bvh/bvh.h M intern/cycles/bvh/bvh2.cpp M intern/cycles/bvh/bvh2.h M intern/cycles/bvh/bvh_embree.cpp M intern/cycles/bvh/bvh_embree.h A intern/cycles/bvh/bvh_multi.cpp A intern/cycles/bvh/bvh_multi.h M intern/cycles/bvh/bvh_optix.cpp M intern/cycles/bvh/bvh_optix.h M intern/cycles/device/cuda/device_cuda.h M intern/cycles/device/cuda/device_cuda_impl.cpp M intern/cycles/device/device.cpp M intern/cycles/device/device.h M intern/cycles/device/device_cpu.cpp M intern/cycles/device/device_multi.cpp M intern/cycles/device/device_optix.cpp M intern/cycles/kernel/bvh/bvh_embree.h M intern/cycles/kernel/kernel_types.h M intern/cycles/render/geometry.cpp M intern/cycles/render/geometry.h M intern/cycles/render/hair.cpp M intern/cycles/render/hair.h M intern/cycles/render/mesh.cpp M intern/cycles/render/mesh.h M intern/cycles/render/scene.cpp M intern/cycles/render/scene.h === diff --git a/intern/cycles/blender/addon/properties.py b/intern/cycles/blender/addon/properties.py index 
1cb29fc6cb0..2f204b2c658 100644 --- a/intern/cycles/blender/addon/properties.py +++ b/intern/cycles/blender/addon/properties.py @@ -1570,7 +1570,7 @@ class CyclesPreferences(bpy.types.AddonPreferences): elif entry.type == 'CPU': cpu_devices.append(entry) # Extend all GPU devices with CPU. -if compute_device_type in {'CUDA', 'OPENCL'}: +if compute_device_type in {'CUDA', 'OPTIX', 'OPENCL'}: devices.extend(cpu_devices) return devices diff --git a/intern/cycles/blender/blender_device.cpp b/intern/cycles/blender/blender_device.cpp index ffcaef0b2a9..977f8297de1 100644 --- a/intern/cycles/blender/blender_device.cpp +++ b/intern/cycles/blender/blender_device.cpp @@ -90,8 +90,7 @@ DeviceInfo blender_device_info(BL::Preferences _preferences, BL::Scene _scen mask |= DEVICE_MASK_CUDA; } else if (compute_device == COMPUTE_DEVICE_OPTIX) { -/* Cannot use CPU and OptiX device at the same time right now, so replace mask. */ -mask = DEVICE_MASK_OPTIX; +mask |= DEVICE_MASK_OPTIX; } else if (compute_device == COMPUTE_DEVICE_OPENCL) { mask |= DEVICE_MASK_OPENCL; diff --git a/intern/cycles/bvh/CMakeLists.txt b/intern/cycles/bvh/CMakeLists.txt index 703c69b1797..8cc72359757 100644 --- a/intern/cycles/bvh/CMakeLists.txt +++ b/intern/cycles/bvh/CMakeLists.txt @@ -25,6 +25,7 @@ set(SRC bvh_binning.cpp bvh_build.cpp bvh_embree.cpp + bvh_multi.cpp bvh_node.cpp bvh_optix.cpp bvh_sort.cpp @@ -38,6 +39,7 @@ set(SRC_HEADERS bvh_binning.h bvh_build.h bvh_embree.h + bvh_multi.h bvh_node.h bvh_optix.h bvh_params.h diff --git a/intern/cycles/bvh/bvh.cpp b/intern/cycles/bvh/bvh.cpp index a51ac4cf4a9..256382e63ba 100644 --- a/intern/cycles/bvh/bvh.cpp +++ b/intern/cycles/bvh/bvh.cpp @@ -17,17 +17,11 @@ #include "bvh/bvh.h" -#include "render/hair.h" -#include "render/mesh.h" -#include "render/object.h" - #include "bvh/bvh2.h" -#include "bvh/bvh_build.h" #include "bvh/bvh_embree.h" -#include "bvh/bvh_node.h" +#include "bvh/bvh_multi.h" #include "bvh/bvh_optix.h" -#include "util/util_foreach.h" #include 
"util/util_logging.h" #include "util/util_progress.h" @@ -38,14 +32,17 @@ CCL_NAMESPACE_BEGIN const char *bvh_layout_name(BVHLayout layout) { switch (layout) { -case BVH_LAYOUT_BVH2: - return "BVH2"; case BVH_LAYOUT_NONE: return "NONE"; +case BVH_LAYOUT_BVH2: + return "
[Bf-blender-cvs] [41bca5a3eed] master: Fix T83581: "Only local" ambient occlusion option causes error on OptiX 2.92
Commit: 41bca5a3eed81d79a62899fcb04fa76674f09c88 Author: Patrick Mours Date: Wed Dec 9 17:06:28 2020 +0100 Branches: master https://developer.blender.org/rB41bca5a3eed81d79a62899fcb04fa76674f09c88 Fix T83581: "Only local" ambient occlusion option causes error on OptiX 2.92 The SVM AO node calls "scene_intersect_local" with a NULL pointer for the intersection information, which caused a crash with OptiX since it was not checking for this case and always dereferencing this pointer. This fixes that by checking whether any hit information was requested first (like is done in the BVH2 intersection routines). === M intern/cycles/kernel/kernels/optix/kernel_optix.cu === diff --git a/intern/cycles/kernel/kernels/optix/kernel_optix.cu b/intern/cycles/kernel/kernels/optix/kernel_optix.cu index fd9065098dd..8ccd2555091 100644 --- a/intern/cycles/kernel/kernels/optix/kernel_optix.cu +++ b/intern/cycles/kernel/kernels/optix/kernel_optix.cu @@ -118,12 +118,18 @@ extern "C" __global__ void __anyhit__kernel_optix_local_hit() return optixIgnoreIntersection(); } + const uint max_hits = optixGetPayload_5(); + if (max_hits == 0) { +// Special case for when no hit information is requested, just report that something was hit +optixSetPayload_5(true); +return optixTerminateRay(); + } + int hit = 0; uint *const lcg_state = get_payload_ptr_0(); LocalIntersection *const local_isect = get_payload_ptr_2(); if (lcg_state) { -const uint max_hits = optixGetPayload_5(); for (int i = min(max_hits, local_isect->num_hits) - 1; i >= 0; --i) { if (optixGetRayTmax() == local_isect->hits[i].t) { return optixIgnoreIntersection(); ___ Bf-blender-cvs mailing list Bf-blender-cvs@blender.org https://lists.blender.org/mailman/listinfo/bf-blender-cvs
[Bf-blender-cvs] [d7cf464b493] master: Cycles: Remove "OptiX support is experimental" notice
Commit: d7cf464b493581a381540673aa7ed9e4ff47b425 Author: Patrick Mours Date: Tue Dec 8 16:13:04 2020 +0100 Branches: master https://developer.blender.org/rBd7cf464b493581a381540673aa7ed9e4ff47b425 Cycles: Remove "OptiX support is experimental" notice OptiX support is not in fact experimental anymore, so it is time for that notice to go. All Cycles features that are currently supported on the GPU do work now when OptiX is selected. === M intern/cycles/blender/addon/properties.py === diff --git a/intern/cycles/blender/addon/properties.py b/intern/cycles/blender/addon/properties.py index 0d861fde6fc..1cb29fc6cb0 100644 --- a/intern/cycles/blender/addon/properties.py +++ b/intern/cycles/blender/addon/properties.py @@ -1620,11 +1620,6 @@ class CyclesPreferences(bpy.types.AddonPreferences): for device in devices: box.prop(device, "use", text=device.name) -if device_type == 'OPTIX': -col = box.column(align=True) -col.label(text="OptiX support is experimental", icon='INFO') -col.label(text="Not all Cycles features are supported yet", icon='BLANK1') - def draw_impl(self, layout, context): row = layout.row() row.prop(self, "compute_device_type", expand=True) ___ Bf-blender-cvs mailing list Bf-blender-cvs@blender.org https://lists.blender.org/mailman/listinfo/bf-blender-cvs
[Bf-blender-cvs] [612b83bbd18] master: Cycles: Enable baking panel in OptiX and redirect those requests to CUDA for now
Commit: 612b83bbd183c214b2d252cf19cdf581f3d9cede Author: Patrick Mours Date: Tue Dec 8 15:42:00 2020 +0100 Branches: master https://developer.blender.org/rB612b83bbd183c214b2d252cf19cdf581f3d9cede Cycles: Enable baking panel in OptiX and redirect those requests to CUDA for now This enables support for baking when OptiX is active, but uses CUDA for that behind the scenes, since the way baking is currently implemented does not work well with OptiX. Reviewed By: brecht Differential Revision: https://developer.blender.org/D9784 === M intern/cycles/blender/addon/ui.py M intern/cycles/device/device_optix.cpp === diff --git a/intern/cycles/blender/addon/ui.py b/intern/cycles/blender/addon/ui.py index 623e5cf9e37..f24265d256a 100644 --- a/intern/cycles/blender/addon/ui.py +++ b/intern/cycles/blender/addon/ui.py @@ -1822,10 +1822,6 @@ class CYCLES_RENDER_PT_bake(CyclesButtonsPanel, Panel): bl_options = {'DEFAULT_CLOSED'} COMPAT_ENGINES = {'CYCLES'} -@classmethod -def poll(cls, context): -return CyclesButtonsPanel.poll(context) and not use_optix(context) - def draw(self, context): layout = self.layout layout.use_property_split = True @@ -1836,6 +1832,9 @@ class CYCLES_RENDER_PT_bake(CyclesButtonsPanel, Panel): cbk = scene.render.bake rd = scene.render +if use_optix(context): +layout.label(text="Baking is performed using CUDA instead of OptiX", icon='INFO') + if rd.use_bake_multires: layout.operator("object.bake_image", icon='RENDER_STILL') layout.prop(rd, "use_bake_multires") diff --git a/intern/cycles/device/device_optix.cpp b/intern/cycles/device/device_optix.cpp index 682540a51fd..c6276c1e955 100644 --- a/intern/cycles/device/device_optix.cpp +++ b/intern/cycles/device/device_optix.cpp @@ -297,6 +297,10 @@ class OptiXDevice : public CUDADevice { BVHLayoutMask get_bvh_layout_mask() const override { +// CUDA kernels are used when doing baking, so need to build a BVH those can understand too! 
+if (optix_module == NULL) + return CUDADevice::get_bvh_layout_mask(); + // OptiX has its own internal acceleration structure format return BVH_LAYOUT_OPTIX; } @@ -330,10 +334,9 @@ class OptiXDevice : public CUDADevice { return false; } -// Disable baking for now, since its kernel is not well-suited for inlining and is very slow +// Baking is currently performed using CUDA, so no need to load OptiX kernels if (requested_features.use_baking) { - set_error("OptiX backend does not support baking yet"); - return false; + return true; } const CUDAContextScope scope(cuContext); @@ -700,6 +703,11 @@ class OptiXDevice : public CUDADevice { while (task.acquire_tile(this, tile, task.tile_types)) { if (tile.task == RenderTile::PATH_TRACE) launch_render(task, tile, thread_index); +else if (tile.task == RenderTile::BAKE) { + // Perform baking using CUDA, since it is not currently implemented in OptiX + device_vector work_tiles(this, "work_tiles", MEM_READ_ONLY); + CUDADevice::render(task, tile, work_tiles); +} else if (tile.task == RenderTile::DENOISE) launch_denoise(task, tile); task.release_tile(tile); ___ Bf-blender-cvs mailing list Bf-blender-cvs@blender.org https://lists.blender.org/mailman/listinfo/bf-blender-cvs
[Bf-blender-cvs] [c10546f5e9f] master: Cycles: Add support for shader raytracing in OptiX
Commit: c10546f5e9fe2a300b6a21e1e16b22c93060d0e9 Author: Patrick Mours Date: Thu Dec 3 12:19:36 2020 +0100 Branches: master https://developer.blender.org/rBc10546f5e9fe2a300b6a21e1e16b22c93060d0e9 Cycles: Add support for shader raytracing in OptiX Support for the AO and bevel shader nodes requires calling "optixTrace" from within the shading VM, which is only allowed from inlined functions to the raygen program or callables. This patch therefore converts the shading VM to use direct callables to make it work. To prevent performance regressions a separate kernel module is compiled and used for this purpose. Reviewed By: brecht Differential Revision: https://developer.blender.org/D9733 === M intern/cycles/device/device_optix.cpp M intern/cycles/kernel/CMakeLists.txt M intern/cycles/kernel/kernel_subsurface.h M intern/cycles/kernel/kernel_types.h M intern/cycles/kernel/kernel_volume.h M intern/cycles/kernel/svm/svm.h === diff --git a/intern/cycles/device/device_optix.cpp b/intern/cycles/device/device_optix.cpp index 95234845f98..682540a51fd 100644 --- a/intern/cycles/device/device_optix.cpp +++ b/intern/cycles/device/device_optix.cpp @@ -141,7 +141,8 @@ class OptiXDevice : public CUDADevice { PG_BAKE, // kernel_bake_evaluate PG_DISP, // kernel_displace_evaluate PG_BACK, // kernel_background_evaluate -NUM_PROGRAM_GROUPS +PG_CALL, +NUM_PROGRAM_GROUPS = PG_CALL + 3 }; // List of OptiX pipelines @@ -334,11 +335,6 @@ class OptiXDevice : public CUDADevice { set_error("OptiX backend does not support baking yet"); return false; } -// Disable shader raytracing support for now, since continuation callables are slow -if (requested_features.use_shader_raytrace) { - set_error("OptiX backend does not support 'Ambient Occlusion' and 'Bevel' shader nodes yet"); - return false; -} const CUDAContextScope scope(cuContext); @@ -410,7 +406,9 @@ class OptiXDevice : public CUDADevice { } { // Load and compile PTX module with OptiX kernels - string ptx_data, ptx_filename = 
path_get("lib/kernel_optix.ptx"); + string ptx_data, ptx_filename = path_get(requested_features.use_shader_raytrace ? + "lib/kernel_optix_shader_raytrace.ptx" : + "lib/kernel_optix.ptx"); if (use_adaptive_compilation() || path_file_size(ptx_filename) == -1) { if (!getenv("OPTIX_ROOT_DIR")) { set_error( @@ -525,6 +523,21 @@ class OptiXDevice : public CUDADevice { group_descs[PG_BACK].raygen.entryFunctionName = "__raygen__kernel_optix_background"; } +// Shader raytracing replaces some functions with direct callables +if (requested_features.use_shader_raytrace) { + group_descs[PG_CALL + 0].kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES; + group_descs[PG_CALL + 0].callables.moduleDC = optix_module; + group_descs[PG_CALL + 0].callables.entryFunctionNameDC = "__direct_callable__svm_eval_nodes"; + group_descs[PG_CALL + 1].kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES; + group_descs[PG_CALL + 1].callables.moduleDC = optix_module; + group_descs[PG_CALL + 1].callables.entryFunctionNameDC = + "__direct_callable__kernel_volume_shadow"; + group_descs[PG_CALL + 2].kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES; + group_descs[PG_CALL + 2].callables.moduleDC = optix_module; + group_descs[PG_CALL + 2].callables.entryFunctionNameDC = + "__direct_callable__subsurface_scatter_multi_setup"; +} + check_result_optix_ret(optixProgramGroupCreate( context, group_descs, NUM_PROGRAM_GROUPS, _options, nullptr, 0, groups)); @@ -564,33 +577,51 @@ class OptiXDevice : public CUDADevice { # endif { // Create path tracing pipeline - OptixProgramGroup pipeline_groups[] = { -groups[PG_RGEN], -groups[PG_MISS], -groups[PG_HITD], -groups[PG_HITS], -groups[PG_HITL], + vector pipeline_groups; + pipeline_groups.reserve(NUM_PROGRAM_GROUPS); + pipeline_groups.push_back(groups[PG_RGEN]); + pipeline_groups.push_back(groups[PG_MISS]); + pipeline_groups.push_back(groups[PG_HITD]); + pipeline_groups.push_back(groups[PG_HITS]); + pipeline_groups.push_back(groups[PG_HITL]); # if OPTIX_ABI_VERSION >= 36 -groups[PG_HITD_MOTION], 
-groups[PG_HITS_MOTION], + if (motion_blur) { +pipeline_groups.push_back(groups[PG_HITD_MOTION]); +pipeline_groups.push_back(groups[PG_HITS_MOTION]); + } # endif - }; - check_result_optix_ret( -
[Bf-blender-cvs] [a3c40912153] master: Fix Cycles device kernels containing debug assertion code
Commit: a3c40912153235508aaccbd310f247073029becb Author: Patrick Mours Date: Thu Dec 3 15:20:50 2020 +0100 Branches: master https://developer.blender.org/rBa3c40912153235508aaccbd310f247073029becb Fix Cycles device kernels containing debug assertion code NanoVDB includes "assert.h" and makes use of "assert" in several places and since the compile pipeline for CUDA/OptiX kernels does not define "NDEBUG" for release builds, those debug checks were always added. This is not intended, so this patch disables "assert" for CUDA/OptiX by defining "NDEBUG" before including NanoVDB headers. This also fixes a warning about unknown pragmas in NanoVDB thrown by the CUDA compiler. === M intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h M intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h === diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h index 44c658d4cab..59b96c86c50 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h +++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h @@ -18,6 +18,7 @@ #define __KERNEL_CPU_IMAGE_H__ #ifdef WITH_NANOVDB +# define NANOVDB_USE_INTRINSICS # include # include #endif diff --git a/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h b/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h index 001bc652810..82ad9225fc3 100644 --- a/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h +++ b/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h @@ -15,6 +15,8 @@ */ #ifdef WITH_NANOVDB +# define NDEBUG /* Disable "assert" in device code */ +# define NANOVDB_USE_INTRINSICS # include "nanovdb/NanoVDB.h" # include "nanovdb/util/SampleFromVoxels.h" #endif ___ Bf-blender-cvs mailing list Bf-blender-cvs@blender.org https://lists.blender.org/mailman/listinfo/bf-blender-cvs
[Bf-blender-cvs] [a8f1bea5901] master: Fix NanoVDB not being enabled/disabled correctly in CMake profiles
Commit: a8f1bea5901c2cccf9b1408090f85ee072589cce Author: Patrick Mours Date: Thu Nov 12 12:49:12 2020 +0100 Branches: master https://developer.blender.org/rBa8f1bea5901c2cccf9b1408090f85ee072589cce Fix NanoVDB not being enabled/disabled correctly in CMake profiles This caused warnings when e.g. building the lite profile because NanoVDB was not disabled, but OpenVDB was. This fixes this by setting the "WITH_NANOVDB" flag too. === M build_files/cmake/config/blender_full.cmake M build_files/cmake/config/blender_lite.cmake M build_files/cmake/config/blender_release.cmake M build_files/cmake/config/bpy_module.cmake === diff --git a/build_files/cmake/config/blender_full.cmake b/build_files/cmake/config/blender_full.cmake index c5ed59dfaa5..08065ec0276 100644 --- a/build_files/cmake/config/blender_full.cmake +++ b/build_files/cmake/config/blender_full.cmake @@ -44,6 +44,7 @@ set(WITH_OPENMP ON CACHE BOOL "" FORCE) set(WITH_OPENSUBDIV ON CACHE BOOL "" FORCE) set(WITH_OPENVDB ON CACHE BOOL "" FORCE) set(WITH_OPENVDB_BLOSC ON CACHE BOOL "" FORCE) +set(WITH_NANOVDB ON CACHE BOOL "" FORCE) set(WITH_POTRACE ON CACHE BOOL "" FORCE) set(WITH_PYTHON_INSTALL ON CACHE BOOL "" FORCE) set(WITH_QUADRIFLOW ON CACHE BOOL "" FORCE) diff --git a/build_files/cmake/config/blender_lite.cmake b/build_files/cmake/config/blender_lite.cmake index f53bdaac41e..4150094e9f5 100644 --- a/build_files/cmake/config/blender_lite.cmake +++ b/build_files/cmake/config/blender_lite.cmake @@ -51,6 +51,7 @@ set(WITH_OPENIMAGEIO OFF CACHE BOOL "" FORCE) set(WITH_OPENMP OFF CACHE BOOL "" FORCE) set(WITH_OPENSUBDIV OFF CACHE BOOL "" FORCE) set(WITH_OPENVDB OFF CACHE BOOL "" FORCE) +set(WITH_NANOVDB OFF CACHE BOOL "" FORCE) set(WITH_QUADRIFLOW OFF CACHE BOOL "" FORCE) set(WITH_SDL OFF CACHE BOOL "" FORCE) set(WITH_TBB OFF CACHE BOOL "" FORCE) diff --git a/build_files/cmake/config/blender_release.cmake b/build_files/cmake/config/blender_release.cmake index f8f7b730efe..fd3225b0287 100644 --- 
a/build_files/cmake/config/blender_release.cmake +++ b/build_files/cmake/config/blender_release.cmake @@ -45,6 +45,7 @@ set(WITH_OPENMP ON CACHE BOOL "" FORCE) set(WITH_OPENSUBDIV ON CACHE BOOL "" FORCE) set(WITH_OPENVDB ON CACHE BOOL "" FORCE) set(WITH_OPENVDB_BLOSC ON CACHE BOOL "" FORCE) +set(WITH_NANOVDB ON CACHE BOOL "" FORCE) set(WITH_POTRACE ON CACHE BOOL "" FORCE) set(WITH_PYTHON_INSTALL ON CACHE BOOL "" FORCE) set(WITH_QUADRIFLOW ON CACHE BOOL "" FORCE) diff --git a/build_files/cmake/config/bpy_module.cmake b/build_files/cmake/config/bpy_module.cmake index 2c0da81a1ea..7fc68f97f29 100644 --- a/build_files/cmake/config/bpy_module.cmake +++ b/build_files/cmake/config/bpy_module.cmake @@ -28,6 +28,7 @@ set(WITH_OPENCOLLADA OFF CACHE BOOL "" FORCE) set(WITH_INTERNATIONAL OFF CACHE BOOL "" FORCE) set(WITH_BULLET OFF CACHE BOOL "" FORCE) set(WITH_OPENVDB OFF CACHE BOOL "" FORCE) +set(WITH_NANOVDB OFF CACHE BOOL "" FORCE) set(WITH_ALEMBIC OFF CACHE BOOL "" FORCE) # Depends on Python install, do this to quiet warning. ___ Bf-blender-cvs mailing list Bf-blender-cvs@blender.org https://lists.blender.org/mailman/listinfo/bf-blender-cvs
[Bf-blender-cvs] [a63208823c8] master: Fix NanoVDB compile errors with recent NanoVDB versions
Commit: a63208823c8426b76270393f9217d3cf3ef66d0b Author: Patrick Mours Date: Tue Nov 10 18:28:14 2020 +0100 Branches: master https://developer.blender.org/rBa63208823c8426b76270393f9217d3cf3ef66d0b Fix NanoVDB compile errors with recent NanoVDB versions There were some changes to the NanoVDB API that broke the way Cycles was previously using it. With these changes it compiles successfully again and also still compiles with the NanoVDB revision that is currently part of the Blender dependencies. Ref T81454. === M intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h M intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h === diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h index aaf58cbd0ab..44c658d4cab 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h +++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h @@ -478,7 +478,7 @@ template struct TextureInterpolator { #ifdef WITH_NANOVDB template struct NanoVDBInterpolator { - typedef nanovdb::ReadAccessor> ReadAccessorT; + typedef typename nanovdb::NanoGrid::AccessorType AccessorType; static ccl_always_inline float4 read(float r) { @@ -490,16 +490,22 @@ template struct NanoVDBInterpolator { return make_float4(r[0], r[1], r[2], 1.0f); } - static ccl_always_inline float4 interp_3d_closest(ReadAccessorT acc, float x, float y, float z) + static ccl_always_inline float4 interp_3d_closest(const AccessorType , +float x, +float y, +float z) { const nanovdb::Vec3f xyz(x, y, z); -return read(nanovdb::NearestNeighborSampler(acc)(xyz)); +return read(nanovdb::SampleFromVoxels(acc)(xyz)); } - static ccl_always_inline float4 interp_3d_linear(ReadAccessorT acc, float x, float y, float z) + static ccl_always_inline float4 interp_3d_linear(const AccessorType , + float x, + float y, + float z) { const nanovdb::Vec3f xyz(x - 0.5f, y - 0.5f, z - 0.5f); -return read(nanovdb::TrilinearSampler(acc)(xyz)); +return read(nanovdb::SampleFromVoxels(acc)(xyz)); } # 
if defined(__GNUC__) || defined(__clang__) @@ -508,7 +514,7 @@ template struct NanoVDBInterpolator { static ccl_never_inline # endif float4 - interp_3d_cubic(ReadAccessorT acc, float x, float y, float z) + interp_3d_cubic(const AccessorType , float x, float y, float z) { int ix, iy, iz; int nix, niy, niz; @@ -561,15 +567,15 @@ template struct NanoVDBInterpolator { using namespace nanovdb; NanoGrid *const grid = (NanoGrid *)info.data; -const NanoRoot = grid->tree().root(); +AccessorType acc = grid->getAccessor(); switch ((interp == INTERPOLATION_NONE) ? info.interpolation : interp) { case INTERPOLATION_CLOSEST: -return interp_3d_closest(root, x, y, z); +return interp_3d_closest(acc, x, y, z); case INTERPOLATION_LINEAR: -return interp_3d_linear(root, x, y, z); +return interp_3d_linear(acc, x, y, z); default: -return interp_3d_cubic(root, x, y, z); +return interp_3d_cubic(acc, x, y, z); } } }; diff --git a/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h b/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h index b8aaacba960..001bc652810 100644 --- a/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h +++ b/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h @@ -159,18 +159,18 @@ ccl_device_inline T kernel_tex_image_interp_nanovdb( const TextureInfo , float x, float y, float z, uint interpolation) { using namespace nanovdb; - typedef ReadAccessor> ReadAccessorT; NanoGrid *const grid = (NanoGrid *)info.data; - const NanoRoot = grid->tree().root(); + typedef typename nanovdb::NanoGrid::AccessorType AccessorType; + AccessorType acc = grid->getAccessor(); switch (interpolation) { case INTERPOLATION_CLOSEST: - return NearestNeighborSampler(root)(Vec3f(x, y, z)); + return SampleFromVoxels(acc)(Vec3f(x, y, z)); case INTERPOLATION_LINEAR: - return TrilinearSampler(root)(Vec3f(x - 0.5f, y - 0.5f, z - 0.5f)); + return SampleFromVoxels(acc)(Vec3f(x - 0.5f, y - 0.5f, z - 0.5f)); default: - TrilinearSampler s(root); + SampleFromVoxels s(acc); return 
kernel_tex_image_interp_tricubic_nanovdb(s, x - 0.5f, y - 0.5f, z - 0.5f); } } ___ Bf-blender-cvs mailing list Bf-blender-cvs@blender.org https://lists.blender.org/mailman/listinfo/bf-blender-cvs
[Bf-blender-cvs] [bd6bfba64da] master: Cycles: Enable NanoVDB usage by default
Commit: bd6bfba64dad2e14cab2c8372ba0f3ad39b93cdc Author: Patrick Mours Date: Tue Nov 10 16:19:47 2020 +0100 Branches: master https://developer.blender.org/rBbd6bfba64dad2e14cab2c8372ba0f3ad39b93cdc Cycles: Enable NanoVDB usage by default As discussed during the Rendering Meeting. Ref T81454. === M CMakeLists.txt === diff --git a/CMakeLists.txt b/CMakeLists.txt index aa791a53f81..67b57dc2fc4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -203,7 +203,7 @@ option(WITH_OPENVDB "Enable features relying on OpenVDB" ON) option(WITH_OPENVDB_BLOSC "Enable blosc compression for OpenVDB, only enable if OpenVDB was built with blosc support" ON) option(WITH_OPENVDB_3_ABI_COMPATIBLE "Assume OpenVDB library has been compiled with version 3 ABI compatibility" OFF) mark_as_advanced(WITH_OPENVDB_3_ABI_COMPATIBLE) -option(WITH_NANOVDB "Enable usage of NanoVDB data structure for accelerated rendering on the GPU" OFF) +option(WITH_NANOVDB "Enable usage of NanoVDB data structure for rendering on the GPU" ON) # GHOST Windowing Library Options option(WITH_GHOST_DEBUG "Enable debugging output for the GHOST library" OFF) ___ Bf-blender-cvs mailing list Bf-blender-cvs@blender.org https://lists.blender.org/mailman/listinfo/bf-blender-cvs
[Bf-blender-cvs] [118e31a0a99] master: Cycles: Fix tricubic sampling with NanoVDB
Commit: 118e31a0a995ae4e8845376215d9c35017a8f781 Author: Patrick Mours Date: Fri Nov 6 15:19:58 2020 +0100 Branches: master https://developer.blender.org/rB118e31a0a995ae4e8845376215d9c35017a8f781 Cycles: Fix tricubic sampling with NanoVDB Volumes using tricubic sampling were producing different results with NanoVDB compared to dense textures. This fixes that by using the same tricubic sampling algorithm in both cases. It also fixes some remaining offset issues and some minor things that broke OpenCL kernel compilation on NVIDIA. Reviewed By: brecht Differential Revision: https://developer.blender.org/D9491 === M intern/cycles/kernel/kernel_compat_opencl.h M intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h M intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h M intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h M intern/cycles/render/image_vdb.cpp M intern/cycles/util/util_types.h === diff --git a/intern/cycles/kernel/kernel_compat_opencl.h b/intern/cycles/kernel/kernel_compat_opencl.h index ba7ab43a47a..1848f6059b6 100644 --- a/intern/cycles/kernel/kernel_compat_opencl.h +++ b/intern/cycles/kernel/kernel_compat_opencl.h @@ -48,7 +48,7 @@ #define ccl_align(n) __attribute__((aligned(n))) #define ccl_optional_struct_init -#if __OPENCL_VERSION__ >= 200 +#if __OPENCL_VERSION__ >= 200 && !defined(__NV_CL_C_VERSION) # define ccl_loop_no_unroll __attribute__((opencl_unroll_hint(1))) #else # define ccl_loop_no_unroll diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h index b466b41f456..b97400a443a 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h +++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h @@ -28,7 +28,6 @@ CCL_NAMESPACE_BEGIN * instruction sets. 
*/ namespace { -template struct TextureInterpolator { #define SET_CUBIC_SPLINE_WEIGHTS(u, t) \ { \ u[0] = (((-1.0f / 6.0f) * t + 0.5f) * t - 0.5f) * t + (1.0f / 6.0f); \ @@ -38,6 +37,15 @@ template struct TextureInterpolator { } \ (void)0 +ccl_always_inline float frac(float x, int *ix) +{ + int i = float_to_int(x) - ((x < 0.0f) ? 1 : 0); + *ix = i; + return x - (float)i; +} + +template struct TextureInterpolator { + static ccl_always_inline float4 read(float4 r) { return r; @@ -106,13 +114,6 @@ template struct TextureInterpolator { return clamp(x, 0, width - 1); } - static ccl_always_inline float frac(float x, int *ix) - { -int i = float_to_int(x) - ((x < 0.0f) ? 1 : 0); -*ix = i; -return x - (float)i; - } - /* 2D interpolation */ static ccl_always_inline float4 interp_closest(const TextureInfo , float x, float y) @@ -370,7 +371,7 @@ template struct TextureInterpolator { static ccl_never_inline #endif float4 - interp_3d_tricubic(const TextureInfo , float x, float y, float z) + interp_3d_cubic(const TextureInfo , float x, float y, float z) { int width = info.width; int height = info.height; @@ -469,14 +470,16 @@ template struct TextureInterpolator { case INTERPOLATION_LINEAR: return interp_3d_linear(info, x, y, z); default: -return interp_3d_tricubic(info, x, y, z); +return interp_3d_cubic(info, x, y, z); } } -#undef SET_CUBIC_SPLINE_WEIGHTS }; #ifdef WITH_NANOVDB template struct NanoVDBInterpolator { + + typedef nanovdb::ReadAccessor> ReadAccessorT; + static ccl_always_inline float4 read(float r) { return make_float4(r, r, r, 1.0f); @@ -487,26 +490,93 @@ template struct NanoVDBInterpolator { return make_float4(r[0], r[1], r[2], 1.0f); } + static ccl_always_inline float4 interp_3d_closest(ReadAccessorT acc, float x, float y, float z) + { +const nanovdb::Vec3f xyz(x, y, z); +return read(nanovdb::NearestNeighborSampler(acc)(xyz)); + } + + static ccl_always_inline float4 interp_3d_linear(ReadAccessorT acc, float x, float y, float z) + { +const nanovdb::Vec3f xyz(x - 
0.5f, y - 0.5f, z - 0.5f); +return read(nanovdb::TrilinearSampler(acc)(xyz)); + } + +# if defined(__GNUC__) || defined(__clang__) + static ccl_always_inline +# else + static ccl_never_inline +# endif + float4 + interp_3d_cubic(ReadAccessorT acc, float x, float y, float z) + { +int ix, iy, iz; +int nix, niy, niz; +int pix, piy, piz; +int nnix, nniy, nniz; +/* Tricubic b-spline interpolation. */ +const float tx = frac(x - 0.5f, ); +const float ty = frac(y - 0.5f, ); +const float tz = frac(z - 0.5f, ); +pix = ix - 1; +piy = iy - 1; +piz = iz - 1; +nix = ix + 1; +niy = iy + 1; +niz = iz + 1; +nnix = ix + 2; +nniy = iy + 2; +nniz = iz + 2; + +const int xc[4] = {pix, ix, nix, nnix}; +const int yc[4
[Bf-blender-cvs] [fd9124ed6b3] master: Fix Cycles volume render differences with NanoVDB when using linear sampling
Commit: fd9124ed6b35fc3701ec3a4a9980c6eda5324fac Author: Patrick Mours Date: Wed Nov 4 15:09:06 2020 +0100 Branches: master https://developer.blender.org/rBfd9124ed6b35fc3701ec3a4a9980c6eda5324fac Fix Cycles volume render differences with NanoVDB when using linear sampling The NanoVDB sampling implementation behaves differently from dense texture sampling, so this adds a small offset to the voxel indices to correct for that. Also removes the need to modify the sampling coordinates by moving all the necessary transformations into the image transform. See also T81454. === M intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h M intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h M intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h M intern/cycles/render/image_vdb.cpp M intern/cycles/render/object.cpp === diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h index 347d0fec7f5..b466b41f456 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h +++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h @@ -490,21 +490,17 @@ template struct NanoVDBInterpolator { static ccl_always_inline float4 interp_3d(const TextureInfo , float x, float y, float z, InterpolationType interp) { +const nanovdb::Vec3f xyz(x, y, z); nanovdb::NanoGrid *const grid = (nanovdb::NanoGrid *)info.data; const nanovdb::NanoRoot = grid->tree().root(); -const nanovdb::Coord off(root.bbox().min()); -const nanovdb::Coord dim(root.bbox().dim()); -const nanovdb::Vec3f xyz(off[0] + x * dim[0], off[1] + y * dim[1], off[2] + z * dim[2]); - typedef nanovdb::ReadAccessor> ReadAccessorT; switch ((interp == INTERPOLATION_NONE) ? 
info.interpolation : interp) { - default: - case INTERPOLATION_LINEAR: -return read(nanovdb::SampleFromVoxels(root)(xyz)); case INTERPOLATION_CLOSEST: return read(nanovdb::SampleFromVoxels(root)(xyz)); - case INTERPOLATION_CUBIC: + case INTERPOLATION_LINEAR: +return read(nanovdb::SampleFromVoxels(root)(xyz)); + default: return read(nanovdb::SampleFromVoxels(root)(xyz)); } } diff --git a/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h b/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h index 5a005a3f65b..c2a0ee06dbc 100644 --- a/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h +++ b/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h @@ -130,21 +130,17 @@ template ccl_device_inline T kernel_tex_image_interp_nanovdb( const TextureInfo , float x, float y, float z, uint interpolation) { + const nanovdb::Vec3f xyz(x, y, z); nanovdb::NanoGrid *const grid = (nanovdb::NanoGrid *)info.data; const nanovdb::NanoRoot = grid->tree().root(); - const nanovdb::Coord off(root.bbox().min()); - const nanovdb::Coord dim(root.bbox().dim()); - const nanovdb::Vec3f xyz(off[0] + x * dim[0], off[1] + y * dim[1], off[2] + z * dim[2]); - typedef nanovdb::ReadAccessor> ReadAccessorT; switch (interpolation) { -default: -case INTERPOLATION_LINEAR: - return nanovdb::SampleFromVoxels(root)(xyz); case INTERPOLATION_CLOSEST: return nanovdb::SampleFromVoxels(root)(xyz); -case INTERPOLATION_CUBIC: +case INTERPOLATION_LINEAR: + return nanovdb::SampleFromVoxels(root)(xyz); +default: return nanovdb::SampleFromVoxels(root)(xyz); } } diff --git a/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h b/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h index 2f44f249c5f..cbf9a208112 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h +++ b/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h @@ -229,32 +229,29 @@ ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float3 P uint interpolation = (interp == INTERPOLATION_NONE) ? 
info->interpolation : interp; #ifdef WITH_NANOVDB + cnanovdb_Vec3F xyz; + xyz.mVec[0] = x; + xyz.mVec[1] = y; + xyz.mVec[2] = z; + if (info->data_type == IMAGE_DATA_TYPE_NANOVDB_FLOAT) { ccl_global cnanovdb_griddata *grid = (ccl_global cnanovdb_griddata *)(kg->buffers[info->cl_buffer] + info->data); const ccl_global cnanovdb_rootdataF *root = cnanovdb_treedata_rootF( cnanovdb_griddata_tree(grid)); -cnanovdb_Vec3F xyz; -xyz.mVec[0] = root->mBBox_min.mVec[0] + - x * (root->mBBox_max.mVec[0] - root->mBBox_min.mVec[0]); -xyz.mVec[1] = root->mBBox_min.mVec[1] + - y * (root->mBBox_max.mVec[1] - root->mBBox_min.mVec[1]); -xyz.mVec[2] = root->mBBox_min.mVec[2] + - z * (root->mBBox_max.mVec[2] - root->mBBox_min.mVec[2]); - cnanovdb_readaccessor acc; cnanovdb_read
[Bf-blender-cvs] [cf7343a3555] master: Fix Cycles kernel compile error with NanoVDB because of type redefinition
Commit: cf7343a35559c7fec2047c3e5d7ef4dd7c1e64a5 Author: Patrick Mours Date: Mon Nov 2 18:00:13 2020 +0100 Branches: master https://developer.blender.org/rBcf7343a35559c7fec2047c3e5d7ef4dd7c1e64a5 Fix Cycles kernel compile error with NanoVDB because of type redefinition Cycles defines some basic integer types since it cannot use the standard headers when compiling with NVRTC. NanoVDB however only does this when the "__CUDACC_RTC__" define is set and otherwise includes the standard "stdint.h" header which clashes with those typedefs. So for compatibility do the same thing in the Cycles kernel headers. See also T81454. === M intern/cycles/kernel/kernel_compat_cuda.h M intern/cycles/kernel/kernel_compat_optix.h === diff --git a/intern/cycles/kernel/kernel_compat_cuda.h b/intern/cycles/kernel/kernel_compat_cuda.h index 4094e173da9..ea3b78b7cef 100644 --- a/intern/cycles/kernel/kernel_compat_cuda.h +++ b/intern/cycles/kernel/kernel_compat_cuda.h @@ -32,8 +32,12 @@ /* Manual definitions so we can compile without CUDA toolkit. */ +#ifdef __CUDACC_RTC__ typedef unsigned int uint32_t; typedef unsigned long long uint64_t; +#else +# include <stdint.h> +#endif typedef unsigned short half; typedef unsigned long long CUtexObject; diff --git a/intern/cycles/kernel/kernel_compat_optix.h b/intern/cycles/kernel/kernel_compat_optix.h index e58d8b2aa63..064c99ca100 100644 --- a/intern/cycles/kernel/kernel_compat_optix.h +++ b/intern/cycles/kernel/kernel_compat_optix.h @@ -31,8 +31,12 @@ # define ATTR_FALLTHROUGH #endif +#ifdef __CUDACC_RTC__ typedef unsigned int uint32_t; typedef unsigned long long uint64_t; +#else +# include <stdint.h> +#endif typedef unsigned short half; typedef unsigned long long CUtexObject; ___ Bf-blender-cvs mailing list Bf-blender-cvs@blender.org https://lists.blender.org/mailman/listinfo/bf-blender-cvs
[Bf-blender-cvs] [c26ad42ea43] master: Simplify and enable NanoVDB dependency installation
Commit: c26ad42ea43aa2160a765980087f3dd320db55f0 Author: Patrick Mours Date: Thu Oct 29 13:38:16 2020 +0100 Branches: master https://developer.blender.org/rBc26ad42ea43aa2160a765980087f3dd320db55f0 Simplify and enable NanoVDB dependency installation Changes NanoVDB to be a standalone dependency that is independent of the OpenVDB one. It works by downloading the "feature/nanovdb" branch of OpenVDB, but using the NanoVDB CMake in the "nanovdb" subdirectory. Since it is header-only, only the install target is used. Reviewed By: brecht Differential Revision: https://developer.blender.org/D9383 === M build_files/build_environment/CMakeLists.txt M build_files/build_environment/cmake/harvest.cmake A build_files/build_environment/cmake/nanovdb.cmake M build_files/build_environment/cmake/openvdb.cmake M build_files/build_environment/cmake/options.cmake M build_files/build_environment/cmake/versions.cmake D build_files/build_environment/patches/openvdb_nanovdb.diff === diff --git a/build_files/build_environment/CMakeLists.txt b/build_files/build_environment/CMakeLists.txt index 59c15a03119..0bc85f20c16 100644 --- a/build_files/build_environment/CMakeLists.txt +++ b/build_files/build_environment/CMakeLists.txt @@ -85,6 +85,7 @@ include(cmake/flexbison.cmake) include(cmake/osl.cmake) include(cmake/tbb.cmake) include(cmake/openvdb.cmake) +include(cmake/nanovdb.cmake) include(cmake/python.cmake) include(cmake/python_site_packages.cmake) include(cmake/package_python.cmake) diff --git a/build_files/build_environment/cmake/harvest.cmake b/build_files/build_environment/cmake/harvest.cmake index 1fb56c4d568..1c5354aeb42 100644 --- a/build_files/build_environment/cmake/harvest.cmake +++ b/build_files/build_environment/cmake/harvest.cmake @@ -146,10 +146,8 @@ harvest(openjpeg/lib openjpeg/lib "*.a") harvest(opensubdiv/include opensubdiv/include "*.h") harvest(opensubdiv/lib opensubdiv/lib "*.a") harvest(openvdb/include/openvdb openvdb/include/openvdb "*.h") -if(WITH_NANOVDB) - 
harvest(openvdb/nanovdb nanovdb/include/nanovdb "*.h") -endif() harvest(openvdb/lib openvdb/lib "*.a") +harvest(nanovdb/nanovdb nanovdb/include/nanovdb "*.h") harvest(xr_openxr_sdk/include/openxr xr_openxr_sdk/include/openxr "*.h") harvest(xr_openxr_sdk/lib xr_openxr_sdk/lib "*.a") harvest(osl/bin osl/bin "oslc") diff --git a/build_files/build_environment/cmake/nanovdb.cmake b/build_files/build_environment/cmake/nanovdb.cmake new file mode 100644 index 000..89e7c38642d --- /dev/null +++ b/build_files/build_environment/cmake/nanovdb.cmake @@ -0,0 +1,54 @@ +# * BEGIN GPL LICENSE BLOCK * +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+# +# * END GPL LICENSE BLOCK * + +set(NANOVDB_EXTRA_ARGS + # NanoVDB is header-only, so only need the install target + -DNANOVDB_BUILD_UNITTESTS=OFF + -DNANOVDB_BUILD_EXAMPLES=OFF + -DNANOVDB_BUILD_BENCHMARK=OFF + -DNANOVDB_BUILD_DOCS=OFF + -DNANOVDB_BUILD_TOOLS=OFF + -DNANOVDB_CUDA_KEEP_PTX=OFF + # Do not need to include any of the dependencies because of this + -DNANOVDB_USE_OPENVDB=OFF + -DNANOVDB_USE_OPENGL=OFF + -DNANOVDB_USE_OPENCL=OFF + -DNANOVDB_USE_CUDA=OFF + -DNANOVDB_USE_TBB=OFF + -DNANOVDB_USE_BLOSC=OFF + -DNANOVDB_USE_ZLIB=OFF + -DNANOVDB_USE_OPTIX=OFF + -DNANOVDB_ALLOW_FETCHCONTENT=OFF +) + +ExternalProject_Add(nanovdb + URL ${NANOVDB_URI} + DOWNLOAD_DIR ${DOWNLOAD_DIR} + URL_HASH MD5=${NANOVDB_HASH} + PREFIX ${BUILD_DIR}/nanovdb + SOURCE_SUBDIR nanovdb + CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${LIBDIR}/nanovdb ${DEFAULT_CMAKE_FLAGS} ${NANOVDB_EXTRA_ARGS} + INSTALL_DIR ${LIBDIR}/nanovdb +) + +if(WIN32) + ExternalProject_Add_Step(nanovdb after_install +COMMAND ${CMAKE_COMMAND} -E copy_directory ${LIBDIR}/nanovdb/nanovdb ${HARVEST_TARGET}/nanovdb/include/nanovdb +DEPENDEES install + ) +endif() diff --git a/build_files/build_environment/cmake/openvdb.cmake b/build_files/build_environment/cmake/openvdb.cmake index 07d0297d5aa..2962f085e1b 100644 --- a/build_files/build_environme