from:"Patrick Mours"

[Bf-blender-cvs] [f2538c71739] master: Fix T104335: MNEE + OptiX OSL results in illegal address error

2023-02-06 Thread Patrick Mours

Commit: f2538c71739a19baa506201d80d1c48d73f4d504
Author: Patrick Mours
Date:   Mon Feb 6 15:06:52 2023 +0100
Branches: master
https://developer.blender.org/rBf2538c71739a19baa506201d80d1c48d73f4d504

Fix T104335: MNEE + OptiX OSL results in illegal address error

The OptiX pipeline created for OSL did not have a large enough continuation
stack to handle the MNEE ray generation program.

===

M   intern/cycles/device/optix/device_impl.cpp

===

diff --git a/intern/cycles/device/optix/device_impl.cpp 
b/intern/cycles/device/optix/device_impl.cpp
index 23e7bbfa7bb..06589140ad9 100644
--- a/intern/cycles/device/optix/device_impl.cpp
+++ b/intern/cycles/device/optix/device_impl.cpp
@@ -854,12 +854,14 @@ bool OptiXDevice::load_osl_kernels()
 context, group_descs, 2, _options, nullptr, 0, _groups[i * 
2]));
   }
 
+  OptixStackSizes stack_size[NUM_PROGRAM_GROUPS] = {};
   vector osl_stack_size(osl_groups.size());
 
   /* Update SBT with new entries. */
   sbt_data.alloc(NUM_PROGRAM_GROUPS + osl_groups.size());
   for (int i = 0; i < NUM_PROGRAM_GROUPS; ++i) {
 optix_assert(optixSbtRecordPackHeader(groups[i], _data[i]));
+optix_assert(optixProgramGroupGetStackSize(groups[i], _size[i]));
   }
   for (size_t i = 0; i < osl_groups.size(); ++i) {
 if (osl_groups[i] != NULL) {
@@ -907,13 +909,15 @@ bool OptiXDevice::load_osl_kernels()
  0,
  [PIP_SHADE]));
 
+const unsigned int css = 
std::max(stack_size[PG_RGEN_SHADE_SURFACE_RAYTRACE].cssRG,
+  
stack_size[PG_RGEN_SHADE_SURFACE_MNEE].cssRG);
 unsigned int dss = 0;
 for (unsigned int i = 0; i < osl_stack_size.size(); ++i) {
   dss = std::max(dss, osl_stack_size[i].dssDC);
 }
 
 optix_assert(optixPipelineSetStackSize(
-pipelines[PIP_SHADE], 0, dss, 0, pipeline_options.usesMotionBlur ? 3 : 
2));
+pipelines[PIP_SHADE], 0, dss, css, pipeline_options.usesMotionBlur ? 3 
: 2));
   }
 
   return !have_error();

___
Bf-blender-cvs mailing list
Bf-blender-cvs@blender.org
List details, subscription details or unsubscribe:
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

[Bf-blender-cvs] [fa9fc59b560] master: Fix T104240: OptiX OSL texture loading broken with displacement

2023-01-31 Thread Patrick Mours

Commit: fa9fc59b560a9743b1cbe7d46e0d5de98a2f3567
Author: Patrick Mours
Date:   Tue Jan 31 16:35:47 2023 +0100
Branches: master
https://developer.blender.org/rBfa9fc59b560a9743b1cbe7d46e0d5de98a2f3567

Fix T104240: OptiX OSL texture loading broken with displacement

The image manager that handles OSL textures on the GPU loads images
after displacement is evaluated by default. This is a
problem when the displacement shader uses any textures, hence
why the geometry manager already makes the image manager
load any images used in the displacement shader graph early
(`GeometryManager::device_update_displacement_images`).
This only handled Cycles image nodes however, not OSL nodes, so
if any `texture` calls were made in OSL those would be missed and
therefore crash when accessed on the GPU. Unfortunately it is not
simple to determine which textures referenced by OSL are needed
for displacement, so the solution for now is to simply load all of
them early if true displacement is used.
This patch also fixes the result of the displacement shader not
being used properly in OptiX.

Maniphest Tasks: T104240

Differential Revision: https://developer.blender.org/D17162

===

M   intern/cycles/kernel/osl/osl.h
M   intern/cycles/scene/geometry.cpp

===

diff --git a/intern/cycles/kernel/osl/osl.h b/intern/cycles/kernel/osl/osl.h
index ffaf87b7048..18288d202b5 100644
--- a/intern/cycles/kernel/osl/osl.h
+++ b/intern/cycles/kernel/osl/osl.h
@@ -161,7 +161,10 @@ ccl_device_inline void osl_eval_nodes(KernelGlobals kg,
 /* shadeindex = */ 0);
 #  endif
 
-  if (globals.Ci) {
+  if constexpr (type == SHADER_TYPE_DISPLACEMENT) {
+sd->P = globals.P;
+  }
+  else if (globals.Ci) {
 flatten_closure_tree(kg, sd, path_flag, globals.Ci);
   }
 }
diff --git a/intern/cycles/scene/geometry.cpp b/intern/cycles/scene/geometry.cpp
index a1df24878c9..4c5013b5a9f 100644
--- a/intern/cycles/scene/geometry.cpp
+++ b/intern/cycles/scene/geometry.cpp
@@ -23,7 +23,10 @@
 #include "subd/patch_table.h"
 #include "subd/split.h"
 
-#include "kernel/osl/globals.h"
+#ifdef WITH_OSL
+#  include "kernel/osl/globals.h"
+#  include "kernel/osl/services.h"
+#endif
 
 #include "util/foreach.h"
 #include "util/log.h"
@@ -1671,6 +1674,7 @@ void 
GeometryManager::device_update_displacement_images(Device *device,
   TaskPool pool;
   ImageManager *image_manager = scene->image_manager;
   set bump_images;
+  bool has_osl_node = false;
   foreach (Geometry *geom, scene->geometry) {
 if (geom->is_modified()) {
   /* Geometry-level check for hair shadow transparency.
@@ -1690,6 +1694,9 @@ void 
GeometryManager::device_update_displacement_images(Device *device,
   continue;
 }
 foreach (ShaderNode *node, shader->graph->nodes) {
+  if (node->special_type == SHADER_SPECIAL_TYPE_OSL) {
+has_osl_node = true;
+  }
   if (node->special_type != SHADER_SPECIAL_TYPE_IMAGE_SLOT) {
 continue;
   }
@@ -1705,6 +1712,28 @@ void 
GeometryManager::device_update_displacement_images(Device *device,
   }
 }
   }
+
+#ifdef WITH_OSL
+  /* If any OSL node is used for displacement, it may reference a texture. But 
it's
+   * unknown which ones, so have to load them all. */
+  if (has_osl_node) {
+set services_shared;
+device->foreach_device([_shared](Device *sub_device) {
+  OSLGlobals *og = (OSLGlobals *)sub_device->get_cpu_osl_memory();
+  services_shared.insert(og->services);
+});
+
+for (OSLRenderServices *services : services_shared) {
+  for (auto it = services->textures.begin(); it != 
services->textures.end(); ++it) {
+if (it->second->handle.get_manager() == image_manager) {
+  const int slot = it->second->handle.svm_slot();
+  bump_images.insert(slot);
+}
+  }
+}
+  }
+#endif
+
   foreach (int slot, bump_images) {
 pool.push(function_bind(
 ::device_update_slot, image_manager, device, scene, slot, 
));

___
Bf-blender-cvs mailing list
Bf-blender-cvs@blender.org
List details, subscription details or unsubscribe:
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

[Bf-blender-cvs] [129093fbce0] master: Cycles: Fix crash when rendering with OSL on multiple GPUs

2023-01-30 Thread Patrick Mours

Commit: 129093fbce0b73219fa8c270072ba5400120033d
Author: Patrick Mours
Date:   Mon Jan 30 19:40:02 2023 +0100
Branches: master
https://developer.blender.org/rB129093fbce0b73219fa8c270072ba5400120033d

Cycles: Fix crash when rendering with OSL on multiple GPUs

The `MultiDevice` implementation of `get_cpu_osl_memory` returns a
nullptr when there is no CPU device in the mix, so accessing the result
crashed in `update_osl_globals`. That function only updates name maps
that are not currently used on the GPU anyway, so it can simply be
skipped when the CPU is not used for rendering.

Maniphest Tasks: T104216

===

M   intern/cycles/scene/geometry.cpp

===

diff --git a/intern/cycles/scene/geometry.cpp b/intern/cycles/scene/geometry.cpp
index 8e831187477..a1df24878c9 100644
--- a/intern/cycles/scene/geometry.cpp
+++ b/intern/cycles/scene/geometry.cpp
@@ -306,6 +306,11 @@ void GeometryManager::update_osl_globals(Device *device, 
Scene *scene)
 {
 #ifdef WITH_OSL
   OSLGlobals *og = (OSLGlobals *)device->get_cpu_osl_memory();
+  if (og == nullptr) {
+/* Can happen when rendering with multiple GPUs, but no CPU (in which case 
the name maps filled
+ * below are not used anyway) */
+return;
+  }
 
   og->object_name_map.clear();
   og->object_names.clear();

___
Bf-blender-cvs mailing list
Bf-blender-cvs@blender.org
List details, subscription details or unsubscribe:
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

[Bf-blender-cvs] [4635dd6aed4] master: Fix T104157: Deleting an active OSL node causes issues

2023-01-27 Thread Patrick Mours

Commit: 4635dd6aed4c97ea234508e774db991926a7b3cf
Author: Patrick Mours
Date:   Fri Jan 27 15:58:03 2023 +0100
Branches: master
https://developer.blender.org/rB4635dd6aed4c97ea234508e774db991926a7b3cf

Fix T104157: Deleting an active OSL node causes issues

Removing all OSL script nodes from the shader graph would cause that
graph to no longer report that it uses `KERNEL_FEATURE_SHADER_RAYTRACE`
via `ShaderManager::get_graph_kernel_features`, but the shader object
itself would still have the `has_surface_raytrace` field set.
This caused kernels to be reloaded without shader raytracing support, but
later the `DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE`
kernel would still be invoked, since the shader continued to report that
it requires raytracing through the `SD_HAS_RAYTRACE` flag, which is set
because of `has_surface_raytrace`.
Fix that by ensuring `has_surface_raytrace` is reset on every shader update,
so that when all OSL script nodes are deleted it is set to false, and only
stays true when there are still OSL script nodes (or other nodes using it).

Maniphest Tasks: T104157

Differential Revision: https://developer.blender.org/D17140

===

M   intern/cycles/scene/osl.cpp

===

diff --git a/intern/cycles/scene/osl.cpp b/intern/cycles/scene/osl.cpp
index 73a8553c5d5..53e993b8135 100644
--- a/intern/cycles/scene/osl.cpp
+++ b/intern/cycles/scene/osl.cpp
@@ -1241,6 +1241,7 @@ void OSLCompiler::compile(OSLGlobals *og, Shader *shader)
 
 shader->has_surface = false;
 shader->has_surface_transparent = false;
+shader->has_surface_raytrace = false;
 shader->has_surface_bssrdf = false;
 shader->has_bump = has_bump;
 shader->has_bssrdf_bump = has_bump;

___
Bf-blender-cvs mailing list
Bf-blender-cvs@blender.org
List details, subscription details or unsubscribe:
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

[Bf-blender-cvs] [9066f2e0437] master: Cycles: Add support for OSL texture intrinsic on the GPU

2023-01-19 Thread Patrick Mours

Commit: 9066f2e0437a45d66f3b6a4bb0de7acf5ec40030
Author: Patrick Mours
Date:   Wed Jan 18 17:28:03 2023 +0100
Branches: master
https://developer.blender.org/rB9066f2e0437a45d66f3b6a4bb0de7acf5ec40030

Cycles: Add support for OSL texture intrinsic on the GPU

This makes it possible to use `texture` and `texture3d` in custom
OSL shaders with a constant image file name as argument on the
GPU, where previously texturing was only possible through Cycles
nodes.
For constant file name arguments, OSL calls
`OSL::RendererServices::get_texture_handle()` with the file name
string to convert it into an opaque handle for use on the GPU.
That is now used to load the respective image file using the Cycles
image manager and generate a SVM handle that can be used on
the GPU. Some care is necessary as the renderer services class is
shared across multiple Cycles instances, whereas the Cycles image
manager is local to each.

Maniphest Tasks: T101222

Differential Revision: https://developer.blender.org/D17032

===

M   intern/cycles/kernel/osl/services.cpp
M   intern/cycles/kernel/osl/services.h
M   intern/cycles/kernel/osl/services_gpu.h
M   intern/cycles/kernel/osl/types.h
M   intern/cycles/kernel/svm/ies.h
M   intern/cycles/scene/image.cpp
M   intern/cycles/scene/image.h
M   intern/cycles/scene/osl.cpp

===

diff --git a/intern/cycles/kernel/osl/services.cpp 
b/intern/cycles/kernel/osl/services.cpp
index 95d58875b91..92708df3162 100644
--- a/intern/cycles/kernel/osl/services.cpp
+++ b/intern/cycles/kernel/osl/services.cpp
@@ -20,6 +20,7 @@
 
 #include "kernel/osl/globals.h"
 #include "kernel/osl/services.h"
+#include "kernel/osl/types.h"
 
 #include "util/foreach.h"
 #include "util/log.h"
@@ -119,6 +120,8 @@ ustring OSLRenderServices::u_u("u");
 ustring OSLRenderServices::u_v("v");
 ustring OSLRenderServices::u_empty;
 
+ImageManager *OSLRenderServices::image_manager = nullptr;
+
 OSLRenderServices::OSLRenderServices(OSL::TextureSystem *texture_system, int 
device_type)
 : OSL::RendererServices(texture_system), device_type_(device_type)
 {
@@ -1154,7 +1157,7 @@ TextureSystem::TextureHandle 
*OSLRenderServices::get_texture_handle(ustring file
 /* For non-OIIO textures, just return a pointer to our own 
OSLTextureHandle. */
 if (it != textures.end()) {
   if (it->second->type != OSLTextureHandle::OIIO) {
-return (TextureSystem::TextureHandle *)it->second.get();
+return reinterpret_cast(it->second.get());
   }
 }
 
@@ -1173,16 +1176,53 @@ TextureSystem::TextureHandle 
*OSLRenderServices::get_texture_handle(ustring file
 
 /* Assign OIIO texture handle and return. */
 it->second->oiio_handle = handle;
-return (TextureSystem::TextureHandle *)it->second.get();
+return reinterpret_cast(it->second.get());
   }
   else {
-if (it != textures.end() && it->second->type == OSLTextureHandle::SVM &&
-it->second->svm_slots[0].w == -1) {
-  return reinterpret_cast(
-  static_cast(it->second->svm_slots[0].y + 1));
+/* Construct GPU texture handle for existing textures. */
+if (it != textures.end()) {
+  switch (it->second->type) {
+case OSLTextureHandle::OIIO:
+  return NULL;
+case OSLTextureHandle::SVM:
+  if (!it->second->handle.empty() && it->second->handle.get_manager() 
!= image_manager) {
+it.clear();
+break;
+  }
+  return reinterpret_cast(OSL_TEXTURE_HANDLE_TYPE_SVM |
+  
it->second->svm_slots[0].y);
+case OSLTextureHandle::IES:
+  if (!it->second->handle.empty() && it->second->handle.get_manager() 
!= image_manager) {
+it.clear();
+break;
+  }
+  return reinterpret_cast(OSL_TEXTURE_HANDLE_TYPE_IES |
+  
it->second->svm_slots[0].y);
+case OSLTextureHandle::AO:
+  return reinterpret_cast(
+  OSL_TEXTURE_HANDLE_TYPE_AO_OR_BEVEL | 1);
+case OSLTextureHandle::BEVEL:
+  return reinterpret_cast(
+  OSL_TEXTURE_HANDLE_TYPE_AO_OR_BEVEL | 2);
+  }
+}
+
+if (!image_manager) {
+  return NULL;
+}
+
+/* Load new textures using SVM image manager. */
+ImageHandle handle = image_manager->add_image(filename.string(), 
ImageParams());
+if (handle.empty()) {
+  return NULL;
+}
+
+if (!textures.insert(filename, new OSLTextureHandle(handle))) {
+  return NULL;
 }
 
-return NULL;
+return reinterpret_cast(OSL_TEXTURE_HANDLE_TYPE_SVM |
+

[Bf-blender-cvs] [153e1dc31a5] master: Fix T103513: Images lose their alpha channel when OSL is enabled for GPU Compute

2023-01-02 Thread Patrick Mours

Commit: 153e1dc31a518fc307c7d87bb5bd700688e438f9
Author: Patrick Mours
Date:   Mon Jan 2 13:52:59 2023 +0100
Branches: master
https://developer.blender.org/rB153e1dc31a518fc307c7d87bb5bd700688e438f9

Fix T103513: Images lose their alpha channel when OSL is enabled for GPU Compute

The "osl_texture" intrinsic was not implemented correctly. It should handle
alpha separately from color, because the number of channels input parameter
only counts color channels.

===

M   intern/cycles/kernel/osl/services_gpu.h

===

diff --git a/intern/cycles/kernel/osl/services_gpu.h 
b/intern/cycles/kernel/osl/services_gpu.h
index 744c7103b28..b9ffd959f1a 100644
--- a/intern/cycles/kernel/osl/services_gpu.h
+++ b/intern/cycles/kernel/osl/services_gpu.h
@@ -1532,7 +1532,7 @@ ccl_device_extern void 
osl_texture_set_missingcolor_alpha(ccl_private OSLTexture
 ccl_device_extern bool osl_texture(ccl_private ShaderGlobals *sg,
DeviceString filename,
ccl_private void *texture_handle,
-   OSLTextureOptions *opt,
+   ccl_private OSLTextureOptions *opt,
float s,
float t,
float dsdx,
@@ -1557,13 +1557,14 @@ ccl_device_extern bool osl_texture(ccl_private 
ShaderGlobals *sg,
 
   const float4 rgba = kernel_tex_image_interp(nullptr, id, s, 1.0f - t);
 
-  result[0] = rgba.x;
+  if (nchannels > 0)
+result[0] = rgba.x;
   if (nchannels > 1)
 result[1] = rgba.y;
   if (nchannels > 2)
 result[2] = rgba.z;
-  if (nchannels > 3)
-result[3] = rgba.w;
+  if (alpha)
+*alpha = rgba.w;
 
   return true;
 }
@@ -1571,7 +1572,7 @@ ccl_device_extern bool osl_texture(ccl_private 
ShaderGlobals *sg,
 ccl_device_extern bool osl_texture3d(ccl_private ShaderGlobals *sg,
  DeviceString filename,
  ccl_private void *texture_handle,
- OSLTextureOptions *opt,
+ ccl_private OSLTextureOptions *opt,
  ccl_private const float3 *P,
  ccl_private const float3 *dPdx,
  ccl_private const float3 *dPdy,
@@ -1594,13 +1595,14 @@ ccl_device_extern bool osl_texture3d(ccl_private 
ShaderGlobals *sg,
 
   const float4 rgba = kernel_tex_image_interp_3d(nullptr, id, *P, 
INTERPOLATION_NONE);
 
-  result[0] = rgba.x;
+  if (nchannels > 0)
+result[0] = rgba.x;
   if (nchannels > 1)
 result[1] = rgba.y;
   if (nchannels > 2)
 result[2] = rgba.z;
-  if (nchannels > 3)
-result[3] = rgba.w;
+  if (alpha)
+*alpha = rgba.w;
 
   return true;
 }
@@ -1608,7 +1610,7 @@ ccl_device_extern bool osl_texture3d(ccl_private 
ShaderGlobals *sg,
 ccl_device_extern bool osl_environment(ccl_private ShaderGlobals *sg,
DeviceString filename,
ccl_private void *texture_handle,
-   OSLTextureOptions *opt,
+   ccl_private OSLTextureOptions *opt,
ccl_private const float3 *R,
ccl_private const float3 *dRdx,
ccl_private const float3 *dRdy,
@@ -1621,13 +1623,14 @@ ccl_device_extern bool osl_environment(ccl_private 
ShaderGlobals *sg,
ccl_private float *dalphay,
ccl_private void *errormessage)
 {
-  result[0] = 1.0f;
+  if (nchannels > 0)
+result[0] = 1.0f;
   if (nchannels > 1)
 result[1] = 0.0f;
   if (nchannels > 2)
 result[2] = 1.0f;
-  if (nchannels > 3)
-result[3] = 1.0f;
+  if (alpha)
+*alpha = 1.0f;
 
   return false;
 }

___
Bf-blender-cvs mailing list
Bf-blender-cvs@blender.org
List details, subscription details or unsubscribe:
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

[Bf-blender-cvs] [6bd6d7aec75] geometry-nodes-simulation: Fix T103258: Deleting a shader with OptiX OSL results in an illegal address error

2022-12-19 Thread Patrick Mours

Commit: 6bd6d7aec754e4e4f3061bb2de34b73c76a60944
Author: Patrick Mours
Date:   Fri Dec 16 15:41:21 2022 +0100
Branches: geometry-nodes-simulation
https://developer.blender.org/rB6bd6d7aec754e4e4f3061bb2de34b73c76a60944

Fix T103258: Deleting a shader with OptiX OSL results in an illegal address 
error

Materials without connections to the output node would crash with OSL
in OptiX, since the Cycles `OSLCompiler` generates an empty shader
group reference for them, which resulted in the OptiX device
implementation setting an empty SBT entry for the corresponding direct
callables, which then crashed when calling those direct callables was
attempted in `osl_eval_nodes`. This fixes that by setting the SBT entries
for empty shader groups to a dummy direct callable that does nothing.

===

M   intern/cycles/device/optix/device_impl.cpp

===

diff --git a/intern/cycles/device/optix/device_impl.cpp 
b/intern/cycles/device/optix/device_impl.cpp
index 601e1193e26..23e7bbfa7bb 100644
--- a/intern/cycles/device/optix/device_impl.cpp
+++ b/intern/cycles/device/optix/device_impl.cpp
@@ -866,6 +866,11 @@ bool OptiXDevice::load_osl_kernels()
   optix_assert(optixSbtRecordPackHeader(osl_groups[i], 
_data[NUM_PROGRAM_GROUPS + i]));
   optix_assert(optixProgramGroupGetStackSize(osl_groups[i], 
_stack_size[i]));
 }
+else {
+  /* Default to "__direct_callable__dummy_services", so that OSL 
evaluation for empty
+   * materials has direct callables to call and does not crash. */
+  optix_assert(optixSbtRecordPackHeader(osl_groups.back(), 
_data[NUM_PROGRAM_GROUPS + i]));
+}
   }
   sbt_data.copy_to_device(); /* Upload updated SBT to device. */

___
Bf-blender-cvs mailing list
Bf-blender-cvs@blender.org
List details, subscription details or unsubscribe:
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

[Bf-blender-cvs] [cfb77c54b06] geometry-nodes-simulation: Fix T103257: Enabling or disabling viewport denoising while using OptiX OSL results in an error

2022-12-19 Thread Patrick Mours

Commit: cfb77c54b060e835ffc209a1d8d994faa63427b0
Author: Patrick Mours
Date:   Fri Dec 16 14:01:51 2022 +0100
Branches: geometry-nodes-simulation
https://developer.blender.org/rBcfb77c54b060e835ffc209a1d8d994faa63427b0

Fix T103257: Enabling or disabling viewport denoising while using OptiX OSL 
results in an error

Switching viewport denoising causes kernels to be reloaded with a new
feature mask, which would destroy the existing OptiX pipelines. But OSL
kernels were not reloaded as well, leaving the shading pipeline
uninitialized and therefore causing an error when an attempt is later made
to execute it. This fixes that by ensuring OSL kernels are always reloaded
when the normal kernels are too.

===

M   intern/cycles/device/optix/device_impl.cpp

===

diff --git a/intern/cycles/device/optix/device_impl.cpp 
b/intern/cycles/device/optix/device_impl.cpp
index f4d1969f3f3..601e1193e26 100644
--- a/intern/cycles/device/optix/device_impl.cpp
+++ b/intern/cycles/device/optix/device_impl.cpp
@@ -579,7 +579,11 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
   link_options.maxTraceDepth = 1;
   link_options.debugLevel = module_options.debugLevel;
 
-  if (kernel_features & (KERNEL_FEATURE_NODE_RAYTRACE | KERNEL_FEATURE_MNEE) 
&& !use_osl) {
+  if (use_osl) {
+/* Re-create OSL pipeline in case kernels are reloaded after it has been 
created before. */
+load_osl_kernels();
+  }
+  else if (kernel_features & (KERNEL_FEATURE_NODE_RAYTRACE | 
KERNEL_FEATURE_MNEE)) {
 /* Create shader raytracing and MNEE pipeline. */
 vector pipeline_groups;
 pipeline_groups.reserve(NUM_PROGRAM_GROUPS);
@@ -743,6 +747,11 @@ bool OptiXDevice::load_osl_kernels()
 }
   }
 
+  if (osl_kernels.empty()) {
+/* No OSL shader groups, so no need to create a pipeline. */
+return true;
+  }
+
   OptixProgramGroupOptions group_options = {}; /* There are no options 
currently. */
   OptixModuleCompileOptions module_options = {};
   module_options.optLevel = OPTIX_COMPILE_OPTIMIZATION_LEVEL_3;

___
Bf-blender-cvs mailing list
Bf-blender-cvs@blender.org
List details, subscription details or unsubscribe:
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

[Bf-blender-cvs] [a8530d31c29] master: Fix T103258: Deleting a shader with OptiX OSL results in an illegal address error

2022-12-16 Thread Patrick Mours

Commit: a8530d31c2971756df7f2b440a0de3d6fcfc3061
Author: Patrick Mours
Date:   Fri Dec 16 15:41:21 2022 +0100
Branches: master
https://developer.blender.org/rBa8530d31c2971756df7f2b440a0de3d6fcfc3061

Fix T103258: Deleting a shader with OptiX OSL results in an illegal address 
error

Materials without connections to the output node would crash with OSL
in OptiX, since the Cycles `OSLCompiler` generates an empty shader
group reference for them, which resulted in the OptiX device
implementation setting an empty SBT entry for the corresponding direct
callables, which then crashed when calling those direct callables was
attempted in `osl_eval_nodes`. This fixes that by setting the SBT entries
for empty shader groups to a dummy direct callable that does nothing.

===

M   intern/cycles/device/optix/device_impl.cpp

===

diff --git a/intern/cycles/device/optix/device_impl.cpp 
b/intern/cycles/device/optix/device_impl.cpp
index 601e1193e26..23e7bbfa7bb 100644
--- a/intern/cycles/device/optix/device_impl.cpp
+++ b/intern/cycles/device/optix/device_impl.cpp
@@ -866,6 +866,11 @@ bool OptiXDevice::load_osl_kernels()
   optix_assert(optixSbtRecordPackHeader(osl_groups[i], 
_data[NUM_PROGRAM_GROUPS + i]));
   optix_assert(optixProgramGroupGetStackSize(osl_groups[i], 
_stack_size[i]));
 }
+else {
+  /* Default to "__direct_callable__dummy_services", so that OSL 
evaluation for empty
+   * materials has direct callables to call and does not crash. */
+  optix_assert(optixSbtRecordPackHeader(osl_groups.back(), 
_data[NUM_PROGRAM_GROUPS + i]));
+}
   }
   sbt_data.copy_to_device(); /* Upload updated SBT to device. */

___
Bf-blender-cvs mailing list
Bf-blender-cvs@blender.org
List details, subscription details or unsubscribe:
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

[Bf-blender-cvs] [c9eb5834607] master: Fix T103257: Enabling or disabling viewport denoising while using OptiX OSL results in an error

2022-12-16 Thread Patrick Mours

Commit: c9eb5834607804eec1ead46289bb66a968ff2a1c
Author: Patrick Mours
Date:   Fri Dec 16 14:01:51 2022 +0100
Branches: master
https://developer.blender.org/rBc9eb5834607804eec1ead46289bb66a968ff2a1c

Fix T103257: Enabling or disabling viewport denoising while using OptiX OSL 
results in an error

Switching viewport denoising causes kernels to be reloaded with a new
feature mask, which would destroy the existing OptiX pipelines. But OSL
kernels were not reloaded as well, leaving the shading pipeline
uninitialized and therefore causing an error when an attempt is later made
to execute it. This fixes that by ensuring OSL kernels are always reloaded
when the normal kernels are too.

===

M   intern/cycles/device/optix/device_impl.cpp

===

diff --git a/intern/cycles/device/optix/device_impl.cpp 
b/intern/cycles/device/optix/device_impl.cpp
index f4d1969f3f3..601e1193e26 100644
--- a/intern/cycles/device/optix/device_impl.cpp
+++ b/intern/cycles/device/optix/device_impl.cpp
@@ -579,7 +579,11 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
   link_options.maxTraceDepth = 1;
   link_options.debugLevel = module_options.debugLevel;
 
-  if (kernel_features & (KERNEL_FEATURE_NODE_RAYTRACE | KERNEL_FEATURE_MNEE) 
&& !use_osl) {
+  if (use_osl) {
+/* Re-create OSL pipeline in case kernels are reloaded after it has been 
created before. */
+load_osl_kernels();
+  }
+  else if (kernel_features & (KERNEL_FEATURE_NODE_RAYTRACE | 
KERNEL_FEATURE_MNEE)) {
 /* Create shader raytracing and MNEE pipeline. */
 vector pipeline_groups;
 pipeline_groups.reserve(NUM_PROGRAM_GROUPS);
@@ -743,6 +747,11 @@ bool OptiXDevice::load_osl_kernels()
 }
   }
 
+  if (osl_kernels.empty()) {
+/* No OSL shader groups, so no need to create a pipeline. */
+return true;
+  }
+
   OptixProgramGroupOptions group_options = {}; /* There are no options 
currently. */
   OptixModuleCompileOptions module_options = {};
   module_options.optLevel = OPTIX_COMPILE_OPTIMIZATION_LEVEL_3;

___
Bf-blender-cvs mailing list
Bf-blender-cvs@blender.org
List details, subscription details or unsubscribe:
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

[Bf-blender-cvs] [c30fdb9cf52] master: Fix mismatching PTX function declarations for OSL intrinsics with string parameters

2022-12-14 Thread Patrick Mours

Commit: c30fdb9cf52d62dcd25fbc93f29c2983cc90a447
Author: Patrick Mours
Date:   Wed Dec 14 15:21:39 2022 +0100
Branches: master
https://developer.blender.org/rBc30fdb9cf52d62dcd25fbc93f29c2983cc90a447

Fix mismatching PTX function declarations for OSL intrinsics with string 
parameters

The use of a struct for device strings caused the CUDA compiler to
generate byte arrays as the argument type, whereas OSL generated
primitive integer types (for the hash). Fix that by using a typedef
instead so that the CUDA compiler too will use an integer type in the
PTX it generates.

Maniphest Tasks: T101222

===

M   intern/cycles/kernel/osl/services_gpu.h
M   intern/cycles/kernel/osl/types.h

===

diff --git a/intern/cycles/kernel/osl/services_gpu.h 
b/intern/cycles/kernel/osl/services_gpu.h
index 75cf39919a0..744c7103b28 100644
--- a/intern/cycles/kernel/osl/services_gpu.h
+++ b/intern/cycles/kernel/osl/services_gpu.h
@@ -14,111 +14,111 @@
 namespace DeviceStrings {
 
 /* "" */
-ccl_device_constant DeviceString _emptystring_ = {0ull};
+ccl_device_constant DeviceString _emptystring_ = 0ull;
 /* "common" */
-ccl_device_constant DeviceString u_common = {14645198576927606093ull};
+ccl_device_constant DeviceString u_common = 14645198576927606093ull;
 /* "world" */
-ccl_device_constant DeviceString u_world = {16436542438370751598ull};
+ccl_device_constant DeviceString u_world = 16436542438370751598ull;
 /* "shader" */
-ccl_device_constant DeviceString u_shader = {4279676006089868ull};
+ccl_device_constant DeviceString u_shader = 4279676006089868ull;
 /* "object" */
-ccl_device_constant DeviceString u_object = {973692718279674627ull};
+ccl_device_constant DeviceString u_object = 973692718279674627ull;
 /* "NDC" */
-ccl_device_constant DeviceString u_ndc = {5148305047403260775ull};
+ccl_device_constant DeviceString u_ndc = 5148305047403260775ull;
 /* "screen" */
-ccl_device_constant DeviceString u_screen = {14159088609039777114ull};
+ccl_device_constant DeviceString u_screen = 14159088609039777114ull;
 /* "camera" */
-ccl_device_constant DeviceString u_camera = {2159505832145726196ull};
+ccl_device_constant DeviceString u_camera = 2159505832145726196ull;
 /* "raster" */
-ccl_device_constant DeviceString u_raster = {7759263238610201778ull};
+ccl_device_constant DeviceString u_raster = 7759263238610201778ull;
 /* "hsv" */
-ccl_device_constant DeviceString u_hsv = {2177035556331879497ull};
+ccl_device_constant DeviceString u_hsv = 2177035556331879497ull;
 /* "hsl" */
-ccl_device_constant DeviceString u_hsl = {7749766809258288148ull};
+ccl_device_constant DeviceString u_hsl = 7749766809258288148ull;
 /* "XYZ" */
-ccl_device_constant DeviceString u_xyz = {4957977063494975483ull};
+ccl_device_constant DeviceString u_xyz = 4957977063494975483ull;
 /* "xyY" */
-ccl_device_constant DeviceString u_xyy = {5138822319725660255ull};
+ccl_device_constant DeviceString u_xyy = 5138822319725660255ull;
 /* "sRGB" */
-ccl_device_constant DeviceString u_srgb = {15368599878474175032ull};
+ccl_device_constant DeviceString u_srgb = 15368599878474175032ull;
 /* "object:location" */
-ccl_device_constant DeviceString u_object_location = {7846190347358762897ull};
+ccl_device_constant DeviceString u_object_location = 7846190347358762897ull;
 /* "object:color" */
-ccl_device_constant DeviceString u_object_color = {12695623857059169556ull};
+ccl_device_constant DeviceString u_object_color = 12695623857059169556ull;
 /* "object:alpha" */
-ccl_device_constant DeviceString u_object_alpha = {11165053919428293151ull};
+ccl_device_constant DeviceString u_object_alpha = 11165053919428293151ull;
 /* "object:index" */
-ccl_device_constant DeviceString u_object_index = {6588325838217472556ull};
+ccl_device_constant DeviceString u_object_index = 6588325838217472556ull;
 /* "geom:dupli_generated" */
-ccl_device_constant DeviceString u_geom_dupli_generated = 
{6715607178003388908ull};
+ccl_device_constant DeviceString u_geom_dupli_generated = 
6715607178003388908ull;
 /* "geom:dupli_uv" */
-ccl_device_constant DeviceString u_geom_dupli_uv = {1294253317490155849ull};
+ccl_device_constant DeviceString u_geom_dupli_uv = 1294253317490155849ull;
 /* "material:index" */
-ccl_device_constant DeviceString u_material_index = {741770758159634623ull};
+ccl_device_constant DeviceString u_material_index = 741770758159634623ull;
 /* "object:random" */
-ccl_device_constant DeviceString u_object_random = {15789063994977955884ull};
+ccl_device_constant DeviceString u_object_random = 15789063994977955884ull;
 /* "particle:index" */
-ccl_device_constant DeviceString u_particle_index = {9489711748229903784ull};
+ccl

[Bf-blender-cvs] [41a3de878f6] blender-v3.4-release: Fix part of T102450: Cycles OSL render issues for with normals in shader nodes

2022-11-21 Thread Patrick Mours

Commit: 41a3de878f64ae19e4f80c58102cc64e583d3a5f
Author: Patrick Mours
Date:   Fri Nov 11 16:42:49 2022 +0100
Branches: blender-v3.4-release
https://developer.blender.org/rB41a3de878f64ae19e4f80c58102cc64e583d3a5f

Fix part of T102450: Cycles OSL render issues for with normals in shader nodes

Commit c8dd33f5a37b6a6db0b6950d24f9a7cff5ceb799 in OSL changed behavior of
parameters that reference each other and are also overwritten with an
instance value. This is causing the "NormalIn" parameter of a few OSL nodes
in Cycles to be set to zero somehow, which should instead have received the
value from a "node_geometry" node Cycles generates and connects automatically.

I am not entirely sure why that is happening, but these parameters are
superfluous anyway, since OSL already provides the necessary data in the
global variable "N". So this patch simply removes those parameters (which
mimics SVM, where these parameters do not exist either), which also fixes
the rendering artifacts that occurred with recent OSL.

While this fixes built-in shader nodes, custom OSL scripts can still have
this problem.

Ref T101222

Differential Revision: https://developer.blender.org/D16470

===

M   intern/cycles/kernel/osl/shaders/node_geometry.osl
M   intern/cycles/kernel/osl/shaders/node_normal_map.osl
M   intern/cycles/kernel/osl/shaders/node_tangent.osl
M   intern/cycles/kernel/osl/shaders/node_texture_coordinate.osl
M   intern/cycles/scene/shader_nodes.cpp
M   intern/cycles/scene/shader_nodes.h

===

diff --git a/intern/cycles/kernel/osl/shaders/node_geometry.osl 
b/intern/cycles/kernel/osl/shaders/node_geometry.osl
index cc891abd6e3..5d9284deac2 100644
--- a/intern/cycles/kernel/osl/shaders/node_geometry.osl
+++ b/intern/cycles/kernel/osl/shaders/node_geometry.osl
@@ -3,8 +3,7 @@
 
 #include "stdcycles.h"
 
-shader node_geometry(normal NormalIn = N,
- string bump_offset = "center",
+shader node_geometry(string bump_offset = "center",
 
  output point Position = point(0.0, 0.0, 0.0),
  output normal Normal = normal(0.0, 0.0, 0.0),
@@ -17,7 +16,7 @@ shader node_geometry(normal NormalIn = N,
  output float RandomPerIsland = 0.0)
 {
   Position = P;
-  Normal = NormalIn;
+  Normal = N;
   TrueNormal = Ng;
   Incoming = I;
   Parametric = point(1.0 - u - v, u, 0.0);
diff --git a/intern/cycles/kernel/osl/shaders/node_normal_map.osl 
b/intern/cycles/kernel/osl/shaders/node_normal_map.osl
index 3cda485c686..7e41bbf1720 100644
--- a/intern/cycles/kernel/osl/shaders/node_normal_map.osl
+++ b/intern/cycles/kernel/osl/shaders/node_normal_map.osl
@@ -3,13 +3,12 @@
 
 #include "stdcycles.h"
 
-shader node_normal_map(normal NormalIn = N,
-   float Strength = 1.0,
+shader node_normal_map(float Strength = 1.0,
color Color = color(0.5, 0.5, 1.0),
string space = "tangent",
string attr_name = "geom:tangent",
string attr_sign_name = "geom:tangent_sign",
-   output normal Normal = NormalIn)
+   output normal Normal = N)
 {
   color mcolor = 2.0 * color(Color[0] - 0.5, Color[1] - 0.5, Color[2] - 0.5);
   int is_backfacing = backfacing();
@@ -71,5 +70,5 @@ shader node_normal_map(normal NormalIn = N,
   }
 
   if (Strength != 1.0)
-Normal = normalize(NormalIn + (Normal - NormalIn) * max(Strength, 0.0));
+Normal = normalize(N + (Normal - N) * max(Strength, 0.0));
 }
diff --git a/intern/cycles/kernel/osl/shaders/node_tangent.osl 
b/intern/cycles/kernel/osl/shaders/node_tangent.osl
index a302c001f08..b3808778b2f 100644
--- a/intern/cycles/kernel/osl/shaders/node_tangent.osl
+++ b/intern/cycles/kernel/osl/shaders/node_tangent.osl
@@ -3,8 +3,7 @@
 
 #include "stdcycles.h"
 
-shader node_tangent(normal NormalIn = N,
-string attr_name = "geom:tangent",
+shader node_tangent(string attr_name = "geom:tangent",
 string direction_type = "radial",
 string axis = "z",
 output normal Tangent = normalize(dPdu))
@@ -29,5 +28,5 @@ shader node_tangent(normal NormalIn = N,
   }
 
   T = transform("object", "world", T);
-  Tangent = cross(NormalIn, normalize(cross(T, NormalIn)));
+  Tangent = cross(N, normalize(cross(T, N)));
 }
diff --git a/intern/cycles/kernel/osl/shaders/node_texture_coordinate.osl 
b/intern/cycles/kernel/osl/shaders/node_texture_coordinate.osl
index 24875ce140a..cd2fdae3cb3 100644
--- a/intern/cycles/kernel/osl/shaders/node_texture_coordinate.osl
+++ b/intern/cycles/kernel/osl/shaders/node_texture_coordinate.osl
@@

[Bf-blender-cvs] [a859837cdea] master: Cleanup: Move OptiX denoiser code from device into denoiser class

2022-11-15 Thread Patrick Mours

Commit: a859837cdea0c34268c870da25b038e3826aecc2
Author: Patrick Mours
Date:   Tue Nov 15 13:05:23 2022 +0100
Branches: master
https://developer.blender.org/rBa859837cdea0c34268c870da25b038e3826aecc2

Cleanup: Move OptiX denoiser code from device into denoiser class

Cycles already treats denoising fairly separately in its code, with a
dedicated `Denoiser` base class used to describe denoising
behavior. That class has been fully implemented for OIDN
(`denoiser_oidn.cpp`), but for OptiX was mostly empty
(`denoiser_optix.cpp`) and denoising was instead implemented in
the OptiX device. That meant denoising code was split over various
files and directories, making it a bit awkward to work with. This
patch moves the OptiX denoising implementation into the existing
`OptiXDenoiser` class, so that everything is in one place. There are
no functional changes, code has been mostly moved as-is. To
retain support for potential other denoiser implementations based
on a GPU device in the future, the `DeviceDenoiser` base class was
kept and slightly extended (and its file renamed to
`denoiser_gpu.cpp` to follow similar naming rules as
`path_trace_work_*.cpp`).

Differential Revision: https://developer.blender.org/D16502

===

M   intern/cycles/CMakeLists.txt
M   intern/cycles/device/CMakeLists.txt
M   intern/cycles/device/denoise.h
M   intern/cycles/device/device.h
M   intern/cycles/device/optix/device_impl.cpp
M   intern/cycles/device/optix/device_impl.h
M   intern/cycles/integrator/CMakeLists.txt
M   intern/cycles/integrator/denoiser.cpp
D   intern/cycles/integrator/denoiser_device.h
R077intern/cycles/integrator/denoiser_device.cpp
intern/cycles/integrator/denoiser_gpu.cpp
A   intern/cycles/integrator/denoiser_gpu.h
M   intern/cycles/integrator/denoiser_optix.cpp
M   intern/cycles/integrator/denoiser_optix.h

===

diff --git a/intern/cycles/CMakeLists.txt b/intern/cycles/CMakeLists.txt
index 329aa3990f6..c6590a07ee4 100644
--- a/intern/cycles/CMakeLists.txt
+++ b/intern/cycles/CMakeLists.txt
@@ -253,6 +253,33 @@ if(WITH_CYCLES_OSL)
   )
 endif()
 
+if(WITH_CYCLES_DEVICE_CUDA OR WITH_CYCLES_DEVICE_OPTIX)
+  add_definitions(-DWITH_CUDA)
+
+  if(WITH_CUDA_DYNLOAD)
+include_directories(
+  ../../extern/cuew/include
+)
+add_definitions(-DWITH_CUDA_DYNLOAD)
+  else()
+include_directories(
+  SYSTEM
+  ${CUDA_TOOLKIT_INCLUDE}
+)
+  endif()
+endif()
+
+if(WITH_CYCLES_DEVICE_HIP)
+  add_definitions(-DWITH_HIP)
+
+  if(WITH_HIP_DYNLOAD)
+include_directories(
+  ../../extern/hipew/include
+)
+add_definitions(-DWITH_HIP_DYNLOAD)
+  endif()
+endif()
+
 if(WITH_CYCLES_DEVICE_OPTIX)
   find_package(OptiX 7.3.0)
 
@@ -261,12 +288,16 @@ if(WITH_CYCLES_DEVICE_OPTIX)
 include_directories(
   SYSTEM
   ${OPTIX_INCLUDE_DIR}
-  )
+)
   else()
 set_and_warn_library_found("OptiX" OPTIX_FOUND WITH_CYCLES_DEVICE_OPTIX)
   endif()
 endif()
 
+if(WITH_CYCLES_DEVICE_METAL)
+  add_definitions(-DWITH_METAL)
+endif()
+
 if (WITH_CYCLES_DEVICE_ONEAPI)
   add_definitions(-DWITH_ONEAPI)
 endif()
diff --git a/intern/cycles/device/CMakeLists.txt 
b/intern/cycles/device/CMakeLists.txt
index bfca3ab6aea..6808d8c04d7 100644
--- a/intern/cycles/device/CMakeLists.txt
+++ b/intern/cycles/device/CMakeLists.txt
@@ -8,28 +8,13 @@ set(INC
 set(INC_SYS )
 
 if(WITH_CYCLES_DEVICE_OPTIX OR WITH_CYCLES_DEVICE_CUDA)
-  if(WITH_CUDA_DYNLOAD)
-list(APPEND INC
-  ../../../extern/cuew/include
-)
-add_definitions(-DWITH_CUDA_DYNLOAD)
-  else()
-list(APPEND INC_SYS
-  ${CUDA_TOOLKIT_INCLUDE}
-)
+  if(NOT WITH_CUDA_DYNLOAD)
 add_definitions(-DCYCLES_CUDA_NVCC_EXECUTABLE="${CUDA_NVCC_EXECUTABLE}")
   endif()
 
   
add_definitions(-DCYCLES_RUNTIME_OPTIX_ROOT_DIR="${CYCLES_RUNTIME_OPTIX_ROOT_DIR}")
 endif()
 
-if(WITH_CYCLES_DEVICE_HIP AND WITH_HIP_DYNLOAD)
-  list(APPEND INC
-../../../extern/hipew/include
-  )
-  add_definitions(-DWITH_HIP_DYNLOAD)
-endif()
-
 set(SRC_BASE
   device.cpp
   denoise.cpp
@@ -168,24 +153,15 @@ if(WITH_CYCLES_DEVICE_HIP AND WITH_HIP_DYNLOAD)
   )
 endif()
 
-if(WITH_CYCLES_DEVICE_CUDA)
-  add_definitions(-DWITH_CUDA)
-endif()
-if(WITH_CYCLES_DEVICE_HIP)
-  add_definitions(-DWITH_HIP)
-endif()
-if(WITH_CYCLES_DEVICE_OPTIX)
-  add_definitions(-DWITH_OPTIX)
-endif()
 if(WITH_CYCLES_DEVICE_METAL)
   list(APPEND LIB
 ${METAL_LIBRARY}
   )
-  add_definitions(-DWITH_METAL)
   list(APPEND SRC
 ${SRC_METAL}
   )
 endif()
+
 if (WITH_CYCLES_DEVICE_ONEAPI)
   if(WITH_CYCLES_ONEAPI_BINARIES)
 set(cycles_kernel_oneapi_lib_suffix "_aot")
@@ -203,7 +179,6 @@ if (WITH_CYCLES_DEVICE_ONEAPI)
   else()
 list(APPEND LIB ${SYCL_LIBRARY})
   endif()
-  add_definitions(-DWITH_ONEAPI)
   list(APPEND SRC
 ${SRC_ONEAPI}
   )
diff

[Bf-blender-cvs] [9d827a1834a] master: Fix OSL object matrix with Cycles on the GPU

2022-11-11 Thread Patrick Mours

Commit: 9d827a1834ab0e2211488251dc9133e7164652dd
Author: Patrick Mours
Date:   Fri Nov 11 20:20:47 2022 +0100
Branches: master
https://developer.blender.org/rB9d827a1834ab0e2211488251dc9133e7164652dd

Fix OSL object matrix with Cycles on the GPU

The OSL GPU services implementation of "osl_get_matrix" and
"osl_get_inverse_matrix" was missing support for the "common",
"shader" and "object" matrices and thus any matrix operations in OSL
shaders using these would not work. This patch adds the proper
implementation copied from the OSL CPU services.

Maniphest Tasks: T101222

===

M   intern/cycles/kernel/osl/services_gpu.h

===

diff --git a/intern/cycles/kernel/osl/services_gpu.h 
b/intern/cycles/kernel/osl/services_gpu.h
index f762c7258df..75cf39919a0 100644
--- a/intern/cycles/kernel/osl/services_gpu.h
+++ b/intern/cycles/kernel/osl/services_gpu.h
@@ -15,6 +15,14 @@ namespace DeviceStrings {
 
 /* "" */
 ccl_device_constant DeviceString _emptystring_ = {0ull};
+/* "common" */
+ccl_device_constant DeviceString u_common = {14645198576927606093ull};
+/* "world" */
+ccl_device_constant DeviceString u_world = {16436542438370751598ull};
+/* "shader" */
+ccl_device_constant DeviceString u_shader = {4279676006089868ull};
+/* "object" */
+ccl_device_constant DeviceString u_object = {973692718279674627ull};
 /* "NDC" */
 ccl_device_constant DeviceString u_ndc = {5148305047403260775ull};
 /* "screen" */
@@ -23,10 +31,6 @@ ccl_device_constant DeviceString u_screen = 
{14159088609039777114ull};
 ccl_device_constant DeviceString u_camera = {2159505832145726196ull};
 /* "raster" */
 ccl_device_constant DeviceString u_raster = {7759263238610201778ull};
-/* "world" */
-ccl_device_constant DeviceString u_world = {16436542438370751598ull};
-/* "common" */
-ccl_device_constant DeviceString u_common = {14645198576927606093ull};
 /* "hsv" */
 ccl_device_constant DeviceString u_hsv = {2177035556331879497ull};
 /* "hsl" */
@@ -425,6 +429,7 @@ ccl_device_extern bool osl_transformc(ccl_private 
ShaderGlobals *sg,
 
 /* Matrix Utilities */
 
+#include "kernel/geom/object.h"
 #include "util/transform.h"
 
 ccl_device_forceinline void copy_matrix(ccl_private float *res, const 
Transform )
@@ -465,24 +470,24 @@ ccl_device_forceinline void copy_matrix(ccl_private float 
*res, const Projection
   res[14] = tfm.z.w;
   res[15] = tfm.w.w;
 }
-ccl_device_forceinline void copy_identity_matrix(ccl_private float *res)
+ccl_device_forceinline void copy_identity_matrix(ccl_private float *res, float 
value = 1.0f)
 {
-  res[0] = 1.0f;
+  res[0] = value;
   res[1] = 0.0f;
   res[2] = 0.0f;
   res[3] = 0.0f;
   res[4] = 0.0f;
-  res[5] = 1.0f;
+  res[5] = value;
   res[6] = 0.0f;
   res[7] = 0.0f;
   res[8] = 0.0f;
   res[9] = 0.0f;
-  res[10] = 1.0f;
+  res[10] = value;
   res[11] = 0.0f;
   res[12] = 0.0f;
   res[13] = 0.0f;
   res[14] = 0.0f;
-  res[15] = 1.0f;
+  res[15] = value;
 }
 ccl_device_forceinline Transform convert_transform(ccl_private const float *m)
 {
@@ -534,22 +539,7 @@ ccl_device_extern void osl_div_mfm(ccl_private float *res, 
float a, ccl_private
 ccl_device_extern void osl_div_m_ff(ccl_private float *res, float a, float b)
 {
   float f = (b == 0) ? 0.0f : (a / b);
-  res[0] = f;
-  res[1] = 0.0f;
-  res[2] = 0.0f;
-  res[3] = 0.0f;
-  res[4] = 0.0f;
-  res[5] = f;
-  res[6] = 0.0f;
-  res[7] = 0.0f;
-  res[8] = 0.0f;
-  res[9] = 0.0f;
-  res[10] = f;
-  res[11] = 0.0f;
-  res[12] = 0.0f;
-  res[13] = 0.0f;
-  res[14] = 0.0f;
-  res[15] = f;
+  copy_identity_matrix(res, f);
 }
 
 ccl_device_extern void osl_transform_vmv(ccl_private float3 *res,
@@ -607,27 +597,43 @@ ccl_device_extern void osl_transformn_dvmdv(ccl_private 
float3 *res,
 }
 
 ccl_device_extern bool osl_get_matrix(ccl_private ShaderGlobals *sg,
-  ccl_private float *result,
+  ccl_private float *res,
   DeviceString from)
 {
-  if (from == DeviceStrings::u_ndc) {
-copy_matrix(result, kernel_data.cam.ndctoworld);
+  if (from == DeviceStrings::u_common || from == DeviceStrings::u_world) {
+copy_identity_matrix(res);
 return true;
   }
-  if (from == DeviceStrings::u_raster) {
-copy_matrix(result, kernel_data.cam.rastertoworld);
+  if (from == DeviceStrings::u_shader || from == DeviceStrings::u_object) {
+KernelGlobals kg = nullptr;
+ccl_private ShaderData *const sd = static_cast(sg->renderstate);
+int object = sd->object;
+
+if (object != OBJECT_NONE) {
+  const Transform tfm = object_get_transform(kg, sd);
+  copy_matrix(res, tfm);
+  return true;
+}
+else if (sd->t

[Bf-blender-cvs] [097a13f5be1] master: Fix broken Cycles rendering with recent OSL versions

2022-11-11 Thread Patrick Mours

Commit: 097a13f5be143bd37bfd635cbf31515d531d7a8a
Author: Patrick Mours
Date:   Fri Nov 11 16:42:49 2022 +0100
Branches: master
https://developer.blender.org/rB097a13f5be143bd37bfd635cbf31515d531d7a8a

Fix broken Cycles rendering with recent OSL versions

Commit c8dd33f5a37b6a6db0b6950d24f9a7cff5ceb799 in OSL
changed behavior of shader parameters that reference each other
and are also overwritten with an instance value.
This is causing the "NormalIn" parameter of a few OSL nodes in
Cycles to be set to zero somehow, which should instead have
received the value from a "node_geometry" node Cycles generates
and connects automatically. I am not entirely sure why that is
happening, but these parameters are superfluous anyway, since
OSL already provides the necessary data in the global variable "N".
So this patch simply removes those parameters (which mimics
SVM, where these parameters do not exist either), which also
fixes the rendering artifacts that occurred with recent OSL.

Maniphest Tasks: T101222

Differential Revision: https://developer.blender.org/D16470

===

M   intern/cycles/kernel/osl/services_gpu.h
M   intern/cycles/kernel/osl/shaders/node_geometry.osl
M   intern/cycles/kernel/osl/shaders/node_normal_map.osl
M   intern/cycles/kernel/osl/shaders/node_tangent.osl
M   intern/cycles/kernel/osl/shaders/node_texture_coordinate.osl
M   intern/cycles/scene/shader_nodes.cpp
M   intern/cycles/scene/shader_nodes.h

===

diff --git a/intern/cycles/kernel/osl/services_gpu.h 
b/intern/cycles/kernel/osl/services_gpu.h
index e6e19b8c484..f762c7258df 100644
--- a/intern/cycles/kernel/osl/services_gpu.h
+++ b/intern/cycles/kernel/osl/services_gpu.h
@@ -419,6 +419,8 @@ ccl_device_extern bool osl_transformc(ccl_private 
ShaderGlobals *sg,
   c_out[i] = rgb;
 }
   }
+
+  return true;
 }
 
 /* Matrix Utilities */
diff --git a/intern/cycles/kernel/osl/shaders/node_geometry.osl 
b/intern/cycles/kernel/osl/shaders/node_geometry.osl
index cc891abd6e3..5d9284deac2 100644
--- a/intern/cycles/kernel/osl/shaders/node_geometry.osl
+++ b/intern/cycles/kernel/osl/shaders/node_geometry.osl
@@ -3,8 +3,7 @@
 
 #include "stdcycles.h"
 
-shader node_geometry(normal NormalIn = N,
- string bump_offset = "center",
+shader node_geometry(string bump_offset = "center",
 
  output point Position = point(0.0, 0.0, 0.0),
  output normal Normal = normal(0.0, 0.0, 0.0),
@@ -17,7 +16,7 @@ shader node_geometry(normal NormalIn = N,
  output float RandomPerIsland = 0.0)
 {
   Position = P;
-  Normal = NormalIn;
+  Normal = N;
   TrueNormal = Ng;
   Incoming = I;
   Parametric = point(1.0 - u - v, u, 0.0);
diff --git a/intern/cycles/kernel/osl/shaders/node_normal_map.osl 
b/intern/cycles/kernel/osl/shaders/node_normal_map.osl
index 3cda485c686..7e41bbf1720 100644
--- a/intern/cycles/kernel/osl/shaders/node_normal_map.osl
+++ b/intern/cycles/kernel/osl/shaders/node_normal_map.osl
@@ -3,13 +3,12 @@
 
 #include "stdcycles.h"
 
-shader node_normal_map(normal NormalIn = N,
-   float Strength = 1.0,
+shader node_normal_map(float Strength = 1.0,
color Color = color(0.5, 0.5, 1.0),
string space = "tangent",
string attr_name = "geom:tangent",
string attr_sign_name = "geom:tangent_sign",
-   output normal Normal = NormalIn)
+   output normal Normal = N)
 {
   color mcolor = 2.0 * color(Color[0] - 0.5, Color[1] - 0.5, Color[2] - 0.5);
   int is_backfacing = backfacing();
@@ -71,5 +70,5 @@ shader node_normal_map(normal NormalIn = N,
   }
 
   if (Strength != 1.0)
-Normal = normalize(NormalIn + (Normal - NormalIn) * max(Strength, 0.0));
+Normal = normalize(N + (Normal - N) * max(Strength, 0.0));
 }
diff --git a/intern/cycles/kernel/osl/shaders/node_tangent.osl 
b/intern/cycles/kernel/osl/shaders/node_tangent.osl
index a302c001f08..b3808778b2f 100644
--- a/intern/cycles/kernel/osl/shaders/node_tangent.osl
+++ b/intern/cycles/kernel/osl/shaders/node_tangent.osl
@@ -3,8 +3,7 @@
 
 #include "stdcycles.h"
 
-shader node_tangent(normal NormalIn = N,
-string attr_name = "geom:tangent",
+shader node_tangent(string attr_name = "geom:tangent",
 string direction_type = "radial",
 string axis = "z",
 output normal Tangent = normalize(dPdu))
@@ -29,5 +28,5 @@ shader node_tangent(normal NormalIn = N,
   }
 
   T = transform("object", "world", T);
-  Tangent = cross(NormalIn, normalize(cross(T, NormalIn)));
+  Tangent = cross(N, n

[Bf-blender-cvs] [6a8ce5ec1c5] master: Fix abort when rendering with OSL and OptiX in Cycles

2022-11-10 Thread Patrick Mours

Commit: 6a8ce5ec1c550cbcaf2fbb8e05c0743b1bda40d2
Author: Patrick Mours
Date:   Thu Nov 10 19:27:07 2022 +0100
Branches: master
https://developer.blender.org/rB6a8ce5ec1c550cbcaf2fbb8e05c0743b1bda40d2

Fix abort when rendering with OSL and OptiX in Cycles

LLVM could kill the process during OSL PTX code generation, due
to generated symbols containing invalid characters in their names.
Those names are generated by Cycles and were not properly filtered:

- If the locale was set to something other than the minimal locale
  (when Blender was built with WITH_INTERNATIONAL), pointers
  may be printed with grouping characters, like commas or dots,
  added to them.
- Material names from Blender may contain the full range of UTF8
  characters.

This fixes those cases by forcing the locale used in the symbol name
generation to the minimal locale and using the material name hash
instead of the actual material name string.

===

M   intern/cycles/scene/osl.cpp

===

diff --git a/intern/cycles/scene/osl.cpp b/intern/cycles/scene/osl.cpp
index 3ea406b6935..4dc5fb4edf7 100644
--- a/intern/cycles/scene/osl.cpp
+++ b/intern/cycles/scene/osl.cpp
@@ -641,6 +641,8 @@ string OSLCompiler::id(ShaderNode *node)
 {
   /* assign layer unique name based on pointer address + bump mode */
   stringstream stream;
+  stream.imbue(std::locale("C")); /* Ensure that no grouping characters (e.g. 
commas with en_US
+ locale) are added to the pointer string */
   stream << "node_" << node->type->name << "_" << node;
 
   return stream.str();
@@ -1132,12 +1134,12 @@ OSL::ShaderGroupRef OSLCompiler::compile_type(Shader 
*shader, ShaderGraph *graph
 {
   current_type = type;
 
-  string name = shader->name.string();
-  /* Replace invalid characters. */
-  for (size_t i; (i = name.find_first_of(" .,:;+-*/#")) != string::npos;)
-name.replace(i, 1, "_");
+  /* Use name hash to identify shader group to avoid issues with 
non-alphanumeric characters */
+  stringstream name;
+  name.imbue(std::locale("C"));
+  name << "shader_" << shader->name.hash();
 
-  OSL::ShaderGroupRef group = ss->ShaderGroupBegin(name);
+  OSL::ShaderGroupRef group = ss->ShaderGroupBegin(name.str());
 
   ShaderNode *output = graph->output();
   ShaderNodeSet dependencies;

___
Bf-blender-cvs mailing list
Bf-blender-cvs@blender.org
List details, subscription details or unsubscribe:
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

[Bf-blender-cvs] [e6b38deb9db] master: Cycles: Add basic support for using OSL with OptiX

2022-11-09 Thread Patrick Mours

Commit: e6b38deb9dbb58118f6ee644409ce52f06eac5e5
Author: Patrick Mours
Date:   Wed Nov 9 14:25:32 2022 +0100
Branches: master
https://developer.blender.org/rBe6b38deb9dbb58118f6ee644409ce52f06eac5e5

Cycles: Add basic support for using OSL with OptiX

This patch generalizes the OSL support in Cycles to include GPU
device types and adds an implementation for that in the OptiX
device. There are some caveats still, including simplified texturing
due to lack of OIIO on the GPU and a few missing OSL intrinsics.

Note that this is incomplete and missing an update to the OSL
library before being enabled! The implementation is already
committed now to simplify further development.

Maniphest Tasks: T101222

Differential Revision: https://developer.blender.org/D15902

===

M   build_files/cmake/platform/platform_win32.cmake
M   intern/cycles/blender/addon/__init__.py
M   intern/cycles/blender/addon/engine.py
M   intern/cycles/blender/addon/properties.py
M   intern/cycles/blender/addon/ui.py
M   intern/cycles/device/device.h
M   intern/cycles/device/kernel.cpp
M   intern/cycles/device/kernel.h
M   intern/cycles/device/multi/device.cpp
M   intern/cycles/device/optix/device.cpp
M   intern/cycles/device/optix/device_impl.cpp
M   intern/cycles/device/optix/device_impl.h
M   intern/cycles/device/optix/queue.cpp
M   intern/cycles/kernel/CMakeLists.txt
M   intern/cycles/kernel/closure/bsdf.h
M   intern/cycles/kernel/device/cuda/compat.h
M   intern/cycles/kernel/device/hip/compat.h
M   intern/cycles/kernel/device/metal/compat.h
M   intern/cycles/kernel/device/oneapi/compat.h
M   intern/cycles/kernel/device/optix/compat.h
M   intern/cycles/kernel/device/optix/globals.h
A   intern/cycles/kernel/device/optix/kernel_osl.cu
M   intern/cycles/kernel/integrator/displacement_shader.h
M   intern/cycles/kernel/integrator/surface_shader.h
M   intern/cycles/kernel/integrator/volume_shader.h
M   intern/cycles/kernel/osl/closures.cpp
M   intern/cycles/kernel/osl/closures_setup.h
M   intern/cycles/kernel/osl/closures_template.h
M   intern/cycles/kernel/osl/osl.h
M   intern/cycles/kernel/osl/services.cpp
M   intern/cycles/kernel/osl/services.h
A   intern/cycles/kernel/osl/services_gpu.h
A   intern/cycles/kernel/osl/services_optix.cu
M   intern/cycles/kernel/osl/types.h
M   intern/cycles/kernel/types.h
M   intern/cycles/scene/osl.cpp
M   intern/cycles/scene/osl.h
M   intern/cycles/scene/scene.cpp
M   intern/cycles/scene/shader.cpp
M   intern/cycles/scene/shader.h
M   intern/cycles/scene/shader_nodes.h
M   intern/cycles/util/defines.h
M   intern/cycles/util/transform.h

===

diff --git a/build_files/cmake/platform/platform_win32.cmake 
b/build_files/cmake/platform/platform_win32.cmake
index 7a2d3ad948a..47673794652 100644
--- a/build_files/cmake/platform/platform_win32.cmake
+++ b/build_files/cmake/platform/platform_win32.cmake
@@ -419,7 +419,7 @@ if(WITH_IMAGE_OPENEXR)
 warn_hardcoded_paths(OpenEXR)
 set(OPENEXR ${LIBDIR}/openexr)
 set(OPENEXR_INCLUDE_DIR ${OPENEXR}/include)
-set(OPENEXR_INCLUDE_DIRS ${OPENEXR_INCLUDE_DIR} ${IMATH_INCLUDE_DIRS} 
${OPENEXR}/include/OpenEXR)
+set(OPENEXR_INCLUDE_DIRS ${OPENEXR_INCLUDE_DIR} ${IMATH_INCLUDE_DIRS} 
${OPENEXR_INCLUDE_DIR}/OpenEXR)
 set(OPENEXR_LIBPATH ${OPENEXR}/lib)
 # Check if the 3.x library name exists
 # if not assume this is a 2.x library folder
@@ -568,7 +568,8 @@ if(WITH_OPENIMAGEIO)
   if(NOT OpenImageIO_FOUND)
 set(OPENIMAGEIO ${LIBDIR}/OpenImageIO)
 set(OPENIMAGEIO_LIBPATH ${OPENIMAGEIO}/lib)
-set(OPENIMAGEIO_INCLUDE_DIRS ${OPENIMAGEIO}/include)
+set(OPENIMAGEIO_INCLUDE_DIR ${OPENIMAGEIO}/include)
+set(OPENIMAGEIO_INCLUDE_DIRS ${OPENIMAGEIO_INCLUDE_DIR})
 set(OIIO_OPTIMIZED optimized ${OPENIMAGEIO_LIBPATH}/OpenImageIO.lib 
optimized ${OPENIMAGEIO_LIBPATH}/OpenImageIO_Util.lib)
 set(OIIO_DEBUG debug ${OPENIMAGEIO_LIBPATH}/OpenImageIO_d.lib debug 
${OPENIMAGEIO_LIBPATH}/OpenImageIO_Util_d.lib)
 set(OPENIMAGEIO_LIBRARIES ${OIIO_OPTIMIZED} ${OIIO_DEBUG})
@@ -785,6 +786,14 @@ if(WITH_CYCLES AND WITH_CYCLES_OSL)
   endif()
   find_path(OSL_INCLUDE_DIR OSL/oslclosure.h PATHS ${CYCLES_OSL}/include)
   find_program(OSL_COMPILER NAMES oslc PATHS ${CYCLES_OSL}/bin)
+  file(STRINGS "${OSL_INCLUDE_DIR}/OSL/oslversion.h" OSL_LIBRARY_VERSION_MAJOR
+   REGEX "^[ \t]*#define[ \t]+OSL_LIBRARY_VERSION_MAJOR[ \t]+[0-9]+.*$")
+  file(STRINGS "${OSL_INCLUDE_DIR}/OSL/oslversion.h" OSL_LIBRARY_VERSION_MINOR
+   REGEX "^[ \t]*#define[ \t]+OSL_LIBRARY_VERSION_MINOR[ \t]+[0-9]+.*$")
+  string(REGEX REPLACE ".*#define[ \t]+OSL_LIBRARY_VERSION_MAJOR[ 
\t]+([.0-9]+).*"
+ "\\1

[Bf-blender-cvs] [a45c36efae0] master: Cycles: Make OSL implementation independent from SVM

2022-09-13 Thread Patrick Mours

Commit: a45c36efae07f22dd1da1ebac728324aeafce85e
Author: Patrick Mours
Date:   Mon Sep 12 18:46:20 2022 +0200
Branches: master
https://developer.blender.org/rBa45c36efae07f22dd1da1ebac728324aeafce85e

Cycles: Make OSL implementation independent from SVM

Cleans up the file structure to be more similar to that of the SVM
and also makes it possible to build kernels with OSL support, but
without having to include SVM support.

This patch was split from D15902.

Differential Revision: https://developer.blender.org/D15949

===

M   intern/cycles/device/cpu/device_impl.cpp
M   intern/cycles/device/cpu/device_impl.h
M   intern/cycles/device/cpu/kernel_thread_globals.cpp
M   intern/cycles/kernel/CMakeLists.txt
M   intern/cycles/kernel/closure/bsdf.h
M   intern/cycles/kernel/film/data_passes.h
M   intern/cycles/kernel/geom/shader_data.h
M   intern/cycles/kernel/integrator/displacement_shader.h
M   intern/cycles/kernel/integrator/intersect_closest.h
M   intern/cycles/kernel/integrator/surface_shader.h
M   intern/cycles/kernel/integrator/volume_shader.h
M   intern/cycles/kernel/osl/CMakeLists.txt
M   intern/cycles/kernel/osl/closures.cpp
A   intern/cycles/kernel/osl/globals.cpp
M   intern/cycles/kernel/osl/globals.h
R073intern/cycles/kernel/osl/shader.h   intern/cycles/kernel/osl/osl.h
M   intern/cycles/kernel/osl/services.cpp
M   intern/cycles/kernel/osl/services.h
D   intern/cycles/kernel/osl/shader.cpp
M   intern/cycles/scene/osl.cpp

===

diff --git a/intern/cycles/device/cpu/device_impl.cpp 
b/intern/cycles/device/cpu/device_impl.cpp
index 1e4b9baa0c0..a2b8d1cbbfa 100644
--- a/intern/cycles/device/cpu/device_impl.cpp
+++ b/intern/cycles/device/cpu/device_impl.cpp
@@ -28,7 +28,6 @@
 #include "kernel/device/cpu/kernel.h"
 #include "kernel/types.h"
 
-#include "kernel/osl/shader.h"
 #include "kernel/osl/globals.h"
 // clang-format on
 
diff --git a/intern/cycles/device/cpu/device_impl.h 
b/intern/cycles/device/cpu/device_impl.h
index 5c1f3cc6ce5..e7e77f18194 100644
--- a/intern/cycles/device/cpu/device_impl.h
+++ b/intern/cycles/device/cpu/device_impl.h
@@ -23,7 +23,6 @@
 #include "kernel/device/cpu/kernel.h"
 #include "kernel/device/cpu/globals.h"
 
-#include "kernel/osl/shader.h"
 #include "kernel/osl/globals.h"
 // clang-format on
 
diff --git a/intern/cycles/device/cpu/kernel_thread_globals.cpp 
b/intern/cycles/device/cpu/kernel_thread_globals.cpp
index 89545399602..99af1525d92 100644
--- a/intern/cycles/device/cpu/kernel_thread_globals.cpp
+++ b/intern/cycles/device/cpu/kernel_thread_globals.cpp
@@ -3,10 +3,7 @@
 
 #include "device/cpu/kernel_thread_globals.h"
 
-// clang-format off
-#include "kernel/osl/shader.h"
 #include "kernel/osl/globals.h"
-// clang-format on
 
 #include "util/profiling.h"
 
@@ -20,7 +17,7 @@ CPUKernelThreadGlobals::CPUKernelThreadGlobals(const 
KernelGlobalsCPU _gl
   reset_runtime_memory();
 
 #ifdef WITH_OSL
-  OSLShader::thread_init(this, reinterpret_cast(osl_globals_memory));
+  OSLGlobals::thread_init(this, static_cast(osl_globals_memory));
 #else
   (void)osl_globals_memory;
 #endif
@@ -35,7 +32,7 @@ 
CPUKernelThreadGlobals::CPUKernelThreadGlobals(CPUKernelThreadGlobals &) n
 CPUKernelThreadGlobals::~CPUKernelThreadGlobals()
 {
 #ifdef WITH_OSL
-  OSLShader::thread_free(this);
+  OSLGlobals::thread_free(this);
 #endif
 }
 
diff --git a/intern/cycles/kernel/CMakeLists.txt 
b/intern/cycles/kernel/CMakeLists.txt
index aa31335393f..a89c5679b27 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -544,8 +544,6 @@ if(WITH_CYCLES_CUDA_BINARIES)
   cycles_set_solution_folder(cycles_kernel_cuda)
 endif()
 
-### START
-
 # HIP module
 
 if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIP)
@@ -620,7 +618,6 @@ if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIP)
   cycles_set_solution_folder(cycles_kernel_hip)
 endif()
 
-### END
 # OptiX PTX modules
 
 if(WITH_CYCLES_DEVICE_OPTIX AND WITH_CYCLES_CUDA_BINARIES)
@@ -712,6 +709,8 @@ if(WITH_CYCLES_DEVICE_OPTIX AND WITH_CYCLES_CUDA_BINARIES)
   cycles_set_solution_folder(cycles_kernel_optix)
 endif()
 
+# oneAPI module
+
 if(WITH_CYCLES_DEVICE_ONEAPI)
   if(WIN32)
 set(cycles_kernel_oneapi_lib 
${CMAKE_CURRENT_BINARY_DIR}/cycles_kernel_oneapi.dll)
diff --git a/intern/cycles/kernel/closure/bsdf.h 
b/intern/cycles/kernel/closure/bsdf.h
index 02cf8bfe3e2..f0b28ff77c4 100644
--- a/intern/cycles/kernel/closure/bsdf.h
+++ b/intern/cycles/kernel/closure/bsdf.h
@@ -116,7 +116,7 @@ ccl_device_inline int bsdf_sample(KernelGlobals kg,
 case CLOSURE_BSDF_DIFF

[Bf-blender-cvs] [8611c37f975] master: Cycles: Generate OSL closures using macros and a template file

2022-09-09 Thread Patrick Mours

Commit: 8611c37f975737efe0d159822edfc21733268f51
Author: Patrick Mours
Date:   Thu Sep 8 19:31:44 2022 +0200
Branches: master
https://developer.blender.org/rB8611c37f975737efe0d159822edfc21733268f51

Cycles: Generate OSL closures using macros and a template file

This has the advantage of being able to use information about the
existing OSL closures in various places without code duplication. In
addition, the setup code for all closures was moved to standalone
functions to avoid usage of virtual function calls in preparation for GPU
support.

This patch was split from D15902.

Differential Revision: https://developer.blender.org/D15917

===

M   .clang-format
M   intern/cycles/kernel/closure/alloc.h
M   intern/cycles/kernel/osl/CMakeLists.txt
D   intern/cycles/kernel/osl/background.cpp
D   intern/cycles/kernel/osl/bsdf_diffuse_ramp.cpp
D   intern/cycles/kernel/osl/bsdf_phong_ramp.cpp
D   intern/cycles/kernel/osl/bssrdf.cpp
M   intern/cycles/kernel/osl/closures.cpp
D   intern/cycles/kernel/osl/closures.h
A   intern/cycles/kernel/osl/closures_setup.h
A   intern/cycles/kernel/osl/closures_template.h
D   intern/cycles/kernel/osl/emissive.cpp
M   intern/cycles/kernel/osl/services.cpp
M   intern/cycles/kernel/osl/services.h
M   intern/cycles/kernel/osl/shader.cpp
M   intern/cycles/kernel/osl/shader.h
A   intern/cycles/kernel/osl/types.h
M   intern/cycles/scene/osl.cpp

===

diff --git a/.clang-format b/.clang-format
index 7e88e6d1cb1..72add4594a4 100644
--- a/.clang-format
+++ b/.clang-format
@@ -273,5 +273,5 @@ StatementMacros:
   - PyObject_VAR_HEAD
   - ccl_gpu_kernel_postfix
 
-MacroBlockBegin: "^BSDF_CLOSURE_CLASS_BEGIN$"
-MacroBlockEnd: "^BSDF_CLOSURE_CLASS_END$"
+MacroBlockBegin: "^OSL_CLOSURE_STRUCT_BEGIN$"
+MacroBlockEnd: "^OSL_CLOSURE_STRUCT_END$"
diff --git a/intern/cycles/kernel/closure/alloc.h 
b/intern/cycles/kernel/closure/alloc.h
index 9847898ee89..1cf06614f3b 100644
--- a/intern/cycles/kernel/closure/alloc.h
+++ b/intern/cycles/kernel/closure/alloc.h
@@ -59,39 +59,10 @@ ccl_device_inline ccl_private ShaderClosure 
*bsdf_alloc(ccl_private ShaderData *
* we will not allocate new closure. */
   if (sample_weight >= CLOSURE_WEIGHT_CUTOFF) {
 ccl_private ShaderClosure *sc = closure_alloc(sd, size, CLOSURE_NONE_ID, 
weight);
-if (sc == NULL) {
-  return NULL;
-}
-
-sc->sample_weight = sample_weight;
-
-return sc;
-  }
-
-  return NULL;
-}
-
-#ifdef __OSL__
-ccl_device_inline ShaderClosure *bsdf_alloc_osl(ShaderData *sd,
-int size,
-Spectrum weight,
-void *data)
-{
-  kernel_assert(isfinite_safe(weight));
-
-  const float sample_weight = fabsf(average(weight));
-
-  /* Use comparison this way to help dealing with non-finite weight: if the 
average is not finite
-   * we will not allocate new closure. */
-  if (sample_weight >= CLOSURE_WEIGHT_CUTOFF) {
-ShaderClosure *sc = closure_alloc(sd, size, CLOSURE_NONE_ID, weight);
 if (!sc) {
   return NULL;
 }
 
-memcpy((void *)sc, data, size);
-
-sc->weight = weight;
 sc->sample_weight = sample_weight;
 
 return sc;
@@ -99,6 +70,5 @@ ccl_device_inline ShaderClosure *bsdf_alloc_osl(ShaderData 
*sd,
 
   return NULL;
 }
-#endif
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/osl/CMakeLists.txt 
b/intern/cycles/kernel/osl/CMakeLists.txt
index 7570490be7c..b27bcb066fd 100644
--- a/intern/cycles/kernel/osl/CMakeLists.txt
+++ b/intern/cycles/kernel/osl/CMakeLists.txt
@@ -10,18 +10,14 @@ set(INC_SYS
 )
 
 set(SRC
-  background.cpp
-  bsdf_diffuse_ramp.cpp
-  bsdf_phong_ramp.cpp
-  emissive.cpp
-  bssrdf.cpp
   closures.cpp
   services.cpp
   shader.cpp
 )
 
 set(HEADER_SRC
-  closures.h
+  closures_setup.h
+  closures_template.h
   globals.h
   services.h
   shader.h
diff --git a/intern/cycles/kernel/osl/background.cpp 
b/intern/cycles/kernel/osl/background.cpp
deleted file mode 100644
index 4b5a2686117..000
--- a/intern/cycles/kernel/osl/background.cpp
+++ /dev/null
@@ -1,77 +0,0 @@
-/* SPDX-License-Identifier: BSD-3-Clause
- *
- * Adapted from Open Shading Language
- * Copyright (c) 2009-2010 Sony Pictures Imageworks Inc., et al.
- * All Rights Reserved.
- *
- * Modifications Copyright 2011-2022 Blender Foundation. */
-
-#include 
-
-#include 
-
-#include "kernel/osl/closures.h"
-
-// clang-format off
-#include "kernel/device/cpu/compat.h"
-#include "kernel/device/cpu/globals.h"
-
-#include "kernel/closure/alloc.h"
-#include "kernel/closure/emissive.h"
-
-#include "kernel/util/color.h"
-// clang-format on
-
-CCL_NAMESPACE_BEGIN
-

[Bf-blender-cvs] [ef7c9e793ec] master: Cycles: Remove separate OSL attribute map and instead always use SVM attribute map

2022-09-09 Thread Patrick Mours

Commit: ef7c9e793ec5331ac694eec9336565bd2254c406
Author: Patrick Mours
Date:   Fri Sep 9 11:55:35 2022 +0200
Branches: master
https://developer.blender.org/rBef7c9e793ec5331ac694eec9336565bd2254c406

Cycles: Remove separate OSL attribute map and instead always use SVM attribute 
map

The SVM attribute map is always generated and uses a simple
linear search to look up by an opaque ID, so we can reuse that for OSL
as well and simply use the attribute name hash as ID instead of
generating a unique value separately. This works for both object
and geometry attributes since the SVM attribute map already
stores both. Simplifies code somewhat and reduces memory
usage slightly.

This patch was split from D15902.

Differential Revision: https://developer.blender.org/D15918

===

M   intern/cycles/kernel/geom/attribute.h
M   intern/cycles/kernel/geom/primitive.h
M   intern/cycles/kernel/geom/subd_triangle.h
M   intern/cycles/kernel/geom/volume.h
M   intern/cycles/kernel/osl/globals.h
M   intern/cycles/kernel/osl/services.cpp
M   intern/cycles/kernel/osl/shader.cpp
M   intern/cycles/kernel/osl/shader.h
M   intern/cycles/kernel/types.h
M   intern/cycles/scene/geometry.cpp
M   intern/cycles/scene/geometry.h
M   intern/cycles/scene/osl.cpp
M   intern/cycles/scene/osl.h
M   intern/cycles/scene/shader.cpp
M   intern/cycles/scene/shader.h

===

diff --git a/intern/cycles/kernel/geom/attribute.h 
b/intern/cycles/kernel/geom/attribute.h
index 31a9e39d528..3a0ee1b09d1 100644
--- a/intern/cycles/kernel/geom/attribute.h
+++ b/intern/cycles/kernel/geom/attribute.h
@@ -16,14 +16,14 @@ CCL_NAMESPACE_BEGIN
 
 /* Patch index for triangle, -1 if not subdivision triangle */
 
-ccl_device_inline uint subd_triangle_patch(KernelGlobals kg, ccl_private const 
ShaderData *sd)
+ccl_device_inline uint subd_triangle_patch(KernelGlobals kg, int prim)
 {
-  return (sd->prim != PRIM_NONE) ? kernel_data_fetch(tri_patch, sd->prim) : ~0;
+  return (prim != PRIM_NONE) ? kernel_data_fetch(tri_patch, prim) : ~0;
 }
 
-ccl_device_inline uint attribute_primitive_type(KernelGlobals kg, ccl_private 
const ShaderData *sd)
+ccl_device_inline uint attribute_primitive_type(KernelGlobals kg, int prim, 
int type)
 {
-  if ((sd->type & PRIMITIVE_TRIANGLE) && subd_triangle_patch(kg, sd) != ~0) {
+  if ((type & PRIMITIVE_TRIANGLE) && subd_triangle_patch(kg, prim) != ~0) {
 return ATTR_PRIM_SUBD;
   }
   else {
@@ -45,17 +45,16 @@ ccl_device_inline uint 
object_attribute_map_offset(KernelGlobals kg, int object)
   return kernel_data_fetch(objects, object).attribute_map_offset;
 }
 
-ccl_device_inline AttributeDescriptor find_attribute(KernelGlobals kg,
- ccl_private const 
ShaderData *sd,
- uint id)
+ccl_device_inline AttributeDescriptor
+find_attribute(KernelGlobals kg, int object, int prim, int type, uint64_t id)
 {
-  if (sd->object == OBJECT_NONE) {
+  if (object == OBJECT_NONE) {
 return attribute_not_found();
   }
 
   /* for SVM, find attribute by unique id */
-  uint attr_offset = object_attribute_map_offset(kg, sd->object);
-  attr_offset += attribute_primitive_type(kg, sd);
+  uint attr_offset = object_attribute_map_offset(kg, object);
+  attr_offset += attribute_primitive_type(kg, prim, type);
   AttributeMap attr_map = kernel_data_fetch(attributes_map, attr_offset);
 
   while (attr_map.id != id) {
@@ -77,7 +76,7 @@ ccl_device_inline AttributeDescriptor 
find_attribute(KernelGlobals kg,
   AttributeDescriptor desc;
   desc.element = (AttributeElement)attr_map.element;
 
-  if (sd->prim == PRIM_NONE && desc.element != ATTR_ELEMENT_MESH &&
+  if (prim == PRIM_NONE && desc.element != ATTR_ELEMENT_MESH &&
   desc.element != ATTR_ELEMENT_VOXEL && desc.element != 
ATTR_ELEMENT_OBJECT) {
 return attribute_not_found();
   }
@@ -91,11 +90,16 @@ ccl_device_inline AttributeDescriptor 
find_attribute(KernelGlobals kg,
   return desc;
 }
 
+ccl_device_inline AttributeDescriptor find_attribute(KernelGlobals kg,
+ ccl_private const 
ShaderData *sd,
+ uint64_t id)
+{
+  return find_attribute(kg, sd->object, sd->prim, sd->type, id);
+}
+
 /* Transform matrix attribute on meshes */
 
-ccl_device Transform primitive_attribute_matrix(KernelGlobals kg,
-ccl_private const ShaderData 
*sd,
-const AttributeDescriptor desc)
+ccl_device Transform primitive_attribute_matrix(KernelGlobals kg, const 
AttributeDescriptor desc)
 {
   Transform tfm;
 
diff --git a/intern/cycles/kernel/geom

[Bf-blender-cvs] [d13ed3c1575] master: Merge branch 'blender-v3.3-release'

2022-08-12 Thread Patrick Mours

Commit: d13ed3c1575c5bc840c322ef7fc86b6b8505d450
Author: Patrick Mours
Date:   Fri Aug 12 16:14:51 2022 +0200
Branches: master
https://developer.blender.org/rBd13ed3c1575c5bc840c322ef7fc86b6b8505d450

Merge branch 'blender-v3.3-release'

===



===



___
Bf-blender-cvs mailing list
Bf-blender-cvs@blender.org
List details, subscription details or unsubscribe:
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

[Bf-blender-cvs] [515a15f200e] blender-v3.3-release: Fix syntax error introduced in previous commit

2022-08-12 Thread Patrick Mours

Commit: 515a15f200ed3323b7584c2c46d28a4ca562
Author: Patrick Mours
Date:   Fri Aug 12 16:13:09 2022 +0200
Branches: blender-v3.3-release
https://developer.blender.org/rB515a15f200ed3323b7584c2c46d28a4ca562

Fix syntax error introduced in previous commit

===

M   intern/cycles/integrator/path_trace.h

===

diff --git a/intern/cycles/integrator/path_trace.h 
b/intern/cycles/integrator/path_trace.h
index 9531e4fb186..59382b51d23 100644
--- a/intern/cycles/integrator/path_trace.h
+++ b/intern/cycles/integrator/path_trace.h
@@ -263,7 +263,7 @@ class PathTrace {
   unique_ptr denoiser_;
 
   /* Denoiser device descriptor which holds the denoised big tile for 
multi-device workloads. */
-  unique_ptr denoiser_buffer_;
+  unique_ptr big_tile_denoise_work_;
 
   /* State which is common for all the steps of the render work.
* Is brought up to date in the `render()` call and is accessed from all the 
steps involved into

___
Bf-blender-cvs mailing list
Bf-blender-cvs@blender.org
List details, subscription details or unsubscribe:
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

[Bf-blender-cvs] [e7908c07904] master: Merge branch 'blender-v3.3-release'

2022-08-12 Thread Patrick Mours

Commit: e7908c079042b8d64ee2a39bf2630e676e89fe6d
Author: Patrick Mours
Date:   Fri Aug 12 16:04:06 2022 +0200
Branches: master
https://developer.blender.org/rBe7908c079042b8d64ee2a39bf2630e676e89fe6d

Merge branch 'blender-v3.3-release'

===



===



___
Bf-blender-cvs mailing list
Bf-blender-cvs@blender.org
List details, subscription details or unsubscribe:
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

[Bf-blender-cvs] [79787bf8e1e] blender-v3.3-release: Cycles: Improve denoiser update performance when rendering with multiple GPUs

2022-08-12 Thread Patrick Mours

Commit: 79787bf8e1e1d766e34dc6f8c5eda2efcceaa6cc
Author: Patrick Mours
Date:   Fri Aug 12 15:49:30 2022 +0200
Branches: blender-v3.3-release
https://developer.blender.org/rB79787bf8e1e1d766e34dc6f8c5eda2efcceaa6cc

Cycles: Improve denoiser update performance when rendering with multiple GPUs

This patch causes the render buffers to be copied to the denoiser
device only once before denoising and output/display is then fed
from that single buffer on the denoiser device. That way usually all
but one copy (from all the render devices to the denoiser device)
can be eliminated, provided that the denoiser device is also the
display device (in which case interop is used to update the display).
As such this patch also adds some logic that tries to ensure the
chosen denoiser device is the same as the display device.

Differential Revision: https://developer.blender.org/D15657

===

M   intern/cycles/device/cuda/device_impl.cpp
M   intern/cycles/device/optix/device_impl.cpp
M   intern/cycles/integrator/denoiser.cpp
M   intern/cycles/integrator/path_trace.cpp
M   intern/cycles/integrator/path_trace.h
M   intern/cycles/integrator/path_trace_tile.cpp
M   intern/cycles/integrator/path_trace_tile.h
M   intern/cycles/session/session.cpp

===

diff --git a/intern/cycles/device/cuda/device_impl.cpp 
b/intern/cycles/device/cuda/device_impl.cpp
index 00851a8e91c..01c021551f3 100644
--- a/intern/cycles/device/cuda/device_impl.cpp
+++ b/intern/cycles/device/cuda/device_impl.cpp
@@ -1202,11 +1202,11 @@ bool CUDADevice::should_use_graphics_interop()
   }
 
   vector gl_devices(num_all_devices);
-  uint num_gl_devices;
+  uint num_gl_devices = 0;
   cuGLGetDevices(_gl_devices, gl_devices.data(), num_all_devices, 
CU_GL_DEVICE_LIST_ALL);
 
-  for (CUdevice gl_device : gl_devices) {
-if (gl_device == cuDevice) {
+  for (uint i = 0; i < num_gl_devices; ++i) {
+if (gl_devices[i] == cuDevice) {
   return true;
 }
   }
diff --git a/intern/cycles/device/optix/device_impl.cpp 
b/intern/cycles/device/optix/device_impl.cpp
index 151983667c0..94a46acaf18 100644
--- a/intern/cycles/device/optix/device_impl.cpp
+++ b/intern/cycles/device/optix/device_impl.cpp
@@ -39,6 +39,9 @@ CCL_NAMESPACE_BEGIN
 // The original code is Copyright NVIDIA Corporation, BSD-3-Clause.
 namespace {
 
+#  if OPTIX_ABI_VERSION >= 60
+using ::optixUtilDenoiserInvokeTiled;
+#  else
 static OptixResult optixUtilDenoiserSplitImage(const OptixImage2D ,
const OptixImage2D ,
unsigned int 
overlapWindowSizeInPixels,
@@ -215,6 +218,7 @@ static OptixResult 
optixUtilDenoiserInvokeTiled(OptixDenoiser denoiser,
   }
   return OPTIX_SUCCESS;
 }
+#  endif
 
 #  if OPTIX_ABI_VERSION >= 55
 static void execute_optix_task(TaskPool , OptixTask task, OptixResult 
_reason)
diff --git a/intern/cycles/integrator/denoiser.cpp 
b/intern/cycles/integrator/denoiser.cpp
index 94991d63e4c..831bd3a4407 100644
--- a/intern/cycles/integrator/denoiser.cpp
+++ b/intern/cycles/integrator/denoiser.cpp
@@ -101,10 +101,17 @@ static Device *find_best_device(Device *device, 
DenoiserType type)
 if ((sub_device->info.denoisers & type) == 0) {
   return;
 }
+
 if (!best_device) {
   best_device = sub_device;
 }
 else {
+  /* Prefer a device that can use graphics interop for faster display 
update. */
+  if (sub_device->should_use_graphics_interop() &&
+  !best_device->should_use_graphics_interop()) {
+best_device = sub_device;
+  }
+
   /* TODO(sergey): Choose fastest device from available ones. Taking into 
account performance
* of the device and data transfer cost. */
 }
diff --git a/intern/cycles/integrator/path_trace.cpp 
b/intern/cycles/integrator/path_trace.cpp
index ed278821b46..3ec7b601d9f 100644
--- a/intern/cycles/integrator/path_trace.cpp
+++ b/intern/cycles/integrator/path_trace.cpp
@@ -26,6 +26,7 @@ PathTrace::PathTrace(Device *device,
  RenderScheduler _scheduler,
  TileManager _manager)
 : device_(device),
+  film_(film),
   device_scene_(device_scene),
   render_scheduler_(render_scheduler),
   tile_manager_(tile_manager)
@@ -60,7 +61,17 @@ PathTrace::~PathTrace()
 void PathTrace::load_kernels()
 {
   if (denoiser_) {
+/* Activate graphics interop while denoiser device is created, so that it 
can choose a device
+ * that supports interop for faster display updates. */
+if (display_ && path_trace_works_.size() > 1) {
+  display_->graphics_interop_activate();
+}
+
 denoiser_->load_kernels(progress_);
+
+if (display_ && path_trace_works_.size() > 1) {
+  display_->graphics_interop_deactivate();
+}

[Bf-blender-cvs] [ef268c78933] master: Build: Fix build of library dependencies on Linux aarch64

2022-06-30 Thread Patrick Mours

Commit: ef268c78933079137288e326704431432adf9ad9
Author: Patrick Mours
Date:   Thu Jun 30 16:44:38 2022 +0200
Branches: master
https://developer.blender.org/rBef268c78933079137288e326704431432adf9ad9

Build: Fix build of library dependencies on Linux aarch64

rBb9c37608a9e959a896f5358d4ab3d3d001a70833 moved evaluation of
`versions.cmake` before `options.cmake`, as a result of which
`BLENDER_PLATFORM_ARM` was no longer defined in `versions.cmake`,
causing it to choose the wrong OpenSSL version for aarch64. This
reverts that. Also fixes a compiler crash when building flex with some
glibc versions.

Differential Revision: https://developer.blender.org/D15319

===

M   build_files/build_environment/CMakeLists.txt
M   build_files/build_environment/cmake/flex.cmake
M   build_files/build_environment/cmake/ispc.cmake
A   build_files/build_environment/patches/flex.diff

===

diff --git a/build_files/build_environment/CMakeLists.txt 
b/build_files/build_environment/CMakeLists.txt
index a9ff48b2a9b..e0350901cd0 100644
--- a/build_files/build_environment/CMakeLists.txt
+++ b/build_files/build_environment/CMakeLists.txt
@@ -29,8 +29,9 @@ cmake_minimum_required(VERSION 3.5)
 
 include(ExternalProject)
 include(cmake/check_software.cmake)
-include(cmake/versions.cmake)
 include(cmake/options.cmake)
+# versions.cmake needs to be included after options.cmake due to the 
BLENDER_PLATFORM_ARM variable being needed.
+include(cmake/versions.cmake)
 include(cmake/boost_build_options.cmake)
 include(cmake/download.cmake)
 include(cmake/macros.cmake)
diff --git a/build_files/build_environment/cmake/flex.cmake 
b/build_files/build_environment/cmake/flex.cmake
index 2b04c8d5d68..99233adbcdc 100644
--- a/build_files/build_environment/cmake/flex.cmake
+++ b/build_files/build_environment/cmake/flex.cmake
@@ -5,6 +5,8 @@ ExternalProject_Add(external_flex
   URL_HASH ${FLEX_HASH_TYPE}=${FLEX_HASH}
   DOWNLOAD_DIR ${DOWNLOAD_DIR}
   PREFIX ${BUILD_DIR}/flex
+  # This patch fixes build with some versions of glibc 
(https://github.com/westes/flex/commit/24fd0551333e7eded87b64dd36062da3df2f6380)
+  PATCH_COMMAND ${PATCH_CMD} -d ${BUILD_DIR}/flex/src/external_flex < 
${PATCH_DIR}/flex.diff
   CONFIGURE_COMMAND ${CONFIGURE_ENV} && cd 
${BUILD_DIR}/flex/src/external_flex/ && ${CONFIGURE_COMMAND} 
--prefix=${LIBDIR}/flex
   BUILD_COMMAND ${CONFIGURE_ENV} && cd ${BUILD_DIR}/flex/src/external_flex/ && 
make -j${MAKE_THREADS}
   INSTALL_COMMAND ${CONFIGURE_ENV} && cd ${BUILD_DIR}/flex/src/external_flex/ 
&& make install
diff --git a/build_files/build_environment/cmake/ispc.cmake 
b/build_files/build_environment/cmake/ispc.cmake
index 86dc1d9efa8..c2dbedca55f 100644
--- a/build_files/build_environment/cmake/ispc.cmake
+++ b/build_files/build_environment/cmake/ispc.cmake
@@ -28,7 +28,7 @@ elseif(UNIX)
   set(ISPC_EXTRA_ARGS_UNIX
 -DCMAKE_C_COMPILER=${LIBDIR}/llvm/bin/clang
 -DCMAKE_CXX_COMPILER=${LIBDIR}/llvm/bin/clang++
--DARM_ENABLED=Off
+-DARM_ENABLED=${BLENDER_PLATFORM_ARM}
 -DFLEX_EXECUTABLE=${LIBDIR}/flex/bin/flex
   )
 endif()
diff --git a/build_files/build_environment/patches/flex.diff 
b/build_files/build_environment/patches/flex.diff
new file mode 100644
index 000..d3f9e8b0a66
--- /dev/null
+++ b/build_files/build_environment/patches/flex.diff
@@ -0,0 +1,15 @@
+diff --git a/configure.ac b/configure.ac
+index c6f12d644..3c977a4e3 100644
+--- a/configure.ac
++++ b/configure.ac
+@@ -25,8 +25,10 @@
+ # autoconf requirements and initialization
+ 
+ AC_INIT([the fast lexical analyser 
generator],[2.6.4],[flex-h...@lists.sourceforge.net],[flex])
++AC_PREREQ([2.60])
+ AC_CONFIG_SRCDIR([src/scan.l])
+ AC_CONFIG_AUX_DIR([build-aux])
++AC_USE_SYSTEM_EXTENSIONS
+ LT_INIT
+ AM_INIT_AUTOMAKE([1.15 -Wno-portability foreign std-options dist-lzip 
parallel-tests subdir-objects])
+ AC_CONFIG_HEADER([src/config.h])

___
Bf-blender-cvs mailing list
Bf-blender-cvs@blender.org
List details, subscription details or unsubscribe:
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

[Bf-blender-cvs] [34f94a02f37] blender-v3.2-release: Fix use of OpenGL interop breaking in Hydra viewports that do not support it

2022-06-03 Thread Patrick Mours

Commit: 34f94a02f37005210f629f04635c457d98ff5f91
Author: Patrick Mours
Date:   Wed Jun 1 15:26:55 2022 +0200
Branches: blender-v3.2-release
https://developer.blender.org/rB34f94a02f37005210f629f04635c457d98ff5f91

Fix use of OpenGL interop breaking in Hydra viewports that do not support it

Rendering directly to a resource using OpenGL interop and Hgi
doesn't work in Houdini, since it never uses the resulting resource
(it does not call `HdRenderBuffer::GetResource`). But since doing
that simultaneously disables mapping (`HdRenderBuffer::Map` is
not implemented then), nothing was displayed. To fix this, keep
track of whether a Hydra viewport does support displaying a Hgi
resource directly, by checking whether
`HdRenderBuffer::GetResource` is ever called and only enable use
of OpenGL interop if that is the case.

Differential Revision: https://developer.blender.org/D15090

===

M   intern/cycles/hydra/display_driver.cpp
M   intern/cycles/hydra/display_driver.h
M   intern/cycles/hydra/output_driver.cpp
M   intern/cycles/hydra/render_buffer.cpp
M   intern/cycles/hydra/render_buffer.h

===

diff --git a/intern/cycles/hydra/display_driver.cpp 
b/intern/cycles/hydra/display_driver.cpp
index a809ace63e2..0c0b577c358 100644
--- a/intern/cycles/hydra/display_driver.cpp
+++ b/intern/cycles/hydra/display_driver.cpp
@@ -23,10 +23,18 @@ 
HdCyclesDisplayDriver::HdCyclesDisplayDriver(HdCyclesSession *renderParam, Hgi *
 
 HdCyclesDisplayDriver::~HdCyclesDisplayDriver()
 {
-  deinit();
+  if (texture_) {
+_hgi->DestroyTexture(_);
+  }
+
+  if (gl_pbo_id_) {
+glDeleteBuffers(1, _pbo_id_);
+  }
+
+  gl_context_dispose();
 }
 
-void HdCyclesDisplayDriver::init()
+void HdCyclesDisplayDriver::gl_context_create()
 {
 #ifdef _WIN32
   if (!gl_context_) {
@@ -64,16 +72,42 @@ void HdCyclesDisplayDriver::init()
   }
 }
 
-void HdCyclesDisplayDriver::deinit()
+bool HdCyclesDisplayDriver::gl_context_enable()
 {
-  if (texture_) {
-_hgi->DestroyTexture(_);
+#ifdef _WIN32
+  if (!hdc_ || !gl_context_) {
+return false;
   }
 
-  if (gl_pbo_id_) {
-glDeleteBuffers(1, _pbo_id_);
+  mutex_.lock();
+
+  // Do not change context if this is called in the main thread
+  if (wglGetCurrentContext() == nullptr) {
+if (!TF_VERIFY(wglMakeCurrent((HDC)hdc_, (HGLRC)gl_context_))) {
+  mutex_.unlock();
+  return false;
+}
+  }
+
+  return true;
+#else
+  return false;
+#endif
+}
+
+void HdCyclesDisplayDriver::gl_context_disable()
+{
+#ifdef _WIN32
+  if (wglGetCurrentContext() == gl_context_) {
+TF_VERIFY(wglMakeCurrent(nullptr, nullptr));
   }
 
+  mutex_.unlock();
+#endif
+}
+
+void HdCyclesDisplayDriver::gl_context_dispose()
+{
 #ifdef _WIN32
   if (gl_context_) {
 TF_VERIFY(wglDeleteContext((HGLRC)gl_context_));
@@ -90,13 +124,9 @@ bool HdCyclesDisplayDriver::update_begin(const Params 
,
  int texture_width,
  int texture_height)
 {
-#ifdef _WIN32
-  if (!hdc_ || !gl_context_) {
+  if (!gl_context_enable()) {
 return false;
   }
-#endif
-
-  graphics_interop_activate();
 
   if (gl_render_sync_) {
 glWaitSync((GLsync)gl_render_sync_, 0, GL_TIMEOUT_IGNORED);
@@ -121,15 +151,14 @@ bool HdCyclesDisplayDriver::update_begin(const Params 
,
 void HdCyclesDisplayDriver::update_end()
 {
   gl_upload_sync_ = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
-
   glFlush();
 
-  graphics_interop_deactivate();
+  gl_context_disable();
 }
 
 void HdCyclesDisplayDriver::flush()
 {
-  graphics_interop_activate();
+  gl_context_enable();
 
   if (gl_upload_sync_) {
 glWaitSync((GLsync)gl_upload_sync_, 0, GL_TIMEOUT_IGNORED);
@@ -139,7 +168,7 @@ void HdCyclesDisplayDriver::flush()
 glWaitSync((GLsync)gl_render_sync_, 0, GL_TIMEOUT_IGNORED);
   }
 
-  graphics_interop_deactivate();
+  gl_context_disable();
 }
 
 half4 *HdCyclesDisplayDriver::map_texture_buffer()
@@ -179,25 +208,12 @@ DisplayDriver::GraphicsInterop 
HdCyclesDisplayDriver::graphics_interop_get()
 
 void HdCyclesDisplayDriver::graphics_interop_activate()
 {
-  mutex_.lock();
-
-#ifdef _WIN32
-  // Do not change context if this is called in the main thread
-  if (wglGetCurrentContext() == nullptr) {
-TF_VERIFY(wglMakeCurrent((HDC)hdc_, (HGLRC)gl_context_));
-  }
-#endif
+  gl_context_enable();
 }
 
 void HdCyclesDisplayDriver::graphics_interop_deactivate()
 {
-#ifdef _WIN32
-  if (wglGetCurrentContext() == gl_context_) {
-TF_VERIFY(wglMakeCurrent(nullptr, nullptr));
-  }
-#endif
-
-  mutex_.unlock();
+  gl_context_disable();
 }
 
 void HdCyclesDisplayDriver::clear()
@@ -214,7 +230,11 @@ void HdCyclesDisplayDriver::draw(const Params )
 return;
   }
 
-  init();
+  if (!renderBuffer->IsResourceUsed()) {
+return;
+  }
+
+  gl_context_create();
 
   // Cycles 'DisplayDriver' only supports 'half

[Bf-blender-cvs] [5c6053ccb1c] master: Fix misaligned address error when rendering 3D curves in the viewport with Cycles and OptiX 7.4

2022-06-03 Thread Patrick Mours

Commit: 5c6053ccb1cbbe57d5a9d0aa33eadc6cb3e9dc9a
Author: Patrick Mours
Date:   Fri Jun 3 12:24:13 2022 +0200
Branches: master
https://developer.blender.org/rB5c6053ccb1cbbe57d5a9d0aa33eadc6cb3e9dc9a

Fix misaligned address error when rendering 3D curves in the viewport with 
Cycles and OptiX 7.4

Acceleration structures in the viewport default to building with the fast
build flag, but the intersection program used for curves was queried with
the fast trace flag. The resulting mismatch caused an exception in the
intersection kernel. Since it's difficult to predict whether dynamic or static
acceleration structures are going to be built at the time of kernel loading,
this fixes the mismatch by always using the fast trace flag for curves.

===

M   intern/cycles/device/optix/device_impl.cpp

===

diff --git a/intern/cycles/device/optix/device_impl.cpp 
b/intern/cycles/device/optix/device_impl.cpp
index 35717c49d1a..9ab9bbb59c5 100644
--- a/intern/cycles/device/optix/device_impl.cpp
+++ b/intern/cycles/device/optix/device_impl.cpp
@@ -553,7 +553,8 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
   OptixBuiltinISOptions builtin_options = {};
 #  if OPTIX_ABI_VERSION >= 55
   builtin_options.builtinISModuleType = 
OPTIX_PRIMITIVE_TYPE_ROUND_CATMULLROM;
-  builtin_options.buildFlags = OPTIX_BUILD_FLAG_PREFER_FAST_TRACE;
+  builtin_options.buildFlags = OPTIX_BUILD_FLAG_PREFER_FAST_TRACE |
+   OPTIX_BUILD_FLAG_ALLOW_COMPACTION;
   builtin_options.curveEndcapFlags = OPTIX_CURVE_ENDCAP_DEFAULT; /* 
Disable end-caps. */
 #  else
   builtin_options.builtinISModuleType = 
OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE;
@@ -1387,7 +1388,10 @@ bool OptiXDevice::build_optix_bvh(BVHOptiX *bvh,
   OptixAccelBufferSizes sizes = {};
   OptixAccelBuildOptions options = {};
   options.operation = operation;
-  if (use_fast_trace_bvh) {
+  if (use_fast_trace_bvh ||
+  /* The build flags have to match the ones used to query the built-in 
curve intersection
+ program (see optixBuiltinISModuleGet above) */
+  build_input.type == OPTIX_BUILD_INPUT_TYPE_CURVES) {
 VLOG(2) << "Using fast to trace OptiX BVH";
 options.buildFlags = OPTIX_BUILD_FLAG_PREFER_FAST_TRACE | 
OPTIX_BUILD_FLAG_ALLOW_COMPACTION;
   }

___
Bf-blender-cvs mailing list
Bf-blender-cvs@blender.org
List details, subscription details or unsubscribe:
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

[Bf-blender-cvs] [a8c81ffa831] master: Cycles: Add half precision float support for volumes with NanoVDB

2022-05-23 Thread Patrick Mours

Commit: a8c81ffa83122661b80e842ddd641e366b3d5c04
Author: Patrick Mours
Date:   Fri May 20 18:01:26 2022 +0200
Branches: master
https://developer.blender.org/rBa8c81ffa83122661b80e842ddd641e366b3d5c04

Cycles: Add half precision float support for volumes with NanoVDB

This patch makes it possible to change the precision with which to
store volume data in the NanoVDB data structure (as float, half, or
using variable bit quantization) via the previously unused precision
field in the volume data block.
It makes it possible to further reduce memory usage during
rendering, at a slight cost to the visual detail of a volume.

Differential Revision: https://developer.blender.org/D10023

===

M   intern/cycles/blender/volume.cpp
M   intern/cycles/device/cuda/device_impl.cpp
M   intern/cycles/device/hip/device_impl.cpp
M   intern/cycles/device/memory.cpp
M   intern/cycles/kernel/device/cpu/image.h
M   intern/cycles/kernel/device/gpu/image.h
M   intern/cycles/scene/image.cpp
M   intern/cycles/scene/image_oiio.cpp
M   intern/cycles/scene/image_vdb.cpp
M   intern/cycles/scene/image_vdb.h
M   intern/cycles/scene/object.cpp
M   intern/cycles/util/texture.h
M   release/scripts/startup/bl_ui/properties_data_volume.py
M   source/blender/makesdna/DNA_volume_defaults.h
M   source/blender/makesdna/DNA_volume_types.h
M   source/blender/makesrna/intern/rna_volume.c

===

diff --git a/intern/cycles/blender/volume.cpp b/intern/cycles/blender/volume.cpp
index 8dd2d45c0b6..a9a2c474f40 100644
--- a/intern/cycles/blender/volume.cpp
+++ b/intern/cycles/blender/volume.cpp
@@ -219,7 +219,10 @@ static void sync_smoke_volume(
 
 class BlenderVolumeLoader : public VDBImageLoader {
  public:
-  BlenderVolumeLoader(BL::BlendData _data, BL::Volume _volume, const 
string _name)
+  BlenderVolumeLoader(BL::BlendData _data,
+  BL::Volume _volume,
+  const string _name,
+  BL::VolumeRender::precision_enum precision_)
   : VDBImageLoader(grid_name), b_volume(b_volume)
   {
 b_volume.grids.load(b_data.ptr.data);
@@ -240,6 +243,20 @@ class BlenderVolumeLoader : public VDBImageLoader {
 break;
   }
 }
+#endif
+#ifdef WITH_NANOVDB
+switch (precision_) {
+  case BL::VolumeRender::precision_FULL:
+precision = 32;
+break;
+  case BL::VolumeRender::precision_HALF:
+precision = 16;
+break;
+  default:
+  case BL::VolumeRender::precision_VARIABLE:
+precision = 0;
+break;
+}
 #endif
   }
 
@@ -318,7 +335,8 @@ static void sync_volume_object(BL::BlendData _data,
 volume->attributes.add(std) :
 volume->attributes.add(name, TypeDesc::TypeFloat, 
ATTR_ELEMENT_VOXEL);
 
-  ImageLoader *loader = new BlenderVolumeLoader(b_data, b_volume, 
name.string());
+  ImageLoader *loader = new BlenderVolumeLoader(
+  b_data, b_volume, name.string(), b_render.precision());
   ImageParams params;
   params.frame = b_volume.grids.frame();
 
diff --git a/intern/cycles/device/cuda/device_impl.cpp 
b/intern/cycles/device/cuda/device_impl.cpp
index 6908ae5ead3..75177566901 100644
--- a/intern/cycles/device/cuda/device_impl.cpp
+++ b/intern/cycles/device/cuda/device_impl.cpp
@@ -1084,7 +1084,9 @@ void CUDADevice::tex_alloc(device_texture )
   need_texture_info = true;
 
   if (mem.info.data_type != IMAGE_DATA_TYPE_NANOVDB_FLOAT &&
-  mem.info.data_type != IMAGE_DATA_TYPE_NANOVDB_FLOAT3) {
+  mem.info.data_type != IMAGE_DATA_TYPE_NANOVDB_FLOAT3 &&
+  mem.info.data_type != IMAGE_DATA_TYPE_NANOVDB_FPN &&
+  mem.info.data_type != IMAGE_DATA_TYPE_NANOVDB_FP16) {
 CUDA_RESOURCE_DESC resDesc;
 memset(, 0, sizeof(resDesc));
 
diff --git a/intern/cycles/device/hip/device_impl.cpp 
b/intern/cycles/device/hip/device_impl.cpp
index 7159277b325..f8fdb86ca29 100644
--- a/intern/cycles/device/hip/device_impl.cpp
+++ b/intern/cycles/device/hip/device_impl.cpp
@@ -1042,7 +1042,9 @@ void HIPDevice::tex_alloc(device_texture )
   need_texture_info = true;
 
   if (mem.info.data_type != IMAGE_DATA_TYPE_NANOVDB_FLOAT &&
-  mem.info.data_type != IMAGE_DATA_TYPE_NANOVDB_FLOAT3) {
+  mem.info.data_type != IMAGE_DATA_TYPE_NANOVDB_FLOAT3 &&
+  mem.info.data_type != IMAGE_DATA_TYPE_NANOVDB_FPN &&
+  mem.info.data_type != IMAGE_DATA_TYPE_NANOVDB_FP16) {
 /* Bindless textures. */
 hipResourceDesc resDesc;
 memset(, 0, sizeof(resDesc));
diff --git a/intern/cycles/device/memory.cpp b/intern/cycles/device/memory.cpp
index 4c068dbdd3e..40cf2573cfb 100644
--- a/intern/cycles/device/memory.cpp
+++ b/intern/cycles/device/memory.cpp
@@ -165,6 +165,8 @@ device_texture::device_texture(Dev

[Bf-blender-cvs] [6fa5d520b86] master: Cycles: Add support for parallel compilation of OptiX module

2022-05-05 Thread Patrick Mours

Commit: 6fa5d520b861e8d5b96967452c50b459c52e8024
Author: Patrick Mours
Date:   Thu May 5 11:08:44 2022 +0200
Branches: master
https://developer.blender.org/rB6fa5d520b861e8d5b96967452c50b459c52e8024

Cycles: Add support for parallel compilation of OptiX module

OptiX 7.4 adds support for splitting the costly creation of an OptiX
module into smaller tasks that can be executed in parallel on a
thread pool.
This is only really relevant for the "shader_raytrace" kernel variant
as the main one is small and compiles fast either way. It shaves off
a few seconds there (total gain is not massive currently, since it is
difficult for the compiler to split up the huge shading entry point
that is the primary one taking up time, but it is still measurable).

Differential Revision: https://developer.blender.org/D14845

===

M   intern/cycles/device/optix/device_impl.cpp

===

diff --git a/intern/cycles/device/optix/device_impl.cpp 
b/intern/cycles/device/optix/device_impl.cpp
index 8830d8c44ac..6329144131e 100644
--- a/intern/cycles/device/optix/device_impl.cpp
+++ b/intern/cycles/device/optix/device_impl.cpp
@@ -23,6 +23,7 @@
 #  include "util/md5.h"
 #  include "util/path.h"
 #  include "util/progress.h"
+#  include "util/task.h"
 #  include "util/time.h"
 
 #  undef __KERNEL_CPU__
@@ -216,6 +217,24 @@ static OptixResult 
optixUtilDenoiserInvokeTiled(OptixDenoiser denoiser,
   return OPTIX_SUCCESS;
 }
 
+#  if OPTIX_ABI_VERSION >= 55
+static void execute_optix_task(TaskPool , OptixTask task, OptixResult 
_reason)
+{
+  OptixTask additional_tasks[16];
+  unsigned int num_additional_tasks = 0;
+
+  const OptixResult result = optixTaskExecute(task, additional_tasks, 16, 
_additional_tasks);
+  if (result == OPTIX_SUCCESS) {
+for (unsigned int i = 0; i < num_additional_tasks; ++i) {
+  pool.push(function_bind(_optix_task, std::ref(pool), 
additional_tasks[i], std::ref(failure_reason)));
+}
+  }
+  else {
+failure_reason = result;
+  }
+}
+#  endif
+
 }  // namespace
 
 OptiXDevice::Denoiser::Denoiser(OptiXDevice *device)
@@ -453,6 +472,23 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
   return false;
 }
 
+#  if OPTIX_ABI_VERSION >= 55
+OptixTask task = nullptr;
+OptixResult result = optixModuleCreateFromPTXWithTasks(context,
+   _options,
+   _options,
+   ptx_data.data(),
+   ptx_data.size(),
+   nullptr,
+   nullptr,
+   _module,
+   );
+if (result == OPTIX_SUCCESS) {
+  TaskPool pool;
+  execute_optix_task(pool, task, result);
+  pool.wait_work();
+}
+#  else
 const OptixResult result = optixModuleCreateFromPTX(context,
 _options,
 _options,
@@ -461,6 +497,7 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
 nullptr,
 0,
 _module);
+#  endif
 if (result != OPTIX_SUCCESS) {
   set_error(string_printf("Failed to load OptiX kernel from '%s' (%s)",
   ptx_filename.c_str(),

___
Bf-blender-cvs mailing list
Bf-blender-cvs@blender.org
List details, subscription details or unsubscribe:
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

[Bf-blender-cvs] [fc2c22e90c2] master: Cycles: Hydra fixes for stageMetersPerUnit and OpenGL context on Windows

2022-04-29 Thread Patrick Mours

Commit: fc2c22e90c252f683a42574d4382f7e3c23940e1
Author: Patrick Mours
Date:   Wed Apr 20 13:40:48 2022 +0200
Branches: master
https://developer.blender.org/rBfc2c22e90c252f683a42574d4382f7e3c23940e1

Cycles: Hydra fixes for stageMetersPerUnit and OpenGL context on Windows

Add "stageMetersPerUnit" render setting for USD files that have that set to
something other than the default (e.g. exported by Blender).

And fix a crash when an application creates a Hydra render pass on a thread
that does not have an OpenGL context current.

===

M   intern/cycles/hydra/display_driver.cpp
M   intern/cycles/hydra/display_driver.h
M   intern/cycles/hydra/file_reader.cpp
M   intern/cycles/hydra/render_delegate.cpp
M   intern/cycles/hydra/render_delegate.h

===

diff --git a/intern/cycles/hydra/display_driver.cpp 
b/intern/cycles/hydra/display_driver.cpp
index 6f6ca35cd31..a809ace63e2 100644
--- a/intern/cycles/hydra/display_driver.cpp
+++ b/intern/cycles/hydra/display_driver.cpp
@@ -19,44 +19,66 @@ HDCYCLES_NAMESPACE_OPEN_SCOPE
 HdCyclesDisplayDriver::HdCyclesDisplayDriver(HdCyclesSession *renderParam, Hgi 
*hgi)
 : _renderParam(renderParam), _hgi(hgi)
 {
+}
+
+HdCyclesDisplayDriver::~HdCyclesDisplayDriver()
+{
+  deinit();
+}
+
+void HdCyclesDisplayDriver::init()
+{
 #ifdef _WIN32
-  hdc_ = GetDC(CreateWindowA("STATIC",
- "HdCycles",
- WS_OVERLAPPEDWINDOW | WS_CLIPSIBLINGS | 
WS_CLIPCHILDREN,
- 0,
- 0,
- 64,
- 64,
- NULL,
- NULL,
- GetModuleHandle(NULL),
- NULL));
-
-  int pixelFormat = GetPixelFormat(wglGetCurrentDC());
-  PIXELFORMATDESCRIPTOR pfd = {sizeof(pfd)};
-  DescribePixelFormat((HDC)hdc_, pixelFormat, sizeof(pfd), &pfd);
-  SetPixelFormat((HDC)hdc_, pixelFormat, &pfd);
-
-  TF_VERIFY(gl_context_ = wglCreateContext((HDC)hdc_));
-  TF_VERIFY(wglShareLists(wglGetCurrentContext(), (HGLRC)gl_context_));
+  if (!gl_context_) {
+hdc_ = GetDC(CreateWindowA("STATIC",
+   "HdCycles",
+   WS_OVERLAPPEDWINDOW | WS_CLIPSIBLINGS | 
WS_CLIPCHILDREN,
+   0,
+   0,
+   64,
+   64,
+   NULL,
+   NULL,
+   GetModuleHandle(NULL),
+   NULL));
+
+int pixelFormat = GetPixelFormat(wglGetCurrentDC());
+PIXELFORMATDESCRIPTOR pfd = {sizeof(pfd)};
+DescribePixelFormat((HDC)hdc_, pixelFormat, sizeof(pfd), &pfd);
+SetPixelFormat((HDC)hdc_, pixelFormat, &pfd);
+
+TF_VERIFY(gl_context_ = wglCreateContext((HDC)hdc_));
+TF_VERIFY(wglShareLists(wglGetCurrentContext(), (HGLRC)gl_context_));
+  }
+  if (!gl_context_) {
+return;
+  }
 #endif
 
-  glewInit();
+  if (!gl_pbo_id_) {
+if (glewInit() != GLEW_OK) {
+  return;
+}
 
-  glGenBuffers(1, _pbo_id_);
+glGenBuffers(1, _pbo_id_);
+  }
 }
 
-HdCyclesDisplayDriver::~HdCyclesDisplayDriver()
+void HdCyclesDisplayDriver::deinit()
 {
   if (texture_) {
 _hgi->DestroyTexture(_);
   }
 
-  glDeleteBuffers(1, _pbo_id_);
+  if (gl_pbo_id_) {
+glDeleteBuffers(1, _pbo_id_);
+  }
 
 #ifdef _WIN32
-  TF_VERIFY(wglDeleteContext((HGLRC)gl_context_));
-  DestroyWindow(WindowFromDC((HDC)hdc_));
+  if (gl_context_) {
+TF_VERIFY(wglDeleteContext((HGLRC)gl_context_));
+DestroyWindow(WindowFromDC((HDC)hdc_));
+  }
 #endif
 }
 
@@ -192,6 +214,8 @@ void HdCyclesDisplayDriver::draw(const Params )
 return;
   }
 
+  init();
+
   // Cycles 'DisplayDriver' only supports 'half4' format
   TF_VERIFY(renderBuffer->GetFormat() == HdFormatFloat16Vec4);
 
diff --git a/intern/cycles/hydra/display_driver.h 
b/intern/cycles/hydra/display_driver.h
index 668f7d76eed..20086830e6a 100644
--- a/intern/cycles/hydra/display_driver.h
+++ b/intern/cycles/hydra/display_driver.h
@@ -19,6 +19,9 @@ class HdCyclesDisplayDriver final : public 
CCL_NS::DisplayDriver {
   ~HdCyclesDisplayDriver();
 
  private:
+  void init();
+  void deinit();
+
   void next_tile_begin() override;
 
   bool update_begin(const Params , int texture_width, int 
texture_height) override;
diff --git a/intern/cycles/hydra/file_reader.cpp 
b/intern/cycles/hydra/file_reader.cpp
index 329cc959ac3..8925626d8c3 100644
--- a/intern/cycles/hydra/file_reader.cpp
+++ b/intern/cycles/hydra/file_reader.cpp
@@ -18,6 +18,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 HDCYCLES_NAMESPACE_OPEN_SCOPE
@@ -69,6 +70,9 @@ void HdCyclesFileReader::read(Session *session,

[Bf-blender-cvs] [c31b89e76e0] master: Cycles: Add support for "stageMetersPerUnit" Hydra render setting

2022-04-13 Thread Patrick Mours

Commit: c31b89e76e0d216fc7b8807aa8ccd815b30ed93d
Author: Patrick Mours
Date:   Tue Apr 12 18:08:25 2022 +0200
Branches: master
https://developer.blender.org/rBc31b89e76e0d216fc7b8807aa8ccd815b30ed93d

Cycles: Add support for "stageMetersPerUnit" Hydra render setting

This can be useful to match transforms to what native Cycles
would see in Blender, as USD typically uses centimeters, but
Blender uses meters. This patch also fixes the hardcoded focal
length multiplier, which is now using the same units as
everything else. Default of "stageMetersPerUnit" is 0.01 to match
the USD default of centimeters.

Differential Revision: https://developer.blender.org/D14630

===

M   intern/cycles/hydra/camera.cpp
M   intern/cycles/hydra/camera.h
M   intern/cycles/hydra/geometry.inl
M   intern/cycles/hydra/light.cpp
M   intern/cycles/hydra/render_delegate.cpp
M   intern/cycles/hydra/render_pass.cpp
M   intern/cycles/hydra/session.h

===

diff --git a/intern/cycles/hydra/camera.cpp b/intern/cycles/hydra/camera.cpp
index c746a107899..62042cbbcd2 100644
--- a/intern/cycles/hydra/camera.cpp
+++ b/intern/cycles/hydra/camera.cpp
@@ -3,6 +3,7 @@
  * Copyright 2022 Blender Foundation */
 
 #include "hydra/camera.h"
+#include "hydra/session.h"
 #include "scene/camera.h"
 
 #include 
@@ -12,6 +13,19 @@
 HDCYCLES_NAMESPACE_OPEN_SCOPE
 
 extern Transform convert_transform(const GfMatrix4d );
+Transform convert_camera_transform(const GfMatrix4d &matrix, float 
metersPerUnit)
+{
+  Transform t = convert_transform(matrix);
+  // Flip Z axis
+  t.x.z *= -1.0f;
+  t.y.z *= -1.0f;
+  t.z.z *= -1.0f;
+  // Scale translation
+  t.x.w *= metersPerUnit;
+  t.y.w *= metersPerUnit;
+  t.z.w *= metersPerUnit;
+  return t;
+}
 
 #if PXR_VERSION < 2102
 // clang-format off
@@ -61,13 +75,20 @@ void HdCyclesCamera::Sync(HdSceneDelegate *sceneDelegate,
   if (*dirtyBits & DirtyBits::DirtyTransform) {
 sceneDelegate->SampleTransform(id, &_transformSamples);
 
+bool transform_found = false;
 for (size_t i = 0; i < _transformSamples.count; ++i) {
   if (_transformSamples.times[i] == 0.0f) {
 _transform = _transformSamples.values[i];
 _data.SetTransform(_transform);
+transform_found = true;
 break;
   }
 }
+
+if (!transform_found && _transformSamples.count) {
+  _transform = _transformSamples.values[0];
+  _data.SetTransform(_transform);
+}
   }
 #else
   if (*dirtyBits & DirtyBits::DirtyViewMatrix) {
@@ -236,18 +257,21 @@ void HdCyclesCamera::Finalize(HdRenderParam *renderParam)
   HdCamera::Finalize(renderParam);
 }
 
-void HdCyclesCamera::ApplyCameraSettings(Camera *cam) const
+void HdCyclesCamera::ApplyCameraSettings(HdRenderParam *renderParam, Camera 
*cam) const
 {
-  ApplyCameraSettings(_data, _windowPolicy, cam);
+  ApplyCameraSettings(renderParam, _data, _windowPolicy, cam);
+
+  const float metersPerUnit = static_cast(renderParam)->GetStageMetersPerUnit();
 
   array motion(_transformSamples.count);
-  for (size_t i = 0; i < _transformSamples.count; ++i)
-motion[i] = convert_transform(_transformSamples.values[i]) *
-transform_scale(1.0f, 1.0f, -1.0f);
+  for (size_t i = 0; i < _transformSamples.count; ++i) {
+motion[i] = convert_camera_transform(_transformSamples.values[i], 
metersPerUnit);
+  }
   cam->set_motion(motion);
 }
 
-void HdCyclesCamera::ApplyCameraSettings(const GfCamera ,
+void HdCyclesCamera::ApplyCameraSettings(HdRenderParam *renderParam,
+ const GfCamera ,
  CameraUtilConformWindowPolicy 
windowPolicy,
  Camera *cam)
 {
@@ -261,20 +285,22 @@ void HdCyclesCamera::ApplyCameraSettings(const GfCamera 
,
 GfCamera::Orthographic == CAMERA_ORTHOGRAPHIC);
   cam->set_camera_type(static_cast(data.GetProjection()));
 
+  const float metersPerUnit = static_cast(renderParam)->GetStageMetersPerUnit();
+
   auto viewplane = data.GetFrustum().GetWindow();
   auto focalLength = 1.0f;
   if (data.GetProjection() == GfCamera::Perspective) {
 viewplane *= 2.0 / viewplane.GetSize()[1];  // Normalize viewplane
-focalLength = data.GetFocalLength() * 1e-3f;
+focalLength = data.GetFocalLength() * GfCamera::FOCAL_LENGTH_UNIT * 
metersPerUnit;
 
 
cam->set_fov(GfDegreesToRadians(data.GetFieldOfView(GfCamera::FOVVertical)));
   }
 
-  cam->set_sensorwidth(data.GetHorizontalAperture() * GfCamera::APERTURE_UNIT);
-  cam->set_sensorheight(data.GetVerticalAperture() * GfCamera::APERTURE_UNIT);
+  cam->set_sensorwidth(data.GetHorizontalAperture() * GfCamera::APERTURE_UNIT 
* metersPerUnit);
+  cam->set_sensorheight(data.GetVerticalAperture() * GfCamera

[Bf-blender-cvs] [e5136872881] master: Cycles: Fix a few type casting warnings

2022-04-05 Thread Patrick Mours

Commit: e51368728815e3700414a77bf91668425a9965ec
Author: Patrick Mours
Date:   Tue Apr 5 17:30:01 2022 +0200
Branches: master
https://developer.blender.org/rBe51368728815e3700414a77bf91668425a9965ec

Cycles: Fix a few type casting warnings

Stumbled over the `integrate_surface_volume_only_bounce` kernel
function not returning the right type. The others also showed up as
warnings when building Cycles as a standalone application, which did not
have those warnings disabled.

Differential Revision: https://developer.blender.org/D14558

===

M   intern/cycles/kernel/integrator/shade_surface.h
M   intern/cycles/scene/geometry.cpp
M   intern/cycles/scene/geometry.h
M   intern/cycles/session/merge.cpp

===

diff --git a/intern/cycles/kernel/integrator/shade_surface.h 
b/intern/cycles/kernel/integrator/shade_surface.h
index a9bf3b5b432..55bb08044ae 100644
--- a/intern/cycles/kernel/integrator/shade_surface.h
+++ b/intern/cycles/kernel/integrator/shade_surface.h
@@ -346,8 +346,8 @@ ccl_device_forceinline int 
integrate_surface_bsdf_bssrdf_bounce(
 }
 
 #ifdef __VOLUME__
-ccl_device_forceinline bool 
integrate_surface_volume_only_bounce(IntegratorState state,
- ccl_private 
ShaderData *sd)
+ccl_device_forceinline int 
integrate_surface_volume_only_bounce(IntegratorState state,
+ccl_private 
ShaderData *sd)
 {
   if (!path_state_volume_next(state)) {
 return LABEL_NONE;
diff --git a/intern/cycles/scene/geometry.cpp b/intern/cycles/scene/geometry.cpp
index a2a15416ae6..8152a27046f 100644
--- a/intern/cycles/scene/geometry.cpp
+++ b/intern/cycles/scene/geometry.cpp
@@ -180,7 +180,7 @@ bool Geometry::has_true_displacement() const
 }
 
 void Geometry::compute_bvh(
-Device *device, DeviceScene *dscene, SceneParams *params, Progress 
*progress, int n, int total)
+Device *device, DeviceScene *dscene, SceneParams *params, Progress 
*progress, size_t n, size_t total)
 {
   if (progress->get_cancel())
 return;
diff --git a/intern/cycles/scene/geometry.h b/intern/cycles/scene/geometry.h
index bbb50d5cbfe..0c2e70d483d 100644
--- a/intern/cycles/scene/geometry.h
+++ b/intern/cycles/scene/geometry.h
@@ -110,8 +110,8 @@ class Geometry : public Node {
DeviceScene *dscene,
SceneParams *params,
Progress *progress,
-   int n,
-   int total);
+   size_t n,
+   size_t total);
 
   virtual PrimitiveType primitive_type() const = 0;
 
diff --git a/intern/cycles/session/merge.cpp b/intern/cycles/session/merge.cpp
index a88ffee6409..316f56630d6 100644
--- a/intern/cycles/session/merge.cpp
+++ b/intern/cycles/session/merge.cpp
@@ -531,7 +531,7 @@ static void read_layer_samples(vector ,
 current_layer_samples.total = 0;
 current_layer_samples.per_pixel.resize(in_spec.width * in_spec.height);
 std::fill(
-current_layer_samples.per_pixel.begin(), 
current_layer_samples.per_pixel.end(), 0);
+current_layer_samples.per_pixel.begin(), 
current_layer_samples.per_pixel.end(), 0.0f);
   }
 
   if (layer.has_sample_pass) {

___
Bf-blender-cvs mailing list
Bf-blender-cvs@blender.org
List details, subscription details or unsubscribe:
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

[Bf-blender-cvs] [f60cffad38d] master: Cycles: Use USD dependencies when building Hydra render delegate

2022-04-05 Thread Patrick Mours

Commit: f60cffad38d12bdfefe503924e93c33a7c89f671
Author: Patrick Mours
Date:   Thu Mar 31 19:27:32 2022 +0200
Branches: master
https://developer.blender.org/rBf60cffad38d12bdfefe503924e93c33a7c89f671

Cycles: Use USD dependencies when building Hydra render delegate

Adds support for linking with some of the dependencies of a USD
build instead of the precompiled libraries from Blender, specifically
OpenSubdiv, OpenVDB and TBB. Other dependencies keep using the
precompiled libraries from Blender, since they are linked statically
anyway, so it doesn't matter as much. Plus they have interdependencies
that are difficult to resolve when only using selected libraries from
the USD build and can't simply assume that USD was built with all
of them.

This patch also makes building the Hydra render delegate via the
standalone repository work and fixes various small issues I ran into
in general on Windows (e.g. the use of both fixed paths and
`find_package` did not seem to work correctly). Building both the
standalone Cycles application and the Hydra render delegate at the
same time is supported now as well (the paths in the USD plugin JSON
file are updated accordingly).

All that needs to be done now to build is to specify a `PXR_ROOT`
or `USD_ROOT` CMake variable pointing to the USD installation,
everything else is taken care of automatically (CMake targets are
loaded from the `pxrTargets.cmake` of USD and linked into the
render delegate and OpenSubdiv, OpenVDB and TBB are replaced
with those from USD when they exist).

Differential Revision: https://developer.blender.org/D14523

===

M   build_files/cmake/platform/platform_win32.cmake
M   intern/cycles/CMakeLists.txt
M   intern/cycles/cmake/external_libs.cmake
M   intern/cycles/cmake/macros.cmake
M   intern/cycles/hydra/CMakeLists.txt
M   intern/cycles/hydra/camera.cpp
M   intern/cycles/hydra/config.h
M   intern/cycles/hydra/field.cpp
M   intern/cycles/hydra/material.cpp
M   intern/cycles/hydra/material.h
M   intern/cycles/hydra/node_util.cpp

===

diff --git a/build_files/cmake/platform/platform_win32.cmake 
b/build_files/cmake/platform/platform_win32.cmake
index 8ae38e03fb1..b0dbc0d3264 100644
--- a/build_files/cmake/platform/platform_win32.cmake
+++ b/build_files/cmake/platform/platform_win32.cmake
@@ -401,7 +401,7 @@ if(WITH_CODEC_FFMPEG)
 ${LIBDIR}/ffmpeg/include/msvc
   )
   windows_find_package(FFmpeg)
-  if(NOT FFMPEG_FOUND)
+  if(NOT FFmpeg_FOUND)
 warn_hardcoded_paths(FFmpeg)
 set(FFMPEG_LIBRARIES
   ${LIBDIR}/ffmpeg/lib/avcodec.lib
@@ -415,7 +415,7 @@ endif()
 
 if(WITH_IMAGE_OPENEXR)
   windows_find_package(OpenEXR REQUIRED)
-  if(NOT OPENEXR_FOUND)
+  if(NOT OpenEXR_FOUND)
 set(OPENEXR_ROOT_DIR ${LIBDIR}/openexr)
 set(OPENEXR_VERSION "2.1")
 warn_hardcoded_paths(OpenEXR)
@@ -531,17 +531,20 @@ if(WITH_BOOST)
 set(BOOST_LIBRARIES ${Boost_LIBRARIES})
 set(BOOST_LIBPATH ${Boost_LIBRARY_DIRS})
   endif()
+
   set(BOOST_DEFINITIONS "-DBOOST_ALL_NO_LIB")
 endif()
 
 if(WITH_OPENIMAGEIO)
   windows_find_package(OpenImageIO)
-  set(OPENIMAGEIO ${LIBDIR}/OpenImageIO)
-  set(OPENIMAGEIO_LIBPATH ${OPENIMAGEIO}/lib)
-  set(OPENIMAGEIO_INCLUDE_DIRS ${OPENIMAGEIO}/include)
-  set(OIIO_OPTIMIZED optimized ${OPENIMAGEIO_LIBPATH}/OpenImageIO.lib 
optimized ${OPENIMAGEIO_LIBPATH}/OpenImageIO_Util.lib)
-  set(OIIO_DEBUG debug ${OPENIMAGEIO_LIBPATH}/OpenImageIO_d.lib debug 
${OPENIMAGEIO_LIBPATH}/OpenImageIO_Util_d.lib)
-  set(OPENIMAGEIO_LIBRARIES ${OIIO_OPTIMIZED} ${OIIO_DEBUG})
+  if(NOT OpenImageIO_FOUND)
+set(OPENIMAGEIO ${LIBDIR}/OpenImageIO)
+set(OPENIMAGEIO_LIBPATH ${OPENIMAGEIO}/lib)
+set(OPENIMAGEIO_INCLUDE_DIRS ${OPENIMAGEIO}/include)
+set(OIIO_OPTIMIZED optimized ${OPENIMAGEIO_LIBPATH}/OpenImageIO.lib 
optimized ${OPENIMAGEIO_LIBPATH}/OpenImageIO_Util.lib)
+set(OIIO_DEBUG debug ${OPENIMAGEIO_LIBPATH}/OpenImageIO_d.lib debug 
${OPENIMAGEIO_LIBPATH}/OpenImageIO_Util_d.lib)
+set(OPENIMAGEIO_LIBRARIES ${OIIO_OPTIMIZED} ${OIIO_DEBUG})
+  endif()
 
   set(OPENIMAGEIO_DEFINITIONS "-DUSE_TBB=0")
   set(OPENIMAGEIO_IDIFF "${OPENIMAGEIO}/bin/idiff.exe")
@@ -572,31 +575,38 @@ if(WITH_LLVM)
 message(WARNING "LLVM debug libs not present on this system. Using release 
libs for debug builds.")
 set(LLVM_LIBRARY ${LLVM_LIBRARY_OPTIMIZED})
   endif()
-
 endif()
 
 if(WITH_OPENCOLORIO)
-  set(OPENCOLORIO ${LIBDIR}/OpenColorIO)
-  set(OPENCOLORIO_INCLUDE_DIRS ${OPENCOLORIO}/include)
-  set(OPENCOLORIO_LIBPATH ${OPENCOLORIO}/lib)
-  set(OPENCOLORIO_LIBRARIES
-optimized ${OPENCOLORIO_LIBPATH}/OpenColorIO.lib
-optimized ${OPENCOLORIO_LIBPATH}/libyaml-cpp.lib
-optimized ${OPENCOLORIO_LIBPATH}/libexpatMD.lib
-optimized ${OPENCOLORIO_LIBPATH}/pystring.lib
-debug $

[Bf-blender-cvs] [5d38b13e61f] master: CMake: Rename "USD_LIBRARY_PREFIX" to "PXR_LIB_PREFIX" for consistency

2022-03-23 Thread Patrick Mours

Commit: 5d38b13e61ff04df6b8b4e52541910167225a18e
Author: Patrick Mours
Date:   Wed Mar 23 16:52:02 2022 +0100
Branches: master
https://developer.blender.org/rB5d38b13e61ff04df6b8b4e52541910167225a18e

CMake: Rename "USD_LIBRARY_PREFIX" to "PXR_LIB_PREFIX" for consistency

rBc1909770e7f192574ea62449dd14b4254637e604 introduced "PXR_LIB_PREFIX" for 
building the
dependencies, so it only makes sense to use the same name in the Hydra render 
delegate CMake too.

===

M   build_files/cmake/Modules/FindUSD.cmake
M   intern/cycles/hydra/CMakeLists.txt

===

diff --git a/build_files/cmake/Modules/FindUSD.cmake 
b/build_files/cmake/Modules/FindUSD.cmake
index 75b5df9e196..d8f2ee22e6e 100644
--- a/build_files/cmake/Modules/FindUSD.cmake
+++ b/build_files/cmake/Modules/FindUSD.cmake
@@ -36,7 +36,8 @@ FIND_PATH(USD_INCLUDE_DIR
 # See 
https://github.com/PixarAnimationStudios/USD/blob/release/CHANGELOG.md#2111---2021-11-01
 FIND_LIBRARY(USD_LIBRARY
   NAMES
-usd_usd_m usd_usd_ms usd_m usd_ms ${USD_LIBRARY_PREFIX}usd
+usd_usd_m usd_usd_ms usd_m usd_ms
+${PXR_LIB_PREFIX}usd
   NAMES_PER_DIR
   HINTS
 ${_usd_SEARCH_DIRS}
diff --git a/intern/cycles/hydra/CMakeLists.txt 
b/intern/cycles/hydra/CMakeLists.txt
index 4ada4250780..703bd955135 100644
--- a/intern/cycles/hydra/CMakeLists.txt
+++ b/intern/cycles/hydra/CMakeLists.txt
@@ -92,22 +92,22 @@ target_compile_definitions(hdCyclesStatic
 
 target_link_libraries(hdCyclesStatic
   PRIVATE
-  ${USD_LIBRARY_DIR}/${USD_LIBRARY_PREFIX}hd${CMAKE_LINK_LIBRARY_SUFFIX}
-  ${USD_LIBRARY_DIR}/${USD_LIBRARY_PREFIX}plug${CMAKE_LINK_LIBRARY_SUFFIX}
-  ${USD_LIBRARY_DIR}/${USD_LIBRARY_PREFIX}tf${CMAKE_LINK_LIBRARY_SUFFIX}
-  ${USD_LIBRARY_DIR}/${USD_LIBRARY_PREFIX}trace${CMAKE_LINK_LIBRARY_SUFFIX}
-  ${USD_LIBRARY_DIR}/${USD_LIBRARY_PREFIX}vt${CMAKE_LINK_LIBRARY_SUFFIX}
-  ${USD_LIBRARY_DIR}/${USD_LIBRARY_PREFIX}work${CMAKE_LINK_LIBRARY_SUFFIX}
-  ${USD_LIBRARY_DIR}/${USD_LIBRARY_PREFIX}sdf${CMAKE_LINK_LIBRARY_SUFFIX}
-  
${USD_LIBRARY_DIR}/${USD_LIBRARY_PREFIX}cameraUtil${CMAKE_LINK_LIBRARY_SUFFIX}
-  ${USD_LIBRARY_DIR}/${USD_LIBRARY_PREFIX}hf${CMAKE_LINK_LIBRARY_SUFFIX}
-  ${USD_LIBRARY_DIR}/${USD_LIBRARY_PREFIX}pxOsd${CMAKE_LINK_LIBRARY_SUFFIX}
-  ${USD_LIBRARY_DIR}/${USD_LIBRARY_PREFIX}gf${CMAKE_LINK_LIBRARY_SUFFIX}
-  ${USD_LIBRARY_DIR}/${USD_LIBRARY_PREFIX}arch${CMAKE_LINK_LIBRARY_SUFFIX}
-  ${USD_LIBRARY_DIR}/${USD_LIBRARY_PREFIX}hgi${CMAKE_LINK_LIBRARY_SUFFIX}
-  ${USD_LIBRARY_DIR}/${USD_LIBRARY_PREFIX}glf${CMAKE_LINK_LIBRARY_SUFFIX}
-  ${USD_LIBRARY_DIR}/${USD_LIBRARY_PREFIX}hdx${CMAKE_LINK_LIBRARY_SUFFIX}
-  ${USD_LIBRARY_DIR}/${USD_LIBRARY_PREFIX}usdGeom${CMAKE_LINK_LIBRARY_SUFFIX}
+  ${USD_LIBRARY_DIR}/${PXR_LIB_PREFIX}hd${CMAKE_LINK_LIBRARY_SUFFIX}
+  ${USD_LIBRARY_DIR}/${PXR_LIB_PREFIX}plug${CMAKE_LINK_LIBRARY_SUFFIX}
+  ${USD_LIBRARY_DIR}/${PXR_LIB_PREFIX}tf${CMAKE_LINK_LIBRARY_SUFFIX}
+  ${USD_LIBRARY_DIR}/${PXR_LIB_PREFIX}trace${CMAKE_LINK_LIBRARY_SUFFIX}
+  ${USD_LIBRARY_DIR}/${PXR_LIB_PREFIX}vt${CMAKE_LINK_LIBRARY_SUFFIX}
+  ${USD_LIBRARY_DIR}/${PXR_LIB_PREFIX}work${CMAKE_LINK_LIBRARY_SUFFIX}
+  ${USD_LIBRARY_DIR}/${PXR_LIB_PREFIX}sdf${CMAKE_LINK_LIBRARY_SUFFIX}
+  ${USD_LIBRARY_DIR}/${PXR_LIB_PREFIX}cameraUtil${CMAKE_LINK_LIBRARY_SUFFIX}
+  ${USD_LIBRARY_DIR}/${PXR_LIB_PREFIX}hf${CMAKE_LINK_LIBRARY_SUFFIX}
+  ${USD_LIBRARY_DIR}/${PXR_LIB_PREFIX}pxOsd${CMAKE_LINK_LIBRARY_SUFFIX}
+  ${USD_LIBRARY_DIR}/${PXR_LIB_PREFIX}gf${CMAKE_LINK_LIBRARY_SUFFIX}
+  ${USD_LIBRARY_DIR}/${PXR_LIB_PREFIX}arch${CMAKE_LINK_LIBRARY_SUFFIX}
+  ${USD_LIBRARY_DIR}/${PXR_LIB_PREFIX}hgi${CMAKE_LINK_LIBRARY_SUFFIX}
+  ${USD_LIBRARY_DIR}/${PXR_LIB_PREFIX}glf${CMAKE_LINK_LIBRARY_SUFFIX}
+  ${USD_LIBRARY_DIR}/${PXR_LIB_PREFIX}hdx${CMAKE_LINK_LIBRARY_SUFFIX}
+  ${USD_LIBRARY_DIR}/${PXR_LIB_PREFIX}usdGeom${CMAKE_LINK_LIBRARY_SUFFIX}
   cycles_scene
   cycles_session
   cycles_graph

___
Bf-blender-cvs mailing list
Bf-blender-cvs@blender.org
List details, subscription details or unsubscribe:
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

[Bf-blender-cvs] [d350976ba06] master: Cycles: Add Hydra render delegate

2022-03-23 Thread Patrick Mours

Commit: d350976ba06d4ef93aa53fc4cd9da57be46ae924
Author: Patrick Mours
Date:   Wed Mar 23 16:07:43 2022 +0100
Branches: master
https://developer.blender.org/rBd350976ba06d4ef93aa53fc4cd9da57be46ae924

Cycles: Add Hydra render delegate

This patch adds a Hydra render delegate to Cycles, allowing Cycles to be used 
for rendering
in applications that provide a Hydra viewport. The implementation was written 
from scratch
against Cycles X, for integration into the Blender repository to make it 
possible to continue
developing it in step with the rest of Cycles. For this purpose it follows the 
style of the rest of
the Cycles code and can be built with a CMake option
(`WITH_CYCLES_HYDRA_RENDER_DELEGATE=1`) similar to the existing standalone 
version
of Cycles.

Since Hydra render delegates need to be built against the exact same USD version and 
other
dependencies as the target application is using, this is intended to be built 
separately from
Blender (`WITH_BLENDER=0` CMake option) and with support for library versions 
different
from what Blender is using. As such the CMake build scripts for Windows had to 
be modified
slightly, so that the Cycles Hydra render delegate can e.g. be built with MSVC 
2017 again
even though Blender requires MSVC 2019 now, and it's possible to specify custom 
paths to
the USD SDK etc. The codebase supports building against the latest USD release 
22.03 and all
the way back to USD 20.08 (with some limitations).

Reviewed By: brecht, LazyDodo

Differential Revision: https://developer.blender.org/D14398

===

M   CMakeLists.txt
M   build_files/cmake/Modules/FindUSD.cmake
M   build_files/cmake/macros.cmake
M   build_files/cmake/platform/platform_win32.cmake
M   intern/cycles/CMakeLists.txt
M   intern/cycles/device/CMakeLists.txt
A   intern/cycles/hydra/CMakeLists.txt
A   intern/cycles/hydra/attribute.cpp
A   intern/cycles/hydra/attribute.h
A   intern/cycles/hydra/camera.cpp
A   intern/cycles/hydra/camera.h
A   intern/cycles/hydra/config.h
A   intern/cycles/hydra/curves.cpp
A   intern/cycles/hydra/curves.h
A   intern/cycles/hydra/display_driver.cpp
A   intern/cycles/hydra/display_driver.h
A   intern/cycles/hydra/field.cpp
A   intern/cycles/hydra/field.h
A   intern/cycles/hydra/geometry.h
A   intern/cycles/hydra/geometry.inl
A   intern/cycles/hydra/instancer.cpp
A   intern/cycles/hydra/instancer.h
A   intern/cycles/hydra/light.cpp
A   intern/cycles/hydra/light.h
A   intern/cycles/hydra/material.cpp
A   intern/cycles/hydra/material.h
A   intern/cycles/hydra/mesh.cpp
A   intern/cycles/hydra/mesh.h
A   intern/cycles/hydra/node_util.cpp
A   intern/cycles/hydra/node_util.h
A   intern/cycles/hydra/output_driver.cpp
A   intern/cycles/hydra/output_driver.h
A   intern/cycles/hydra/plugInfo.json
A   intern/cycles/hydra/plugin.cpp
A   intern/cycles/hydra/plugin.h
A   intern/cycles/hydra/pointcloud.cpp
A   intern/cycles/hydra/pointcloud.h
A   intern/cycles/hydra/render_buffer.cpp
A   intern/cycles/hydra/render_buffer.h
A   intern/cycles/hydra/render_delegate.cpp
A   intern/cycles/hydra/render_delegate.h
A   intern/cycles/hydra/render_pass.cpp
A   intern/cycles/hydra/render_pass.h
A   intern/cycles/hydra/resources/plugInfo.json
A   intern/cycles/hydra/session.cpp
A   intern/cycles/hydra/session.h
A   intern/cycles/hydra/volume.cpp
A   intern/cycles/hydra/volume.h
M   intern/cycles/integrator/render_scheduler.cpp
M   intern/cycles/kernel/svm/vertex_color.h
M   intern/cycles/scene/integrator.cpp
M   intern/cycles/scene/mesh.cpp
M   intern/cycles/scene/mesh.h
M   intern/cycles/session/session.h
M   intern/cycles/util/tbb.h

===

diff --git a/CMakeLists.txt b/CMakeLists.txt
index d31a0c4a63d..bf40347e2ef 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -408,6 +408,8 @@ option(WITH_CYCLES_DEBUG "Build Cycles with 
options useful for debug
 option(WITH_CYCLES_STANDALONE"Build Cycles standalone application" OFF)
 option(WITH_CYCLES_STANDALONE_GUI"Build Cycles standalone with GUI" OFF)
 
+option(WITH_CYCLES_HYDRA_RENDER_DELEGATE "Build Cycles Hydra render delegate" 
OFF)
+
 option(WITH_CYCLES_DEBUG_NAN "Build Cycles with additional asserts for 
detecting NaNs and invalid values" OFF)
 option(WITH_CYCLES_NATIVE_ONLY   "Build Cycles with native kernel only 
(which fits current CPU, use for development only)" OFF)
 option(WITH_CYCLES_KERNEL_ASAN   "Build Cycles kernels with address 
sanitizer when WITH_COMPILER_ASAN is on, even if it's very slow" OFF)
@@ -742,9 +744,10 @@ endif()
 #-
 # Check

[Bf-blender-cvs] [08e719910bf] cycles_hydra: Cycles: Add Hydra render delegate

2022-03-22 Thread Patrick Mours

Commit: 08e719910bf2065ef0603cba8cc43ea236b2d090
Author: Patrick Mours
Date:   Mon Mar 21 10:58:51 2022 +0100
Branches: cycles_hydra
https://developer.blender.org/rB08e719910bf2065ef0603cba8cc43ea236b2d090

Cycles: Add Hydra render delegate

This patch adds a 
[Hydra](https://graphics.pixar.com/usd/release/glossary.html#usdglossary-hydra) 
render delegate to Cycles, allowing Cycles to be used for rendering in 
applications that provide a Hydra viewport (e.g. USDView or NVIDIA Omniverse 
Kit). The implementation was written from scratch against Cycles X, for 
integration into the Blender repository to make it possible to continue 
developing it in step with the rest of Cycles. For this purpose it follows the 
style of the rest of the [...]

Supported features:
- CPU/CUDA/OptiX/HIP/Metal support
- Camera Settings
- Render Settings (automatically queried from Cycles via node type system)
- Basic AOVs (color, depth, normal, primId, instanceId)
- Lights (Disk, Distant, Dome, Rect, Sphere)
- Meshes
- Geom Subsets
- Subdivision Surfaces (using native Cycles support)
- Custom Primvars (converted to Cycles attributes)
- Cycles Materials (can be exported to USD using the 
[universal-scene-description branch of 
Blender](https://developer.blender.org/diffusion/B/history/universal-scene-description/))
- USD Preview Surface Materials
- Curves
- Point Clouds
- OpenVDB Volumes

Still missing features:
- Motion Blur
- Custom AOVs
- ...

Since Hydra render delegates need to be built against the exact same USD version and 
other dependencies as the target application is using, this is intended to be 
built separately from Blender (`WITH_BLENDER=0` CMake option) and with support 
for library versions different from what Blender is using. As such the CMake 
build scripts for Windows had to be modified slightly, so that the Cycles Hydra 
render delegate can e.g. be built with MSVC 2017 again even though Blender 
requires MSVC 2019 now, an [...]

This also includes an optimization for Hydra viewports that display the result 
using OpenGL, in which case the texture can be kept entirely on the GPU (see 
display_driver.cpp). Unfortunately this is a bit difficult since Hydra doesn't 
give any control over the OpenGL context created by an application, so the only 
way to make it available to Cycles (which is rendering on a separate thread) 
without disturbing the target application is to create a second OpenGL context 
that is sharing resour [...]

---

**To build:**

1. [Set up a Blender build 
environment](https://wiki.blender.org/wiki/Building_Blender) as usual but 
download and apply this patch to the Git repository (Download Raw Diff on the 
right via `Save Link As` and then run `git apply patch.diff` with the 
downloaded file in your local repository after syncing to latest master branch).
2. Set these CMake variables:
```
WITH_BLENDER=0
WITH_CYCLES_HYDRA_RENDER_DELEGATE=1
USD_INCLUDE_DIRS=<path to USD>/include
USD_LIBRARY_DIR=<path to USD>/lib
USD_LIBRARY_PREFIX=<USD library name prefix, if any>
```
3. Continue following the usual Blender build instructions. After building the 
INSTALL target, the output directory contains the `hdCycles` shared library and 
associated resource files which can be loaded as a USD plugin.

**To execute:**

4. Copy `hdCycles.dll`/`hdCycles.a` and the `hdCycles` directory from the 
output directory to the USD plugin directory of the target application, or 
point a `PXR_PLUGINPATH_NAME` environment variable to the output directory.
5. Launch the target application, it should now automatically detect the Cycles 
Hydra render delegate.

Differential Revision: https://developer.blender.org/D14398

===

M   CMakeLists.txt
M   build_files/cmake/Modules/FindUSD.cmake
M   build_files/cmake/macros.cmake
M   build_files/cmake/platform/platform_win32.cmake
M   intern/cycles/CMakeLists.txt
M   intern/cycles/device/CMakeLists.txt
A   intern/cycles/hydra/CMakeLists.txt
A   intern/cycles/hydra/attribute.cpp
A   intern/cycles/hydra/attribute.h
A   intern/cycles/hydra/camera.cpp
A   intern/cycles/hydra/camera.h
A   intern/cycles/hydra/config.h
A   intern/cycles/hydra/curves.cpp
A   intern/cycles/hydra/curves.h
A   intern/cycles/hydra/display_driver.cpp
A   intern/cycles/hydra/display_driver.h
A   intern/cycles/hydra/field.cpp
A   intern/cycles/hydra/field.h
A   intern/cycles/hydra/geometry.h
A   intern/cycles/hydra/geometry.inl
A   intern/cycles/hydra/instancer.cpp
A   intern/cycles/hydra/instancer.h
A   intern/cycles/hydra/light.cpp
A   intern/cycles/hydra/light.h
A   intern/cycles/hydra/material.cpp
A   intern/cycles/hydra/material.h
A   intern/cycles/hydra/mesh.cpp
A   intern/cycles/hydra/mesh.h
A   intern/cycles/hydra/node_util.cpp
A   intern/cycles/hydra/node_util.h
A   intern/cycles/hydra/output_driver.cpp
A   intern/cycles/hydra/output_driver.h
A   intern/cycles/hydra

[Bf-blender-cvs] [c8b946bc97f] cycles_hydra: Fix Windows build to use existing "WITH_WINDOWS_FIND_MODULES" CMake option

2022-03-22 Thread Patrick Mours

Commit: c8b946bc97f77892226e499821784dea1f7198d0
Author: Patrick Mours
Date:   Mon Mar 21 16:47:20 2022 +0100
Branches: cycles_hydra
https://developer.blender.org/rBc8b946bc97f77892226e499821784dea1f7198d0

Fix Windows build to use existing "WITH_WINDOWS_FIND_MODULES" CMake option

===

M   build_files/cmake/Modules/FindUSD.cmake
M   build_files/cmake/platform/platform_win32.cmake
M   intern/cycles/CMakeLists.txt

===

diff --git a/build_files/cmake/Modules/FindUSD.cmake 
b/build_files/cmake/Modules/FindUSD.cmake
index c8c1f043b63..3d31228bf02 100644
--- a/build_files/cmake/Modules/FindUSD.cmake
+++ b/build_files/cmake/Modules/FindUSD.cmake
@@ -17,60 +17,51 @@ IF(NOT USD_ROOT_DIR AND NOT $ENV{USD_ROOT_DIR} STREQUAL "")
   SET(USD_ROOT_DIR $ENV{USD_ROOT_DIR})
 ENDIF()
 
-find_package(pxr REQUIRED OFF)
-
-if (NOT pxr_FOUND)
-
-  SET(_usd_SEARCH_DIRS
-${USD_ROOT_DIR}
-/opt/lib/usd
-  )
+SET(_usd_SEARCH_DIRS
+  ${USD_ROOT_DIR}
+  /opt/lib/usd
+)
 
-  FIND_PATH(USD_INCLUDE_DIR
-NAMES
-  pxr/usd/usd/api.h
-HINTS
-  ${_usd_SEARCH_DIRS}
-PATH_SUFFIXES
-  include
-DOC "Universal Scene Description (USD) header files"
-  )
+FIND_PATH(USD_INCLUDE_DIR
+  NAMES
+pxr/usd/usd/api.h
+  HINTS
+${_usd_SEARCH_DIRS}
+  PATH_SUFFIXES
+include
+  DOC "Universal Scene Description (USD) header files"
+  NO_CMAKE_PATH
+)
 
-  FIND_LIBRARY(USD_LIBRARY
-NAMES
-  usd_m usd_ms
-NAMES_PER_DIR
-HINTS
-  ${_usd_SEARCH_DIRS}
-PATH_SUFFIXES
-  lib64 lib lib/static
-DOC "Universal Scene Description (USD) monolithic library"
-  )
+FIND_LIBRARY(USD_LIBRARY
+  NAMES
+usd_m usd_ms ${USD_LIBRARY_PREFIX}usd
+  NAMES_PER_DIR
+  HINTS
+${_usd_SEARCH_DIRS}
+  PATH_SUFFIXES
+lib64 lib lib/static
+  DOC "Universal Scene Description (USD) library"
+)
 
-  IF(${USD_LIBRARY_NOTFOUND})
-set(USD_FOUND FALSE)
-  ELSE()
-# handle the QUIETLY and REQUIRED arguments and set USD_FOUND to TRUE if
-# all listed variables are TRUE
-INCLUDE(FindPackageHandleStandardArgs)
-FIND_PACKAGE_HANDLE_STANDARD_ARGS(USD DEFAULT_MSG USD_LIBRARY 
USD_INCLUDE_DIR)
+IF(${USD_LIBRARY_NOTFOUND})
+  set(USD_FOUND FALSE)
+ELSE()
+  # handle the QUIETLY and REQUIRED arguments and set USD_FOUND to TRUE if
+  # all listed variables are TRUE
+  INCLUDE(FindPackageHandleStandardArgs)
+  FIND_PACKAGE_HANDLE_STANDARD_ARGS(USD DEFAULT_MSG USD_LIBRARY 
USD_INCLUDE_DIR)
 
-IF(USD_FOUND)
-  get_filename_component(USD_LIBRARY_DIR ${USD_LIBRARY} DIRECTORY)
-  SET(USD_INCLUDE_DIRS ${USD_INCLUDE_DIR})
-  set(USD_LIBRARIES ${USD_LIBRARY})
-ENDIF()
+  IF(USD_FOUND)
+get_filename_component(USD_LIBRARY_DIR ${USD_LIBRARY} DIRECTORY)
+SET(USD_INCLUDE_DIRS ${USD_INCLUDE_DIR})
+set(USD_LIBRARIES ${USD_LIBRARY})
   ENDIF()
-
-  UNSET(_usd_SEARCH_DIRS)
-
-ELSE()
-SET(USD_FOUND ON)
-SET(USD_INCLUDE_DIR ${PXR_INCLUDE_DIRS})
-SET(USD_LIBRARIES ${PXR_LIBRARIES})
 ENDIF()
 
 MARK_AS_ADVANCED(
   USD_INCLUDE_DIR
   USD_LIBRARY_DIR
 )
+
+UNSET(_usd_SEARCH_DIRS)
diff --git a/build_files/cmake/platform/platform_win32.cmake 
b/build_files/cmake/platform/platform_win32.cmake
index ec0c83195e9..edbccee6152 100644
--- a/build_files/cmake/platform/platform_win32.cmake
+++ b/build_files/cmake/platform/platform_win32.cmake
@@ -255,9 +255,6 @@ if(NOT DEFINED LIBDIR)
   elseif(MSVC_VERSION GREATER 1909)
 message(STATUS "Visual Studio 2017 detected.")
 set(LIBDIR ${CMAKE_SOURCE_DIR}/../lib/${LIBDIR_BASE}_vc15)
-  elseif(MSVC_VERSION EQUAL 1900)
-message(STATUS "Visual Studio 2015 detected.")
-set(LIBDIR ${CMAKE_SOURCE_DIR}/../lib/${LIBDIR_BASE}_vc15)
   endif()
 else()
   message(STATUS "Using pre-compiled LIBDIR: ${LIBDIR}")
@@ -306,8 +303,8 @@ set(ZLIB_INCLUDE_DIR ${LIBDIR}/zlib/include)
 set(ZLIB_LIBRARY ${LIBDIR}/zlib/lib/libz_st.lib)
 set(ZLIB_DIR ${LIBDIR}/zlib)
 
-windows_find_package(zlib) # we want to find before finding things that depend 
on it like png
-windows_find_package(png)
+windows_find_package(ZLIB) # we want to find before finding things that depend 
on it like png
+windows_find_package(PNG)
 
 if(NOT PNG_FOUND)
   warn_hardcoded_paths(libpng)
@@ -319,9 +316,9 @@ if(NOT PNG_FOUND)
 endif()
 
 set(JPEG_NAMES ${JPEG_NAMES} libjpeg)
-windows_find_package(jpeg REQUIRED)
+windows_find_package(JPEG REQUIRED)
 if(NOT JPEG_FOUND)
-  warn_hardcoded_paths(jpeg)
+  warn_hardcoded_paths(JPEG)
   set(JPEG_INCLUDE_DIR ${LIBDIR}/jpeg/include)
   set(JPEG_LIBRARIES ${LIBDIR}/jpeg/lib/libjpeg.lib)
 endif()
@@ -339,7 +336,7 @@ set(FREETYPE_LIBRARIES
   ${LIBDIR}/brotli/lib/brotlidec-static.lib
   ${LIBDIR}/brotli/lib/brotlicommon-static.lib
 )
-windows_find_package(freetype REQUIRED)
+windows_find_package(Freetype REQUIRED

[Bf-blender-cvs] [3d5dbc1c449] blender-v3.0-release: Cycles: Reintroduce device-only memory handling that got lost in Cycles X merge

2022-01-11 Thread Patrick Mours

Commit: 3d5dbc1c44907c73d2e6e57a146cbadaea9623bd
Author: Patrick Mours
Date:   Mon Dec 6 14:58:35 2021 +0100
Branches: blender-v3.0-release
https://developer.blender.org/rB3d5dbc1c44907c73d2e6e57a146cbadaea9623bd

Cycles: Reintroduce device-only memory handling that got lost in Cycles X merge

Somehow only a part of rBf4f8b6dde32b0438e0b97a6d8ebeb89802987127 ended up in
Cycles X, so the issue that commit fixed ("OPTIX_ERROR_INVALID_VALUE" when the
system is out of memory) showed up again.
This adds the missing changes to fix that problem.

Maniphest Tasks: T93620

Differential Revision: https://developer.blender.org/D13488

===

M   intern/cycles/device/cpu/device_impl.cpp
M   intern/cycles/device/cuda/device_impl.cpp
M   intern/cycles/device/optix/device_impl.cpp

===

diff --git a/intern/cycles/device/cpu/device_impl.cpp 
b/intern/cycles/device/cpu/device_impl.cpp
index 68dec7f0af2..5db89d1e4fb 100644
--- a/intern/cycles/device/cpu/device_impl.cpp
+++ b/intern/cycles/device/cpu/device_impl.cpp
@@ -134,8 +134,7 @@ void CPUDevice::mem_alloc(device_memory )
   << string_human_readable_size(mem.memory_size()) << ")";
 }
 
-if (mem.type == MEM_DEVICE_ONLY) {
-  assert(!mem.host_pointer);
+if (mem.type == MEM_DEVICE_ONLY || !mem.host_pointer) {
   size_t alignment = MIN_ALIGNMENT_CPU_DATA_TYPES;
   void *data = util_aligned_malloc(mem.memory_size(), alignment);
   mem.device_pointer = (device_ptr)data;
@@ -194,7 +193,7 @@ void CPUDevice::mem_free(device_memory )
 tex_free((device_texture &)mem);
   }
   else if (mem.device_pointer) {
-if (mem.type == MEM_DEVICE_ONLY) {
+if (mem.type == MEM_DEVICE_ONLY || !mem.host_pointer) {
   util_aligned_free((void *)mem.device_pointer);
 }
 mem.device_pointer = 0;
diff --git a/intern/cycles/device/cuda/device_impl.cpp 
b/intern/cycles/device/cuda/device_impl.cpp
index 20945796a2d..8c5779f4a72 100644
--- a/intern/cycles/device/cuda/device_impl.cpp
+++ b/intern/cycles/device/cuda/device_impl.cpp
@@ -680,7 +680,7 @@ CUDADevice::CUDAMem 
*CUDADevice::generic_alloc(device_memory , size_t pitch_
 
   void *shared_pointer = 0;
 
-  if (mem_alloc_result != CUDA_SUCCESS && can_map_host) {
+  if (mem_alloc_result != CUDA_SUCCESS && can_map_host && mem.type != 
MEM_DEVICE_ONLY) {
 if (mem.shared_pointer) {
   /* Another device already allocated host memory. */
   mem_alloc_result = CUDA_SUCCESS;
@@ -703,8 +703,14 @@ CUDADevice::CUDAMem 
*CUDADevice::generic_alloc(device_memory , size_t pitch_
   }
 
   if (mem_alloc_result != CUDA_SUCCESS) {
-status = " failed, out of device and host memory";
-set_error("System is out of GPU and shared host memory");
+if (mem.type == MEM_DEVICE_ONLY) {
+  status = " failed, out of device memory";
+  set_error("System is out of GPU memory");
+}
+else {
+  status = " failed, out of device and host memory";
+  set_error("System is out of GPU and shared host memory");
+}
   }
 
   if (mem.name) {
diff --git a/intern/cycles/device/optix/device_impl.cpp 
b/intern/cycles/device/optix/device_impl.cpp
index f230f865f60..b33b5e21eee 100644
--- a/intern/cycles/device/optix/device_impl.cpp
+++ b/intern/cycles/device/optix/device_impl.cpp
@@ -44,14 +44,14 @@
 CCL_NAMESPACE_BEGIN
 
 OptiXDevice::Denoiser::Denoiser(OptiXDevice *device)
-: device(device), queue(device), state(device, "__denoiser_state")
+: device(device), queue(device), state(device, "__denoiser_state", true)
 {
 }
 
 OptiXDevice::OptiXDevice(const DeviceInfo , Stats , Profiler 
)
 : CUDADevice(info, stats, profiler),
   sbt_data(this, "__sbt", MEM_READ_ONLY),
-  launch_params(this, "__params"),
+  launch_params(this, "__params", false),
   denoiser_(this)
 {
   /* Make the CUDA context current. */
@@ -507,7 +507,7 @@ class OptiXDevice::DenoiseContext {
   : denoise_params(task.params),
 render_buffers(task.render_buffers),
 buffer_params(task.buffer_params),
-guiding_buffer(device, "denoiser guiding passes buffer"),
+guiding_buffer(device, "denoiser guiding passes buffer", true),
 num_samples(task.num_samples)
   {
 num_input_passes = 1;
@@ -1001,6 +1001,13 @@ bool OptiXDevice::build_optix_bvh(BVHOptiX *bvh,
   const OptixBuildInput _input,
   uint16_t num_motion_steps)
 {
+  /* Allocate and build acceleration structures only one at a time, to prevent 
parallel builds
+   * from running out of memory (since both original and compacted 
acceleration structure memory
+   * may be allocated at the same ti

[Bf-blender-cvs] [8393ccd0763] master: Cycles: Add OptiX temporal denoising support

2022-01-05 Thread Patrick Mours

Commit: 8393ccd07634b3152b18d4d527b1460dab9dbe06
Author: Patrick Mours
Date:   Tue Jan 4 21:39:54 2022 +0100
Branches: master
https://developer.blender.org/rB8393ccd07634b3152b18d4d527b1460dab9dbe06

Cycles: Add OptiX temporal denoising support

Enables the `bpy.ops.cycles.denoise_animation()` operator again and modifies it 
to support
temporal denoising with OptiX. This requires renders that were done with both 
the "Vector"
and "Denoising Data" passes.

Differential Revision: https://developer.blender.org/D11442

===

M   intern/cycles/blender/python.cpp
M   intern/cycles/blender/sync.h
M   intern/cycles/device/denoise.cpp
M   intern/cycles/device/denoise.h
M   intern/cycles/device/optix/device_impl.cpp
M   intern/cycles/device/optix/device_impl.h
M   intern/cycles/device/queue.h
M   intern/cycles/kernel/device/gpu/kernel.h
M   intern/cycles/kernel/types.h
M   intern/cycles/scene/pass.cpp
M   intern/cycles/session/denoising.cpp
M   intern/cycles/session/denoising.h

===

diff --git a/intern/cycles/blender/python.cpp b/intern/cycles/blender/python.cpp
index 024dae306b0..f509d5c2eeb 100644
--- a/intern/cycles/blender/python.cpp
+++ b/intern/cycles/blender/python.cpp
@@ -735,27 +735,20 @@ static bool image_parse_filepaths(PyObject *pyfilepaths, 
vector 
 
 static PyObject *denoise_func(PyObject * /*self*/, PyObject *args, PyObject 
*keywords)
 {
-#if 1
-  (void)args;
-  (void)keywords;
-#else
   static const char *keyword_list[] = {
-  "preferences", "scene", "view_layer", "input", "output", "tile_size", 
"samples", NULL};
+  "preferences", "scene", "view_layer", "input", "output", NULL};
   PyObject *pypreferences, *pyscene, *pyviewlayer;
   PyObject *pyinput, *pyoutput = NULL;
-  int tile_size = 0, samples = 0;
 
   if (!PyArg_ParseTupleAndKeywords(args,
keywords,
-   "|Oii",
+   "|O",
(char **)keyword_list,
,
,
,
,
-   ,
-   _size,
-   )) {
+   )) {
 return NULL;
   }
 
@@ -777,14 +770,10 @@ static PyObject *denoise_func(PyObject * /*self*/, 
PyObject *args, PyObject *key
  _ViewLayer,
  PyLong_AsVoidPtr(pyviewlayer),
  );
-  PointerRNA cviewlayer = RNA_pointer_get(, "cycles");
+  BL::ViewLayer b_view_layer(viewlayerptr);
 
-  DenoiseParams params;
-  params.radius = get_int(cviewlayer, "denoising_radius");
-  params.strength = get_float(cviewlayer, "denoising_strength");
-  params.feature_strength = get_float(cviewlayer, 
"denoising_feature_strength");
-  params.relative_pca = get_boolean(cviewlayer, "denoising_relative_pca");
-  params.neighbor_frames = get_int(cviewlayer, "denoising_neighbor_frames");
+  DenoiseParams params = BlenderSync::get_denoise_params(b_scene, 
b_view_layer, true);
+  params.use = true;
 
   /* Parse file paths list. */
   vector input, output;
@@ -812,24 +801,15 @@ static PyObject *denoise_func(PyObject * /*self*/, 
PyObject *args, PyObject *key
   }
 
   /* Create denoiser. */
-  DenoiserPipeline denoiser(device);
-  denoiser.params = params;
+  DenoiserPipeline denoiser(device, params);
   denoiser.input = input;
   denoiser.output = output;
 
-  if (tile_size > 0) {
-denoiser.tile_size = make_int2(tile_size, tile_size);
-  }
-  if (samples > 0) {
-denoiser.samples_override = samples;
-  }
-
   /* Run denoiser. */
   if (!denoiser.run()) {
 PyErr_SetString(PyExc_ValueError, denoiser.error.c_str());
 return NULL;
   }
-#endif
 
   Py_RETURN_NONE;
 }
diff --git a/intern/cycles/blender/sync.h b/intern/cycles/blender/sync.h
index d074f90bb1b..3722b938863 100644
--- a/intern/cycles/blender/sync.h
+++ b/intern/cycles/blender/sync.h
@@ -105,11 +105,11 @@ class BlenderSync {
   static BufferParams get_buffer_params(
   BL::SpaceView3D _v3d, BL::RegionView3D _rv3d, Camera *cam, int 
width, int height);
 
- private:
   static DenoiseParams get_denoise_params(BL::Scene _scene,
   BL::ViewLayer _view_layer,
   bool background);
 
+ private:
   /* sync */
   void sync_lights(BL::Depsgraph _depsgraph, bool update_all);
   void sync_materials(BL::Depsgraph _depsgraph, bool update_all);
diff --git a/i

[Bf-blender-cvs] [ca143fafa67] master: Cleanup: Silence "integer conversion resulted in a change of sign" warning in Cycles kernel code

2022-01-03 Thread Patrick Mours

Commit: ca143fafa674f5dbec39ded3ecbba4b0abfe93db
Author: Patrick Mours
Date:   Mon Jan 3 16:26:15 2022 +0100
Branches: master
https://developer.blender.org/rBca143fafa674f5dbec39ded3ecbba4b0abfe93db

Cleanup: Silence "integer conversion resulted in a change of sign" warning in 
Cycles kernel code

Occurred because "PATH_RAY_SHADOW_CATCHER_BACKGROUND" is expressed as an unsigned
integer (it is too large for a signed integer), but the "PathRayFlag" enum
type still defaulted to a signed integer.

===

M   intern/cycles/kernel/types.h

===

diff --git a/intern/cycles/kernel/types.h b/intern/cycles/kernel/types.h
index 20abea37649..1d0537f9547 100644
--- a/intern/cycles/kernel/types.h
+++ b/intern/cycles/kernel/types.h
@@ -202,7 +202,7 @@ enum SamplingPattern {
 
 /* These flags values correspond to `raytypes` in `osl.cpp`, so keep them in 
sync! */
 
-enum PathRayFlag {
+enum PathRayFlag : uint32_t {
   /* 
* Ray visibility.
*
@@ -1559,7 +1559,7 @@ enum {
 
 /* Kernel Features */
 
-enum KernelFeatureFlag : unsigned int {
+enum KernelFeatureFlag : uint32_t {
   /* Shader nodes. */
   KERNEL_FEATURE_NODE_BSDF = (1U << 0U),
   KERNEL_FEATURE_NODE_EMISSION = (1U << 1U),

___
Bf-blender-cvs mailing list
Bf-blender-cvs@blender.org
List details, subscription details or unsubscribe:
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

[Bf-blender-cvs] [e14f8c2dd76] master: Cycles: Reintroduce device-only memory handling that got lost in Cycles X merge

2021-12-07 Thread Patrick Mours

Commit: e14f8c2dd765a5f20d652899434174daa039804b
Author: Patrick Mours
Date:   Mon Dec 6 14:58:35 2021 +0100
Branches: master
https://developer.blender.org/rBe14f8c2dd765a5f20d652899434174daa039804b

Cycles: Reintroduce device-only memory handling that got lost in Cycles X merge

Somehow only a part of rBf4f8b6dde32b0438e0b97a6d8ebeb89802987127 ended up in
Cycles X, so the issue that commit fixed ("OPTIX_ERROR_INVALID_VALUE" when the
system is out of memory) showed up again.
This adds the missing changes to fix that problem.

Maniphest Tasks: T93620

Differential Revision: https://developer.blender.org/D13488

===

M   intern/cycles/device/cpu/device_impl.cpp
M   intern/cycles/device/cuda/device_impl.cpp
M   intern/cycles/device/optix/device_impl.cpp

===

diff --git a/intern/cycles/device/cpu/device_impl.cpp 
b/intern/cycles/device/cpu/device_impl.cpp
index 62b9cc93dae..6f3c8b42124 100644
--- a/intern/cycles/device/cpu/device_impl.cpp
+++ b/intern/cycles/device/cpu/device_impl.cpp
@@ -129,8 +129,7 @@ void CPUDevice::mem_alloc(device_memory )
   << string_human_readable_size(mem.memory_size()) << ")";
 }
 
-if (mem.type == MEM_DEVICE_ONLY) {
-  assert(!mem.host_pointer);
+if (mem.type == MEM_DEVICE_ONLY || !mem.host_pointer) {
   size_t alignment = MIN_ALIGNMENT_CPU_DATA_TYPES;
   void *data = util_aligned_malloc(mem.memory_size(), alignment);
   mem.device_pointer = (device_ptr)data;
@@ -189,7 +188,7 @@ void CPUDevice::mem_free(device_memory )
 tex_free((device_texture &)mem);
   }
   else if (mem.device_pointer) {
-if (mem.type == MEM_DEVICE_ONLY) {
+if (mem.type == MEM_DEVICE_ONLY || !mem.host_pointer) {
   util_aligned_free((void *)mem.device_pointer);
 }
 mem.device_pointer = 0;
diff --git a/intern/cycles/device/cuda/device_impl.cpp 
b/intern/cycles/device/cuda/device_impl.cpp
index ee55e6dc632..8d022040414 100644
--- a/intern/cycles/device/cuda/device_impl.cpp
+++ b/intern/cycles/device/cuda/device_impl.cpp
@@ -678,7 +678,7 @@ CUDADevice::CUDAMem 
*CUDADevice::generic_alloc(device_memory , size_t pitch_
 
   void *shared_pointer = 0;
 
-  if (mem_alloc_result != CUDA_SUCCESS && can_map_host) {
+  if (mem_alloc_result != CUDA_SUCCESS && can_map_host && mem.type != 
MEM_DEVICE_ONLY) {
 if (mem.shared_pointer) {
   /* Another device already allocated host memory. */
   mem_alloc_result = CUDA_SUCCESS;
@@ -701,8 +701,14 @@ CUDADevice::CUDAMem 
*CUDADevice::generic_alloc(device_memory , size_t pitch_
   }
 
   if (mem_alloc_result != CUDA_SUCCESS) {
-status = " failed, out of device and host memory";
-set_error("System is out of GPU and shared host memory");
+if (mem.type == MEM_DEVICE_ONLY) {
+  status = " failed, out of device memory";
+  set_error("System is out of GPU memory");
+}
+else {
+  status = " failed, out of device and host memory";
+  set_error("System is out of GPU and shared host memory");
+}
   }
 
   if (mem.name) {
diff --git a/intern/cycles/device/optix/device_impl.cpp 
b/intern/cycles/device/optix/device_impl.cpp
index a0c748fb6cd..da3c1ac57d1 100644
--- a/intern/cycles/device/optix/device_impl.cpp
+++ b/intern/cycles/device/optix/device_impl.cpp
@@ -46,14 +46,14 @@
 CCL_NAMESPACE_BEGIN
 
 OptiXDevice::Denoiser::Denoiser(OptiXDevice *device)
-: device(device), queue(device), state(device, "__denoiser_state")
+: device(device), queue(device), state(device, "__denoiser_state", true)
 {
 }
 
 OptiXDevice::OptiXDevice(const DeviceInfo , Stats , Profiler 
)
 : CUDADevice(info, stats, profiler),
   sbt_data(this, "__sbt", MEM_READ_ONLY),
-  launch_params(this, "__params"),
+  launch_params(this, "__params", false),
   denoiser_(this)
 {
   /* Make the CUDA context current. */
@@ -523,7 +523,7 @@ class OptiXDevice::DenoiseContext {
   : denoise_params(task.params),
 render_buffers(task.render_buffers),
 buffer_params(task.buffer_params),
-guiding_buffer(device, "denoiser guiding passes buffer"),
+guiding_buffer(device, "denoiser guiding passes buffer", true),
 num_samples(task.num_samples)
   {
 num_input_passes = 1;
@@ -1015,6 +1015,13 @@ bool OptiXDevice::build_optix_bvh(BVHOptiX *bvh,
   const OptixBuildInput _input,
   uint16_t num_motion_steps)
 {
+  /* Allocate and build acceleration structures only one at a time, to prevent 
parallel builds
+   * from running out of memory (since both original and compacted 
acceleration structure memory
+   * may be allocated at the same time for the duration of

[Bf-blender-cvs] [17665494186] master: Fix T92308: OptiX denoising fails with high resolutions

2021-12-02 Thread Patrick Mours

Commit: 17665494186816cebb9e8304199e40f9ee033990
Author: Patrick Mours
Date:   Wed Dec 1 11:54:42 2021 +0100
Branches: master
https://developer.blender.org/rB17665494186816cebb9e8304199e40f9ee033990

Fix T92308: OptiX denoising fails with high resolutions

The OptiX denoiser has an upper limit on how many pixels it can denoise at once, so
this changes the OptiX denoising process to use tiles for high-resolution images.
The OptiX SDK has a utility function for this purpose, so the changes are minor: adjusting
the configured tile size and including enough overlap.

Maniphest Tasks: T92308

Differential Revision: https://developer.blender.org/D13436

===

M   intern/cycles/device/optix/device_impl.cpp
M   intern/cycles/device/optix/device_impl.h

===

diff --git a/intern/cycles/device/optix/device_impl.cpp 
b/intern/cycles/device/optix/device_impl.cpp
index 7a78504f458..a0c748fb6cd 100644
--- a/intern/cycles/device/optix/device_impl.cpp
+++ b/intern/cycles/device/optix/device_impl.cpp
@@ -41,6 +41,8 @@
 #  define __KERNEL_OPTIX__
 #  include "kernel/device/optix/globals.h"
 
+#  include 
+
 CCL_NAMESPACE_BEGIN
 
 OptiXDevice::Denoiser::Denoiser(OptiXDevice *device)
@@ -884,35 +886,33 @@ bool OptiXDevice::denoise_create_if_needed(DenoiseContext 
)
 
 bool OptiXDevice::denoise_configure_if_needed(DenoiseContext )
 {
-  if (denoiser_.is_configured && (denoiser_.configured_size.x == 
context.buffer_params.width &&
-  denoiser_.configured_size.y == 
context.buffer_params.height)) {
+  /* Limit maximum tile size denoiser can be invoked with. */
+  const int2 tile_size = make_int2(min(context.buffer_params.width, 4096),
+   min(context.buffer_params.height, 4096));
+
+  if (denoiser_.is_configured &&
+  (denoiser_.configured_size.x == tile_size.x && 
denoiser_.configured_size.y == tile_size.y)) {
 return true;
   }
 
-  const BufferParams _params = context.buffer_params;
-
-  OptixDenoiserSizes sizes = {};
   optix_assert(optixDenoiserComputeMemoryResources(
-  denoiser_.optix_denoiser, buffer_params.width, buffer_params.height, 
));
-
-  /* Denoiser is invoked on whole images only, so no overlap needed (would be 
used for tiling). */
-  denoiser_.scratch_size = sizes.withoutOverlapScratchSizeInBytes;
-  denoiser_.scratch_offset = sizes.stateSizeInBytes;
+  denoiser_.optix_denoiser, tile_size.x, tile_size.y, _.sizes));
 
   /* Allocate denoiser state if tile size has changed since last setup. */
-  denoiser_.state.alloc_to_device(denoiser_.scratch_offset + 
denoiser_.scratch_size);
+  denoiser_.state.alloc_to_device(denoiser_.sizes.stateSizeInBytes +
+  
denoiser_.sizes.withOverlapScratchSizeInBytes);
 
   /* Initialize denoiser state for the current tile size. */
   const OptixResult result = optixDenoiserSetup(
   denoiser_.optix_denoiser,
   0, /* Work around bug in r495 drivers that causes artifacts when 
denoiser setup is called
 on a stream that is not the default stream */
-  buffer_params.width,
-  buffer_params.height,
+  tile_size.x + denoiser_.sizes.overlapWindowSizeInPixels * 2,
+  tile_size.y + denoiser_.sizes.overlapWindowSizeInPixels * 2,
   denoiser_.state.device_pointer,
-  denoiser_.scratch_offset,
-  denoiser_.state.device_pointer + denoiser_.scratch_offset,
-  denoiser_.scratch_size);
+  denoiser_.sizes.stateSizeInBytes,
+  denoiser_.state.device_pointer + denoiser_.sizes.stateSizeInBytes,
+  denoiser_.sizes.withOverlapScratchSizeInBytes);
   if (result != OPTIX_SUCCESS) {
 set_error("Failed to set up OptiX denoiser");
 return false;
@@ -921,8 +921,7 @@ bool 
OptiXDevice::denoise_configure_if_needed(DenoiseContext )
   cuda_assert(cuCtxSynchronize());
 
   denoiser_.is_configured = true;
-  denoiser_.configured_size.x = buffer_params.width;
-  denoiser_.configured_size.y = buffer_params.height;
+  denoiser_.configured_size = tile_size;
 
   return true;
 }
@@ -993,18 +992,20 @@ bool OptiXDevice::denoise_run(DenoiseContext , 
const DenoisePass )
   guide_layers.albedo = albedo_layer;
   guide_layers.normal = normal_layer;
 
-  optix_assert(optixDenoiserInvoke(denoiser_.optix_denoiser,
-   denoiser_.queue.stream(),
-   ,
-   denoiser_.state.device_pointer,
-   denoiser_.scratch_offset,
-   _layers,
-   _layers,
-   1,
-   0,
-   0,
-   denoiser_.state.device_pointer + 
denoiser_.scratch_offset,
-

[Bf-blender-cvs] [7a97e925fde] master: Cycles: Add support for building with OptiX 7.4 SDK and use built-in catmull-rom curve type

2021-11-24 Thread Patrick Mours

Commit: 7a97e925fde585ffafd7bdfe310d161cb6d51bc1
Author: Patrick Mours
Date:   Wed Nov 24 15:19:02 2021 +0100
Branches: master
https://developer.blender.org/rB7a97e925fde585ffafd7bdfe310d161cb6d51bc1

Cycles: Add support for building with OptiX 7.4 SDK and use built-in 
catmull-rom curve type

Some enum names were changed/removed in OptiX 7.4, so some changes are necessary
to keep things compiling.
In addition, OptiX 7.4 adds built-in support for catmull-rom curves, so it is no
longer necessary to convert the catmull-rom data to cubic bsplines first. It also
has endcaps disabled by default now, so the special handling via any-hit programs
that filtered them out before can be removed.

Differential Revision: https://developer.blender.org/D13351

===

M   intern/cycles/device/optix/device_impl.cpp
M   intern/cycles/kernel/device/optix/kernel.cu

===

diff --git a/intern/cycles/device/optix/device_impl.cpp 
b/intern/cycles/device/optix/device_impl.cpp
index 6e897e3831f..b82b1281eb8 100644
--- a/intern/cycles/device/optix/device_impl.cpp
+++ b/intern/cycles/device/optix/device_impl.cpp
@@ -208,11 +208,15 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
   }
   else {
 module_options.optLevel = OPTIX_COMPILE_OPTIMIZATION_LEVEL_3;
-module_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_LINEINFO;
+module_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_NONE;
   }
 
   module_options.boundValues = nullptr;
   module_options.numBoundValues = 0;
+#  if OPTIX_ABI_VERSION >= 55
+  module_options.payloadTypes = nullptr;
+  module_options.numPayloadTypes = 0;
+#  endif
 
   OptixPipelineCompileOptions pipeline_options = {};
   /* Default to no motion blur and two-level graph, since it is the fastest 
option. */
@@ -227,7 +231,11 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
   pipeline_options.usesPrimitiveTypeFlags = 
OPTIX_PRIMITIVE_TYPE_FLAGS_TRIANGLE;
   if (kernel_features & KERNEL_FEATURE_HAIR) {
 if (kernel_features & KERNEL_FEATURE_HAIR_THICK) {
+#  if OPTIX_ABI_VERSION >= 55
+  pipeline_options.usesPrimitiveTypeFlags |= 
OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_CATMULLROM;
+#  else
   pipeline_options.usesPrimitiveTypeFlags |= 
OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_CUBIC_BSPLINE;
+#  endif
 }
 else
   pipeline_options.usesPrimitiveTypeFlags |= 
OPTIX_PRIMITIVE_TYPE_FLAGS_CUSTOM;
@@ -324,7 +332,13 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
 if (kernel_features & KERNEL_FEATURE_HAIR_THICK) {
   /* Built-in thick curve intersection. */
   OptixBuiltinISOptions builtin_options = {};
+#  if OPTIX_ABI_VERSION >= 55
+  builtin_options.builtinISModuleType = 
OPTIX_PRIMITIVE_TYPE_ROUND_CATMULLROM;
+  builtin_options.buildFlags = OPTIX_BUILD_FLAG_PREFER_FAST_TRACE;
+  builtin_options.curveEndcapFlags = OPTIX_CURVE_ENDCAP_DEFAULT; /* 
Disable endcaps. */
+#  else
   builtin_options.builtinISModuleType = 
OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE;
+#  endif
   builtin_options.usesMotionBlur = false;
 
   optix_assert(optixBuiltinISModuleGet(
@@ -411,7 +425,7 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
 link_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_FULL;
   }
   else {
-link_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_LINEINFO;
+link_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_NONE;
   }
 
   if (kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) {
@@ -1178,6 +1192,15 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress 
, bool refit)
   int ka = max(k0 - 1, curve.first_key);
   int kb = min(k1 + 1, curve.first_key + curve.num_keys - 1);
 
+  index_data[i] = i * 4;
+  float4 *const v = vertex_data.data() + step * num_vertices + 
index_data[i];
+
+#  if OPTIX_ABI_VERSION >= 55
+  v[0] = make_float4(keys[ka].x, keys[ka].y, keys[ka].z, 
curve_radius[ka]);
+  v[1] = make_float4(keys[k0].x, keys[k0].y, keys[k0].z, 
curve_radius[k0]);
+  v[2] = make_float4(keys[k1].x, keys[k1].y, keys[k1].z, 
curve_radius[k1]);
+  v[3] = make_float4(keys[kb].x, keys[kb].y, keys[kb].z, 
curve_radius[kb]);
+#  else
   const float4 px = make_float4(keys[ka].x, keys[k0].x, 
keys[k1].x, keys[kb].x);
   const float4 py = make_float4(keys[ka].y, keys[k0].y, 
keys[k1].y, keys[kb].y);
   const float4 pz = make_float4(keys[ka].z, keys[k0].z, 
keys[k1].z, keys[kb].z);
@@ -1190,8 +1213,6 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress , 
bool refit)
   static const float4 cr2bsp2 = make_float4(+1, -4, 11, -2) / 6.f;
   static const float4 cr2bsp3 = make_float4(-2, +5, -4, +7) / 6.f;
 
-  index_data[i] = i * 4;
-  float4 *const v = vertex_data.data() + step *

[Bf-blender-cvs] [809ae823b7c] master: Merge branch 'blender-v3.0-release'

2021-11-12 Thread Patrick Mours

Commit: 809ae823b7cb612fda219c0e277425bba175090f
Author: Patrick Mours
Date:   Fri Nov 12 19:00:23 2021 +0100
Branches: master
https://developer.blender.org/rB809ae823b7cb612fda219c0e277425bba175090f

Merge branch 'blender-v3.0-release'

===



===



___
Bf-blender-cvs mailing list
Bf-blender-cvs@blender.org
List details, subscription details or unsubscribe:
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

[Bf-blender-cvs] [9d0d4b8601d] blender-v3.0-release: Fix T93029: OptiX denoising artifacts at high sample counts in specific scenes

2021-11-12 Thread Patrick Mours

Commit: 9d0d4b8601dfb9de335dd7af32562cbfb94238a6
Author: Patrick Mours
Date:   Fri Nov 12 18:59:50 2021 +0100
Branches: blender-v3.0-release
https://developer.blender.org/rB9d0d4b8601dfb9de335dd7af32562cbfb94238a6

Fix T93029: OptiX denoising artifacts at high sample counts in specific scenes

Partially reverts commit rB440a3475b8f5410e5c41bfbed5ce82771b41356f because
"optixDenoiserComputeIntensity" does not currently support input images that
are not packed (the "pixelStrideInBytes" field is not zero). As a result the
intensity calculation would take into account data from other passes in the
image, some of which were still scaled by the number of samples, and would
therefore produce wildly incorrect results that then caused artifacts in the
denoised image.

Maniphest Tasks: T93029

===

M   intern/cycles/device/optix/device_impl.cpp

===

diff --git a/intern/cycles/device/optix/device_impl.cpp 
b/intern/cycles/device/optix/device_impl.cpp
index 8e554d0ba2f..bb690551c04 100644
--- a/intern/cycles/device/optix/device_impl.cpp
+++ b/intern/cycles/device/optix/device_impl.cpp
@@ -886,8 +886,7 @@ bool 
OptiXDevice::denoise_configure_if_needed(DenoiseContext )
   denoiser_.scratch_offset = sizes.stateSizeInBytes;
 
   /* Allocate denoiser state if tile size has changed since last setup. */
-  denoiser_.state.alloc_to_device(denoiser_.scratch_offset + 
denoiser_.scratch_size +
-  sizeof(float));
+  denoiser_.state.alloc_to_device(denoiser_.scratch_offset + 
denoiser_.scratch_size);
 
   /* Initialize denoiser state for the current tile size. */
   const OptixResult result = optixDenoiserSetup(
@@ -971,16 +970,6 @@ bool OptiXDevice::denoise_run(DenoiseContext , 
const DenoisePass )
 
   /* Finally run denoising. */
   OptixDenoiserParams params = {}; /* All parameters are disabled/zero. */
-  params.hdrIntensity = denoiser_.state.device_pointer + 
denoiser_.scratch_offset +
-denoiser_.scratch_size;
-
-  optix_assert(
-  optixDenoiserComputeIntensity(denoiser_.optix_denoiser,
-denoiser_.queue.stream(),
-_layer,
-params.hdrIntensity,
-denoiser_.state.device_pointer + 
denoiser_.scratch_offset,
-denoiser_.scratch_size));
 
   OptixDenoiserLayer image_layers = {};
   image_layers.input = color_layer;

___
Bf-blender-cvs mailing list
Bf-blender-cvs@blender.org
List details, subscription details or unsubscribe:
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

[Bf-blender-cvs] [f5656204352] master: Fix T92985: CUDA errors with Cycles film convert kernels

2021-11-10 Thread Patrick Mours

Commit: f56562043521a5c160585aea3f28167b4d3bc77d
Author: Patrick Mours
Date:   Wed Nov 10 14:37:15 2021 +0100
Branches: master
https://developer.blender.org/rBf56562043521a5c160585aea3f28167b4d3bc77d

Fix T92985: CUDA errors with Cycles film convert kernels

rB3a4c8f406a3a3bf0627477c6183a594fa707a6e2 changed the macros that create the film
convert kernel entry points, but in the process accidentally changed the parameter
definition of one of them (which caused CUDA launch and misaligned address errors)
and changed the implementation as well. This restores the correct implementation
from before.

In addition, the `ccl_gpu_kernel_threads` macro did not work as intended and 
caused the
generated launch bounds to end up with an incorrect input for the second 
parameter (it was
set to "thread_num_registers", rather than the result of the block number 
calculation). I'm
not entirely sure why, as the macro definition looked sound to me. Decided to 
simply go with
two separate macros instead, to simplify and solve this.

Also changed how state is captured with the `ccl_gpu_kernel_lambda` macro 
slightly, to avoid
a compiler warning (expression has no effect) that otherwise occurred.

Maniphest Tasks: T92985

Differential Revision: https://developer.blender.org/D13175

===

M   intern/cycles/kernel/CMakeLists.txt
M   intern/cycles/kernel/device/cuda/config.h
M   intern/cycles/kernel/device/gpu/kernel.h
M   intern/cycles/kernel/device/hip/config.h
M   intern/cycles/kernel/device/metal/compat.h

===

diff --git a/intern/cycles/kernel/CMakeLists.txt 
b/intern/cycles/kernel/CMakeLists.txt
index f311b0e74bb..39cb886b16e 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -379,7 +379,6 @@ if(WITH_CYCLES_CUDA_BINARIES)
 ${SRC_KERNEL_HEADERS}
 ${SRC_KERNEL_DEVICE_GPU_HEADERS}
 ${SRC_KERNEL_DEVICE_CUDA_HEADERS}
-${SRC_KERNEL_DEVICE_METAL_HEADERS}
 ${SRC_UTIL_HEADERS}
   )
   set(cuda_cubins)
diff --git a/intern/cycles/kernel/device/cuda/config.h 
b/intern/cycles/kernel/device/cuda/config.h
index e333fe90332..003881d7912 100644
--- a/intern/cycles/kernel/device/cuda/config.h
+++ b/intern/cycles/kernel/device/cuda/config.h
@@ -92,25 +92,19 @@
 
 /* Compute number of threads per block and minimum blocks per multiprocessor
  * given the maximum number of registers per thread. */
-
-#define ccl_gpu_kernel_threads(block_num_threads) \
-  extern "C" __global__ void __launch_bounds__(block_num_threads)
-
-#define ccl_gpu_kernel_threads_registers(block_num_threads, 
thread_num_registers) \
+#define ccl_gpu_kernel(block_num_threads, thread_num_registers) \
   extern "C" __global__ void __launch_bounds__(block_num_threads, \
GPU_MULTIPRESSOR_MAX_REGISTERS 
/ \
(block_num_threads * 
thread_num_registers))
 
-/* allow ccl_gpu_kernel to accept 1 or 2 parameters */
-#define SELECT_MACRO(_1, _2, NAME, ...) NAME
-#define ccl_gpu_kernel(...) \
-  SELECT_MACRO(__VA_ARGS__, ccl_gpu_kernel_threads_registers, 
ccl_gpu_kernel_threads)(__VA_ARGS__)
+#define ccl_gpu_kernel_threads(block_num_threads) \
+  extern "C" __global__ void __launch_bounds__(block_num_threads)
 
 #define ccl_gpu_kernel_signature(name, ...) kernel_gpu_##name(__VA_ARGS__)
 
 #define ccl_gpu_kernel_call(x) x
 
-/* define a function object where "func" is the lambda body, and additional 
parameters are used to
+/* Define a function object where "func" is the lambda body, and additional 
parameters are used to
  * specify captured state  */
 #define ccl_gpu_kernel_lambda(func, ...) \
   struct KernelLambda { \
@@ -119,8 +113,7 @@
 { \
   return (func); \
 } \
-  } ccl_gpu_kernel_lambda_pass; \
-  ccl_gpu_kernel_lambda_pass
+  } ccl_gpu_kernel_lambda_pass
 
 /* sanity checks */
 
diff --git a/intern/cycles/kernel/device/gpu/kernel.h 
b/intern/cycles/kernel/device/gpu/kernel.h
index 2ec6a49ec7b..e954178ec63 100644
--- a/intern/cycles/kernel/device/gpu/kernel.h
+++ b/intern/cycles/kernel/device/gpu/kernel.h
@@ -56,8 +56,7 @@
  */
 
 ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
-ccl_gpu_kernel_signature(integrator_reset,
-  int num_states)
+ccl_gpu_kernel_signature(integrator_reset, int num_states)
 {
   const int state = ccl_gpu_global_id_x();
 
@@ -265,7 +264,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, 
GPU_KERNEL_MAX_REGISTERS)
   }
 }
 
-ccl_gpu_kernel(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE)
+ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE)
 ccl_gpu_kernel_signature(integrator_queued_paths_array,
  int num_states,
  cc

[Bf-blender-cvs] [faeb2cc9005] master: Merge branch 'blender-v3.0-release'

2021-11-09 Thread Patrick Mours

Commit: faeb2cc9005739efd6d58a7ab1e9170bf064b656
Author: Patrick Mours
Date:   Tue Nov 9 14:49:36 2021 +0100
Branches: master
https://developer.blender.org/rBfaeb2cc9005739efd6d58a7ab1e9170bf064b656

Merge branch 'blender-v3.0-release'

===



===



___
Bf-blender-cvs mailing list
Bf-blender-cvs@blender.org
List details, subscription details or unsubscribe:
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

[Bf-blender-cvs] [440a3475b8f] blender-v3.0-release: Cycles: Improve OptiX denoising with dark images and fix crash when denoiser is destroyed

2021-11-09 Thread Patrick Mours

Commit: 440a3475b8f5410e5c41bfbed5ce82771b41356f
Author: Patrick Mours
Date:   Tue Nov 9 12:17:09 2021 +0100
Branches: blender-v3.0-release
https://developer.blender.org/rB440a3475b8f5410e5c41bfbed5ce82771b41356f

Cycles: Improve OptiX denoising with dark images and fix crash when denoiser is 
destroyed

Adds a pass before denoising that calculates the intensity of the image, which 
can be
passed into the OptiX denoiser for more optimal results for very dark or very 
bright images.

In addition this also fixes a crash that sometimes occurred on exit. The OptiX 
denoiser object
has to be destroyed before the OptiX device context object (since it references 
that). But in
C++ the destructor function of a class is called before its fields are 
destructed, so
"~OptiXDevice" was always called before "OptiXDevice::Denoiser::~Denoiser" and therefore
"optixDeviceContextDestroy" was called before "optixDenoiserDestroy", hence the 
crash.

Differential Revision: https://developer.blender.org/D13160

===

M   intern/cycles/device/optix/device_impl.cpp
M   intern/cycles/device/optix/device_impl.h
M   intern/cycles/kernel/device/gpu/kernel.h

===

diff --git a/intern/cycles/device/optix/device_impl.cpp 
b/intern/cycles/device/optix/device_impl.cpp
index 7f94212f383..8e554d0ba2f 100644
--- a/intern/cycles/device/optix/device_impl.cpp
+++ b/intern/cycles/device/optix/device_impl.cpp
@@ -48,14 +48,6 @@ OptiXDevice::Denoiser::Denoiser(OptiXDevice *device)
 {
 }
 
-OptiXDevice::Denoiser::~Denoiser()
-{
-  const CUDAContextScope scope(device);
-  if (optix_denoiser != nullptr) {
-optixDenoiserDestroy(optix_denoiser);
-  }
-}
-
 OptiXDevice::OptiXDevice(const DeviceInfo , Stats , Profiler 
)
 : CUDADevice(info, stats, profiler),
   sbt_data(this, "__sbt", MEM_READ_ONLY),
@@ -133,6 +125,11 @@ OptiXDevice::~OptiXDevice()
 }
   }
 
+  /* Make sure denoiser is destroyed before device context! */
+  if (denoiser_.optix_denoiser != nullptr) {
+optixDenoiserDestroy(denoiser_.optix_denoiser);
+  }
+
   optixDeviceContextDestroy(context);
 }
 
@@ -884,11 +881,13 @@ bool 
OptiXDevice::denoise_configure_if_needed(DenoiseContext )
   optix_assert(optixDenoiserComputeMemoryResources(
   denoiser_.optix_denoiser, buffer_params.width, buffer_params.height, 
));
 
-  denoiser_.scratch_size = sizes.withOverlapScratchSizeInBytes;
+  /* Denoiser is invoked on whole images only, so no overlap needed (would be 
used for tiling). */
+  denoiser_.scratch_size = sizes.withoutOverlapScratchSizeInBytes;
   denoiser_.scratch_offset = sizes.stateSizeInBytes;
 
   /* Allocate denoiser state if tile size has changed since last setup. */
-  denoiser_.state.alloc_to_device(denoiser_.scratch_offset + 
denoiser_.scratch_size);
+  denoiser_.state.alloc_to_device(denoiser_.scratch_offset + 
denoiser_.scratch_size +
+  sizeof(float));
 
   /* Initialize denoiser state for the current tile size. */
   const OptixResult result = optixDenoiserSetup(
@@ -942,8 +941,6 @@ bool OptiXDevice::denoise_run(DenoiseContext , 
const DenoisePass )
 color_layer.format = OPTIX_PIXEL_FORMAT_FLOAT3;
   }
 
-  device_vector fake_albedo(this, "fake_albedo", MEM_READ_WRITE);
-
   /* Optional albedo and color passes. */
   if (context.num_input_passes > 1) {
 const device_ptr d_guiding_buffer = context.guiding_params.device_pointer;
@@ -974,6 +971,17 @@ bool OptiXDevice::denoise_run(DenoiseContext , 
const DenoisePass )
 
   /* Finally run denoising. */
   OptixDenoiserParams params = {}; /* All parameters are disabled/zero. */
+  params.hdrIntensity = denoiser_.state.device_pointer + 
denoiser_.scratch_offset +
+denoiser_.scratch_size;
+
+  optix_assert(
+  optixDenoiserComputeIntensity(denoiser_.optix_denoiser,
+denoiser_.queue.stream(),
+_layer,
+params.hdrIntensity,
+denoiser_.state.device_pointer + 
denoiser_.scratch_offset,
+denoiser_.scratch_size));
+
   OptixDenoiserLayer image_layers = {};
   image_layers.input = color_layer;
   image_layers.output = output_layer;
diff --git a/intern/cycles/device/optix/device_impl.h 
b/intern/cycles/device/optix/device_impl.h
index 3ec98098eb7..5cfc249b430 100644
--- a/intern/cycles/device/optix/device_impl.h
+++ b/intern/cycles/device/optix/device_impl.h
@@ -82,7 +82,6 @@ class OptiXDevice : public CUDADevice {
   class Denoiser {
public:
 explicit Denoiser(OptiXDevice *device);
-~Denoiser();
 
 OptiXDevice *device;
 OptiXDeviceQueue queue;
diff --git a/intern/cycles/kernel/device/gpu/kernel.h 
b/intern/cycles/kernel/device/gpu/kernel.h
index f86a8c692aa..5848ba5d

[Bf-blender-cvs] [9daf6a69a6a] blender-v3.0-release: Fix T92472: OptiX denoising artifacts with recent GPU driver 495.29.05 or newer on Linux

2021-11-09 Thread Patrick Mours

Commit: 9daf6a69a6acd95f0b46bc45e5f3ae27d0904764
Author: Patrick Mours
Date:   Tue Nov 9 12:24:54 2021 +0100
Branches: blender-v3.0-release
https://developer.blender.org/rB9daf6a69a6acd95f0b46bc45e5f3ae27d0904764

Fix T92472: OptiX denoising artifacts with recent GPU driver 495.29.05 or newer 
on Linux

Adds a workaround for a driver bug in r495 that causes artifacts with OptiX 
denoising.
`optixDenoiserSetup` is not working properly there when called with a stream 
other than the
default stream, so use the default stream for now and force synchronization 
across the entire
context afterwards to ensure the other stream Cycles uses to enqueue the actual 
denoising
command cannot execute before the denoising setup has finished.

Maniphest Tasks: T92472

Differential Revision: https://developer.blender.org/D13158

===

M   intern/cycles/device/optix/device_impl.cpp

===

diff --git a/intern/cycles/device/optix/device_impl.cpp 
b/intern/cycles/device/optix/device_impl.cpp
index 9b9a5ac0de7..7f94212f383 100644
--- a/intern/cycles/device/optix/device_impl.cpp
+++ b/intern/cycles/device/optix/device_impl.cpp
@@ -891,20 +891,23 @@ bool 
OptiXDevice::denoise_configure_if_needed(DenoiseContext )
   denoiser_.state.alloc_to_device(denoiser_.scratch_offset + 
denoiser_.scratch_size);
 
   /* Initialize denoiser state for the current tile size. */
-  const OptixResult result = optixDenoiserSetup(denoiser_.optix_denoiser,
-denoiser_.queue.stream(),
-buffer_params.width,
-buffer_params.height,
-denoiser_.state.device_pointer,
-denoiser_.scratch_offset,
-denoiser_.state.device_pointer 
+
-denoiser_.scratch_offset,
-denoiser_.scratch_size);
+  const OptixResult result = optixDenoiserSetup(
+  denoiser_.optix_denoiser,
+  0, /* Work around bug in r495 drivers that causes artifacts when 
denoiser setup is called
+on a stream that is not the default stream */
+  buffer_params.width,
+  buffer_params.height,
+  denoiser_.state.device_pointer,
+  denoiser_.scratch_offset,
+  denoiser_.state.device_pointer + denoiser_.scratch_offset,
+  denoiser_.scratch_size);
   if (result != OPTIX_SUCCESS) {
 set_error("Failed to set up OptiX denoiser");
 return false;
   }
 
+  cuda_assert(cuCtxSynchronize());
+
   denoiser_.is_configured = true;
   denoiser_.configured_size.x = buffer_params.width;
   denoiser_.configured_size.y = buffer_params.height;

___
Bf-blender-cvs mailing list
Bf-blender-cvs@blender.org
List details, subscription details or unsubscribe:
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

[Bf-blender-cvs] [31dfdb6379c] blender-v2.93-release: Add missing "CUDA_ERROR_UNSUPPORTED_PTX_VERSION" to CUEW

2021-11-02 Thread Patrick Mours

Commit: 31dfdb6379cd42b919ba529eb9cfd3f29eb8de64
Author: Patrick Mours
Date:   Tue Nov 2 12:30:28 2021 +0100
Branches: blender-v2.93-release
https://developer.blender.org/rB31dfdb6379cd42b919ba529eb9cfd3f29eb8de64

Add missing "CUDA_ERROR_UNSUPPORTED_PTX_VERSION" to CUEW

This is required for Cycles to report a meaningful error message when it fails 
to load a PTX module
created with a newer CUDA toolkit version than the driver supports.

Fix crash when kernel loading failed (T91879)

Ref T91879

===

M   extern/cuew/include/cuew.h
M   extern/cuew/src/cuew.c
M   intern/cycles/device/cuda/device_cuda_impl.cpp

===

diff --git a/extern/cuew/include/cuew.h b/extern/cuew/include/cuew.h
index 0fa0f1291fa..85522744ad1 100644
--- a/extern/cuew/include/cuew.h
+++ b/extern/cuew/include/cuew.h
@@ -609,6 +609,7 @@ typedef enum cudaError_enum {
   CUDA_ERROR_INVALID_GRAPHICS_CONTEXT = 219,
   CUDA_ERROR_NVLINK_UNCORRECTABLE = 220,
   CUDA_ERROR_JIT_COMPILER_NOT_FOUND = 221,
+  CUDA_ERROR_UNSUPPORTED_PTX_VERSION = 222,
   CUDA_ERROR_INVALID_SOURCE = 300,
   CUDA_ERROR_FILE_NOT_FOUND = 301,
   CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND = 302,
diff --git a/extern/cuew/src/cuew.c b/extern/cuew/src/cuew.c
index 7a1b0018a24..9eba9306323 100644
--- a/extern/cuew/src/cuew.c
+++ b/extern/cuew/src/cuew.c
@@ -736,6 +736,7 @@ const char *cuewErrorString(CUresult result) {
 case CUDA_ERROR_INVALID_GRAPHICS_CONTEXT: return "Invalid graphics 
context";
 case CUDA_ERROR_NVLINK_UNCORRECTABLE: return "Nvlink uncorrectable";
 case CUDA_ERROR_JIT_COMPILER_NOT_FOUND: return "Jit compiler not found";
+case CUDA_ERROR_UNSUPPORTED_PTX_VERSION: return "Unsupported PTX version";
 case CUDA_ERROR_INVALID_SOURCE: return "Invalid source";
 case CUDA_ERROR_FILE_NOT_FOUND: return "File not found";
 case CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND: return "Link to a shared 
object failed to resolve";
diff --git a/intern/cycles/device/cuda/device_cuda_impl.cpp 
b/intern/cycles/device/cuda/device_cuda_impl.cpp
index cebe8ce631e..e9d8dc5a7de 100644
--- a/intern/cycles/device/cuda/device_cuda_impl.cpp
+++ b/intern/cycles/device/cuda/device_cuda_impl.cpp
@@ -583,9 +583,9 @@ bool CUDADevice::load_kernels(const DeviceRequestedFeatures 
_features)
 
   if (result == CUDA_SUCCESS) {
 reserve_local_memory(requested_features);
-  }
 
-  load_functions();
+load_functions();
+  }
 
   return (result == CUDA_SUCCESS);
 }

___
Bf-blender-cvs mailing list
Bf-blender-cvs@blender.org
List details, subscription details or unsubscribe:
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

[Bf-blender-cvs] [b382632665b] blender-v2.83-release: Add missing "CUDA_ERROR_UNSUPPORTED_PTX_VERSION" to CUEW

2021-10-29 Thread Patrick Mours

Commit: b382632665b3552d580a3c65e94dd36857d5fb68
Author: Patrick Mours
Date:   Fri Oct 29 10:29:25 2021 +0200
Branches: blender-v2.83-release
https://developer.blender.org/rBb382632665b3552d580a3c65e94dd36857d5fb68

Add missing "CUDA_ERROR_UNSUPPORTED_PTX_VERSION" to CUEW

This is required for Cycles to report a meaningful error message when it fails 
to load a PTX module
created with a newer CUDA toolkit version than the driver supports.

Ref T91879

===

M   extern/cuew/include/cuew.h
M   extern/cuew/src/cuew.c
M   intern/cycles/device/cuda/device_cuda_impl.cpp

===

diff --git a/extern/cuew/include/cuew.h b/extern/cuew/include/cuew.h
index 0fa0f1291fa..85522744ad1 100644
--- a/extern/cuew/include/cuew.h
+++ b/extern/cuew/include/cuew.h
@@ -609,6 +609,7 @@ typedef enum cudaError_enum {
   CUDA_ERROR_INVALID_GRAPHICS_CONTEXT = 219,
   CUDA_ERROR_NVLINK_UNCORRECTABLE = 220,
   CUDA_ERROR_JIT_COMPILER_NOT_FOUND = 221,
+  CUDA_ERROR_UNSUPPORTED_PTX_VERSION = 222,
   CUDA_ERROR_INVALID_SOURCE = 300,
   CUDA_ERROR_FILE_NOT_FOUND = 301,
   CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND = 302,
diff --git a/extern/cuew/src/cuew.c b/extern/cuew/src/cuew.c
index f477ec48a18..e5349763197 100644
--- a/extern/cuew/src/cuew.c
+++ b/extern/cuew/src/cuew.c
@@ -736,6 +736,7 @@ const char *cuewErrorString(CUresult result) {
 case CUDA_ERROR_INVALID_GRAPHICS_CONTEXT: return "Invalid graphics 
context";
 case CUDA_ERROR_NVLINK_UNCORRECTABLE: return "Nvlink uncorrectable";
 case CUDA_ERROR_JIT_COMPILER_NOT_FOUND: return "Jit compiler not found";
+case CUDA_ERROR_UNSUPPORTED_PTX_VERSION: return "Unsupported PTX version";
 case CUDA_ERROR_INVALID_SOURCE: return "Invalid source";
 case CUDA_ERROR_FILE_NOT_FOUND: return "File not found";
 case CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND: return "Link to a shared 
object failed to resolve";
diff --git a/intern/cycles/device/cuda/device_cuda_impl.cpp 
b/intern/cycles/device/cuda/device_cuda_impl.cpp
index 6196f642f8e..22a136e5ab9 100644
--- a/intern/cycles/device/cuda/device_cuda_impl.cpp
+++ b/intern/cycles/device/cuda/device_cuda_impl.cpp
@@ -545,9 +545,9 @@ bool CUDADevice::load_kernels(const DeviceRequestedFeatures 
_features)
 
   if (result == CUDA_SUCCESS) {
 reserve_local_memory(requested_features);
-  }
 
-  load_functions();
+load_functions();
+  }
 
   return (result == CUDA_SUCCESS);
 }

___
Bf-blender-cvs mailing list
Bf-blender-cvs@blender.org
List details, subscription details or unsubscribe:
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

[Bf-blender-cvs] [3a655711955] master: Fix T90666: Toggling motion blur while persistent data is enabled results in artifacts

2021-10-08 Thread Patrick Mours

Commit: 3a65571195524ea50682611306ab4d103807112a
Author: Patrick Mours
Date:   Fri Oct 8 13:45:34 2021 +0200
Branches: master
https://developer.blender.org/rB3a65571195524ea50682611306ab4d103807112a

Fix T90666: Toggling motion blur while persistent data is enabled results in 
artifacts

Enabling or disabling motion blur requires rebuilding the BVH of affected 
geometry and
uploading modified vertices to the device (since without motion blur the 
transform is
applied to the vertex positions, whereas with motion blur this is done during 
traversal).
Previously neither was happening when persistent data was enabled, since the 
relevant
node sockets were not tagged as modified after toggling motion blur.

The change to blender_object.cpp makes it so `geom->set_use_motion_blur()` is 
always
called (regardless of motion blur being toggled on or off), which will tag the 
geometry
as modified if that value changed and ensures the BVH is updated.
The change to hair.cpp/mesh.cpp was necessary since after motion blur is 
disabled,
the transform is applied to the vertex positions of a mesh, but those changes 
were not
uploaded to the device. This is fixed now that they are tagged as modified.

Maniphest Tasks: T90666

Differential Revision: https://developer.blender.org/D12781

===

M   intern/cycles/blender/blender_object.cpp
M   intern/cycles/render/hair.cpp
M   intern/cycles/render/mesh.cpp

===

diff --git a/intern/cycles/blender/blender_object.cpp 
b/intern/cycles/blender/blender_object.cpp
index 95da4a2df84..4b1c4edef7e 100644
--- a/intern/cycles/blender/blender_object.cpp
+++ b/intern/cycles/blender/blender_object.cpp
@@ -104,23 +104,22 @@ void BlenderSync::sync_object_motion_init(BL::Object 
_parent, BL::Object _ob
   array motion;
   object->set_motion(motion);
 
-  Scene::MotionType need_motion = scene->need_motion();
-  if (need_motion == Scene::MOTION_NONE || !object->get_geometry()) {
+  Geometry *geom = object->get_geometry();
+  if (!geom) {
 return;
   }
 
-  Geometry *geom = object->get_geometry();
-
   int motion_steps = 0;
   bool use_motion_blur = false;
 
+  Scene::MotionType need_motion = scene->need_motion();
   if (need_motion == Scene::MOTION_BLUR) {
 motion_steps = object_motion_steps(b_parent, b_ob, 
Object::MAX_MOTION_STEPS);
 if (motion_steps && object_use_deform_motion(b_parent, b_ob)) {
   use_motion_blur = true;
 }
   }
-  else {
+  else if (need_motion != Scene::MOTION_NONE) {
 motion_steps = 3;
   }
 
diff --git a/intern/cycles/render/hair.cpp b/intern/cycles/render/hair.cpp
index e104455f7dd..e757e3fd3e0 100644
--- a/intern/cycles/render/hair.cpp
+++ b/intern/cycles/render/hair.cpp
@@ -441,6 +441,9 @@ void Hair::apply_transform(const Transform , const bool 
apply_to_motion)
 curve_radius[i] = radius;
   }
 
+  tag_curve_keys_modified();
+  tag_curve_radius_modified();
+
   if (apply_to_motion) {
 Attribute *curve_attr = attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
 
diff --git a/intern/cycles/render/mesh.cpp b/intern/cycles/render/mesh.cpp
index 2ecea3101db..9c93f6f881c 100644
--- a/intern/cycles/render/mesh.cpp
+++ b/intern/cycles/render/mesh.cpp
@@ -508,6 +508,8 @@ void Mesh::apply_transform(const Transform , const bool 
apply_to_motion)
   for (size_t i = 0; i < verts.size(); i++)
 verts[i] = transform_point(, verts[i]);
 
+  tag_verts_modified();
+
   if (apply_to_motion) {
 Attribute *attr = attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);

___
Bf-blender-cvs mailing list
Bf-blender-cvs@blender.org
List details, subscription details or unsubscribe:
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

[Bf-blender-cvs] [c11585a82f9] master: Add missing "CUDA_ERROR_UNSUPPORTED_PTX_VERSION" to CUEW

2021-10-05 Thread Patrick Mours

Commit: c11585a82f97e51c01c4f4f309b85bdf7602ca08
Author: Patrick Mours
Date:   Tue Oct 5 16:36:33 2021 +0200
Branches: master
https://developer.blender.org/rBc11585a82f97e51c01c4f4f309b85bdf7602ca08

Add missing "CUDA_ERROR_UNSUPPORTED_PTX_VERSION" to CUEW

This is required for Cycles to report a meaningful error message when it fails 
to load a PTX module
created with a newer CUDA toolkit version than the driver supports.

Ref T91879

===

M   extern/cuew/include/cuew.h
M   extern/cuew/src/cuew.c

===

diff --git a/extern/cuew/include/cuew.h b/extern/cuew/include/cuew.h
index a2142b8f2ba..5979f48e43d 100644
--- a/extern/cuew/include/cuew.h
+++ b/extern/cuew/include/cuew.h
@@ -609,6 +609,7 @@ typedef enum cudaError_enum {
   CUDA_ERROR_INVALID_GRAPHICS_CONTEXT = 219,
   CUDA_ERROR_NVLINK_UNCORRECTABLE = 220,
   CUDA_ERROR_JIT_COMPILER_NOT_FOUND = 221,
+  CUDA_ERROR_UNSUPPORTED_PTX_VERSION = 222,
   CUDA_ERROR_INVALID_SOURCE = 300,
   CUDA_ERROR_FILE_NOT_FOUND = 301,
   CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND = 302,
diff --git a/extern/cuew/src/cuew.c b/extern/cuew/src/cuew.c
index 7a1b0018a24..9eba9306323 100644
--- a/extern/cuew/src/cuew.c
+++ b/extern/cuew/src/cuew.c
@@ -736,6 +736,7 @@ const char *cuewErrorString(CUresult result) {
 case CUDA_ERROR_INVALID_GRAPHICS_CONTEXT: return "Invalid graphics 
context";
 case CUDA_ERROR_NVLINK_UNCORRECTABLE: return "Nvlink uncorrectable";
 case CUDA_ERROR_JIT_COMPILER_NOT_FOUND: return "Jit compiler not found";
+case CUDA_ERROR_UNSUPPORTED_PTX_VERSION: return "Unsupported PTX version";
 case CUDA_ERROR_INVALID_SOURCE: return "Invalid source";
 case CUDA_ERROR_FILE_NOT_FOUND: return "File not found";
 case CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND: return "Link to a shared 
object failed to resolve";

___
Bf-blender-cvs mailing list
Bf-blender-cvs@blender.org
List details, subscription details or unsubscribe:
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

[Bf-blender-cvs] [2189dfd6e25] master: Cycles: Rework OptiX visibility flags handling

2021-09-27 Thread Patrick Mours

Commit: 2189dfd6e25a7bb6b734116619d87bc2d2a535ff
Author: Patrick Mours
Date:   Wed Sep 22 16:23:08 2021 +0200
Branches: master
https://developer.blender.org/rB2189dfd6e25a7bb6b734116619d87bc2d2a535ff

Cycles: Rework OptiX visibility flags handling

Previously, the visibility test against the visibility flags was performed in an 
any-hit program in OptiX
(called `__anyhit__kernel_optix_visibility_test`), which was using the 
`__prim_visibility` array.
This is not entirely correct, however, since `__prim_visibility` is filled with 
the merged visibility
flags of all objects that reference that primitive, so if one object uses 
different visibility flags
than another object, but they both are instances of the same geometry, they 
would appear the same
way. The reason that the any-hit program was used rather than the OptiX 
instance visibility mask is
that the latter is currently limited to 8 bits only, which is not sufficient to 
contain all Cycles
visibility flags (12 bits).

To mostly fix the problem with multiple instances and different visibility 
flags, I changed things to
use the OptiX instance visibility mask for a subset of the Cycles visibility 
flags (`PATH_RAY_CAMERA`
to `PATH_RAY_VOLUME_SCATTER`, which fit into 8 bits) and only fall back to the 
visibility test any-hit
program if that isn't enough (e.g. the ray visibility mask exceeds 8 bits or 
when using the built-in
curves from OptiX, since the any-hit program is then also used to skip the 
curve endcaps).

This may also improve performance in some cases, since by default OptiX can now 
perform the normal
scene intersection trace calls entirely on RT cores without having to jump back 
to the SM on every
hit to execute the any-hit program.

Fixes T89801

Differential Revision: https://developer.blender.org/D12604

===

M   intern/cycles/device/optix/device_impl.cpp
M   intern/cycles/device/optix/device_impl.h
M   intern/cycles/kernel/bvh/bvh.h
M   intern/cycles/kernel/device/optix/kernel.cu

===

diff --git a/intern/cycles/device/optix/device_impl.cpp 
b/intern/cycles/device/optix/device_impl.cpp
index b54d423a183..5f5eff53063 100644
--- a/intern/cycles/device/optix/device_impl.cpp
+++ b/intern/cycles/device/optix/device_impl.cpp
@@ -315,6 +315,11 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
   group_descs[PG_HITS].kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP;
   group_descs[PG_HITS].hitgroup.moduleAH = optix_module;
   group_descs[PG_HITS].hitgroup.entryFunctionNameAH = 
"__anyhit__kernel_optix_shadow_all_hit";
+  group_descs[PG_HITV].kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP;
+  group_descs[PG_HITV].hitgroup.moduleCH = optix_module;
+  group_descs[PG_HITV].hitgroup.entryFunctionNameCH = 
"__closesthit__kernel_optix_hit";
+  group_descs[PG_HITV].hitgroup.moduleAH = optix_module;
+  group_descs[PG_HITV].hitgroup.entryFunctionNameAH = 
"__anyhit__kernel_optix_volume_test";
 
   if (kernel_features & KERNEL_FEATURE_HAIR) {
 if (kernel_features & KERNEL_FEATURE_HAIR_THICK) {
@@ -397,6 +402,7 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
   trace_css = std::max(trace_css, stack_size[PG_HITD].cssIS + 
stack_size[PG_HITD].cssAH);
   trace_css = std::max(trace_css, stack_size[PG_HITS].cssIS + 
stack_size[PG_HITS].cssAH);
   trace_css = std::max(trace_css, stack_size[PG_HITL].cssIS + 
stack_size[PG_HITL].cssAH);
+  trace_css = std::max(trace_css, stack_size[PG_HITV].cssIS + 
stack_size[PG_HITV].cssAH);
   trace_css = std::max(trace_css,
stack_size[PG_HITD_MOTION].cssIS + 
stack_size[PG_HITD_MOTION].cssAH);
   trace_css = std::max(trace_css,
@@ -421,6 +427,7 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
 pipeline_groups.push_back(groups[PG_HITD]);
 pipeline_groups.push_back(groups[PG_HITS]);
 pipeline_groups.push_back(groups[PG_HITL]);
+pipeline_groups.push_back(groups[PG_HITV]);
 if (motion_blur) {
   pipeline_groups.push_back(groups[PG_HITD_MOTION]);
   pipeline_groups.push_back(groups[PG_HITS_MOTION]);
@@ -459,6 +466,7 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
 pipeline_groups.push_back(groups[PG_HITD]);
 pipeline_groups.push_back(groups[PG_HITS]);
 pipeline_groups.push_back(groups[PG_HITL]);
+pipeline_groups.push_back(groups[PG_HITV]);
 if (motion_blur) {
   pipeline_groups.push_back(groups[PG_HITD_MOTION]);
   pipeline_groups.push_back(groups[PG_HITS_MOTION]);
@@ -1390,25 +1398,33 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress 
, bool refit)
   /* Set user instance ID to object index (but leave low bit blank). */
   instance.instanceId = ob->get_device_index() << 1;
 
-  /* Have to have at least one bit in the mask, or else instance would 
always be culled. */
-  instance.visibilityMask = 1;
+

[Bf-blender-cvs] [cad00ba01b4] cycles-x: Cycles X: Improve performance of transparent shadows with OptiX

2021-09-17 Thread Patrick Mours

Commit: cad00ba01b4ac7412c51c6d0143e402dc56967d7
Author: Patrick Mours
Date:   Thu Sep 16 18:16:38 2021 +0200
Branches: cycles-x
https://developer.blender.org/rBcad00ba01b4ac7412c51c6d0143e402dc56967d7

Cycles X: Improve performance of transparent shadows with OptiX

This changes the shadow record-all any-hit program to accept all
hits (return without calling `optixIgnoreIntersection`) beyond
the furthest distance recorded after the maximum number of hits
that can be recorded was reached.
OptiX then no longer calls the any-hit program for hits beyond
the distance of the accepted hits and also reduces the current
ray length behind the scenes. As a result performance improves
drastically in scenes where shadow rays can hit a lot of
transparent objects, like the "koro" benchmark scene.

With this applied I now get similar performance with both CUDA
and OptiX in "koro". Not quite perfect yet, but much better than
before.

Reviewed By: brecht

Differential Revision: https://developer.blender.org/D12524

===

M   intern/cycles/kernel/device/optix/kernel.cu
M   intern/cycles/kernel/integrator/integrator_shade_shadow.h

===

diff --git a/intern/cycles/kernel/device/optix/kernel.cu 
b/intern/cycles/kernel/device/optix/kernel.cu
index 8c68522289f..a4603b53150 100644
--- a/intern/cycles/kernel/device/optix/kernel.cu
+++ b/intern/cycles/kernel/device/optix/kernel.cu
@@ -169,11 +169,13 @@ extern "C" __global__ void 
__anyhit__kernel_optix_local_hit()
 extern "C" __global__ void __anyhit__kernel_optix_shadow_all_hit()
 {
 #ifdef __SHADOW_RECORD_ALL__
+  bool ignore_intersection = false;
+
   const uint prim = optixGetPrimitiveIndex();
 #  ifdef __VISIBILITY_FLAG__
   const uint visibility = optixGetPayload_4();
   if ((kernel_tex_fetch(__prim_visibility, prim) & visibility) == 0) {
-return optixIgnoreIntersection();
+ignore_intersection = true;
   }
 #  endif
 
@@ -190,7 +192,7 @@ extern "C" __global__ void 
__anyhit__kernel_optix_shadow_all_hit()
 
 // Filter out curve endcaps
 if (u == 0.0f || u == 1.0f) {
-  return optixIgnoreIntersection();
+  ignore_intersection = true;
 }
   }
 #  endif
@@ -199,7 +201,9 @@ extern "C" __global__ void 
__anyhit__kernel_optix_shadow_all_hit()
   int record_index = num_hits;
   const int max_hits = optixGetPayload_3();
 
-  optixSetPayload_2(num_hits + 1);
+  if (!ignore_intersection) {
+optixSetPayload_2(num_hits + 1);
+  }
 
   Intersection *const isect_array = get_payload_ptr_0();
 
@@ -218,37 +222,37 @@ extern "C" __global__ void 
__anyhit__kernel_optix_shadow_all_hit()
 }
 
 if (optixGetRayTmax() >= max_recorded_t) {
-  return optixIgnoreIntersection();
+  /* Accept hit, so that OptiX won't consider any more hits beyond it 
anymore. */
+  return;
 }
 
 record_index = max_recorded_hit;
   }
-
-  /* TODO: is there a way to shorten the ray length when max_hits is reached, 
so Optix
-   * can discard triangles beyond it? */
 #  endif
 
-  Intersection *const isect = isect_array + record_index;
-  isect->u = u;
-  isect->v = v;
-  isect->t = optixGetRayTmax();
-  isect->prim = prim;
-  isect->object = get_object_id();
-  isect->type = kernel_tex_fetch(__prim_type, prim);
+  if (!ignore_intersection) {
+Intersection *const isect = isect_array + record_index;
+isect->u = u;
+isect->v = v;
+isect->t = optixGetRayTmax();
+isect->prim = prim;
+isect->object = get_object_id();
+isect->type = kernel_tex_fetch(__prim_type, prim);
 
 #  ifdef __TRANSPARENT_SHADOWS__
-  // Detect if this surface has a shader with transparent shadows
-  if (!shader_transparent_shadow(NULL, isect) || max_hits == 0) {
+// Detect if this surface has a shader with transparent shadows
+if (!shader_transparent_shadow(NULL, isect) || max_hits == 0) {
 #  endif
-// If no transparent shadows, all light is blocked and we can stop 
immediately
-optixSetPayload_5(true);
-return optixTerminateRay();
+  // If no transparent shadows, all light is blocked and we can stop 
immediately
+  optixSetPayload_5(true);
+  return optixTerminateRay();
 #  ifdef __TRANSPARENT_SHADOWS__
+}
+#  endif
   }
 
   // Continue tracing
   optixIgnoreIntersection();
-#  endif
 #endif
 }
 
diff --git a/intern/cycles/kernel/integrator/integrator_shade_shadow.h 
b/intern/cycles/kernel/integrator/integrator_shade_shadow.h
index fb836191c94..fd3c3ae1653 100644
--- a/intern/cycles/kernel/integrator/integrator_shade_shadow.h
+++ b/intern/cycles/kernel/integrator/integrator_shade_shadow.h
@@ -23,6 +23,11 @@
 
 CCL_NAMESPACE_BEGIN
 
+ccl_device_inline bool shadow_intersections_has_remaining(const int num_hits)
+{
+  return num_hits >= INTEGRATOR_SHADOW_ISECT_SIZE;
+}
+
 #ifdef __TRANSP

[Bf-blender-cvs] [27db38f0a72] cycles-x: Cycles X: Shading performance improvements by changing inlining behavior for SVM

2021-07-06 Thread Patrick Mours

Commit: 27db38f0a729411f13c99a60574d59c70d461be5
Author: Patrick Mours
Date:   Mon Jul 5 12:58:22 2021 +0200
Branches: cycles-x
https://developer.blender.org/rB27db38f0a729411f13c99a60574d59c70d461be5

Cycles X: Shading performance improvements by changing inlining behavior for SVM

The shading kernels (shade_surface, ...) are limited by memory a lot. I found 
several hotspots
where execution was stalled waiting for spills to be loaded back into 
registers. That's
something that can be adjusted by changing the inlining logic:

For example, the compiler did not inline "kernel_write_denoising_features" 
(even though it
was marked __inline__), which caused it to force synchronization before the 
function call.
Forcing it inline avoided that and got rid of that hotspot.

Then there were cubic texture filtering and NanoVDB, which introduced huge code
chunks into each texture sampling evaluation (increasing register and
instruction cache pressure), even though they are rarely used. Making them
__noinline__ means that overhead is only incurred when they are actually used.

Another case is the SVM. The compiler currently converts the node type switch 
statement
into a binary searched branch sequence. This means depending on the SVM node 
hit, the
GPU has to branch over large portions of code, which increases instruction 
cache pressure
immensely (GPU is fetching lots of code even for stuff it immediately jumps 
away from
again, while jumping through the binary searched branches). This can be reduced 
somewhat
by making all the node functions __noinline__, so that the GPU only has to 
branch over a
bunch of call instructions, rather than all the inlined code.
The SVM "offset" value is now passed by value into the node functions and
returned through the function return value, so that the compiler keeps it in a
register. Previously, when it was passed as a pointer, the compiler was forced
to move it into local memory in OptiX (since functions are compiled separately
there, the compiler is unaware of how that pointer is used).

Differential Revision: https://developer.blender.org/D11816

===

M   intern/cycles/kernel/device/cuda/image.h
M   intern/cycles/kernel/kernel_passes.h
M   intern/cycles/kernel/svm/svm.h
M   intern/cycles/kernel/svm/svm_ao.h
M   intern/cycles/kernel/svm/svm_attribute.h
M   intern/cycles/kernel/svm/svm_bevel.h
M   intern/cycles/kernel/svm/svm_blackbody.h
M   intern/cycles/kernel/svm/svm_brick.h
M   intern/cycles/kernel/svm/svm_brightness.h
M   intern/cycles/kernel/svm/svm_bump.h
M   intern/cycles/kernel/svm/svm_camera.h
M   intern/cycles/kernel/svm/svm_checker.h
M   intern/cycles/kernel/svm/svm_clamp.h
M   intern/cycles/kernel/svm/svm_closure.h
M   intern/cycles/kernel/svm/svm_convert.h
M   intern/cycles/kernel/svm/svm_displace.h
M   intern/cycles/kernel/svm/svm_fresnel.h
M   intern/cycles/kernel/svm/svm_gamma.h
M   intern/cycles/kernel/svm/svm_geometry.h
M   intern/cycles/kernel/svm/svm_gradient.h
M   intern/cycles/kernel/svm/svm_hsv.h
M   intern/cycles/kernel/svm/svm_ies.h
M   intern/cycles/kernel/svm/svm_image.h
M   intern/cycles/kernel/svm/svm_invert.h
M   intern/cycles/kernel/svm/svm_light_path.h
M   intern/cycles/kernel/svm/svm_magic.h
M   intern/cycles/kernel/svm/svm_map_range.h
M   intern/cycles/kernel/svm/svm_mapping.h
M   intern/cycles/kernel/svm/svm_math.h
M   intern/cycles/kernel/svm/svm_mix.h
M   intern/cycles/kernel/svm/svm_musgrave.h
M   intern/cycles/kernel/svm/svm_noisetex.h
M   intern/cycles/kernel/svm/svm_normal.h
M   intern/cycles/kernel/svm/svm_ramp.h
M   intern/cycles/kernel/svm/svm_sepcomb_hsv.h
M   intern/cycles/kernel/svm/svm_sky.h
M   intern/cycles/kernel/svm/svm_tex_coord.h
M   intern/cycles/kernel/svm/svm_types.h
M   intern/cycles/kernel/svm/svm_value.h
M   intern/cycles/kernel/svm/svm_vector_rotate.h
M   intern/cycles/kernel/svm/svm_vector_transform.h
M   intern/cycles/kernel/svm/svm_vertex_color.h
M   intern/cycles/kernel/svm/svm_voronoi.h
M   intern/cycles/kernel/svm/svm_voxel.h
M   intern/cycles/kernel/svm/svm_wave.h
M   intern/cycles/kernel/svm/svm_wavelength.h
M   intern/cycles/kernel/svm/svm_white_noise.h
M   intern/cycles/kernel/svm/svm_wireframe.h

===

diff --git a/intern/cycles/kernel/device/cuda/image.h 
b/intern/cycles/kernel/device/cuda/image.h
index 92a66ecf9a0..e127fe88df3 100644
--- a/intern/cycles/kernel/device/cuda/image.h
+++ b/intern/cycles/kernel/device/cuda/image.h
@@ -65,7 +65,7 @@ ccl_device float cubic_h1(float a)
 
 /* Fast bicubic texture lookup using 4 bilinear lookups, adapted from CUDA 
samples. */
 template
-ccl_device T kernel_tex_image_interp_bicubic(const TextureInfo , float x, 
float y)
+c

[Bf-blender-cvs] [45cfa58ee82] cycles-x: Enable built-in OptiX curves by default

2021-06-24 Thread Patrick Mours

Commit: 45cfa58ee82a3ccdeaa5d07cb69e0f672b356e08
Author: Patrick Mours
Date:   Thu Jun 24 12:53:43 2021 +0200
Branches: cycles-x
https://developer.blender.org/rB45cfa58ee82a3ccdeaa5d07cb69e0f672b356e08

Enable built-in OptiX curves by default

Starting with OptiX 7.3, curves behave as expected with Cycles and render the
same as the custom intersection implementation, so enable them by default.

===

M   intern/cycles/blender/addon/properties.py
M   intern/cycles/blender/addon/ui.py
M   intern/cycles/blender/blender_python.cpp
M   intern/cycles/device/optix/device_impl.cpp
M   intern/cycles/util/util_debug.cpp
M   intern/cycles/util/util_debug.h

===

diff --git a/intern/cycles/blender/addon/properties.py 
b/intern/cycles/blender/addon/properties.py
index 6afce0829f8..4997e9e4381 100644
--- a/intern/cycles/blender/addon/properties.py
+++ b/intern/cycles/blender/addon/properties.py
@@ -708,11 +708,6 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
 
 debug_use_cuda_adaptive_compile: BoolProperty(name="Adaptive Compile", 
default=False)
 
-debug_use_optix_curves_api: BoolProperty(
-name="Native OptiX Curve Primitive",
-description="Use OptiX curves API for hair instead of custom 
implementation",
-default=False
-)
 debug_use_optix_debug: BoolProperty(
 name="OptiX Module Debug",
 description="Load OptiX module in debug mode: lower logging verbosity 
level, enable validations, and lower optimization level",
diff --git a/intern/cycles/blender/addon/ui.py 
b/intern/cycles/blender/addon/ui.py
index 4d6418ed84a..8ab8e051ec0 100644
--- a/intern/cycles/blender/addon/ui.py
+++ b/intern/cycles/blender/addon/ui.py
@@ -1779,7 +1779,6 @@ class CYCLES_RENDER_PT_debug(CyclesDebugButtonsPanel, 
Panel):
 
 col = layout.column()
 col.label(text="OptiX Flags:")
-col.prop(cscene, "debug_use_optix_curves_api")
 col.prop(cscene, "debug_use_optix_debug")
 
 col.separator()
diff --git a/intern/cycles/blender/blender_python.cpp 
b/intern/cycles/blender/blender_python.cpp
index fbb6c07bfb6..59826fa3e85 100644
--- a/intern/cycles/blender/blender_python.cpp
+++ b/intern/cycles/blender/blender_python.cpp
@@ -90,7 +90,6 @@ bool debug_flags_sync_from_scene(BL::Scene b_scene)
   /* Synchronize CUDA flags. */
   flags.cuda.adaptive_compile = get_boolean(cscene, 
"debug_use_cuda_adaptive_compile");
   /* Synchronize OptiX flags. */
-  flags.optix.use_curves_api = get_boolean(cscene, 
"debug_use_optix_curves_api");
   flags.optix.use_debug = get_boolean(cscene, "debug_use_optix_debug");
   /* Synchronize OpenCL device type. */
   switch (get_enum(cscene, "debug_opencl_device_type")) {
diff --git a/intern/cycles/device/optix/device_impl.cpp 
b/intern/cycles/device/optix/device_impl.cpp
index 2d2b596b95b..44f7964304d 100644
--- a/intern/cycles/device/optix/device_impl.cpp
+++ b/intern/cycles/device/optix/device_impl.cpp
@@ -232,12 +232,13 @@ bool OptiXDevice::load_kernels(const 
DeviceRequestedFeatures _features
 #  if OPTIX_ABI_VERSION >= 36
   pipeline_options.usesPrimitiveTypeFlags = 
OPTIX_PRIMITIVE_TYPE_FLAGS_TRIANGLE;
   if (requested_features.use_hair) {
-if (DebugFlags().optix.use_curves_api && 
requested_features.use_hair_thick) {
+#if OPTIX_ABI_VERSION >= 47
+if (requested_features.use_hair_thick) {
   pipeline_options.usesPrimitiveTypeFlags |= 
OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_CUBIC_BSPLINE;
 }
-else {
+else
+#endif
   pipeline_options.usesPrimitiveTypeFlags |= 
OPTIX_PRIMITIVE_TYPE_FLAGS_CUSTOM;
-}
   }
 #  endif
 
@@ -337,8 +338,8 @@ bool OptiXDevice::load_kernels(const 
DeviceRequestedFeatures _features
   group_descs[PG_HITS].hitgroup.entryFunctionNameIS = 
"__intersection__curve_ribbon";
 }
 
-#  if OPTIX_ABI_VERSION >= 36
-if (DebugFlags().optix.use_curves_api && 
requested_features.use_hair_thick) {
+#  if OPTIX_ABI_VERSION >= 47
+if (requested_features.use_hair_thick) {
   OptixBuiltinISOptions builtin_options = {};
   builtin_options.builtinISModuleType = 
OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE;
   builtin_options.usesMotionBlur = false;
@@ -409,7 +410,7 @@ bool OptiXDevice::load_kernels(const 
DeviceRequestedFeatures _features
   trace_css = std::max(trace_css, stack_size[PG_HITD].cssIS + 
stack_size[PG_HITD].cssAH);
   trace_css = std::max(trace_css, stack_size[PG_HITS].cssIS + 
stack_size[PG_HITS].cssAH);
   trace_css = std::max(trace_css, stack_size[PG_HITL].cssIS + 
stack_size[PG_HITL].cssAH);
-#  if OPTIX_ABI_VERSION >= 36
+#  if OPTIX_ABI_VERSION >= 47
   trace_css = std::max(trace_css,
stack_size[PG_HITD_MOTI

[Bf-blender-cvs] [b046bc536be] master: Fix T88096: Baking with OptiX and displacement fails

2021-05-25 Thread Patrick Mours

Commit: b046bc536bec914013c678b552ce6cef7dd308e6
Author: Patrick Mours
Date:   Tue May 25 16:56:16 2021 +0200
Branches: master
https://developer.blender.org/rBb046bc536bec914013c678b552ce6cef7dd308e6

Fix T88096: Baking with OptiX and displacement fails

Using displacement runs the shader eval kernel, but since OptiX modules are not 
loaded when
baking is active, those were not available and therefore failed to launch. This 
fixes that by falling
back to the CUDA kernels.

===

M   intern/cycles/device/device_optix.cpp

===

diff --git a/intern/cycles/device/device_optix.cpp 
b/intern/cycles/device/device_optix.cpp
index 01de0724cb2..b008dfa376f 100644
--- a/intern/cycles/device/device_optix.cpp
+++ b/intern/cycles/device/device_optix.cpp
@@ -726,7 +726,11 @@ class OptiXDevice : public CUDADevice {
   }
 }
 else if (task.type == DeviceTask::SHADER) {
-  launch_shader_eval(task, thread_index);
+  // CUDA kernels are used when doing baking
+  if (optix_module == NULL)
+CUDADevice::shader(task);
+  else
+launch_shader_eval(task, thread_index);
 }
 else if (task.type == DeviceTask::DENOISE_BUFFER) {
   // Set up a single tile that covers the whole task and denoise it

___
Bf-blender-cvs mailing list
Bf-blender-cvs@blender.org
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

[Bf-blender-cvs] [ffa70e76901] blender-v2.93-release: Fix missing Cycles CPU name for Arm processors

2021-04-29 Thread Patrick Mours

Commit: ffa70e769010a3b7e6b80be6f80b21dfb8713f13
Author: Patrick Mours
Date:   Thu Apr 29 15:51:29 2021 +0200
Branches: blender-v2.93-release
https://developer.blender.org/rBffa70e769010a3b7e6b80be6f80b21dfb8713f13

Fix missing Cycles CPU name for Arm processors

===

M   intern/cycles/util/util_system.cpp

===

diff --git a/intern/cycles/util/util_system.cpp 
b/intern/cycles/util/util_system.cpp
index 2c1716ce515..6500a59e42c 100644
--- a/intern/cycles/util/util_system.cpp
+++ b/intern/cycles/util/util_system.cpp
@@ -166,12 +166,33 @@ static void __cpuid(int data[4], int selector)
 
 string system_cpu_brand_string()
 {
+#if !defined(WIN32) && !defined(__x86_64__) && !defined(__i386__)
+  FILE *cpuinfo = fopen("/proc/cpuinfo", "r");
+  if (cpuinfo != nullptr) {
+char cpuinfo_buf[513] = "";
+fread(cpuinfo_buf, sizeof(cpuinfo_buf) - 1, 1, cpuinfo);
+fclose(cpuinfo);
+
+char *modelname = strstr(cpuinfo_buf, "model name");
+if (modelname != nullptr) {
+  modelname = strchr(modelname, ':');
+  if (modelname != nullptr) {
+modelname += 2;
+char *modelname_end = strchr(modelname, '\n');
+if (modelname_end != nullptr) {
+  *modelname_end = '\0';
+  return modelname;
+}
+  }
+}
+  }
+#else
   char buf[49] = {0};
   int result[4] = {0};
 
   __cpuid(result, 0x8000);
 
-  if (result[0] >= (int)0x8004) {
+  if (result[0] != 0 && result[0] >= (int)0x8004) {
 __cpuid((int *)(buf + 0), 0x8002);
 __cpuid((int *)(buf + 16), 0x8003);
 __cpuid((int *)(buf + 32), 0x8004);
@@ -183,7 +204,7 @@ string system_cpu_brand_string()
 
 return brand;
   }
-
+#endif
   return "Unknown CPU";
 }

___
Bf-blender-cvs mailing list
Bf-blender-cvs@blender.org
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

[Bf-blender-cvs] [94960250b53] master: Cycles: Fix build with OptiX 7.3 SDK

2021-04-26 Thread Patrick Mours

Commit: 94960250b539c46315808fcb9bdb9d64c0f71eea
Author: Patrick Mours
Date:   Mon Apr 26 14:55:39 2021 +0200
Branches: master
https://developer.blender.org/rB94960250b539c46315808fcb9bdb9d64c0f71eea

Cycles: Fix build with OptiX 7.3 SDK

===

M   intern/cycles/device/device_optix.cpp

===

diff --git a/intern/cycles/device/device_optix.cpp 
b/intern/cycles/device/device_optix.cpp
index fcf8fab9cc4..cce11507fa1 100644
--- a/intern/cycles/device/device_optix.cpp
+++ b/intern/cycles/device/device_optix.cpp
@@ -955,14 +955,21 @@ class OptiXDevice : public CUDADevice {
 // Create OptiX denoiser handle on demand when it is first used
 OptixDenoiserOptions denoiser_options = {};
 assert(task.denoising.input_passes >= 1 && task.denoising.input_passes 
<= 3);
+#  if OPTIX_ABI_VERSION >= 47
+denoiser_options.guideAlbedo = task.denoising.input_passes >= 2;
+denoiser_options.guideNormal = task.denoising.input_passes >= 3;
+check_result_optix_ret(optixDenoiserCreate(
+context, OPTIX_DENOISER_MODEL_KIND_HDR, _options, 
));
+#  else
 denoiser_options.inputKind = static_cast(
 OPTIX_DENOISER_INPUT_RGB + (task.denoising.input_passes - 1));
-#  if OPTIX_ABI_VERSION < 28
+#if OPTIX_ABI_VERSION < 28
 denoiser_options.pixelFormat = OPTIX_PIXEL_FORMAT_FLOAT3;
-#  endif
+#endif
 check_result_optix_ret(optixDenoiserCreate(context, _options, 
));
 check_result_optix_ret(
 optixDenoiserSetModel(denoiser, OPTIX_DENOISER_MODEL_KIND_HDR, 
NULL, 0));
+#  endif
 
 // OptiX denoiser handle was created with the requested number of 
input passes
 denoiser_input_passes = task.denoising.input_passes;
@@ -1032,10 +1039,34 @@ class OptiXDevice : public CUDADevice {
 #  endif
   output_layers[0].format = OPTIX_PIXEL_FORMAT_FLOAT3;
 
+#  if OPTIX_ABI_VERSION >= 47
+  OptixDenoiserLayer image_layers = {};
+  image_layers.input = input_layers[0];
+  image_layers.output = output_layers[0];
+
+  OptixDenoiserGuideLayer guide_layers = {};
+  guide_layers.albedo = input_layers[1];
+  guide_layers.normal = input_layers[2];
+#  endif
+
   // Finally run denonising
   OptixDenoiserParams params = {};  // All parameters are disabled/zero
+#  if OPTIX_ABI_VERSION >= 47
   check_result_optix_ret(optixDenoiserInvoke(denoiser,
- 0,
+ NULL,
+ ,
+ denoiser_state.device_pointer,
+ scratch_offset,
+ _layers,
+ _layers,
+ 1,
+ overlap_offset.x,
+ overlap_offset.y,
+ denoiser_state.device_pointer 
+ scratch_offset,
+ scratch_size));
+#  else
+  check_result_optix_ret(optixDenoiserInvoke(denoiser,
+ NULL,
  ,
  denoiser_state.device_pointer,
  scratch_offset,
@@ -1046,6 +1077,7 @@ class OptiXDevice : public CUDADevice {
  output_layers,
  denoiser_state.device_pointer 
+ scratch_offset,
  scratch_size));
+#  endif
 
 #  if OPTIX_DENOISER_NO_PIXEL_STRIDE
   void *output_args[] = {_ptr,

___
Bf-blender-cvs mailing list
Bf-blender-cvs@blender.org
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

[Bf-blender-cvs] [847579b4225] master: Add support for building on Linux aarch64

2021-04-20 Thread Patrick Mours

Commit: 847579b422507917c4252ecc5c777bf5e0fc6f09
Author: Patrick Mours
Date:   Tue Apr 20 14:00:05 2021 +0200
Branches: master
https://developer.blender.org/rB847579b422507917c4252ecc5c777bf5e0fc6f09

Add support for building on Linux aarch64

Differential Revision: https://developer.blender.org/D10958

===

M   build_files/build_environment/CMakeLists.txt
M   build_files/build_environment/cmake/boost.cmake
M   build_files/build_environment/cmake/embree.cmake
M   build_files/build_environment/cmake/gmp.cmake
M   build_files/build_environment/cmake/harvest.cmake
M   build_files/build_environment/cmake/llvm.cmake
M   build_files/build_environment/cmake/opencolorio.cmake
M   build_files/build_environment/cmake/options.cmake
M   build_files/build_environment/cmake/png.cmake
M   build_files/build_environment/cmake/sse2neon.cmake
M   build_files/build_environment/cmake/ssl.cmake
M   build_files/build_environment/cmake/ssl.conf
M   build_files/build_environment/cmake/tbb.cmake
M   build_files/build_environment/cmake/versions.cmake
M   build_files/build_environment/cmake/x264.cmake
M   build_files/build_environment/install_deps.sh
M   build_files/build_environment/patches/cmakelists_tbb.txt
M   build_files/build_environment/patches/tbb.diff
M   build_files/build_environment/patches/theora.diff
M   build_files/build_environment/patches/usd.diff
M   build_files/cmake/Modules/FindEmbree.cmake
M   intern/cycles/util/util_simd.h
M   intern/cycles/util/util_sseb.h
M   intern/cycles/util/util_ssef.h
M   intern/cycles/util/util_ssei.h
M   intern/cycles/util/util_system.cpp

===

diff --git a/build_files/build_environment/CMakeLists.txt 
b/build_files/build_environment/CMakeLists.txt
index a3d694b4bc3..fb79eee62be 100644
--- a/build_files/build_environment/CMakeLists.txt
+++ b/build_files/build_environment/CMakeLists.txt
@@ -113,7 +113,7 @@ include(cmake/expat.cmake)
 include(cmake/yamlcpp.cmake)
 include(cmake/opencolorio.cmake)
 
-if(APPLE AND ("${CMAKE_OSX_ARCHITECTURES}" STREQUAL "arm64"))
+if(BLENDER_PLATFORM_ARM)
   include(cmake/sse2neon.cmake)
 endif()
 
diff --git a/build_files/build_environment/cmake/boost.cmake 
b/build_files/build_environment/cmake/boost.cmake
index 8b36af7dc41..5170a3a123e 100644
--- a/build_files/build_environment/cmake/boost.cmake
+++ b/build_files/build_environment/cmake/boost.cmake
@@ -18,6 +18,12 @@
 
 set(BOOST_ADDRESS_MODEL 64)
 
+if(BLENDER_PLATFORM_ARM)
+  set(BOOST_ARCHITECTURE arm)
+else()
+  set(BOOST_ARCHITECTURE x86)
+endif()
+
 if(WIN32)
   set(BOOST_TOOLSET toolset=msvc-14.1)
   set(BOOST_COMPILER_STRING -vc141)
@@ -29,7 +35,6 @@ if(WIN32)
   if(BUILD_MODE STREQUAL Release)
 set(BOOST_HARVEST_CMD ${BOOST_HARVEST_CMD} && ${CMAKE_COMMAND} -E 
copy_directory ${LIBDIR}/boost/include/boost-${BOOST_VERSION_NODOTS_SHORT}/ 
${HARVEST_TARGET}/boost/include/)
   endif()
-
 elseif(APPLE)
   set(BOOST_CONFIGURE_COMMAND ./bootstrap.sh)
   set(BOOST_BUILD_COMMAND ./b2)
@@ -93,7 +98,7 @@ ExternalProject_Add(external_boost
   UPDATE_COMMAND  ""
   PATCH_COMMAND ${BOOST_PATCH_COMMAND}
   CONFIGURE_COMMAND ${BOOST_CONFIGURE_COMMAND}
-  BUILD_COMMAND ${BOOST_BUILD_COMMAND} ${BOOST_BUILD_OPTIONS} 
-j${MAKE_THREADS} architecture=x86 address-model=${BOOST_ADDRESS_MODEL} 
link=static threading=multi ${BOOST_OPTIONS}--prefix=${LIBDIR}/boost install
+  BUILD_COMMAND ${BOOST_BUILD_COMMAND} ${BOOST_BUILD_OPTIONS} 
-j${MAKE_THREADS} architecture=${BOOST_ARCHITECTURE} 
address-model=${BOOST_ADDRESS_MODEL} link=static threading=multi 
${BOOST_OPTIONS}--prefix=${LIBDIR}/boost install
   BUILD_IN_SOURCE 1
   INSTALL_COMMAND "${BOOST_HARVEST_CMD}"
 )
diff --git a/build_files/build_environment/cmake/embree.cmake 
b/build_files/build_environment/cmake/embree.cmake
index 4830630def0..cd693d766dc 100644
--- a/build_files/build_environment/cmake/embree.cmake
+++ b/build_files/build_environment/cmake/embree.cmake
@@ -47,7 +47,7 @@ else()
   set(EMBREE_BUILD_DIR)
 endif()
 
-if(APPLE AND ("${CMAKE_OSX_ARCHITECTURES}" STREQUAL "arm64"))
+if(BLENDER_PLATFORM_ARM)
   ExternalProject_Add(external_embree
 GIT_REPOSITORY ${EMBREE_ARM_GIT}
 GIT_TAG "blender-arm"
diff --git a/build_files/build_environment/cmake/gmp.cmake 
b/build_files/build_environment/cmake/gmp.cmake
index 323630a63aa..6ca81678a32 100644
--- a/build_files/build_environment/cmake/gmp.cmake
+++ b/build_files/build_environment/cmake/gmp.cmake
@@ -25,19 +25,12 @@ else()
   set(GMP_OPTIONS --enable-static --disable-shared )
 endif()
 
-if(APPLE)
-  if("${CMAKE_OSX_ARCHITECTURES}" STREQUAL "arm64")
-set(GMP_OPTIONS
-  ${GMP_OPTIONS}
-  --disable-assembly
-)
-  else()
-set(GMP_OPTIONS
-  ${GMP

[Bf-blender-cvs] [7cbd66d42fb] master: Cycles: Initialize all OptiX structs to zero before use

2021-04-13 Thread Patrick Mours

Commit: 7cbd66d42fb3f43b26f7dbea61f182f00987eafb
Author: Patrick Mours
Date:   Tue Apr 13 13:43:34 2021 +0200
Branches: master
https://developer.blender.org/rB7cbd66d42fb3f43b26f7dbea61f182f00987eafb

Cycles: Initialize all OptiX structs to zero before use

This is done to ensure that building with newer OptiX SDK releases that add new
struct fields gives deterministic results (no fields are left uninitialized, so
no random data is passed to OptiX).

===

M   intern/cycles/device/device_optix.cpp

===

diff --git a/intern/cycles/device/device_optix.cpp 
b/intern/cycles/device/device_optix.cpp
index 51e1a0033ba..fcf8fab9cc4 100644
--- a/intern/cycles/device/device_optix.cpp
+++ b/intern/cycles/device/device_optix.cpp
@@ -362,7 +362,7 @@ class OptiXDevice : public CUDADevice {
   }
 }
 
-OptixModuleCompileOptions module_options;
+OptixModuleCompileOptions module_options = {};
 module_options.maxRegisterCount = 0;  // Do not set an explicit register 
limit
 #  ifdef WITH_CYCLES_DEBUG
 module_options.optLevel = OPTIX_COMPILE_OPTIMIZATION_LEVEL_0;
@@ -377,7 +377,7 @@ class OptiXDevice : public CUDADevice {
 module_options.numBoundValues = 0;
 #  endif
 
-OptixPipelineCompileOptions pipeline_options;
+OptixPipelineCompileOptions pipeline_options = {};
 // Default to no motion blur and two-level graph, since it is the fastest 
option
 pipeline_options.usesMotionBlur = false;
 pipeline_options.traversableGraphFlags =
@@ -477,7 +477,7 @@ class OptiXDevice : public CUDADevice {
 
 #  if OPTIX_ABI_VERSION >= 36
   if (DebugFlags().optix.curves_api && requested_features.use_hair_thick) {
-OptixBuiltinISOptions builtin_options;
+OptixBuiltinISOptions builtin_options = {};
 builtin_options.builtinISModuleType = 
OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE;
 builtin_options.usesMotionBlur = false;
 
@@ -571,7 +571,7 @@ class OptiXDevice : public CUDADevice {
  stack_size[PG_HITS_MOTION].cssIS + 
stack_size[PG_HITS_MOTION].cssAH);
 #  endif
 
-OptixPipelineLinkOptions link_options;
+OptixPipelineLinkOptions link_options = {};
 link_options.maxTraceDepth = 1;
 #  ifdef WITH_CYCLES_DEBUG
 link_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_FULL;
@@ -953,7 +953,7 @@ class OptiXDevice : public CUDADevice {
 }
 
 // Create OptiX denoiser handle on demand when it is first used
-OptixDenoiserOptions denoiser_options;
+OptixDenoiserOptions denoiser_options = {};
 assert(task.denoising.input_passes >= 1 && task.denoising.input_passes 
<= 3);
 denoiser_options.inputKind = static_cast(
 OPTIX_DENOISER_INPUT_RGB + (task.denoising.input_passes - 1));
@@ -1157,7 +1157,7 @@ class OptiXDevice : public CUDADevice {
 
 // Compute memory usage
 OptixAccelBufferSizes sizes = {};
-OptixAccelBuildOptions options;
+OptixAccelBuildOptions options = {};
 options.operation = operation;
 if (background) {
   // Prefer best performance and lowest memory consumption in background
@@ -1195,7 +1195,7 @@ class OptiXDevice : public CUDADevice {
 }
 
 // Finally build the acceleration structure
-OptixAccelEmitDesc compacted_size_prop;
+OptixAccelEmitDesc compacted_size_prop = {};
 compacted_size_prop.type = OPTIX_PROPERTY_TYPE_COMPACTED_SIZE;
 // A tiny space was allocated for this property at the end of the 
temporary buffer above
 // Make sure this pointer is 8-byte aligned

___
Bf-blender-cvs mailing list
Bf-blender-cvs@blender.org
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

[Bf-blender-cvs] [f1fe42d912f] master: Cycles: Do not allocate tile buffers on all devices when peer memory is active and denoising is not

2021-03-30 Thread Patrick Mours

Commit: f1fe42d912f088259bbc82d597121978204e991d
Author: Patrick Mours
Date:   Tue Mar 30 12:59:03 2021 +0200
Branches: master
https://developer.blender.org/rBf1fe42d912f088259bbc82d597121978204e991d

Cycles: Do not allocate tile buffers on all devices when peer memory is active 
and denoising is not

Separate tile buffers on all devices only need to exist when denoising is 
active (so any overlap
being rendered simultaneously does not write to the same memory region).
When denoising is not active they can be distributed like all other memory when 
peer
memory support is available.

Reviewed By: brecht

Differential Revision: https://developer.blender.org/D10858

===

M   intern/cycles/device/device_multi.cpp

===

diff --git a/intern/cycles/device/device_multi.cpp 
b/intern/cycles/device/device_multi.cpp
index b272e59f99d..35faadcbec5 100644
--- a/intern/cycles/device/device_multi.cpp
+++ b/intern/cycles/device/device_multi.cpp
@@ -46,10 +46,13 @@ class MultiDevice : public Device {
   list devices, denoising_devices;
   device_ptr unique_key;
   vector> peer_islands;
+  bool use_denoising;
   bool matching_rendering_and_denoising_devices;
 
   MultiDevice(DeviceInfo , Stats , Profiler , bool 
background_)
-  : Device(info, stats, profiler, background_), unique_key(1)
+  : Device(info, stats, profiler, background_),
+unique_key(1),
+use_denoising(!info.denoising_devices.empty())
   {
 foreach (DeviceInfo , info.multi_devices) {
   /* Always add CPU devices at the back since GPU devices can change
@@ -194,6 +197,7 @@ class MultiDevice : public Device {
   if (!sub.device->load_kernels(requested_features))
 return false;
 
+use_denoising = requested_features.use_denoising;
 if (requested_features.use_denoising) {
   /* Only need denoising feature, everything else is unused. */
   DeviceRequestedFeatures denoising_features;
@@ -400,7 +404,7 @@ class MultiDevice : public Device {
 size_t existing_size = mem.device_size;
 
 /* The tile buffers are allocated on each device (see below), so copy to 
all of them */
-if (strcmp(mem.name, "RenderBuffers") == 0) {
+if (strcmp(mem.name, "RenderBuffers") == 0 && use_denoising) {
   foreach (SubDevice , devices) {
 mem.device = sub.device;
 mem.device_pointer = (existing_key) ? sub.ptr_map[existing_key] : 0;
@@ -466,7 +470,7 @@ class MultiDevice : public Device {
 /* This is a hack to only allocate the tile buffers on denoising devices
  * Similarly the tile buffers also need to be allocated separately on all 
devices so any
  * overlap rendered for denoising does not interfere with each other */
-if (strcmp(mem.name, "RenderBuffers") == 0) {
+if (strcmp(mem.name, "RenderBuffers") == 0 && use_denoising) {
   vector device_pointers;
   device_pointers.reserve(devices.size());
 
@@ -518,7 +522,7 @@ class MultiDevice : public Device {
 size_t existing_size = mem.device_size;
 
 /* Free memory that was allocated for all devices (see above) on each 
device */
-if (strcmp(mem.name, "RenderBuffers") == 0 || mem.type == MEM_PIXELS) {
+if (mem.type == MEM_PIXELS || (strcmp(mem.name, "RenderBuffers") == 0 && 
use_denoising)) {
   foreach (SubDevice , devices) {
 mem.device = sub.device;
 mem.device_pointer = sub.ptr_map[key];

___
Bf-blender-cvs mailing list
Bf-blender-cvs@blender.org
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

[Bf-blender-cvs] [f4f8b6dde32] master: Cycles: Change device-only memory to actually only allocate on the device

2021-03-11 Thread Patrick Mours

Commit: f4f8b6dde32b0438e0b97a6d8ebeb89802987127
Author: Patrick Mours
Date:   Wed Mar 3 14:35:50 2021 +0100
Branches: master
https://developer.blender.org/rBf4f8b6dde32b0438e0b97a6d8ebeb89802987127

Cycles: Change device-only memory to actually only allocate on the device

This patch changes the `MEM_DEVICE_ONLY` type to only allocate on the device
and to fail if that is not possible because the device is out of memory (since
OptiX acceleration structures may not be allocated in host memory). It also
fixes high peak memory usage during OptiX acceleration structure building.

Reviewed By: brecht

Maniphest Tasks: T85985

Differential Revision: https://developer.blender.org/D10535

===

M   intern/cycles/bvh/bvh_optix.cpp
M   intern/cycles/device/cuda/device_cuda_impl.cpp
M   intern/cycles/device/device_cpu.cpp
M   intern/cycles/device/device_denoising.h
M   intern/cycles/device/device_memory.h
M   intern/cycles/device/device_optix.cpp

===

diff --git a/intern/cycles/bvh/bvh_optix.cpp b/intern/cycles/bvh/bvh_optix.cpp
index e094f339ede..d630e8965dc 100644
--- a/intern/cycles/bvh/bvh_optix.cpp
+++ b/intern/cycles/bvh/bvh_optix.cpp
@@ -27,8 +27,8 @@ BVHOptiX::BVHOptiX(const BVHParams _,
Device *device)
 : BVH(params_, geometry_, objects_),
   traversable_handle(0),
-  as_data(device, params_.top_level ? "optix tlas" : "optix blas"),
-  motion_transform_data(device, "optix motion transform")
+  as_data(device, params_.top_level ? "optix tlas" : "optix blas", false),
+  motion_transform_data(device, "optix motion transform", false)
 {
 }
 
diff --git a/intern/cycles/device/cuda/device_cuda_impl.cpp 
b/intern/cycles/device/cuda/device_cuda_impl.cpp
index 44a51835f4c..5b62292ca55 100644
--- a/intern/cycles/device/cuda/device_cuda_impl.cpp
+++ b/intern/cycles/device/cuda/device_cuda_impl.cpp
@@ -854,7 +854,7 @@ CUDADevice::CUDAMem 
*CUDADevice::generic_alloc(device_memory , size_t pitch_
 
   void *shared_pointer = 0;
 
-  if (mem_alloc_result != CUDA_SUCCESS && can_map_host) {
+  if (mem_alloc_result != CUDA_SUCCESS && can_map_host && mem.type != 
MEM_DEVICE_ONLY) {
 if (mem.shared_pointer) {
   /* Another device already allocated host memory. */
   mem_alloc_result = CUDA_SUCCESS;
@@ -877,8 +877,14 @@ CUDADevice::CUDAMem 
*CUDADevice::generic_alloc(device_memory , size_t pitch_
   }
 
   if (mem_alloc_result != CUDA_SUCCESS) {
-status = " failed, out of device and host memory";
-set_error("System is out of GPU and shared host memory");
+if (mem.type == MEM_DEVICE_ONLY) {
+  status = " failed, out of device memory";
+  set_error("System is out of GPU memory");
+}
+else {
+  status = " failed, out of device and host memory";
+  set_error("System is out of GPU and shared host memory");
+}
   }
 
   if (mem.name) {
diff --git a/intern/cycles/device/device_cpu.cpp 
b/intern/cycles/device/device_cpu.cpp
index fdfd3f83be6..e2f9c7391da 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -396,8 +396,7 @@ class CPUDevice : public Device {
 << string_human_readable_size(mem.memory_size()) << ")";
   }
 
-  if (mem.type == MEM_DEVICE_ONLY) {
-assert(!mem.host_pointer);
+  if (mem.type == MEM_DEVICE_ONLY || !mem.host_pointer) {
 size_t alignment = MIN_ALIGNMENT_CPU_DATA_TYPES;
 void *data = util_aligned_malloc(mem.memory_size(), alignment);
 mem.device_pointer = (device_ptr)data;
@@ -459,7 +458,7 @@ class CPUDevice : public Device {
   tex_free((device_texture &)mem);
 }
 else if (mem.device_pointer) {
-  if (mem.type == MEM_DEVICE_ONLY) {
+  if (mem.type == MEM_DEVICE_ONLY || !mem.host_pointer) {
 util_aligned_free((void *)mem.device_pointer);
   }
   mem.device_pointer = 0;
diff --git a/intern/cycles/device/device_denoising.h 
b/intern/cycles/device/device_denoising.h
index 2c0dc23b44a..bb8bdfdd225 100644
--- a/intern/cycles/device/device_denoising.h
+++ b/intern/cycles/device/device_denoising.h
@@ -171,7 +171,8 @@ class DenoisingTask {
 bool gpu_temporary_mem;
 
 DenoiseBuffers(Device *device)
-: mem(device, "denoising pixel buffer"), temporary_mem(device, 
"denoising temporary mem")
+: mem(device, "denoising pixel buffer"),
+  temporary_mem(device, "denoising temporary mem", true)
 {
 }
   } buffer;
diff --git a/intern/cycles/device/device_memory.h 
b/intern/cycles/device/device_memory.h
index 1f63a152458..97459b9ae6a 100644
--- a/intern/cycles/device/device_memory.h
+++ b/intern/cycles/device/devic

[Bf-blender-cvs] [b2e1b13abde] master: Cycles: Add option to change input passes for viewport denoising

2021-02-23 Thread Patrick Mours

Commit: b2e1b13abde787c2aad97d5c317357cf84360bdb
Author: Patrick Mours
Date:   Mon Feb 22 18:09:48 2021 +0100
Branches: master
https://developer.blender.org/rBb2e1b13abde787c2aad97d5c317357cf84360bdb

Cycles: Add option to change input passes for viewport denoising

There are cases where the default input passes of color+albedo do not yield
useful results. While it was possible to change them for final frame rendering
(in the layer settings), viewport denoising always used a fixed color+albedo.
This adds an option to change the input passes for viewport denoising too, so
that one can use it in scenes that otherwise wouldn't work well with it.

Reviewed By: brecht

Differential Revision: https://developer.blender.org/D10404

===

M   intern/cycles/blender/addon/properties.py
M   intern/cycles/blender/addon/ui.py
M   intern/cycles/blender/blender_sync.cpp

===

diff --git a/intern/cycles/blender/addon/properties.py 
b/intern/cycles/blender/addon/properties.py
index 0708c371a0e..dc4437bdc52 100644
--- a/intern/cycles/blender/addon/properties.py
+++ b/intern/cycles/blender/addon/properties.py
@@ -646,6 +646,12 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
 min=0, max=(1 << 24),
 default=1,
 )
+preview_denoising_input_passes: EnumProperty(
+name="Viewport Input Passes",
+description="Passes used by the denoiser to distinguish noise from 
shader and geometry detail",
+items=enum_denoising_input_passes,
+default='RGB_ALBEDO',
+)
 
 debug_reset_timeout: FloatProperty(
 name="Reset timeout",
@@ -1434,7 +1440,6 @@ class CyclesRenderLayerSettings(bpy.types.PropertyGroup):
 items=enum_denoising_input_passes,
 default='RGB_ALBEDO',
 )
-
 denoising_openimagedenoise_input_passes: EnumProperty(
 name="Input Passes",
 description="Passes used by the denoiser to distinguish noise from 
shader and geometry detail",
diff --git a/intern/cycles/blender/addon/ui.py 
b/intern/cycles/blender/addon/ui.py
index 68f6291b373..c9b4dc25cf2 100644
--- a/intern/cycles/blender/addon/ui.py
+++ b/intern/cycles/blender/addon/ui.py
@@ -275,6 +275,8 @@ class 
CYCLES_RENDER_PT_sampling_denoising(CyclesButtonsPanel, Panel):
 
 sub.prop(cscene, "denoiser", text="")
 
+layout.separator()
+
 heading = layout.column(align=False, heading="Viewport")
 row = heading.row(align=True)
 row.prop(cscene, "use_preview_denoising", text="")
@@ -285,6 +287,9 @@ class 
CYCLES_RENDER_PT_sampling_denoising(CyclesButtonsPanel, Panel):
 sub = heading.row(align=True)
 sub.active = cscene.use_preview_denoising
 sub.prop(cscene, "preview_denoising_start_sample", text="Start Sample")
+sub = heading.row(align=True)
+sub.active = cscene.use_preview_denoising
+sub.prop(cscene, "preview_denoising_input_passes", text="Input Passes")
 
 
 class CYCLES_RENDER_PT_sampling_advanced(CyclesButtonsPanel, Panel):
diff --git a/intern/cycles/blender/blender_sync.cpp 
b/intern/cycles/blender/blender_sync.cpp
index b6a5f67ec2d..0e61f4f2615 100644
--- a/intern/cycles/blender/blender_sync.cpp
+++ b/intern/cycles/blender/blender_sync.cpp
@@ -1005,6 +1005,9 @@ DenoiseParams BlenderSync::get_denoise_params(BL::Scene 
_scene,
 cscene, "preview_denoiser", DENOISER_NUM, DENOISER_NONE);
 denoising.start_sample = get_int(cscene, "preview_denoising_start_sample");
 
+denoising.input_passes = (DenoiserInput)get_enum(
+cscene, "preview_denoising_input_passes", DENOISER_INPUT_NUM, 
(int)denoising.input_passes);
+
 /* Auto select fastest denoiser. */
 if (denoising.type == DENOISER_NONE) {
   if (!Device::available_devices(DEVICE_MASK_OPTIX).empty()) {

___
Bf-blender-cvs mailing list
Bf-blender-cvs@blender.org
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

[Bf-blender-cvs] [c661515090b] blender-v2.83-release: Cycles: Add CUDA 11 build support

2021-02-08 Thread Patrick Mours

Commit: c661515090b189061c4388a335573ac3d5745925
Author: Patrick Mours
Date:   Tue Oct 13 12:33:34 2020 +0200
Branches: blender-v2.83-release
https://developer.blender.org/rBc661515090b189061c4388a335573ac3d5745925

Cycles: Add CUDA 11 build support

With this patch the build system checks whether the "CUDA10_NVCC_EXECUTABLE" 
CMake
variable is set and if so will use that to build sm_30 kernels. Similarly for 
sm_8x kernels it
checks "CUDA11_NVCC_EXECUTABLE". All other kernels are built using the default 
CUDA
toolkit. This makes it possible to use either the CUDA 10 or CUDA 11 toolkit by 
default and
only selectively use the other for the kernels where it's a hard requirement.

Reviewed By: brecht

Differential Revision: https://developer.blender.org/D9179

===

M   CMakeLists.txt
M   build_files/buildbot/worker_compile.py
M   build_files/cmake/config/blender_release.cmake
M   intern/cycles/CMakeLists.txt
M   intern/cycles/kernel/CMakeLists.txt

===

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 6f705ffbe44..b15bbb7486b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -369,7 +369,7 @@ option(WITH_CYCLES_CUDA_BINARIES"Build Cycles CUDA 
binaries" OFF)
 option(WITH_CYCLES_CUBIN_COMPILER   "Build cubins with nvrtc based compiler 
instead of nvcc" OFF)
 option(WITH_CYCLES_CUDA_BUILD_SERIAL "Build cubins one after another (useful 
on machines with limited RAM)" OFF)
 mark_as_advanced(WITH_CYCLES_CUDA_BUILD_SERIAL)
-set(CYCLES_CUDA_BINARIES_ARCH sm_30 sm_35 sm_37 sm_50 sm_52 sm_60 sm_61 sm_70 
sm_75 compute_75 CACHE STRING "CUDA architectures to build binaries for")
+set(CYCLES_CUDA_BINARIES_ARCH sm_30 sm_35 sm_37 sm_50 sm_52 sm_60 sm_61 sm_70 
sm_75 sm_86 compute_75 CACHE STRING "CUDA architectures to build binaries for")
 mark_as_advanced(CYCLES_CUDA_BINARIES_ARCH)
 unset(PLATFORM_DEFAULT)
 option(WITH_CYCLES_LOGGING  "Build Cycles with logging support" ON)
diff --git a/build_files/buildbot/worker_compile.py 
b/build_files/buildbot/worker_compile.py
index 340f507df4c..d5482571c11 100644
--- a/build_files/buildbot/worker_compile.py
+++ b/build_files/buildbot/worker_compile.py
@@ -44,13 +44,17 @@ def get_cmake_options(builder):
 optix_sdk_dir = os.path.join(builder.blender_dir, '..', '..', 
'NVIDIA-Optix-SDK')
 options.append('-DOPTIX_ROOT_DIR:PATH=' + optix_sdk_dir)
 
-# Workers have multiple CUDA versions installed. Select 10.1 for Blender 
2.83 releases.
+# Workaround to build sm_30 kernels with CUDA 10, since CUDA 11 no longer 
supports that architecture
 if builder.platform == 'win':
-options.append('-DCUDA_TOOLKIT_ROOT_DIR:PATH=C:/Program Files/NVIDIA 
GPU Computing Toolkit/CUDA/v10.1')
-options.append('-DCUDA_NVCC_EXECUTABLE:FILEPATH=C:/Program 
Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1/bin/nvcc.exe')
+options.append('-DCUDA10_TOOLKIT_ROOT_DIR:PATH=C:/Program Files/NVIDIA 
GPU Computing Toolkit/CUDA/v10.1')
+options.append('-DCUDA10_NVCC_EXECUTABLE:FILEPATH=C:/Program 
Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1/bin/nvcc.exe')
+options.append('-DCUDA11_TOOLKIT_ROOT_DIR:PATH=C:/Program Files/NVIDIA 
GPU Computing Toolkit/CUDA/v11.1')
+options.append('-DCUDA11_NVCC_EXECUTABLE:FILEPATH=C:/Program 
Files/NVIDIA GPU Computing Toolkit/CUDA/v11.1/bin/nvcc.exe')
 elif builder.platform == 'linux':
-options.append('-DCUDA_TOOLKIT_ROOT_DIR:PATH=/usr/local/cuda-10.1')
-
options.append('-DCUDA_NVCC_EXECUTABLE:FILEPATH=/usr/local/cuda-10.1/bin/nvcc')
+options.append('-DCUDA10_TOOLKIT_ROOT_DIR:PATH=/usr/local/cuda-10.1')
+
options.append('-DCUDA10_NVCC_EXECUTABLE:FILEPATH=/usr/local/cuda-10.1/bin/nvcc')
+options.append('-DCUDA11_TOOLKIT_ROOT_DIR:PATH=/usr/local/cuda-11.1')
+
options.append('-DCUDA11_NVCC_EXECUTABLE:FILEPATH=/usr/local/cuda-11.1/bin/nvcc')
 
 options.append("-C" + os.path.join(builder.blender_dir, config_file))
 options.append("-DCMAKE_INSTALL_PREFIX=%s" % (builder.install_dir))
diff --git a/build_files/cmake/config/blender_release.cmake 
b/build_files/cmake/config/blender_release.cmake
index 2d52fb22c86..e1b7560e4fc 100644
--- a/build_files/cmake/config/blender_release.cmake
+++ b/build_files/cmake/config/blender_release.cmake
@@ -52,7 +52,7 @@ set(WITH_USD ON  CACHE BOOL "" FORCE)
 set(WITH_MEM_JEMALLOC  ON  CACHE BOOL "" FORCE)
 set(WITH_CYCLES_CUDA_BINARIES  ON  CACHE BOOL "" FORCE)
 set(WITH_CYCLES_CUBIN_COMPILER OFF CACHE BOOL "" FORCE)
-set(CYCLES_CUDA_BINARIES_ARCH 
sm_30;sm_35;sm_37;sm_50;sm_52;sm_60;sm_61;sm_70;sm_75;compute_75 CACHE STRING 
"" FORCE)
+set(CYCLES_CUDA_BINARIES_ARCH 
sm_30;sm_35;sm_37;sm_50;sm_52;sm_60;sm_61;s

[Bf-blender-cvs] [f00ba344115] blender-v2.83-release: Cycles: Enable OptiX on first generation Maxwell GPUs again

2021-02-08 Thread Patrick Mours

Commit: f00ba344115ca07f255fdd6088956e4d035714a0
Author: Patrick Mours
Date:   Mon Jul 27 16:11:00 2020 +0200
Branches: blender-v2.83-release
https://developer.blender.org/rBf00ba344115ca07f255fdd6088956e4d035714a0

Cycles: Enable OptiX on first generation Maxwell GPUs again

===

M   intern/cycles/device/device_optix.cpp
M   intern/cycles/kernel/CMakeLists.txt

===

diff --git a/intern/cycles/device/device_optix.cpp 
b/intern/cycles/device/device_optix.cpp
index 2b28d1e1dbb..db04c13d083 100644
--- a/intern/cycles/device/device_optix.cpp
+++ b/intern/cycles/device/device_optix.cpp
@@ -1543,11 +1543,10 @@ void device_optix_info(const vector 
_devices, vector"
--target 52
+-target 50
 -ptx
 -i ${CMAKE_CURRENT_SOURCE_DIR}/${input}
 ${cuda_flags}
@@ -573,7 +573,7 @@ if(WITH_CYCLES_DEVICE_OPTIX AND WITH_CYCLES_CUDA_BINARIES)
 COMMAND
   ${CUDA_NVCC_EXECUTABLE}
   --ptx
-  -arch=sm_52
+  -arch=sm_50
   ${cuda_flags}
   ${input}
 WORKING_DIRECTORY

___
Bf-blender-cvs mailing list
Bf-blender-cvs@blender.org
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

[Bf-blender-cvs] [b4bddf2e3bd] blender-v2.83-release: Fix OptiX being shown as available on first generation Maxwell GPUs

2021-02-08 Thread Patrick Mours

Commit: b4bddf2e3bd3e33a9ca7488fc78112f8a5fa0544
Author: Patrick Mours
Date:   Fri Jul 24 15:36:09 2020 +0200
Branches: blender-v2.83-release
https://developer.blender.org/rBb4bddf2e3bd3e33a9ca7488fc78112f8a5fa0544

Fix OptiX being shown as available on first generation Maxwell GPUs

The OptiX kernels are compiled for target "compute_sm_52", which is only 
available on second
generation Maxwell GPUs, so disable support for older ones.

===

M   intern/cycles/device/device_optix.cpp

===

diff --git a/intern/cycles/device/device_optix.cpp 
b/intern/cycles/device/device_optix.cpp
index db04c13d083..2b28d1e1dbb 100644
--- a/intern/cycles/device/device_optix.cpp
+++ b/intern/cycles/device/device_optix.cpp
@@ -1543,10 +1543,11 @@ void device_optix_info(const vector 
_devices, vectorhttps://lists.blender.org/mailman/listinfo/bf-blender-cvs

[Bf-blender-cvs] [08aaa07adbd] blender-v2.83-release: Cycles: Use pre-compiled PTX kernel for older generation when no matching one is found

2021-02-08 Thread Patrick Mours

Commit: 08aaa07adbd46e27f4226f29559be156f14a524b
Author: Patrick Mours
Date:   Fri Jul 17 15:06:55 2020 +0200
Branches: blender-v2.83-release
https://developer.blender.org/rB08aaa07adbd46e27f4226f29559be156f14a524b

Cycles: Use pre-compiled PTX kernel for older generation when no matching one 
is found

This patch changes the discovery of pre-compiled kernels, to look for any PTX, 
even if
it does not match the current architecture version exactly. It works because 
the driver can
JIT-compile PTX generated for architectures less than or equal to the current 
one.
This e.g. makes it possible to render on a new GPU architecture even if no 
pre-compiled
binary kernel was distributed for it as part of the Blender installation.

Reviewed By: brecht

Differential Revision: https://developer.blender.org/D8332

===

M   CMakeLists.txt
M   build_files/cmake/config/blender_release.cmake
M   intern/cycles/device/cuda/device_cuda_impl.cpp
M   intern/cycles/kernel/CMakeLists.txt

===

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 83f547eb593..6f705ffbe44 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -369,7 +369,7 @@ option(WITH_CYCLES_CUDA_BINARIES"Build Cycles CUDA 
binaries" OFF)
 option(WITH_CYCLES_CUBIN_COMPILER   "Build cubins with nvrtc based compiler 
instead of nvcc" OFF)
 option(WITH_CYCLES_CUDA_BUILD_SERIAL "Build cubins one after another (useful 
on machines with limited RAM)" OFF)
 mark_as_advanced(WITH_CYCLES_CUDA_BUILD_SERIAL)
-set(CYCLES_CUDA_BINARIES_ARCH sm_30 sm_35 sm_37 sm_50 sm_52 sm_60 sm_61 sm_70 
sm_75 CACHE STRING "CUDA architectures to build binaries for")
+set(CYCLES_CUDA_BINARIES_ARCH sm_30 sm_35 sm_37 sm_50 sm_52 sm_60 sm_61 sm_70 
sm_75 compute_75 CACHE STRING "CUDA architectures to build binaries for")
 mark_as_advanced(CYCLES_CUDA_BINARIES_ARCH)
 unset(PLATFORM_DEFAULT)
 option(WITH_CYCLES_LOGGING  "Build Cycles with logging support" ON)
diff --git a/build_files/cmake/config/blender_release.cmake 
b/build_files/cmake/config/blender_release.cmake
index 01a59e451aa..2d52fb22c86 100644
--- a/build_files/cmake/config/blender_release.cmake
+++ b/build_files/cmake/config/blender_release.cmake
@@ -52,7 +52,7 @@ set(WITH_USD ON  CACHE BOOL "" FORCE)
 set(WITH_MEM_JEMALLOC  ON  CACHE BOOL "" FORCE)
 set(WITH_CYCLES_CUDA_BINARIES  ON  CACHE BOOL "" FORCE)
 set(WITH_CYCLES_CUBIN_COMPILER OFF CACHE BOOL "" FORCE)
-set(CYCLES_CUDA_BINARIES_ARCH 
sm_30;sm_35;sm_37;sm_50;sm_52;sm_60;sm_61;sm_70;sm_75 CACHE STRING "" FORCE)
+set(CYCLES_CUDA_BINARIES_ARCH 
sm_30;sm_35;sm_37;sm_50;sm_52;sm_60;sm_61;sm_70;sm_75;compute_75 CACHE STRING 
"" FORCE)
 set(WITH_CYCLES_DEVICE_OPTIX   ON CACHE BOOL "" FORCE)
 
 # platform dependent options
diff --git a/intern/cycles/device/cuda/device_cuda_impl.cpp 
b/intern/cycles/device/cuda/device_cuda_impl.cpp
index ba5d479e0e7..870f9f9ecf9 100644
--- a/intern/cycles/device/cuda/device_cuda_impl.cpp
+++ b/intern/cycles/device/cuda/device_cuda_impl.cpp
@@ -352,11 +352,24 @@ string CUDADevice::compile_kernel(const 
DeviceRequestedFeatures _featu
   }
 }
 
-const string ptx = path_get(string_printf("lib/%s_compute_%d%d.ptx", name, 
major, minor));
-VLOG(1) << "Testing for pre-compiled kernel " << ptx << ".";
-if (path_exists(ptx)) {
-  VLOG(1) << "Using precompiled kernel.";
-  return ptx;
+/* The driver can JIT-compile PTX generated for older generations, so find 
the closest one. */
+int ptx_major = major, ptx_minor = minor;
+while (ptx_major >= 3) {
+  const string ptx = path_get(
+  string_printf("lib/%s_compute_%d%d.ptx", name, ptx_major, 
ptx_minor));
+  VLOG(1) << "Testing for pre-compiled kernel " << ptx << ".";
+  if (path_exists(ptx)) {
+VLOG(1) << "Using precompiled kernel.";
+return ptx;
+  }
+
+  if (ptx_minor > 0) {
+ptx_minor--;
+  }
+  else {
+ptx_major--;
+ptx_minor = 9;
+  }
 }
   }
 
diff --git a/intern/cycles/kernel/CMakeLists.txt 
b/intern/cycles/kernel/CMakeLists.txt
index 2e839a616e9..6ab0b9d39d2 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -549,7 +549,7 @@ if(WITH_CYCLES_DEVICE_OPTIX AND WITH_CYCLES_CUDA_BINARIES)
   ${SRC_UTIL_HEADERS}
 COMMAND ${CUBIN_CC_ENV}
 "$"
--target 30
+-target 52
 -ptx
 -i ${CMAKE_CURRENT_SOURCE_DIR}/${input}
 ${cuda_flags}
@@ -573,7 +573,7 @@ if(WITH_CYCLES_DEVICE_OPTIX AND WITH_CYCLES_CUDA_BINARIES)
 COMMAND

[Bf-blender-cvs] [510541563ef] blender-v2.83-release: Cycles: Enable OptiX on all Maxwell+ GPUs

2021-02-08 Thread Patrick Mours

Commit: 510541563efa8f34e3ed6632e53aef31c3665a2f
Author: Patrick Mours
Date:   Fri Jun 5 12:33:00 2020 +0200
Branches: blender-v2.83-release
https://developer.blender.org/rB510541563efa8f34e3ed6632e53aef31c3665a2f

Cycles: Enable OptiX on all Maxwell+ GPUs

===

M   intern/cycles/device/device_optix.cpp

===

diff --git a/intern/cycles/device/device_optix.cpp 
b/intern/cycles/device/device_optix.cpp
index 441fa35f8af..db04c13d083 100644
--- a/intern/cycles/device/device_optix.cpp
+++ b/intern/cycles/device/device_optix.cpp
@@ -1537,34 +1537,22 @@ bool device_optix_init()
 
 void device_optix_info(const vector _devices, 
vector )
 {
+  devices.reserve(cuda_devices.size());
+
   // Simply add all supported CUDA devices as OptiX devices again
-  for (const DeviceInfo _info : cuda_devices) {
-DeviceInfo info = cuda_info;
+  for (DeviceInfo info : cuda_devices) {
 assert(info.type == DEVICE_CUDA);
-info.type = DEVICE_OPTIX;
-info.id += "_OptiX";
 
-// Figure out RTX support
-CUdevice cuda_device = 0;
-CUcontext cuda_context = NULL;
-unsigned int rtcore_version = 0;
-if (cuDeviceGet(_device, info.num) == CUDA_SUCCESS &&
-cuDevicePrimaryCtxRetain(_context, cuda_device) == CUDA_SUCCESS) {
-  OptixDeviceContext optix_context = NULL;
-  if (optixDeviceContextCreate(cuda_context, nullptr, _context) == 
OPTIX_SUCCESS) {
-optixDeviceContextGetProperty(optix_context,
-  OPTIX_DEVICE_PROPERTY_RTCORE_VERSION,
-  _version,
-  sizeof(rtcore_version));
-optixDeviceContextDestroy(optix_context);
-  }
-  cuDevicePrimaryCtxRelease(cuda_device);
+int major;
+cuDeviceGetAttribute(, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, 
info.num);
+if (major < 5) {
+  continue;  // Only Maxwell and up are supported by OptiX
 }
 
-// Only add devices with RTX support
-if (rtcore_version != 0 || getenv("CYCLES_OPTIX_TEST")) {
-  devices.push_back(info);
-}
+info.type = DEVICE_OPTIX;
+info.id += "_OptiX";
+
+devices.push_back(info);
   }
 }

___
Bf-blender-cvs mailing list
Bf-blender-cvs@blender.org
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

[Bf-blender-cvs] [510541563ef] tmp-2.83-cycles-rtx3-kernels: Cycles: Enable OptiX on all Maxwell+ GPUs

2021-02-08 Thread Patrick Mours

Commit: 510541563efa8f34e3ed6632e53aef31c3665a2f
Author: Patrick Mours
Date:   Fri Jun 5 12:33:00 2020 +0200
Branches: tmp-2.83-cycles-rtx3-kernels
https://developer.blender.org/rB510541563efa8f34e3ed6632e53aef31c3665a2f

Cycles: Enable OptiX on all Maxwell+ GPUs

===

M   intern/cycles/device/device_optix.cpp

===

diff --git a/intern/cycles/device/device_optix.cpp 
b/intern/cycles/device/device_optix.cpp
index 441fa35f8af..db04c13d083 100644
--- a/intern/cycles/device/device_optix.cpp
+++ b/intern/cycles/device/device_optix.cpp
@@ -1537,34 +1537,22 @@ bool device_optix_init()
 
 void device_optix_info(const vector _devices, 
vector )
 {
+  devices.reserve(cuda_devices.size());
+
   // Simply add all supported CUDA devices as OptiX devices again
-  for (const DeviceInfo _info : cuda_devices) {
-DeviceInfo info = cuda_info;
+  for (DeviceInfo info : cuda_devices) {
 assert(info.type == DEVICE_CUDA);
-info.type = DEVICE_OPTIX;
-info.id += "_OptiX";
 
-// Figure out RTX support
-CUdevice cuda_device = 0;
-CUcontext cuda_context = NULL;
-unsigned int rtcore_version = 0;
-if (cuDeviceGet(_device, info.num) == CUDA_SUCCESS &&
-cuDevicePrimaryCtxRetain(_context, cuda_device) == CUDA_SUCCESS) {
-  OptixDeviceContext optix_context = NULL;
-  if (optixDeviceContextCreate(cuda_context, nullptr, _context) == 
OPTIX_SUCCESS) {
-optixDeviceContextGetProperty(optix_context,
-  OPTIX_DEVICE_PROPERTY_RTCORE_VERSION,
-  _version,
-  sizeof(rtcore_version));
-optixDeviceContextDestroy(optix_context);
-  }
-  cuDevicePrimaryCtxRelease(cuda_device);
+int major;
+cuDeviceGetAttribute(, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, 
info.num);
+if (major < 5) {
+  continue;  // Only Maxwell and up are supported by OptiX
 }
 
-// Only add devices with RTX support
-if (rtcore_version != 0 || getenv("CYCLES_OPTIX_TEST")) {
-  devices.push_back(info);
-}
+info.type = DEVICE_OPTIX;
+info.id += "_OptiX";
+
+devices.push_back(info);
   }
 }

___
Bf-blender-cvs mailing list
Bf-blender-cvs@blender.org
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

[Bf-blender-cvs] [92f79432047] tmp-2.83-cycles-rtx3-kernels: Cycles: Add CUDA 11 build support

2021-02-08 Thread Patrick Mours

Commit: 92f794320477240d1fd84edc8cb7089f4a10fae7
Author: Patrick Mours
Date:   Tue Oct 13 12:33:34 2020 +0200
Branches: tmp-2.83-cycles-rtx3-kernels
https://developer.blender.org/rB92f794320477240d1fd84edc8cb7089f4a10fae7

Cycles: Add CUDA 11 build support

With this patch the build system checks whether the "CUDA10_NVCC_EXECUTABLE" 
CMake
variable is set and if so will use that to build sm_30 kernels. Similarly for 
sm_8x kernels it
checks "CUDA11_NVCC_EXECUTABLE". All other kernels are built using the default 
CUDA
toolkit. This makes it possible to use either the CUDA 10 or CUDA 11 toolkit by 
default and
only selectively use the other for the kernels where it's a hard requirement.

Reviewed By: brecht

Differential Revision: https://developer.blender.org/D9179

===

M   CMakeLists.txt
M   build_files/buildbot/worker_compile.py
M   build_files/cmake/config/blender_release.cmake
M   intern/cycles/CMakeLists.txt
M   intern/cycles/kernel/CMakeLists.txt

===

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 6f705ffbe44..b15bbb7486b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -369,7 +369,7 @@ option(WITH_CYCLES_CUDA_BINARIES"Build Cycles CUDA 
binaries" OFF)
 option(WITH_CYCLES_CUBIN_COMPILER   "Build cubins with nvrtc based compiler 
instead of nvcc" OFF)
 option(WITH_CYCLES_CUDA_BUILD_SERIAL "Build cubins one after another (useful 
on machines with limited RAM)" OFF)
 mark_as_advanced(WITH_CYCLES_CUDA_BUILD_SERIAL)
-set(CYCLES_CUDA_BINARIES_ARCH sm_30 sm_35 sm_37 sm_50 sm_52 sm_60 sm_61 sm_70 
sm_75 compute_75 CACHE STRING "CUDA architectures to build binaries for")
+set(CYCLES_CUDA_BINARIES_ARCH sm_30 sm_35 sm_37 sm_50 sm_52 sm_60 sm_61 sm_70 
sm_75 sm_86 compute_75 CACHE STRING "CUDA architectures to build binaries for")
 mark_as_advanced(CYCLES_CUDA_BINARIES_ARCH)
 unset(PLATFORM_DEFAULT)
 option(WITH_CYCLES_LOGGING  "Build Cycles with logging support" ON)
diff --git a/build_files/buildbot/worker_compile.py 
b/build_files/buildbot/worker_compile.py
index 340f507df4c..d5482571c11 100644
--- a/build_files/buildbot/worker_compile.py
+++ b/build_files/buildbot/worker_compile.py
@@ -44,13 +44,17 @@ def get_cmake_options(builder):
 optix_sdk_dir = os.path.join(builder.blender_dir, '..', '..', 
'NVIDIA-Optix-SDK')
 options.append('-DOPTIX_ROOT_DIR:PATH=' + optix_sdk_dir)
 
-# Workers have multiple CUDA versions installed. Select 10.1 for Blender 
2.83 releases.
+# Workaround to build sm_30 kernels with CUDA 10, since CUDA 11 no longer 
supports that architecture
 if builder.platform == 'win':
-options.append('-DCUDA_TOOLKIT_ROOT_DIR:PATH=C:/Program Files/NVIDIA 
GPU Computing Toolkit/CUDA/v10.1')
-options.append('-DCUDA_NVCC_EXECUTABLE:FILEPATH=C:/Program 
Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1/bin/nvcc.exe')
+options.append('-DCUDA10_TOOLKIT_ROOT_DIR:PATH=C:/Program Files/NVIDIA 
GPU Computing Toolkit/CUDA/v10.1')
+options.append('-DCUDA10_NVCC_EXECUTABLE:FILEPATH=C:/Program 
Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1/bin/nvcc.exe')
+options.append('-DCUDA11_TOOLKIT_ROOT_DIR:PATH=C:/Program Files/NVIDIA 
GPU Computing Toolkit/CUDA/v11.1')
+options.append('-DCUDA11_NVCC_EXECUTABLE:FILEPATH=C:/Program 
Files/NVIDIA GPU Computing Toolkit/CUDA/v11.1/bin/nvcc.exe')
 elif builder.platform == 'linux':
-options.append('-DCUDA_TOOLKIT_ROOT_DIR:PATH=/usr/local/cuda-10.1')
-
options.append('-DCUDA_NVCC_EXECUTABLE:FILEPATH=/usr/local/cuda-10.1/bin/nvcc')
+options.append('-DCUDA10_TOOLKIT_ROOT_DIR:PATH=/usr/local/cuda-10.1')
+
options.append('-DCUDA10_NVCC_EXECUTABLE:FILEPATH=/usr/local/cuda-10.1/bin/nvcc')
+options.append('-DCUDA11_TOOLKIT_ROOT_DIR:PATH=/usr/local/cuda-11.1')
+
options.append('-DCUDA11_NVCC_EXECUTABLE:FILEPATH=/usr/local/cuda-11.1/bin/nvcc')
 
 options.append("-C" + os.path.join(builder.blender_dir, config_file))
 options.append("-DCMAKE_INSTALL_PREFIX=%s" % (builder.install_dir))
diff --git a/build_files/cmake/config/blender_release.cmake 
b/build_files/cmake/config/blender_release.cmake
index 2d52fb22c86..e1b7560e4fc 100644
--- a/build_files/cmake/config/blender_release.cmake
+++ b/build_files/cmake/config/blender_release.cmake
@@ -52,7 +52,7 @@ set(WITH_USD ON  CACHE BOOL "" FORCE)
 set(WITH_MEM_JEMALLOC  ON  CACHE BOOL "" FORCE)
 set(WITH_CYCLES_CUDA_BINARIES  ON  CACHE BOOL "" FORCE)
 set(WITH_CYCLES_CUBIN_COMPILER OFF CACHE BOOL "" FORCE)
-set(CYCLES_CUDA_BINARIES_ARCH 
sm_30;sm_35;sm_37;sm_50;sm_52;sm_60;sm_61;sm_70;sm_75;compute_75 CACHE STRING 
"" FORCE)
+set(CYCLES_CUDA_BINARIES_ARCH 
sm_30;sm_35;sm_37;sm_50;sm_52;sm_60;sm_61;s

[Bf-blender-cvs] [b4bddf2e3bd] tmp-2.83-cycles-rtx3-kernels: Fix OptiX being shown as available on first generation Maxwell GPUs

2021-02-08 Thread Patrick Mours

Commit: b4bddf2e3bd3e33a9ca7488fc78112f8a5fa0544
Author: Patrick Mours
Date:   Fri Jul 24 15:36:09 2020 +0200
Branches: tmp-2.83-cycles-rtx3-kernels
https://developer.blender.org/rBb4bddf2e3bd3e33a9ca7488fc78112f8a5fa0544

Fix OptiX being shown as available on first generation Maxwell GPUs

The OptiX kernels are compiled for target "compute_sm_52", which is only 
available on second
generation Maxwell GPUs, so disable support for older ones.

===

M   intern/cycles/device/device_optix.cpp

===

diff --git a/intern/cycles/device/device_optix.cpp 
b/intern/cycles/device/device_optix.cpp
index db04c13d083..2b28d1e1dbb 100644
--- a/intern/cycles/device/device_optix.cpp
+++ b/intern/cycles/device/device_optix.cpp
@@ -1543,10 +1543,11 @@ void device_optix_info(const vector 
_devices, vectorhttps://lists.blender.org/mailman/listinfo/bf-blender-cvs

[Bf-blender-cvs] [f00ba344115] tmp-2.83-cycles-rtx3-kernels: Cycles: Enable OptiX on first generation Maxwell GPUs again

2021-02-08 Thread Patrick Mours

Commit: f00ba344115ca07f255fdd6088956e4d035714a0
Author: Patrick Mours
Date:   Mon Jul 27 16:11:00 2020 +0200
Branches: tmp-2.83-cycles-rtx3-kernels
https://developer.blender.org/rBf00ba344115ca07f255fdd6088956e4d035714a0

Cycles: Enable OptiX on first generation Maxwell GPUs again

===

M   intern/cycles/device/device_optix.cpp
M   intern/cycles/kernel/CMakeLists.txt

===

diff --git a/intern/cycles/device/device_optix.cpp 
b/intern/cycles/device/device_optix.cpp
index 2b28d1e1dbb..db04c13d083 100644
--- a/intern/cycles/device/device_optix.cpp
+++ b/intern/cycles/device/device_optix.cpp
@@ -1543,11 +1543,10 @@ void device_optix_info(const vector 
_devices, vector"
--target 52
+-target 50
 -ptx
 -i ${CMAKE_CURRENT_SOURCE_DIR}/${input}
 ${cuda_flags}
@@ -573,7 +573,7 @@ if(WITH_CYCLES_DEVICE_OPTIX AND WITH_CYCLES_CUDA_BINARIES)
 COMMAND
   ${CUDA_NVCC_EXECUTABLE}
   --ptx
-  -arch=sm_52
+  -arch=sm_50
   ${cuda_flags}
   ${input}
 WORKING_DIRECTORY

___
Bf-blender-cvs mailing list
Bf-blender-cvs@blender.org
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

[Bf-blender-cvs] [08aaa07adbd] tmp-2.83-cycles-rtx3-kernels: Cycles: Use pre-compiled PTX kernel for older generation when no matching one is found

2021-02-08 Thread Patrick Mours

Commit: 08aaa07adbd46e27f4226f29559be156f14a524b
Author: Patrick Mours
Date:   Fri Jul 17 15:06:55 2020 +0200
Branches: tmp-2.83-cycles-rtx3-kernels
https://developer.blender.org/rB08aaa07adbd46e27f4226f29559be156f14a524b

Cycles: Use pre-compiled PTX kernel for older generation when no matching one 
is found

This patch changes the discovery of pre-compiled kernels, to look for any PTX, 
even if
it does not match the current architecture version exactly. It works because 
the driver can
JIT-compile PTX generated for architectures less than or equal to the current 
one.
This e.g. makes it possible to render on a new GPU architecture even if no 
pre-compiled
binary kernel was distributed for it as part of the Blender installation.

Reviewed By: brecht

Differential Revision: https://developer.blender.org/D8332

===

M   CMakeLists.txt
M   build_files/cmake/config/blender_release.cmake
M   intern/cycles/device/cuda/device_cuda_impl.cpp
M   intern/cycles/kernel/CMakeLists.txt

===

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 83f547eb593..6f705ffbe44 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -369,7 +369,7 @@ option(WITH_CYCLES_CUDA_BINARIES"Build Cycles CUDA 
binaries" OFF)
 option(WITH_CYCLES_CUBIN_COMPILER   "Build cubins with nvrtc based compiler 
instead of nvcc" OFF)
 option(WITH_CYCLES_CUDA_BUILD_SERIAL "Build cubins one after another (useful 
on machines with limited RAM)" OFF)
 mark_as_advanced(WITH_CYCLES_CUDA_BUILD_SERIAL)
-set(CYCLES_CUDA_BINARIES_ARCH sm_30 sm_35 sm_37 sm_50 sm_52 sm_60 sm_61 sm_70 
sm_75 CACHE STRING "CUDA architectures to build binaries for")
+set(CYCLES_CUDA_BINARIES_ARCH sm_30 sm_35 sm_37 sm_50 sm_52 sm_60 sm_61 sm_70 
sm_75 compute_75 CACHE STRING "CUDA architectures to build binaries for")
 mark_as_advanced(CYCLES_CUDA_BINARIES_ARCH)
 unset(PLATFORM_DEFAULT)
 option(WITH_CYCLES_LOGGING  "Build Cycles with logging support" ON)
diff --git a/build_files/cmake/config/blender_release.cmake 
b/build_files/cmake/config/blender_release.cmake
index 01a59e451aa..2d52fb22c86 100644
--- a/build_files/cmake/config/blender_release.cmake
+++ b/build_files/cmake/config/blender_release.cmake
@@ -52,7 +52,7 @@ set(WITH_USD ON  CACHE BOOL "" FORCE)
 set(WITH_MEM_JEMALLOC  ON  CACHE BOOL "" FORCE)
 set(WITH_CYCLES_CUDA_BINARIES  ON  CACHE BOOL "" FORCE)
 set(WITH_CYCLES_CUBIN_COMPILER OFF CACHE BOOL "" FORCE)
-set(CYCLES_CUDA_BINARIES_ARCH 
sm_30;sm_35;sm_37;sm_50;sm_52;sm_60;sm_61;sm_70;sm_75 CACHE STRING "" FORCE)
+set(CYCLES_CUDA_BINARIES_ARCH 
sm_30;sm_35;sm_37;sm_50;sm_52;sm_60;sm_61;sm_70;sm_75;compute_75 CACHE STRING 
"" FORCE)
 set(WITH_CYCLES_DEVICE_OPTIX   ON CACHE BOOL "" FORCE)
 
 # platform dependent options
diff --git a/intern/cycles/device/cuda/device_cuda_impl.cpp 
b/intern/cycles/device/cuda/device_cuda_impl.cpp
index ba5d479e0e7..870f9f9ecf9 100644
--- a/intern/cycles/device/cuda/device_cuda_impl.cpp
+++ b/intern/cycles/device/cuda/device_cuda_impl.cpp
@@ -352,11 +352,24 @@ string CUDADevice::compile_kernel(const 
DeviceRequestedFeatures _featu
   }
 }
 
-const string ptx = path_get(string_printf("lib/%s_compute_%d%d.ptx", name, 
major, minor));
-VLOG(1) << "Testing for pre-compiled kernel " << ptx << ".";
-if (path_exists(ptx)) {
-  VLOG(1) << "Using precompiled kernel.";
-  return ptx;
+/* The driver can JIT-compile PTX generated for older generations, so find 
the closest one. */
+int ptx_major = major, ptx_minor = minor;
+while (ptx_major >= 3) {
+  const string ptx = path_get(
+  string_printf("lib/%s_compute_%d%d.ptx", name, ptx_major, 
ptx_minor));
+  VLOG(1) << "Testing for pre-compiled kernel " << ptx << ".";
+  if (path_exists(ptx)) {
+VLOG(1) << "Using precompiled kernel.";
+return ptx;
+  }
+
+  if (ptx_minor > 0) {
+ptx_minor--;
+  }
+  else {
+ptx_major--;
+ptx_minor = 9;
+  }
 }
   }
 
diff --git a/intern/cycles/kernel/CMakeLists.txt 
b/intern/cycles/kernel/CMakeLists.txt
index 2e839a616e9..6ab0b9d39d2 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -549,7 +549,7 @@ if(WITH_CYCLES_DEVICE_OPTIX AND WITH_CYCLES_CUDA_BINARIES)
   ${SRC_UTIL_HEADERS}
 COMMAND ${CUBIN_CC_ENV}
 "$"
--target 30
+-target 52
 -ptx
 -i ${CMAKE_CURRENT_SOURCE_DIR}/${input}
 ${cuda_flags}
@@ -573,7 +573,7 @@ if(WITH_CYCLES_DEVICE_OPTIX AND WITH_CYCLES_CUDA_BINARIES)
 COMMAND

[Bf-blender-cvs] [9f89166b52b] blender-v2.92-release master: Fix T85148: OptiX viewport denoising regression

2021-01-29 Thread Patrick Mours

Commit: 9f89166b52b1de880c14847a1d0cd830d7c83f5b
Author: Patrick Mours
Date:   Fri Jan 29 13:35:00 2021 +0100
Branches: blender-v2.92-release master
https://developer.blender.org/rB9f89166b52b1de880c14847a1d0cd830d7c83f5b

Fix T85148: OptiX viewport denoising regression

Commit 6e74a8b69f215e63e136cb4c497e738371ac798f changed the denoiser input 
passes default to
include the normal pass. This does not always produce optimal images though, 
hence why the
default was previously set to only include the color and albedo passes. This 
restores that behavior, so
that viewport denoising with OptiX produces the same results as before.

===

M   intern/cycles/device/device_task.h

===

diff --git a/intern/cycles/device/device_task.h 
b/intern/cycles/device/device_task.h
index f9b47c59e95..a9298a9126c 100644
--- a/intern/cycles/device/device_task.h
+++ b/intern/cycles/device/device_task.h
@@ -100,7 +100,9 @@ class DenoiseParams {
 neighbor_frames = 2;
 clamp_input = true;
 
-input_passes = DENOISER_INPUT_RGB_ALBEDO_NORMAL;
+/* Default to color + albedo only, since normal input does not always have 
the desired effect
+ * when denoising with OptiX. */
+input_passes = DENOISER_INPUT_RGB_ALBEDO;
 
 start_sample = 0;
   }

___
Bf-blender-cvs mailing list
Bf-blender-cvs@blender.org
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

[Bf-blender-cvs] [b2e00e8f8e0] master: Merge branch 'blender-v2.92-release'

2021-01-29 Thread Patrick Mours

Commit: b2e00e8f8e03d3ae4ca3ea9f66f90ee0a2d008f9
Author: Patrick Mours
Date:   Fri Jan 29 13:35:21 2021 +0100
Branches: master
https://developer.blender.org/rBb2e00e8f8e03d3ae4ca3ea9f66f90ee0a2d008f9

Merge branch 'blender-v2.92-release'

===



===



___
Bf-blender-cvs mailing list
Bf-blender-cvs@blender.org
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

[Bf-blender-cvs] [9b80291412f] master: Merge branch 'blender-v2.92-release'

2021-01-27 Thread Patrick Mours

Commit: 9b80291412feed2a9942eb41d0bd9390035a702c
Author: Patrick Mours
Date:   Wed Jan 27 15:29:39 2021 +0100
Branches: master
https://developer.blender.org/rB9b80291412feed2a9942eb41d0bd9390035a702c

Merge branch 'blender-v2.92-release'

===



===



___
Bf-blender-cvs mailing list
Bf-blender-cvs@blender.org
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

[Bf-blender-cvs] [a92ebab5da3] master: Windows: Add "--debug-cycles" argument to "blender_debug_gpu.bat" batch file

2021-01-21 Thread Patrick Mours

Commit: a92ebab5da3bcbe3ee1b87348e51f6bcb347b881
Author: Patrick Mours
Date:   Thu Jan 21 16:28:02 2021 +0100
Branches: master
https://developer.blender.org/rBa92ebab5da3bcbe3ee1b87348e51f6bcb347b881

Windows: Add "--debug-cycles" argument to "blender_debug_gpu.bat" batch file

This extends the "blender_debug_gpu.bat" batch file to also be useful for 
triaging Cycles
problems. OptiX initialization errors or problems while iterating CUDA devices 
are only
logged when the `--debug-cycles` flag is specified, so adding that here.

Reviewed By: brecht, LazyDodo

Differential Revision: https://developer.blender.org/D10167

===

M   release/windows/batch/blender_debug_gpu.cmd

===

diff --git a/release/windows/batch/blender_debug_gpu.cmd 
b/release/windows/batch/blender_debug_gpu.cmd
index 46d126ab621..53d7863ec70 100644
--- a/release/windows/batch/blender_debug_gpu.cmd
+++ b/release/windows/batch/blender_debug_gpu.cmd
@@ -12,5 +12,5 @@ mkdir "%temp%\blender\debug_logs" > NUL 2>&1
 echo.
 echo Starting blender and waiting for it to exit
 set PYTHONPATH=
-"%~dp0\blender" --debug --debug-gpu --python-expr "import bpy; 
bpy.ops.wm.sysinfo(filepath=r'%temp%\blender\debug_logs\blender_system_info.txt')"
 > "%temp%\blender\debug_logs\blender_debug_output.txt" 2>&1 < %0
+"%~dp0\blender" --debug --debug-gpu --debug-cycles --python-expr "import bpy; 
bpy.ops.wm.sysinfo(filepath=r'%temp%\blender\debug_logs\blender_system_info.txt')"
 > "%temp%\blender\debug_logs\blender_debug_output.txt" 2>&1 < %0
 explorer "%temp%\blender\debug_logs"
\ No newline at end of file

___
Bf-blender-cvs mailing list
Bf-blender-cvs@blender.org
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

[Bf-blender-cvs] [fc5f9a8ac90] master: Merge branch 'blender-v2.92-release'

2021-01-20 Thread Patrick Mours

Commit: fc5f9a8ac900bcc0c712f89d4efca3583b0906ab
Author: Patrick Mours
Date:   Wed Jan 20 14:40:46 2021 +0100
Branches: master
https://developer.blender.org/rBfc5f9a8ac900bcc0c712f89d4efca3583b0906ab

Merge branch 'blender-v2.92-release'

===



===



___
Bf-blender-cvs mailing list
Bf-blender-cvs@blender.org
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

[Bf-blender-cvs] [4a09907eab2] blender-v2.92-release master: Fix T84049: Crash when using Cycles Progressive Refine with OptiX+CPU

2021-01-20 Thread Patrick Mours

Commit: 4a09907eab2a3b6da53b1942aebefdcf58bbd604
Author: Patrick Mours
Date:   Wed Jan 20 14:12:43 2021 +0100
Branches: blender-v2.92-release master
https://developer.blender.org/rB4a09907eab2a3b6da53b1942aebefdcf58bbd604

Fix T84049: Crash when using Cycles Progressive Refine with OptiX+CPU

Tile stealing may steal a CPU tile buffer and move it to the GPU, but next time 
around that
tile may be re-used on the CPU again (in progressive refinement mode). The 
buffer would
still be on the GPU then though, so is inaccessible to the CPU. As a result 
Blender crashed
when the CPU tried to write results to that tile buffer.
This fixes that by ensuring a stolen tile buffer is moved back to the device it 
is used on before
rendering.

===

M   intern/cycles/render/session.cpp

===

diff --git a/intern/cycles/render/session.cpp b/intern/cycles/render/session.cpp
index 0debc08d911..f3cdae77d47 100644
--- a/intern/cycles/render/session.cpp
+++ b/intern/cycles/render/session.cpp
@@ -540,6 +540,10 @@ bool Session::acquire_tile(RenderTile , Device 
*tile_device, uint tile_typ
 tile->buffers = new RenderBuffers(tile_device);
 tile->buffers->reset(buffer_params);
   }
+  else if (tile->buffers->buffer.device != tile_device) {
+/* Move buffer to current tile device again in case it was stolen before. 
*/
+tile->buffers->buffer.move_device(tile_device);
+  }
 
   tile->buffers->map_neighbor_copied = false;

___
Bf-blender-cvs mailing list
Bf-blender-cvs@blender.org
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

[Bf-blender-cvs] [4fde594fda1] blender-v2.92-release master: Fix T84650: CPU render + OptiX denoiser leaves empty tiles unprocessed

2021-01-18 Thread Patrick Mours

Commit: 4fde594fda13abf98437bc5d0012decc2bd3d5f6
Author: Patrick Mours
Date:   Mon Jan 18 15:30:25 2021 +0100
Branches: blender-v2.92-release master
https://developer.blender.org/rB4fde594fda13abf98437bc5d0012decc2bd3d5f6

Fix T84650: CPU render + OptiX denoiser leaves empty tiles unprocessed

The OptiX denoiser is part of the OptiX device, so to the tile manager it looks 
like a GPU device. As a
result the tile stealing implementation erroneously stole CPU tiles and moved 
them to that OptiX
device, even though in this configuration the OptiX device was only set up for 
denoising and not
rendering. Launching the render kernel therefore caused a crash because of a 
missing AS etc.
This fixes that by ensuring tiles can only be stolen by devices that support 
render tiles.

===

M   intern/cycles/render/session.cpp

===

diff --git a/intern/cycles/render/session.cpp b/intern/cycles/render/session.cpp
index a00f8154148..0debc08d911 100644
--- a/intern/cycles/render/session.cpp
+++ b/intern/cycles/render/session.cpp
@@ -459,7 +459,11 @@ bool Session::acquire_tile(RenderTile , Device 
*tile_device, uint tile_typ
   int device_num = device->device_number(tile_device);
 
   while (!tile_manager.next_tile(tile, device_num, tile_types)) {
-if (steal_tile(rtile, tile_device, tile_lock)) {
+/* Can only steal tiles on devices that support rendering
+ * This is because denoising tiles cannot be stolen (see below)
+ */
+if ((tile_types & (RenderTile::PATH_TRACE | RenderTile::BAKE)) &&
+steal_tile(rtile, tile_device, tile_lock)) {
   return true;
 }

___
Bf-blender-cvs mailing list
Bf-blender-cvs@blender.org
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

[Bf-blender-cvs] [ce0f212498c] master: Merge branch 'blender-v2.92-release'

2021-01-18 Thread Patrick Mours

Commit: ce0f212498c3347f512966bf85d08feefe1d032e
Author: Patrick Mours
Date:   Mon Jan 18 15:30:48 2021 +0100
Branches: master
https://developer.blender.org/rBce0f212498c3347f512966bf85d08feefe1d032e

Merge branch 'blender-v2.92-release'

===



===



___
Bf-blender-cvs mailing list
Bf-blender-cvs@blender.org
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

[Bf-blender-cvs] [c66f00dc26b] master: Fix Cycles rendering with OptiX after instance limit increase when building with old SDK

2021-01-08 Thread Patrick Mours

Commit: c66f00dc26b08d5f7be6aef080c1a0ec2de19cd7
Author: Patrick Mours
Date:   Fri Jan 8 13:38:26 2021 +0100
Branches: master
https://developer.blender.org/rBc66f00dc26b08d5f7be6aef080c1a0ec2de19cd7

Fix Cycles rendering with OptiX after instance limit increase when building 
with old SDK

Commit d259e7dcfbbd37cec5a45fdfb554f24de10d0268 increased the instance limit, 
but only provided
a fall back for the host code for older OptiX SDKs, not for kernel code. This 
caused a mismatch when
an old SDK was used (as is currently the case on buildbot) and subsequent 
rendering artifacts. This
fixes that by moving the bit that is checked to a common location that works 
with both old and new
SDK versions.

===

M   intern/cycles/device/device_optix.cpp
M   intern/cycles/kernel/kernels/optix/kernel_optix.cu

===

diff --git a/intern/cycles/device/device_optix.cpp 
b/intern/cycles/device/device_optix.cpp
index de98e3f3594..f19289f966e 100644
--- a/intern/cycles/device/device_optix.cpp
+++ b/intern/cycles/device/device_optix.cpp
@@ -1514,16 +1514,19 @@ class OptiXDevice : public CUDADevice {
 }
 else {
   unsigned int num_instances = 0;
+  unsigned int max_num_instances = 0xFFFFFFFF;
 
   bvh_optix->as_data.free();
   bvh_optix->traversable_handle = 0;
   bvh_optix->motion_transform_data.free();
 
-#  if OPTIX_ABI_VERSION < 23
-  if (bvh->objects.size() > 0x7FFFFF) {
-#  else
-  if (bvh->objects.size() > 0x7FFFFFF) {
-#  endif
+  optixDeviceContextGetProperty(context,
+
OPTIX_DEVICE_PROPERTY_LIMIT_MAX_INSTANCE_ID,
+&max_num_instances,
+sizeof(max_num_instances));
+  // Do not count first bit, which is used to distinguish instanced and 
non-instanced objects
+  max_num_instances >>= 1;
+  if (bvh->objects.size() > max_num_instances) {
 progress.set_error(
 "Failed to build OptiX acceleration structure because there are 
too many instances");
 return;
@@ -1582,8 +1585,8 @@ class OptiXDevice : public CUDADevice {
 instance.transform[5] = 1.0f;
 instance.transform[10] = 1.0f;
 
-// Set user instance ID to object index
-instance.instanceId = ob->get_device_index();
+// Set user instance ID to object index (but leave low bit blank)
+instance.instanceId = ob->get_device_index() << 1;
 
 // Have to have at least one bit in the mask, or else instance would 
always be culled
 instance.visibilityMask = 1;
@@ -1689,13 +1692,9 @@ class OptiXDevice : public CUDADevice {
   else {
 // Disable instance transform if geometry already has it applied 
to vertex data
 instance.flags = OPTIX_INSTANCE_FLAG_DISABLE_TRANSFORM;
-// Non-instanced objects read ID from prim_object, so
-// distinguish them from instanced objects with high bit set
-#  if OPTIX_ABI_VERSION < 23
-instance.instanceId |= 0x800000;
-#  else
-instance.instanceId |= 0x8000000;
-#  endif
+// Non-instanced objects read ID from 'prim_object', so distinguish
+// them from instanced objects with the low bit set
+instance.instanceId |= 1;
   }
 }
   }
diff --git a/intern/cycles/kernel/kernels/optix/kernel_optix.cu 
b/intern/cycles/kernel/kernels/optix/kernel_optix.cu
index 0c2c84fdbdf..7f609eab474 100644
--- a/intern/cycles/kernel/kernels/optix/kernel_optix.cu
+++ b/intern/cycles/kernel/kernels/optix/kernel_optix.cu
@@ -45,13 +45,12 @@ template ccl_device_forceinline uint 
get_object_id()
   uint object = optixGetInstanceId();
 #endif
   // Choose between always returning object ID or only for instances
-  if (always)
-// Can just remove the high bit since instance always contains object ID
-return object & 0x7FFFFFF;  // OPTIX_ABI_VERSION >= 23 ? 0x7FFFFFF : 
0x7FFFFF
-  // Set to OBJECT_NONE if this is not an instanced object
-  else if (object & 0x8000000)  // OPTIX_ABI_VERSION >= 23 ? 0x8000000 : 
0x800000
-object = OBJECT_NONE;
-  return object;
+  if (always || (object & 1) == 0)
+// Can just remove the low bit since instance always contains object ID
+return object >> 1;
+  else
+// Set to OBJECT_NONE if this is not an instanced object
+return OBJECT_NONE;
 }
 
 extern "C" __global__ void __raygen__kernel_optix_path_trace()

___
Bf-blender-cvs mailing list
Bf-blender-cvs@blender.org
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

[Bf-blender-cvs] [d259e7dcfbb] master: Cycles: Increase instance limit for OptiX acceleration structure building

2021-01-07 Thread Patrick Mours

Commit: d259e7dcfbbd37cec5a45fdfb554f24de10d0268
Author: Patrick Mours
Date:   Thu Jan 7 18:54:29 2021 +0100
Branches: master
https://developer.blender.org/rBd259e7dcfbbd37cec5a45fdfb554f24de10d0268

Cycles: Increase instance limit for OptiX acceleration structure building

For a while now OptiX had support for 28-bits of instance IDs, instead of the 
initial 24-bits (see also
value reported by OPTIX_DEVICE_PROPERTY_LIMIT_MAX_INSTANCE_ID). This change 
makes use of
that and also adds an error reported when the number of instances an OptiX 
acceleration structure is
created with goes beyond the limit, to make this clear instead of just 
rendering an image with artifacts.

Manifest Tasks: T81431

===

M   intern/cycles/device/device_optix.cpp
M   intern/cycles/kernel/kernels/optix/kernel_optix.cu

===

diff --git a/intern/cycles/device/device_optix.cpp 
b/intern/cycles/device/device_optix.cpp
index 673fc1752bb..de98e3f3594 100644
--- a/intern/cycles/device/device_optix.cpp
+++ b/intern/cycles/device/device_optix.cpp
@@ -1519,6 +1519,16 @@ class OptiXDevice : public CUDADevice {
   bvh_optix->traversable_handle = 0;
   bvh_optix->motion_transform_data.free();
 
+#  if OPTIX_ABI_VERSION < 23
+  if (bvh->objects.size() > 0x7FFFFF) {
+#  else
+  if (bvh->objects.size() > 0x7FFFFFF) {
+#  endif
+progress.set_error(
+"Failed to build OptiX acceleration structure because there are 
too many instances");
+return;
+  }
+
   // Fill instance descriptions
 #  if OPTIX_ABI_VERSION < 41
   device_vector aabbs(this, "optix tlas aabbs", MEM_READ_ONLY);
@@ -1681,7 +1691,11 @@ class OptiXDevice : public CUDADevice {
 instance.flags = OPTIX_INSTANCE_FLAG_DISABLE_TRANSFORM;
 // Non-instanced objects read ID from prim_object, so
 // distinguish them from instanced objects with high bit set
+#  if OPTIX_ABI_VERSION < 23
 instance.instanceId |= 0x800000;
+#  else
+instance.instanceId |= 0x8000000;
+#  endif
   }
 }
   }
diff --git a/intern/cycles/kernel/kernels/optix/kernel_optix.cu 
b/intern/cycles/kernel/kernels/optix/kernel_optix.cu
index 8ccd2555091..0c2c84fdbdf 100644
--- a/intern/cycles/kernel/kernels/optix/kernel_optix.cu
+++ b/intern/cycles/kernel/kernels/optix/kernel_optix.cu
@@ -47,9 +47,9 @@ template ccl_device_forceinline uint 
get_object_id()
   // Choose between always returning object ID or only for instances
   if (always)
 // Can just remove the high bit since instance always contains object ID
-return object & 0x7FFFFF;
+return object & 0x7FFFFFF;  // OPTIX_ABI_VERSION >= 23 ? 0x7FFFFFF : 
0x7FFFFF
   // Set to OBJECT_NONE if this is not an instanced object
-  else if (object & 0x800000)
+  else if (object & 0x8000000)  // OPTIX_ABI_VERSION >= 23 ? 0x8000000 : 
0x800000
 object = OBJECT_NONE;
   return object;
 }

___
Bf-blender-cvs mailing list
Bf-blender-cvs@blender.org
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

[Bf-blender-cvs] [3373d14b1b0] master: Fix T83925: Crash when rendering on the CPU with OptiX denoiser enabled

2021-01-05 Thread Patrick Mours

Commit: 3373d14b1b05b2ee5dc88efff5dc8b1a5fe20f36
Author: Patrick Mours
Date:   Tue Jan 5 18:37:31 2021 +0100
Branches: master
https://developer.blender.org/rB3373d14b1b05b2ee5dc88efff5dc8b1a5fe20f36

Fix T83925: Crash when rendering on the CPU with OptiX denoiser enabled

Rendering on the CPU uses the Embree BVH layout, whether the OptiX denoiser is 
enabled or not.
This means the "build_bvh" function gets a "BVHEmbree" object to fill and not a 
"BVHMulti" as it
was assuming before, which caused crashes due to memory getting overwritten 
incorrectly. This
fixes that by redirecting Embree BVH builds to the Embree device.

Manifest Tasks: T83925

===

M   intern/cycles/device/device_multi.cpp

===

diff --git a/intern/cycles/device/device_multi.cpp 
b/intern/cycles/device/device_multi.cpp
index e5b138917ff..44959577fb5 100644
--- a/intern/cycles/device/device_multi.cpp
+++ b/intern/cycles/device/device_multi.cpp
@@ -248,11 +248,14 @@ class MultiDevice : public Device {
   void build_bvh(BVH *bvh, Progress , bool refit) override
   {
 /* Try to build and share a single acceleration structure, if possible */
-if (bvh->params.bvh_layout == BVH_LAYOUT_BVH2) {
+if (bvh->params.bvh_layout == BVH_LAYOUT_BVH2 || bvh->params.bvh_layout == 
BVH_LAYOUT_EMBREE) {
   devices.back().device->build_bvh(bvh, progress, refit);
   return;
 }
 
+assert(bvh->params.bvh_layout == BVH_LAYOUT_MULTI_OPTIX ||
+   bvh->params.bvh_layout == BVH_LAYOUT_MULTI_OPTIX_EMBREE);
+
 BVHMulti *const bvh_multi = static_cast<BVHMulti *>(bvh);
 bvh_multi->sub_bvhs.resize(devices.size());

___
Bf-blender-cvs mailing list
Bf-blender-cvs@blender.org
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

[Bf-blender-cvs] [166c0db3f94] master: Fix T83915: Subdivision Surface modifier causes visual artifacts in Cycles rendered viewport - CPU and OptiX

2021-01-05 Thread Patrick Mours

Commit: 166c0db3f9412925b501b7172875cb8ee2eb6958
Author: Patrick Mours
Date:   Tue Jan 5 14:39:29 2021 +0100
Branches: master
https://developer.blender.org/rB166c0db3f9412925b501b7172875cb8ee2eb6958

Fix T83915: Subdivision Surface modifier causes visual artifacts in Cycles 
rendered viewport - CPU and OptiX

Changing the geometry in the current scene caused the primitive offsets for all 
geometry to
change, but the values would not be updated in all bottom-level BVH structures. 
Rendering
artifacts and crashes were the result. This fixes that by ensuring all BVH 
structures are
updated when the primitive offsets change.

===

M   intern/cycles/bvh/bvh_embree.cpp
M   intern/cycles/render/geometry.cpp
M   intern/cycles/render/geometry.h

===

diff --git a/intern/cycles/bvh/bvh_embree.cpp b/intern/cycles/bvh/bvh_embree.cpp
index b874bda7186..c082478e5b1 100644
--- a/intern/cycles/bvh/bvh_embree.cpp
+++ b/intern/cycles/bvh/bvh_embree.cpp
@@ -682,6 +682,7 @@ void BVHEmbree::refit(Progress )
 if (mesh->num_triangles() > 0) {
   RTCGeometry geom = rtcGetGeometry(scene, geom_id);
   set_tri_vertex_buffer(geom, mesh, true);
+  rtcSetGeometryUserData(geom, (void *)mesh->optix_prim_offset);
   rtcCommitGeometry(geom);
 }
   }
@@ -690,6 +691,7 @@ void BVHEmbree::refit(Progress )
 if (hair->num_curves() > 0) {
   RTCGeometry geom = rtcGetGeometry(scene, geom_id + 1);
   set_curve_vertex_buffer(geom, hair, true);
+  rtcSetGeometryUserData(geom, (void *)hair->optix_prim_offset);
   rtcCommitGeometry(geom);
 }
   }
diff --git a/intern/cycles/render/geometry.cpp 
b/intern/cycles/render/geometry.cpp
index 64b98a91853..6fc217f2d76 100644
--- a/intern/cycles/render/geometry.cpp
+++ b/intern/cycles/render/geometry.cpp
@@ -280,6 +280,15 @@ void Geometry::tag_update(Scene *scene, bool rebuild)
   scene->object_manager->need_update = true;
 }
 
+void Geometry::tag_bvh_update(bool rebuild)
+{
+  tag_modified();
+
+  if (rebuild) {
+need_update_rebuild = true;
+  }
+}
+
 /* Geometry Manager */
 
 GeometryManager::GeometryManager()
@@ -915,7 +924,7 @@ void GeometryManager::device_update_attributes(Device 
*device,
   scene->object_manager->device_update_mesh_offsets(device, dscene, scene);
 }
 
-void GeometryManager::mesh_calc_offset(Scene *scene)
+void GeometryManager::mesh_calc_offset(Scene *scene, BVHLayout bvh_layout)
 {
   size_t vert_size = 0;
   size_t tri_size = 0;
@@ -930,6 +939,14 @@ void GeometryManager::mesh_calc_offset(Scene *scene)
   size_t optix_prim_size = 0;
 
   foreach (Geometry *geom, scene->geometry) {
+if (geom->optix_prim_offset != optix_prim_size) {
+  /* Need to rebuild BVH in OptiX, since refit only allows modified mesh 
data there */
+  const bool has_optix_bvh = bvh_layout == BVH_LAYOUT_OPTIX ||
+ bvh_layout == BVH_LAYOUT_MULTI_OPTIX ||
+ bvh_layout == BVH_LAYOUT_MULTI_OPTIX_EMBREE;
+  geom->tag_bvh_update(has_optix_bvh);
+}
+
 if (geom->geometry_type == Geometry::MESH || geom->geometry_type == 
Geometry::VOLUME) {
   Mesh *mesh = static_cast<Mesh *>(geom);
 
@@ -1526,7 +1543,9 @@ void GeometryManager::device_update(Device *device,
   /* Device update. */
   device_free(device, dscene);
 
-  mesh_calc_offset(scene);
+  const BVHLayout bvh_layout = 
BVHParams::best_bvh_layout(scene->params.bvh_layout,
+  
device->get_bvh_layout_mask());
+  mesh_calc_offset(scene, bvh_layout);
   if (true_displacement_used) {
 scoped_callback_timer timer([scene](double time) {
   if (scene->update_stats) {
@@ -1553,8 +1572,6 @@ void GeometryManager::device_update(Device *device,
   }
 
   /* Update displacement. */
-  BVHLayout bvh_layout = BVHParams::best_bvh_layout(scene->params.bvh_layout,
-
device->get_bvh_layout_mask());
   bool displacement_done = false;
   size_t num_bvh = 0;
 
diff --git a/intern/cycles/render/geometry.h b/intern/cycles/render/geometry.h
index d3daf0cc809..b124e950ad2 100644
--- a/intern/cycles/render/geometry.h
+++ b/intern/cycles/render/geometry.h
@@ -157,6 +157,8 @@ class Geometry : public Node {
 
   /* Updates */
   void tag_update(Scene *scene, bool rebuild);
+
+  void tag_bvh_update(bool rebuild);
 };
 
 /* Geometry Manager */
@@ -198,7 +200,7 @@ class GeometryManager {
  vector _attributes);
 
   /* Compute verts/triangles/curves offsets in global arrays. */
-  void mesh_calc_offset(Scene *scene);
+  void mesh_calc_offset(Scene *scene, BVHLayout bvh_layout);
 
   void device_update_object(Device *device, DeviceScene *dscene, Scene *scene, 
Progress );

__

[Bf-blender-cvs] [bfb6fce6594] master: Cycles: Add CPU+GPU rendering support with OptiX

2020-12-11 Thread Patrick Mours

Commit: bfb6fce6594e9cf133bd18aee311c1e5e32dc799
Author: Patrick Mours
Date:   Thu Dec 10 14:18:25 2020 +0100
Branches: master
https://developer.blender.org/rBbfb6fce6594e9cf133bd18aee311c1e5e32dc799

Cycles: Add CPU+GPU rendering support with OptiX

Adds support for building multiple BVH types in order to support using both CPU 
and OptiX
devices for rendering simultaneously. Primitive packing for Embree and OptiX is 
now
standalone, so it only needs to be run once and can be shared between the two. 
Additionally,
BVH building was made a device call, so that each device backend can decide how 
to
perform the building. The multi-device for instance creates a special multi-BVH 
that holds
references to several sub-BVHs, one for each sub-device.

Reviewed By: brecht, kevindietrich

Differential Revision: https://developer.blender.org/D9718

===

M   intern/cycles/blender/addon/properties.py
M   intern/cycles/blender/blender_device.cpp
M   intern/cycles/bvh/CMakeLists.txt
M   intern/cycles/bvh/bvh.cpp
M   intern/cycles/bvh/bvh.h
M   intern/cycles/bvh/bvh2.cpp
M   intern/cycles/bvh/bvh2.h
M   intern/cycles/bvh/bvh_embree.cpp
M   intern/cycles/bvh/bvh_embree.h
A   intern/cycles/bvh/bvh_multi.cpp
A   intern/cycles/bvh/bvh_multi.h
M   intern/cycles/bvh/bvh_optix.cpp
M   intern/cycles/bvh/bvh_optix.h
M   intern/cycles/device/cuda/device_cuda.h
M   intern/cycles/device/cuda/device_cuda_impl.cpp
M   intern/cycles/device/device.cpp
M   intern/cycles/device/device.h
M   intern/cycles/device/device_cpu.cpp
M   intern/cycles/device/device_multi.cpp
M   intern/cycles/device/device_optix.cpp
M   intern/cycles/kernel/bvh/bvh_embree.h
M   intern/cycles/kernel/kernel_types.h
M   intern/cycles/render/geometry.cpp
M   intern/cycles/render/geometry.h
M   intern/cycles/render/hair.cpp
M   intern/cycles/render/hair.h
M   intern/cycles/render/mesh.cpp
M   intern/cycles/render/mesh.h
M   intern/cycles/render/scene.cpp
M   intern/cycles/render/scene.h

===

diff --git a/intern/cycles/blender/addon/properties.py 
b/intern/cycles/blender/addon/properties.py
index 1cb29fc6cb0..2f204b2c658 100644
--- a/intern/cycles/blender/addon/properties.py
+++ b/intern/cycles/blender/addon/properties.py
@@ -1570,7 +1570,7 @@ class CyclesPreferences(bpy.types.AddonPreferences):
 elif entry.type == 'CPU':
 cpu_devices.append(entry)
 # Extend all GPU devices with CPU.
-if compute_device_type in {'CUDA', 'OPENCL'}:
+if compute_device_type in {'CUDA', 'OPTIX', 'OPENCL'}:
 devices.extend(cpu_devices)
 return devices
 
diff --git a/intern/cycles/blender/blender_device.cpp 
b/intern/cycles/blender/blender_device.cpp
index ffcaef0b2a9..977f8297de1 100644
--- a/intern/cycles/blender/blender_device.cpp
+++ b/intern/cycles/blender/blender_device.cpp
@@ -90,8 +90,7 @@ DeviceInfo blender_device_info(BL::Preferences 
_preferences, BL::Scene _scen
 mask |= DEVICE_MASK_CUDA;
   }
   else if (compute_device == COMPUTE_DEVICE_OPTIX) {
-/* Cannot use CPU and OptiX device at the same time right now, so 
replace mask. */
-mask = DEVICE_MASK_OPTIX;
+mask |= DEVICE_MASK_OPTIX;
   }
   else if (compute_device == COMPUTE_DEVICE_OPENCL) {
 mask |= DEVICE_MASK_OPENCL;
diff --git a/intern/cycles/bvh/CMakeLists.txt b/intern/cycles/bvh/CMakeLists.txt
index 703c69b1797..8cc72359757 100644
--- a/intern/cycles/bvh/CMakeLists.txt
+++ b/intern/cycles/bvh/CMakeLists.txt
@@ -25,6 +25,7 @@ set(SRC
   bvh_binning.cpp
   bvh_build.cpp
   bvh_embree.cpp
+  bvh_multi.cpp
   bvh_node.cpp
   bvh_optix.cpp
   bvh_sort.cpp
@@ -38,6 +39,7 @@ set(SRC_HEADERS
   bvh_binning.h
   bvh_build.h
   bvh_embree.h
+  bvh_multi.h
   bvh_node.h
   bvh_optix.h
   bvh_params.h
diff --git a/intern/cycles/bvh/bvh.cpp b/intern/cycles/bvh/bvh.cpp
index a51ac4cf4a9..256382e63ba 100644
--- a/intern/cycles/bvh/bvh.cpp
+++ b/intern/cycles/bvh/bvh.cpp
@@ -17,17 +17,11 @@
 
 #include "bvh/bvh.h"
 
-#include "render/hair.h"
-#include "render/mesh.h"
-#include "render/object.h"
-
 #include "bvh/bvh2.h"
-#include "bvh/bvh_build.h"
 #include "bvh/bvh_embree.h"
-#include "bvh/bvh_node.h"
+#include "bvh/bvh_multi.h"
 #include "bvh/bvh_optix.h"
 
-#include "util/util_foreach.h"
 #include "util/util_logging.h"
 #include "util/util_progress.h"
 
@@ -38,14 +32,17 @@ CCL_NAMESPACE_BEGIN
 const char *bvh_layout_name(BVHLayout layout)
 {
   switch (layout) {
-case BVH_LAYOUT_BVH2:
-  return "BVH2";
 case BVH_LAYOUT_NONE:
   return "NONE";
+case BVH_LAYOUT_BVH2:
+  return "

[Bf-blender-cvs] [41bca5a3eed] master: Fix T83581: "Only local" ambient occlusion option causes error on OptiX 2.92

2020-12-09 Thread Patrick Mours

Commit: 41bca5a3eed81d79a62899fcb04fa76674f09c88
Author: Patrick Mours
Date:   Wed Dec 9 17:06:28 2020 +0100
Branches: master
https://developer.blender.org/rB41bca5a3eed81d79a62899fcb04fa76674f09c88

Fix T83581: "Only local" ambient occlusion option causes error on OptiX 2.92

The SVM AO node calls "scene_intersect_local" with a NULL pointer for the 
intersection
information, which caused a crash with OptiX since it was not checking for this 
case and
always dereferencing this pointer. This fixes that by checking whether any hit 
information
was requested first (like is done in the BVH2 intersection routines).

===

M   intern/cycles/kernel/kernels/optix/kernel_optix.cu

===

diff --git a/intern/cycles/kernel/kernels/optix/kernel_optix.cu 
b/intern/cycles/kernel/kernels/optix/kernel_optix.cu
index fd9065098dd..8ccd2555091 100644
--- a/intern/cycles/kernel/kernels/optix/kernel_optix.cu
+++ b/intern/cycles/kernel/kernels/optix/kernel_optix.cu
@@ -118,12 +118,18 @@ extern "C" __global__ void 
__anyhit__kernel_optix_local_hit()
 return optixIgnoreIntersection();
   }
 
+  const uint max_hits = optixGetPayload_5();
+  if (max_hits == 0) {
+// Special case for when no hit information is requested, just report that 
something was hit
+optixSetPayload_5(true);
+return optixTerminateRay();
+  }
+
   int hit = 0;
   uint *const lcg_state = get_payload_ptr_0();
   LocalIntersection *const local_isect = 
get_payload_ptr_2();
 
   if (lcg_state) {
-const uint max_hits = optixGetPayload_5();
 for (int i = min(max_hits, local_isect->num_hits) - 1; i >= 0; --i) {
   if (optixGetRayTmax() == local_isect->hits[i].t) {
 return optixIgnoreIntersection();

___
Bf-blender-cvs mailing list
Bf-blender-cvs@blender.org
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

[Bf-blender-cvs] [d7cf464b493] master: Cycles: Remove "OptiX support is experimental" notice

2020-12-08 Thread Patrick Mours

Commit: d7cf464b493581a381540673aa7ed9e4ff47b425
Author: Patrick Mours
Date:   Tue Dec 8 16:13:04 2020 +0100
Branches: master
https://developer.blender.org/rBd7cf464b493581a381540673aa7ed9e4ff47b425

Cycles: Remove "OptiX support is experimental" notice

OptiX support is not in fact experimental anymore, so it is time for that 
notice to go.
All Cycles features that are currently supported on the GPU do work now when 
OptiX is selected.

===

M   intern/cycles/blender/addon/properties.py

===

diff --git a/intern/cycles/blender/addon/properties.py 
b/intern/cycles/blender/addon/properties.py
index 0d861fde6fc..1cb29fc6cb0 100644
--- a/intern/cycles/blender/addon/properties.py
+++ b/intern/cycles/blender/addon/properties.py
@@ -1620,11 +1620,6 @@ class CyclesPreferences(bpy.types.AddonPreferences):
 for device in devices:
 box.prop(device, "use", text=device.name)
 
-if device_type == 'OPTIX':
-col = box.column(align=True)
-col.label(text="OptiX support is experimental", icon='INFO')
-col.label(text="Not all Cycles features are supported yet", 
icon='BLANK1')
-
 def draw_impl(self, layout, context):
 row = layout.row()
 row.prop(self, "compute_device_type", expand=True)

___
Bf-blender-cvs mailing list
Bf-blender-cvs@blender.org
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

[Bf-blender-cvs] [612b83bbd18] master: Cycles: Enable baking panel in OptiX and redirect those requests to CUDA for now

2020-12-08 Thread Patrick Mours

Commit: 612b83bbd183c214b2d252cf19cdf581f3d9cede
Author: Patrick Mours
Date:   Tue Dec 8 15:42:00 2020 +0100
Branches: master
https://developer.blender.org/rB612b83bbd183c214b2d252cf19cdf581f3d9cede

Cycles: Enable baking panel in OptiX and redirect those requests to CUDA for now

This enables support for baking when OptiX is active, but uses CUDA for that 
behind the scenes, since
the way baking is currently implemented does not work well with OptiX.

Reviewed By: brecht

Differential Revision: https://developer.blender.org/D9784

===

M   intern/cycles/blender/addon/ui.py
M   intern/cycles/device/device_optix.cpp

===

diff --git a/intern/cycles/blender/addon/ui.py 
b/intern/cycles/blender/addon/ui.py
index 623e5cf9e37..f24265d256a 100644
--- a/intern/cycles/blender/addon/ui.py
+++ b/intern/cycles/blender/addon/ui.py
@@ -1822,10 +1822,6 @@ class CYCLES_RENDER_PT_bake(CyclesButtonsPanel, Panel):
 bl_options = {'DEFAULT_CLOSED'}
 COMPAT_ENGINES = {'CYCLES'}
 
-@classmethod
-def poll(cls, context):
-return CyclesButtonsPanel.poll(context) and not use_optix(context)
-
 def draw(self, context):
 layout = self.layout
 layout.use_property_split = True
@@ -1836,6 +1832,9 @@ class CYCLES_RENDER_PT_bake(CyclesButtonsPanel, Panel):
 cbk = scene.render.bake
 rd = scene.render
 
+if use_optix(context):
+layout.label(text="Baking is performed using CUDA instead of 
OptiX", icon='INFO')
+
 if rd.use_bake_multires:
 layout.operator("object.bake_image", icon='RENDER_STILL')
 layout.prop(rd, "use_bake_multires")
diff --git a/intern/cycles/device/device_optix.cpp 
b/intern/cycles/device/device_optix.cpp
index 682540a51fd..c6276c1e955 100644
--- a/intern/cycles/device/device_optix.cpp
+++ b/intern/cycles/device/device_optix.cpp
@@ -297,6 +297,10 @@ class OptiXDevice : public CUDADevice {
 
   BVHLayoutMask get_bvh_layout_mask() const override
   {
+// CUDA kernels are used when doing baking, so need to build a BVH those 
can understand too!
+if (optix_module == NULL)
+  return CUDADevice::get_bvh_layout_mask();
+
 // OptiX has its own internal acceleration structure format
 return BVH_LAYOUT_OPTIX;
   }
@@ -330,10 +334,9 @@ class OptiXDevice : public CUDADevice {
   return false;
 }
 
-// Disable baking for now, since its kernel is not well-suited for 
inlining and is very slow
+// Baking is currently performed using CUDA, so no need to load OptiX 
kernels
 if (requested_features.use_baking) {
-  set_error("OptiX backend does not support baking yet");
-  return false;
+  return true;
 }
 
 const CUDAContextScope scope(cuContext);
@@ -700,6 +703,11 @@ class OptiXDevice : public CUDADevice {
   while (task.acquire_tile(this, tile, task.tile_types)) {
 if (tile.task == RenderTile::PATH_TRACE)
   launch_render(task, tile, thread_index);
+else if (tile.task == RenderTile::BAKE) {
+  // Perform baking using CUDA, since it is not currently implemented 
in OptiX
+  device_vector work_tiles(this, "work_tiles", 
MEM_READ_ONLY);
+  CUDADevice::render(task, tile, work_tiles);
+}
 else if (tile.task == RenderTile::DENOISE)
   launch_denoise(task, tile);
 task.release_tile(tile);

___
Bf-blender-cvs mailing list
Bf-blender-cvs@blender.org
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

[Bf-blender-cvs] [c10546f5e9f] master: Cycles: Add support for shader raytracing in OptiX

2020-12-04 Thread Patrick Mours

Commit: c10546f5e9fe2a300b6a21e1e16b22c93060d0e9
Author: Patrick Mours
Date:   Thu Dec 3 12:19:36 2020 +0100
Branches: master
https://developer.blender.org/rBc10546f5e9fe2a300b6a21e1e16b22c93060d0e9

Cycles: Add support for shader raytracing in OptiX

Support for the AO and bevel shader nodes requires calling "optixTrace" from 
within the shading
VM, which is only allowed from inlined functions to the raygen program or 
callables. This patch
therefore converts the shading VM to use direct callables to make it work. To 
prevent performance
regressions a separate kernel module is compiled and used for this purpose.

Reviewed By: brecht

Differential Revision: https://developer.blender.org/D9733

===

M   intern/cycles/device/device_optix.cpp
M   intern/cycles/kernel/CMakeLists.txt
M   intern/cycles/kernel/kernel_subsurface.h
M   intern/cycles/kernel/kernel_types.h
M   intern/cycles/kernel/kernel_volume.h
M   intern/cycles/kernel/svm/svm.h

===

diff --git a/intern/cycles/device/device_optix.cpp 
b/intern/cycles/device/device_optix.cpp
index 95234845f98..682540a51fd 100644
--- a/intern/cycles/device/device_optix.cpp
+++ b/intern/cycles/device/device_optix.cpp
@@ -141,7 +141,8 @@ class OptiXDevice : public CUDADevice {
 PG_BAKE,  // kernel_bake_evaluate
 PG_DISP,  // kernel_displace_evaluate
 PG_BACK,  // kernel_background_evaluate
-NUM_PROGRAM_GROUPS
+PG_CALL,
+NUM_PROGRAM_GROUPS = PG_CALL + 3
   };
 
   // List of OptiX pipelines
@@ -334,11 +335,6 @@ class OptiXDevice : public CUDADevice {
   set_error("OptiX backend does not support baking yet");
   return false;
 }
-// Disable shader raytracing support for now, since continuation callables 
are slow
-if (requested_features.use_shader_raytrace) {
-  set_error("OptiX backend does not support 'Ambient Occlusion' and 
'Bevel' shader nodes yet");
-  return false;
-}
 
 const CUDAContextScope scope(cuContext);
 
@@ -410,7 +406,9 @@ class OptiXDevice : public CUDADevice {
 }
 
 {  // Load and compile PTX module with OptiX kernels
-  string ptx_data, ptx_filename = path_get("lib/kernel_optix.ptx");
+  string ptx_data, ptx_filename = 
path_get(requested_features.use_shader_raytrace ?
+   
"lib/kernel_optix_shader_raytrace.ptx" :
+   "lib/kernel_optix.ptx");
   if (use_adaptive_compilation() || path_file_size(ptx_filename) == -1) {
 if (!getenv("OPTIX_ROOT_DIR")) {
   set_error(
@@ -525,6 +523,21 @@ class OptiXDevice : public CUDADevice {
   group_descs[PG_BACK].raygen.entryFunctionName = 
"__raygen__kernel_optix_background";
 }
 
+// Shader raytracing replaces some functions with direct callables
+if (requested_features.use_shader_raytrace) {
+  group_descs[PG_CALL + 0].kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES;
+  group_descs[PG_CALL + 0].callables.moduleDC = optix_module;
+  group_descs[PG_CALL + 0].callables.entryFunctionNameDC = 
"__direct_callable__svm_eval_nodes";
+  group_descs[PG_CALL + 1].kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES;
+  group_descs[PG_CALL + 1].callables.moduleDC = optix_module;
+  group_descs[PG_CALL + 1].callables.entryFunctionNameDC =
+  "__direct_callable__kernel_volume_shadow";
+  group_descs[PG_CALL + 2].kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES;
+  group_descs[PG_CALL + 2].callables.moduleDC = optix_module;
+  group_descs[PG_CALL + 2].callables.entryFunctionNameDC =
+  "__direct_callable__subsurface_scatter_multi_setup";
+}
+
 check_result_optix_ret(optixProgramGroupCreate(
 context, group_descs, NUM_PROGRAM_GROUPS, _options, nullptr, 0, 
groups));
 
@@ -564,33 +577,51 @@ class OptiXDevice : public CUDADevice {
 #  endif
 
 {  // Create path tracing pipeline
-  OptixProgramGroup pipeline_groups[] = {
-groups[PG_RGEN],
-groups[PG_MISS],
-groups[PG_HITD],
-groups[PG_HITS],
-groups[PG_HITL],
+  vector pipeline_groups;
+  pipeline_groups.reserve(NUM_PROGRAM_GROUPS);
+  pipeline_groups.push_back(groups[PG_RGEN]);
+  pipeline_groups.push_back(groups[PG_MISS]);
+  pipeline_groups.push_back(groups[PG_HITD]);
+  pipeline_groups.push_back(groups[PG_HITS]);
+  pipeline_groups.push_back(groups[PG_HITL]);
 #  if OPTIX_ABI_VERSION >= 36
-groups[PG_HITD_MOTION],
-groups[PG_HITS_MOTION],
+  if (motion_blur) {
+pipeline_groups.push_back(groups[PG_HITD_MOTION]);
+pipeline_groups.push_back(groups[PG_HITS_MOTION]);
+  }
 #  endif
-  };
-  check_result_optix_ret(
-

[Bf-blender-cvs] [a3c40912153] master: Fix Cycles device kernels containing debug assertion code

2020-12-03 Thread Patrick Mours

Commit: a3c40912153235508aaccbd310f247073029becb
Author: Patrick Mours
Date:   Thu Dec 3 15:20:50 2020 +0100
Branches: master
https://developer.blender.org/rBa3c40912153235508aaccbd310f247073029becb

Fix Cycles device kernels containing debug assertion code

NanoVDB includes "assert.h" and makes use of "assert" in several places and 
since the compile
pipeline for CUDA/OptiX kernels does not define "NDEBUG" for release builds, 
those debug
checks were always added. This is not intended, so this patch disables "assert" 
for CUDA/OptiX
by defining "NDEBUG" before including NanoVDB headers.
This also fixes a warning about unknown pragmas in NanoVDB thrown by the CUDA 
compiler.

===

M   intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h
M   intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h

===

diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h 
b/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h
index 44c658d4cab..59b96c86c50 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h
@@ -18,6 +18,7 @@
 #define __KERNEL_CPU_IMAGE_H__
 
 #ifdef WITH_NANOVDB
+#  define NANOVDB_USE_INTRINSICS
 #  include 
 #  include 
 #endif
diff --git a/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h 
b/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h
index 001bc652810..82ad9225fc3 100644
--- a/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h
+++ b/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h
@@ -15,6 +15,8 @@
  */
 
 #ifdef WITH_NANOVDB
+#  define NDEBUG /* Disable "assert" in device code */
+#  define NANOVDB_USE_INTRINSICS
 #  include "nanovdb/NanoVDB.h"
 #  include "nanovdb/util/SampleFromVoxels.h"
 #endif

___
Bf-blender-cvs mailing list
Bf-blender-cvs@blender.org
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

[Bf-blender-cvs] [a8f1bea5901] master: Fix NanoVDB not being enabled/disabled correctly in CMake profiles

2020-11-12 Thread Patrick Mours

Commit: a8f1bea5901c2cccf9b1408090f85ee072589cce
Author: Patrick Mours
Date:   Thu Nov 12 12:49:12 2020 +0100
Branches: master
https://developer.blender.org/rBa8f1bea5901c2cccf9b1408090f85ee072589cce

Fix NanoVDB not being enabled/disabled correctly in CMake profiles

This caused warnings when e.g. building the lite profile because NanoVDB was 
not disabled, but
OpenVDB was. This fixes it by setting the "WITH_NANOVDB" flag too.

===

M   build_files/cmake/config/blender_full.cmake
M   build_files/cmake/config/blender_lite.cmake
M   build_files/cmake/config/blender_release.cmake
M   build_files/cmake/config/bpy_module.cmake

===

diff --git a/build_files/cmake/config/blender_full.cmake 
b/build_files/cmake/config/blender_full.cmake
index c5ed59dfaa5..08065ec0276 100644
--- a/build_files/cmake/config/blender_full.cmake
+++ b/build_files/cmake/config/blender_full.cmake
@@ -44,6 +44,7 @@ set(WITH_OPENMP  ON  CACHE BOOL "" FORCE)
 set(WITH_OPENSUBDIV  ON  CACHE BOOL "" FORCE)
 set(WITH_OPENVDB ON  CACHE BOOL "" FORCE)
 set(WITH_OPENVDB_BLOSC   ON  CACHE BOOL "" FORCE)
+set(WITH_NANOVDB ON  CACHE BOOL "" FORCE)
 set(WITH_POTRACE ON  CACHE BOOL "" FORCE)
 set(WITH_PYTHON_INSTALL  ON  CACHE BOOL "" FORCE)
 set(WITH_QUADRIFLOW  ON  CACHE BOOL "" FORCE)
diff --git a/build_files/cmake/config/blender_lite.cmake 
b/build_files/cmake/config/blender_lite.cmake
index f53bdaac41e..4150094e9f5 100644
--- a/build_files/cmake/config/blender_lite.cmake
+++ b/build_files/cmake/config/blender_lite.cmake
@@ -51,6 +51,7 @@ set(WITH_OPENIMAGEIO OFF CACHE BOOL "" FORCE)
 set(WITH_OPENMP  OFF CACHE BOOL "" FORCE)
 set(WITH_OPENSUBDIV  OFF CACHE BOOL "" FORCE)
 set(WITH_OPENVDB OFF CACHE BOOL "" FORCE)
+set(WITH_NANOVDB OFF CACHE BOOL "" FORCE)
 set(WITH_QUADRIFLOW  OFF CACHE BOOL "" FORCE)
 set(WITH_SDL OFF CACHE BOOL "" FORCE)
 set(WITH_TBB OFF CACHE BOOL "" FORCE)
diff --git a/build_files/cmake/config/blender_release.cmake 
b/build_files/cmake/config/blender_release.cmake
index f8f7b730efe..fd3225b0287 100644
--- a/build_files/cmake/config/blender_release.cmake
+++ b/build_files/cmake/config/blender_release.cmake
@@ -45,6 +45,7 @@ set(WITH_OPENMP  ON  CACHE BOOL "" FORCE)
 set(WITH_OPENSUBDIV  ON  CACHE BOOL "" FORCE)
 set(WITH_OPENVDB ON  CACHE BOOL "" FORCE)
 set(WITH_OPENVDB_BLOSC   ON  CACHE BOOL "" FORCE)
+set(WITH_NANOVDB ON  CACHE BOOL "" FORCE)
 set(WITH_POTRACE ON  CACHE BOOL "" FORCE)
 set(WITH_PYTHON_INSTALL  ON  CACHE BOOL "" FORCE)
 set(WITH_QUADRIFLOW  ON  CACHE BOOL "" FORCE)
diff --git a/build_files/cmake/config/bpy_module.cmake 
b/build_files/cmake/config/bpy_module.cmake
index 2c0da81a1ea..7fc68f97f29 100644
--- a/build_files/cmake/config/bpy_module.cmake
+++ b/build_files/cmake/config/bpy_module.cmake
@@ -28,6 +28,7 @@ set(WITH_OPENCOLLADA OFF CACHE BOOL "" FORCE)
 set(WITH_INTERNATIONAL   OFF CACHE BOOL "" FORCE)
 set(WITH_BULLET  OFF CACHE BOOL "" FORCE)
 set(WITH_OPENVDB OFF CACHE BOOL "" FORCE)
+set(WITH_NANOVDB OFF CACHE BOOL "" FORCE)
 set(WITH_ALEMBIC OFF CACHE BOOL "" FORCE)
 
 # Depends on Python install, do this to quiet warning.

___
Bf-blender-cvs mailing list
Bf-blender-cvs@blender.org
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

[Bf-blender-cvs] [a63208823c8] master: Fix NanoVDB compile errors with recent NanoVDB versions

2020-11-10 Thread Patrick Mours

Commit: a63208823c8426b76270393f9217d3cf3ef66d0b
Author: Patrick Mours
Date:   Tue Nov 10 18:28:14 2020 +0100
Branches: master
https://developer.blender.org/rBa63208823c8426b76270393f9217d3cf3ef66d0b

Fix NanoVDB compile errors with recent NanoVDB versions

There were some changes to the NanoVDB API that broke the way Cycles was 
previously using it.
With these changes it compiles successfully again and also still compiles with 
the NanoVDB revision
that is currently part of the Blender dependencies. Ref T81454.

===

M   intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h
M   intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h

===

diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h 
b/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h
index aaf58cbd0ab..44c658d4cab 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h
@@ -478,7 +478,7 @@ template struct TextureInterpolator {
 #ifdef WITH_NANOVDB
 template struct NanoVDBInterpolator {
 
-  typedef nanovdb::ReadAccessor> ReadAccessorT;
+  typedef typename nanovdb::NanoGrid::AccessorType AccessorType;
 
   static ccl_always_inline float4 read(float r)
   {
@@ -490,16 +490,22 @@ template struct NanoVDBInterpolator {
 return make_float4(r[0], r[1], r[2], 1.0f);
   }
 
-  static ccl_always_inline float4 interp_3d_closest(ReadAccessorT acc, float 
x, float y, float z)
+  static ccl_always_inline float4 interp_3d_closest(const AccessorType ,
+float x,
+float y,
+float z)
   {
 const nanovdb::Vec3f xyz(x, y, z);
-return read(nanovdb::NearestNeighborSampler(acc)(xyz));
+return read(nanovdb::SampleFromVoxels(acc)(xyz));
   }
 
-  static ccl_always_inline float4 interp_3d_linear(ReadAccessorT acc, float x, 
float y, float z)
+  static ccl_always_inline float4 interp_3d_linear(const AccessorType ,
+   float x,
+   float y,
+   float z)
   {
 const nanovdb::Vec3f xyz(x - 0.5f, y - 0.5f, z - 0.5f);
-return read(nanovdb::TrilinearSampler(acc)(xyz));
+return read(nanovdb::SampleFromVoxels(acc)(xyz));
   }
 
 #  if defined(__GNUC__) || defined(__clang__)
@@ -508,7 +514,7 @@ template struct NanoVDBInterpolator {
   static ccl_never_inline
 #  endif
   float4
-  interp_3d_cubic(ReadAccessorT acc, float x, float y, float z)
+  interp_3d_cubic(const AccessorType , float x, float y, float z)
   {
 int ix, iy, iz;
 int nix, niy, niz;
@@ -561,15 +567,15 @@ template struct NanoVDBInterpolator {
 using namespace nanovdb;
 
 NanoGrid *const grid = (NanoGrid *)info.data;
-const NanoRoot  = grid->tree().root();
+AccessorType acc = grid->getAccessor();
 
 switch ((interp == INTERPOLATION_NONE) ? info.interpolation : interp) {
   case INTERPOLATION_CLOSEST:
-return interp_3d_closest(root, x, y, z);
+return interp_3d_closest(acc, x, y, z);
   case INTERPOLATION_LINEAR:
-return interp_3d_linear(root, x, y, z);
+return interp_3d_linear(acc, x, y, z);
   default:
-return interp_3d_cubic(root, x, y, z);
+return interp_3d_cubic(acc, x, y, z);
 }
   }
 };
diff --git a/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h 
b/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h
index b8aaacba960..001bc652810 100644
--- a/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h
+++ b/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h
@@ -159,18 +159,18 @@ ccl_device_inline T kernel_tex_image_interp_nanovdb(
 const TextureInfo , float x, float y, float z, uint interpolation)
 {
   using namespace nanovdb;
-  typedef ReadAccessor> ReadAccessorT;
 
   NanoGrid *const grid = (NanoGrid *)info.data;
-  const NanoRoot  = grid->tree().root();
+  typedef typename nanovdb::NanoGrid::AccessorType AccessorType;
+  AccessorType acc = grid->getAccessor();
 
   switch (interpolation) {
 case INTERPOLATION_CLOSEST:
-  return NearestNeighborSampler(root)(Vec3f(x, y, 
z));
+  return SampleFromVoxels(acc)(Vec3f(x, y, z));
 case INTERPOLATION_LINEAR:
-  return TrilinearSampler(root)(Vec3f(x - 0.5f, y - 
0.5f, z - 0.5f));
+  return SampleFromVoxels(acc)(Vec3f(x - 0.5f, y - 
0.5f, z - 0.5f));
 default:
-  TrilinearSampler s(root);
+  SampleFromVoxels s(acc);
   return kernel_tex_image_interp_tricubic_nanovdb(s, x - 0.5f, y - 
0.5f, z - 0.5f);
   }
 }

___
Bf-blender-cvs mailing list
Bf-blender-cvs@blender.org
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

[Bf-blender-cvs] [bd6bfba64da] master: Cycles: Enable NanoVDB usage by default

2020-11-10 Thread Patrick Mours

Commit: bd6bfba64dad2e14cab2c8372ba0f3ad39b93cdc
Author: Patrick Mours
Date:   Tue Nov 10 16:19:47 2020 +0100
Branches: master
https://developer.blender.org/rBbd6bfba64dad2e14cab2c8372ba0f3ad39b93cdc

Cycles: Enable NanoVDB usage by default

As discussed during the Rendering Meeting. Ref T81454.

===

M   CMakeLists.txt

===

diff --git a/CMakeLists.txt b/CMakeLists.txt
index aa791a53f81..67b57dc2fc4 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -203,7 +203,7 @@ option(WITH_OPENVDB   "Enable features relying on 
OpenVDB" ON)
 option(WITH_OPENVDB_BLOSC "Enable blosc compression for OpenVDB, only enable 
if OpenVDB was built with blosc support" ON)
 option(WITH_OPENVDB_3_ABI_COMPATIBLE "Assume OpenVDB library has been compiled 
with version 3 ABI compatibility" OFF)
 mark_as_advanced(WITH_OPENVDB_3_ABI_COMPATIBLE)
-option(WITH_NANOVDB   "Enable usage of NanoVDB data structure for 
accelerated rendering on the GPU" OFF)
+option(WITH_NANOVDB   "Enable usage of NanoVDB data structure for 
rendering on the GPU" ON)
 
 # GHOST Windowing Library Options
 option(WITH_GHOST_DEBUG   "Enable debugging output for the GHOST library" OFF)

___
Bf-blender-cvs mailing list
Bf-blender-cvs@blender.org
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

[Bf-blender-cvs] [118e31a0a99] master: Cycles: Fix tricubic sampling with NanoVDB

2020-11-09 Thread Patrick Mours

Commit: 118e31a0a995ae4e8845376215d9c35017a8f781
Author: Patrick Mours
Date:   Fri Nov 6 15:19:58 2020 +0100
Branches: master
https://developer.blender.org/rB118e31a0a995ae4e8845376215d9c35017a8f781

Cycles: Fix tricubic sampling with NanoVDB

Volumes using tricubic sampling were producing different results with NanoVDB 
compared
to dense textures. This fixes that by using the same tricubic sampling 
algorithm in both
cases. It also fixes some remaining offset issues and some minor things that 
broke OpenCL
kernel compilation on NVIDIA.

Reviewed By: brecht

Differential Revision: https://developer.blender.org/D9491

===

M   intern/cycles/kernel/kernel_compat_opencl.h
M   intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h
M   intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h
M   intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h
M   intern/cycles/render/image_vdb.cpp
M   intern/cycles/util/util_types.h

===

diff --git a/intern/cycles/kernel/kernel_compat_opencl.h 
b/intern/cycles/kernel/kernel_compat_opencl.h
index ba7ab43a47a..1848f6059b6 100644
--- a/intern/cycles/kernel/kernel_compat_opencl.h
+++ b/intern/cycles/kernel/kernel_compat_opencl.h
@@ -48,7 +48,7 @@
 #define ccl_align(n) __attribute__((aligned(n)))
 #define ccl_optional_struct_init
 
-#if __OPENCL_VERSION__ >= 200
+#if __OPENCL_VERSION__ >= 200 && !defined(__NV_CL_C_VERSION)
 #  define ccl_loop_no_unroll __attribute__((opencl_unroll_hint(1)))
 #else
 #  define ccl_loop_no_unroll
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h 
b/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h
index b466b41f456..b97400a443a 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h
@@ -28,7 +28,6 @@ CCL_NAMESPACE_BEGIN
  * instruction sets. */
 namespace {
 
-template struct TextureInterpolator {
 #define SET_CUBIC_SPLINE_WEIGHTS(u, t) \
   { \
 u[0] = (((-1.0f / 6.0f) * t + 0.5f) * t - 0.5f) * t + (1.0f / 6.0f); \
@@ -38,6 +37,15 @@ template struct TextureInterpolator {
   } \
   (void)0
 
+ccl_always_inline float frac(float x, int *ix)
+{
+  int i = float_to_int(x) - ((x < 0.0f) ? 1 : 0);
+  *ix = i;
+  return x - (float)i;
+}
+
+template struct TextureInterpolator {
+
   static ccl_always_inline float4 read(float4 r)
   {
 return r;
@@ -106,13 +114,6 @@ template struct TextureInterpolator {
 return clamp(x, 0, width - 1);
   }
 
-  static ccl_always_inline float frac(float x, int *ix)
-  {
-int i = float_to_int(x) - ((x < 0.0f) ? 1 : 0);
-*ix = i;
-return x - (float)i;
-  }
-
   /*   2D interpolation  */
 
   static ccl_always_inline float4 interp_closest(const TextureInfo , 
float x, float y)
@@ -370,7 +371,7 @@ template struct TextureInterpolator {
   static ccl_never_inline
 #endif
   float4
-  interp_3d_tricubic(const TextureInfo , float x, float y, float z)
+  interp_3d_cubic(const TextureInfo , float x, float y, float z)
   {
 int width = info.width;
 int height = info.height;
@@ -469,14 +470,16 @@ template struct TextureInterpolator {
   case INTERPOLATION_LINEAR:
 return interp_3d_linear(info, x, y, z);
   default:
-return interp_3d_tricubic(info, x, y, z);
+return interp_3d_cubic(info, x, y, z);
 }
   }
-#undef SET_CUBIC_SPLINE_WEIGHTS
 };
 
 #ifdef WITH_NANOVDB
 template struct NanoVDBInterpolator {
+
+  typedef nanovdb::ReadAccessor> ReadAccessorT;
+
   static ccl_always_inline float4 read(float r)
   {
 return make_float4(r, r, r, 1.0f);
@@ -487,26 +490,93 @@ template struct NanoVDBInterpolator {
 return make_float4(r[0], r[1], r[2], 1.0f);
   }
 
+  static ccl_always_inline float4 interp_3d_closest(ReadAccessorT acc, float 
x, float y, float z)
+  {
+const nanovdb::Vec3f xyz(x, y, z);
+return read(nanovdb::NearestNeighborSampler(acc)(xyz));
+  }
+
+  static ccl_always_inline float4 interp_3d_linear(ReadAccessorT acc, float x, 
float y, float z)
+  {
+const nanovdb::Vec3f xyz(x - 0.5f, y - 0.5f, z - 0.5f);
+return read(nanovdb::TrilinearSampler(acc)(xyz));
+  }
+
+#  if defined(__GNUC__) || defined(__clang__)
+  static ccl_always_inline
+#  else
+  static ccl_never_inline
+#  endif
+  float4
+  interp_3d_cubic(ReadAccessorT acc, float x, float y, float z)
+  {
+int ix, iy, iz;
+int nix, niy, niz;
+int pix, piy, piz;
+int nnix, nniy, nniz;
+/* Tricubic b-spline interpolation. */
+const float tx = frac(x - 0.5f, );
+const float ty = frac(y - 0.5f, );
+const float tz = frac(z - 0.5f, );
+pix = ix - 1;
+piy = iy - 1;
+piz = iz - 1;
+nix = ix + 1;
+niy = iy + 1;
+niz = iz + 1;
+nnix = ix + 2;
+nniy = iy + 2;
+nniz = iz + 2;
+
+const int xc[4] = {pix, ix, nix, nnix};
+const int yc[4

[Bf-blender-cvs] [fd9124ed6b3] master: Fix Cycles volume render differences with NanoVDB when using linear sampling

2020-11-04 Thread Patrick Mours

Commit: fd9124ed6b35fc3701ec3a4a9980c6eda5324fac
Author: Patrick Mours
Date:   Wed Nov 4 15:09:06 2020 +0100
Branches: master
https://developer.blender.org/rBfd9124ed6b35fc3701ec3a4a9980c6eda5324fac

Fix Cycles volume render differences with NanoVDB when using linear sampling

The NanoVDB sampling implementation behaves differently from dense texture 
sampling, so this
adds a small offset to the voxel indices to correct for that.
It also removes the need to modify the sampling coordinates by moving all the 
necessary
transformations into the image transform. See also T81454.

===

M   intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h
M   intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h
M   intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h
M   intern/cycles/render/image_vdb.cpp
M   intern/cycles/render/object.cpp

===

diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h 
b/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h
index 347d0fec7f5..b466b41f456 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h
@@ -490,21 +490,17 @@ template struct NanoVDBInterpolator {
   static ccl_always_inline float4
   interp_3d(const TextureInfo , float x, float y, float z, 
InterpolationType interp)
   {
+const nanovdb::Vec3f xyz(x, y, z);
 nanovdb::NanoGrid *const grid = (nanovdb::NanoGrid *)info.data;
 const nanovdb::NanoRoot  = grid->tree().root();
 
-const nanovdb::Coord off(root.bbox().min());
-const nanovdb::Coord dim(root.bbox().dim());
-const nanovdb::Vec3f xyz(off[0] + x * dim[0], off[1] + y * dim[1], off[2] 
+ z * dim[2]);
-
 typedef nanovdb::ReadAccessor> ReadAccessorT;
 switch ((interp == INTERPOLATION_NONE) ? info.interpolation : interp) {
-  default:
-  case INTERPOLATION_LINEAR:
-return read(nanovdb::SampleFromVoxels(root)(xyz));
   case INTERPOLATION_CLOSEST:
 return read(nanovdb::SampleFromVoxels(root)(xyz));
-  case INTERPOLATION_CUBIC:
+  case INTERPOLATION_LINEAR:
+return read(nanovdb::SampleFromVoxels(root)(xyz));
+  default:
 return read(nanovdb::SampleFromVoxels(root)(xyz));
 }
   }
diff --git a/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h 
b/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h
index 5a005a3f65b..c2a0ee06dbc 100644
--- a/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h
+++ b/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h
@@ -130,21 +130,17 @@ template
 ccl_device_inline T kernel_tex_image_interp_nanovdb(
 const TextureInfo , float x, float y, float z, uint interpolation)
 {
+  const nanovdb::Vec3f xyz(x, y, z);
   nanovdb::NanoGrid *const grid = (nanovdb::NanoGrid *)info.data;
   const nanovdb::NanoRoot  = grid->tree().root();
 
-  const nanovdb::Coord off(root.bbox().min());
-  const nanovdb::Coord dim(root.bbox().dim());
-  const nanovdb::Vec3f xyz(off[0] + x * dim[0], off[1] + y * dim[1], off[2] + 
z * dim[2]);
-
   typedef nanovdb::ReadAccessor> ReadAccessorT;
   switch (interpolation) {
-default:
-case INTERPOLATION_LINEAR:
-  return nanovdb::SampleFromVoxels(root)(xyz);
 case INTERPOLATION_CLOSEST:
   return nanovdb::SampleFromVoxels(root)(xyz);
-case INTERPOLATION_CUBIC:
+case INTERPOLATION_LINEAR:
+  return nanovdb::SampleFromVoxels(root)(xyz);
+default:
   return nanovdb::SampleFromVoxels(root)(xyz);
   }
 }
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h 
b/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h
index 2f44f249c5f..cbf9a208112 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h
+++ b/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h
@@ -229,32 +229,29 @@ ccl_device float4 
kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float3 P
   uint interpolation = (interp == INTERPOLATION_NONE) ? info->interpolation : 
interp;
 
 #ifdef WITH_NANOVDB
+  cnanovdb_Vec3F xyz;
+  xyz.mVec[0] = x;
+  xyz.mVec[1] = y;
+  xyz.mVec[2] = z;
+
   if (info->data_type == IMAGE_DATA_TYPE_NANOVDB_FLOAT) {
 ccl_global cnanovdb_griddata *grid =
 (ccl_global cnanovdb_griddata *)(kg->buffers[info->cl_buffer] + 
info->data);
 const ccl_global cnanovdb_rootdataF *root = cnanovdb_treedata_rootF(
 cnanovdb_griddata_tree(grid));
 
-cnanovdb_Vec3F xyz;
-xyz.mVec[0] = root->mBBox_min.mVec[0] +
-  x * (root->mBBox_max.mVec[0] - root->mBBox_min.mVec[0]);
-xyz.mVec[1] = root->mBBox_min.mVec[1] +
-  y * (root->mBBox_max.mVec[1] - root->mBBox_min.mVec[1]);
-xyz.mVec[2] = root->mBBox_min.mVec[2] +
-  z * (root->mBBox_max.mVec[2] - root->mBBox_min.mVec[2]);
-
 cnanovdb_readaccessor acc;
 cnanovdb_read

[Bf-blender-cvs] [cf7343a3555] master: Fix Cycles kernel compile error with NanoVDB because of type redefinition

2020-11-02 Thread Patrick Mours

Commit: cf7343a35559c7fec2047c3e5d7ef4dd7c1e64a5
Author: Patrick Mours
Date:   Mon Nov 2 18:00:13 2020 +0100
Branches: master
https://developer.blender.org/rBcf7343a35559c7fec2047c3e5d7ef4dd7c1e64a5

Fix Cycles kernel compile error with NanoVDB because of type redefinition

Cycles defines some basic integer types since it cannot use the standard 
headers when
compiling with NVRTC. NanoVDB however only does this when the "__CUDACC_RTC__" 
define
is set and otherwise includes the standard "stdint.h" header which clashes with 
those typedefs.
So for compatibility do the same thing in the Cycles kernel headers. See also 
T81454.

===

M   intern/cycles/kernel/kernel_compat_cuda.h
M   intern/cycles/kernel/kernel_compat_optix.h

===

diff --git a/intern/cycles/kernel/kernel_compat_cuda.h 
b/intern/cycles/kernel/kernel_compat_cuda.h
index 4094e173da9..ea3b78b7cef 100644
--- a/intern/cycles/kernel/kernel_compat_cuda.h
+++ b/intern/cycles/kernel/kernel_compat_cuda.h
@@ -32,8 +32,12 @@
 
 /* Manual definitions so we can compile without CUDA toolkit. */
 
+#ifdef __CUDACC_RTC__
 typedef unsigned int uint32_t;
 typedef unsigned long long uint64_t;
+#else
+#  include 
+#endif
 typedef unsigned short half;
 typedef unsigned long long CUtexObject;
 
diff --git a/intern/cycles/kernel/kernel_compat_optix.h 
b/intern/cycles/kernel/kernel_compat_optix.h
index e58d8b2aa63..064c99ca100 100644
--- a/intern/cycles/kernel/kernel_compat_optix.h
+++ b/intern/cycles/kernel/kernel_compat_optix.h
@@ -31,8 +31,12 @@
 #  define ATTR_FALLTHROUGH
 #endif
 
+#ifdef __CUDACC_RTC__
 typedef unsigned int uint32_t;
 typedef unsigned long long uint64_t;
+#else
+#  include 
+#endif
 typedef unsigned short half;
 typedef unsigned long long CUtexObject;

___
Bf-blender-cvs mailing list
Bf-blender-cvs@blender.org
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

[Bf-blender-cvs] [c26ad42ea43] master: Simplify and enable NanoVDB dependency installation

2020-10-29 Thread Patrick Mours

Commit: c26ad42ea43aa2160a765980087f3dd320db55f0
Author: Patrick Mours
Date:   Thu Oct 29 13:38:16 2020 +0100
Branches: master
https://developer.blender.org/rBc26ad42ea43aa2160a765980087f3dd320db55f0

Simplify and enable NanoVDB dependency installation

Changes NanoVDB to be a standalone dependency that is independent of the 
OpenVDB one.
It works by downloading the "feature/nanovdb" branch of OpenVDB, but using the 
NanoVDB
CMake in the "nanovdb" subdirectory. Since it is header-only, only the install 
target is used.

Reviewed By: brecht

Differential Revision: https://developer.blender.org/D9383

===

M   build_files/build_environment/CMakeLists.txt
M   build_files/build_environment/cmake/harvest.cmake
A   build_files/build_environment/cmake/nanovdb.cmake
M   build_files/build_environment/cmake/openvdb.cmake
M   build_files/build_environment/cmake/options.cmake
M   build_files/build_environment/cmake/versions.cmake
D   build_files/build_environment/patches/openvdb_nanovdb.diff

===

diff --git a/build_files/build_environment/CMakeLists.txt 
b/build_files/build_environment/CMakeLists.txt
index 59c15a03119..0bc85f20c16 100644
--- a/build_files/build_environment/CMakeLists.txt
+++ b/build_files/build_environment/CMakeLists.txt
@@ -85,6 +85,7 @@ include(cmake/flexbison.cmake)
 include(cmake/osl.cmake)
 include(cmake/tbb.cmake)
 include(cmake/openvdb.cmake)
+include(cmake/nanovdb.cmake)
 include(cmake/python.cmake)
 include(cmake/python_site_packages.cmake)
 include(cmake/package_python.cmake)
diff --git a/build_files/build_environment/cmake/harvest.cmake 
b/build_files/build_environment/cmake/harvest.cmake
index 1fb56c4d568..1c5354aeb42 100644
--- a/build_files/build_environment/cmake/harvest.cmake
+++ b/build_files/build_environment/cmake/harvest.cmake
@@ -146,10 +146,8 @@ harvest(openjpeg/lib openjpeg/lib "*.a")
 harvest(opensubdiv/include opensubdiv/include "*.h")
 harvest(opensubdiv/lib opensubdiv/lib "*.a")
 harvest(openvdb/include/openvdb openvdb/include/openvdb "*.h")
-if(WITH_NANOVDB)
-  harvest(openvdb/nanovdb nanovdb/include/nanovdb "*.h")
-endif()
 harvest(openvdb/lib openvdb/lib "*.a")
+harvest(nanovdb/nanovdb nanovdb/include/nanovdb "*.h")
 harvest(xr_openxr_sdk/include/openxr xr_openxr_sdk/include/openxr "*.h")
 harvest(xr_openxr_sdk/lib xr_openxr_sdk/lib "*.a")
 harvest(osl/bin osl/bin "oslc")
diff --git a/build_files/build_environment/cmake/nanovdb.cmake 
b/build_files/build_environment/cmake/nanovdb.cmake
new file mode 100644
index 000..89e7c38642d
--- /dev/null
+++ b/build_files/build_environment/cmake/nanovdb.cmake
@@ -0,0 +1,54 @@
+# * BEGIN GPL LICENSE BLOCK *
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software Foundation,
+# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+#
+# * END GPL LICENSE BLOCK *
+
+set(NANOVDB_EXTRA_ARGS
+   # NanoVDB is header-only, so only need the install target
+  -DNANOVDB_BUILD_UNITTESTS=OFF
+  -DNANOVDB_BUILD_EXAMPLES=OFF
+  -DNANOVDB_BUILD_BENCHMARK=OFF
+  -DNANOVDB_BUILD_DOCS=OFF
+  -DNANOVDB_BUILD_TOOLS=OFF
+  -DNANOVDB_CUDA_KEEP_PTX=OFF
+   # Do not need to include any of the dependencies because of this
+  -DNANOVDB_USE_OPENVDB=OFF
+  -DNANOVDB_USE_OPENGL=OFF
+  -DNANOVDB_USE_OPENCL=OFF
+  -DNANOVDB_USE_CUDA=OFF
+  -DNANOVDB_USE_TBB=OFF
+  -DNANOVDB_USE_BLOSC=OFF
+  -DNANOVDB_USE_ZLIB=OFF
+  -DNANOVDB_USE_OPTIX=OFF
+  -DNANOVDB_ALLOW_FETCHCONTENT=OFF
+)
+
+ExternalProject_Add(nanovdb
+  URL ${NANOVDB_URI}
+  DOWNLOAD_DIR ${DOWNLOAD_DIR}
+  URL_HASH MD5=${NANOVDB_HASH}
+  PREFIX ${BUILD_DIR}/nanovdb
+  SOURCE_SUBDIR nanovdb
+  CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${LIBDIR}/nanovdb ${DEFAULT_CMAKE_FLAGS} 
${NANOVDB_EXTRA_ARGS}
+  INSTALL_DIR ${LIBDIR}/nanovdb
+)
+
+if(WIN32)
+  ExternalProject_Add_Step(nanovdb after_install
+COMMAND ${CMAKE_COMMAND} -E copy_directory ${LIBDIR}/nanovdb/nanovdb 
${HARVEST_TARGET}/nanovdb/include/nanovdb
+DEPENDEES install
+  )
+endif()
diff --git a/build_files/build_environment/cmake/openvdb.cmake 
b/build_files/build_environment/cmake/openvdb.cmake
index 07d0297d5aa..2962f085e1b 100644
--- a/build_files/build_environme

1 2 >

1 - 100 of 178 matches

Mail list logo