Commit: 6532a2e6952641344ac077c3250d9706b5b1c0ae Author: MATILLAT Quentin Date: Fri May 31 15:52:54 2019 +0200 Branches: soc-2019-embree-gpu https://developer.blender.org/rB6532a2e6952641344ac077c3250d9706b5b1c0ae
Initial implementation for Embree GPU =================================================================== M intern/cycles/blender/addon/properties.py M intern/cycles/blender/addon/ui.py M intern/cycles/blender/blender_sync.cpp M intern/cycles/bvh/CMakeLists.txt M intern/cycles/bvh/bvh.cpp A intern/cycles/bvh/bvh_embree_gpu.cpp A intern/cycles/bvh/bvh_embree_gpu.h M intern/cycles/device/device_cuda.cpp M intern/cycles/kernel/kernel_types.h M intern/cycles/render/mesh.cpp =================================================================== diff --git a/intern/cycles/blender/addon/properties.py b/intern/cycles/blender/addon/properties.py index d9e145c8b75..ba0339cf3d0 100644 --- a/intern/cycles/blender/addon/properties.py +++ b/intern/cycles/blender/addon/properties.py @@ -527,6 +527,11 @@ class CyclesRenderSettings(bpy.types.PropertyGroup): description="Use Embree as ray accelerator", default=False, ) + use_bvh_embree_gpu: BoolProperty( + name="Use Embree on GPU (experimental)", + description="Use Embree as ray accelerator", + default=False, + ) debug_use_spatial_splits: BoolProperty( name="Use Spatial Splits", description="Use BVH spatial splits: longer builder time, faster render", diff --git a/intern/cycles/blender/addon/ui.py b/intern/cycles/blender/addon/ui.py index a49efb3567f..463cf78b6e6 100644 --- a/intern/cycles/blender/addon/ui.py +++ b/intern/cycles/blender/addon/ui.py @@ -650,6 +650,10 @@ class CYCLES_RENDER_PT_performance_acceleration_structure(CyclesButtonsPanel, Pa row = col.row() row.active = use_cpu(context) row.prop(cscene, "use_bvh_embree") + if _cycles.with_embree: + row = col.row() + row.active = use_cuda(context) + row.prop(cscene, "use_bvh_embree_gpu") col.prop(cscene, "debug_use_spatial_splits") sub = col.column() sub.active = not cscene.use_bvh_embree or not _cycles.with_embree diff --git a/intern/cycles/blender/blender_sync.cpp b/intern/cycles/blender/blender_sync.cpp index 8d93d517d4e..6ab5294427c 100644 --- 
a/intern/cycles/blender/blender_sync.cpp +++ b/intern/cycles/blender/blender_sync.cpp @@ -697,6 +697,8 @@ SceneParams BlenderSync::get_scene_params(BL::Scene &b_scene, bool background) #ifdef WITH_EMBREE params.bvh_layout = RNA_boolean_get(&cscene, "use_bvh_embree") ? BVH_LAYOUT_EMBREE : params.bvh_layout; + params.bvh_layout = RNA_boolean_get(&cscene, "use_bvh_embree_gpu") ? BVH_LAYOUT_EMBREE_GPU : + params.bvh_layout; #endif return params; } diff --git a/intern/cycles/bvh/CMakeLists.txt b/intern/cycles/bvh/CMakeLists.txt index 36bbd937e1a..3952b307096 100644 --- a/intern/cycles/bvh/CMakeLists.txt +++ b/intern/cycles/bvh/CMakeLists.txt @@ -14,6 +14,7 @@ set(SRC bvh_binning.cpp bvh_build.cpp bvh_embree.cpp + bvh_embree_gpu.cpp bvh_node.cpp bvh_sort.cpp bvh_split.cpp @@ -28,6 +29,7 @@ set(SRC_HEADERS bvh_binning.h bvh_build.h bvh_embree.h + bvh_embree_gpu.h bvh_node.h bvh_params.h bvh_sort.h diff --git a/intern/cycles/bvh/bvh.cpp b/intern/cycles/bvh/bvh.cpp index 53c66777928..b51d6b280d9 100644 --- a/intern/cycles/bvh/bvh.cpp +++ b/intern/cycles/bvh/bvh.cpp @@ -28,6 +28,7 @@ #ifdef WITH_EMBREE # include "bvh/bvh_embree.h" +# include "bvh/bvh_embree_gpu.h" #endif #include "util/util_foreach.h" @@ -107,6 +108,10 @@ BVH *BVH::create(const BVHParams &params, const vector<Object *> &objects) case BVH_LAYOUT_EMBREE: #ifdef WITH_EMBREE return new BVHEmbree(params, objects); +#endif + case BVH_LAYOUT_EMBREE_GPU: +#ifdef WITH_EMBREE + return new BVHEmbreeGPU(params, objects); #endif case BVH_LAYOUT_NONE: case BVH_LAYOUT_ALL: diff --git a/intern/cycles/bvh/bvh_embree_gpu.cpp b/intern/cycles/bvh/bvh_embree_gpu.cpp new file mode 100644 index 00000000000..52367089ce4 --- /dev/null +++ b/intern/cycles/bvh/bvh_embree_gpu.cpp @@ -0,0 +1,458 @@ +/* + * Copyright 2018, Blender Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifdef WITH_EMBREE + +#include "bvh/bvh_embree_gpu.h" + +#include "render/mesh.h" +#include "render/object.h" +#include "util/util_foreach.h" +#include "util/util_logging.h" +#include "util/util_progress.h" + +CCL_NAMESPACE_BEGIN + +typedef struct { + BVHEmbreeGPU *bvhBldr; + Progress *p; +} UserParams; + + +BVHEmbreeGPU::BVHEmbreeGPU(const BVHParams& params_, const vector<Object*>& objects_) + : BVH(params_, objects_), stats(nullptr) +{ + _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); + _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON); + this->rtc_device = rtcNewDevice("verbose=1"); + + rtcSetDeviceErrorFunction(this->rtc_device, [](void*, enum RTCError, const char* str) { + VLOG(1) << str; + }, nullptr); + + pack.root_index = -1; +} + +BVHEmbreeGPU::~BVHEmbreeGPU() +{ + rtcReleaseDevice(this->rtc_device); +} + +ccl::BoundBox RTCBoundBoxToCCL(const RTCBounds *bound) { + return ccl::BoundBox( + make_float3(bound->lower_x, bound->lower_y, bound->lower_z), + make_float3(bound->upper_x, bound->upper_y, bound->upper_z)); + +} +ccl::BoundBox RTCBuildPrimToCCL(const RTCBuildPrimitive &bound) { + return ccl::BoundBox( + make_float3(bound.lower_x, bound.lower_y, bound.lower_z), + make_float3(bound.upper_x, bound.upper_y, bound.upper_z)); + +} + +void CCLBoundBoxToRTC(const ccl::BoundBox &bb, RTCBounds *bound) { + bound->lower_x = bb.min.x; + bound->lower_y = bb.min.y; + bound->lower_z = bb.min.z; + + bound->upper_x = bb.max.x; + bound->upper_y = bb.max.y; + bound->upper_z = bb.max.z; +} + +void BVHEmbreeGPU::build(Progress& progress, Stats *stats_) 
+{ + this->stats = stats_; + rtcSetDeviceMemoryMonitorFunction(this->rtc_device, [](void* userPtr, const ssize_t bytes, const bool) -> bool { + Stats *stats = static_cast<Stats*>(userPtr); + if(stats == NULL) return true; + + if(bytes > 0) { + stats->mem_alloc(static_cast<size_t>(bytes)); + } + else { + stats->mem_free(static_cast<size_t>(-bytes)); + } + return true; + }, stats); + + progress.set_substatus("Building BVH"); + + + struct RTCBuildArguments args = rtcDefaultBuildArguments(); + args.byteSize = sizeof(args); + + const bool dynamic = params.bvh_type == SceneParams::BVH_DYNAMIC; + + args.buildFlags = (dynamic ? RTC_BUILD_FLAG_DYNAMIC : RTC_BUILD_FLAG_NONE); + args.buildQuality = dynamic ? RTC_BUILD_QUALITY_LOW : + (params.use_spatial_split ? RTC_BUILD_QUALITY_HIGH : RTC_BUILD_QUALITY_MEDIUM); + + /* Count triangles first so we can reserve arrays once. */ + size_t prim_count = 0; + + foreach(Object *ob, objects) { + prim_count += ob->mesh->num_triangles(); + } + + pack.prim_object.reserve(prim_count); + pack.prim_type.reserve(prim_count); + pack.prim_index.reserve(prim_count); + pack.prim_tri_index.reserve(prim_count); + + this->offset.resize(objects.size()); + unsigned int i = 0; + + pack.object_node.clear(); + + vector<RTCBuildPrimitive> prims; + prims.reserve(objects.size() * 3); + foreach(Object *ob, objects) { + add_object(ob, i); + + const float3 *mesh_verts = ob->mesh->verts.data(); + for(size_t tri = 0; tri < ob->mesh->num_triangles(); ++tri) { + BoundBox bb = BoundBox::empty; + ob->mesh->get_triangle(tri).bounds_grow(mesh_verts, bb); + RTCBuildPrimitive prim; + prim.lower_x = bb.min.x; + prim.lower_y = bb.min.y; + prim.lower_z = bb.min.z; + prim.upper_x = bb.max.x; + prim.upper_y = bb.max.y; + prim.upper_z = bb.max.z; + prim.geomID = i; + prim.primID = tri; + + prims.push_back(prim); + } + + ++i; + if(progress.get_cancel()) return; + } + + if(progress.get_cancel()) { + stats = nullptr; + return; + } + + args.bvh = rtcNewBVH(this->rtc_device); + 
args.maxBranchingFactor = 2; + + args.primitives = prims.data(); + args.primitiveCount = prims.size(); + args.primitiveArrayCapacity = prims.capacity(); + + args.sahBlockSize = 1; + args.maxDepth = BVHParams::MAX_DEPTH; + args.traversalCost = this->params.sah_node_cost; + // 2 is a corrective factor for Embree (may depend on the scene for optimal results) + args.intersectionCost = this->params.sah_primitive_cost * 2; + + args.createNode = [](RTCThreadLocalAllocator alloc, unsigned int numChildren, void*) -> void* { + CHECK_EQ(numChildren, 2) << "Should only have two children"; + void* ptr = rtcThreadLocalAlloc(alloc,sizeof(InnerNode),16); + return new (ptr) InnerNode(BoundBox::empty); + }; + args.setNodeBounds = [](void* nodePtr, const RTCBounds** bounds, unsigned int numChildren, void*) { + InnerNode *node = static_cast<InnerNode*>(nodePtr); + node->num_children_ = static_cast<int>(numChildren); + for (size_t i=0; i < numChildren; i++) { + node->bounds.grow(RTCBoundBoxToCCL(bounds[i])); + } + }; + args.setNodeChildren = [](void* nodePtr, void** childPtr, unsigned int numChildren, void*) { + InnerNode *node = static_cast<InnerNode*>(nodePtr); + node->num_children_ = static_cast<int>(numChildren); + for (size_t i=0; i < numChildren; i++) { + node->children[i] = static_cast<BVHNode*>(childPtr[i]); + } + }; + args.createLeaf = [](RTCThreadLocalAllocator alloc, const RTCBuildPrimitive* prims, size_t numPrims, void *user_ptr) -> void* { + UserParams *userParams = static_cast<UserParams*>(user_ptr); + void* ptr = rtcThreadLocalAlloc(alloc, sizeof(LeafNode), 16); + + int min = 999999, + max = 0; + uint visibility = 0; + BoundBox bounds = BoundBox::empty; + + for(size_t i = 0; i < numPrims; i++) { + const Object *ob = userParams->bvhB @@ Diff output truncated at 10240 characters. @@ _______________________________________________ Bf-blender-cvs mailing list Bf-blender-cvs@blender.org https://lists.blender.org/mailman/listinfo/bf-blender-cvs