[Bf-blender-cvs] [e0ea53ae77c] soc-2018-cycles-volumes: Updates to volume kernel tiling function.

Geraldine Chua Sun, 10 Jun 2018 08:16:54 -0700

Commit: e0ea53ae77c193ef08bb0b9215c5b3ffd84e7c11
Author: Geraldine Chua
Date:   Sun Jun 10 23:15:29 2018 +0800
Branches: soc-2018-cycles-volumes
https://developer.blender.org/rBe0ea53ae77c193ef08bb0b9215c5b3ffd84e7c11


Updates to volume kernel tiling function.

1. OpenCL and CUDA support (mostly untested).
2. Rename offsets to grid_info, since the buffer now needs to keep track
of other per-tile info (not just offsets) as well.
3. Several speed and memory optimizations.

===================================================================

M       intern/cycles/device/device_cpu.cpp
M       intern/cycles/device/device_cuda.cpp
M       intern/cycles/device/device_memory.h
M       intern/cycles/device/opencl/opencl_base.cpp
M       intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h
M       intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h
M       intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h
M       intern/cycles/render/image.cpp
M       intern/cycles/render/mesh_volume.cpp
M       intern/cycles/util/util_sparse_grid.h
M       intern/cycles/util/util_texture.h

===================================================================

diff --git a/intern/cycles/device/device_cpu.cpp 
b/intern/cycles/device/device_cpu.cpp
index 4e1fa05e168..04578eec954 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -377,9 +377,9 @@ public:
        void tex_alloc(device_memory& mem)
        {
                size_t total_memory = mem.memory_size();
-               device_memory *offsets = mem.offsets;
-               if(offsets) {
-                       total_memory += offsets->memory_size();
+               device_memory *grid_info = mem.grid_info;
+               if(grid_info) {
+                       total_memory += grid_info->memory_size();
                }
 
                VLOG(1) << "Texture allocate: " << mem.name << ", "
@@ -418,7 +418,14 @@ public:
                        info.width = mem.real_width;
                        info.height = mem.real_height;
                        info.depth = mem.real_depth;
-                       info.offsets = (uint64_t)(offsets ? 
offsets->host_pointer : 0);
+                       info.grid_info = 0;
+                       if(grid_info) {
+                               info.grid_info = 
(uint64_t)grid_info->host_pointer;
+                               info.tiled_width = get_tile_res(info.width);
+                               info.tiled_height = get_tile_res(info.height);
+                               info.last_tile_width = info.width % TILE_SIZE;
+                               info.last_tile_height = info.height % TILE_SIZE;
+                       }
                        need_texture_info = true;
                }
 
@@ -426,10 +433,10 @@ public:
                mem.device_size = mem.memory_size();
                stats.mem_alloc(mem.device_size);
 
-               if(offsets) {
-                       offsets->device_pointer = 
(device_ptr)offsets->host_pointer;
-                       offsets->device_size = offsets->memory_size();
-                       stats.mem_alloc(offsets->device_size);
+               if(grid_info) {
+                       grid_info->device_pointer = 
(device_ptr)grid_info->host_pointer;
+                       grid_info->device_size = grid_info->memory_size();
+                       stats.mem_alloc(grid_info->device_size);
                }
 
        }
@@ -437,8 +444,8 @@ public:
        void tex_free(device_memory& mem)
        {
                if(mem.device_pointer) {
-                       if(mem.offsets) {
-                               tex_free(*mem.offsets);
+                       if(mem.grid_info) {
+                               tex_free(*mem.grid_info);
                        }
                        mem.device_pointer = 0;
                        stats.mem_free(mem.device_size);
diff --git a/intern/cycles/device/device_cuda.cpp 
b/intern/cycles/device/device_cuda.cpp
index b4529feffa7..d9f146339d2 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -1020,6 +1020,9 @@ public:
                string bind_name = mem.name;
                size_t dsize = datatype_size(mem.data_type);
                size_t size = mem.memory_size();
+               if(mem.grid_info) {
+                       size += mem.grid_info->memory_size();
+               }
 
                CUaddress_mode address_mode = CU_TR_ADDRESS_MODE_WRAP;
                switch(mem.extension) {
diff --git a/intern/cycles/device/device_memory.h 
b/intern/cycles/device/device_memory.h
index a230d3928ca..0f3843b90e5 100644
--- a/intern/cycles/device/device_memory.h
+++ b/intern/cycles/device/device_memory.h
@@ -203,7 +203,7 @@ public:
        device_ptr device_pointer;
        void *host_pointer;
        void *shared_pointer;
-       device_memory *offsets = NULL;
+       device_memory *grid_info = NULL;
 
        virtual ~device_memory();
 
diff --git a/intern/cycles/device/opencl/opencl_base.cpp 
b/intern/cycles/device/opencl/opencl_base.cpp
index bfa2702ad62..0f4a80025b5 100644
--- a/intern/cycles/device/opencl/opencl_base.cpp
+++ b/intern/cycles/device/opencl/opencl_base.cpp
@@ -526,9 +526,14 @@ void OpenCLDeviceBase::const_copy_to(const char *name, 
void *host, size_t size)
 
 void OpenCLDeviceBase::tex_alloc(device_memory& mem)
 {
+       size_t total_memory = mem.memory_size();
+       if(mem.grid_info) {
+               total_memory += mem.grid_info->memory_size();
+       }
+
        VLOG(1) << "Texture allocate: " << mem.name << ", "
-               << string_human_readable_number(mem.memory_size()) << " bytes. 
("
-               << string_human_readable_size(mem.memory_size()) << ")";
+               << string_human_readable_number(total_memory) << " bytes. ("
+               << string_human_readable_size(total_memory) << ")";
 
        memory_manager.alloc(mem.name, mem);
        /* Set the pointer to non-null to keep code that inspects its value 
from thinking its unallocated. */
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h 
b/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h
index 7513efc6b15..c43b94db7e0 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h
@@ -75,26 +75,33 @@ template<typename T> struct TextureInterpolator  {
                return read(data[y * width + x]);
        }
 
-       static ccl_always_inline float4 read(const T *data, const int *offsets,
+       static ccl_always_inline float4 read(const T *data, const int 
*grid_info,
                                             int x, int y, int z,
-                                            int width, int height, int depth,
-                                            int tiw, int tih, int tid)
+                                            int tiw, int tih, int ltw, int lth)
        {
-               int index = compute_index(offsets, x, y, z,
-                                         width, height, depth, tiw, tih, tid);
-               return index < 0 ? make_float4(0.0f) : read(data[index]);
+               int tix = x / TILE_SIZE, itix = x % TILE_SIZE,
+                   tiy = y / TILE_SIZE, itiy = y % TILE_SIZE,
+                   tiz = z / TILE_SIZE, itiz = z % TILE_SIZE;
+               int dense_index = compute_index_fast(tix, tiy, tiz, tiw, tih) * 
2;
+               int sparse_index = grid_info[dense_index];
+               int dims = grid_info[dense_index + 1];
+               if(sparse_index < 0) {
+                       return make_float4(0.0f);
+               }
+               int itiw = dims & (1 << ST_SHIFT_TRUNCATE_WIDTH) ? ltw : 
TILE_SIZE;
+               int itih = dims & (1 << ST_SHIFT_TRUNCATE_HEIGHT) ? lth : 
TILE_SIZE;
+               int in_tile_index = compute_index_fast(itix, itiy, itiz, itiw, 
itih);
+               return read(data[sparse_index + in_tile_index]);
        }
 
-       static ccl_always_inline float4 read(const T *data, const int *offsets,
-                                            int idx, int width, int height, 
int depth)
+       static ccl_always_inline float4 read(const T *data, const int 
*grid_info,
+                                            int index, int width, int height, 
int /*depth*/,
+                                            int tiw, int tih, int ltw, int lth)
        {
-               int3 c = compute_coordinates(idx, width, height, depth);
-               int index = compute_index(offsets, c.x, c.y, c.z,
-                                         width, height, depth,
-                                         get_tile_res(width),
-                                         get_tile_res(height),
-                                         get_tile_res(depth));
-               return index < 0 ? make_float4(0.0f) : read(data[index]);
+               int x = index % width;
+               int y = (index / width) % height;
+               int z = index / (width * height);
+               return read(data, grid_info, x, y, z, tiw, tih, ltw, lth);
        }
 
        static ccl_always_inline int wrap_periodic(int x, int width)
@@ -304,13 +311,14 @@ template<typename T> struct TextureInterpolator  {
                }
 
                const T *data = (const T*)info.data;
-               const int *ofs = (const int*)info.offsets;
+               const int *grid_info = (const int*)info.grid_info;
 
-               if(ofs) {
-                       return read(data, ofs, ix, iy, iz, width, height, depth,
-                                   get_tile_res(width), get_tile_res(height), 
get_tile_res(depth));
+               if(grid_info) {
+                       return read(data, grid_info, ix, iy, iz,
+                                   info.tiled_width, info.tiled_height,
+                                   info.last_tile_width, 
info.last_tile_height);
                }
-               return read(data[compute_index(ix, iy, iz, width, height, 
depth)]);
+               return read(data[compute_index_fast(ix, iy, iz, width, 
height)]);
        }
 
        static ccl_always_inline float4 interp_3d_linear(const TextureInfo& 
info,
@@ -359,33 +367,31 @@ template<typename T> struct TextureInterpolator  {
 
                float4 r;
                const T *data = (const T*)info.data;
-               const int *ofs = (const int*)info.offsets;
-
-               if(ofs) {
-                       int tiw = get_tile_res(width), tih = 
get_tile_res(height), tid = get_tile_res(depth);
-                       /* Initial check if either voxel is in an active tile. 
*/
-                       if(!tile_is_active(ofs, ix, iy, iz, tiw, tih, tid) &&
-                          !tile_is_active(ofs, nix, niy, niz, tiw, tih, tid)) {
-                               return make_float4(0.0f);
-                       }
-                       r  = (1.0f - tz)*(1.0f - ty)*(1.0f - tx) * read(data, 
ofs, ix,  iy,  iz,  width, height, depth, tiw, tih, tid);
-                       r += (1.0f - tz)*(1.0f - ty)*tx                  * 
read(data, ofs, nix, iy,  iz,  width, height, depth, tiw, tih, tid);
-                       r += (1.0f - tz)*ty*(1.0f - tx)                  * 
read(data, ofs, ix,  niy, iz,  width, height, depth, tiw, tih, tid);
-                       r += (1.0f - tz)*ty*tx                                  
 * read(data, ofs, nix, niy, iz,  width, height, depth, tiw, tih, tid);
-                       r += tz*(1.0f - ty)*(1.0f - tx)                  * 
read(data, ofs, ix,  iy,  niz, width, height, depth, tiw, tih, tid);
-                       r += tz*(1.0f - ty)*tx                                  
 * read(data, ofs, nix, iy,  niz, width, height, depth, tiw, tih, tid);
-                       r += tz*ty*(1.0f - tx)                                  
 * read(data, ofs, ix,  niy, niz, width, height, depth, tiw, tih, tid);
-                       r += tz*ty*tx                                           
         * read(data, ofs, nix, niy, niz, width, height, depth, tiw, tih, tid);
+               const int *gi = (const int*)info.grid_info;
+
+               if(gi) {
+                       int tiw = info.tiled_width;
+                       int tih = info.tiled_height;
+                       int ltw = info.last_tile_width;
+                       int lth = info.last_tile_height;
+                       r  = (1.0f - tz)*(1.0f - ty)*(1.0f - tx) * read(data, 
gi, ix,  iy,  iz,  tiw, tih, ltw, lth);
+                       r += (1.0f - tz)*(1.0f - ty)*tx          * read(data, 
gi, nix, iy,  iz,  tiw, tih, ltw, lth);
+                       r += (1.0f - tz)*ty*(1.0f - tx)          * read(data, 
gi, ix,  niy, iz,  tiw, tih, ltw, lth);
+                       r += (1.0f - tz)*ty*tx                   * read(data, 
gi, nix, niy, iz,  tiw, tih, ltw, lth);
+                       r += tz*(1.0f - ty)*(1.0f - tx)          * read(data, 
gi, ix,  iy,  niz, tiw, tih, ltw, lth);
+                       r += tz*(1.0f - ty)*tx                   * read(data, 
gi, nix, iy,  niz, tiw, tih, ltw, lth);
+                       r += tz*ty*(1.0f - tx)                   * read(data, 
gi, ix,  niy, niz, tiw, tih, ltw, lth);
+                       r += tz*ty*tx                            * read(data, 
gi, nix, niy, niz, tiw, tih, ltw, lth);
                }
                else {
-                       r  = (1.0f - tz)*(1.0f - ty)*(1.0f - tx) * 
read(data[compute_index(ix,  iy,  iz,  width, height, depth)]);
-                       r += (1.0f - tz)*(1.0f - ty)*tx                  * 
read(data[compute_index(nix, iy,  iz,  width, height, depth)]);
-                       r += (1.0f - tz)*ty*(1.0f - tx)                  * 
read(data[compute_index(ix,  niy, iz,  width, height, depth)]);
-                       r += (1.0f - tz)*ty*tx                                  
 * read(data[compute_index(nix, niy, iz,  width, height, depth)]);
-                       r += tz*(1.0f - ty)*(1.0f - tx)                  * 
read(data[compute_index(ix,  iy,  niz, width, height, depth)]);
-                       r += tz*(1.0f - ty)*tx                                  
 * read(data[compute_index(nix, iy,  niz, width, height, depth)]);
-                       r += tz*ty*(1.

@@ Diff output truncated at 10240 characters. @@

_______________________________________________
Bf-blender-cvs mailing list
Bf-blender-cvs@blender.org
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

[Bf-blender-cvs] [e0ea53ae77c] soc-2018-cycles-volumes: Updates to volume kernel tiling function.

Reply via email to