Commit: e0ea53ae77c193ef08bb0b9215c5b3ffd84e7c11 Author: Geraldine Chua Date: Sun Jun 10 23:15:29 2018 +0800 Branches: soc-2018-cycles-volumes https://developer.blender.org/rBe0ea53ae77c193ef08bb0b9215c5b3ffd84e7c11
Updates to volume kernel tiling function. 1. OpenCL and CUDA support (mostly untested). 2. Change name of offsets to grid_info since it needs to keep track of other info as well. 3. Several speed and memory optimizations. =================================================================== M intern/cycles/device/device_cpu.cpp M intern/cycles/device/device_cuda.cpp M intern/cycles/device/device_memory.h M intern/cycles/device/opencl/opencl_base.cpp M intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h M intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h M intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h M intern/cycles/render/image.cpp M intern/cycles/render/mesh_volume.cpp M intern/cycles/util/util_sparse_grid.h M intern/cycles/util/util_texture.h =================================================================== diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp index 4e1fa05e168..04578eec954 100644 --- a/intern/cycles/device/device_cpu.cpp +++ b/intern/cycles/device/device_cpu.cpp @@ -377,9 +377,9 @@ public: void tex_alloc(device_memory& mem) { size_t total_memory = mem.memory_size(); - device_memory *offsets = mem.offsets; - if(offsets) { - total_memory += offsets->memory_size(); + device_memory *grid_info = mem.grid_info; + if(grid_info) { + total_memory += grid_info->memory_size(); } VLOG(1) << "Texture allocate: " << mem.name << ", " @@ -418,7 +418,14 @@ public: info.width = mem.real_width; info.height = mem.real_height; info.depth = mem.real_depth; - info.offsets = (uint64_t)(offsets ? offsets->host_pointer : 0); + info.grid_info = 0; + if(grid_info) { + info.grid_info = (uint64_t)grid_info->host_pointer; + info.tiled_width = get_tile_res(info.width); + info.tiled_height = get_tile_res(info.height); + info.last_tile_width = info.width % TILE_SIZE; + info.last_tile_height = info.height % TILE_SIZE; + } need_texture_info = true; } @@ -426,10 +433,10 @@ public: mem.device_size = mem.memory_size(); stats.mem_alloc(mem.device_size); - if(offsets) { - offsets->device_pointer = (device_ptr)offsets->host_pointer; - offsets->device_size = offsets->memory_size(); - stats.mem_alloc(offsets->device_size); + if(grid_info) { + grid_info->device_pointer = (device_ptr)grid_info->host_pointer; + grid_info->device_size = grid_info->memory_size(); + stats.mem_alloc(grid_info->device_size); } } @@ -437,8 +444,8 @@ public: void tex_free(device_memory& mem) { if(mem.device_pointer) { - if(mem.offsets) { - tex_free(*mem.offsets); + if(mem.grid_info) { + tex_free(*mem.grid_info); } mem.device_pointer = 0; stats.mem_free(mem.device_size); diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp index b4529feffa7..d9f146339d2 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -1020,6 +1020,9 @@ public: string bind_name = mem.name; size_t dsize = datatype_size(mem.data_type); size_t size = mem.memory_size(); + if(mem.grid_info) { + size += mem.grid_info->memory_size(); + } CUaddress_mode address_mode = CU_TR_ADDRESS_MODE_WRAP; switch(mem.extension) { diff --git a/intern/cycles/device/device_memory.h b/intern/cycles/device/device_memory.h index a230d3928ca..0f3843b90e5 100644 --- a/intern/cycles/device/device_memory.h +++ b/intern/cycles/device/device_memory.h @@ -203,7 +203,7 @@ public: device_ptr device_pointer; void *host_pointer; void *shared_pointer; - device_memory *offsets = NULL; + device_memory *grid_info = NULL; virtual ~device_memory(); diff --git a/intern/cycles/device/opencl/opencl_base.cpp b/intern/cycles/device/opencl/opencl_base.cpp index bfa2702ad62..0f4a80025b5 100644 --- a/intern/cycles/device/opencl/opencl_base.cpp +++ b/intern/cycles/device/opencl/opencl_base.cpp @@ -526,9 +526,14 @@ void OpenCLDeviceBase::const_copy_to(const char *name, void *host, size_t size) void OpenCLDeviceBase::tex_alloc(device_memory& mem) { + size_t total_memory = mem.memory_size(); + if(mem.grid_info) { + total_memory += mem.grid_info->memory_size(); + } + VLOG(1) << "Texture allocate: " << mem.name << ", " - << string_human_readable_number(mem.memory_size()) << " bytes. (" - << string_human_readable_size(mem.memory_size()) << ")"; + << string_human_readable_number(total_memory) << " bytes. (" + << string_human_readable_size(total_memory) << ")"; memory_manager.alloc(mem.name, mem); /* Set the pointer to non-null to keep code that inspects its value from thinking its unallocated. */ diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h index 7513efc6b15..c43b94db7e0 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h +++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h @@ -75,26 +75,33 @@ template<typename T> struct TextureInterpolator { return read(data[y * width + x]); } - static ccl_always_inline float4 read(const T *data, const int *offsets, + static ccl_always_inline float4 read(const T *data, const int *grid_info, int x, int y, int z, - int width, int height, int depth, - int tiw, int tih, int tid) + int tiw, int tih, int ltw, int lth) { - int index = compute_index(offsets, x, y, z, - width, height, depth, tiw, tih, tid); - return index < 0 ? make_float4(0.0f) : read(data[index]); + int tix = x / TILE_SIZE, itix = x % TILE_SIZE, + tiy = y / TILE_SIZE, itiy = y % TILE_SIZE, + tiz = z / TILE_SIZE, itiz = z % TILE_SIZE; + int dense_index = compute_index_fast(tix, tiy, tiz, tiw, tih) * 2; + int sparse_index = grid_info[dense_index]; + int dims = grid_info[dense_index + 1]; + if(sparse_index < 0) { + return make_float4(0.0f); + } + int itiw = dims & (1 << ST_SHIFT_TRUNCATE_WIDTH) ? ltw : TILE_SIZE; + int itih = dims & (1 << ST_SHIFT_TRUNCATE_HEIGHT) ? lth : TILE_SIZE; + int in_tile_index = compute_index_fast(itix, itiy, itiz, itiw, itih); + return read(data[sparse_index + in_tile_index]); } - static ccl_always_inline float4 read(const T *data, const int *offsets, - int idx, int width, int height, int depth) + static ccl_always_inline float4 read(const T *data, const int *grid_info, + int index, int width, int height, int /*depth*/, + int tiw, int tih, int ltw, int lth) { - int3 c = compute_coordinates(idx, width, height, depth); - int index = compute_index(offsets, c.x, c.y, c.z, - width, height, depth, - get_tile_res(width), - get_tile_res(height), - get_tile_res(depth)); - return index < 0 ? make_float4(0.0f) : read(data[index]); + int x = index % width; + int y = (index / width) % height; + int z = index / (width * height); + return read(data, grid_info, x, y, z, tiw, tih, ltw, lth); } static ccl_always_inline int wrap_periodic(int x, int width) @@ -304,13 +311,14 @@ template<typename T> struct TextureInterpolator { } const T *data = (const T*)info.data; - const int *ofs = (const int*)info.offsets; + const int *grid_info = (const int*)info.grid_info; - if(ofs) { - return read(data, ofs, ix, iy, iz, width, height, depth, - get_tile_res(width), get_tile_res(height), get_tile_res(depth)); + if(grid_info) { + return read(data, grid_info, ix, iy, iz, + info.tiled_width, info.tiled_height, + info.last_tile_width, info.last_tile_height); } - return read(data[compute_index(ix, iy, iz, width, height, depth)]); + return read(data[compute_index_fast(ix, iy, iz, width, height)]); } static ccl_always_inline float4 interp_3d_linear(const TextureInfo& info, @@ -359,33 +367,31 @@ template<typename T> struct TextureInterpolator { float4 r; const T *data = (const T*)info.data; - const int *ofs = (const int*)info.offsets; - - if(ofs) { - int tiw = get_tile_res(width), tih = get_tile_res(height), tid = get_tile_res(depth); - /* Initial check if either voxel is in an active tile. */ - if(!tile_is_active(ofs, ix, iy, iz, tiw, tih, tid) && - !tile_is_active(ofs, nix, niy, niz, tiw, tih, tid)) { - return make_float4(0.0f); - } - r = (1.0f - tz)*(1.0f - ty)*(1.0f - tx) * read(data, ofs, ix, iy, iz, width, height, depth, tiw, tih, tid); - r += (1.0f - tz)*(1.0f - ty)*tx * read(data, ofs, nix, iy, iz, width, height, depth, tiw, tih, tid); - r += (1.0f - tz)*ty*(1.0f - tx) * read(data, ofs, ix, niy, iz, width, height, depth, tiw, tih, tid); - r += (1.0f - tz)*ty*tx * read(data, ofs, nix, niy, iz, width, height, depth, tiw, tih, tid); - r += tz*(1.0f - ty)*(1.0f - tx) * read(data, ofs, ix, iy, niz, width, height, depth, tiw, tih, tid); - r += tz*(1.0f - ty)*tx * read(data, ofs, nix, iy, niz, width, height, depth, tiw, tih, tid); - r += tz*ty*(1.0f - tx) * read(data, ofs, ix, niy, niz, width, height, depth, tiw, tih, tid); - r += tz*ty*tx * read(data, ofs, nix, niy, niz, width, height, depth, tiw, tih, tid); + const int *gi = (const int*)info.grid_info; + + if(gi) { + int tiw = info.tiled_width; + int tih = info.tiled_height; + int ltw = info.last_tile_width; + int lth = info.last_tile_height; + r = (1.0f - tz)*(1.0f - ty)*(1.0f - tx) * read(data, gi, ix, iy, iz, tiw, tih, ltw, lth); + r += (1.0f - tz)*(1.0f - ty)*tx * read(data, gi, nix, iy, iz, tiw, tih, ltw, lth); + r += (1.0f - tz)*ty*(1.0f - tx) * read(data, gi, ix, niy, iz, tiw, tih, ltw, lth); + r += (1.0f - tz)*ty*tx * read(data, gi, nix, niy, iz, tiw, tih, ltw, lth); + r += tz*(1.0f - ty)*(1.0f - tx) * read(data, gi, ix, iy, niz, tiw, tih, ltw, lth); + r += tz*(1.0f - ty)*tx * read(data, gi, nix, iy, niz, tiw, tih, ltw, lth); + r += tz*ty*(1.0f - tx) * read(data, gi, ix, niy, niz, tiw, tih, ltw, lth); + r += tz*ty*tx * read(data, gi, nix, niy, niz, tiw, tih, ltw, lth); } else { - r = (1.0f - tz)*(1.0f - ty)*(1.0f - tx) * read(data[compute_index(ix, iy, iz, width, height, depth)]); - r += (1.0f - tz)*(1.0f - ty)*tx * read(data[compute_index(nix, iy, iz, width, height, depth)]); - r += (1.0f - tz)*ty*(1.0f - tx) * read(data[compute_index(ix, niy, iz, width, height, depth)]); - r += (1.0f - tz)*ty*tx * read(data[compute_index(nix, niy, iz, width, height, depth)]); - r += tz*(1.0f - ty)*(1.0f - tx) * read(data[compute_index(ix, iy, niz, width, height, depth)]); - r += tz*(1.0f - ty)*tx * read(data[compute_index(nix, iy, niz, width, height, depth)]); - r += tz*ty*(1. @@ Diff output truncated at 10240 characters. @@ _______________________________________________ Bf-blender-cvs mailing list Bf-blender-cvs@blender.org https://lists.blender.org/mailman/listinfo/bf-blender-cvs