Commit: 2909c0e56fd48df8dea824a652ddfc1e2ccd56af Author: Jeroen Bakker Date: Thu Apr 16 08:43:32 2020 +0200 Branches: modifier-panels-ui https://developer.blender.org/rB2909c0e56fd48df8dea824a652ddfc1e2ccd56af
GPUImmediate: Use 2 Buffers For (Un)Strict We used to have a single buffer that was shared between strict and unstrict draw calls. This leads to many recreation events for the draw buffers. This patch separates the Unstrict draw buffer from the strict draw buffer. This improves performance on the Windows Intel 10th gen platform. On a reference platform I got 10 FPS before the patch; after this patch it became 34 FPS. Note that the same test can normally reach 60 FPS on a low-end GPU, so this does not solve all the bottlenecks yet. Reviewed By: Clément Foucault Differential Revision: https://developer.blender.org/D7421 =================================================================== M source/blender/gpu/intern/gpu_immediate.c =================================================================== diff --git a/source/blender/gpu/intern/gpu_immediate.c b/source/blender/gpu/intern/gpu_immediate.c index b30fbd66670..72e17dce776 100644 --- a/source/blender/gpu/intern/gpu_immediate.c +++ b/source/blender/gpu/intern/gpu_immediate.c @@ -43,6 +43,14 @@ extern void GPU_matrix_bind(const GPUShaderInterface *); extern bool GPU_matrix_dirty_get(void); +typedef struct ImmediateDrawBuffer { + GLuint vbo_id; + GLubyte *buffer_data; + uint buffer_offset; + uint buffer_size; + uint default_size; +} ImmediateDrawBuffer; + typedef struct { /* TODO: organize this struct by frequency of change (run-time) */ @@ -50,14 +58,14 @@ typedef struct { GPUContext *context; /* current draw call */ - GLubyte *buffer_data; - uint buffer_offset; - uint buffer_bytes_mapped; - uint vertex_len; bool strict_vertex_len; + uint vertex_len; + uint buffer_bytes_mapped; + ImmediateDrawBuffer *active_buffer; GPUPrimType prim_type; - GPUVertFormat vertex_format; + ImmediateDrawBuffer draw_buffer; + ImmediateDrawBuffer draw_buffer_strict; /* current vertex */ uint vertex_idx; @@ -65,7 +73,6 @@ typedef struct { uint16_t unassigned_attr_bits; /* which attributes of current vertex have not been given values? 
*/ - GLuint vbo_id; GLuint vao_id; GLuint bound_program; @@ -76,7 +83,6 @@ typedef struct { /* size of internal buffer */ #define DEFAULT_INTERNAL_BUFFER_SIZE (4 * 1024 * 1024) -static uint imm_buffer_size = DEFAULT_INTERNAL_BUFFER_SIZE; static bool initialized = false; static Immediate imm; @@ -88,9 +94,16 @@ void immInit(void) #endif memset(&imm, 0, sizeof(Immediate)); - imm.vbo_id = GPU_buf_alloc(); - glBindBuffer(GL_ARRAY_BUFFER, imm.vbo_id); - glBufferData(GL_ARRAY_BUFFER, imm_buffer_size, NULL, GL_DYNAMIC_DRAW); + imm.draw_buffer.vbo_id = GPU_buf_alloc(); + imm.draw_buffer.buffer_size = DEFAULT_INTERNAL_BUFFER_SIZE; + imm.draw_buffer.default_size = DEFAULT_INTERNAL_BUFFER_SIZE; + glBindBuffer(GL_ARRAY_BUFFER, imm.draw_buffer.vbo_id); + glBufferData(GL_ARRAY_BUFFER, imm.draw_buffer.buffer_size, NULL, GL_DYNAMIC_DRAW); + imm.draw_buffer_strict.vbo_id = GPU_buf_alloc(); + imm.draw_buffer_strict.buffer_size = 0; + imm.draw_buffer_strict.default_size = 0; + glBindBuffer(GL_ARRAY_BUFFER, imm.draw_buffer_strict.vbo_id); + glBufferData(GL_ARRAY_BUFFER, imm.draw_buffer_strict.buffer_size, NULL, GL_DYNAMIC_DRAW); imm.prim_type = GPU_PRIM_NONE; imm.strict_vertex_len = true; @@ -124,7 +137,8 @@ void immDeactivate(void) void immDestroy(void) { - GPU_buf_free(imm.vbo_id); + GPU_buf_free(imm.draw_buffer.vbo_id); + GPU_buf_free(imm.draw_buffer_strict.vbo_id); initialized = false; } @@ -213,6 +227,7 @@ void immBegin(GPUPrimType prim_type, uint vertex_len) assert(initialized); assert(imm.prim_type == GPU_PRIM_NONE); /* make sure we haven't already begun */ assert(vertex_count_makes_sense_for_primitive(vertex_len, prim_type)); + assert(imm.active_buffer == NULL); #endif imm.prim_type = prim_type; imm.vertex_len = vertex_len; @@ -221,54 +236,58 @@ void immBegin(GPUPrimType prim_type, uint vertex_len) /* how many bytes do we need for this draw call? 
*/ const uint bytes_needed = vertex_buffer_size(&imm.vertex_format, vertex_len); + ImmediateDrawBuffer *active_buffer = imm.strict_vertex_len ? &imm.draw_buffer_strict : + &imm.draw_buffer; + imm.active_buffer = active_buffer; - glBindBuffer(GL_ARRAY_BUFFER, imm.vbo_id); + glBindBuffer(GL_ARRAY_BUFFER, active_buffer->vbo_id); /* does the current buffer have enough room? */ - const uint available_bytes = imm_buffer_size - imm.buffer_offset; + const uint available_bytes = active_buffer->buffer_size - active_buffer->buffer_offset; bool recreate_buffer = false; - if (bytes_needed > imm_buffer_size) { + if (bytes_needed > active_buffer->buffer_size) { /* expand the internal buffer */ - imm_buffer_size = bytes_needed; + active_buffer->buffer_size = bytes_needed; recreate_buffer = true; } - else if (bytes_needed < DEFAULT_INTERNAL_BUFFER_SIZE && - imm_buffer_size > DEFAULT_INTERNAL_BUFFER_SIZE) { + else if (bytes_needed < active_buffer->default_size && + active_buffer->buffer_size > active_buffer->default_size) { /* shrink the internal buffer */ - imm_buffer_size = DEFAULT_INTERNAL_BUFFER_SIZE; + active_buffer->buffer_size = active_buffer->default_size; recreate_buffer = true; } /* ensure vertex data is aligned */ /* Might waste a little space, but it's safe. 
*/ - const uint pre_padding = padding(imm.buffer_offset, imm.vertex_format.stride); + const uint pre_padding = padding(active_buffer->buffer_offset, imm.vertex_format.stride); if (!recreate_buffer && ((bytes_needed + pre_padding) <= available_bytes)) { - imm.buffer_offset += pre_padding; + active_buffer->buffer_offset += pre_padding; } else { /* orphan this buffer & start with a fresh one */ /* this method works on all platforms, old & new */ - glBufferData(GL_ARRAY_BUFFER, imm_buffer_size, NULL, GL_DYNAMIC_DRAW); + glBufferData(GL_ARRAY_BUFFER, active_buffer->buffer_size, NULL, GL_DYNAMIC_DRAW); - imm.buffer_offset = 0; + active_buffer->buffer_offset = 0; } /* printf("mapping %u to %u\n", imm.buffer_offset, imm.buffer_offset + bytes_needed - 1); */ - imm.buffer_data = glMapBufferRange(GL_ARRAY_BUFFER, - imm.buffer_offset, - bytes_needed, - GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT | - (imm.strict_vertex_len ? 0 : GL_MAP_FLUSH_EXPLICIT_BIT)); + active_buffer->buffer_data = glMapBufferRange( + GL_ARRAY_BUFFER, + active_buffer->buffer_offset, + bytes_needed, + GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT | + (imm.strict_vertex_len ? 
0 : GL_MAP_FLUSH_EXPLICIT_BIT)); #if TRUST_NO_ONE - assert(imm.buffer_data != NULL); + assert(active_buffer->buffer_data != NULL); #endif imm.buffer_bytes_mapped = bytes_needed; - imm.vertex_data = imm.buffer_data; + imm.vertex_data = active_buffer->buffer_data; } void immBeginAtMost(GPUPrimType prim_type, uint vertex_len) @@ -338,7 +357,7 @@ static void immDrawSetup(void) for (uint a_idx = 0; a_idx < imm.vertex_format.attr_len; a_idx++) { const GPUVertAttr *a = &imm.vertex_format.attrs[a_idx]; - const uint offset = imm.buffer_offset + a->offset; + const uint offset = imm.active_buffer->buffer_offset + a->offset; const GLvoid *pointer = (const GLubyte *)0 + offset; const uint loc = read_attr_location(&imm.attr_binding, a_idx); @@ -365,6 +384,7 @@ void immEnd(void) { #if TRUST_NO_ONE assert(imm.prim_type != GPU_PRIM_NONE); /* make sure we're between a Begin/End pair */ + assert(imm.active_buffer); #endif uint buffer_bytes_used; @@ -421,12 +441,13 @@ void immEnd(void) // glBindBuffer(GL_ARRAY_BUFFER, 0); // glBindVertexArray(0); /* prep for next immBegin */ - imm.buffer_offset += buffer_bytes_used; + imm.active_buffer->buffer_offset += buffer_bytes_used; } /* prep for next immBegin */ imm.prim_type = GPU_PRIM_NONE; imm.strict_vertex_len = true; + imm.active_buffer = NULL; } static void setAttrValueBit(uint attr_id) _______________________________________________ Bf-blender-cvs mailing list Bf-blender-cvs@blender.org https://lists.blender.org/mailman/listinfo/bf-blender-cvs