Re: [Mesa-dev] [PATCH v3] i965 : optimized bucket index calculation
> On 11/06/2017 08:30 PM, aravindan.muthuku...@intel.com wrote: > > From: Aravindan Muthukumar > > > > Now the complexity has been reduced to O(1) > > > > Algorithm calculates the index using matrix method. > > Matrix arrangement is as below: > > Assuming PAGE_SIZE is 4096. > > > > 1*4096 2*40963*40964*4096 > > 5*4096 6*40967*40968*4096 > > 10*4096 12*4096 14*4096 16*4096 > > 20*4096 24*4096 28*4096 32*4096 > >... ... ... ... > >... ... ... ... > >... ... ... max_cache_size > > > > From this matrix its clearly seen that every row follows the below > > way: > > ... ... ...n > > n+(1/4)n n+(1/2)n n+(3/4)n2n > > > > Row is calculated as log2(size/PAGE_SIZE) Column is calculated as > > converting the difference between the elements to fit into power size > > of two and indexing it. > > > > Final Index is (row*4)+(col-1) > > > > Tested with Intel Mesa CI. > > > > Improves performance of 3DMark on BXT by 0.705966% +/- 0.229767% > > (n=20) > > > > v3: review comments implemented (Ian). > > v2: review comments implemented (Jason). > > > > Signed-off-by: Aravindan Muthukumar > > Signed-off-by: Kedar Karanje > > Reviewed-by: Yogesh Marathe > > --- > > src/mesa/drivers/dri/i965/brw_bufmgr.c | 38 > > +++--- > > 1 file changed, 30 insertions(+), 8 deletions(-) > > > > diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c > > b/src/mesa/drivers/dri/i965/brw_bufmgr.c > > index 17036b5..9a423da 100644 > > --- a/src/mesa/drivers/dri/i965/brw_bufmgr.c > > +++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c > > @@ -86,6 +86,8 @@ > > > > #define memclear(s) memset(&s, 0, sizeof(s)) > > > > +#define PAGE_SIZE 4096 > > + > > #define FILE_DEBUG_FLAG DEBUG_BUFMGR > > > > static inline int > > @@ -180,19 +182,35 @@ bo_tile_pitch(struct brw_bufmgr *bufmgr, uint32_t > pitch, uint32_t tiling) > > return ALIGN(pitch, tile_width); > > } > > > > +/* > > + * This function finds the correct bucket fit for the input size. > > + * The function works with O(1) complexity when the requested size > > + * was queried instead of iterating the size through all the buckets. > > + */ > > static struct bo_cache_bucket * > > bucket_for_size(struct brw_bufmgr *bufmgr, uint64_t size) { > > - int i; > > + /* Calculating the pages and rounding up to the page size. */ > > + const unsigned int pages = (size + PAGE_SIZE - 1) / PAGE_SIZE; > > > > - for (i = 0; i < bufmgr->num_buckets; i++) { > > - struct bo_cache_bucket *bucket = &bufmgr->cache_bucket[i]; > > - if (bucket->size >= size) { > > - return bucket; > > - } > > - } > > + /* Finding the row number based on the calculated pages. */ > > + const unsigned int rows = 30 - __builtin_clz((pages - 1) | 3); > > > > - return NULL; > > Why did you make random (and incorrect) style changes and delete > (useful) comments from the code I sent? > > > > Thanks Ian. I added comments based on my understanding and I get the > > > point I'll push v4 with your comments. > > + const unsigned int row_max_pages = 4 << rows; > > + const unsigned int prev_row_max_pages = (row_max_pages / 2) & ~2; > > + > > + /* Finding the column number using column interval. */ > > + int col_size_log2 = rows - 1; > > + col_size_log2 += (col_size_log2 < 0); > > + > > + const unsigned int col = ( (pages - prev_row_max_pages + > > +( (1 << col_size_log2) - 1) ) >> > > + col_size_log2 ); > > + > > + /* Calculating the index based on the row and column. */ > > + const unsigned int index = (rows * 4) + (col - 1); > > + > > + return (index < bufmgr->num_buckets) ? > > + &bufmgr->cache_bucket[index] : NULL; > > } > > > > int > > @@ -1254,6 +1272,10 @@ add_bucket(struct brw_bufmgr *bufmgr, int size) > > list_inithead(&bufmgr->cache_bucket[i].head); > > bufmgr->cache_bucket[i].size = size; > > bufmgr->num_buckets++; > > + > > + assert(bucket_for_size(bufmgr, size) == &bufmgr->cache_bucket[i]); > > + assert(bucket_for_size(bufmgr, size - 2048) == > > &bufmgr->cache_bucket[i]); > > + assert(bucket_for_size(bufmgr, size + 1) != > > + &bufmgr->cache_bucket[i]); > > } > > > > static void > > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v3] i965 : optimized bucket index calculation
On 11/06/2017 08:30 PM, aravindan.muthuku...@intel.com wrote: > From: Aravindan Muthukumar > > Now the complexity has been reduced to O(1) > > Algorithm calculates the index using matrix method. > Matrix arrangement is as below: > Assuming PAGE_SIZE is 4096. > > 1*4096 2*40963*40964*4096 > 5*4096 6*40967*40968*4096 > 10*4096 12*4096 14*4096 16*4096 > 20*4096 24*4096 28*4096 32*4096 >... ... ... ... >... ... ... ... >... ... ... max_cache_size > > From this matrix its clearly seen that every row > follows the below way: > ... ... ...n > n+(1/4)n n+(1/2)n n+(3/4)n2n > > Row is calculated as log2(size/PAGE_SIZE) > Column is calculated as converting the difference > between the elements to fit into power size of two > and indexing it. > > Final Index is (row*4)+(col-1) > > Tested with Intel Mesa CI. > > Improves performance of 3DMark on BXT by 0.705966% +/- 0.229767% (n=20) > > v3: review comments implemented (Ian). > v2: review comments implemented (Jason). > > Signed-off-by: Aravindan Muthukumar > Signed-off-by: Kedar Karanje > Reviewed-by: Yogesh Marathe > --- > src/mesa/drivers/dri/i965/brw_bufmgr.c | 38 > +++--- > 1 file changed, 30 insertions(+), 8 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c > b/src/mesa/drivers/dri/i965/brw_bufmgr.c > index 17036b5..9a423da 100644 > --- a/src/mesa/drivers/dri/i965/brw_bufmgr.c > +++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c > @@ -86,6 +86,8 @@ > > #define memclear(s) memset(&s, 0, sizeof(s)) > > +#define PAGE_SIZE 4096 > + > #define FILE_DEBUG_FLAG DEBUG_BUFMGR > > static inline int > @@ -180,19 +182,35 @@ bo_tile_pitch(struct brw_bufmgr *bufmgr, uint32_t > pitch, uint32_t tiling) > return ALIGN(pitch, tile_width); > } > > +/* > + * This function finds the correct bucket fit for the input size. > + * The function works with O(1) complexity when the requested size > + * was queried instead of iterating the size through all the buckets. > + */ > static struct bo_cache_bucket * > bucket_for_size(struct brw_bufmgr *bufmgr, uint64_t size) > { > - int i; > + /* Calculating the pages and rounding up to the page size. */ > + const unsigned int pages = (size + PAGE_SIZE - 1) / PAGE_SIZE; > > - for (i = 0; i < bufmgr->num_buckets; i++) { > - struct bo_cache_bucket *bucket = &bufmgr->cache_bucket[i]; > - if (bucket->size >= size) { > - return bucket; > - } > - } > + /* Finding the row number based on the calculated pages. */ > + const unsigned int rows = 30 - __builtin_clz((pages - 1) | 3); > > - return NULL; Why did you make random (and incorrect) style changes and delete (useful) comments from the code I sent? > + const unsigned int row_max_pages = 4 << rows; > + const unsigned int prev_row_max_pages = (row_max_pages / 2) & ~2; > + > + /* Finding the column number using column interval. */ > + int col_size_log2 = rows - 1; > + col_size_log2 += (col_size_log2 < 0); > + > + const unsigned int col = ( (pages - prev_row_max_pages + > +( (1 << col_size_log2) - 1) ) >> col_size_log2 ); > + > + /* Calculating the index based on the row and column. */ > + const unsigned int index = (rows * 4) + (col - 1); > + > + return (index < bufmgr->num_buckets) ? > + &bufmgr->cache_bucket[index] : NULL; > } > > int > @@ -1254,6 +1272,10 @@ add_bucket(struct brw_bufmgr *bufmgr, int size) > list_inithead(&bufmgr->cache_bucket[i].head); > bufmgr->cache_bucket[i].size = size; > bufmgr->num_buckets++; > + > + assert(bucket_for_size(bufmgr, size) == &bufmgr->cache_bucket[i]); > + assert(bucket_for_size(bufmgr, size - 2048) == &bufmgr->cache_bucket[i]); > + assert(bucket_for_size(bufmgr, size + 1) != &bufmgr->cache_bucket[i]); > } > > static void > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v3] i965 : optimized bucket index calculation
From: Aravindan Muthukumar Now the complexity has been reduced to O(1) Algorithm calculates the index using matrix method. Matrix arrangement is as below: Assuming PAGE_SIZE is 4096. 1*4096 2*40963*40964*4096 5*4096 6*40967*40968*4096 10*4096 12*4096 14*4096 16*4096 20*4096 24*4096 28*4096 32*4096 ... ... ... ... ... ... ... ... ... ... ... max_cache_size From this matrix its clearly seen that every row follows the below way: ... ... ...n n+(1/4)n n+(1/2)n n+(3/4)n2n Row is calculated as log2(size/PAGE_SIZE) Column is calculated as converting the difference between the elements to fit into power size of two and indexing it. Final Index is (row*4)+(col-1) Tested with Intel Mesa CI. Improves performance of 3DMark on BXT by 0.705966% +/- 0.229767% (n=20) v3: review comments implemented (Ian). v2: review comments implemented (Jason). Signed-off-by: Aravindan Muthukumar Signed-off-by: Kedar Karanje Reviewed-by: Yogesh Marathe --- src/mesa/drivers/dri/i965/brw_bufmgr.c | 38 +++--- 1 file changed, 30 insertions(+), 8 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c b/src/mesa/drivers/dri/i965/brw_bufmgr.c index 17036b5..9a423da 100644 --- a/src/mesa/drivers/dri/i965/brw_bufmgr.c +++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c @@ -86,6 +86,8 @@ #define memclear(s) memset(&s, 0, sizeof(s)) +#define PAGE_SIZE 4096 + #define FILE_DEBUG_FLAG DEBUG_BUFMGR static inline int @@ -180,19 +182,35 @@ bo_tile_pitch(struct brw_bufmgr *bufmgr, uint32_t pitch, uint32_t tiling) return ALIGN(pitch, tile_width); } +/* + * This function finds the correct bucket fit for the input size. + * The function works with O(1) complexity when the requested size + * was queried instead of iterating the size through all the buckets. + */ static struct bo_cache_bucket * bucket_for_size(struct brw_bufmgr *bufmgr, uint64_t size) { - int i; + /* Calculating the pages and rounding up to the page size. */ + const unsigned int pages = (size + PAGE_SIZE - 1) / PAGE_SIZE; - for (i = 0; i < bufmgr->num_buckets; i++) { - struct bo_cache_bucket *bucket = &bufmgr->cache_bucket[i]; - if (bucket->size >= size) { - return bucket; - } - } + /* Finding the row number based on the calculated pages. */ + const unsigned int rows = 30 - __builtin_clz((pages - 1) | 3); - return NULL; + const unsigned int row_max_pages = 4 << rows; + const unsigned int prev_row_max_pages = (row_max_pages / 2) & ~2; + + /* Finding the column number using column interval. */ + int col_size_log2 = rows - 1; + col_size_log2 += (col_size_log2 < 0); + + const unsigned int col = ( (pages - prev_row_max_pages + +( (1 << col_size_log2) - 1) ) >> col_size_log2 ); + + /* Calculating the index based on the row and column. */ + const unsigned int index = (rows * 4) + (col - 1); + + return (index < bufmgr->num_buckets) ? + &bufmgr->cache_bucket[index] : NULL; } int @@ -1254,6 +1272,10 @@ add_bucket(struct brw_bufmgr *bufmgr, int size) list_inithead(&bufmgr->cache_bucket[i].head); bufmgr->cache_bucket[i].size = size; bufmgr->num_buckets++; + + assert(bucket_for_size(bufmgr, size) == &bufmgr->cache_bucket[i]); + assert(bucket_for_size(bufmgr, size - 2048) == &bufmgr->cache_bucket[i]); + assert(bucket_for_size(bufmgr, size + 1) != &bufmgr->cache_bucket[i]); } static void -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v3] i965 : optimized bucket index calculation.
From: Aravindan Muthukumar Avoiding the loop which was running with O(n) complexity. Now the complexity has been reduced to O(1) Algorithm calculates the index using matrix method. Matrix arrangement is as below: Assuming PAGE_SIZE is 4096. 1*4096 2*40963*40964*4096 5*4096 6*40967*40968*4096 10*4096 12*4096 14*4096 16*4096 20*4096 24*4096 28*4096 32*4096 ... ... ... ... ... ... ... ... ... ... ... max_cache_size From this matrix its clearly seen that every row follows the below way: ... ... ...n n+(1/4)n n+(1/2)n n+(3/4)n2n Row is calulated as log2(size/PAGE_SIZE) Column is calculated as converting the difference between the elements to fit into power size of two and indexing it. Final Index is (row*4)+(col-1) Tested with Intel Mesa CI. Improves performance of 3d Mark on Broxton. Analyzed using Compare Perf Analyser: Average : 201.2 +/- 65.4836 (n=20) Percentage : 0.705966% +/- 0.229767% (n=20) v3: Review comments implemented Signed-off-by: Aravindan Muthukumar Signed-off-by: Kedar Karanje Reviewed-by: Yogesh Marathe --- src/mesa/drivers/dri/i965/brw_bufmgr.c | 42 -- 1 file changed, 35 insertions(+), 7 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c b/src/mesa/drivers/dri/i965/brw_bufmgr.c index 17036b5..49514a4 100644 --- a/src/mesa/drivers/dri/i965/brw_bufmgr.c +++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c @@ -86,6 +86,8 @@ #define memclear(s) memset(&s, 0, sizeof(s)) +#define PAGE_SIZE 4096 + #define FILE_DEBUG_FLAG DEBUG_BUFMGR static inline int @@ -180,19 +182,41 @@ bo_tile_pitch(struct brw_bufmgr *bufmgr, uint32_t pitch, uint32_t tiling) return ALIGN(pitch, tile_width); } +static inline int +ilog2_round_up(int value) +{ + assert(value != 0); + return 32 - __builtin_clz(value - 1); +} + +/* + * This function finds the correct bucket fit for the input size. + * The function works with O(1) complexity when the requested size + * was queried instead of iterating the size through all the buckets. + */ static struct bo_cache_bucket * bucket_for_size(struct brw_bufmgr *bufmgr, uint64_t size) { - int i; + int index; - for (i = 0; i < bufmgr->num_buckets; i++) { - struct bo_cache_bucket *bucket = &bufmgr->cache_bucket[i]; - if (bucket->size >= size) { - return bucket; - } + /* Condition for size less than 4*4096 (16KB) page size. */ + if (size <= 4 * PAGE_SIZE) { + index = DIV_ROUND_UP(size, PAGE_SIZE) - 1; + } else { + /* Number of pages of page size */ + const int pages = DIV_ROUND_UP(size, PAGE_SIZE); + const int pages_log2 = ilog2_round_up(pages) - 1; + + /* Finding the row and column of the matrix */ + const int row = pages_log2 - 1; + const int col = DIV_ROUND_UP((pages - (1 << pages_log2)), + (1 << (pages_log2 - 2))); + /* Using the calculated row and column to index into the matrix */ + index = (row << 2) + (col - 1); } - return NULL; + return (index >= 0 && index < bufmgr->num_buckets) ? + &bufmgr->cache_bucket[index] : NULL; } int @@ -1254,6 +1278,10 @@ add_bucket(struct brw_bufmgr *bufmgr, int size) list_inithead(&bufmgr->cache_bucket[i].head); bufmgr->cache_bucket[i].size = size; bufmgr->num_buckets++; + + assert(bucket_for_size(bufmgr, size) == &bufmgr->cache_bucket[i]); + assert(bucket_for_size(bufmgr, size - 2048) == &bufmgr->cache_bucket[i]); + assert(bucket_for_size(bufmgr, size + 1) != &bufmgr->cache_bucket[i]); } static void -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev