i965: Optimize bucket index calculation
Reducing Bucket index calculation to O(1). This algorithm calculates the index using matrix method. Assuming PAGE_SIZE is 4096, matrix arrangement is as below: 1*4096 2*4096 3*4096 4*4096 5*4096 6*4096 7*4096 8*4096 10*4096 12*4096 14*4096 16*4096 20*4096 24*4096 28*4096 32*4096 ... ... ... ... ... ... ... ... ... ... ... max_cache_size From this matrix its clearly seen that every row follows the below way: ... ... ... n n+(1/4)n n+(1/2)n n+(3/4)n 2n Row is calculated as log2(size/PAGE_SIZE) Column is calculated as converting the difference between the elements to fit into power size of two and indexing it. Final Index is (row*4)+(col-1) Tested with Intel Mesa CI. Improves performance of 3DMark on BXT by 0.705966% +/- 0.229767% (n=20) v4: Review comments on style and code comments implemented (Ian). v3: Review comments implemented (Ian). v2: Review comments implemented (Jason). Signed-off-by: Aravindan Muthukumar <aravindan.muthukumar@intel.com> Signed-off-by: Kedar Karanje <kedar.j.karanje@intel.com> Reviewed-by: Yogesh Marathe <yogesh.marathe@intel.com> Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
This commit is contained in:

committed by
Ian Romanick

parent
c8417c8d25
commit
971b3c019b
@@ -86,6 +86,8 @@
|
||||
|
||||
#define memclear(s) memset(&s, 0, sizeof(s))
|
||||
|
||||
#define PAGE_SIZE 4096
|
||||
|
||||
#define FILE_DEBUG_FLAG DEBUG_BUFMGR
|
||||
|
||||
static inline int
|
||||
@@ -180,19 +182,44 @@ bo_tile_pitch(struct brw_bufmgr *bufmgr, uint32_t pitch, uint32_t tiling)
|
||||
return ALIGN(pitch, tile_width);
|
||||
}
|
||||
|
||||
/**
|
||||
* This function finds the correct bucket fit for the input size.
|
||||
* The function works with O(1) complexity when the requested size
|
||||
* was queried instead of iterating the size through all the buckets.
|
||||
*/
|
||||
static struct bo_cache_bucket *
|
||||
bucket_for_size(struct brw_bufmgr *bufmgr, uint64_t size)
|
||||
{
|
||||
int i;
|
||||
/* Calculating the pages and rounding up to the page size. */
|
||||
const unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE;
|
||||
|
||||
for (i = 0; i < bufmgr->num_buckets; i++) {
|
||||
struct bo_cache_bucket *bucket = &bufmgr->cache_bucket[i];
|
||||
if (bucket->size >= size) {
|
||||
return bucket;
|
||||
}
|
||||
}
|
||||
/* Row Bucket sizes clz((x-1) | 3) Row Column
|
||||
* in pages stride size
|
||||
* 0: 1 2 3 4 -> 30 30 30 30 4 1
|
||||
* 1: 5 6 7 8 -> 29 29 29 29 4 1
|
||||
* 2: 10 12 14 16 -> 28 28 28 28 8 2
|
||||
* 3: 20 24 28 32 -> 27 27 27 27 16 4
|
||||
*/
|
||||
const unsigned row = 30 - __builtin_clz((pages - 1) | 3);
|
||||
const unsigned row_max_pages = 4 << row;
|
||||
|
||||
return NULL;
|
||||
/* The '& ~2' is the special case for row 1. In row 1, max pages /
|
||||
* 2 is 2, but the previous row maximum is zero (because there is
|
||||
* no previous row). All row maximum sizes are power of 2, so that
|
||||
* is the only case where that bit will be set.
|
||||
*/
|
||||
const unsigned prev_row_max_pages = (row_max_pages / 2) & ~2;
|
||||
int col_size_log2 = row - 1;
|
||||
col_size_log2 += (col_size_log2 < 0);
|
||||
|
||||
const unsigned col = (pages - prev_row_max_pages +
|
||||
((1 << col_size_log2) - 1)) >> col_size_log2;
|
||||
|
||||
/* Calculating the index based on the row and column. */
|
||||
const unsigned index = (row * 4) + (col - 1);
|
||||
|
||||
return (index < bufmgr->num_buckets) ?
|
||||
&bufmgr->cache_bucket[index] : NULL;
|
||||
}
|
||||
|
||||
int
|
||||
@@ -1267,6 +1294,10 @@ add_bucket(struct brw_bufmgr *bufmgr, int size)
|
||||
list_inithead(&bufmgr->cache_bucket[i].head);
|
||||
bufmgr->cache_bucket[i].size = size;
|
||||
bufmgr->num_buckets++;
|
||||
|
||||
assert(bucket_for_size(bufmgr, size) == &bufmgr->cache_bucket[i]);
|
||||
assert(bucket_for_size(bufmgr, size - 2048) == &bufmgr->cache_bucket[i]);
|
||||
assert(bucket_for_size(bufmgr, size + 1) != &bufmgr->cache_bucket[i]);
|
||||
}
|
||||
|
||||
static void
|
||||
|
Reference in New Issue
Block a user