radeonsi: change max TBO/SSBO sizes again and rework max alloc size

Allow up to 1/4 of the max heap size, but at most 512 MB on 32-bit
architectures.

Reviewed-by: Mihai Preda <mhpreda@gmail.com>
Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16901>
This commit is contained in:
Marek Olšák
2022-06-05 17:32:27 -04:00
committed by Marge Bot
parent c1adb33a93
commit aee8ee17a5
6 changed files with 39 additions and 20 deletions

View File

@@ -803,9 +803,9 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
* allocations can fail or cause buffer movement failures in the kernel. * allocations can fail or cause buffer movement failures in the kernel.
*/ */
if (info->has_dedicated_vram) if (info->has_dedicated_vram)
info->max_alloc_size = info->vram_size * 0.8; info->max_heap_size_kb = info->vram_size_kb;
else else
info->max_alloc_size = info->gart_size * 0.7; info->max_heap_size_kb = info->gart_size_kb;
info->vram_type = amdinfo->vram_type; info->vram_type = amdinfo->vram_type;
info->vram_bit_width = amdinfo->vram_bit_width; info->vram_bit_width = amdinfo->vram_bit_width;
@@ -1367,7 +1367,7 @@ void ac_print_gpu_info(struct radeon_info *info, FILE *f)
fprintf(f, " vram_vis_size = %i MB\n", (int)DIV_ROUND_UP(info->vram_vis_size, 1024 * 1024)); fprintf(f, " vram_vis_size = %i MB\n", (int)DIV_ROUND_UP(info->vram_vis_size, 1024 * 1024));
fprintf(f, " vram_type = %i\n", info->vram_type); fprintf(f, " vram_type = %i\n", info->vram_type);
fprintf(f, " vram_bit_width = %i\n", info->vram_bit_width); fprintf(f, " vram_bit_width = %i\n", info->vram_bit_width);
fprintf(f, " max_alloc_size = %i MB\n", (int)DIV_ROUND_UP(info->max_alloc_size, 1024 * 1024)); fprintf(f, " max_heap_size_kb = %i MB\n", (int)DIV_ROUND_UP(info->max_heap_size_kb, 1024));
fprintf(f, " min_alloc_size = %u\n", info->min_alloc_size); fprintf(f, " min_alloc_size = %u\n", info->min_alloc_size);
fprintf(f, " address32_hi = 0x%x\n", info->address32_hi); fprintf(f, " address32_hi = 0x%x\n", info->address32_hi);
fprintf(f, " has_dedicated_vram = %u\n", info->has_dedicated_vram); fprintf(f, " has_dedicated_vram = %u\n", info->has_dedicated_vram);

View File

@@ -116,7 +116,7 @@ struct radeon_info {
uint64_t vram_vis_size; uint64_t vram_vis_size;
uint32_t vram_bit_width; uint32_t vram_bit_width;
uint32_t vram_type; uint32_t vram_type;
uint64_t max_alloc_size; uint32_t max_heap_size_kb;
uint32_t min_alloc_size; uint32_t min_alloc_size;
uint32_t address32_hi; uint32_t address32_hi;
bool has_dedicated_vram; bool has_dedicated_vram;

View File

@@ -348,7 +348,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
return 0; return 0;
case PIPE_CAP_MAX_TEXEL_BUFFER_ELEMENTS_UINT: case PIPE_CAP_MAX_TEXEL_BUFFER_ELEMENTS_UINT:
return MIN2(rscreen->b.info.max_alloc_size, INT_MAX); return MIN2(rscreen->b.info.max_heap_size_kb * 1024ull / 4, INT_MAX);
case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT: case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
return R600_MAP_BUFFER_ALIGNMENT; return R600_MAP_BUFFER_ALIGNMENT;

View File

@@ -991,8 +991,7 @@ static int r600_get_compute_param(struct pipe_screen *screen,
* 4 * MAX_MEM_ALLOC_SIZE. * 4 * MAX_MEM_ALLOC_SIZE.
*/ */
*max_global_size = MIN2(4 * max_mem_alloc_size, *max_global_size = MIN2(4 * max_mem_alloc_size,
MAX2(rscreen->info.gart_size, rscreen->info.max_heap_size_kb * 1024ull);
rscreen->info.vram_size));
} }
return sizeof(uint64_t); return sizeof(uint64_t);
@@ -1016,7 +1015,7 @@ static int r600_get_compute_param(struct pipe_screen *screen,
if (ret) { if (ret) {
uint64_t *max_mem_alloc_size = ret; uint64_t *max_mem_alloc_size = ret;
*max_mem_alloc_size = rscreen->info.max_alloc_size; *max_mem_alloc_size = (rscreen->info.max_heap_size_kb / 4) * 1024ull;
} }
return sizeof(uint64_t); return sizeof(uint64_t);
@@ -1287,8 +1286,8 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen,
printf("gart_size = %i MB\n", (int)DIV_ROUND_UP(rscreen->info.gart_size, 1024*1024)); printf("gart_size = %i MB\n", (int)DIV_ROUND_UP(rscreen->info.gart_size, 1024*1024));
printf("vram_size = %i MB\n", (int)DIV_ROUND_UP(rscreen->info.vram_size, 1024*1024)); printf("vram_size = %i MB\n", (int)DIV_ROUND_UP(rscreen->info.vram_size, 1024*1024));
printf("vram_vis_size = %i MB\n", (int)DIV_ROUND_UP(rscreen->info.vram_vis_size, 1024*1024)); printf("vram_vis_size = %i MB\n", (int)DIV_ROUND_UP(rscreen->info.vram_vis_size, 1024*1024));
printf("max_alloc_size = %i MB\n", printf("max_heap_size = %i MB\n",
(int)DIV_ROUND_UP(rscreen->info.max_alloc_size, 1024*1024)); (int)DIV_ROUND_UP(rscreen->info.max_heap_size_kb, 1024));
printf("min_alloc_size = %u\n", rscreen->info.min_alloc_size); printf("min_alloc_size = %u\n", rscreen->info.min_alloc_size);
printf("has_dedicated_vram = %u\n", rscreen->info.has_dedicated_vram); printf("has_dedicated_vram = %u\n", rscreen->info.has_dedicated_vram);
printf("r600_has_virtual_memory = %i\n", rscreen->info.r600_has_virtual_memory); printf("r600_has_virtual_memory = %i\n", rscreen->info.r600_has_virtual_memory);

View File

@@ -234,11 +234,22 @@ static int si_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
return 4096 * 1024; return 4096 * 1024;
case PIPE_CAP_MAX_TEXEL_BUFFER_ELEMENTS_UINT: case PIPE_CAP_MAX_TEXEL_BUFFER_ELEMENTS_UINT:
case PIPE_CAP_MAX_SHADER_BUFFER_SIZE_UINT: case PIPE_CAP_MAX_SHADER_BUFFER_SIZE_UINT: {
/* Return 1/4th of the heap size as the maximum because the max size is not practically
* allocatable. Also, this can only return UINT32_MAX at most.
*/
unsigned max_size = MIN2((sscreen->info.max_heap_size_kb * 1024ull) / 4, UINT32_MAX);
/* Allow max 512 MB to pass CTS with a 32-bit build. */ /* Allow max 512 MB to pass CTS with a 32-bit build. */
return MIN2(sscreen->info.max_alloc_size, 512 * 1024 * 1024); if (sizeof(void*) == 4)
max_size = MIN2(max_size, 512 * 1024 * 1024);
return max_size;
}
case PIPE_CAP_MAX_TEXTURE_MB: case PIPE_CAP_MAX_TEXTURE_MB:
return sscreen->info.max_alloc_size / (1024 * 1024); /* Allow 1/4th of the heap size. */
return sscreen->info.max_heap_size_kb / 1024 / 4;
case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY: case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY:
case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY: case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY:
@@ -861,7 +872,7 @@ static int si_get_compute_param(struct pipe_screen *screen, enum pipe_shader_ir
* 4 * MAX_MEM_ALLOC_SIZE. * 4 * MAX_MEM_ALLOC_SIZE.
*/ */
*max_global_size = *max_global_size =
MIN2(4 * max_mem_alloc_size, MAX2(sscreen->info.gart_size, sscreen->info.vram_size)); MIN2(4 * max_mem_alloc_size, sscreen->info.max_heap_size_kb * 1024ull);
} }
return sizeof(uint64_t); return sizeof(uint64_t);
@@ -888,7 +899,10 @@ static int si_get_compute_param(struct pipe_screen *screen, enum pipe_shader_ir
if (ret) { if (ret) {
uint64_t *max_mem_alloc_size = ret; uint64_t *max_mem_alloc_size = ret;
*max_mem_alloc_size = sscreen->info.max_alloc_size; /* Return 1/4 of the heap size as the maximum because the max size is not practically
* allocatable.
*/
*max_mem_alloc_size = (sscreen->info.max_heap_size_kb / 4) * 1024ull;
} }
return sizeof(uint64_t); return sizeof(uint64_t);

View File

@@ -375,14 +375,20 @@ static bool do_winsys_init(struct radeon_drm_winsys *ws)
/* Radeon allocates all buffers contiguously, which makes large allocations /* Radeon allocates all buffers contiguously, which makes large allocations
* unlikely to succeed. */ * unlikely to succeed. */
if (ws->info.has_dedicated_vram) if (ws->info.has_dedicated_vram)
ws->info.max_alloc_size = ws->info.vram_size * 0.7; ws->info.max_heap_size_kb = ws->info.vram_size_kb;
else else
ws->info.max_alloc_size = ws->info.gart_size * 0.7; ws->info.max_heap_size_kb = ws->info.gart_size_kb;
/* Old kernel driver limitation for allocation sizes. We only use this to limit per-buffer
* allocation size.
*/
if (ws->info.drm_minor < 40) if (ws->info.drm_minor < 40)
ws->info.max_alloc_size = MIN2(ws->info.max_alloc_size, 256*1024*1024); ws->info.max_heap_size_kb = MIN2(ws->info.max_heap_size_kb, 256 * 1024);
/* Both 32-bit and 64-bit address spaces only have 4GB. */
ws->info.max_alloc_size = MIN2(ws->info.max_alloc_size, 3ull*1024*1024*1024); /* Both 32-bit and 64-bit address spaces only have 4GB.
* This is a limitation of the VM allocator in the winsys.
*/
ws->info.max_heap_size_kb = MIN2(ws->info.max_heap_size_kb, 4 * 1024 * 1024); /* 4 GB */
/* Get max clock frequency info and convert it to MHz */ /* Get max clock frequency info and convert it to MHz */
radeon_get_drm_value(ws->fd, RADEON_INFO_MAX_SCLK, NULL, radeon_get_drm_value(ws->fd, RADEON_INFO_MAX_SCLK, NULL,