freedreno: Add compute_lb_size device info
This is really a guess except for a6xx and later; however, it shouldn't
change behavior from before.
Fixes: 5879eaac18 ("ir3: Increase compute const size on a7xx")
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34746>
(cherry picked from commit 156ab5839d045ea291a47789014ce61ddbad0804)
This commit is contained in:

committed by
Eric Engestrom

parent
84f1dcdc2a
commit
f6450df88f
@@ -84,7 +84,7 @@
|
||||
"description": "freedreno: Add compute_lb_size device info",
|
||||
"nominated": true,
|
||||
"nomination_type": 2,
|
||||
"resolution": 0,
|
||||
"resolution": 1,
|
||||
"main_sha": null,
|
||||
"because_sha": "5879eaac185ed1c167fd01aff9b91c7cbe43ab0a",
|
||||
"notes": null
|
||||
|
@@ -51,6 +51,18 @@ struct fd_dev_info {
|
||||
|
||||
uint32_t max_waves;
|
||||
|
||||
/* Local Memory (i.e. shared memory in GL/Vulkan) and compute shader
|
||||
* const registers, as well as other things not relevant here, share the
|
||||
* same storage space, called the Local Buffer or LB. This is the size of
|
||||
* the part of the LB used for consts and LM. Consts are duplicated
|
||||
* wavesize_granularity times, and the size of duplicated consts + local
|
||||
* memory must not exceed it. If it is left 0, assume that it is
|
||||
* wavesize_granularity * compute constlen + cs_shared_mem_size, which is
|
||||
* enough to hold both the maximum possible compute consts and local
|
||||
* memory at the same time.
|
||||
*/
|
||||
uint32_t compute_lb_size;
|
||||
|
||||
/* number of CCU is always equal to the number of SP */
|
||||
union {
|
||||
uint32_t num_sp_cores;
|
||||
|
@@ -103,7 +103,7 @@ class GPUInfo(Struct):
|
||||
tile_max_w, tile_max_h, num_vsc_pipes,
|
||||
cs_shared_mem_size, num_sp_cores, wave_granularity, fibers_per_sp,
|
||||
highest_bank_bit = 0, ubwc_swizzle = 0x7, macrotile_mode = 0,
|
||||
threadsize_base = 64, max_waves = 16):
|
||||
threadsize_base = 64, max_waves = 16, compute_lb_size = 0):
|
||||
self.chip = chip.value
|
||||
self.gmem_align_w = gmem_align_w
|
||||
self.gmem_align_h = gmem_align_h
|
||||
@@ -139,9 +139,13 @@ class A6xxGPUInfo(GPUInfo):
|
||||
if chip == CHIP.A6XX:
|
||||
tile_max_w = 1024 # max_bitfield_val(5, 0, 5)
|
||||
tile_max_h = max_bitfield_val(14, 8, 4) # 1008
|
||||
compute_lb_size = 0
|
||||
else:
|
||||
tile_max_w = 1728
|
||||
tile_max_h = 1728
|
||||
# on a7xx the compute_lb_size is 40KB for all known parts for now.
|
||||
# We have a parameter for it in case some low-end parts cut it down.
|
||||
compute_lb_size = 40 * 1024
|
||||
|
||||
super().__init__(chip, gmem_align_w = 16, gmem_align_h = 4,
|
||||
tile_align_w = tile_align_w,
|
||||
@@ -157,7 +161,8 @@ class A6xxGPUInfo(GPUInfo):
|
||||
ubwc_swizzle = ubwc_swizzle,
|
||||
macrotile_mode = macrotile_mode,
|
||||
threadsize_base = threadsize_base,
|
||||
max_waves = max_waves)
|
||||
max_waves = max_waves,
|
||||
compute_lb_size = compute_lb_size)
|
||||
|
||||
self.num_ccu = num_ccu
|
||||
|
||||
|
@@ -263,6 +263,14 @@ ir3_compiler_create(struct fd_device *dev, const struct fd_dev_id *dev_id,
|
||||
compiler->has_early_preamble = false;
|
||||
}
|
||||
|
||||
if (dev_info->compute_lb_size) {
|
||||
compiler->compute_lb_size = dev_info->compute_lb_size;
|
||||
} else {
|
||||
compiler->compute_lb_size =
|
||||
compiler->max_const_compute * 16 /* bytes/vec4 */ *
|
||||
compiler->wave_granularity + compiler->local_mem_size;
|
||||
}
|
||||
|
||||
/* This is just a guess for a4xx. */
|
||||
compiler->pvtmem_per_fiber_align = compiler->gen >= 4 ? 512 : 128;
|
||||
/* TODO: implement private memory on earlier gens */
|
||||
|
@@ -129,6 +129,9 @@ struct ir3_compiler {
|
||||
/* The maximum number of constants, in vec4's, for compute shaders. */
|
||||
uint16_t max_const_compute;
|
||||
|
||||
/* See fd_dev_info::compute_lb_size. */
|
||||
uint32_t compute_lb_size;
|
||||
|
||||
/* Number of instructions that the shader's base address and length
|
||||
* (instrlen divides instruction count by this) must be aligned to.
|
||||
*/
|
||||
|
Reference in New Issue
Block a user