radeonsi/gfx11: fix compute scratch buffer - WAVES is always per SE

Fixes: ba02ed91a6 - ac/gfx11: fix the scratch buffer

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19477>
(cherry picked from commit bdfacd0a24)
This commit is contained in:
Marek Olšák
2022-11-02 14:34:58 -04:00
committed by Dylan Baker
parent 9005854510
commit dde0dab8b5
5 changed files with 7 additions and 7 deletions

View File

@@ -544,7 +544,7 @@
"description": "radeonsi/gfx11: fix compute scratch buffer - WAVES is always per SE",
"nominated": true,
"nomination_type": 1,
"resolution": 0,
"resolution": 1,
"main_sha": null,
"because_sha": "ba02ed91a60839f2a6dc6a89fd9de1144b0788aa"
},

View File

@@ -808,7 +808,7 @@ void ac_set_reg_cu_en(void *cs, unsigned reg_offset, uint32_t value, uint32_t cl
}
/* Return the register value and tune bytes_per_wave to increase scratch performance. */
-void ac_get_scratch_tmpring_size(const struct radeon_info *info, bool compute,
+void ac_get_scratch_tmpring_size(const struct radeon_info *info,
unsigned bytes_per_wave, unsigned *max_seen_bytes_per_wave,
uint32_t *tmpring_size)
{
@@ -841,8 +841,8 @@ void ac_get_scratch_tmpring_size(const struct radeon_info *info, bool compute,
*max_seen_bytes_per_wave = MAX2(*max_seen_bytes_per_wave, bytes_per_wave);
unsigned max_scratch_waves = info->max_scratch_waves;
-if (info->gfx_level >= GFX11 && !compute)
-max_scratch_waves /= info->num_se; /* WAVES is per SE for SPI_TMPRING_SIZE. */
+if (info->gfx_level >= GFX11)
+max_scratch_waves /= info->num_se; /* WAVES is per SE */
/* TODO: We could decrease WAVES to make the whole buffer fit into the infinity cache. */
*tmpring_size = S_0286E8_WAVES(max_scratch_waves) |

View File

@@ -135,7 +135,7 @@ void ac_set_reg_cu_en(void *cs, unsigned reg_offset, uint32_t value, uint32_t cl
unsigned value_shift, const struct radeon_info *info,
void set_sh_reg(void*, unsigned, uint32_t));
-void ac_get_scratch_tmpring_size(const struct radeon_info *info, bool compute,
+void ac_get_scratch_tmpring_size(const struct radeon_info *info,
unsigned bytes_per_wave, unsigned *max_seen_bytes_per_wave,
uint32_t *tmpring_size);

View File

@@ -540,7 +540,7 @@ static bool si_switch_compute_shader(struct si_context *sctx, struct si_compute
}
unsigned tmpring_size;
-ac_get_scratch_tmpring_size(&sctx->screen->info, true,
+ac_get_scratch_tmpring_size(&sctx->screen->info,
config->scratch_bytes_per_wave,
&sctx->max_seen_compute_scratch_bytes_per_wave, &tmpring_size);

View File

@@ -4038,7 +4038,7 @@ static bool si_update_scratch_relocs(struct si_context *sctx)
bool si_update_spi_tmpring_size(struct si_context *sctx, unsigned bytes)
{
unsigned spi_tmpring_size;
-ac_get_scratch_tmpring_size(&sctx->screen->info, false, bytes,
+ac_get_scratch_tmpring_size(&sctx->screen->info, bytes,
&sctx->max_seen_scratch_bytes_per_wave, &spi_tmpring_size);
unsigned scratch_needed_size = sctx->max_seen_scratch_bytes_per_wave *