radeonsi/gfx11: fix compute scratch buffer - WAVES is always per SE
Fixes: ba02ed91a6 - ac/gfx11: fix the scratch buffer
Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19477>
This commit is contained in:
@@ -916,7 +916,7 @@ void ac_set_reg_cu_en(void *cs, unsigned reg_offset, uint32_t value, uint32_t cl
 }

 /* Return the register value and tune bytes_per_wave to increase scratch performance. */
-void ac_get_scratch_tmpring_size(const struct radeon_info *info, bool compute,
+void ac_get_scratch_tmpring_size(const struct radeon_info *info,
 unsigned bytes_per_wave, unsigned *max_seen_bytes_per_wave,
 uint32_t *tmpring_size)
 {
@@ -949,8 +949,8 @@ void ac_get_scratch_tmpring_size(const struct radeon_info *info, bool compute,
 *max_seen_bytes_per_wave = MAX2(*max_seen_bytes_per_wave, bytes_per_wave);

 unsigned max_scratch_waves = info->max_scratch_waves;
-if (info->gfx_level >= GFX11 && !compute)
-max_scratch_waves /= info->num_se; /* WAVES is per SE for SPI_TMPRING_SIZE. */
+if (info->gfx_level >= GFX11)
+max_scratch_waves /= info->num_se; /* WAVES is per SE */

 /* TODO: We could decrease WAVES to make the whole buffer fit into the infinity cache. */
 *tmpring_size = S_0286E8_WAVES(max_scratch_waves) |
|
@@ -166,7 +166,7 @@ void ac_set_reg_cu_en(void *cs, unsigned reg_offset, uint32_t value, uint32_t cl
 unsigned value_shift, const struct radeon_info *info,
 void set_sh_reg(void*, unsigned, uint32_t));

-void ac_get_scratch_tmpring_size(const struct radeon_info *info, bool compute,
+void ac_get_scratch_tmpring_size(const struct radeon_info *info,
 unsigned bytes_per_wave, unsigned *max_seen_bytes_per_wave,
 uint32_t *tmpring_size);

|
@@ -566,7 +566,7 @@ static bool si_switch_compute_shader(struct si_context *sctx, struct si_compute
 }

 unsigned tmpring_size;
-ac_get_scratch_tmpring_size(&sctx->screen->info, true,
+ac_get_scratch_tmpring_size(&sctx->screen->info,
 config->scratch_bytes_per_wave,
 &sctx->max_seen_compute_scratch_bytes_per_wave, &tmpring_size);

|
@@ -4054,7 +4054,7 @@ static bool si_update_scratch_relocs(struct si_context *sctx)
 bool si_update_spi_tmpring_size(struct si_context *sctx, unsigned bytes)
 {
 unsigned spi_tmpring_size;
-ac_get_scratch_tmpring_size(&sctx->screen->info, false, bytes,
+ac_get_scratch_tmpring_size(&sctx->screen->info, bytes,
 &sctx->max_seen_scratch_bytes_per_wave, &spi_tmpring_size);

 unsigned scratch_needed_size = sctx->max_seen_scratch_bytes_per_wave *
|
Reference in New Issue
Block a user