radv: Add option to clear LDS at the end of a shader.

Only shader stages that explicitly allow shared memory (compute, task and
mesh) are handled for now. The pass runs very late, both to keep optimizations
from removing the stores and to ensure the clear is added after MS outputs
have been loaded from LDS.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26679>
This commit is contained in:
Bas Nieuwenhuizen
2023-11-01 15:34:13 +01:00
committed by Marge Bot
parent da6a5e1f63
commit eaf61adea5
6 changed files with 28 additions and 1 deletions

View File

@@ -129,6 +129,7 @@ static const driOptionDescription radv_dri_options[] = {
DRI_CONF_RADV_DISABLE_SHRINK_IMAGE_STORE(false)
DRI_CONF_RADV_NO_DYNAMIC_BOUNDS(false)
DRI_CONF_RADV_OVERRIDE_UNIFORM_OFFSET_ALIGNMENT(0)
DRI_CONF_RADV_CLEAR_LDS(false)
DRI_CONF_SECTION_END
DRI_CONF_SECTION_DEBUG
@@ -193,6 +194,8 @@ radv_init_dri_options(struct radv_instance *instance)
if (driQueryOptionb(&instance->dri_options, "radv_disable_dcc"))
instance->debug_flags |= RADV_DEBUG_NO_DCC;
instance->clear_lds = driQueryOptionb(&instance->dri_options, "radv_clear_lds");
instance->zero_vram = driQueryOptionb(&instance->dri_options, "radv_zero_vram");
instance->disable_aniso_single_level = driQueryOptionb(&instance->dri_options, "radv_disable_aniso_single_level");

View File

@@ -229,6 +229,7 @@ radv_generate_pipeline_key(const struct radv_device *device, const VkPipelineSha
#define RADV_HASH_SHADER_PS_WAVE32 (1 << 2)
#define RADV_HASH_SHADER_GE_WAVE32 (1 << 3)
#define RADV_HASH_SHADER_LLVM (1 << 4)
/* Set by radv_get_hash_flags() when the instance has clear_lds enabled. */
#define RADV_HASH_SHADER_CLEAR_LDS (1 << 5)
#define RADV_HASH_SHADER_KEEP_STATISTICS (1 << 8)
#define RADV_HASH_SHADER_USE_NGG_CULLING (1 << 13)
#define RADV_HASH_SHADER_EMULATE_RT (1 << 16)
@@ -267,6 +268,8 @@ radv_get_hash_flags(const struct radv_device *device, bool stats)
hash_flags |= RADV_HASH_SHADER_NO_RT;
if (device->instance->dual_color_blend_by_location)
hash_flags |= RADV_HASH_SHADER_DUAL_BLEND_MRT1;
if (device->instance->clear_lds)
hash_flags |= RADV_HASH_SHADER_CLEAR_LDS;
return hash_flags;
}
@@ -701,6 +704,12 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_pipeline_key
NIR_PASS_V(stage->nir, ac_nir_lower_ps, &options);
}
if (radv_shader_should_clear_lds(device, stage->nir)) {
const unsigned chunk_size = 16; /* max single store size */
const unsigned shared_size = ALIGN(stage->nir->info.shared_size, chunk_size);
NIR_PASS(_, stage->nir, nir_clear_shared_memory, shared_size, chunk_size);
}
NIR_PASS(_, stage->nir, nir_lower_int64);
NIR_PASS(_, stage->nir, nir_opt_idiv_const, 8);
@@ -782,6 +791,14 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_pipeline_key
}
}
/* Tell whether the LDS clear should be applied to this shader.
 *
 * Returns true only when all of the following hold:
 *  - the shader stage is compute, mesh or task,
 *  - the shader declares a non-zero shared_size,
 *  - the instance has the clear_lds option enabled.
 */
bool
radv_shader_should_clear_lds(const struct radv_device *device, const nir_shader *shader)
{
   /* Only these stages are eligible; everything else is rejected up front. */
   switch (shader->info.stage) {
   case MESA_SHADER_COMPUTE:
   case MESA_SHADER_MESH:
   case MESA_SHADER_TASK:
      break;
   default:
      return false;
   }

   /* Nothing to clear when the shader has no shared memory. */
   const bool has_shared_memory = shader->info.shared_size > 0;
   if (!has_shared_memory)
      return false;

   /* The instance-level option has the final say. */
   return device->instance->clear_lds;
}
static uint32_t
radv_get_executable_count(struct radv_pipeline *pipeline)
{

View File

@@ -382,6 +382,7 @@ struct radv_instance {
bool force_rt_wave64;
bool dual_color_blend_by_location;
bool legacy_sparse_binding;
bool clear_lds;
char *app_layer;
uint8_t override_graphics_shader_version;
uint8_t override_compute_shader_version;

View File

@@ -723,6 +723,8 @@ void radv_optimize_nir_algebraic(nir_shader *shader, bool opt_offsets);
void radv_postprocess_nir(struct radv_device *device, const struct radv_pipeline_key *pipeline_key,
struct radv_shader_stage *stage);
/* Returns true when `shader` is a compute, mesh or task shader with a non-zero
 * shared_size and the instance owning `device` has clear_lds enabled.
 */
bool radv_shader_should_clear_lds(const struct radv_device *device, const nir_shader *shader);
nir_shader *radv_parse_rt_stage(struct radv_device *device, const VkPipelineShaderStageCreateInfo *sinfo,
const struct radv_pipeline_key *key,
const struct radv_pipeline_layout *pipeline_layout);

View File

@@ -1179,7 +1179,8 @@ radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *n
(nir->info.stage == MESA_SHADER_MESH && device->physical_device->rad_info.gfx_level < GFX11);
info->cs.uses_local_invocation_idx = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_LOCAL_INVOCATION_INDEX) |
BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SUBGROUP_ID) |
BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_NUM_SUBGROUPS);
BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_NUM_SUBGROUPS) |
radv_shader_should_clear_lds(device, nir);
if (nir->info.stage == MESA_SHADER_COMPUTE || nir->info.stage == MESA_SHADER_TASK ||
nir->info.stage == MESA_SHADER_MESH) {

View File

@@ -713,6 +713,9 @@
#define DRI_CONF_RADV_APP_LAYER() DRI_CONF_OPT_S_NODEF(radv_app_layer, "Select an application layer.")
/* Declares the boolean "radv_clear_lds" option; `def` is forwarded to
 * DRI_CONF_OPT_B (presumably as the option's default value).
 */
#define DRI_CONF_RADV_CLEAR_LDS(def) \
DRI_CONF_OPT_B(radv_clear_lds, def, "Clear LDS at the end of shaders. Might decrease performance.")
/**
* \brief ANV specific configuration options
*/