radv: Add option to clear LDS at the end of a shader.
Only shaders which explicitly allow shared memory are included for now. The pass runs very late so that optimizations do not remove the stores, and so that the clear is added after MS outputs have been loaded from LDS. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26679>
This commit is contained in:

committed by
Marge Bot

parent
da6a5e1f63
commit
eaf61adea5
@@ -129,6 +129,7 @@ static const driOptionDescription radv_dri_options[] = {
|
|||||||
DRI_CONF_RADV_DISABLE_SHRINK_IMAGE_STORE(false)
|
DRI_CONF_RADV_DISABLE_SHRINK_IMAGE_STORE(false)
|
||||||
DRI_CONF_RADV_NO_DYNAMIC_BOUNDS(false)
|
DRI_CONF_RADV_NO_DYNAMIC_BOUNDS(false)
|
||||||
DRI_CONF_RADV_OVERRIDE_UNIFORM_OFFSET_ALIGNMENT(0)
|
DRI_CONF_RADV_OVERRIDE_UNIFORM_OFFSET_ALIGNMENT(0)
|
||||||
|
DRI_CONF_RADV_CLEAR_LDS(false)
|
||||||
DRI_CONF_SECTION_END
|
DRI_CONF_SECTION_END
|
||||||
|
|
||||||
DRI_CONF_SECTION_DEBUG
|
DRI_CONF_SECTION_DEBUG
|
||||||
@@ -193,6 +194,8 @@ radv_init_dri_options(struct radv_instance *instance)
|
|||||||
if (driQueryOptionb(&instance->dri_options, "radv_disable_dcc"))
|
if (driQueryOptionb(&instance->dri_options, "radv_disable_dcc"))
|
||||||
instance->debug_flags |= RADV_DEBUG_NO_DCC;
|
instance->debug_flags |= RADV_DEBUG_NO_DCC;
|
||||||
|
|
||||||
|
instance->clear_lds = driQueryOptionb(&instance->dri_options, "radv_clear_lds");
|
||||||
|
|
||||||
instance->zero_vram = driQueryOptionb(&instance->dri_options, "radv_zero_vram");
|
instance->zero_vram = driQueryOptionb(&instance->dri_options, "radv_zero_vram");
|
||||||
|
|
||||||
instance->disable_aniso_single_level = driQueryOptionb(&instance->dri_options, "radv_disable_aniso_single_level");
|
instance->disable_aniso_single_level = driQueryOptionb(&instance->dri_options, "radv_disable_aniso_single_level");
|
||||||
|
@@ -229,6 +229,7 @@ radv_generate_pipeline_key(const struct radv_device *device, const VkPipelineSha
|
|||||||
#define RADV_HASH_SHADER_PS_WAVE32 (1 << 2)
|
#define RADV_HASH_SHADER_PS_WAVE32 (1 << 2)
|
||||||
#define RADV_HASH_SHADER_GE_WAVE32 (1 << 3)
|
#define RADV_HASH_SHADER_GE_WAVE32 (1 << 3)
|
||||||
#define RADV_HASH_SHADER_LLVM (1 << 4)
|
#define RADV_HASH_SHADER_LLVM (1 << 4)
|
||||||
|
#define RADV_HASH_SHADER_CLEAR_LDS (1 << 5)
|
||||||
#define RADV_HASH_SHADER_KEEP_STATISTICS (1 << 8)
|
#define RADV_HASH_SHADER_KEEP_STATISTICS (1 << 8)
|
||||||
#define RADV_HASH_SHADER_USE_NGG_CULLING (1 << 13)
|
#define RADV_HASH_SHADER_USE_NGG_CULLING (1 << 13)
|
||||||
#define RADV_HASH_SHADER_EMULATE_RT (1 << 16)
|
#define RADV_HASH_SHADER_EMULATE_RT (1 << 16)
|
||||||
@@ -267,6 +268,8 @@ radv_get_hash_flags(const struct radv_device *device, bool stats)
|
|||||||
hash_flags |= RADV_HASH_SHADER_NO_RT;
|
hash_flags |= RADV_HASH_SHADER_NO_RT;
|
||||||
if (device->instance->dual_color_blend_by_location)
|
if (device->instance->dual_color_blend_by_location)
|
||||||
hash_flags |= RADV_HASH_SHADER_DUAL_BLEND_MRT1;
|
hash_flags |= RADV_HASH_SHADER_DUAL_BLEND_MRT1;
|
||||||
|
if (device->instance->clear_lds)
|
||||||
|
hash_flags |= RADV_HASH_SHADER_CLEAR_LDS;
|
||||||
return hash_flags;
|
return hash_flags;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -701,6 +704,12 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_pipeline_key
|
|||||||
NIR_PASS_V(stage->nir, ac_nir_lower_ps, &options);
|
NIR_PASS_V(stage->nir, ac_nir_lower_ps, &options);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (radv_shader_should_clear_lds(device, stage->nir)) {
|
||||||
|
const unsigned chunk_size = 16; /* max single store size */
|
||||||
|
const unsigned shared_size = ALIGN(stage->nir->info.shared_size, chunk_size);
|
||||||
|
NIR_PASS(_, stage->nir, nir_clear_shared_memory, shared_size, chunk_size);
|
||||||
|
}
|
||||||
|
|
||||||
NIR_PASS(_, stage->nir, nir_lower_int64);
|
NIR_PASS(_, stage->nir, nir_lower_int64);
|
||||||
|
|
||||||
NIR_PASS(_, stage->nir, nir_opt_idiv_const, 8);
|
NIR_PASS(_, stage->nir, nir_opt_idiv_const, 8);
|
||||||
@@ -782,6 +791,14 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_pipeline_key
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Tell whether the given shader should have its shared memory (LDS) cleared.
 *
 * Returns true only when all three conditions hold:
 *  - the shader stage is compute, mesh or task,
 *  - the shader reports a non-zero info.shared_size,
 *  - the instance-level clear_lds setting is enabled.
 */
bool
|
||||||
|
radv_shader_should_clear_lds(const struct radv_device *device, const nir_shader *shader)
|
||||||
|
{
|
||||||
|
return (shader->info.stage == MESA_SHADER_COMPUTE || shader->info.stage == MESA_SHADER_MESH ||
|
||||||
|
shader->info.stage == MESA_SHADER_TASK) &&
|
||||||
|
shader->info.shared_size > 0 && device->instance->clear_lds;
|
||||||
|
}
|
||||||
|
|
||||||
static uint32_t
|
static uint32_t
|
||||||
radv_get_executable_count(struct radv_pipeline *pipeline)
|
radv_get_executable_count(struct radv_pipeline *pipeline)
|
||||||
{
|
{
|
||||||
|
@@ -382,6 +382,7 @@ struct radv_instance {
|
|||||||
bool force_rt_wave64;
|
bool force_rt_wave64;
|
||||||
bool dual_color_blend_by_location;
|
bool dual_color_blend_by_location;
|
||||||
bool legacy_sparse_binding;
|
bool legacy_sparse_binding;
|
||||||
|
bool clear_lds;
|
||||||
char *app_layer;
|
char *app_layer;
|
||||||
uint8_t override_graphics_shader_version;
|
uint8_t override_graphics_shader_version;
|
||||||
uint8_t override_compute_shader_version;
|
uint8_t override_compute_shader_version;
|
||||||
|
@@ -723,6 +723,8 @@ void radv_optimize_nir_algebraic(nir_shader *shader, bool opt_offsets);
|
|||||||
void radv_postprocess_nir(struct radv_device *device, const struct radv_pipeline_key *pipeline_key,
|
void radv_postprocess_nir(struct radv_device *device, const struct radv_pipeline_key *pipeline_key,
|
||||||
struct radv_shader_stage *stage);
|
struct radv_shader_stage *stage);
|
||||||
|
|
||||||
|
bool radv_shader_should_clear_lds(const struct radv_device *device, const nir_shader *shader);
|
||||||
|
|
||||||
nir_shader *radv_parse_rt_stage(struct radv_device *device, const VkPipelineShaderStageCreateInfo *sinfo,
|
nir_shader *radv_parse_rt_stage(struct radv_device *device, const VkPipelineShaderStageCreateInfo *sinfo,
|
||||||
const struct radv_pipeline_key *key,
|
const struct radv_pipeline_key *key,
|
||||||
const struct radv_pipeline_layout *pipeline_layout);
|
const struct radv_pipeline_layout *pipeline_layout);
|
||||||
|
@@ -1179,7 +1179,8 @@ radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *n
|
|||||||
(nir->info.stage == MESA_SHADER_MESH && device->physical_device->rad_info.gfx_level < GFX11);
|
(nir->info.stage == MESA_SHADER_MESH && device->physical_device->rad_info.gfx_level < GFX11);
|
||||||
info->cs.uses_local_invocation_idx = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_LOCAL_INVOCATION_INDEX) |
|
info->cs.uses_local_invocation_idx = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_LOCAL_INVOCATION_INDEX) |
|
||||||
BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SUBGROUP_ID) |
|
BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SUBGROUP_ID) |
|
||||||
BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_NUM_SUBGROUPS);
|
BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_NUM_SUBGROUPS) |
|
||||||
|
radv_shader_should_clear_lds(device, nir);
|
||||||
|
|
||||||
if (nir->info.stage == MESA_SHADER_COMPUTE || nir->info.stage == MESA_SHADER_TASK ||
|
if (nir->info.stage == MESA_SHADER_COMPUTE || nir->info.stage == MESA_SHADER_TASK ||
|
||||||
nir->info.stage == MESA_SHADER_MESH) {
|
nir->info.stage == MESA_SHADER_MESH) {
|
||||||
|
@@ -713,6 +713,9 @@
|
|||||||
|
|
||||||
#define DRI_CONF_RADV_APP_LAYER() DRI_CONF_OPT_S_NODEF(radv_app_layer, "Select an application layer.")
|
#define DRI_CONF_RADV_APP_LAYER() DRI_CONF_OPT_S_NODEF(radv_app_layer, "Select an application layer.")
|
||||||
|
|
||||||
|
/* Declares the "radv_clear_lds" driconf option; `def` is the default value
 * supplied by the driver's option table. */
#define DRI_CONF_RADV_CLEAR_LDS(def) \
|
||||||
|
DRI_CONF_OPT_B(radv_clear_lds, def, "Clear LDS at the end of shaders. Might decrease performance.")
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* \brief ANV specific configuration options
|
* \brief ANV specific configuration options
|
||||||
*/
|
*/
|
||||||
|
Reference in New Issue
Block a user