diff --git a/src/amd/vulkan/radv_instance.c b/src/amd/vulkan/radv_instance.c index cf80d2ab9c6..762b4b3383d 100644 --- a/src/amd/vulkan/radv_instance.c +++ b/src/amd/vulkan/radv_instance.c @@ -129,6 +129,7 @@ static const driOptionDescription radv_dri_options[] = { DRI_CONF_RADV_DISABLE_SHRINK_IMAGE_STORE(false) DRI_CONF_RADV_NO_DYNAMIC_BOUNDS(false) DRI_CONF_RADV_OVERRIDE_UNIFORM_OFFSET_ALIGNMENT(0) + DRI_CONF_RADV_CLEAR_LDS(false) DRI_CONF_SECTION_END DRI_CONF_SECTION_DEBUG @@ -193,6 +194,8 @@ radv_init_dri_options(struct radv_instance *instance) if (driQueryOptionb(&instance->dri_options, "radv_disable_dcc")) instance->debug_flags |= RADV_DEBUG_NO_DCC; + instance->clear_lds = driQueryOptionb(&instance->dri_options, "radv_clear_lds"); /* cache the "radv_clear_lds" drirc option on the instance */ + instance->zero_vram = driQueryOptionb(&instance->dri_options, "radv_zero_vram"); instance->disable_aniso_single_level = driQueryOptionb(&instance->dri_options, "radv_disable_aniso_single_level"); diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index 38048d9cfc8..a807cbdbab6 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -229,6 +229,7 @@ radv_generate_pipeline_key(const struct radv_device *device, const VkPipelineSha #define RADV_HASH_SHADER_PS_WAVE32 (1 << 2) #define RADV_HASH_SHADER_GE_WAVE32 (1 << 3) #define RADV_HASH_SHADER_LLVM (1 << 4) +#define RADV_HASH_SHADER_CLEAR_LDS (1 << 5) /* hash flag bit OR-ed in by radv_get_hash_flags() when instance->clear_lds is set */ #define RADV_HASH_SHADER_KEEP_STATISTICS (1 << 8) #define RADV_HASH_SHADER_USE_NGG_CULLING (1 << 13) #define RADV_HASH_SHADER_EMULATE_RT (1 << 16) @@ -267,6 +268,8 @@ radv_get_hash_flags(const struct radv_device *device, bool stats) hash_flags |= RADV_HASH_SHADER_NO_RT; if (device->instance->dual_color_blend_by_location) hash_flags |= RADV_HASH_SHADER_DUAL_BLEND_MRT1; + if (device->instance->clear_lds) + hash_flags |= RADV_HASH_SHADER_CLEAR_LDS; /* fold the LDS-clear option into the returned hash flags */ return hash_flags; } @@ -701,6 +704,12 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_pipeline_key 
NIR_PASS_V(stage->nir, ac_nir_lower_ps, &options); } + if (radv_shader_should_clear_lds(device, stage->nir)) { /* round the shared-memory size up to a whole number of chunks, then run nir_clear_shared_memory on it */ + const unsigned chunk_size = 16; /* max single store size */ + const unsigned shared_size = ALIGN(stage->nir->info.shared_size, chunk_size); + NIR_PASS(_, stage->nir, nir_clear_shared_memory, shared_size, chunk_size); + } + NIR_PASS(_, stage->nir, nir_lower_int64); NIR_PASS(_, stage->nir, nir_opt_idiv_const, 8); @@ -782,6 +791,14 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_pipeline_key } } +bool +radv_shader_should_clear_lds(const struct radv_device *device, const nir_shader *shader) +{ /* true only for compute, mesh and task shaders with a non-zero shared_size, and only when instance->clear_lds is set */ + return (shader->info.stage == MESA_SHADER_COMPUTE || shader->info.stage == MESA_SHADER_MESH || + shader->info.stage == MESA_SHADER_TASK) && + shader->info.shared_size > 0 && device->instance->clear_lds; +} + static uint32_t radv_get_executable_count(struct radv_pipeline *pipeline) { diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index 09e56f05725..3f0da53d273 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -382,6 +382,7 @@ struct radv_instance { bool force_rt_wave64; bool dual_color_blend_by_location; bool legacy_sparse_binding; + bool clear_lds; /* value of the "radv_clear_lds" drirc option, filled in by radv_init_dri_options() */ char *app_layer; uint8_t override_graphics_shader_version; uint8_t override_compute_shader_version; diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h index 1e57200955b..bab97c1e7a4 100644 --- a/src/amd/vulkan/radv_shader.h +++ b/src/amd/vulkan/radv_shader.h @@ -723,6 +723,8 @@ void radv_optimize_nir_algebraic(nir_shader *shader, bool opt_offsets); void radv_postprocess_nir(struct radv_device *device, const struct radv_pipeline_key *pipeline_key, struct radv_shader_stage *stage); +bool radv_shader_should_clear_lds(const struct radv_device *device, const nir_shader *shader); /* gates the shared-memory clear pass in radv_postprocess_nir() */ + nir_shader *radv_parse_rt_stage(struct radv_device *device, const VkPipelineShaderStageCreateInfo *sinfo, const struct radv_pipeline_key *key, 
const struct radv_pipeline_layout *pipeline_layout); diff --git a/src/amd/vulkan/radv_shader_info.c b/src/amd/vulkan/radv_shader_info.c index 42ea2f2d6ad..9c74669ecc0 100644 --- a/src/amd/vulkan/radv_shader_info.c +++ b/src/amd/vulkan/radv_shader_info.c @@ -1179,7 +1179,8 @@ radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *n (nir->info.stage == MESA_SHADER_MESH && device->physical_device->rad_info.gfx_level < GFX11); info->cs.uses_local_invocation_idx = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_LOCAL_INVOCATION_INDEX) | BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SUBGROUP_ID) | - BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_NUM_SUBGROUPS); + BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_NUM_SUBGROUPS) | + radv_shader_should_clear_lds(device, nir); /* NOTE(review): presumably the shared-memory clear pass consumes the local invocation index, hence this extra term -- confirm */ if (nir->info.stage == MESA_SHADER_COMPUTE || nir->info.stage == MESA_SHADER_TASK || nir->info.stage == MESA_SHADER_MESH) { diff --git a/src/util/driconf.h b/src/util/driconf.h index 4c06f9cc5e2..e91bf81a381 100644 --- a/src/util/driconf.h +++ b/src/util/driconf.h @@ -713,6 +713,9 @@ #define DRI_CONF_RADV_APP_LAYER() DRI_CONF_OPT_S_NODEF(radv_app_layer, "Select an application layer.") +#define DRI_CONF_RADV_CLEAR_LDS(def) \ + DRI_CONF_OPT_B(radv_clear_lds, def, "Clear LDS at the end of shaders. Might decrease performance.") /* drirc knob read back in radv_init_dri_options() via driQueryOptionb(); "def" is forwarded as the option default */ + /** * \brief ANV specific configuration options */