From 77e59eefc16055f0b6fff0f1ed6ddd9eafbd6ac1 Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Wed, 30 Oct 2024 16:27:28 +0100 Subject: [PATCH] radv: add an option to configure the trap handler exceptions This introduces RADV_TRAP_HANDLER_EXCP to configure the various shader exceptions. Signed-off-by: Samuel Pitoiset Part-of: --- docs/envvars.rst | 14 ++++++++++++++ src/amd/vulkan/radv_debug.h | 7 +++++++ src/amd/vulkan/radv_instance.c | 9 +++++++++ src/amd/vulkan/radv_instance.h | 1 + src/amd/vulkan/radv_shader.c | 29 ++++++++++++++++++++++------- 5 files changed, 53 insertions(+), 7 deletions(-) diff --git a/docs/envvars.rst b/docs/envvars.rst index f21e15124e0..d4e6f6b235b 100644 --- a/docs/envvars.rst +++ b/docs/envvars.rst @@ -1492,6 +1492,20 @@ RADV driver environment variables enable/disable the experimental trap handler for debugging GPU hangs on GFX8 (disabled by default) +.. envvar:: RADV_TRAP_HANDLER_EXCP + + a comma-separated list of named flags to configure the trap handler + exceptions, see the list below: + + ``mem_viol`` + enable memory violation exception + ``float_div_by_zero`` + enable floating point division by zero exception + ``float_overflow`` + enable floating point overflow exception + ``float_underflow`` + enable floating point underflow exception + .. envvar:: RADV_RRA_TRACE_VALIDATE enable validation of captured acceleration structures. 
Can be diff --git a/src/amd/vulkan/radv_debug.h b/src/amd/vulkan/radv_debug.h index e874d2a190b..d92eb1e16ff 100644 --- a/src/amd/vulkan/radv_debug.h +++ b/src/amd/vulkan/radv_debug.h @@ -81,6 +81,13 @@ enum { RADV_PERFTEST_VIDEO_ENCODE = 1u << 16, }; +enum { + RADV_TRAP_EXCP_MEM_VIOL = 1u << 0, + RADV_TRAP_EXCP_FLOAT_DIV_BY_ZERO = 1u << 1, + RADV_TRAP_EXCP_FLOAT_OVERFLOW = 1u << 2, + RADV_TRAP_EXCP_FLOAT_UNDERFLOW = 1u << 3, +}; + bool radv_init_trace(struct radv_device *device); void radv_finish_trace(struct radv_device *device); diff --git a/src/amd/vulkan/radv_instance.c b/src/amd/vulkan/radv_instance.c index ff98e336596..66bae68877a 100644 --- a/src/amd/vulkan/radv_instance.c +++ b/src/amd/vulkan/radv_instance.c @@ -100,6 +100,14 @@ static const struct debug_control radv_perftest_options[] = {{"localbos", RADV_P {"video_encode", RADV_PERFTEST_VIDEO_ENCODE}, {NULL, 0}}; +static const struct debug_control radv_trap_excp_options[] = { + {"mem_viol", RADV_TRAP_EXCP_MEM_VIOL}, + {"float_div_by_zero", RADV_TRAP_EXCP_FLOAT_DIV_BY_ZERO}, + {"float_overflow", RADV_TRAP_EXCP_FLOAT_OVERFLOW}, + {"float_underflow", RADV_TRAP_EXCP_FLOAT_UNDERFLOW}, + {NULL, 0}, +}; + const char * radv_get_perftest_option_name(int id) { @@ -343,6 +351,7 @@ radv_CreateInstance(const VkInstanceCreateInfo *pCreateInfo, const VkAllocationC instance->debug_flags = parse_debug_string(getenv("RADV_DEBUG"), radv_debug_options); instance->perftest_flags = parse_debug_string(getenv("RADV_PERFTEST"), radv_perftest_options); + instance->trap_excp_flags = parse_debug_string(getenv("RADV_TRAP_HANDLER_EXCP"), radv_trap_excp_options); instance->profile_pstate = radv_parse_pstate(debug_get_option("RADV_PROFILE_PSTATE", "peak")); /* When RADV_FORCE_FAMILY is set, the driver creates a null diff --git a/src/amd/vulkan/radv_instance.h b/src/amd/vulkan/radv_instance.h index fc103e1a1da..f43b211444d 100644 --- a/src/amd/vulkan/radv_instance.h +++ b/src/amd/vulkan/radv_instance.h @@ -42,6 +42,7 @@ struct radv_instance { uint64_t 
debug_flags; uint64_t perftest_flags; + uint64_t trap_excp_flags; enum radeon_ctx_pstate profile_pstate; struct { diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index 4dd636be1da..1e72ea3ba14 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -1889,6 +1889,7 @@ radv_postprocess_binary_config(struct radv_device *device, struct radv_shader_bi const struct radv_shader_args *args) { const struct radv_physical_device *pdev = radv_device_physical(device); + const struct radv_instance *instance = radv_physical_device_instance(pdev); struct ac_shader_config *config = &binary->config; if (binary->type == RADV_BINARY_TYPE_RTLD) { @@ -1940,6 +1941,7 @@ radv_postprocess_binary_config(struct radv_device *device, struct radv_shader_bi (pdev->info.gfx_level < GFX10 && num_shared_vgprs == 0)); unsigned num_shared_vgpr_blocks = num_shared_vgprs / 8; unsigned excp_en = 0, excp_en_msb = 0; + bool dx10_clamp = pdev->info.gfx_level < GFX12; config->num_vgprs = num_vgprs; config->num_sgprs = num_sgprs; @@ -1949,11 +1951,24 @@ radv_postprocess_binary_config(struct radv_device *device, struct radv_shader_bi S_00B12C_TRAP_PRESENT(trap_enabled); if (trap_enabled) { - /* Configure the shader exceptions like memory violation, etc. - * TODO: Enable (and validate) more exceptions. - */ - excp_en = 1 << 8; /* mem_viol for the graphics stages */ - excp_en_msb = 1 << 1; /* mem_viol for the compute stage */ + /* Configure the shader exceptions like memory violation, etc. 
*/ + if (instance->trap_excp_flags & RADV_TRAP_EXCP_MEM_VIOL) { + excp_en |= 1 << 8; /* for the graphics stages */ + excp_en_msb |= 1 << 1; /* for the compute stage */ + } + + if (instance->trap_excp_flags & RADV_TRAP_EXCP_FLOAT_DIV_BY_ZERO) + excp_en |= 1 << 2; + if (instance->trap_excp_flags & RADV_TRAP_EXCP_FLOAT_OVERFLOW) + excp_en |= 1 << 3; + if (instance->trap_excp_flags & RADV_TRAP_EXCP_FLOAT_UNDERFLOW) + excp_en |= 1 << 4; + + if (instance->trap_excp_flags & + (RADV_TRAP_EXCP_FLOAT_DIV_BY_ZERO | RADV_TRAP_EXCP_FLOAT_OVERFLOW | RADV_TRAP_EXCP_FLOAT_UNDERFLOW)) { + /* It seems needed to disable DX10_CLAMP, otherwise the float exceptions aren't thrown. */ + dx10_clamp = false; + } } if (!pdev->use_ngg_streamout) { @@ -1962,8 +1977,8 @@ radv_postprocess_binary_config(struct radv_device *device, struct radv_shader_bi S_00B12C_SO_EN(!!info->so.num_outputs); } - config->rsrc1 = S_00B848_VGPRS((num_vgprs - 1) / (info->wave_size == 32 ? 8 : 4)) | - S_00B848_DX10_CLAMP(pdev->info.gfx_level < GFX12) | S_00B848_FLOAT_MODE(config->float_mode); + config->rsrc1 = S_00B848_VGPRS((num_vgprs - 1) / (info->wave_size == 32 ? 8 : 4)) | S_00B848_DX10_CLAMP(dx10_clamp) | + S_00B848_FLOAT_MODE(config->float_mode); if (pdev->info.gfx_level >= GFX10) { config->rsrc2 |= S_00B22C_USER_SGPR_MSB_GFX10(args->num_user_sgprs >> 5);