radv: add initial trap handler support with RADV_TRAP_HANDLER=1

A trap handler is used to handle shader exceptions like memory
violations, divide by zero etc. The trap handler shader code will
help to identify the faulty shader/instruction and to report
more information for better debugging.

This has only been tested on GFX8, though it should work on GFX6-GFX7.
It seems we need a different implementation for GFX9+.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6384>
This commit is contained in:
Samuel Pitoiset
2020-08-18 18:51:46 +02:00
committed by Marge Bot
parent 8fd2f5c16d
commit af3230e39e
4 changed files with 126 additions and 0 deletions

View File

@@ -36,6 +36,7 @@
#include "radv_shader.h"
/* Size of the trace BO; presumably in bytes (its use is not visible here). */
#define TRACE_BO_SIZE 4096
/* Size in bytes of the TMA (Trap Memory Address) BO allocated by
 * radv_trap_handler_init().
 */
#define TMA_BO_SIZE 4096
/* ANSI escape sequences used to colorize terminal output. */
#define COLOR_RESET "\033[0m"
#define COLOR_RED "\033[31m"
@@ -678,3 +679,58 @@ fail:
close(fd);
unlink(path);
}
/**
 * Set up the trap handler state of a device.
 *
 * Creates the trap handler shader, allocates and CPU-maps the TMA (Trap
 * Memory Address) buffer, and writes a buffer descriptor at the start of
 * that buffer. The descriptor covers the remainder of the buffer, i.e. the
 * region located right after the descriptor itself.
 *
 * Objects created before a failure are left in \p device and are not
 * released here; radv_trap_handler_finish() destroys whatever was created.
 *
 * \param device  Device to initialize the trap handler for.
 * \return true on success, false if the shader or the buffer could not be
 *         created or mapped.
 */
bool
radv_trap_handler_init(struct radv_device *device)
{
	struct radeon_winsys *ws = device->ws;

	/* Create the trap handler shader and upload it like other shaders. */
	device->trap_handler_shader = radv_create_trap_handler_shader(device);
	if (!device->trap_handler_shader) {
		fprintf(stderr, "radv: failed to create the trap handler shader.\n");
		return false;
	}

	device->tma_bo = ws->buffer_create(ws, TMA_BO_SIZE, 8,
					   RADEON_DOMAIN_VRAM,
					   RADEON_FLAG_CPU_ACCESS |
					   RADEON_FLAG_NO_INTERPROCESS_SHARING |
					   RADEON_FLAG_ZERO_VRAM,
					   RADV_BO_PRIORITY_SCRATCH);
	if (!device->tma_bo) {
		fprintf(stderr, "radv: failed to allocate the trap handler TMA buffer.\n");
		return false;
	}

	device->tma_ptr = ws->buffer_map(device->tma_bo);
	if (!device->tma_ptr) {
		fprintf(stderr, "radv: failed to map the trap handler TMA buffer.\n");
		return false;
	}

	/* Upload a buffer descriptor to store various info from the trap.
	 *
	 * The descriptor lives in the first sizeof(desc) bytes of the BO and
	 * describes the memory that follows it.
	 */
	uint32_t desc[4];
	const uint64_t tma_va = radv_buffer_get_va(device->tma_bo) + sizeof(desc);

	desc[0] = (uint32_t)tma_va;
	desc[1] = S_008F04_BASE_ADDRESS_HI(tma_va >> 32);
	/* The base address is offset by the descriptor size, so the number of
	 * addressable bytes must shrink by the same amount; otherwise the
	 * descriptor range would extend past the end of the BO.
	 */
	desc[2] = TMA_BO_SIZE - sizeof(desc);
	desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
		  S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
		  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
		  S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
		  S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);

	memcpy(device->tma_ptr, desc, sizeof(desc));

	return true;
}
/**
 * Tear down the trap handler state of a device.
 *
 * Destroys the trap handler shader and the TMA buffer when they exist.
 * Either object may be missing, in which case it is simply skipped.
 *
 * \param device  Device whose trap handler objects are released.
 */
void
radv_trap_handler_finish(struct radv_device *device)
{
	struct radeon_winsys *winsys = device->ws;

	/* The trap handler is a debugging feature, hence the unlikely(). */
	if (unlikely(device->trap_handler_shader)) {
		struct radv_shader_variant *handler = device->trap_handler_shader;

		radv_shader_variant_destroy(device, handler);
	}

	if (unlikely(device->tma_bo)) {
		struct radeon_winsys_bo *tma = device->tma_bo;

		winsys->buffer_destroy(tma);
	}
}

View File

@@ -82,4 +82,7 @@ radv_print_spirv(const char *data, uint32_t size, FILE *fp);
void
radv_dump_enabled_options(struct radv_device *device, FILE *f);
/* Create the trap handler shader and the TMA buffer of the device.
 * Returns false on failure.
 */
bool radv_trap_handler_init(struct radv_device *device);
/* Destroy the trap handler shader and the TMA buffer of the device, if any. */
void radv_trap_handler_finish(struct radv_device *device);
#endif

View File

@@ -2817,6 +2817,19 @@ VkResult radv_CreateDevice(
goto fail;
}
if (getenv("RADV_TRAP_HANDLER")) {
/* TODO: Add support for more hardware. */
assert(device->physical_device->rad_info.chip_class == GFX8);
/* To get the disassembly of the faulty shaders, we have to
* keep some shader info around.
*/
keep_shader_info = true;
if (!radv_trap_handler_init(device))
goto fail;
}
device->keep_shader_info = keep_shader_info;
result = radv_device_init_meta(device);
if (result != VK_SUCCESS)
@@ -2893,6 +2906,8 @@ fail:
radv_thread_trace_finish(device);
radv_trap_handler_finish(device);
if (device->trace_bo)
device->ws->buffer_destroy(device->trace_bo);
@@ -2942,6 +2957,8 @@ void radv_DestroyDevice(
VkPipelineCache pc = radv_pipeline_cache_to_handle(device->mem_cache);
radv_DestroyPipelineCache(radv_device_to_handle(device), pc, NULL);
radv_trap_handler_finish(device);
radv_destroy_shader_slabs(device);
pthread_cond_destroy(&device->timeline_cond);
@@ -3420,6 +3437,50 @@ radv_emit_global_shader_pointers(struct radv_queue *queue,
}
}
/**
 * Emit the trap handler registers into a command stream.
 *
 * Programs the TBA (address of the trap handler shader) and the TMA
 * (address of \p tma_bo) as four consecutive dwords: TBA low, TBA high,
 * TMA low, TMA high. Both addresses are written shifted right by 8 bits.
 * On the general queue the per-stage SPI_SHADER_TBA_LO_* registers are
 * written; on any other queue family COMPUTE_TBA_LO is written instead.
 *
 * Does nothing when the device has no trap handler shader or when
 * \p tma_bo is NULL.
 *
 * \param queue   Queue the command stream belongs to.
 * \param cs      Command stream to emit into.
 * \param tma_bo  Buffer used as trap memory, or NULL.
 */
static void
radv_emit_trap_handler(struct radv_queue *queue,
		       struct radeon_cmdbuf *cs,
		       struct radeon_winsys_bo *tma_bo)
{
	/* Graphics stages that each have their own TBA/TMA register set. */
	static const uint32_t gfx_tba_regs[] = {
		R_00B000_SPI_SHADER_TBA_LO_PS,
		R_00B100_SPI_SHADER_TBA_LO_VS,
		R_00B200_SPI_SHADER_TBA_LO_GS,
		R_00B300_SPI_SHADER_TBA_LO_ES,
		R_00B400_SPI_SHADER_TBA_LO_HS,
		R_00B500_SPI_SHADER_TBA_LO_LS,
	};
	struct radv_device *device = queue->device;

	if (!device->trap_handler_shader || !tma_bo)
		return;

	struct radeon_winsys_bo *shader_bo = device->trap_handler_shader->bo;
	const uint64_t shader_va = radv_buffer_get_va(shader_bo) +
				   device->trap_handler_shader->bo_offset;
	const uint64_t trap_mem_va = radv_buffer_get_va(tma_bo);

	radv_cs_add_buffer(device->ws, cs, shader_bo);
	radv_cs_add_buffer(device->ws, cs, tma_bo);

	/* Register payload: TBA_LO, TBA_HI, TMA_LO, TMA_HI. */
	const uint32_t tba_lo = shader_va >> 8;
	const uint32_t tba_hi = shader_va >> 40;
	const uint32_t tma_lo = trap_mem_va >> 8;
	const uint32_t tma_hi = trap_mem_va >> 40;

	if (queue->queue_family_index != RADV_QUEUE_GENERAL) {
		radeon_set_sh_reg_seq(cs, R_00B838_COMPUTE_TBA_LO, 4);
		radeon_emit(cs, tba_lo);
		radeon_emit(cs, tba_hi);
		radeon_emit(cs, tma_lo);
		radeon_emit(cs, tma_hi);
		return;
	}

	for (unsigned stage = 0; stage < ARRAY_SIZE(gfx_tba_regs); stage++) {
		radeon_set_sh_reg_seq(cs, gfx_tba_regs[stage], 4);
		radeon_emit(cs, tba_lo);
		radeon_emit(cs, tba_hi);
		radeon_emit(cs, tma_lo);
		radeon_emit(cs, tma_hi);
	}
}
static void
radv_init_graphics_state(struct radeon_cmdbuf *cs, struct radv_queue *queue)
{
@@ -3724,6 +3785,7 @@ radv_get_preamble_cs(struct radv_queue *queue,
compute_scratch_waves, compute_scratch_bo);
radv_emit_graphics_scratch(queue, cs, scratch_size_per_wave,
scratch_waves, scratch_bo);
radv_emit_trap_handler(queue, cs, queue->device->tma_bo);
if (gds_bo)
radv_cs_add_buffer(queue->device->ws, cs, gds_bo);

View File

@@ -847,6 +847,11 @@ struct radv_device {
uint32_t thread_trace_buffer_size;
int thread_trace_start_frame;
/* Trap handler. */
struct radv_shader_variant *trap_handler_shader;
struct radeon_winsys_bo *tma_bo; /* Trap Memory Address */
uint32_t *tma_ptr;
/* Overallocation. */
bool overallocation_disallowed;
uint64_t allocated_memory_size[VK_MAX_MEMORY_HEAPS];