radv: rework the CS regalloc hang workaround

Compute the workaround flag once at pipeline creation instead of on every dispatch, to reduce computations in the hot path.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15162>
This commit is contained in:
Samuel Pitoiset
2022-02-25 08:54:27 +01:00
parent d532da6e96
commit 7ad1eb4e8c
3 changed files with 11 additions and 6 deletions

View File

@@ -7378,12 +7378,8 @@ radv_dispatch(struct radv_cmd_buffer *cmd_buffer, const struct radv_dispatch_inf
{ {
bool has_prefetch = cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7; bool has_prefetch = cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7;
bool pipeline_is_dirty = pipeline != cmd_buffer->state.emitted_compute_pipeline; bool pipeline_is_dirty = pipeline != cmd_buffer->state.emitted_compute_pipeline;
struct radv_shader *compute_shader = pipeline->shaders[MESA_SHADER_COMPUTE];
unsigned *cs_block_size = compute_shader->info.cs.block_size;
bool cs_regalloc_hang = cmd_buffer->device->physical_device->rad_info.has_cs_regalloc_hang_bug &&
cs_block_size[0] * cs_block_size[1] * cs_block_size[2] > 256;
if (cs_regalloc_hang) if (pipeline->compute.cs_regalloc_hang_bug)
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH | cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
RADV_CMD_FLAG_CS_PARTIAL_FLUSH; RADV_CMD_FLAG_CS_PARTIAL_FLUSH;
@@ -7442,7 +7438,7 @@ radv_dispatch(struct radv_cmd_buffer *cmd_buffer, const struct radv_dispatch_inf
: VK_PIPELINE_BIND_POINT_COMPUTE); : VK_PIPELINE_BIND_POINT_COMPUTE);
} }
if (cs_regalloc_hang) if (pipeline->compute.cs_regalloc_hang_bug)
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH; cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH;
radv_cmd_buffer_after_draw(cmd_buffer, RADV_CMD_FLAG_CS_PARTIAL_FLUSH); radv_cmd_buffer_after_draw(cmd_buffer, RADV_CMD_FLAG_CS_PARTIAL_FLUSH);

View File

@@ -6504,6 +6504,14 @@ radv_compute_pipeline_create(VkDevice _device, VkPipelineCache _cache,
pipeline->push_constant_size = pipeline_layout->push_constant_size; pipeline->push_constant_size = pipeline_layout->push_constant_size;
pipeline->dynamic_offset_count = pipeline_layout->dynamic_offset_count; pipeline->dynamic_offset_count = pipeline_layout->dynamic_offset_count;
if (device->physical_device->rad_info.has_cs_regalloc_hang_bug) {
struct radv_shader *compute_shader = pipeline->shaders[MESA_SHADER_COMPUTE];
unsigned *cs_block_size = compute_shader->info.cs.block_size;
pipeline->compute.cs_regalloc_hang_bug =
cs_block_size[0] * cs_block_size[1] * cs_block_size[2] > 256;
}
radv_compute_generate_pm4(pipeline); radv_compute_generate_pm4(pipeline);
*pPipeline = radv_pipeline_to_handle(pipeline); *pPipeline = radv_pipeline_to_handle(pipeline);

View File

@@ -1909,6 +1909,7 @@ struct radv_pipeline {
struct radv_pipeline_shader_stack_size *rt_stack_sizes; struct radv_pipeline_shader_stack_size *rt_stack_sizes;
bool dynamic_stack_size; bool dynamic_stack_size;
uint32_t group_count; uint32_t group_count;
bool cs_regalloc_hang_bug;
} compute; } compute;
struct { struct {
unsigned stage_count; unsigned stage_count;