radv: add DGC support for mesh shader only
This only implements mesh shaders with DGC because task shaders are really tricky. I will address them later.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25890>
commit 338319741c
parent eb3e1bdfe6
committed by Marge Bot
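For context (not part of the change itself): the path exercised here is VK_NV_device_generated_commands with a VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_MESH_TASKS_NV token, which radv_CreateIndirectCommandsLayoutNV starts accepting below for mesh-only pipelines. A minimal sketch of how an application might create such a layout follows; it assumes a VkDevice with VK_NV_device_generated_commands and VK_EXT_mesh_shader enabled, the extension entry point already loaded, and purely illustrative variable names.

/* Illustrative sketch: an indirect commands layout whose single token is a
 * mesh-tasks draw. The stream entry for this token is assumed to be a
 * VkDrawMeshTasksIndirectCommandEXT (groupCountX/Y/Z), matching the x/y/z
 * values the DGC prepare shader reads in dgc_emit_draw_mesh_tasks(). */
VkIndirectCommandsLayoutTokenNV token = {
   .sType = VK_STRUCTURE_TYPE_INDIRECT_COMMANDS_LAYOUT_TOKEN_NV,
   .tokenType = VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_MESH_TASKS_NV,
   .stream = 0,
   .offset = 0,
};

const uint32_t stream_stride = sizeof(VkDrawMeshTasksIndirectCommandEXT);

VkIndirectCommandsLayoutCreateInfoNV layout_info = {
   .sType = VK_STRUCTURE_TYPE_INDIRECT_COMMANDS_LAYOUT_CREATE_INFO_NV,
   .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
   .tokenCount = 1,
   .pTokens = &token,
   .streamCount = 1,
   .pStreamStrides = &stream_stride,
};

VkIndirectCommandsLayoutNV dgc_layout;
vkCreateIndirectCommandsLayoutNV(device, &layout_info, NULL, &dgc_layout);

Execution then goes through vkCmdExecuteGeneratedCommandsNV with a bound graphics pipeline that has a mesh shader but no task shader, which is exactly the case radv_CmdExecuteGeneratedCommandsNV starts handling in the diff below.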
@@ -9032,7 +9032,8 @@ radv_before_draw(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info
 }
 
 ALWAYS_INLINE static bool
-radv_before_taskmesh_draw(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info *info, uint32_t drawCount)
+radv_before_taskmesh_draw(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info *info, uint32_t drawCount,
+                          bool dgc)
 {
    /* For direct draws, this makes sure we don't draw anything.
     * For indirect draws, this is necessary to prevent a GPU hang (on MEC version < 100).
@@ -9090,6 +9091,7 @@ radv_before_taskmesh_draw(struct radv_cmd_buffer *cmd_buffer, const struct radv_
    if (pc_stages)
       radv_flush_constants(cmd_buffer, pc_stages, VK_PIPELINE_BIND_POINT_GRAPHICS);
 
+   if (!dgc)
       radv_describe_draw(cmd_buffer);
    if (likely(!info->indirect)) {
       struct radv_cmd_state *state = &cmd_buffer->state;
@@ -9335,7 +9337,7 @@ radv_CmdDrawMeshTasksEXT(VkCommandBuffer commandBuffer, uint32_t x, uint32_t y,
    info.count_buffer = NULL;
    info.indirect = NULL;
 
-   if (!radv_before_taskmesh_draw(cmd_buffer, &info, 1))
+   if (!radv_before_taskmesh_draw(cmd_buffer, &info, 1, false))
       return;
 
    if (radv_cmdbuf_has_stage(cmd_buffer, MESA_SHADER_TASK)) {
@@ -9368,7 +9370,7 @@ radv_CmdDrawMeshTasksIndirectEXT(VkCommandBuffer commandBuffer, VkBuffer _buffer
    info.indexed = false;
    info.instance_count = 0;
 
-   if (!radv_before_taskmesh_draw(cmd_buffer, &info, drawCount))
+   if (!radv_before_taskmesh_draw(cmd_buffer, &info, drawCount, false))
       return;
 
    if (radv_cmdbuf_has_stage(cmd_buffer, MESA_SHADER_TASK)) {
@@ -9402,7 +9404,7 @@ radv_CmdDrawMeshTasksIndirectCountEXT(VkCommandBuffer commandBuffer, VkBuffer _b
    info.indexed = false;
    info.instance_count = 0;
 
-   if (!radv_before_taskmesh_draw(cmd_buffer, &info, maxDrawCount))
+   if (!radv_before_taskmesh_draw(cmd_buffer, &info, maxDrawCount, false))
       return;
 
    if (radv_cmdbuf_has_stage(cmd_buffer, MESA_SHADER_TASK)) {
@@ -9453,6 +9455,7 @@ radv_CmdExecuteGeneratedCommandsNV(VkCommandBuffer commandBuffer, VkBool32 isPre
    if (compute) {
       radv_dgc_before_dispatch(cmd_buffer);
    } else {
+      struct radv_graphics_pipeline *graphics_pipeline = radv_pipeline_to_graphics(pipeline);
       struct radv_draw_info info;
 
       info.count = pGeneratedCommandsInfo->sequencesCount;
@@ -9465,9 +9468,14 @@ radv_CmdExecuteGeneratedCommandsNV(VkCommandBuffer commandBuffer, VkBool32 isPre
       info.indexed = layout->indexed;
       info.instance_count = 0;
 
+      if (radv_pipeline_has_stage(graphics_pipeline, MESA_SHADER_MESH)) {
+         if (!radv_before_taskmesh_draw(cmd_buffer, &info, 1, true))
+            return;
+      } else {
          if (!radv_before_draw(cmd_buffer, &info, 1, true))
            return;
+      }
    }
 
    uint32_t cmdbuf_size = radv_get_indirect_cmdbuf_size(pGeneratedCommandsInfo);
    struct radeon_winsys_bo *ib_bo = prep_buffer->bo;
@@ -88,10 +88,15 @@ radv_get_sequence_size_graphics(const struct radv_indirect_command_layout *layou
          /* PKT3_SET_BASE + PKT3_DRAW_{INDEX}_INDIRECT_MULTI */
          *cmd_size += (4 + (pipeline->uses_drawid ? 10 : 5)) * 4;
       }
    } else {
+      if (layout->draw_mesh_tasks) {
+         /* userdata writes + instance count + non-indexed draw */
+         *cmd_size += (6 + 2 + (device->mesh_fast_launch_2 ? 5 : 3)) * 4;
+      } else {
          /* userdata writes + instance count + non-indexed draw */
          *cmd_size += (5 + 2 + 3) * 4;
+      }
    }
 
    if (device->sqtt.bo) {
       /* sqtt markers */
@@ -196,6 +201,7 @@ struct radv_dgc_params {
    uint16_t binds_index_buffer;
    uint16_t vtx_base_sgpr;
    uint32_t max_index_count;
+   uint8_t draw_mesh_tasks;
 
    /* dispatch info */
    uint32_t dispatch_initiator;
@@ -229,6 +235,7 @@ struct radv_dgc_params {
 enum {
    DGC_USES_DRAWID = 1u << 14,
    DGC_USES_BASEINSTANCE = 1u << 15,
+   DGC_USES_GRID_SIZE = DGC_USES_BASEINSTANCE, /* Mesh shader only */
 };
 
 enum {
@@ -326,6 +333,37 @@ dgc_emit_userdata_vertex(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *vtx_bas
    dgc_emit(b, cs, nir_vec(b, values, 5));
 }
 
+static void
+dgc_emit_userdata_mesh(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *vtx_base_sgpr, nir_def *x, nir_def *y,
+                       nir_def *z, nir_def *drawid, const struct radv_device *device)
+{
+   vtx_base_sgpr = nir_u2u32(b, vtx_base_sgpr);
+   nir_def *has_grid_size = nir_test_mask(b, vtx_base_sgpr, DGC_USES_GRID_SIZE);
+   nir_def *has_drawid = nir_test_mask(b, vtx_base_sgpr, DGC_USES_DRAWID);
+
+   nir_push_if(b, nir_ior(b, has_grid_size, has_drawid));
+   {
+      nir_def *pkt_cnt = nir_imm_int(b, 0);
+      pkt_cnt = nir_bcsel(b, has_grid_size, nir_iadd_imm(b, pkt_cnt, 3), pkt_cnt);
+      pkt_cnt = nir_bcsel(b, has_drawid, nir_iadd_imm(b, pkt_cnt, 1), pkt_cnt);
+
+      nir_def *values[6] = {
+         nir_pkt3(b, PKT3_SET_SH_REG, pkt_cnt), nir_iand_imm(b, vtx_base_sgpr, 0x3FFF), dgc_get_nop_packet(b, device),
+         dgc_get_nop_packet(b, device), dgc_get_nop_packet(b, device), dgc_get_nop_packet(b, device),
+      };
+
+      /* DrawID needs to be first if no GridSize. */
+      values[2] = nir_bcsel(b, has_grid_size, x, drawid);
+      values[3] = nir_bcsel(b, has_grid_size, y, values[3]);
+      values[4] = nir_bcsel(b, has_grid_size, z, values[4]);
+      values[5] = nir_bcsel(b, has_drawid, drawid, values[5]);
+
+      for (uint32_t i = 0; i < ARRAY_SIZE(values); i++)
+         dgc_emit(b, cs, values[i]);
+   }
+   nir_pop_if(b, NULL);
+}
+
 static void
 dgc_emit_sqtt_userdata(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *data)
 {
@@ -447,6 +485,15 @@ dgc_emit_dispatch_direct(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *wg_x, n
    dgc_emit(b, cs, nir_vec(b, values, 5));
 }
 
+static void
+dgc_emit_dispatch_mesh_direct(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *x, nir_def *y, nir_def *z)
+{
+   nir_def *values[5] = {nir_imm_int(b, PKT3(PKT3_DISPATCH_MESH_DIRECT, 3, false)), x, y, z,
+                         nir_imm_int(b, S_0287F0_SOURCE_SELECT(V_0287F0_DI_SRC_SEL_AUTO_INDEX))};
+
+   dgc_emit(b, cs, nir_vec(b, values, 5));
+}
+
 static void
 dgc_emit_grid_size_user_sgpr(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *grid_base_sgpr, nir_def *wg_x,
                              nir_def *wg_y, nir_def *wg_z)
@@ -1111,6 +1158,42 @@ dgc_emit_dispatch(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_buf, ni
    nir_pop_if(b, 0);
 }
 
+/**
+ * Emit VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_MESH_TASKS_NV.
+ */
+static void
+dgc_emit_draw_mesh_tasks(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_buf, nir_def *stream_base,
+                         nir_def *draw_params_offset, nir_def *sequence_id, const struct radv_device *device)
+{
+   nir_def *vtx_base_sgpr = load_param16(b, vtx_base_sgpr);
+   nir_def *stream_offset = nir_iadd(b, draw_params_offset, stream_base);
+
+   nir_def *draw_data = nir_load_ssbo(b, 4, 32, stream_buf, stream_offset);
+   nir_def *x = nir_channel(b, draw_data, 0);
+   nir_def *y = nir_channel(b, draw_data, 1);
+   nir_def *z = nir_channel(b, draw_data, 2);
+
+   nir_push_if(b, nir_iand(b, nir_ine_imm(b, x, 0), nir_iand(b, nir_ine_imm(b, y, 0), nir_ine_imm(b, z, 0))));
+   {
+      dgc_emit_sqtt_begin_api_marker(b, cs, ApiCmdDrawMeshTasksEXT);
+      dgc_emit_sqtt_marker_event(b, cs, sequence_id, EventCmdDrawMeshTasksEXT);
+
+      dgc_emit_userdata_mesh(b, cs, vtx_base_sgpr, x, y, z, sequence_id, device);
+      dgc_emit_instance_count(b, cs, nir_imm_int(b, 1));
+
+      if (device->mesh_fast_launch_2) {
+         dgc_emit_dispatch_mesh_direct(b, cs, x, y, z);
+      } else {
+         nir_def *vertex_count = nir_imul(b, x, nir_imul(b, y, z));
+         dgc_emit_draw_index_auto(b, cs, vertex_count);
+      }
+
+      dgc_emit_sqtt_thread_trace_marker(b, cs);
+      dgc_emit_sqtt_end_api_marker(b, cs, ApiCmdDrawMeshTasksEXT);
+   }
+   nir_pop_if(b, NULL);
+}
+
 static nir_shader *
 build_dgc_prepare_shader(struct radv_device *dev)
 {
@@ -1187,11 +1270,21 @@ build_dgc_prepare_shader(struct radv_device *dev)
       nir_push_if(&b, nir_ieq_imm(&b, load_param8(&b, is_dispatch), 0));
       {
          nir_push_if(&b, nir_ieq_imm(&b, load_param16(&b, draw_indexed), 0));
         {
+            nir_def *draw_mesh_tasks = load_param8(&b, draw_mesh_tasks);
+            nir_push_if(&b, nir_ieq_imm(&b, draw_mesh_tasks, 0));
+            {
               dgc_emit_draw(&b, &cmd_buf, stream_buf, stream_base, load_param16(&b, draw_params_offset), sequence_id,
                             dev);
+            }
+            nir_push_else(&b, NULL);
+            {
+               dgc_emit_draw_mesh_tasks(&b, &cmd_buf, stream_buf, stream_base, load_param16(&b, draw_params_offset),
+                                        sequence_id, dev);
+            }
+            nir_pop_if(&b, NULL);
         }
         nir_push_else(&b, NULL);
         {
            /* Emit direct draws when index buffers are also updated by DGC. Otherwise, emit
             * indirect draws to remove the dependency on the cmdbuf state in order to enable
@@ -1413,6 +1506,10 @@ radv_CreateIndirectCommandsLayoutNV(VkDevice _device, const VkIndirectCommandsLa
             layout->push_constant_offsets[j] = pCreateInfo->pTokens[i].offset + k * 4;
          }
          break;
+      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_MESH_TASKS_NV:
+         layout->draw_mesh_tasks = true;
+         layout->draw_params_offset = pCreateInfo->pTokens[i].offset;
+         break;
       default:
          unreachable("Unhandled token type");
       }
@@ -1576,8 +1673,15 @@ radv_prepare_dgc_graphics(struct radv_cmd_buffer *cmd_buffer, const VkGeneratedC
 
    if (cmd_buffer->state.graphics_pipeline->uses_drawid)
       vtx_base_sgpr |= DGC_USES_DRAWID;
+
+   if (layout->draw_mesh_tasks) {
+      struct radv_shader *mesh_shader = radv_get_shader(graphics_pipeline->base.shaders, MESA_SHADER_MESH);
+      if (mesh_shader->info.cs.uses_grid_size)
+         vtx_base_sgpr |= DGC_USES_GRID_SIZE;
+   } else {
       if (cmd_buffer->state.graphics_pipeline->uses_baseinstance)
          vtx_base_sgpr |= DGC_USES_BASEINSTANCE;
+   }
 
    params->draw_indexed = layout->indexed;
    params->draw_params_offset = layout->draw_params_offset;
@@ -1587,6 +1691,7 @@ radv_prepare_dgc_graphics(struct radv_cmd_buffer *cmd_buffer, const VkGeneratedC
    params->index_buffer_offset = layout->index_buffer_offset;
    params->ibo_type_32 = layout->ibo_type_32;
    params->ibo_type_8 = layout->ibo_type_8;
+   params->draw_mesh_tasks = layout->draw_mesh_tasks;
 
    if (layout->bind_vbo_mask) {
       uint32_t mask = vs->info.vs.vb_desc_usage_mask;
@@ -3253,6 +3253,7 @@ struct radv_indirect_command_layout {
 
    bool indexed;
    bool binds_index_buffer;
+   bool draw_mesh_tasks;
    uint16_t draw_params_offset;
    uint16_t index_buffer_offset;
 