anv: avoid MI commands to copy draw indirect count
We can just make the address of the count available to the generation shader. Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Ivan Briano <ivan.briano@intel.com> Tested-by: Felix DeGrood <felix.j.degrood@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25361>
This commit is contained in:

committed by
Marge Bot

parent
1af1085d76
commit
2e0ff4c551
@@ -355,10 +355,10 @@ anv_device_init_internal_kernels(struct anv_device *device)
|
||||
ARRAY_SIZE(gfx11_generated_draws_spv_source) :
|
||||
ARRAY_SIZE(gfx9_generated_draws_spv_source),
|
||||
.send_count = device->info->ver >= 11 ?
|
||||
11 /* 2 * (2 loads + 3 stores) + 1 store */ :
|
||||
17 /* 2 * (2 loads + 6 stores) + 1 store */,
|
||||
12 /* 2 * (2 loads + 3 stores) + 1 load + 1 store */ :
|
||||
18 /* 2 * (2 loads + 6 stores) + 1 load + 1 store */,
|
||||
.bind_map = {
|
||||
.num_bindings = 4,
|
||||
.num_bindings = 5,
|
||||
.bindings = {
|
||||
{
|
||||
.address_offset = offsetof(struct anv_generated_indirect_params,
|
||||
@@ -372,6 +372,10 @@ anv_device_init_internal_kernels(struct anv_device *device)
|
||||
.address_offset = offsetof(struct anv_generated_indirect_params,
|
||||
draw_ids_addr),
|
||||
},
|
||||
{
|
||||
.address_offset = offsetof(struct anv_generated_indirect_params,
|
||||
draw_count_addr),
|
||||
},
|
||||
{
|
||||
.push_constant = true,
|
||||
},
|
||||
|
@@ -33,7 +33,7 @@
|
||||
|
||||
/* This needs to match common_generated_draws.glsl :
|
||||
*
|
||||
* layout(set = 0, binding = 3) uniform block
|
||||
* layout(set = 0, binding = 4) uniform block
|
||||
*/
|
||||
struct anv_generated_indirect_draw_params {
|
||||
/* Draw ID buffer address (only used on Gfx9) */
|
||||
@@ -47,18 +47,12 @@ struct anv_generated_indirect_draw_params {
|
||||
* gl_FragCoord
|
||||
*/
|
||||
uint32_t draw_base;
|
||||
|
||||
/* Number of draws to generate */
|
||||
uint32_t draw_count;
|
||||
|
||||
/* Maximum number of draws (equal to draw_count for indirect draws without
|
||||
* an indirect count)
|
||||
*/
|
||||
uint32_t max_draw_count;
|
||||
|
||||
/* Instance multiplier for multi view */
|
||||
uint32_t instance_multiplier;
|
||||
|
||||
/* Address to jump to after the generated draw (only used with
|
||||
* indirect draw count variants)
|
||||
*/
|
||||
@@ -68,6 +62,9 @@ struct anv_generated_indirect_draw_params {
|
||||
struct anv_generated_indirect_params {
|
||||
struct anv_generated_indirect_draw_params draw;
|
||||
|
||||
/* Draw count value for non count variants of draw indirect commands */
|
||||
uint32_t draw_count;
|
||||
|
||||
/* Global address of binding 0 */
|
||||
uint64_t indirect_data_addr;
|
||||
|
||||
@@ -77,6 +74,9 @@ struct anv_generated_indirect_params {
|
||||
/* Global address of binding 2 */
|
||||
uint64_t draw_ids_addr;
|
||||
|
||||
/* Global address of binding 3 (points to the draw_count field above) */
|
||||
uint64_t draw_count_addr;
|
||||
|
||||
/* CPU side pointer to the previous item when number of draws has to be
|
||||
* split into smaller chunks, see while loop in
|
||||
* genX(cmd_buffer_emit_indirect_generated_draws)
|
||||
|
@@ -53,7 +53,6 @@ genX(cmd_buffer_emit_generate_draws)(struct anv_cmd_buffer *cmd_buffer,
|
||||
bool indexed)
|
||||
{
|
||||
struct anv_device *device = cmd_buffer->device;
|
||||
struct anv_batch *batch = &cmd_buffer->generation_batch;
|
||||
|
||||
struct anv_state push_data_state =
|
||||
genX(simple_shader_alloc_push)(&cmd_buffer->generation_shader_state,
|
||||
@@ -62,6 +61,16 @@ genX(cmd_buffer_emit_generate_draws)(struct anv_cmd_buffer *cmd_buffer,
|
||||
struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
|
||||
const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);
|
||||
|
||||
struct anv_address draw_count_addr;
|
||||
if (anv_address_is_null(count_addr)) {
|
||||
draw_count_addr = anv_address_add(
|
||||
genX(simple_shader_push_state_address)(
|
||||
&cmd_buffer->generation_shader_state, push_data_state),
|
||||
offsetof(struct anv_generated_indirect_params, draw_count));
|
||||
} else {
|
||||
draw_count_addr = count_addr;
|
||||
}
|
||||
|
||||
struct anv_generated_indirect_params *push_data = push_data_state.map;
|
||||
*push_data = (struct anv_generated_indirect_params) {
|
||||
.draw = {
|
||||
@@ -79,39 +88,16 @@ genX(cmd_buffer_emit_generate_draws)(struct anv_cmd_buffer *cmd_buffer,
|
||||
ISL_SURF_USAGE_VERTEX_BUFFER_BIT) << 8) |
|
||||
((generated_cmd_stride / 4) << 16),
|
||||
.draw_base = item_base,
|
||||
/* If count_addr is not NULL, we'll edit it through the command
|
||||
* streamer.
|
||||
*/
|
||||
.draw_count = anv_address_is_null(count_addr) ? max_count : 0,
|
||||
.max_draw_count = max_count,
|
||||
.instance_multiplier = pipeline->instance_multiplier,
|
||||
},
|
||||
.draw_count = anv_address_is_null(count_addr) ? max_count : 0,
|
||||
.indirect_data_addr = anv_address_physical(indirect_data_addr),
|
||||
.generated_cmds_addr = anv_address_physical(generated_cmds_addr),
|
||||
.draw_ids_addr = anv_address_physical(draw_id_addr),
|
||||
.draw_count_addr = anv_address_physical(draw_count_addr),
|
||||
};
|
||||
|
||||
if (!anv_address_is_null(count_addr)) {
|
||||
/* Copy the draw count into the push constants so that the generation
|
||||
* gets the value straight away and doesn't even need to access memory.
|
||||
*/
|
||||
struct mi_builder b;
|
||||
mi_builder_init(&b, device->info, batch);
|
||||
mi_memcpy(&b,
|
||||
anv_address_add(
|
||||
genX(simple_shader_push_state_address)(
|
||||
&cmd_buffer->generation_shader_state,
|
||||
push_data_state),
|
||||
offsetof(struct anv_generated_indirect_params, draw.draw_count)),
|
||||
count_addr, 4);
|
||||
|
||||
/* Make sure the memcpy landed for the generating draw call to pick up
|
||||
* the value.
|
||||
*/
|
||||
genx_batch_emit_pipe_control(batch, cmd_buffer->device->info,
|
||||
ANV_PIPE_CS_STALL_BIT);
|
||||
}
|
||||
|
||||
genX(emit_simple_shader_dispatch)(&cmd_buffer->generation_shader_state,
|
||||
item_count, push_data_state);
|
||||
|
||||
|
@@ -41,14 +41,26 @@ layout(set = 0, binding = 2, std430) buffer Storage2 {
|
||||
uint draw_ids[];
|
||||
};
|
||||
|
||||
/* We're not using a uniform block for this because our compiler
|
||||
* infrastructure relies on UBOs to be 32-bytes aligned so that we can push
|
||||
* them into registers. This value can come directly from the indirect buffer
|
||||
* given to indirect draw commands and the requirement there is 4-bytes
|
||||
* alignment.
|
||||
*
|
||||
* Also prefix the variable name with an underscore as a reminder to copy it
|
||||
* into a local once, avoiding unnecessary memory accesses.
|
||||
*/
|
||||
layout(set = 0, binding = 3) buffer Storage3 {
|
||||
uint _draw_count;
|
||||
};
|
||||
|
||||
/* This data will be provided through push constants. */
|
||||
layout(set = 0, binding = 3) uniform block {
|
||||
layout(set = 0, binding = 4) uniform block {
|
||||
uint64_t draw_id_addr;
|
||||
uint64_t indirect_data_addr;
|
||||
uint indirect_data_stride;
|
||||
uint flags;
|
||||
uint draw_base;
|
||||
uint draw_count;
|
||||
uint max_draw_count;
|
||||
uint instance_multiplier;
|
||||
uint64_t end_addr;
|
||||
|
@@ -80,6 +80,7 @@ void main()
|
||||
uint item_idx = uint(gl_FragCoord.y) * 8192 + uint(gl_FragCoord.x);
|
||||
uint cmd_idx = item_idx * _3dprim_dw_size;
|
||||
uint draw_id = draw_base + item_idx;
|
||||
uint draw_count = _draw_count;
|
||||
|
||||
if (draw_id < draw_count)
|
||||
write_draw(item_idx, cmd_idx, draw_id);
|
||||
|
@@ -138,6 +138,7 @@ void main()
|
||||
uint item_idx = uint(gl_FragCoord.y) * 8192 + uint(gl_FragCoord.x);
|
||||
uint cmd_idx = item_idx * _3dprim_dw_size;
|
||||
uint draw_id = draw_base + item_idx;
|
||||
uint draw_count = _draw_count;
|
||||
|
||||
if (draw_id < draw_count)
|
||||
write_draw(item_idx, cmd_idx, draw_id);
|
||||
|
Reference in New Issue
Block a user