freedreno/a5xx: indirect grids
Signed-off-by: Rob Clark <robdclark@gmail.com>
This commit is contained in:
@@ -26,6 +26,8 @@
|
|||||||
|
|
||||||
#include "pipe/p_state.h"
|
#include "pipe/p_state.h"
|
||||||
|
|
||||||
|
#include "freedreno_resource.h"
|
||||||
|
|
||||||
#include "fd5_compute.h"
|
#include "fd5_compute.h"
|
||||||
#include "fd5_context.h"
|
#include "fd5_context.h"
|
||||||
#include "fd5_emit.h"
|
#include "fd5_emit.h"
|
||||||
@@ -126,9 +128,6 @@ fd5_launch_grid(struct fd_context *ctx, const struct pipe_grid_info *info)
|
|||||||
struct ir3_shader_variant *v;
|
struct ir3_shader_variant *v;
|
||||||
struct fd_ringbuffer *ring = ctx->batch->draw;
|
struct fd_ringbuffer *ring = ctx->batch->draw;
|
||||||
|
|
||||||
if (info->indirect)
|
|
||||||
return; // TODO
|
|
||||||
|
|
||||||
v = ir3_shader_variant(so->shader, key, &ctx->debug);
|
v = ir3_shader_variant(so->shader, key, &ctx->debug);
|
||||||
|
|
||||||
if (ctx->dirty_shader[PIPE_SHADER_COMPUTE] & FD_DIRTY_SHADER_PROG)
|
if (ctx->dirty_shader[PIPE_SHADER_COMPUTE] & FD_DIRTY_SHADER_PROG)
|
||||||
@@ -158,11 +157,29 @@ fd5_launch_grid(struct fd_context *ctx, const struct pipe_grid_info *info)
|
|||||||
OUT_RING(ring, 1); /* HLSQ_CS_KERNEL_GROUP_Y */
|
OUT_RING(ring, 1); /* HLSQ_CS_KERNEL_GROUP_Y */
|
||||||
OUT_RING(ring, 1); /* HLSQ_CS_KERNEL_GROUP_Z */
|
OUT_RING(ring, 1); /* HLSQ_CS_KERNEL_GROUP_Z */
|
||||||
|
|
||||||
|
if (info->indirect) {
|
||||||
|
struct fd_resource *rsc = fd_resource(info->indirect);
|
||||||
|
|
||||||
|
OUT_PKT7(ring, CP_EVENT_WRITE, 4);
|
||||||
|
OUT_RING(ring, CACHE_FLUSH_TS);
|
||||||
|
OUT_RELOCW(ring, fd5_context(ctx)->blit_mem, 0, 0, 0); /* ADDR_LO/HI */
|
||||||
|
OUT_RING(ring, 0x00000000);
|
||||||
|
|
||||||
|
OUT_WFI5(ring);
|
||||||
|
|
||||||
|
OUT_PKT7(ring, CP_EXEC_CS_INDIRECT, 4);
|
||||||
|
OUT_RING(ring, 0x00000000);
|
||||||
|
OUT_RELOC(ring, rsc->bo, info->indirect_offset, 0, 0); /* ADDR_LO/HI */
|
||||||
|
OUT_RING(ring, CP_EXEC_CS_INDIRECT_3_LOCALSIZEX(local_size[0] - 1) |
|
||||||
|
CP_EXEC_CS_INDIRECT_3_LOCALSIZEY(local_size[1] - 1) |
|
||||||
|
CP_EXEC_CS_INDIRECT_3_LOCALSIZEZ(local_size[2] - 1));
|
||||||
|
} else {
|
||||||
OUT_PKT7(ring, CP_EXEC_CS, 4);
|
OUT_PKT7(ring, CP_EXEC_CS, 4);
|
||||||
OUT_RING(ring, 0x00000000);
|
OUT_RING(ring, 0x00000000);
|
||||||
OUT_RING(ring, CP_EXEC_CS_1_NGROUPS_X(info->grid[0]));
|
OUT_RING(ring, CP_EXEC_CS_1_NGROUPS_X(info->grid[0]));
|
||||||
OUT_RING(ring, CP_EXEC_CS_2_NGROUPS_Y(info->grid[1]));
|
OUT_RING(ring, CP_EXEC_CS_2_NGROUPS_Y(info->grid[1]));
|
||||||
OUT_RING(ring, CP_EXEC_CS_3_NGROUPS_Z(info->grid[2]));
|
OUT_RING(ring, CP_EXEC_CS_3_NGROUPS_Z(info->grid[2]));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
|
@@ -859,6 +859,50 @@ ir3_emit_cs_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *rin
|
|||||||
/* emit compute-shader driver-params: */
|
/* emit compute-shader driver-params: */
|
||||||
uint32_t offset = v->constbase.driver_param;
|
uint32_t offset = v->constbase.driver_param;
|
||||||
if (v->constlen > offset) {
|
if (v->constlen > offset) {
|
||||||
|
fd_wfi(ctx->batch, ring);
|
||||||
|
|
||||||
|
if (info->indirect) {
|
||||||
|
struct pipe_resource *indirect = NULL;
|
||||||
|
unsigned indirect_offset;
|
||||||
|
|
||||||
|
/* This is a bit awkward, but CP_LOAD_STATE.EXT_SRC_ADDR needs
|
||||||
|
* to be aligned more strongly than 4 bytes. So in this case
|
||||||
|
* we need a temporary buffer to copy NumWorkGroups.xyz to.
|
||||||
|
*
|
||||||
|
* TODO if previous compute job is writing to info->indirect,
|
||||||
|
* we might need a WFI.. but since we currently flush for each
|
||||||
|
* compute job, we are probably ok for now.
|
||||||
|
*/
|
||||||
|
if (info->indirect_offset & 0xf) {
|
||||||
|
indirect = pipe_buffer_create(&ctx->screen->base,
|
||||||
|
PIPE_BIND_COMMAND_ARGS_BUFFER, PIPE_USAGE_STREAM,
|
||||||
|
0x1000);
|
||||||
|
indirect_offset = 0;
|
||||||
|
|
||||||
|
if (is_a5xx(ctx->screen)) {
|
||||||
|
struct fd_bo *src = fd_resource(info->indirect)->bo;
|
||||||
|
struct fd_bo *dst = fd_resource(indirect)->bo;
|
||||||
|
for (unsigned i = 0; i < 3; i++) {
|
||||||
|
unsigned dst_off = i * 4;
|
||||||
|
unsigned src_off = (i * 4) + info->indirect_offset;
|
||||||
|
OUT_PKT7(ring, CP_MEM_TO_MEM, 5);
|
||||||
|
OUT_RING(ring, 0x00000000);
|
||||||
|
OUT_RELOCW(ring, dst, dst_off, 0, 0);
|
||||||
|
OUT_RELOC (ring, src, src_off, 0, 0);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
assert(0);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
pipe_resource_reference(&indirect, info->indirect);
|
||||||
|
indirect_offset = info->indirect_offset;
|
||||||
|
}
|
||||||
|
|
||||||
|
ctx->emit_const(ring, SHADER_COMPUTE, offset * 4,
|
||||||
|
indirect_offset, 4, NULL, indirect);
|
||||||
|
|
||||||
|
pipe_resource_reference(&indirect, NULL);
|
||||||
|
} else {
|
||||||
uint32_t compute_params[IR3_DP_CS_COUNT] = {
|
uint32_t compute_params[IR3_DP_CS_COUNT] = {
|
||||||
[IR3_DP_NUM_WORK_GROUPS_X] = info->grid[0],
|
[IR3_DP_NUM_WORK_GROUPS_X] = info->grid[0],
|
||||||
[IR3_DP_NUM_WORK_GROUPS_Y] = info->grid[1],
|
[IR3_DP_NUM_WORK_GROUPS_Y] = info->grid[1],
|
||||||
@@ -866,8 +910,8 @@ ir3_emit_cs_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *rin
|
|||||||
/* do we need work-group-size? */
|
/* do we need work-group-size? */
|
||||||
};
|
};
|
||||||
|
|
||||||
fd_wfi(ctx->batch, ring);
|
|
||||||
ctx->emit_const(ring, SHADER_COMPUTE, offset * 4, 0,
|
ctx->emit_const(ring, SHADER_COMPUTE, offset * 4, 0,
|
||||||
ARRAY_SIZE(compute_params), compute_params, NULL);
|
ARRAY_SIZE(compute_params), compute_params, NULL);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@@ -44,6 +44,11 @@ enum ir3_driver_param {
|
|||||||
IR3_DP_NUM_WORK_GROUPS_X = 0,
|
IR3_DP_NUM_WORK_GROUPS_X = 0,
|
||||||
IR3_DP_NUM_WORK_GROUPS_Y = 1,
|
IR3_DP_NUM_WORK_GROUPS_Y = 1,
|
||||||
IR3_DP_NUM_WORK_GROUPS_Z = 2,
|
IR3_DP_NUM_WORK_GROUPS_Z = 2,
|
||||||
|
/* NOTE: gl_NumWorkGroups should be vec4 aligned because
|
||||||
|
* glDispatchComputeIndirect() needs to load these from
|
||||||
|
* the info->indirect buffer. Keep that in mind when/if
|
||||||
|
* adding any additional CS driver params.
|
||||||
|
*/
|
||||||
IR3_DP_CS_COUNT = 4, /* must be aligned to vec4 */
|
IR3_DP_CS_COUNT = 4, /* must be aligned to vec4 */
|
||||||
|
|
||||||
/* vertex shader driver params: */
|
/* vertex shader driver params: */
|
||||||
|
Reference in New Issue
Block a user