freedreno/a6xx: Add bindless state

This will be used when we switch over to lowering image/SSBO to
bindless.

Note that it also starts using CP_SET_DRAW_STATE in the compute path.
Subsequent cleanup will switch texture and eventually other state over
as well (which will make more sense once we get more clever than
emitting all state for every compute grid, but for now it simplifies
re-using the same code between 3d and compute).

Signed-off-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20687>

Commit: e51975142c (parent: 101700b150)
Author: Rob Clark, 2023-01-01 12:09:06 -08:00
Committed by: Marge Bot

8 changed files with 313 additions and 26 deletions
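
At a high level, each shader stage gets its own bindless descriptor set: a
CPU-side shadow array of fixed-size descriptors plus a lazily created GPU
buffer that shaders index directly. A conceptual sketch of the shape
(illustrative names and sizes, standing in for FDL6_TEX_CONST_DWORDS and
IR3_BINDLESS_DESC_COUNT; the real struct is in the fd6_context.h hunk below):

   #include <stdint.h>

   struct fd_bo; /* kernel buffer-object handle (opaque here) */

   #define DESC_DWORDS 16  /* dwords per descriptor (illustrative) */
   #define DESC_COUNT  128 /* slots per set (illustrative) */

   struct bindless_set_sketch {
      uint32_t descriptor[DESC_COUNT][DESC_DWORDS]; /* CPU shadow copy */
      uint16_t seqno[DESC_COUNT]; /* per-slot rebind detection */
      struct fd_bo *bo;           /* GPU copy, rebuilt after invalidate */
   };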

src/gallium/drivers/freedreno/a6xx/fd6_compute.c

@@ -60,6 +60,10 @@ cs_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
OUT_PKT4(ring, REG_A6XX_SP_CS_CONFIG, 2);
OUT_RING(ring, A6XX_SP_CS_CONFIG_ENABLED |
COND(v->bindless_tex, A6XX_SP_CS_CONFIG_BINDLESS_TEX) |
COND(v->bindless_samp, A6XX_SP_CS_CONFIG_BINDLESS_SAMP) |
COND(v->bindless_ibo, A6XX_SP_CS_CONFIG_BINDLESS_IBO) |
COND(v->bindless_ubo, A6XX_SP_CS_CONFIG_BINDLESS_UBO) |
A6XX_SP_CS_CONFIG_NIBO(ir3_shader_nibo(v)) |
A6XX_SP_CS_CONFIG_NTEX(v->num_samp) |
A6XX_SP_CS_CONFIG_NSAMP(v->num_samp)); /* SP_VS_CONFIG */
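
For reference, COND() in the packing above is freedreno's conditional-bitfield
helper (from the driver's util headers), essentially:

   #define COND(bool, val) ((bool) ? (val) : 0)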

src/gallium/drivers/freedreno/a6xx/fd6_context.c

@@ -51,6 +51,10 @@ fd6_context_destroy(struct pipe_context *pctx) in_dt
{
struct fd6_context *fd6_ctx = fd6_context(fd_context(pctx));
fd6_descriptor_set_invalidate(&fd6_ctx->cs_descriptor_set);
for (unsigned i = 0; i < ARRAY_SIZE(fd6_ctx->descriptor_sets); i++)
fd6_descriptor_set_invalidate(&fd6_ctx->descriptor_sets[i]);
if (fd6_ctx->streamout_disable_stateobj)
fd_ringbuffer_del(fd6_ctx->streamout_disable_stateobj);
@@ -184,6 +188,26 @@ setup_state_map(struct fd_context *ctx)
fd_context_add_shader_map(ctx, PIPE_SHADER_FRAGMENT, FD_DIRTY_SHADER_TEX,
BIT(FD6_GROUP_FS_TEX));
fd_context_add_shader_map(ctx, PIPE_SHADER_VERTEX,
FD_DIRTY_SHADER_SSBO | FD_DIRTY_SHADER_IMAGE,
BIT(FD6_GROUP_VS_BINDLESS));
fd_context_add_shader_map(ctx, PIPE_SHADER_TESS_CTRL,
FD_DIRTY_SHADER_SSBO | FD_DIRTY_SHADER_IMAGE,
BIT(FD6_GROUP_HS_BINDLESS));
fd_context_add_shader_map(ctx, PIPE_SHADER_TESS_EVAL,
FD_DIRTY_SHADER_SSBO | FD_DIRTY_SHADER_IMAGE,
BIT(FD6_GROUP_DS_BINDLESS));
fd_context_add_shader_map(ctx, PIPE_SHADER_GEOMETRY,
FD_DIRTY_SHADER_SSBO | FD_DIRTY_SHADER_IMAGE,
BIT(FD6_GROUP_GS_BINDLESS));
/* NOTE: FD6_GROUP_FS_BINDLESS has a weak dependency on the program
* state (ie. it needs to be re-generated with fb-read descriptor
* patched in) but this special case is handled in fd6_emit_3d_state()
*/
fd_context_add_shader_map(ctx, PIPE_SHADER_FRAGMENT,
FD_DIRTY_SHADER_SSBO | FD_DIRTY_SHADER_IMAGE,
BIT(FD6_GROUP_FS_BINDLESS));
/* NOTE: scissor enabled bit is part of rasterizer state, but
* fd_rasterizer_state_bind() will mark scissor dirty if needed:
*/
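
These registrations tie per-stage dirty bits to state groups: conceptually,
each stage's SSBO/image dirty bits select exactly one bindless group, per this
illustrative (not the driver's actual) table:

   /* Illustrative: the stage -> state-group correspondence established
    * by the fd_context_add_shader_map() calls above. */
   static const uint32_t bindless_group_for_stage[PIPE_SHADER_TYPES] = {
      [PIPE_SHADER_VERTEX]    = BIT(FD6_GROUP_VS_BINDLESS),
      [PIPE_SHADER_TESS_CTRL] = BIT(FD6_GROUP_HS_BINDLESS),
      [PIPE_SHADER_TESS_EVAL] = BIT(FD6_GROUP_DS_BINDLESS),
      [PIPE_SHADER_GEOMETRY]  = BIT(FD6_GROUP_GS_BINDLESS),
      [PIPE_SHADER_FRAGMENT]  = BIT(FD6_GROUP_FS_BINDLESS),
   };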

src/gallium/drivers/freedreno/a6xx/fd6_context.h

@@ -62,8 +62,22 @@ struct fd6_descriptor_set {
* resource has been rebound
*/
uint16_t seqno[IR3_BINDLESS_DESC_COUNT];
/**
* Current GPU copy of the descriptor set
*/
struct fd_bo *bo;
};
static void
fd6_descriptor_set_invalidate(struct fd6_descriptor_set *set)
{
if (!set->bo)
return;
fd_bo_del(set->bo);
set->bo = NULL;
}
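
The helper above implements an invalidate-on-write, rebuild-on-use lifecycle:
any CPU-side edit of the descriptor array drops the GPU copy, and
fd6_build_bindless_state() re-creates it at the next draw or grid. A condensed
sketch of the write side (hypothetical helper, not part of the commit):

   static void
   write_descriptor_slot(struct fd6_descriptor_set *set, unsigned slot,
                         const uint32_t *desc)
   {
      fd6_descriptor_set_invalidate(set); /* GPU copy is now stale */
      memcpy(set->descriptor[slot], desc, sizeof(set->descriptor[slot]));
      /* set->bo stays NULL until the next emit re-uploads the set */
   }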
struct fd6_context {
struct fd_context base;

src/gallium/drivers/freedreno/a6xx/fd6_emit.c

@@ -793,11 +793,13 @@ fd6_emit_3d_state(struct fd_ringbuffer *ring, struct fd6_emit *emit)
emit_marker6(ring, 5);
/* NOTE: we track fb_read differently than _BLEND_ENABLED since we
* might decide to do sysmem in some cases when blend is enabled:
*/
/* Special case, we need to re-emit bindless FS state w/ the
* fb-read state appended:
*/
if (fs->fb_read)
if ((emit->dirty_groups & BIT(FD6_GROUP_PROG)) && fs->fb_read) {
ctx->batch->gmem_reason |= FD_GMEM_FB_READ;
emit->dirty_groups |= BIT(FD6_GROUP_FS_BINDLESS);
}
u_foreach_bit (b, emit->dirty_groups) {
enum fd6_state_id group = b;
@@ -862,6 +864,26 @@ fd6_emit_3d_state(struct fd_ringbuffer *ring, struct fd6_emit *emit)
state = build_ibo(emit);
fd6_state_take_group(&emit->state, state, FD6_GROUP_IBO);
break;
case FD6_GROUP_VS_BINDLESS:
state = fd6_build_bindless_state(ctx, PIPE_SHADER_VERTEX, false);
fd6_state_take_group(&emit->state, state, FD6_GROUP_VS_BINDLESS);
break;
case FD6_GROUP_HS_BINDLESS:
state = fd6_build_bindless_state(ctx, PIPE_SHADER_TESS_CTRL, false);
fd6_state_take_group(&emit->state, state, FD6_GROUP_HS_BINDLESS);
break;
case FD6_GROUP_DS_BINDLESS:
state = fd6_build_bindless_state(ctx, PIPE_SHADER_TESS_EVAL, false);
fd6_state_take_group(&emit->state, state, FD6_GROUP_DS_BINDLESS);
break;
case FD6_GROUP_GS_BINDLESS:
state = fd6_build_bindless_state(ctx, PIPE_SHADER_GEOMETRY, false);
fd6_state_take_group(&emit->state, state, FD6_GROUP_GS_BINDLESS);
break;
case FD6_GROUP_FS_BINDLESS:
state = fd6_build_bindless_state(ctx, PIPE_SHADER_FRAGMENT, fs->fb_read);
fd6_state_take_group(&emit->state, state, FD6_GROUP_FS_BINDLESS);
break;
case FD6_GROUP_CONST:
state = fd6_build_user_consts(emit);
fd6_state_take_group(&emit->state, state, FD6_GROUP_CONST);
@@ -913,6 +935,7 @@ void
fd6_emit_cs_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
struct ir3_shader_variant *cp)
{
struct fd6_state state = {};
enum fd_dirty_shader_state dirty = ctx->dirty_shader[PIPE_SHADER_COMPUTE];
if (dirty & (FD_DIRTY_SHADER_TEX | FD_DIRTY_SHADER_PROG |
@@ -957,6 +980,24 @@ fd6_emit_cs_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
fd_ringbuffer_del(state);
}
u_foreach_bit (b, ctx->gen_dirty) {
enum fd6_state_id group = b;
switch (group) {
case FD6_GROUP_CS_BINDLESS:
fd6_state_take_group(
&state,
fd6_build_bindless_state(ctx, PIPE_SHADER_COMPUTE, false),
FD6_GROUP_CS_BINDLESS);
break;
default:
/* State-group unused for compute shaders */
break;
}
}
fd6_state_emit(&state, ring);
}
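
This is where the commit-message note about compute using CP_SET_DRAW_STATE
lands: fd6_state_emit() converts the accumulated groups into a single packet
of (header, IB pointer) entries. A condensed sketch of its output, not the
verbatim helper:

   /* One CP_SET_DRAW_STATE packet, three dwords per state group, each
    * entry pointing at that group's state-object IB: */
   OUT_PKT7(ring, CP_SET_DRAW_STATE, 3 * state->num_groups);
   for (unsigned i = 0; i < state->num_groups; i++) {
      struct fd6_state_group *g = &state->groups[i];
      OUT_RING(ring, CP_SET_DRAW_STATE__0_COUNT(
                        fd_ringbuffer_size(g->stateobj) / 4) |
                     g->enable_mask |
                     CP_SET_DRAW_STATE__0_GROUP_ID(g->group_id));
      OUT_RB(ring, g->stateobj); /* 64b pointer to the state IB */
   }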
/* emit setup at begin of new cmdstream buffer (don't rely on previous

src/gallium/drivers/freedreno/a6xx/fd6_emit.h

@@ -66,6 +66,11 @@ enum fd6_state_id {
FD6_GROUP_BLEND_COLOR,
FD6_GROUP_SO,
FD6_GROUP_IBO,
FD6_GROUP_VS_BINDLESS,
FD6_GROUP_HS_BINDLESS,
FD6_GROUP_DS_BINDLESS,
FD6_GROUP_GS_BINDLESS,
FD6_GROUP_FS_BINDLESS,
/*
* Virtual state-groups, which don't turn into a CP_SET_DRAW_STATE group
@@ -73,6 +78,12 @@ enum fd6_state_id {
FD6_GROUP_PROG_KEY, /* Set for any state which could change shader key */
FD6_GROUP_NON_GROUP, /* placeholder group for state emit in IB2, keep last */
/*
* Note that since we don't interleave draws and grids in the same batch,
* the compute vs draw state groups can overlap:
*/
FD6_GROUP_CS_BINDLESS = FD6_GROUP_VS_BINDLESS,
};
#define ENABLE_ALL \
@@ -133,6 +144,7 @@ fd6_state_take_group(struct fd6_state *state, struct fd_ringbuffer *stateobj,
[FD6_GROUP_PROG_BINNING] = CP_SET_DRAW_STATE__0_BINNING,
[FD6_GROUP_PROG_INTERP] = ENABLE_DRAW,
[FD6_GROUP_FS_TEX] = ENABLE_DRAW,
[FD6_GROUP_FS_BINDLESS] = ENABLE_DRAW,
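/* The ENABLE_DRAW entries above are draw-pass-only (rather than the
 * default binning+draw) because fragment-shader state has no effect
 * during the binning pass. */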
};
assert(state->num_groups < ARRAY_SIZE(state->groups));
struct fd6_state_group *g = &state->groups[state->num_groups++];

src/gallium/drivers/freedreno/a6xx/fd6_image.c

@@ -158,6 +158,17 @@ descriptor_set(struct fd_context *ctx, enum pipe_shader_type shader)
static void
clear_descriptor(struct fd6_descriptor_set *set, unsigned slot)
{
/* The 2nd dword of the descriptor contains the width and height,
* so a non-zero value means the slot was previously valid and
* must be cleared. We can't leave dangling descriptors as the
* shader could use variable indexing into the set of IBOs to
* get at them. See piglit arb_shader_image_load_store-invalid.
*/
if (!set->descriptor[slot][1])
return;
fd6_descriptor_set_invalidate(set);
memset(set->descriptor[slot], 0, sizeof(set->descriptor[slot]));
}
@@ -170,6 +181,8 @@ validate_image_descriptor(struct fd_context *ctx, struct fd6_descriptor_set *set
if (!rsc || (rsc->seqno == set->seqno[slot]))
return;
fd6_descriptor_set_invalidate(set);
fd6_image_descriptor(ctx, img, set->descriptor[slot]);
set->seqno[slot] = rsc->seqno;
}
@@ -183,6 +196,8 @@ validate_buffer_descriptor(struct fd_context *ctx, struct fd6_descriptor_set *se
if (!rsc || (rsc->seqno == set->seqno[slot]))
return;
fd6_descriptor_set_invalidate(set);
fd6_ssbo_descriptor(ctx, buf, set->descriptor[slot]);
set->seqno[slot] = rsc->seqno;
}
@@ -221,6 +236,182 @@ fd6_build_ibo_state(struct fd_context *ctx, const struct ir3_shader_variant *v,
return state;
}
/* Build bindless descriptor state, returns ownership of state reference */
struct fd_ringbuffer *
fd6_build_bindless_state(struct fd_context *ctx, enum pipe_shader_type shader,
bool append_fb_read)
{
struct fd_shaderbuf_stateobj *bufso = &ctx->shaderbuf[shader];
struct fd_shaderimg_stateobj *imgso = &ctx->shaderimg[shader];
struct fd6_descriptor_set *set = descriptor_set(ctx, shader);
struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(
ctx->batch->submit, 16 * 4, FD_RINGBUFFER_STREAMING);
/* Don't re-use a previous descriptor set if appending the
* fb-read descriptor, as that can change across batches.
* The normal descriptor slots are safe to re-use even if
* the state is dirtied due to batch flush, but the fb-read
* slot is not.
*/
if (unlikely(append_fb_read))
fd6_descriptor_set_invalidate(set);
/*
* Re-validate the descriptor slots, ie. in the case that
* the resource gets rebound due to use with non-UBWC
* compatible view format, etc.
*
* While we are at it, attach the BOs to the ring.
*/
u_foreach_bit (b, bufso->enabled_mask) {
struct pipe_shader_buffer *buf = &bufso->sb[b];
unsigned idx = b + IR3_BINDLESS_SSBO_OFFSET;
validate_buffer_descriptor(ctx, set, idx, buf);
if (buf->buffer)
fd_ringbuffer_attach_bo(ring, fd_resource(buf->buffer)->bo);
}
u_foreach_bit (b, imgso->enabled_mask) {
struct pipe_image_view *img = &imgso->si[b];
unsigned idx = b + IR3_BINDLESS_IMAGE_OFFSET;
validate_image_descriptor(ctx, set, idx, img);
if (img->resource)
fd_ringbuffer_attach_bo(ring, fd_resource(img->resource)->bo);
}
if (!set->bo) {
set->bo = fd_bo_new(
ctx->dev, sizeof(set->descriptor),
/* Use same flags as ringbuffer so hits the same heap,
* because those will already have the FD_RELOC_DUMP
* flag set:
*/
FD_BO_GPUREADONLY | FD_BO_CACHED_COHERENT,
"%s bindless", _mesa_shader_stage_to_abbrev(shader));
fd_bo_mark_for_dump(set->bo);
uint32_t *desc_buf = fd_bo_map(set->bo);
memcpy(desc_buf, set->descriptor, sizeof(set->descriptor));
if (unlikely(append_fb_read)) {
/* The last image slot is used for fb-read: */
unsigned idx = IR3_BINDLESS_DESC_COUNT - 1;
/* This is patched with the appropriate descriptor for GMEM or
* sysmem rendering path in fd6_gmem
*/
struct fd_cs_patch patch = {
.cs = &desc_buf[idx * FDL6_TEX_CONST_DWORDS],
};
util_dynarray_append(&ctx->batch->fb_read_patches,
__typeof__(patch), patch);
}
}
/*
* Build stateobj emitting reg writes to configure the descriptor
* set and CP_LOAD_STATE packets to preload the state.
*
* Note that unless the app is using the max # of SSBOs there will
* be a gap between the IBO descriptors used for SSBOs and for images,
* so emit this as two CP_LOAD_STATE packets:
*/
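/* In SS6_BINDLESS mode the CP_LOAD_STATE6 "address" dwords are not an
 * address: bits [31:28] select the descriptor set (idx) and the low
 * bits give the dword offset into that set, as the packing below
 * suggests. */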
unsigned idx = ir3_shader_descriptor_set(shader);
if (shader == PIPE_SHADER_COMPUTE) {
OUT_REG(ring, A6XX_HLSQ_INVALIDATE_CMD(.cs_bindless = 0x1f));
OUT_REG(ring, A6XX_SP_CS_BINDLESS_BASE_DESCRIPTOR(
idx, .desc_size = BINDLESS_DESCRIPTOR_64B, .bo = set->bo,
));
OUT_REG(ring, A6XX_HLSQ_CS_BINDLESS_BASE_DESCRIPTOR(
idx, .desc_size = BINDLESS_DESCRIPTOR_64B, .bo = set->bo,
));
if (bufso->enabled_mask) {
OUT_PKT(ring, CP_LOAD_STATE6_FRAG,
CP_LOAD_STATE6_0(
.dst_off = IR3_BINDLESS_SSBO_OFFSET,
.state_type = ST6_IBO,
.state_src = SS6_BINDLESS,
.state_block = SB6_CS_SHADER,
.num_unit = util_last_bit(bufso->enabled_mask),
),
CP_LOAD_STATE6_EXT_SRC_ADDR(
/* This isn't actually an address: */
.qword = (idx << 28) |
IR3_BINDLESS_SSBO_OFFSET * FDL6_TEX_CONST_DWORDS,
),
);
}
if (imgso->enabled_mask) {
OUT_PKT(ring, CP_LOAD_STATE6_FRAG,
CP_LOAD_STATE6_0(
.dst_off = IR3_BINDLESS_IMAGE_OFFSET,
.state_type = ST6_IBO,
.state_src = SS6_BINDLESS,
.state_block = SB6_CS_SHADER,
.num_unit = util_last_bit(imgso->enabled_mask),
),
CP_LOAD_STATE6_EXT_SRC_ADDR(
/* This isn't actually an address: */
.qword = (idx << 28) |
IR3_BINDLESS_IMAGE_OFFSET * FDL6_TEX_CONST_DWORDS,
),
);
}
} else {
OUT_REG(ring, A6XX_HLSQ_INVALIDATE_CMD(.gfx_bindless = 0x1f));
OUT_REG(ring, A6XX_SP_BINDLESS_BASE_DESCRIPTOR(
idx, .desc_size = BINDLESS_DESCRIPTOR_64B, .bo = set->bo,
));
OUT_REG(ring, A6XX_HLSQ_BINDLESS_BASE_DESCRIPTOR(
idx, .desc_size = BINDLESS_DESCRIPTOR_64B, .bo = set->bo,
));
if (bufso->enabled_mask) {
OUT_PKT(ring, CP_LOAD_STATE6,
CP_LOAD_STATE6_0(
.dst_off = IR3_BINDLESS_SSBO_OFFSET,
.state_type = ST6_SHADER,
.state_src = SS6_BINDLESS,
.state_block = SB6_IBO,
.num_unit = util_last_bit(bufso->enabled_mask),
),
CP_LOAD_STATE6_EXT_SRC_ADDR(
/* This isn't actually an address: */
.qword = (idx << 28) |
IR3_BINDLESS_SSBO_OFFSET * FDL6_TEX_CONST_DWORDS,
),
);
}
if (imgso->enabled_mask) {
OUT_PKT(ring, CP_LOAD_STATE6,
CP_LOAD_STATE6_0(
.dst_off = IR3_BINDLESS_IMAGE_OFFSET,
.state_type = ST6_SHADER,
.state_src = SS6_BINDLESS,
.state_block = SB6_IBO,
.num_unit = util_last_bit(imgso->enabled_mask),
),
CP_LOAD_STATE6_EXT_SRC_ADDR(
/* This isn't actually an address: */
.qword = (idx << 28) |
IR3_BINDLESS_IMAGE_OFFSET * FDL6_TEX_CONST_DWORDS,
),
);
}
}
return ring;
}
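
For context on fb_read_patches: the recorded locations let the gmem code fill
in whichever fb-read descriptor matches the rendering path finally chosen for
the batch. A hypothetical sketch of that consumer (stand-in name; the real
patching lives in the gmem code):

   static void
   apply_fb_read_patches(struct fd_batch *batch, const uint32_t *fb_read_desc)
   {
      util_dynarray_foreach (&batch->fb_read_patches, struct fd_cs_patch, patch) {
         memcpy(patch->cs, fb_read_desc,
                FDL6_TEX_CONST_DWORDS * sizeof(uint32_t));
      }
      util_dynarray_clear(&batch->fb_read_patches);
   }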
static void
fd6_set_shader_buffers(struct pipe_context *pctx, enum pipe_shader_type shader,
unsigned start, unsigned count,

src/gallium/drivers/freedreno/a6xx/fd6_image.h

@@ -39,6 +39,9 @@ struct ir3_shader_variant;
struct fd_ringbuffer *
fd6_build_ibo_state(struct fd_context *ctx, const struct ir3_shader_variant *v,
enum pipe_shader_type shader) assert_dt;
struct fd_ringbuffer *
fd6_build_bindless_state(struct fd_context *ctx, enum pipe_shader_type shader,
bool append_fb_read) assert_dt;
void fd6_image_init(struct pipe_context *pctx);

src/gallium/drivers/freedreno/a6xx/fd6_program.c

@@ -289,6 +289,22 @@ setup_stream_out(struct fd_context *ctx, struct fd6_program_state *state,
state->streamout_stateobj = ring;
}
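/* The SP_xS_CONFIG registers share one bitfield layout across stages,
 * so the VS field macros below can stand in for HS/DS/GS/FS too. */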
static uint32_t
sp_xs_config(struct ir3_shader_variant *v)
{
if (!v)
return 0;
return A6XX_SP_VS_CONFIG_ENABLED |
COND(v->bindless_tex, A6XX_SP_VS_CONFIG_BINDLESS_TEX) |
COND(v->bindless_samp, A6XX_SP_VS_CONFIG_BINDLESS_SAMP) |
COND(v->bindless_ibo, A6XX_SP_VS_CONFIG_BINDLESS_IBO) |
COND(v->bindless_ubo, A6XX_SP_VS_CONFIG_BINDLESS_UBO) |
A6XX_SP_VS_CONFIG_NIBO(ir3_shader_nibo(v)) |
A6XX_SP_VS_CONFIG_NTEX(v->num_samp) |
A6XX_SP_VS_CONFIG_NSAMP(v->num_samp);
}
static void
setup_config_stateobj(struct fd_context *ctx, struct fd6_program_state *state)
{
@@ -318,37 +334,19 @@ setup_config_stateobj(struct fd_context *ctx, struct fd6_program_state *state)
A6XX_HLSQ_FS_CNTL_ENABLED);
OUT_PKT4(ring, REG_A6XX_SP_VS_CONFIG, 1);
OUT_RING(ring, COND(state->vs, A6XX_SP_VS_CONFIG_ENABLED) |
A6XX_SP_VS_CONFIG_NIBO(ir3_shader_nibo(state->vs)) |
A6XX_SP_VS_CONFIG_NTEX(state->vs->num_samp) |
A6XX_SP_VS_CONFIG_NSAMP(state->vs->num_samp));
OUT_RING(ring, sp_xs_config(state->vs));
OUT_PKT4(ring, REG_A6XX_SP_HS_CONFIG, 1);
OUT_RING(ring, COND(state->hs,
A6XX_SP_HS_CONFIG_ENABLED |
A6XX_SP_HS_CONFIG_NIBO(ir3_shader_nibo(state->hs)) |
A6XX_SP_HS_CONFIG_NTEX(state->hs->num_samp) |
A6XX_SP_HS_CONFIG_NSAMP(state->hs->num_samp)));
OUT_RING(ring, sp_xs_config(state->hs));
OUT_PKT4(ring, REG_A6XX_SP_DS_CONFIG, 1);
OUT_RING(ring, COND(state->ds,
A6XX_SP_DS_CONFIG_ENABLED |
A6XX_SP_DS_CONFIG_NIBO(ir3_shader_nibo(state->ds)) |
A6XX_SP_DS_CONFIG_NTEX(state->ds->num_samp) |
A6XX_SP_DS_CONFIG_NSAMP(state->ds->num_samp)));
OUT_RING(ring, sp_xs_config(state->ds));
OUT_PKT4(ring, REG_A6XX_SP_GS_CONFIG, 1);
OUT_RING(ring, COND(state->gs,
A6XX_SP_GS_CONFIG_ENABLED |
A6XX_SP_GS_CONFIG_NIBO(ir3_shader_nibo(state->gs)) |
A6XX_SP_GS_CONFIG_NTEX(state->gs->num_samp) |
A6XX_SP_GS_CONFIG_NSAMP(state->gs->num_samp)));
OUT_RING(ring, sp_xs_config(state->gs));
OUT_PKT4(ring, REG_A6XX_SP_FS_CONFIG, 1);
OUT_RING(ring, COND(state->fs, A6XX_SP_FS_CONFIG_ENABLED) |
A6XX_SP_FS_CONFIG_NIBO(ir3_shader_nibo(state->fs)) |
A6XX_SP_FS_CONFIG_NTEX(state->fs->num_samp) |
A6XX_SP_FS_CONFIG_NSAMP(state->fs->num_samp));
OUT_RING(ring, sp_xs_config(state->fs));
OUT_PKT4(ring, REG_A6XX_SP_IBO_COUNT, 1);
OUT_RING(ring, ir3_shader_nibo(state->fs));