freedreno/a6xx: move dynamic program state to streaming stateobj

Move the program state which we can't pre-bake to a streaming state
object, rather than emitting directly in the draw cmdstream.

Signed-off-by: Rob Clark <robdclark@chromium.org>
Reviewed-by: Kristian H. Kristensen <hoegsberg@google.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3435>
This commit is contained in:
Rob Clark
2020-01-16 14:38:41 -08:00
parent d2fd6469c3
commit 6dc9b292d0
4 changed files with 61 additions and 44 deletions

View File

@@ -1014,12 +1014,12 @@ fd6_emit_state(struct fd_ringbuffer *ring, struct fd6_emit *emit)
fd6_emit_add_group(emit, prog->binning_stateobj,
FD6_GROUP_PROG_BINNING, CP_SET_DRAW_STATE__0_BINNING);
/* emit remaining non-stateobj program state, ie. what depends
* on other emit state, so cannot be pre-baked. This could
* be moved to a separate stateobj which is dynamically
* created.
/* emit remaining streaming program state, ie. what depends on
* other emit state, so cannot be pre-baked.
*/
fd6_program_emit(ring, emit);
struct fd_ringbuffer *streaming = fd6_program_interp_state(emit);
fd6_emit_take_group(emit, streaming, FD6_GROUP_PROG_INTERP, ENABLE_DRAW);
}
if (dirty & FD_DIRTY_RASTERIZER) {

View File

@@ -46,6 +46,7 @@ enum fd6_state_id {
FD6_GROUP_PROG_CONFIG,
FD6_GROUP_PROG,
FD6_GROUP_PROG_BINNING,
FD6_GROUP_PROG_INTERP,
FD6_GROUP_LRZ,
FD6_GROUP_LRZ_BINNING,
FD6_GROUP_VBO,

View File

@@ -790,31 +790,6 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_screen *screen,
OUT_PKT4(ring, REG_A6XX_VPC_UNKNOWN_9107, 1);
OUT_RING(ring, 0);
if (!binning_pass) {
/* figure out VARYING_INTERP / VARYING_PS_REPL register values: */
for (j = -1; (j = ir3_next_varying(fs, j)) < (int)fs->inputs_count; ) {
/* NOTE: varyings are packed, so if compmask is 0xb
* then first, third, and fourth component occupy
* three consecutive varying slots:
*/
unsigned compmask = fs->inputs[j].compmask;
uint32_t inloc = fs->inputs[j].inloc;
if (fs->inputs[j].interpolate == INTERP_MODE_FLAT) {
uint32_t loc = inloc;
for (i = 0; i < 4; i++) {
if (compmask & (1 << i)) {
state->vinterp[loc / 16] |= 1 << ((loc % 16) * 2);
loc++;
}
}
}
}
}
if (fs->instrlen)
fd6_emit_shader(ring, fs);
@@ -846,24 +821,62 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_screen *screen,
ir3_emit_immediates(screen, fs, ring);
}
/* emits the program state which is not part of the stateobj because of
* dependency on other gl state (rasterflat or sprite-coord-replacement)
static struct fd_ringbuffer *
create_interp_stateobj(struct fd_context *ctx, struct fd6_program_state *state)
{
const struct ir3_shader_variant *fs = state->fs;
struct fd_ringbuffer *ring = fd_ringbuffer_new_object(ctx->pipe, 18 * 4);
uint32_t vinterp[8] = {0};
/* figure out VARYING_INTERP / VARYING_PS_REPL register values: */
for (int j = -1; (j = ir3_next_varying(fs, j)) < (int)fs->inputs_count; ) {
/* NOTE: varyings are packed, so if compmask is 0xb
* then first, third, and fourth component occupy
* three consecutive varying slots:
*/
unsigned compmask = fs->inputs[j].compmask;
uint32_t inloc = fs->inputs[j].inloc;
if (fs->inputs[j].interpolate == INTERP_MODE_FLAT) {
uint32_t loc = inloc;
for (int i = 0; i < 4; i++) {
if (compmask & (1 << i)) {
vinterp[loc / 16] |= 1 << ((loc % 16) * 2);
loc++;
}
}
}
}
OUT_PKT4(ring, REG_A6XX_VPC_VARYING_INTERP_MODE(0), 8);
for (int i = 0; i < 8; i++)
OUT_RING(ring, vinterp[i]); /* VPC_VARYING_INTERP[i].MODE */
OUT_PKT4(ring, REG_A6XX_VPC_VARYING_PS_REPL_MODE(0), 8);
for (int i = 0; i < 8; i++)
OUT_RING(ring, 0x00000000); /* VPC_VARYING_PS_REPL[i] */
return ring;
}
/* build the program streaming state which is not part of the pre-
* baked stateobj because of dependency on other gl state (rasterflat
* or sprite-coord-replacement)
*/
void
fd6_program_emit(struct fd_ringbuffer *ring, struct fd6_emit *emit)
struct fd_ringbuffer *
fd6_program_interp_state(struct fd6_emit *emit)
{
const struct fd6_program_state *state = fd6_emit_get_prog(emit);
if (!unlikely(emit->rasterflat || emit->sprite_coord_enable)) {
/* fastpath: */
OUT_PKT4(ring, REG_A6XX_VPC_VARYING_INTERP_MODE(0), 8);
for (int i = 0; i < 8; i++)
OUT_RING(ring, state->vinterp[i]); /* VPC_VARYING_INTERP[i].MODE */
OUT_PKT4(ring, REG_A6XX_VPC_VARYING_PS_REPL_MODE(0), 8);
for (int i = 0; i < 8; i++)
OUT_RING(ring, 0x00000000); /* VPC_VARYING_PS_REPL[i] */
return fd_ringbuffer_ref(state->interp_stateobj);
} else {
struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(
emit->ctx->batch->submit, 18 * 4, FD_RINGBUFFER_STREAMING);
/* slow-path: */
struct ir3_shader_variant *fs = state->fs;
uint32_t vinterp[8], vpsrepl[8];
@@ -938,6 +951,8 @@ fd6_program_emit(struct fd_ringbuffer *ring, struct fd6_emit *emit)
OUT_PKT4(ring, REG_A6XX_VPC_VARYING_PS_REPL_MODE(0), 8);
for (int i = 0; i < 8; i++)
OUT_RING(ring, vpsrepl[i]); /* VPC_VARYING_PS_REPL[i] */
return ring;
}
}
@@ -980,6 +995,7 @@ fd6_program_create(void *data, struct ir3_shader_variant *bs,
setup_config_stateobj(state->config_stateobj, state);
setup_stateobj(state->binning_stateobj, ctx->screen, state, key, true);
setup_stateobj(state->stateobj, ctx->screen, state, key, false);
state->interp_stateobj = create_interp_stateobj(ctx, state);
return &state->base;
}
@@ -991,6 +1007,7 @@ fd6_program_destroy(void *data, struct ir3_program_state *state)
fd_ringbuffer_del(so->stateobj);
fd_ringbuffer_del(so->binning_stateobj);
fd_ringbuffer_del(so->config_stateobj);
fd_ringbuffer_del(so->interp_stateobj);
free(so);
}

View File

@@ -52,13 +52,12 @@ struct fd6_program_state {
struct ir3_shader_variant *gs;
struct ir3_shader_variant *fs;
struct fd_ringbuffer *config_stateobj;
struct fd_ringbuffer *interp_stateobj;
struct fd_ringbuffer *binning_stateobj;
struct fd_ringbuffer *stateobj;
/* cached state about current emitted shader program (3d): */
struct fd6_streamout_state tf;
uint32_t vinterp[8];
};
static inline struct fd6_program_state *
@@ -80,7 +79,7 @@ fd6_last_shader(const struct fd6_program_state *state)
void fd6_emit_shader(struct fd_ringbuffer *ring, const struct ir3_shader_variant *so);
void fd6_program_emit(struct fd_ringbuffer *ring, struct fd6_emit *emit);
struct fd_ringbuffer * fd6_program_interp_state(struct fd6_emit *emit);
void fd6_prog_init(struct pipe_context *pctx);