freedreno/a6xx: move dynamic program state to streaming stateobj
Move the program state which we can't pre-bake to a streaming state object, rather than emitting it directly in the draw cmdstream.

Signed-off-by: Rob Clark <robdclark@chromium.org>
Reviewed-by: Kristian H. Kristensen <hoegsberg@google.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3435>
This commit is contained in:
@@ -1014,12 +1014,12 @@ fd6_emit_state(struct fd_ringbuffer *ring, struct fd6_emit *emit)
|
||||
fd6_emit_add_group(emit, prog->binning_stateobj,
|
||||
FD6_GROUP_PROG_BINNING, CP_SET_DRAW_STATE__0_BINNING);
|
||||
|
||||
/* emit remaining non-stateobj program state, ie. what depends
|
||||
* on other emit state, so cannot be pre-baked. This could
|
||||
* be moved to a separate stateobj which is dynamically
|
||||
* created.
|
||||
/* emit remaining streaming program state, ie. what depends on
|
||||
* other emit state, so cannot be pre-baked.
|
||||
*/
|
||||
fd6_program_emit(ring, emit);
|
||||
struct fd_ringbuffer *streaming = fd6_program_interp_state(emit);
|
||||
|
||||
fd6_emit_take_group(emit, streaming, FD6_GROUP_PROG_INTERP, ENABLE_DRAW);
|
||||
}
|
||||
|
||||
if (dirty & FD_DIRTY_RASTERIZER) {
|
||||
|
@@ -46,6 +46,7 @@ enum fd6_state_id {
|
||||
FD6_GROUP_PROG_CONFIG,
|
||||
FD6_GROUP_PROG,
|
||||
FD6_GROUP_PROG_BINNING,
|
||||
FD6_GROUP_PROG_INTERP,
|
||||
FD6_GROUP_LRZ,
|
||||
FD6_GROUP_LRZ_BINNING,
|
||||
FD6_GROUP_VBO,
|
||||
|
@@ -790,31 +790,6 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_screen *screen,
|
||||
OUT_PKT4(ring, REG_A6XX_VPC_UNKNOWN_9107, 1);
|
||||
OUT_RING(ring, 0);
|
||||
|
||||
|
||||
if (!binning_pass) {
|
||||
/* figure out VARYING_INTERP / VARYING_PS_REPL register values: */
|
||||
for (j = -1; (j = ir3_next_varying(fs, j)) < (int)fs->inputs_count; ) {
|
||||
/* NOTE: varyings are packed, so if compmask is 0xb
|
||||
* then first, third, and fourth component occupy
|
||||
* three consecutive varying slots:
|
||||
*/
|
||||
unsigned compmask = fs->inputs[j].compmask;
|
||||
|
||||
uint32_t inloc = fs->inputs[j].inloc;
|
||||
|
||||
if (fs->inputs[j].interpolate == INTERP_MODE_FLAT) {
|
||||
uint32_t loc = inloc;
|
||||
|
||||
for (i = 0; i < 4; i++) {
|
||||
if (compmask & (1 << i)) {
|
||||
state->vinterp[loc / 16] |= 1 << ((loc % 16) * 2);
|
||||
loc++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (fs->instrlen)
|
||||
fd6_emit_shader(ring, fs);
|
||||
|
||||
@@ -846,24 +821,62 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_screen *screen,
|
||||
ir3_emit_immediates(screen, fs, ring);
|
||||
}
|
||||
|
||||
/* emits the program state which is not part of the stateobj because of
|
||||
* dependency on other gl state (rasterflat or sprite-coord-replacement)
|
||||
static struct fd_ringbuffer *
|
||||
create_interp_stateobj(struct fd_context *ctx, struct fd6_program_state *state)
|
||||
{
|
||||
const struct ir3_shader_variant *fs = state->fs;
|
||||
struct fd_ringbuffer *ring = fd_ringbuffer_new_object(ctx->pipe, 18 * 4);
|
||||
uint32_t vinterp[8] = {0};
|
||||
|
||||
/* figure out VARYING_INTERP / VARYING_PS_REPL register values: */
|
||||
for (int j = -1; (j = ir3_next_varying(fs, j)) < (int)fs->inputs_count; ) {
|
||||
/* NOTE: varyings are packed, so if compmask is 0xb
|
||||
* then first, third, and fourth component occupy
|
||||
* three consecutive varying slots:
|
||||
*/
|
||||
void
|
||||
fd6_program_emit(struct fd_ringbuffer *ring, struct fd6_emit *emit)
|
||||
unsigned compmask = fs->inputs[j].compmask;
|
||||
|
||||
uint32_t inloc = fs->inputs[j].inloc;
|
||||
|
||||
if (fs->inputs[j].interpolate == INTERP_MODE_FLAT) {
|
||||
uint32_t loc = inloc;
|
||||
|
||||
for (int i = 0; i < 4; i++) {
|
||||
if (compmask & (1 << i)) {
|
||||
vinterp[loc / 16] |= 1 << ((loc % 16) * 2);
|
||||
loc++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
OUT_PKT4(ring, REG_A6XX_VPC_VARYING_INTERP_MODE(0), 8);
|
||||
for (int i = 0; i < 8; i++)
|
||||
OUT_RING(ring, vinterp[i]); /* VPC_VARYING_INTERP[i].MODE */
|
||||
|
||||
OUT_PKT4(ring, REG_A6XX_VPC_VARYING_PS_REPL_MODE(0), 8);
|
||||
for (int i = 0; i < 8; i++)
|
||||
OUT_RING(ring, 0x00000000); /* VPC_VARYING_PS_REPL[i] */
|
||||
|
||||
return ring;
|
||||
}
|
||||
|
||||
/* build the program streaming state which is not part of the pre-
|
||||
* baked stateobj because of dependency on other gl state (rasterflat
|
||||
* or sprite-coord-replacement)
|
||||
*/
|
||||
struct fd_ringbuffer *
|
||||
fd6_program_interp_state(struct fd6_emit *emit)
|
||||
{
|
||||
const struct fd6_program_state *state = fd6_emit_get_prog(emit);
|
||||
|
||||
if (!unlikely(emit->rasterflat || emit->sprite_coord_enable)) {
|
||||
/* fastpath: */
|
||||
OUT_PKT4(ring, REG_A6XX_VPC_VARYING_INTERP_MODE(0), 8);
|
||||
for (int i = 0; i < 8; i++)
|
||||
OUT_RING(ring, state->vinterp[i]); /* VPC_VARYING_INTERP[i].MODE */
|
||||
|
||||
OUT_PKT4(ring, REG_A6XX_VPC_VARYING_PS_REPL_MODE(0), 8);
|
||||
for (int i = 0; i < 8; i++)
|
||||
OUT_RING(ring, 0x00000000); /* VPC_VARYING_PS_REPL[i] */
|
||||
return fd_ringbuffer_ref(state->interp_stateobj);
|
||||
} else {
|
||||
struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(
|
||||
emit->ctx->batch->submit, 18 * 4, FD_RINGBUFFER_STREAMING);
|
||||
|
||||
/* slow-path: */
|
||||
struct ir3_shader_variant *fs = state->fs;
|
||||
uint32_t vinterp[8], vpsrepl[8];
|
||||
@@ -938,6 +951,8 @@ fd6_program_emit(struct fd_ringbuffer *ring, struct fd6_emit *emit)
|
||||
OUT_PKT4(ring, REG_A6XX_VPC_VARYING_PS_REPL_MODE(0), 8);
|
||||
for (int i = 0; i < 8; i++)
|
||||
OUT_RING(ring, vpsrepl[i]); /* VPC_VARYING_PS_REPL[i] */
|
||||
|
||||
return ring;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -980,6 +995,7 @@ fd6_program_create(void *data, struct ir3_shader_variant *bs,
|
||||
setup_config_stateobj(state->config_stateobj, state);
|
||||
setup_stateobj(state->binning_stateobj, ctx->screen, state, key, true);
|
||||
setup_stateobj(state->stateobj, ctx->screen, state, key, false);
|
||||
state->interp_stateobj = create_interp_stateobj(ctx, state);
|
||||
|
||||
return &state->base;
|
||||
}
|
||||
@@ -991,6 +1007,7 @@ fd6_program_destroy(void *data, struct ir3_program_state *state)
|
||||
fd_ringbuffer_del(so->stateobj);
|
||||
fd_ringbuffer_del(so->binning_stateobj);
|
||||
fd_ringbuffer_del(so->config_stateobj);
|
||||
fd_ringbuffer_del(so->interp_stateobj);
|
||||
free(so);
|
||||
}
|
||||
|
||||
|
@@ -52,13 +52,12 @@ struct fd6_program_state {
|
||||
struct ir3_shader_variant *gs;
|
||||
struct ir3_shader_variant *fs;
|
||||
struct fd_ringbuffer *config_stateobj;
|
||||
struct fd_ringbuffer *interp_stateobj;
|
||||
struct fd_ringbuffer *binning_stateobj;
|
||||
struct fd_ringbuffer *stateobj;
|
||||
|
||||
/* cached state about current emitted shader program (3d): */
|
||||
struct fd6_streamout_state tf;
|
||||
|
||||
uint32_t vinterp[8];
|
||||
};
|
||||
|
||||
static inline struct fd6_program_state *
|
||||
@@ -80,7 +79,7 @@ fd6_last_shader(const struct fd6_program_state *state)
|
||||
|
||||
void fd6_emit_shader(struct fd_ringbuffer *ring, const struct ir3_shader_variant *so);
|
||||
|
||||
void fd6_program_emit(struct fd_ringbuffer *ring, struct fd6_emit *emit);
|
||||
struct fd_ringbuffer * fd6_program_interp_state(struct fd6_emit *emit);
|
||||
|
||||
void fd6_prog_init(struct pipe_context *pctx);
|
||||
|
||||
|
Reference in New Issue
Block a user