From 14524511857a52b02b636cd9f7b5b9dc55422f14 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Sun, 1 Jan 2023 08:15:32 -0800 Subject: [PATCH] freedreno/a6xx: Split out fd6_state Split out the build-up of CP_SET_DRAW_STATE packet, as we are going to want to re-use this for compute state later when we switch to bindless IBO descriptors. While we are at it, drop the enable_mask param, as this is determined solely by the group_id, and it is easier to maintain a table for the handful of exceptions to ENABLE_ALL. The compiler should be able to optimize away the table lookup. Signed-off-by: Rob Clark Part-of: --- src/gallium/drivers/freedreno/a6xx/fd6_emit.c | 113 +++++++----------- src/gallium/drivers/freedreno/a6xx/fd6_emit.h | 81 +++++++++---- 2 files changed, 103 insertions(+), 91 deletions(-) diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c index f74e5e5a74d..5c6863ee6cf 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c @@ -282,18 +282,15 @@ fd6_emit_combined_textures(struct fd6_emit *emit, { struct fd_context *ctx = emit->ctx; - static const struct { - enum fd6_state_id state_id; - unsigned enable_mask; - } s[PIPE_SHADER_TYPES] = { - [PIPE_SHADER_VERTEX] = {FD6_GROUP_VS_TEX, ENABLE_ALL}, - [PIPE_SHADER_TESS_CTRL] = {FD6_GROUP_HS_TEX, ENABLE_ALL}, - [PIPE_SHADER_TESS_EVAL] = {FD6_GROUP_DS_TEX, ENABLE_ALL}, - [PIPE_SHADER_GEOMETRY] = {FD6_GROUP_GS_TEX, ENABLE_ALL}, - [PIPE_SHADER_FRAGMENT] = {FD6_GROUP_FS_TEX, ENABLE_DRAW}, + static const enum fd6_state_id s[PIPE_SHADER_TYPES] = { + [PIPE_SHADER_VERTEX] = FD6_GROUP_VS_TEX, + [PIPE_SHADER_TESS_CTRL] = FD6_GROUP_HS_TEX, + [PIPE_SHADER_TESS_EVAL] = FD6_GROUP_DS_TEX, + [PIPE_SHADER_GEOMETRY] = FD6_GROUP_GS_TEX, + [PIPE_SHADER_FRAGMENT] = FD6_GROUP_FS_TEX, }; - assert(s[type].state_id); + assert((type < ARRAY_SIZE(s)) && s[type]); if (!v->image_mapping.num_tex && !v->fb_read) { /* in the fast-path, when we don't have to mix in any image/SSBO @@ -303,13 +300,11 @@ fd6_emit_combined_textures(struct fd6_emit *emit, * Also, framebuffer-read is a slow-path because an extra * texture needs to be inserted. */ - if ((ctx->dirty_shader[type] & FD_DIRTY_SHADER_TEX) && - ctx->tex[type].num_textures > 0) { + if (ctx->tex[type].num_textures > 0) { struct fd6_texture_state *tex = fd6_texture_state(ctx, type, &ctx->tex[type]); - fd6_emit_add_group(emit, tex->stateobj, s[type].state_id, - s[type].enable_mask); + fd6_state_add_group(&emit->state, tex->stateobj, s[type]); fd6_texture_state_reference(&tex, NULL); } @@ -327,8 +322,7 @@ fd6_emit_combined_textures(struct fd6_emit *emit, fd6_emit_textures(ctx, stateobj, type, tex, v); - fd6_emit_take_group(emit, stateobj, s[type].state_id, - s[type].enable_mask); + fd6_state_take_group(&emit->state, stateobj, s[type]); } } } @@ -705,14 +699,13 @@ fd6_emit_streamout(struct fd_ringbuffer *ring, struct fd6_emit *emit) assert_dt } if (emit->streamout_mask) { - fd6_emit_add_group(emit, prog->streamout_stateobj, FD6_GROUP_SO, - ENABLE_ALL); + fd6_state_add_group(&emit->state, prog->streamout_stateobj, FD6_GROUP_SO); } else if (ctx->last.streamout_mask != 0) { /* If we transition from a draw with streamout to one without, turn * off streamout. */ - fd6_emit_add_group(emit, fd6_context(ctx)->streamout_disable_stateobj, - FD6_GROUP_SO, ENABLE_ALL); + fd6_state_add_group(&emit->state, fd6_context(ctx)->streamout_disable_stateobj, + FD6_GROUP_SO); } /* Make sure that any use of our TFB outputs (indirect draw source or shader @@ -824,131 +817,111 @@ fd6_emit_3d_state(struct fd_ringbuffer *ring, struct fd6_emit *emit) u_foreach_bit (b, emit->dirty_groups) { enum fd6_state_id group = b; struct fd_ringbuffer *state = NULL; - uint32_t enable_mask = ENABLE_ALL; switch (group) { case FD6_GROUP_VTXSTATE: state = fd6_vertex_stateobj(ctx->vtx.vtx)->stateobj; - fd_ringbuffer_ref(state); + fd6_state_add_group(&emit->state, state, FD6_GROUP_VTXSTATE); break; case FD6_GROUP_VBO: state = build_vbo_state(emit); + fd6_state_take_group(&emit->state, state, FD6_GROUP_VBO); break; case FD6_GROUP_ZSA: state = fd6_zsa_state( ctx, util_format_is_pure_integer(pipe_surface_format(pfb->cbufs[0])), fd_depth_clamp_enabled(ctx)); - fd_ringbuffer_ref(state); + fd6_state_add_group(&emit->state, state, FD6_GROUP_ZSA); break; case FD6_GROUP_LRZ: state = build_lrz(emit); - if (!state) - continue; + if (state) + fd6_state_take_group(&emit->state, state, FD6_GROUP_LRZ); break; case FD6_GROUP_SCISSOR: state = build_scissor(emit); + fd6_state_take_group(&emit->state, state, FD6_GROUP_SCISSOR); break; case FD6_GROUP_PROG: - fd6_emit_add_group(emit, prog->config_stateobj, FD6_GROUP_PROG_CONFIG, - ENABLE_ALL); - fd6_emit_add_group(emit, prog->stateobj, FD6_GROUP_PROG, ENABLE_DRAW); - fd6_emit_add_group(emit, prog->binning_stateobj, - FD6_GROUP_PROG_BINNING, - CP_SET_DRAW_STATE__0_BINNING); + fd6_state_add_group(&emit->state, prog->config_stateobj, + FD6_GROUP_PROG_CONFIG); + fd6_state_add_group(&emit->state, prog->stateobj, FD6_GROUP_PROG); + fd6_state_add_group(&emit->state, prog->binning_stateobj, + FD6_GROUP_PROG_BINNING); /* emit remaining streaming program state, ie. what depends on * other emit state, so cannot be pre-baked. */ - fd6_emit_take_group(emit, fd6_program_interp_state(emit), - FD6_GROUP_PROG_INTERP, ENABLE_DRAW); - continue; + fd6_state_take_group(&emit->state, fd6_program_interp_state(emit), + FD6_GROUP_PROG_INTERP); + break; case FD6_GROUP_RASTERIZER: state = fd6_rasterizer_state(ctx, emit->primitive_restart); - fd_ringbuffer_ref(state); + fd6_state_add_group(&emit->state, state, FD6_GROUP_RASTERIZER); break; case FD6_GROUP_PROG_FB_RAST: state = build_prog_fb_rast(emit); + fd6_state_take_group(&emit->state, state, FD6_GROUP_PROG_FB_RAST); break; case FD6_GROUP_BLEND: state = fd6_blend_variant(ctx->blend, pfb->samples, ctx->sample_mask) ->stateobj; - fd_ringbuffer_ref(state); + fd6_state_add_group(&emit->state, state, FD6_GROUP_BLEND); break; case FD6_GROUP_BLEND_COLOR: state = build_blend_color(emit); + fd6_state_take_group(&emit->state, state, FD6_GROUP_BLEND_COLOR); break; case FD6_GROUP_IBO: state = build_ibo(emit); + fd6_state_take_group(&emit->state, state, FD6_GROUP_IBO); break; case FD6_GROUP_CONST: state = fd6_build_user_consts(emit); + fd6_state_take_group(&emit->state, state, FD6_GROUP_CONST); break; case FD6_GROUP_DRIVER_PARAMS: state = fd6_build_driver_params(emit); + fd6_state_take_group(&emit->state, state, FD6_GROUP_DRIVER_PARAMS); break; case FD6_GROUP_PRIMITIVE_PARAMS: state = fd6_build_tess_consts(emit); + fd6_state_take_group(&emit->state, state, FD6_GROUP_PRIMITIVE_PARAMS); break; case FD6_GROUP_VS_TEX: fd6_emit_combined_textures(emit, PIPE_SHADER_VERTEX, vs); - continue; + break; case FD6_GROUP_HS_TEX: if (hs) { fd6_emit_combined_textures(emit, PIPE_SHADER_TESS_CTRL, hs); } - continue; + break; case FD6_GROUP_DS_TEX: if (ds) { fd6_emit_combined_textures(emit, PIPE_SHADER_TESS_EVAL, ds); } - continue; + break; case FD6_GROUP_GS_TEX: if (gs) { fd6_emit_combined_textures(emit, PIPE_SHADER_GEOMETRY, gs); } - continue; + break; case FD6_GROUP_FS_TEX: fd6_emit_combined_textures(emit, PIPE_SHADER_FRAGMENT, fs); - continue; + break; case FD6_GROUP_SO: fd6_emit_streamout(ring, emit); - continue; + break; case FD6_GROUP_NON_GROUP: fd6_emit_non_ring(ring, emit); - continue; + break; default: unreachable("bad state group"); } - - fd6_emit_take_group(emit, state, group, enable_mask); } - if (emit->num_groups > 0) { - OUT_PKT7(ring, CP_SET_DRAW_STATE, 3 * emit->num_groups); - for (unsigned i = 0; i < emit->num_groups; i++) { - struct fd6_state_group *g = &emit->groups[i]; - unsigned n = g->stateobj ? fd_ringbuffer_size(g->stateobj) / 4 : 0; - - assert((g->enable_mask & ~ENABLE_ALL) == 0); - - if (n == 0) { - OUT_RING(ring, CP_SET_DRAW_STATE__0_COUNT(0) | - CP_SET_DRAW_STATE__0_DISABLE | g->enable_mask | - CP_SET_DRAW_STATE__0_GROUP_ID(g->group_id)); - OUT_RING(ring, 0x00000000); - OUT_RING(ring, 0x00000000); - } else { - OUT_RING(ring, CP_SET_DRAW_STATE__0_COUNT(n) | g->enable_mask | - CP_SET_DRAW_STATE__0_GROUP_ID(g->group_id)); - OUT_RB(ring, g->stateobj); - } - - if (g->stateobj) - fd_ringbuffer_del(g->stateobj); - } - emit->num_groups = 0; - } + fd6_state_emit(&emit->state, ring); } void diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_emit.h b/src/gallium/drivers/freedreno/a6xx/fd6_emit.h index efe4c6bf59d..65060ba8898 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_emit.h +++ b/src/gallium/drivers/freedreno/a6xx/fd6_emit.h @@ -83,6 +83,65 @@ struct fd6_state_group { uint32_t enable_mask; }; +struct fd6_state { + struct fd6_state_group groups[32]; + unsigned num_groups; +}; + +static inline void +fd6_state_emit(struct fd6_state *state, struct fd_ringbuffer *ring) +{ + if (!state->num_groups) + return; + + OUT_PKT7(ring, CP_SET_DRAW_STATE, 3 * state->num_groups); + for (unsigned i = 0; i < state->num_groups; i++) { + struct fd6_state_group *g = &state->groups[i]; + unsigned n = g->stateobj ? fd_ringbuffer_size(g->stateobj) / 4 : 0; + + assert((g->enable_mask & ~ENABLE_ALL) == 0); + + if (n == 0) { + OUT_RING(ring, CP_SET_DRAW_STATE__0_COUNT(0) | + CP_SET_DRAW_STATE__0_DISABLE | g->enable_mask | + CP_SET_DRAW_STATE__0_GROUP_ID(g->group_id)); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + } else { + OUT_RING(ring, CP_SET_DRAW_STATE__0_COUNT(n) | g->enable_mask | + CP_SET_DRAW_STATE__0_GROUP_ID(g->group_id)); + OUT_RB(ring, g->stateobj); + } + + if (g->stateobj) + fd_ringbuffer_del(g->stateobj); + } +} + +static inline void +fd6_state_take_group(struct fd6_state *state, struct fd_ringbuffer *stateobj, + enum fd6_state_id group_id) +{ + static const unsigned enable_mask[32] = { + [FD6_GROUP_PROG] = ENABLE_DRAW, + [FD6_GROUP_PROG_BINNING] = CP_SET_DRAW_STATE__0_BINNING, + [FD6_GROUP_PROG_INTERP] = ENABLE_DRAW, + [FD6_GROUP_FS_TEX] = ENABLE_DRAW, + }; + assert(state->num_groups < ARRAY_SIZE(state->groups)); + struct fd6_state_group *g = &state->groups[state->num_groups++]; + g->stateobj = stateobj; + g->group_id = group_id; + g->enable_mask = enable_mask[group_id] ? enable_mask[group_id] : ENABLE_ALL; +} + +static inline void +fd6_state_add_group(struct fd6_state *state, struct fd_ringbuffer *stateobj, + enum fd6_state_id group_id) +{ + fd6_state_take_group(state, fd_ringbuffer_ref(stateobj), group_id); +} + /* grouped together emit-state for prog/vertex/state emit: */ struct fd6_emit { struct fd_context *ctx; @@ -112,8 +171,7 @@ struct fd6_emit { unsigned streamout_mask; - struct fd6_state_group groups[32]; - unsigned num_groups; + struct fd6_state state; }; static inline const struct fd6_program_state * @@ -127,25 +185,6 @@ fd6_emit_get_prog(struct fd6_emit *emit) return emit->prog; } -static inline void -fd6_emit_take_group(struct fd6_emit *emit, struct fd_ringbuffer *stateobj, - enum fd6_state_id group_id, unsigned enable_mask) -{ - assert(emit->num_groups < ARRAY_SIZE(emit->groups)); - struct fd6_state_group *g = &emit->groups[emit->num_groups++]; - g->stateobj = stateobj; - g->group_id = group_id; - g->enable_mask = enable_mask; -} - -static inline void -fd6_emit_add_group(struct fd6_emit *emit, struct fd_ringbuffer *stateobj, - enum fd6_state_id group_id, unsigned enable_mask) -{ - fd6_emit_take_group(emit, fd_ringbuffer_ref(stateobj), group_id, - enable_mask); -} - static inline unsigned fd6_event_write(struct fd_batch *batch, struct fd_ringbuffer *ring, enum vgt_event_type evt, bool timestamp)