pan/mdg: Precolour blend inputs

Instead of requiring an explicit unoptimized move, we can implicitly
colour the blend input intrinsic to r0, where it will be preloaded; this
is a simple task for RA, and does not conflict with anything. If there
are multiple duplicate loads, the later ones can just be simple moves
which will be copy-propagated.

We don't need to include an explicit synthetic load, since (scanning
backwards) the read will cause the input to become live at the right
time and the lack of an explicit write will keep it live from the
beginning of the shader. So no need to make it more complicated than it
needs to be.

Saves a cycle in blend shaders.

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5449>
This commit is contained in:
Alyssa Rosenzweig
2020-06-12 16:45:24 -04:00
committed by Marge Bot
parent ad00159070
commit 277b616962
5 changed files with 26 additions and 5 deletions

View File

@@ -242,6 +242,9 @@ typedef struct compiler_context {
/* Render target number for a keyed blend shader. Depends on is_blend */
unsigned blend_rt;
/* Index to precolour to r0 for an input blend colour */
unsigned blend_input;
/* Tracking for blend constant patching */
int blend_constant_offset;

View File

@@ -1669,12 +1669,13 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)
} else if (ctx->stage == MESA_SHADER_FRAGMENT && !ctx->is_blend) {
emit_varying_read(ctx, reg, offset, nr_comp, component, indirect_offset, t | nir_dest_bit_size(instr->dest), is_flat);
} else if (ctx->is_blend) {
/* For blend shaders, load the input color, which is
* preloaded to r0 */
/* ctx->blend_input will be precoloured to r0, where
* the input is preloaded */
midgard_instruction move = v_mov(SSA_FIXED_REGISTER(0), reg);
emit_mir_instruction(ctx, move);
schedule_barrier(ctx);
if (ctx->blend_input == ~0)
ctx->blend_input = reg;
else
emit_mir_instruction(ctx, v_mov(ctx->blend_input, reg));
} else if (ctx->stage == MESA_SHADER_VERTEX) {
emit_attr_read(ctx, reg, offset, nr_comp, t);
} else {
@@ -2696,6 +2697,7 @@ midgard_compile_shader_nir(nir_shader *nir, panfrost_program *program, bool is_b
ctx->is_blend = is_blend;
ctx->alpha_ref = program->alpha_ref;
ctx->blend_rt = MIDGARD_COLOR_RT0 + blend_rt;
ctx->blend_input = ~0;
ctx->quirks = midgard_get_quirks(gpu_id);
/* Start off with a safe cutoff, allowing usage of all 16 work

View File

@@ -635,6 +635,16 @@ allocate_registers(compiler_context *ctx, bool *spilled)
l->solutions[ins->dest] = (16 * 1) + COMPONENT_W * 4;
}
/* Precolour blend input to r0. Note writeout is necessarily at the end
* and blend shaders are single-RT only so there is only a single
* writeout block, so this cannot conflict with the writeout r0 (there
* is no need to have an intermediate move) */
if (ctx->blend_input != ~0) {
assert(ctx->blend_input < ctx->temp_count);
l->solutions[ctx->blend_input] = 0;
}
mir_compute_interference(ctx, l);
*spilled = !lcra_solve(l);

View File

@@ -71,6 +71,10 @@ mir_rewrite_index_dst(compiler_context *ctx, unsigned old, unsigned new)
mir_foreach_instr_global(ctx, ins) {
mir_rewrite_index_dst_single(ins, old, new);
}
/* Implicitly written before the shader */
if (ctx->blend_input == old)
ctx->blend_input = new;
}
void

View File

@@ -78,4 +78,6 @@ mir_squeeze_index(compiler_context *ctx)
for (unsigned i = 0; i < ARRAY_SIZE(ins->src); ++i)
ins->src[i] = find_or_allocate_temp(ctx, ins->src[i]);
}
ctx->blend_input = find_or_allocate_temp(ctx, ctx->blend_input);
}