pan/midgard: Add nir_intrinsic_store_zs_output_pan support
ZS fragment stores are done like color fragment stores, except that they use a different RT id (0xFF) and the depth and stencil values are stored in r1.x and r1.y. Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com> [Fix the scheduling part] Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com> Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3697>
This commit is contained in:

committed by
Marge Bot

parent
f5619f5073
commit
e1ba0cd452
@@ -110,6 +110,8 @@ typedef struct midgard_instruction {
|
||||
|
||||
bool compact_branch;
|
||||
bool writeout;
|
||||
bool writeout_depth;
|
||||
bool writeout_stencil;
|
||||
bool last_writeout;
|
||||
|
||||
/* Kind of a hack, but hint against aggressive DCE */
|
||||
@@ -227,6 +229,7 @@ enum midgard_rt_id {
|
||||
MIDGARD_COLOR_RT1,
|
||||
MIDGARD_COLOR_RT2,
|
||||
MIDGARD_COLOR_RT3,
|
||||
MIDGARD_ZS_RT,
|
||||
MIDGARD_NUM_RTS,
|
||||
};
|
||||
|
||||
|
@@ -1444,8 +1444,14 @@ compute_builtin_arg(nir_op op)
|
||||
}
|
||||
|
||||
static void
|
||||
emit_fragment_store(compiler_context *ctx, unsigned src, unsigned rt)
|
||||
emit_fragment_store(compiler_context *ctx, unsigned src, enum midgard_rt_id rt)
|
||||
{
|
||||
assert(rt < ARRAY_SIZE(ctx->writeout_branch));
|
||||
|
||||
midgard_instruction *br = ctx->writeout_branch[rt];
|
||||
|
||||
assert(!br);
|
||||
|
||||
emit_explicit_constant(ctx, src, src);
|
||||
|
||||
struct midgard_instruction ins =
|
||||
@@ -1455,14 +1461,12 @@ emit_fragment_store(compiler_context *ctx, unsigned src, unsigned rt)
|
||||
|
||||
/* Add dependencies */
|
||||
ins.src[0] = src;
|
||||
ins.constants.u32[0] = rt * 0x100;
|
||||
ins.constants.u32[0] = rt == MIDGARD_ZS_RT ?
|
||||
0xFF : (rt - MIDGARD_COLOR_RT0) * 0x100;
|
||||
|
||||
/* Emit the branch */
|
||||
midgard_instruction *br = emit_mir_instruction(ctx, ins);
|
||||
br = emit_mir_instruction(ctx, ins);
|
||||
schedule_barrier(ctx);
|
||||
|
||||
assert(rt < ARRAY_SIZE(ctx->writeout_branch));
|
||||
assert(!ctx->writeout_branch[rt]);
|
||||
ctx->writeout_branch[rt] = br;
|
||||
|
||||
/* Push our current location = current block count - 1 = where we'll
|
||||
@@ -1656,6 +1660,22 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_intrinsic_store_zs_output_pan: {
|
||||
assert(ctx->stage == MESA_SHADER_FRAGMENT);
|
||||
emit_fragment_store(ctx, nir_src_index(ctx, &instr->src[0]),
|
||||
MIDGARD_ZS_RT);
|
||||
|
||||
midgard_instruction *br = ctx->writeout_branch[MIDGARD_ZS_RT];
|
||||
|
||||
if (!nir_intrinsic_component(instr))
|
||||
br->writeout_depth = true;
|
||||
if (nir_intrinsic_component(instr) ||
|
||||
instr->num_components)
|
||||
br->writeout_stencil = true;
|
||||
assert(br->writeout_depth | br->writeout_stencil);
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_intrinsic_store_output:
|
||||
assert(nir_src_is_const(instr->src[1]) && "no indirect outputs");
|
||||
|
||||
@@ -2449,11 +2469,13 @@ static unsigned
|
||||
emit_fragment_epilogue(compiler_context *ctx, unsigned rt)
|
||||
{
|
||||
/* Loop to ourselves */
|
||||
|
||||
midgard_instruction *br = ctx->writeout_branch[rt];
|
||||
struct midgard_instruction ins = v_branch(false, false);
|
||||
ins.writeout = true;
|
||||
ins.writeout_depth = br->writeout_depth;
|
||||
ins.writeout_stencil = br->writeout_stencil;
|
||||
ins.branch.target_block = ctx->block_count - 1;
|
||||
ins.constants.u32[0] = rt * 0x100;
|
||||
ins.constants.u32[0] = br->constants.u32[0];
|
||||
emit_mir_instruction(ctx, ins);
|
||||
|
||||
ctx->current_block->epilogue = true;
|
||||
@@ -2754,7 +2776,7 @@ midgard_compile_shader_nir(nir_shader *nir, midgard_program *program, bool is_bl
|
||||
ctx->stage = nir->info.stage;
|
||||
ctx->is_blend = is_blend;
|
||||
ctx->alpha_ref = program->alpha_ref;
|
||||
ctx->blend_rt = blend_rt;
|
||||
ctx->blend_rt = MIDGARD_COLOR_RT0 + blend_rt;
|
||||
ctx->quirks = midgard_get_quirks(gpu_id);
|
||||
|
||||
/* Start off with a safe cutoff, allowing usage of all 16 work
|
||||
|
@@ -561,8 +561,14 @@ allocate_registers(compiler_context *ctx, bool *spilled)
|
||||
mir_foreach_instr_global(ctx, ins) {
|
||||
if (!(ins->compact_branch && ins->writeout)) continue;
|
||||
|
||||
if (ins->src[0] < ctx->temp_count)
|
||||
l->solutions[ins->src[0]] = 0;
|
||||
if (ins->src[0] < ctx->temp_count) {
|
||||
if (ins->writeout_depth)
|
||||
l->solutions[ins->src[0]] = (16 * 1) + COMPONENT_X * 4;
|
||||
else if (ins->writeout_stencil)
|
||||
l->solutions[ins->src[0]] = (16 * 1) + COMPONENT_Y * 4;
|
||||
else
|
||||
l->solutions[ins->src[0]] = 0;
|
||||
}
|
||||
|
||||
if (ins->src[1] < ctx->temp_count)
|
||||
l->solutions[ins->src[1]] = (16 * 1) + COMPONENT_Z * 4;
|
||||
|
@@ -845,6 +845,7 @@ mir_schedule_alu(
|
||||
mir_choose_alu(&branch, instructions, worklist, len, &predicate, ALU_ENAB_BR_COMPACT);
|
||||
mir_update_worklist(worklist, len, instructions, branch);
|
||||
bool writeout = branch && branch->writeout;
|
||||
bool zs_writeout = writeout && (branch->writeout_depth | branch->writeout_stencil);
|
||||
|
||||
if (branch && branch->branch.conditional) {
|
||||
midgard_instruction *cond = mir_schedule_condition(ctx, &predicate, worklist, len, instructions, branch);
|
||||
@@ -859,13 +860,14 @@ mir_schedule_alu(
|
||||
|
||||
mir_choose_alu(&smul, instructions, worklist, len, &predicate, UNIT_SMUL);
|
||||
|
||||
if (!writeout)
|
||||
if (!writeout) {
|
||||
mir_choose_alu(&vlut, instructions, worklist, len, &predicate, UNIT_VLUT);
|
||||
|
||||
if (writeout) {
|
||||
} else {
|
||||
/* Propagate up */
|
||||
bundle.last_writeout = branch->last_writeout;
|
||||
}
|
||||
|
||||
if (writeout && !zs_writeout) {
|
||||
vadd = ralloc(ctx, midgard_instruction);
|
||||
*vadd = v_mov(~0, make_compiler_temp(ctx));
|
||||
|
||||
@@ -928,9 +930,9 @@ mir_schedule_alu(
|
||||
|
||||
/* Check if writeout reads its own register */
|
||||
|
||||
if (branch && branch->writeout) {
|
||||
if (writeout) {
|
||||
midgard_instruction *stages[] = { sadd, vadd, smul };
|
||||
unsigned src = (branch->src[0] == ~0) ? SSA_FIXED_REGISTER(0) : branch->src[0];
|
||||
unsigned src = (branch->src[0] == ~0) ? SSA_FIXED_REGISTER(zs_writeout ? 1 : 0) : branch->src[0];
|
||||
unsigned writeout_mask = 0x0;
|
||||
bool bad_writeout = false;
|
||||
|
||||
@@ -946,13 +948,17 @@ mir_schedule_alu(
|
||||
}
|
||||
|
||||
/* It's possible we'll be able to schedule something into vmul
|
||||
* to fill r0. Let's peak into the future, trying to schedule
|
||||
* to fill r0/r1. Let's peek into the future, trying to schedule
|
||||
* vmul specially that way. */
|
||||
|
||||
if (!bad_writeout && writeout_mask != 0xF) {
|
||||
unsigned full_mask = zs_writeout ?
|
||||
(1 << (branch->writeout_depth + branch->writeout_stencil)) - 1 :
|
||||
0xF;
|
||||
|
||||
if (!bad_writeout && writeout_mask != full_mask) {
|
||||
predicate.unit = UNIT_VMUL;
|
||||
predicate.dest = src;
|
||||
predicate.mask = writeout_mask ^ 0xF;
|
||||
predicate.mask = writeout_mask ^ full_mask;
|
||||
|
||||
struct midgard_instruction *peaked =
|
||||
mir_choose_instruction(instructions, worklist, len, &predicate);
|
||||
@@ -961,7 +967,7 @@ mir_schedule_alu(
|
||||
vmul = peaked;
|
||||
vmul->unit = UNIT_VMUL;
|
||||
writeout_mask |= predicate.mask;
|
||||
assert(writeout_mask == 0xF);
|
||||
assert(writeout_mask == full_mask);
|
||||
}
|
||||
|
||||
/* Cleanup */
|
||||
@@ -969,13 +975,13 @@ mir_schedule_alu(
|
||||
}
|
||||
|
||||
/* Finally, add a move if necessary */
|
||||
if (bad_writeout || writeout_mask != 0xF) {
|
||||
unsigned temp = (branch->src[0] == ~0) ? SSA_FIXED_REGISTER(0) : make_compiler_temp(ctx);
|
||||
if (bad_writeout || writeout_mask != full_mask) {
|
||||
unsigned temp = (branch->src[0] == ~0) ? SSA_FIXED_REGISTER(zs_writeout ? 1 : 0) : make_compiler_temp(ctx);
|
||||
|
||||
vmul = ralloc(ctx, midgard_instruction);
|
||||
*vmul = v_mov(src, temp);
|
||||
vmul->unit = UNIT_VMUL;
|
||||
vmul->mask = 0xF ^ writeout_mask;
|
||||
vmul->mask = full_mask ^ writeout_mask;
|
||||
|
||||
/* Rewrite to use our temp */
|
||||
|
||||
|
@@ -468,9 +468,19 @@ mir_bytemask_of_read_components_single(unsigned *swizzle, unsigned inmask, midga
|
||||
uint16_t
|
||||
mir_bytemask_of_read_components_index(midgard_instruction *ins, unsigned i)
|
||||
{
|
||||
/* Branch writeout uses all components */
|
||||
if (ins->compact_branch && ins->writeout && (i == 0))
|
||||
return 0xFFFF;
|
||||
if (ins->compact_branch && ins->writeout && (i == 0)) {
|
||||
/* Non-ZS writeout uses all components */
|
||||
if (!ins->writeout_depth && !ins->writeout_stencil)
|
||||
return 0xFFFF;
|
||||
|
||||
/* For ZS-writeout, if both Z and S are written we need two
|
||||
* components, otherwise we only need one.
|
||||
*/
|
||||
if (ins->writeout_depth && ins->writeout_stencil)
|
||||
return 0xFF;
|
||||
else
|
||||
return 0xF;
|
||||
}
|
||||
|
||||
/* Conditional branches read one 32-bit component = 4 bytes (TODO: multi branch??) */
|
||||
if (ins->compact_branch && ins->branch.conditional && (i == 0))
|
||||
|
Reference in New Issue
Block a user