freedreno/ir3: Fix duplicated fine derivatives instructions.

legalize_block() can get run multiple times, which I didn't notice when
adding fine derivs support.  Other instruction clones change things such
that the legalization won't trigger again, but that didn't apply to the
DS.PP legalization.  To keep someone else from tripping over this, split
the one-shot legalization out of the iterative sync flag application.

Fixes failures in dEQP-VK.glsl.derivate.dfdxfine.*

Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/3198
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5699>
This commit is contained in:
Eric Anholt
2020-06-30 12:17:10 -07:00
committed by Marge Bot
parent 862d85a63f
commit 3d7d5d220b
6 changed files with 51 additions and 12 deletions

View File

@@ -5,7 +5,6 @@ dEQP-GLES31.functional.stencil_texturing.render.depth24_stencil8_clear
dEQP-GLES31.functional.stencil_texturing.render.depth24_stencil8_draw dEQP-GLES31.functional.stencil_texturing.render.depth24_stencil8_draw
dEQP-VK.binding_model.descriptorset_random.sets4.constant.ubolimitlow.sbolimithigh.imglimithigh.noiub.uab.frag.ialimitlow.0 dEQP-VK.binding_model.descriptorset_random.sets4.constant.ubolimitlow.sbolimithigh.imglimithigh.noiub.uab.frag.ialimitlow.0
dEQP-VK.draw.output_location.array.b8g8r8a8-unorm-mediump-output-vec3 dEQP-VK.draw.output_location.array.b8g8r8a8-unorm-mediump-output-vec3
dEQP-VK.glsl.derivate.fwidthfine.uniform_loop.vec3_mediump
dEQP-VK.glsl.linkage.varying.struct.mat3x2 dEQP-VK.glsl.linkage.varying.struct.mat3x2
dEQP-VK.graphicsfuzz.mat-array-deep-control-flow dEQP-VK.graphicsfuzz.mat-array-deep-control-flow
dEQP-VK.pipeline.spec_constant.graphics.geometry.composite.array.array_vec4 dEQP-VK.pipeline.spec_constant.graphics.geometry.composite.array.array_vec4

View File

@@ -1263,6 +1263,9 @@ static const struct opc_info {
OPC(5, OPC_DSYPP_1, dsypp.1), OPC(5, OPC_DSYPP_1, dsypp.1),
OPC(5, OPC_RGETPOS, rgetpos), OPC(5, OPC_RGETPOS, rgetpos),
OPC(5, OPC_RGETINFO, rgetinfo), OPC(5, OPC_RGETINFO, rgetinfo),
/* macros are needed here for ir3_print */
OPC(5, OPC_DSXPP_MACRO, dsxpp.macro),
OPC(5, OPC_DSYPP_MACRO, dsypp.macro),
/* category 6: */ /* category 6: */

View File

@@ -185,6 +185,9 @@ typedef enum {
OPC_DSYPP_1 = _OPC(5, 25), OPC_DSYPP_1 = _OPC(5, 25),
OPC_RGETPOS = _OPC(5, 26), OPC_RGETPOS = _OPC(5, 26),
OPC_RGETINFO = _OPC(5, 27), OPC_RGETINFO = _OPC(5, 27),
/* cat5 meta instructions, placed above the cat5 opc field's size */
OPC_DSXPP_MACRO = _OPC(5, 32),
OPC_DSYPP_MACRO = _OPC(5, 33),
/* category 6: */ /* category 6: */
OPC_LDG = _OPC(6, 0), /* load-global */ OPC_LDG = _OPC(6, 0), /* load-global */

View File

@@ -1663,9 +1663,9 @@ INSTR1(SQRT)
/* cat5 instructions: */ /* cat5 instructions: */
INSTR1(DSX) INSTR1(DSX)
INSTR1(DSXPP_1) INSTR1(DSXPP_MACRO)
INSTR1(DSY) INSTR1(DSY)
INSTR1(DSYPP_1) INSTR1(DSYPP_MACRO)
INSTR1F(3D, DSX) INSTR1F(3D, DSX)
INSTR1F(3D, DSY) INSTR1F(3D, DSY)
INSTR1(RGETPOS) INSTR1(RGETPOS)

View File

@@ -478,7 +478,7 @@ emit_alu(struct ir3_context *ctx, nir_alu_instr *alu)
dst[0]->cat5.type = TYPE_F32; dst[0]->cat5.type = TYPE_F32;
break; break;
case nir_op_fddx_fine: case nir_op_fddx_fine:
dst[0] = ir3_DSXPP_1(b, src[0], 0); dst[0] = ir3_DSXPP_MACRO(b, src[0], 0);
dst[0]->cat5.type = TYPE_F32; dst[0]->cat5.type = TYPE_F32;
break; break;
case nir_op_fddy: case nir_op_fddy:
@@ -488,7 +488,7 @@ emit_alu(struct ir3_context *ctx, nir_alu_instr *alu)
break; break;
break; break;
case nir_op_fddy_fine: case nir_op_fddy_fine:
dst[0] = ir3_DSYPP_1(b, src[0], 0); dst[0] = ir3_DSYPP_MACRO(b, src[0], 0);
dst[0]->cat5.type = TYPE_F32; dst[0]->cat5.type = TYPE_F32;
break; break;
case nir_op_flt: case nir_op_flt:

View File

@@ -233,13 +233,6 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
list_addtail(&n->node, &block->instr_list); list_addtail(&n->node, &block->instr_list);
} }
if (n->opc == OPC_DSXPP_1 || n->opc == OPC_DSYPP_1) {
struct ir3_instruction *op_p = ir3_instr_clone(n);
op_p->flags = IR3_INSTR_P;
ctx->so->need_fine_derivatives = true;
}
if (is_sfu(n)) if (is_sfu(n))
regmask_set(&state->needs_ss, n->regs[0]); regmask_set(&state->needs_ss, n->regs[0]);
@@ -360,6 +353,42 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
return true; return true;
} }
/* Expands dsxpp and dsypp macros to:
*
* dsxpp.1 dst, src
* dsxpp.1.p dst, src
*
* We apply this after flags syncing, as we don't want to sync in between the
* two (which might happen if dst == src). We do it before nop scheduling
* because that needs to count actual instructions.
*/
static bool
apply_fine_deriv_macro(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
{
struct list_head instr_list;
/* remove all the instructions from the list, we'll be adding
* them back in as we go
*/
list_replace(&block->instr_list, &instr_list);
list_inithead(&block->instr_list);
foreach_instr_safe (n, &instr_list) {
list_addtail(&n->node, &block->instr_list);
if (n->opc == OPC_DSXPP_MACRO || n->opc == OPC_DSYPP_MACRO) {
n->opc = (n->opc == OPC_DSXPP_MACRO) ? OPC_DSXPP_1 : OPC_DSYPP_1;
struct ir3_instruction *op_p = ir3_instr_clone(n);
op_p->flags = IR3_INSTR_P;
ctx->so->need_fine_derivatives = true;
}
}
return true;
}
/* NOTE: branch instructions are always the last instruction(s) /* NOTE: branch instructions are always the last instruction(s)
* in the block. We take advantage of this as we resolve the * in the block. We take advantage of this as we resolve the
* branches, since "if (foo) break;" constructs turn into * branches, since "if (foo) break;" constructs turn into
@@ -752,6 +781,11 @@ ir3_legalize(struct ir3 *ir, struct ir3_shader_variant *so, int *max_bary)
block_sched(ir); block_sched(ir);
if (so->type == MESA_SHADER_FRAGMENT) if (so->type == MESA_SHADER_FRAGMENT)
kill_sched(ir, so); kill_sched(ir, so);
foreach_block (block, &ir->block_list) {
progress |= apply_fine_deriv_macro(ctx, block);
}
nop_sched(ir); nop_sched(ir);
do { do {