aco/gfx11+: fix inline constants for v_pk_fmac_f16
On newer hardware, the hi operation reads the lo half of the inline constant. On older hardware, it reads the hi half (zero).

I tested this on Navi31 for gfx11 and Raphael for gfx10.

Foz-DB Navi31:
Totals from 4 (0.01% of 79395) affected shaders:
CodeSize: 36832 -> 36448 (-1.04%)
Latency: 20362 -> 20334 (-0.14%)

Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29512>
This commit is contained in:
@@ -2567,8 +2567,16 @@ vop3_can_use_vop2acc(ra_ctx& ctx, Instruction* instr)
|
||||
return false;
|
||||
|
||||
if (instr->isVOP3P()) {
|
||||
if (instr->valu().opsel_lo != 0 || instr->valu().opsel_hi != 0x7)
|
||||
return false;
|
||||
for (unsigned i = 0; i < 3; i++) {
|
||||
if (instr->valu().opsel_lo[i])
|
||||
return false;
|
||||
|
||||
/* v_pk_fmac_f16 inline constants are replicated to hi bits starting with gfx11. */
|
||||
if (instr->valu().opsel_hi[i] ==
|
||||
(instr->operands[i].isConstant() && !instr->operands[i].isLiteral() &&
|
||||
ctx.program->gfx_level >= GFX11))
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
if (instr->valu().opsel & (ctx.program->gfx_level < GFX11 ? 0xf : ~0x3))
|
||||
return false;
|
||||
|
Reference in New Issue
Block a user