aco: skip uniformization of certain merge phis
If a source is a VGPR, then skip if it's safe. This fixes the regressions
from the previous commit.
fossil-db (navi31):
Totals from 5118 (6.45% of 79395) affected shaders:
MaxWaves: 159560 -> 159520 (-0.03%); split: +0.01%, -0.03%
Instrs: 2165351 -> 2138456
(-1.24%); split: -1.26%, +0.02%
CodeSize: 11260340 -> 11152460 (-0.96%); split: -0.98%, +0.02%
VGPRs: 218124 -> 225144 (+3.22%); split: -0.13%, +3.35%
Latency: 11059208 -> 11116102 (+0.51%); split: -0.18%, +0.69%
InvThroughput: 1252148 -> 1230193 (-1.75%); split: -1.77%, +0.01%
VClause: 39513 -> 39518 (+0.01%); split: -0.48%, +0.49%
SClause: 59434 -> 59378 (-0.09%); split: -0.11%, +0.02%
Copies: 165997 -> 156172 (-5.92%); split: -6.68%, +0.76%
PreSGPRs: 181203 -> 181094 (-0.06%)
PreVGPRs: 139393 -> 139731 (+0.24%)
VALU: 1244301 -> 1220769 (-1.89%); split: -1.91%, +0.02%
SALU: 200240 -> 199567 (-0.34%); split: -0.34%, +0.00%
fossil-db (navi21):
Totals from 35520 (44.74% of 79395) affected shaders:
MaxWaves: 951870 -> 951830 (-0.00%)
Instrs: 20229388 -> 20227776 (-0.01%); split: -0.01%, +0.00%
CodeSize: 105379916 -> 105513740 (+0.13%); split: -0.01%, +0.13%
VGPRs: 1375232 -> 1375400 (+0.01%)
Latency: 81046435 -> 81013986 (-0.04%); split: -0.04%, +0.00%
InvThroughput: 15269166 -> 15273295 (+0.03%); split: -0.01%, +0.04%
VClause: 354314 -> 354310 (-0.00%); split: -0.00%, +0.00%
SClause: 417049 -> 417047 (-0.00%); split: -0.00%, +0.00%
Copies: 1699445 -> 1699488 (+0.00%); split: -0.01%, +0.01%
Branches: 591274 -> 591269 (-0.00%); split: -0.00%, +0.00%
PreSGPRs: 1371062 -> 1370567 (-0.04%)
PreVGPRs: 1100716 -> 1100953 (+0.02%)
VALU: 11076189 -> 11075167 (-0.01%); split: -0.01%, +0.00%
SALU: 3648002 -> 3647378 (-0.02%); split: -0.02%, +0.00%
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30211>
This commit is contained in:
@@ -9118,6 +9118,9 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
|
||||
break;
|
||||
}
|
||||
case nir_intrinsic_lane_permute_16_amd: {
|
||||
/* NOTE: If we use divergence analysis information here instead of the src regclass,
|
||||
* skip_uniformize_merge_phi() should be updated.
|
||||
*/
|
||||
Temp src = get_ssa_temp(ctx, instr->src[0].ssa);
|
||||
Temp dst = get_ssa_temp(ctx, &instr->def);
|
||||
assert(ctx->program->gfx_level >= GFX10);
|
||||
|
@@ -261,6 +261,89 @@ setup_nir(isel_context* ctx, nir_shader* nir)
|
||||
nir_index_ssa_defs(func);
|
||||
}
|
||||
|
||||
/* Returns true if we can skip uniformization of a merge phi. This makes the destination divergent,
|
||||
* and so is only safe if the inconsistency it introduces into the divergence analysis won't break
|
||||
* code generation. If we unsafely skip uniformization, later instructions (such as SSBO loads,
|
||||
* some subgroup intrinsics and certain conversions) can use divergence analysis information which
|
||||
* is no longer correct.
|
||||
*/
|
||||
bool
|
||||
skip_uniformize_merge_phi(nir_def* ssa, unsigned depth)
|
||||
{
|
||||
if (depth >= 16)
|
||||
return false;
|
||||
|
||||
nir_foreach_use (src, ssa) {
|
||||
switch (nir_src_parent_instr(src)->type) {
|
||||
case nir_instr_type_alu: {
|
||||
nir_alu_instr* alu = nir_instr_as_alu(nir_src_parent_instr(src));
|
||||
if (alu->def.divergent)
|
||||
break;
|
||||
|
||||
switch (alu->op) {
|
||||
case nir_op_f2i16:
|
||||
case nir_op_f2u16:
|
||||
case nir_op_f2i32:
|
||||
case nir_op_f2u32:
|
||||
case nir_op_b2i8:
|
||||
case nir_op_b2i16:
|
||||
case nir_op_b2i32:
|
||||
case nir_op_b2b32:
|
||||
case nir_op_b2f16:
|
||||
case nir_op_b2f32:
|
||||
case nir_op_b2f64:
|
||||
case nir_op_mov:
|
||||
/* These opcodes p_as_uniform or vote_any() the source, so fail immediately. We don't
|
||||
* need to do this for non-nir_op_b2 if we know we'll move it back into a VGPR,
|
||||
* in which case the p_as_uniform would be eliminated. This would be way too fragile,
|
||||
* though.
|
||||
*/
|
||||
return false;
|
||||
default:
|
||||
if (!skip_uniformize_merge_phi(&alu->def, depth + 1))
|
||||
return false;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case nir_instr_type_intrinsic: {
|
||||
nir_intrinsic_instr* intrin = nir_instr_as_intrinsic(nir_src_parent_instr(src));
|
||||
unsigned src_idx = src - intrin->src;
|
||||
/* nir_intrinsic_lane_permute_16_amd is only safe because we don't use divergence analysis
|
||||
* for it's instruction selection. We use that intrinsic for NGG culling. All others are
|
||||
* stores with VGPR sources.
|
||||
*/
|
||||
if (intrin->intrinsic == nir_intrinsic_lane_permute_16_amd ||
|
||||
intrin->intrinsic == nir_intrinsic_export_amd ||
|
||||
intrin->intrinsic == nir_intrinsic_export_dual_src_blend_amd ||
|
||||
(intrin->intrinsic == nir_intrinsic_export_row_amd && src_idx == 0) ||
|
||||
(intrin->intrinsic == nir_intrinsic_store_buffer_amd && src_idx == 0) ||
|
||||
(intrin->intrinsic == nir_intrinsic_store_ssbo && src_idx == 0) ||
|
||||
(intrin->intrinsic == nir_intrinsic_store_global && src_idx == 0) ||
|
||||
(intrin->intrinsic == nir_intrinsic_store_scratch && src_idx == 0) ||
|
||||
(intrin->intrinsic == nir_intrinsic_store_shared && src_idx == 0))
|
||||
break;
|
||||
return false;
|
||||
}
|
||||
case nir_instr_type_phi: {
|
||||
nir_phi_instr* phi = nir_instr_as_phi(nir_src_parent_instr(src));
|
||||
if (phi->def.divergent || skip_uniformize_merge_phi(&phi->def, depth + 1))
|
||||
break;
|
||||
return false;
|
||||
}
|
||||
case nir_instr_type_tex: {
|
||||
/* This is either used as a VGPR source or it's a (potentially undef) descriptor. */
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
} /* end namespace */
|
||||
|
||||
void
|
||||
@@ -603,7 +686,7 @@ init_context(isel_context* ctx, nir_shader* shader)
|
||||
/* In case of uniform phis after divergent merges, ensure that the dst is an
|
||||
* SGPR and does not contain undefined values for some invocations.
|
||||
*/
|
||||
if (divergent_merge)
|
||||
if (divergent_merge && !skip_uniformize_merge_phi(&phi->def, 0))
|
||||
type = RegType::sgpr;
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user