radv: Run copy_prop and dce after folding 16-bit sampler and image load/store conversions.

Totals from 10 (0.01% of 134913) affected shaders:
CodeSize: 53168 -> 54832 (+3.13%); split: -0.17%, +3.30%
Instrs: 9117 -> 9200 (+0.91%); split: -1.74%, +2.65%
Latency: 41595 -> 41787 (+0.46%); split: -0.95%, +1.41%
InvThroughput: 16412 -> 16424 (+0.07%); split: -1.95%, +2.02%
VClause: 107 -> 112 (+4.67%); split: -0.93%, +5.61%
Copies: 199 -> 535 (+168.84%); split: -3.02%, +171.86%
PreVGPRs: 520 -> 502 (-3.46%)

Signed-off-by: Georg Lehmann <dadschoorse@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15179>
This commit is contained in:
Georg Lehmann
2022-03-07 13:10:47 +01:00
committed by Marge Bot
parent 9bca149353
commit a9bce05700

View File

@@ -4507,14 +4507,21 @@ radv_create_shaders(struct radv_pipeline *pipeline, struct radv_pipeline_layout
}
if (((stages[i].nir->info.bit_sizes_int | stages[i].nir->info.bit_sizes_float) & 16) &&
device->physical_device->rad_info.chip_class >= GFX9) {
bool copy_prop = false;
uint32_t sampler_dims = UINT32_MAX;
/* Skip because AMD doesn't support 16-bit types with these. */
sampler_dims &= ~BITFIELD_BIT(GLSL_SAMPLER_DIM_CUBE);
/* TODO: also optimize the tex srcs. See radeonSI for reference. */
/* Skip if there are potentially conflicting rounding modes */
if (!nir_has_any_rounding_mode_enabled(stages[i].nir->info.float_controls_execution_mode))
NIR_PASS_V(stages[i].nir, nir_fold_16bit_sampler_conversions, 0, sampler_dims);
NIR_PASS_V(stages[i].nir, nir_fold_16bit_image_load_store_conversions);
NIR_PASS(copy_prop, stages[i].nir, nir_fold_16bit_sampler_conversions, 0, sampler_dims);
NIR_PASS(copy_prop, stages[i].nir, nir_fold_16bit_image_load_store_conversions);
if (copy_prop) {
NIR_PASS_V(stages[i].nir, nir_copy_prop);
NIR_PASS_V(stages[i].nir, nir_opt_dce);
}
NIR_PASS_V(stages[i].nir, nir_opt_vectorize, opt_vectorize_callback, NULL);
}