radv,aco,llvm: lower post shuffle vertex in NIR
fossil-db (Sienna Cichlid):
Totals from 774 (0.57% of 134913) affected shaders:
VGPRs: 26496 -> 26312 (-0.69%)
CodeSize: 1825936 -> 1828812 (+0.16%); split: -0.04%, +0.20%
MaxWaves: 22046 -> 22062 (+0.07%)
Instrs: 347634 -> 347975 (+0.10%); split: -0.05%, +0.15%
Latency: 1363949 -> 1356426 (-0.55%); split: -0.59%, +0.04%
InvThroughput: 221529 -> 221380 (-0.07%); split: -0.10%, +0.04%
VClause: 5682 -> 5676 (-0.11%); split: -1.46%, +1.36%
SClause: 7485 -> 7411 (-0.99%); split: -1.48%, +0.49%
Copies: 30481 -> 30420 (-0.20%); split: -0.51%, +0.31%
PreVGPRs: 19717 -> 19656 (-0.31%)

fossil-db (Polaris10):
Totals from 896 (0.66% of 135960) affected shaders:
SGPRs: 49824 -> 49648 (-0.35%); split: -0.39%, +0.03%
VGPRs: 31040 -> 29948 (-3.52%); split: -3.62%, +0.10%
CodeSize: 875960 -> 875920 (-0.00%); split: -0.06%, +0.05%
MaxWaves: 6380 -> 6429 (+0.77%)
Instrs: 171522 -> 171482 (-0.02%); split: -0.07%, +0.05%
Latency: 1356082 -> 1334386 (-1.60%); split: -1.61%, +0.01%
InvThroughput: 553389 -> 552957 (-0.08%); split: -0.08%, +0.00%
VClause: 4317 -> 4244 (-1.69%); split: -2.41%, +0.72%
SClause: 6157 -> 6139 (-0.29%); split: -0.45%, +0.16%
Copies: 9340 -> 9235 (-1.12%); split: -1.24%, +0.12%
PreVGPRs: 22366 -> 22116 (-1.12%)

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15113>
This commit is contained in:

committed by
Marge Bot

parent
4b99b528f5
commit
342e6f8332
@@ -3760,24 +3760,46 @@ radv_lower_vs_input(nir_shader *nir, const struct radv_pipeline_key *pipeline_ke
|
||||
|
||||
unsigned location = nir_intrinsic_base(intrin) - VERT_ATTRIB_GENERIC0;
|
||||
enum radv_vs_input_alpha_adjust alpha_adjust = pipeline_key->vs.vertex_alpha_adjust[location];
|
||||
bool post_shuffle = pipeline_key->vs.vertex_post_shuffle & (1 << location);
|
||||
|
||||
if (alpha_adjust == ALPHA_ADJUST_NONE)
|
||||
if (alpha_adjust == ALPHA_ADJUST_NONE && !post_shuffle)
|
||||
continue;
|
||||
|
||||
unsigned component = nir_intrinsic_component(intrin);
|
||||
unsigned num_components = intrin->dest.ssa.num_components;
|
||||
|
||||
b.cursor = nir_after_instr(instr);
|
||||
static const unsigned swizzle_normal[4] = {0, 1, 2, 3};
|
||||
static const unsigned swizzle_post_shuffle[4] = {2, 1, 0, 3};
|
||||
const unsigned *swizzle = post_shuffle ? swizzle_post_shuffle : swizzle_normal;
|
||||
|
||||
if (component + num_components == 4) {
|
||||
unsigned idx = num_components - 1;
|
||||
nir_ssa_def *alpha = radv_adjust_vertex_fetch_alpha(
|
||||
&b, alpha_adjust, nir_channel(&b, &intrin->dest.ssa, idx));
|
||||
nir_ssa_def *new_dest = nir_vector_insert_imm(&b, &intrin->dest.ssa, alpha, idx);
|
||||
nir_ssa_def_rewrite_uses_after(&intrin->dest.ssa, new_dest,
|
||||
new_dest->parent_instr);
|
||||
progress = true;
|
||||
b.cursor = nir_after_instr(instr);
|
||||
nir_ssa_def *channels[4];
|
||||
|
||||
if (post_shuffle) {
|
||||
/* Expand to load 3 components because it's shuffled like X<->Z. */
|
||||
intrin->num_components = MAX2(component + num_components, 3);
|
||||
intrin->dest.ssa.num_components = intrin->num_components;
|
||||
|
||||
nir_intrinsic_set_component(intrin, 0);
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < num_components; i++) {
|
||||
unsigned idx = i + (post_shuffle ? component : 0);
|
||||
|
||||
channels[i] = nir_channel(&b, &intrin->dest.ssa, swizzle[idx]);
|
||||
}
|
||||
|
||||
if (alpha_adjust != ALPHA_ADJUST_NONE && component + num_components == 4) {
|
||||
unsigned idx = num_components - 1;
|
||||
channels[idx] = radv_adjust_vertex_fetch_alpha(&b, alpha_adjust, channels[idx]);
|
||||
}
|
||||
|
||||
nir_ssa_def *new_dest = nir_vec(&b, channels, num_components);
|
||||
|
||||
nir_ssa_def_rewrite_uses_after(&intrin->dest.ssa, new_dest,
|
||||
new_dest->parent_instr);
|
||||
|
||||
progress = true;
|
||||
}
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user