From 0e9cbac188c62fa4d147352e5a44135963a0143e Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Wed, 10 Aug 2022 14:44:20 +0100 Subject: [PATCH] aco: don't expand vec3 VS input load to vec4 on GFX6 Removes the (small) possibility of invalid memory access. fossil-db (pitcairn): Totals from 35456 (26.15% of 135610) affected shaders: MaxWaves: 259508 -> 260642 (+0.44%); split: +0.44%, -0.01% Instrs: 7915383 -> 7965774 (+0.64%); split: -0.09%, +0.72% CodeSize: 37163748 -> 37524804 (+0.97%); split: -0.04%, +1.01% SGPRs: 1515128 -> 1513576 (-0.10%); split: -0.27%, +0.17% VGPRs: 1218376 -> 1211160 (-0.59%); split: -0.71%, +0.12% SpillSGPRs: 1152 -> 1144 (-0.69%) Latency: 83777626 -> 83867137 (+0.11%); split: -0.61%, +0.72% InvThroughput: 25722445 -> 25727745 (+0.02%); split: -0.23%, +0.25% VClause: 232058 -> 230464 (-0.69%); split: -2.53%, +1.84% SClause: 322579 -> 322108 (-0.15%); split: -0.76%, +0.61% Copies: 547032 -> 547954 (+0.17%); split: -1.83%, +2.00% Branches: 72538 -> 72542 (+0.01%) PreVGPRs: 898453 -> 897584 (-0.10%); split: -0.13%, +0.03% Signed-off-by: Rhys Perry Reviewed-by: Samuel Pitoiset Cc: mesa-stable Part-of: (cherry picked from commit 030d6f873e87d662af5ee3b24b936cfb4a5b32cf) --- .pick_status.json | 2 +- src/amd/compiler/aco_instruction_selection.cpp | 12 ++++-------- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/.pick_status.json b/.pick_status.json index ccbc1375837..42d40f8bf8f 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -9859,7 +9859,7 @@ "description": "aco: don't expand vec3 VS input load to vec4 on GFX6", "nominated": true, "nomination_type": 0, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": null }, diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index f54396f612d..f5fa1825bca 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -5549,7 +5549,6 @@ visit_load_input(isel_context* ctx, nir_intrinsic_instr* instr) while (channel_start < num_channels) { unsigned fetch_component = num_channels - channel_start; unsigned fetch_offset = attrib_offset + channel_start * vtx_info->chan_byte_size; - bool expanded = false; /* use MUBUF when possible to avoid possible alignment issues */ /* TODO: we could use SDWA to unpack 8/16-bit attributes without extra instructions */ @@ -5563,11 +5562,9 @@ visit_load_input(isel_context* ctx, nir_intrinsic_instr* instr) get_fetch_data_format(ctx, vtx_info, fetch_offset, &fetch_component, vtx_info->num_channels - channel_start, binding_align); } else { - if (fetch_component == 3 && ctx->options->gfx_level == GFX6) { - /* GFX6 only supports loading vec3 with MTBUF, expand to vec4. */ - fetch_component = 4; - expanded = true; - } + /* GFX6 only supports loading vec3 with MTBUF, split to vec2,scalar. */ + if (fetch_component == 3 && ctx->options->gfx_level == GFX6) + fetch_component = 2; } unsigned fetch_bytes = fetch_component * bitsize / 8; @@ -5627,8 +5624,7 @@ visit_load_input(isel_context* ctx, nir_intrinsic_instr* instr) } Temp fetch_dst; - if (channel_start == 0 && fetch_bytes == dst.bytes() && !expanded && - num_channels <= 3) { + if (channel_start == 0 && fetch_bytes == dst.bytes() && num_channels <= 3) { direct_fetch = true; fetch_dst = dst; } else {