aco: optimize 32-bit extracts and inserts using SDWA

Still need to use dst_u=preserve field to optimize packs

fossil-db (Sienna Cichlid):
Totals from 15974 (10.66% of 149839) affected shaders:
VGPRs: 1009064 -> 1008968 (-0.01%); split: -0.03%, +0.02%
SpillSGPRs: 7959 -> 7964 (+0.06%)
CodeSize: 101716436 -> 101159568 (-0.55%); split: -0.55%, +0.01%
MaxWaves: 284464 -> 284490 (+0.01%); split: +0.02%, -0.01%
Instrs: 19334216 -> 19224241 (-0.57%); split: -0.57%, +0.00%
Latency: 375465295 -> 375230478 (-0.06%); split: -0.14%, +0.08%
InvThroughput: 79006105 -> 78860705 (-0.18%); split: -0.25%, +0.07%

fossil-db (Polaris):
Totals from 11369 (7.51% of 151365) affected shaders:
SGPRs: 787920 -> 787680 (-0.03%); split: -0.04%, +0.01%
VGPRs: 681056 -> 681040 (-0.00%); split: -0.01%, +0.00%
CodeSize: 68127288 -> 67664120 (-0.68%); split: -0.69%, +0.01%
MaxWaves: 54370 -> 54371 (+0.00%)
Instrs: 13294638 -> 13214109 (-0.61%); split: -0.62%, +0.01%
Latency: 373515759 -> 373214571 (-0.08%); split: -0.11%, +0.03%
InvThroughput: 166529524 -> 166275291 (-0.15%); split: -0.20%, +0.05%

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/3151>
This commit is contained in:
Rhys Perry
2020-08-12 14:23:56 +01:00
committed by Marge Bot
parent 63659fc15c
commit 54292e99c7
4 changed files with 271 additions and 23 deletions

View File

@@ -196,7 +196,7 @@ memory_sync_info get_sync_info(const Instruction* instr)
}
}
bool can_use_SDWA(chip_class chip, const aco_ptr<Instruction>& instr)
bool can_use_SDWA(chip_class chip, const aco_ptr<Instruction>& instr, bool pre_ra)
{
if (!instr->isVALU())
return false;
@@ -217,7 +217,7 @@ bool can_use_SDWA(chip_class chip, const aco_ptr<Instruction>& instr)
return false;
//TODO: return true if we know we will use vcc
if (instr->definitions.size() >= 2)
if (!pre_ra && instr->definitions.size() >= 2)
return false;
for (unsigned i = 1; i < instr->operands.size(); i++) {
@@ -251,9 +251,9 @@ bool can_use_SDWA(chip_class chip, const aco_ptr<Instruction>& instr)
return false;
//TODO: return true if we know we will use vcc
if (instr->isVOPC())
if (!pre_ra && instr->isVOPC())
return false;
if (instr->operands.size() >= 3 && !is_mac)
if (!pre_ra && instr->operands.size() >= 3 && !is_mac)
return false;
return instr->opcode != aco_opcode::v_madmk_f32 &&