aco: optimize 32-bit extracts and inserts using SDWA
Still need to use dst_u=preserve field to optimize packs.

fossil-db (Sienna Cichlid):
Totals from 15974 (10.66% of 149839) affected shaders:
VGPRs: 1009064 -> 1008968 (-0.01%); split: -0.03%, +0.02%
SpillSGPRs: 7959 -> 7964 (+0.06%)
CodeSize: 101716436 -> 101159568 (-0.55%); split: -0.55%, +0.01%
MaxWaves: 284464 -> 284490 (+0.01%); split: +0.02%, -0.01%
Instrs: 19334216 -> 19224241 (-0.57%); split: -0.57%, +0.00%
Latency: 375465295 -> 375230478 (-0.06%); split: -0.14%, +0.08%
InvThroughput: 79006105 -> 78860705 (-0.18%); split: -0.25%, +0.07%

fossil-db (Polaris):
Totals from 11369 (7.51% of 151365) affected shaders:
SGPRs: 787920 -> 787680 (-0.03%); split: -0.04%, +0.01%
VGPRs: 681056 -> 681040 (-0.00%); split: -0.01%, +0.00%
CodeSize: 68127288 -> 67664120 (-0.68%); split: -0.69%, +0.01%
MaxWaves: 54370 -> 54371 (+0.00%)
Instrs: 13294638 -> 13214109 (-0.61%); split: -0.62%, +0.01%
Latency: 373515759 -> 373214571 (-0.08%); split: -0.11%, +0.03%
InvThroughput: 166529524 -> 166275291 (-0.15%); split: -0.20%, +0.05%

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/3151>
This commit is contained in:
@@ -196,7 +196,7 @@ memory_sync_info get_sync_info(const Instruction* instr)
|
||||
}
|
||||
}
|
||||
|
||||
bool can_use_SDWA(chip_class chip, const aco_ptr<Instruction>& instr)
|
||||
bool can_use_SDWA(chip_class chip, const aco_ptr<Instruction>& instr, bool pre_ra)
|
||||
{
|
||||
if (!instr->isVALU())
|
||||
return false;
|
||||
@@ -217,7 +217,7 @@ bool can_use_SDWA(chip_class chip, const aco_ptr<Instruction>& instr)
|
||||
return false;
|
||||
|
||||
//TODO: return true if we know we will use vcc
|
||||
if (instr->definitions.size() >= 2)
|
||||
if (!pre_ra && instr->definitions.size() >= 2)
|
||||
return false;
|
||||
|
||||
for (unsigned i = 1; i < instr->operands.size(); i++) {
|
||||
@@ -251,9 +251,9 @@ bool can_use_SDWA(chip_class chip, const aco_ptr<Instruction>& instr)
|
||||
return false;
|
||||
|
||||
//TODO: return true if we know we will use vcc
|
||||
if (instr->isVOPC())
|
||||
if (!pre_ra && instr->isVOPC())
|
||||
return false;
|
||||
if (instr->operands.size() >= 3 && !is_mac)
|
||||
if (!pre_ra && instr->operands.size() >= 3 && !is_mac)
|
||||
return false;
|
||||
|
||||
return instr->opcode != aco_opcode::v_madmk_f32 &&
|
||||
|
Reference in New Issue
Block a user