aco: create hw_init_scratch() function for p_init_scratch lowering

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21780>
This commit is contained in:
Daniel Schürmann
2023-03-09 20:50:06 +01:00
committed by Marge Bot
parent 2fee99a36c
commit 7d35bf24f6
2 changed files with 35 additions and 28 deletions

View File

@@ -639,6 +639,8 @@ formats = [(f if len(f) == 5 else f + ('',)) for f in formats]
% endfor % endfor
}; };
void hw_init_scratch(Builder& bld, Definition def, Operand scratch_addr, Operand scratch_offset);
} // namespace aco } // namespace aco
#endif /* _ACO_BUILDER_ */""" #endif /* _ACO_BUILDER_ */"""

View File

@@ -2115,6 +2115,37 @@ emit_set_mode_from_block(Builder& bld, Program& program, Block* block, bool alwa
emit_set_mode(bld, block->fp_mode, set_round, set_denorm); emit_set_mode(bld, block->fp_mode, set_round, set_denorm);
} }
void
hw_init_scratch(Builder& bld, Definition def, Operand scratch_addr, Operand scratch_offset)
{
/* Since we know what the high 16 bits of scratch_hi is, we can set all the high 16
* bits in the same instruction that we add the carry.
*/
Operand hi_add = Operand::c32(0xffff0000 - S_008F04_SWIZZLE_ENABLE_GFX6(1));
Operand scratch_addr_lo(scratch_addr.physReg(), s1);
Operand scratch_addr_hi(scratch_addr_lo.physReg().advance(4), s1);
if (bld.program->gfx_level >= GFX10) {
PhysReg scratch_lo = def.physReg();
PhysReg scratch_hi = def.physReg().advance(4);
bld.sop2(aco_opcode::s_add_u32, Definition(scratch_lo, s1), Definition(scc, s1),
scratch_addr_lo, scratch_offset);
bld.sop2(aco_opcode::s_addc_u32, Definition(scratch_hi, s1), Definition(scc, s1),
scratch_addr_hi, hi_add, Operand(scc, s1));
/* "((size - 1) << 11) | register" (FLAT_SCRATCH_LO/HI is encoded as register
* 20/21) */
bld.sopk(aco_opcode::s_setreg_b32, Operand(scratch_lo, s1), (31 << 11) | 20);
bld.sopk(aco_opcode::s_setreg_b32, Operand(scratch_hi, s1), (31 << 11) | 21);
} else {
bld.sop2(aco_opcode::s_add_u32, Definition(flat_scr_lo, s1), Definition(scc, s1),
scratch_addr_lo, scratch_offset);
bld.sop2(aco_opcode::s_addc_u32, Definition(flat_scr_hi, s1), Definition(scc, s1),
scratch_addr_hi, hi_add, Operand(scc, s1));
}
}
void void
lower_to_hw_instr(Program* program) lower_to_hw_instr(Program* program)
{ {
@@ -2466,39 +2497,13 @@ lower_to_hw_instr(Program* program)
break; break;
Operand scratch_addr = instr->operands[0]; Operand scratch_addr = instr->operands[0];
Operand scratch_addr_lo(scratch_addr.physReg(), s1);
if (program->stage.hw != HWStage::CS) { if (program->stage.hw != HWStage::CS) {
bld.smem(aco_opcode::s_load_dwordx2, instr->definitions[0], scratch_addr, bld.smem(aco_opcode::s_load_dwordx2, instr->definitions[0], scratch_addr,
Operand::zero()); Operand::zero());
scratch_addr_lo.setFixed(instr->definitions[0].physReg()); scratch_addr.setFixed(instr->definitions[0].physReg());
} }
Operand scratch_addr_hi(scratch_addr_lo.physReg().advance(4), s1);
/* Since we know what the high 16 bits of scratch_hi is, we can set all the high 16 hw_init_scratch(bld, instr->definitions[0], scratch_addr, instr->operands[1]);
* bits in the same instruction that we add the carry.
*/
uint32_t hi_add = 0xffff0000 - S_008F04_SWIZZLE_ENABLE_GFX6(1);
if (program->gfx_level >= GFX10) {
Operand scratch_lo(instr->definitions[0].physReg(), s1);
Operand scratch_hi(instr->definitions[0].physReg().advance(4), s1);
bld.sop2(aco_opcode::s_add_u32, Definition(scratch_lo.physReg(), s1),
Definition(scc, s1), scratch_addr_lo, instr->operands[1]);
bld.sop2(aco_opcode::s_addc_u32, Definition(scratch_hi.physReg(), s1),
Definition(scc, s1), scratch_addr_hi, Operand::c32(hi_add),
Operand(scc, s1));
/* "((size - 1) << 11) | register" (FLAT_SCRATCH_LO/HI is encoded as register
* 20/21) */
bld.sopk(aco_opcode::s_setreg_b32, scratch_lo, (31 << 11) | 20);
bld.sopk(aco_opcode::s_setreg_b32, scratch_hi, (31 << 11) | 21);
} else {
bld.sop2(aco_opcode::s_add_u32, Definition(flat_scr_lo, s1), Definition(scc, s1),
scratch_addr_lo, instr->operands[1]);
bld.sop2(aco_opcode::s_addc_u32, Definition(flat_scr_hi, s1), Definition(scc, s1),
scratch_addr_hi, Operand::c32(hi_add), Operand(scc, s1));
}
break; break;
} }
case aco_opcode::p_jump_to_epilog: { case aco_opcode::p_jump_to_epilog: {