aco: Stylistic changes to emit_gfx10_wave64_bpermute.

Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20293>
This commit is contained in:
Timur Kristóf
2022-12-13 09:43:39 +01:00
committed by Marge Bot
parent 640e801651
commit 853e76f007
2 changed files with 14 additions and 8 deletions

View File

@@ -219,12 +219,17 @@ emit_bpermute(isel_context* ctx, Builder& bld, Temp index, Temp data)
input_data.setLateKill(true);
same_half.setLateKill(true);
/* We need one pair of shared VGPRs:
* Note, that these have twice the allocation granularity of normal VGPRs */
ctx->program->config->num_shared_vgprs = 2 * ctx->program->dev.vgpr_alloc_granule;
if (ctx->options->gfx_level <= GFX10_3) {
/* We need one pair of shared VGPRs:
* Note that these have twice the allocation granularity of normal VGPRs
*/
ctx->program->config->num_shared_vgprs = 2 * ctx->program->dev.vgpr_alloc_granule;
return bld.pseudo(aco_opcode::p_bpermute_gfx10w64, bld.def(v1), bld.def(s2), bld.def(s1, scc),
index_x4, input_data, same_half);
return bld.pseudo(aco_opcode::p_bpermute_gfx10w64, bld.def(v1), bld.def(s2),
bld.def(s1, scc), index_x4, input_data, same_half);
} else {
unreachable("emit_bpermute does not yet support GFX11+");
}
} else {
/* GFX8-9 or GFX10 wave32: bpermute works normally */
Temp index_x4 = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(2u), index);

View File

@@ -848,7 +848,7 @@ emit_gfx10_wave64_bpermute(Program* program, aco_ptr<Instruction>& instr, Builde
* manually swap the data between the two halves using two shared VGPRs.
*/
assert(program->gfx_level >= GFX10);
assert(program->gfx_level >= GFX10 && program->gfx_level <= GFX10_3);
assert(program->wave_size == 64);
unsigned shared_vgpr_reg_0 = align(program->config->num_vgprs, 4) + 256;
@@ -907,8 +907,9 @@ emit_gfx10_wave64_bpermute(Program* program, aco_ptr<Instruction>& instr, Builde
/* Restore saved EXEC */
bld.sop1(aco_opcode::s_mov_b64, Definition(exec, s2), Operand(tmp_exec.physReg(), s2));
/* RA assumes that the result is always in the low part of the register, so we have to shift, if
* it's not there already */
/* RA assumes that the result is always in the low part of the register, so we have to shift
* if it's not there already.
*/
if (input_data.physReg().byte()) {
unsigned right_shift = input_data.physReg().byte() * 8;
bld.vop2(aco_opcode::v_lshrrev_b32, dst, Operand::c32(right_shift),