aco: Stylistic changes to emit_gfx10_wave64_bpermute.
Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20293>
This commit is contained in:
@@ -219,12 +219,17 @@ emit_bpermute(isel_context* ctx, Builder& bld, Temp index, Temp data)
|
||||
input_data.setLateKill(true);
|
||||
same_half.setLateKill(true);
|
||||
|
||||
/* We need one pair of shared VGPRs:
|
||||
* Note, that these have twice the allocation granularity of normal VGPRs */
|
||||
ctx->program->config->num_shared_vgprs = 2 * ctx->program->dev.vgpr_alloc_granule;
|
||||
if (ctx->options->gfx_level <= GFX10_3) {
|
||||
/* We need one pair of shared VGPRs:
|
||||
* Note, that these have twice the allocation granularity of normal VGPRs
|
||||
*/
|
||||
ctx->program->config->num_shared_vgprs = 2 * ctx->program->dev.vgpr_alloc_granule;
|
||||
|
||||
return bld.pseudo(aco_opcode::p_bpermute_gfx10w64, bld.def(v1), bld.def(s2), bld.def(s1, scc),
|
||||
index_x4, input_data, same_half);
|
||||
return bld.pseudo(aco_opcode::p_bpermute_gfx10w64, bld.def(v1), bld.def(s2),
|
||||
bld.def(s1, scc), index_x4, input_data, same_half);
|
||||
} else {
|
||||
unreachable("emit_bpermute does not yet support GFX11+");
|
||||
}
|
||||
} else {
|
||||
/* GFX8-9 or GFX10 wave32: bpermute works normally */
|
||||
Temp index_x4 = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(2u), index);
|
||||
|
@@ -848,7 +848,7 @@ emit_gfx10_wave64_bpermute(Program* program, aco_ptr<Instruction>& instr, Builde
|
||||
* manually swap the data between the two halves using two shared VGPRs.
|
||||
*/
|
||||
|
||||
assert(program->gfx_level >= GFX10);
|
||||
assert(program->gfx_level >= GFX10 && program->gfx_level <= GFX10_3);
|
||||
assert(program->wave_size == 64);
|
||||
|
||||
unsigned shared_vgpr_reg_0 = align(program->config->num_vgprs, 4) + 256;
|
||||
@@ -907,8 +907,9 @@ emit_gfx10_wave64_bpermute(Program* program, aco_ptr<Instruction>& instr, Builde
|
||||
/* Restore saved EXEC */
|
||||
bld.sop1(aco_opcode::s_mov_b64, Definition(exec, s2), Operand(tmp_exec.physReg(), s2));
|
||||
|
||||
/* RA assumes that the result is always in the low part of the register, so we have to shift, if
|
||||
* it's not there already */
|
||||
/* RA assumes that the result is always in the low part of the register, so we have to shift,
|
||||
* if it's not there already.
|
||||
*/
|
||||
if (input_data.physReg().byte()) {
|
||||
unsigned right_shift = input_data.physReg().byte() * 8;
|
||||
bld.vop2(aco_opcode::v_lshrrev_b32, dst, Operand::c32(right_shift),
|
||||
|
Reference in New Issue
Block a user