aco: use saddr for global access with sgpr address
fossil-db (Sienna Cichlid):
Totals from 38 (0.03% of 134621) affected shaders:
CodeSize: 237196 -> 237060 (-0.06%); split: -0.09%, +0.03%
Instrs: 43895 -> 43894 (-0.00%); split: -0.02%, +0.01%
Latency: 914633 -> 916263 (+0.18%); split: -0.01%, +0.19%
InvThroughput: 468215 -> 468971 (+0.16%); split: -0.02%, +0.18%
SClause: 1239 -> 1242 (+0.24%)
PreSGPRs: 997 -> 1003 (+0.60%)
PreVGPRs: 936 -> 923 (-1.39%); split: -1.50%, +0.11%

Regression seems to be RA noise, creating a waitcnt.

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14124>
This commit is contained in:
@@ -271,16 +271,21 @@ emit_masked_swizzle(isel_context* ctx, Builder& bld, Temp src, unsigned mask)
|
|||||||
}
|
}
|
||||||
|
|
||||||
Temp
|
Temp
|
||||||
as_vgpr(isel_context* ctx, Temp val)
|
as_vgpr(Builder& bld, Temp val)
|
||||||
{
|
{
|
||||||
if (val.type() == RegType::sgpr) {
|
if (val.type() == RegType::sgpr)
|
||||||
Builder bld(ctx->program, ctx->block);
|
|
||||||
return bld.copy(bld.def(RegType::vgpr, val.size()), val);
|
return bld.copy(bld.def(RegType::vgpr, val.size()), val);
|
||||||
}
|
|
||||||
assert(val.type() == RegType::vgpr);
|
assert(val.type() == RegType::vgpr);
|
||||||
return val;
|
return val;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Temp
|
||||||
|
as_vgpr(isel_context* ctx, Temp val)
|
||||||
|
{
|
||||||
|
Builder bld(ctx->program, ctx->block);
|
||||||
|
return as_vgpr(bld, val);
|
||||||
|
}
|
||||||
|
|
||||||
// assumes a != 0xffffffff
|
// assumes a != 0xffffffff
|
||||||
void
|
void
|
||||||
emit_v_div_u32(isel_context* ctx, Temp dst, Temp a, uint32_t b)
|
emit_v_div_u32(isel_context* ctx, Temp dst, Temp a, uint32_t b)
|
||||||
@@ -4306,12 +4311,15 @@ global_load_callback(Builder& bld, const LoadEmitInfo& info, Temp offset, unsign
|
|||||||
mubuf->definitions[0] = Definition(val);
|
mubuf->definitions[0] = Definition(val);
|
||||||
bld.insert(std::move(mubuf));
|
bld.insert(std::move(mubuf));
|
||||||
} else {
|
} else {
|
||||||
offset = offset.regClass() == s2 ? bld.copy(bld.def(v2), offset) : offset;
|
|
||||||
|
|
||||||
aco_ptr<FLAT_instruction> flat{
|
aco_ptr<FLAT_instruction> flat{
|
||||||
create_instruction<FLAT_instruction>(op, global ? Format::GLOBAL : Format::FLAT, 2, 1)};
|
create_instruction<FLAT_instruction>(op, global ? Format::GLOBAL : Format::FLAT, 2, 1)};
|
||||||
flat->operands[0] = Operand(offset);
|
if (global && offset.regClass() == s2) {
|
||||||
|
flat->operands[0] = bld.copy(bld.def(v1), Operand::zero());
|
||||||
|
flat->operands[1] = Operand(offset);
|
||||||
|
} else {
|
||||||
|
flat->operands[0] = Operand(as_vgpr(bld, offset));
|
||||||
flat->operands[1] = Operand(s1);
|
flat->operands[1] = Operand(s1);
|
||||||
|
}
|
||||||
flat->glc = info.glc;
|
flat->glc = info.glc;
|
||||||
flat->dlc = info.glc && bld.program->chip_class >= GFX10;
|
flat->dlc = info.glc && bld.program->chip_class >= GFX10;
|
||||||
flat->sync = info.sync;
|
flat->sync = info.sync;
|
||||||
@@ -6440,9 +6448,6 @@ visit_store_global(isel_context* ctx, nir_intrinsic_instr* instr)
|
|||||||
bool glc =
|
bool glc =
|
||||||
nir_intrinsic_access(instr) & (ACCESS_VOLATILE | ACCESS_COHERENT | ACCESS_NON_READABLE);
|
nir_intrinsic_access(instr) & (ACCESS_VOLATILE | ACCESS_COHERENT | ACCESS_NON_READABLE);
|
||||||
|
|
||||||
if (ctx->options->chip_class >= GFX7)
|
|
||||||
addr = as_vgpr(ctx, addr);
|
|
||||||
|
|
||||||
unsigned write_count = 0;
|
unsigned write_count = 0;
|
||||||
Temp write_datas[32];
|
Temp write_datas[32];
|
||||||
unsigned offsets[32];
|
unsigned offsets[32];
|
||||||
@@ -6491,8 +6496,13 @@ visit_store_global(isel_context* ctx, nir_intrinsic_instr* instr)
|
|||||||
|
|
||||||
aco_ptr<FLAT_instruction> flat{
|
aco_ptr<FLAT_instruction> flat{
|
||||||
create_instruction<FLAT_instruction>(op, global ? Format::GLOBAL : Format::FLAT, 3, 0)};
|
create_instruction<FLAT_instruction>(op, global ? Format::GLOBAL : Format::FLAT, 3, 0)};
|
||||||
flat->operands[0] = Operand(store_addr);
|
if (global && store_addr.regClass() == s2) {
|
||||||
|
flat->operands[0] = bld.copy(bld.def(v1), Operand::zero());
|
||||||
|
flat->operands[1] = Operand(store_addr);
|
||||||
|
} else {
|
||||||
|
flat->operands[0] = Operand(as_vgpr(ctx, store_addr));
|
||||||
flat->operands[1] = Operand(s1);
|
flat->operands[1] = Operand(s1);
|
||||||
|
}
|
||||||
flat->operands[2] = Operand(write_datas[i]);
|
flat->operands[2] = Operand(write_datas[i]);
|
||||||
flat->glc = glc;
|
flat->glc = glc;
|
||||||
flat->dlc = false;
|
flat->dlc = false;
|
||||||
@@ -6534,9 +6544,6 @@ visit_global_atomic(isel_context* ctx, nir_intrinsic_instr* instr)
|
|||||||
Temp addr = get_ssa_temp(ctx, instr->src[0].ssa);
|
Temp addr = get_ssa_temp(ctx, instr->src[0].ssa);
|
||||||
Temp data = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[1].ssa));
|
Temp data = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[1].ssa));
|
||||||
|
|
||||||
if (ctx->options->chip_class >= GFX7)
|
|
||||||
addr = as_vgpr(ctx, addr);
|
|
||||||
|
|
||||||
if (instr->intrinsic == nir_intrinsic_global_atomic_comp_swap)
|
if (instr->intrinsic == nir_intrinsic_global_atomic_comp_swap)
|
||||||
data = bld.pseudo(aco_opcode::p_create_vector, bld.def(RegType::vgpr, data.size() * 2),
|
data = bld.pseudo(aco_opcode::p_create_vector, bld.def(RegType::vgpr, data.size() * 2),
|
||||||
get_ssa_temp(ctx, instr->src[2].ssa), data);
|
get_ssa_temp(ctx, instr->src[2].ssa), data);
|
||||||
@@ -6604,8 +6611,13 @@ visit_global_atomic(isel_context* ctx, nir_intrinsic_instr* instr)
|
|||||||
aco_opcode op = instr->dest.ssa.bit_size == 32 ? op32 : op64;
|
aco_opcode op = instr->dest.ssa.bit_size == 32 ? op32 : op64;
|
||||||
aco_ptr<FLAT_instruction> flat{create_instruction<FLAT_instruction>(
|
aco_ptr<FLAT_instruction> flat{create_instruction<FLAT_instruction>(
|
||||||
op, global ? Format::GLOBAL : Format::FLAT, 3, return_previous ? 1 : 0)};
|
op, global ? Format::GLOBAL : Format::FLAT, 3, return_previous ? 1 : 0)};
|
||||||
flat->operands[0] = Operand(addr);
|
if (global && addr.regClass() == s2) {
|
||||||
|
flat->operands[0] = bld.copy(bld.def(v1), Operand::zero());
|
||||||
|
flat->operands[1] = Operand(addr);
|
||||||
|
} else {
|
||||||
|
flat->operands[0] = Operand(as_vgpr(ctx, addr));
|
||||||
flat->operands[1] = Operand(s1);
|
flat->operands[1] = Operand(s1);
|
||||||
|
}
|
||||||
flat->operands[2] = Operand(data);
|
flat->operands[2] = Operand(data);
|
||||||
if (return_previous)
|
if (return_previous)
|
||||||
flat->definitions[0] = Definition(dst);
|
flat->definitions[0] = Definition(dst);
|
||||||
|
Reference in New Issue
Block a user