aco: implement nir_intrinsic_global_atomic_* on GFX6
GFX6 doesn't have FLAT instructions, so use MUBUF instructions instead.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3477>
This commit is contained in:
@@ -5012,17 +5012,22 @@ void visit_global_atomic(isel_context *ctx, nir_intrinsic_instr *instr)
|
||||
}
|
||||
|
||||
Builder bld(ctx->program, ctx->block);
|
||||
Temp addr = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[0].ssa));
|
||||
Temp addr = get_ssa_temp(ctx, instr->src[0].ssa);
|
||||
Temp data = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[1].ssa));
|
||||
|
||||
if (ctx->options->chip_class >= GFX7)
|
||||
addr = as_vgpr(ctx, addr);
|
||||
|
||||
if (instr->intrinsic == nir_intrinsic_global_atomic_comp_swap)
|
||||
data = bld.pseudo(aco_opcode::p_create_vector, bld.def(RegType::vgpr, data.size() * 2),
|
||||
get_ssa_temp(ctx, instr->src[2].ssa), data);
|
||||
|
||||
Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
|
||||
|
||||
bool global = ctx->options->chip_class >= GFX9;
|
||||
aco_opcode op32, op64;
|
||||
|
||||
if (ctx->options->chip_class >= GFX7) {
|
||||
bool global = ctx->options->chip_class >= GFX9;
|
||||
switch (instr->intrinsic) {
|
||||
case nir_intrinsic_global_atomic_add:
|
||||
op32 = global ? aco_opcode::global_atomic_add : aco_opcode::flat_atomic_add;
|
||||
@@ -5067,6 +5072,7 @@ void visit_global_atomic(isel_context *ctx, nir_intrinsic_instr *instr)
|
||||
default:
|
||||
unreachable("visit_atomic_global should only be called with nir_intrinsic_global_atomic_* instructions.");
|
||||
}
|
||||
|
||||
aco_opcode op = instr->dest.ssa.bit_size == 32 ? op32 : op64;
|
||||
aco_ptr<FLAT_instruction> flat{create_instruction<FLAT_instruction>(op, global ? Format::GLOBAL : Format::FLAT, 3, return_previous ? 1 : 0)};
|
||||
flat->operands[0] = Operand(addr);
|
||||
@@ -5081,6 +5087,74 @@ void visit_global_atomic(isel_context *ctx, nir_intrinsic_instr *instr)
|
||||
flat->barrier = barrier_buffer;
|
||||
ctx->program->needs_exact = true;
|
||||
ctx->block->instructions.emplace_back(std::move(flat));
|
||||
} else {
|
||||
assert(ctx->options->chip_class == GFX6);
|
||||
|
||||
switch (instr->intrinsic) {
|
||||
case nir_intrinsic_global_atomic_add:
|
||||
op32 = aco_opcode::buffer_atomic_add;
|
||||
op64 = aco_opcode::buffer_atomic_add_x2;
|
||||
break;
|
||||
case nir_intrinsic_global_atomic_imin:
|
||||
op32 = aco_opcode::buffer_atomic_smin;
|
||||
op64 = aco_opcode::buffer_atomic_smin_x2;
|
||||
break;
|
||||
case nir_intrinsic_global_atomic_umin:
|
||||
op32 = aco_opcode::buffer_atomic_umin;
|
||||
op64 = aco_opcode::buffer_atomic_umin_x2;
|
||||
break;
|
||||
case nir_intrinsic_global_atomic_imax:
|
||||
op32 = aco_opcode::buffer_atomic_smax;
|
||||
op64 = aco_opcode::buffer_atomic_smax_x2;
|
||||
break;
|
||||
case nir_intrinsic_global_atomic_umax:
|
||||
op32 = aco_opcode::buffer_atomic_umax;
|
||||
op64 = aco_opcode::buffer_atomic_umax_x2;
|
||||
break;
|
||||
case nir_intrinsic_global_atomic_and:
|
||||
op32 = aco_opcode::buffer_atomic_and;
|
||||
op64 = aco_opcode::buffer_atomic_and_x2;
|
||||
break;
|
||||
case nir_intrinsic_global_atomic_or:
|
||||
op32 = aco_opcode::buffer_atomic_or;
|
||||
op64 = aco_opcode::buffer_atomic_or_x2;
|
||||
break;
|
||||
case nir_intrinsic_global_atomic_xor:
|
||||
op32 = aco_opcode::buffer_atomic_xor;
|
||||
op64 = aco_opcode::buffer_atomic_xor_x2;
|
||||
break;
|
||||
case nir_intrinsic_global_atomic_exchange:
|
||||
op32 = aco_opcode::buffer_atomic_swap;
|
||||
op64 = aco_opcode::buffer_atomic_swap_x2;
|
||||
break;
|
||||
case nir_intrinsic_global_atomic_comp_swap:
|
||||
op32 = aco_opcode::buffer_atomic_cmpswap;
|
||||
op64 = aco_opcode::buffer_atomic_cmpswap_x2;
|
||||
break;
|
||||
default:
|
||||
unreachable("visit_atomic_global should only be called with nir_intrinsic_global_atomic_* instructions.");
|
||||
}
|
||||
|
||||
Temp rsrc = get_gfx6_global_rsrc(bld, addr);
|
||||
|
||||
aco_opcode op = instr->dest.ssa.bit_size == 32 ? op32 : op64;
|
||||
|
||||
aco_ptr<MUBUF_instruction> mubuf{create_instruction<MUBUF_instruction>(op, Format::MUBUF, 4, return_previous ? 1 : 0)};
|
||||
mubuf->operands[0] = addr.type() == RegType::vgpr ? Operand(addr) : Operand(v1);
|
||||
mubuf->operands[1] = Operand(rsrc);
|
||||
mubuf->operands[2] = Operand(0u);
|
||||
mubuf->operands[3] = Operand(data);
|
||||
if (return_previous)
|
||||
mubuf->definitions[0] = Definition(dst);
|
||||
mubuf->glc = return_previous;
|
||||
mubuf->dlc = false;
|
||||
mubuf->offset = 0;
|
||||
mubuf->addr64 = addr.type() == RegType::vgpr;
|
||||
mubuf->disable_wqm = true;
|
||||
mubuf->barrier = barrier_buffer;
|
||||
ctx->program->needs_exact = true;
|
||||
ctx->block->instructions.emplace_back(std::move(mubuf));
|
||||
}
|
||||
}
|
||||
|
||||
void emit_memory_barrier(isel_context *ctx, nir_intrinsic_instr *instr) {
|
||||
|
Reference in New Issue
Block a user