aco: implement sparse texture fetches

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7775>
This commit is contained in:
Rhys Perry
2020-11-20 15:11:16 +00:00
committed by Marge Bot
parent 5a4f6313b1
commit 382f50ad2c
2 changed files with 52 additions and 25 deletions

View File

@@ -514,7 +514,7 @@ public:
} }
<% <%
import itertools import itertools
formats = [("pseudo", [Format.PSEUDO], 'Pseudo_instruction', list(itertools.product(range(5), range(5))) + [(8, 1), (1, 8)]), formats = [("pseudo", [Format.PSEUDO], 'Pseudo_instruction', list(itertools.product(range(5), range(6))) + [(8, 1), (1, 8)]),
("sop1", [Format.SOP1], 'SOP1_instruction', [(0, 1), (1, 0), (1, 1), (2, 1), (3, 2)]), ("sop1", [Format.SOP1], 'SOP1_instruction', [(0, 1), (1, 0), (1, 1), (2, 1), (3, 2)]),
("sop2", [Format.SOP2], 'SOP2_instruction', itertools.product([1, 2], [2, 3])), ("sop2", [Format.SOP2], 'SOP2_instruction', itertools.product([1, 2], [2, 3])),
("sopk", [Format.SOPK], 'SOPK_instruction', itertools.product([0, 1, 2], [0, 1])), ("sopk", [Format.SOPK], 'SOPK_instruction', itertools.product([0, 1, 2], [0, 1])),

View File

@@ -5876,6 +5876,20 @@ memory_sync_info get_memory_sync_info(nir_intrinsic_instr *instr, storage_class
return memory_sync_info(storage, semantics); return memory_sync_info(storage, semantics);
} }
Operand emit_tfe_init(Builder& bld, Temp dst)
{
Temp tmp = bld.tmp(dst.regClass());
aco_ptr<Pseudo_instruction> vec{create_instruction<Pseudo_instruction>(
aco_opcode::p_create_vector, Format::PSEUDO, dst.size(), 1)};
for (unsigned i = 0; i < dst.size(); i++)
vec->operands[i] = Operand(0u);
vec->definitions[0] = Definition(tmp);
bld.insert(std::move(vec));
return Operand(tmp);
}
void visit_image_load(isel_context *ctx, nir_intrinsic_instr *instr) void visit_image_load(isel_context *ctx, nir_intrinsic_instr *instr)
{ {
Builder bld(ctx->program, ctx->block); Builder bld(ctx->program, ctx->block);
@@ -8967,22 +8981,26 @@ void visit_tex(isel_context *ctx, nir_tex_instr *instr)
} }
/* Build tex instruction */ /* Build tex instruction */
unsigned dmask = nir_ssa_def_components_read(&instr->dest.ssa); unsigned dmask = nir_ssa_def_components_read(&instr->dest.ssa) & 0xf;
if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF)
dmask = u_bit_consecutive(0, util_last_bit(dmask));
if (instr->is_sparse)
dmask = MAX2(dmask, 1) | 0x10;
unsigned dim = ctx->options->chip_class >= GFX10 && instr->sampler_dim != GLSL_SAMPLER_DIM_BUF unsigned dim = ctx->options->chip_class >= GFX10 && instr->sampler_dim != GLSL_SAMPLER_DIM_BUF
? ac_get_sampler_dim(ctx->options->chip_class, instr->sampler_dim, instr->is_array) ? ac_get_sampler_dim(ctx->options->chip_class, instr->sampler_dim, instr->is_array)
: 0; : 0;
Temp dst = get_ssa_temp(ctx, &instr->dest.ssa); Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
Temp tmp_dst = dst; Temp tmp_dst = dst;
/* gather4 selects the component by dmask and always returns vec4 */ /* gather4 selects the component by dmask and always returns vec4 (vec5 if sparse) */
if (instr->op == nir_texop_tg4) { if (instr->op == nir_texop_tg4) {
assert(instr->dest.ssa.num_components == 4); assert(instr->dest.ssa.num_components == (4 + instr->is_sparse));
if (instr->is_shadow) if (instr->is_shadow)
dmask = 1; dmask = 1;
else else
dmask = 1 << instr->component; dmask = 1 << instr->component;
if (tg4_integer_cube_workaround || dst.type() == RegType::sgpr) if (tg4_integer_cube_workaround || dst.type() == RegType::sgpr)
tmp_dst = bld.tmp(v4); tmp_dst = bld.tmp(instr->is_sparse ? v5 : v4);
} else if (instr->op == nir_texop_samples_identical) { } else if (instr->op == nir_texop_samples_identical) {
tmp_dst = bld.tmp(v1); tmp_dst = bld.tmp(v1);
} else if (util_bitcount(dmask) != instr->dest.ssa.num_components || dst.type() == RegType::sgpr) { } else if (util_bitcount(dmask) != instr->dest.ssa.num_components || dst.type() == RegType::sgpr) {
@@ -9124,9 +9142,8 @@ void visit_tex(isel_context *ctx, nir_tex_instr *instr)
//FIXME: if (ctx->abi->gfx9_stride_size_workaround) return ac_build_buffer_load_format_gfx9_safe() //FIXME: if (ctx->abi->gfx9_stride_size_workaround) return ac_build_buffer_load_format_gfx9_safe()
assert(coords.size() == 1); assert(coords.size() == 1);
unsigned last_bit = util_last_bit(nir_ssa_def_components_read(&instr->dest.ssa));
aco_opcode op; aco_opcode op;
switch (last_bit) { switch (util_last_bit(dmask & 0xf)) {
case 1: case 1:
op = aco_opcode::buffer_load_format_x; break; op = aco_opcode::buffer_load_format_x; break;
case 2: case 2:
@@ -9139,21 +9156,19 @@ void visit_tex(isel_context *ctx, nir_tex_instr *instr)
unreachable("Tex instruction loads more than 4 components."); unreachable("Tex instruction loads more than 4 components.");
} }
/* if the instruction return value matches exactly the nir dest ssa, we can use it directly */ aco_ptr<MUBUF_instruction> mubuf{create_instruction<MUBUF_instruction>(
if (last_bit == instr->dest.ssa.num_components && dst.type() == RegType::vgpr) op, Format::MUBUF, 3 + instr->is_sparse, 1)};
tmp_dst = dst;
else
tmp_dst = bld.tmp(RegType::vgpr, last_bit);
aco_ptr<MUBUF_instruction> mubuf{create_instruction<MUBUF_instruction>(op, Format::MUBUF, 3, 1)};
mubuf->operands[0] = Operand(resource); mubuf->operands[0] = Operand(resource);
mubuf->operands[1] = Operand(coords[0]); mubuf->operands[1] = Operand(coords[0]);
mubuf->operands[2] = Operand((uint32_t) 0); mubuf->operands[2] = Operand((uint32_t) 0);
mubuf->definitions[0] = Definition(tmp_dst); mubuf->definitions[0] = Definition(tmp_dst);
mubuf->idxen = true; mubuf->idxen = true;
mubuf->tfe = instr->is_sparse;
if (mubuf->tfe)
mubuf->operands[3] = emit_tfe_init(bld, tmp_dst);
ctx->block->instructions.emplace_back(std::move(mubuf)); ctx->block->instructions.emplace_back(std::move(mubuf));
expand_vector(ctx, tmp_dst, dst, instr->dest.ssa.num_components, (1 << last_bit) - 1); expand_vector(ctx, tmp_dst, dst, instr->dest.ssa.num_components, dmask);
return; return;
} }
@@ -9190,15 +9205,18 @@ void visit_tex(isel_context *ctx, nir_tex_instr *instr)
instr->op == nir_texop_fragment_fetch || instr->op == nir_texop_fragment_fetch ||
instr->op == nir_texop_fragment_mask_fetch) { instr->op == nir_texop_fragment_mask_fetch) {
aco_opcode op = level_zero || instr->sampler_dim == GLSL_SAMPLER_DIM_MS || instr->sampler_dim == GLSL_SAMPLER_DIM_SUBPASS_MS ? aco_opcode::image_load : aco_opcode::image_load_mip; aco_opcode op = level_zero || instr->sampler_dim == GLSL_SAMPLER_DIM_MS || instr->sampler_dim == GLSL_SAMPLER_DIM_SUBPASS_MS ? aco_opcode::image_load : aco_opcode::image_load_mip;
tex.reset(create_instruction<MIMG_instruction>(op, Format::MIMG, 3, 1)); tex.reset(create_instruction<MIMG_instruction>(op, Format::MIMG, 3 + instr->is_sparse, 1));
tex->operands[0] = Operand(resource); tex->operands[0] = Operand(resource);
tex->operands[1] = Operand(s4); /* no sampler */ tex->operands[1] = Operand(s4); /* no sampler */
tex->operands[2] = Operand(arg); tex->operands[2] = Operand(arg);
tex->dim = dim; tex->dim = dim;
tex->dmask = dmask; tex->dmask = dmask & 0xf;
tex->unrm = true; tex->unrm = true;
tex->da = da; tex->da = da;
tex->tfe = instr->is_sparse;
tex->definitions[0] = Definition(tmp_dst); tex->definitions[0] = Definition(tmp_dst);
if (tex->tfe)
tex->operands[3] = emit_tfe_init(bld, tmp_dst);
ctx->block->instructions.emplace_back(std::move(tex)); ctx->block->instructions.emplace_back(std::move(tex));
if (instr->op == nir_texop_samples_identical) { if (instr->op == nir_texop_samples_identical) {
@@ -9331,23 +9349,26 @@ void visit_tex(isel_context *ctx, nir_tex_instr *instr)
instr->sampler_dim != GLSL_SAMPLER_DIM_SUBPASS_MS) instr->sampler_dim != GLSL_SAMPLER_DIM_SUBPASS_MS)
arg = emit_wqm(ctx, arg, bld.tmp(arg.regClass()), true); arg = emit_wqm(ctx, arg, bld.tmp(arg.regClass()), true);
tex.reset(create_instruction<MIMG_instruction>(opcode, Format::MIMG, 3, 1)); tex.reset(create_instruction<MIMG_instruction>(opcode, Format::MIMG, 3 + instr->is_sparse, 1));
tex->operands[0] = Operand(resource); tex->operands[0] = Operand(resource);
tex->operands[1] = Operand(sampler); tex->operands[1] = Operand(sampler);
tex->operands[2] = Operand(arg); tex->operands[2] = Operand(arg);
tex->dim = dim; tex->dim = dim;
tex->dmask = dmask; tex->dmask = dmask & 0xf;
tex->da = da; tex->da = da;
tex->tfe = instr->is_sparse;
tex->definitions[0] = Definition(tmp_dst); tex->definitions[0] = Definition(tmp_dst);
if (tex->tfe)
tex->operands[3] = emit_tfe_init(bld, tmp_dst);
ctx->block->instructions.emplace_back(std::move(tex)); ctx->block->instructions.emplace_back(std::move(tex));
if (tg4_integer_cube_workaround) { if (tg4_integer_cube_workaround) {
assert(tmp_dst.id() != dst.id()); assert(tmp_dst.id() != dst.id());
assert(tmp_dst.size() == dst.size() && dst.size() == 4); assert(tmp_dst.size() == dst.size());
emit_split_vector(ctx, tmp_dst, tmp_dst.size()); emit_split_vector(ctx, tmp_dst, tmp_dst.size());
Temp val[4]; Temp val[4];
for (unsigned i = 0; i < dst.size(); i++) { for (unsigned i = 0; i < 4; i++) {
val[i] = emit_extract_vector(ctx, tmp_dst, i, v1); val[i] = emit_extract_vector(ctx, tmp_dst, i, v1);
Temp cvt_val; Temp cvt_val;
if (stype == GLSL_TYPE_UINT) if (stype == GLSL_TYPE_UINT)
@@ -9356,11 +9377,17 @@ void visit_tex(isel_context *ctx, nir_tex_instr *instr)
cvt_val = bld.vop1(aco_opcode::v_cvt_i32_f32, bld.def(v1), val[i]); cvt_val = bld.vop1(aco_opcode::v_cvt_i32_f32, bld.def(v1), val[i]);
val[i] = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), val[i], cvt_val, tg4_compare_cube_wa64); val[i] = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), val[i], cvt_val, tg4_compare_cube_wa64);
} }
Temp tmp = dst.regClass() == v4 ? dst : bld.tmp(v4);
tmp_dst = bld.pseudo(aco_opcode::p_create_vector, Definition(tmp), Temp tmp = dst.regClass() == tmp_dst.regClass() ? dst : bld.tmp(tmp_dst.regClass());
val[0], val[1], val[2], val[3]); if (instr->is_sparse)
tmp_dst = bld.pseudo(aco_opcode::p_create_vector, Definition(tmp),
val[0], val[1], val[2], val[3],
emit_extract_vector(ctx, tmp_dst, 4, v1));
else
tmp_dst = bld.pseudo(aco_opcode::p_create_vector, Definition(tmp),
val[0], val[1], val[2], val[3]);
} }
unsigned mask = instr->op == nir_texop_tg4 ? 0xF : dmask; unsigned mask = instr->op == nir_texop_tg4 ? (instr->is_sparse ? 0x1F : 0xF) : dmask;
expand_vector(ctx, tmp_dst, dst, instr->dest.ssa.num_components, mask); expand_vector(ctx, tmp_dst, dst, instr->dest.ssa.num_components, mask);
} }