radv,aco: use ac_nir_lower_legacy_gs
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20296>
This commit is contained in:
@@ -7760,102 +7760,6 @@ visit_store_scratch(isel_context* ctx, nir_intrinsic_instr* instr)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
|
||||||
visit_emit_vertex_with_counter(isel_context* ctx, nir_intrinsic_instr* instr)
|
|
||||||
{
|
|
||||||
Builder bld(ctx->program, ctx->block);
|
|
||||||
|
|
||||||
unsigned stream = nir_intrinsic_stream_id(instr);
|
|
||||||
Temp next_vertex = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[0].ssa));
|
|
||||||
next_vertex = bld.v_mul_imm(bld.def(v1), next_vertex, 4u);
|
|
||||||
nir_const_value* next_vertex_cv = nir_src_as_const_value(instr->src[0]);
|
|
||||||
|
|
||||||
/* get GSVS ring */
|
|
||||||
Temp gsvs_ring =
|
|
||||||
bld.smem(aco_opcode::s_load_dwordx4, bld.def(s4), ctx->program->private_segment_buffer,
|
|
||||||
Operand::c32(RING_GSVS_GS * 16u));
|
|
||||||
|
|
||||||
unsigned num_components = ctx->program->info.gs.num_stream_output_components[stream];
|
|
||||||
|
|
||||||
unsigned stride = 4u * num_components * ctx->shader->info.gs.vertices_out;
|
|
||||||
unsigned stream_offset = 0;
|
|
||||||
for (unsigned i = 0; i < stream; i++) {
|
|
||||||
unsigned prev_stride = 4u * ctx->program->info.gs.num_stream_output_components[i] *
|
|
||||||
ctx->shader->info.gs.vertices_out;
|
|
||||||
stream_offset += prev_stride * ctx->program->wave_size;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Limit on the stride field for <= GFX7. */
|
|
||||||
assert(stride < (1 << 14));
|
|
||||||
|
|
||||||
Temp gsvs_dwords[4];
|
|
||||||
for (unsigned i = 0; i < 4; i++)
|
|
||||||
gsvs_dwords[i] = bld.tmp(s1);
|
|
||||||
bld.pseudo(aco_opcode::p_split_vector, Definition(gsvs_dwords[0]), Definition(gsvs_dwords[1]),
|
|
||||||
Definition(gsvs_dwords[2]), Definition(gsvs_dwords[3]), gsvs_ring);
|
|
||||||
|
|
||||||
if (stream_offset) {
|
|
||||||
Temp stream_offset_tmp = bld.copy(bld.def(s1), Operand::c32(stream_offset));
|
|
||||||
|
|
||||||
Temp carry = bld.tmp(s1);
|
|
||||||
gsvs_dwords[0] = bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.scc(Definition(carry)),
|
|
||||||
gsvs_dwords[0], stream_offset_tmp);
|
|
||||||
gsvs_dwords[1] = bld.sop2(aco_opcode::s_addc_u32, bld.def(s1), bld.def(s1, scc),
|
|
||||||
gsvs_dwords[1], Operand::zero(), bld.scc(carry));
|
|
||||||
}
|
|
||||||
|
|
||||||
gsvs_dwords[1] = bld.sop2(aco_opcode::s_or_b32, bld.def(s1), bld.def(s1, scc), gsvs_dwords[1],
|
|
||||||
Operand::c32(S_008F04_STRIDE(stride)));
|
|
||||||
gsvs_dwords[2] = bld.copy(bld.def(s1), Operand::c32(ctx->program->wave_size));
|
|
||||||
|
|
||||||
gsvs_ring = bld.pseudo(aco_opcode::p_create_vector, bld.def(s4), gsvs_dwords[0], gsvs_dwords[1],
|
|
||||||
gsvs_dwords[2], gsvs_dwords[3]);
|
|
||||||
|
|
||||||
unsigned offset = 0;
|
|
||||||
for (unsigned i = 0; i <= VARYING_SLOT_VAR31; i++) {
|
|
||||||
for (unsigned j = 0; j < 4; j++) {
|
|
||||||
if (((ctx->program->info.gs.output_streams[i] >> (j * 2)) & 0x3) != stream)
|
|
||||||
continue;
|
|
||||||
if (!(ctx->program->info.gs.output_usage_mask[i] & (1 << j)))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
if (ctx->outputs.mask[i] & (1 << j)) {
|
|
||||||
Operand vaddr_offset = next_vertex_cv ? Operand(v1) : Operand(next_vertex);
|
|
||||||
unsigned const_offset = (offset + (next_vertex_cv ? next_vertex_cv->u32 : 0u)) * 4u;
|
|
||||||
if (const_offset >= 4096u) {
|
|
||||||
if (vaddr_offset.isUndefined())
|
|
||||||
vaddr_offset = bld.copy(bld.def(v1), Operand::c32(const_offset / 4096u * 4096u));
|
|
||||||
else
|
|
||||||
vaddr_offset = bld.vadd32(bld.def(v1), Operand::c32(const_offset / 4096u * 4096u),
|
|
||||||
vaddr_offset);
|
|
||||||
const_offset %= 4096u;
|
|
||||||
}
|
|
||||||
|
|
||||||
aco_ptr<MUBUF_instruction> mubuf{create_instruction<MUBUF_instruction>(
|
|
||||||
aco_opcode::buffer_store_dword, Format::MUBUF, 4, 0)};
|
|
||||||
mubuf->operands[0] = Operand(gsvs_ring);
|
|
||||||
mubuf->operands[1] = vaddr_offset;
|
|
||||||
mubuf->operands[2] = Operand(get_arg(ctx, ctx->args->ac.gs2vs_offset));
|
|
||||||
mubuf->operands[3] = Operand(ctx->outputs.temps[i * 4u + j]);
|
|
||||||
mubuf->offen = !vaddr_offset.isUndefined();
|
|
||||||
mubuf->offset = const_offset;
|
|
||||||
mubuf->glc = ctx->program->gfx_level < GFX11;
|
|
||||||
mubuf->slc = true;
|
|
||||||
mubuf->sync = memory_sync_info(storage_vmem_output, semantic_can_reorder);
|
|
||||||
bld.insert(std::move(mubuf));
|
|
||||||
}
|
|
||||||
|
|
||||||
offset += ctx->shader->info.gs.vertices_out;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* outputs for the next vertex are undefined and keeping them around can
|
|
||||||
* create invalid IR with control flow */
|
|
||||||
ctx->outputs.mask[i] = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
bld.sopp(aco_opcode::s_sendmsg, bld.m0(ctx->gs_wave_id), -1, sendmsg_gs(false, true, stream));
|
|
||||||
}
|
|
||||||
|
|
||||||
Temp
|
Temp
|
||||||
emit_boolean_reduce(isel_context* ctx, nir_op op, unsigned cluster_size, Temp src)
|
emit_boolean_reduce(isel_context* ctx, nir_op op, unsigned cluster_size, Temp src)
|
||||||
{
|
{
|
||||||
@@ -9170,7 +9074,8 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
|
|||||||
}
|
}
|
||||||
case nir_intrinsic_emit_vertex_with_counter: {
|
case nir_intrinsic_emit_vertex_with_counter: {
|
||||||
assert(ctx->stage.hw == HWStage::GS);
|
assert(ctx->stage.hw == HWStage::GS);
|
||||||
visit_emit_vertex_with_counter(ctx, instr);
|
unsigned stream = nir_intrinsic_stream_id(instr);
|
||||||
|
bld.sopp(aco_opcode::s_sendmsg, bld.m0(ctx->gs_wave_id), -1, sendmsg_gs(false, true, stream));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case nir_intrinsic_end_primitive_with_counter: {
|
case nir_intrinsic_end_primitive_with_counter: {
|
||||||
@@ -9181,11 +9086,6 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
|
|||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case nir_intrinsic_set_vertex_and_primitive_count: {
|
|
||||||
assert(ctx->stage.hw == HWStage::GS);
|
|
||||||
/* unused in the legacy pipeline, the HW keeps track of this for us */
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case nir_intrinsic_is_subgroup_invocation_lt_amd: {
|
case nir_intrinsic_is_subgroup_invocation_lt_amd: {
|
||||||
Temp src = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
|
Temp src = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
|
||||||
bld.copy(Definition(get_ssa_temp(ctx, &instr->dest.ssa)), lanecount_to_mask(ctx, src));
|
bld.copy(Definition(get_ssa_temp(ctx, &instr->dest.ssa)), lanecount_to_mask(ctx, src));
|
||||||
|
@@ -194,44 +194,7 @@ static void
|
|||||||
visit_emit_vertex_with_counter(struct ac_shader_abi *abi, unsigned stream, LLVMValueRef vertexidx,
|
visit_emit_vertex_with_counter(struct ac_shader_abi *abi, unsigned stream, LLVMValueRef vertexidx,
|
||||||
LLVMValueRef *addrs)
|
LLVMValueRef *addrs)
|
||||||
{
|
{
|
||||||
unsigned offset = 0;
|
|
||||||
struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
|
struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
|
||||||
|
|
||||||
for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) {
|
|
||||||
unsigned output_usage_mask = ctx->shader_info->gs.output_usage_mask[i];
|
|
||||||
uint8_t output_stream = ctx->shader_info->gs.output_streams[i];
|
|
||||||
LLVMValueRef *out_ptr = &addrs[i * 4];
|
|
||||||
bool *is_16bit_ptr = &abi->is_16bit[i * 4];
|
|
||||||
int length = util_last_bit(output_usage_mask);
|
|
||||||
|
|
||||||
if (!(ctx->output_mask & (1ull << i)))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
for (unsigned j = 0; j < length; j++) {
|
|
||||||
if (((output_stream >> (j * 2)) & 0x3) != stream)
|
|
||||||
continue;
|
|
||||||
if (!(output_usage_mask & (1 << j)))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
LLVMTypeRef type = is_16bit_ptr[j] ? ctx->ac.f16 : ctx->ac.f32;
|
|
||||||
LLVMValueRef out_val = LLVMBuildLoad2(ctx->ac.builder, type, out_ptr[j], "");
|
|
||||||
LLVMValueRef voffset =
|
|
||||||
LLVMConstInt(ctx->ac.i32, offset * ctx->shader->info.gs.vertices_out, false);
|
|
||||||
|
|
||||||
offset++;
|
|
||||||
|
|
||||||
voffset = LLVMBuildAdd(ctx->ac.builder, voffset, vertexidx, "");
|
|
||||||
voffset = LLVMBuildMul(ctx->ac.builder, voffset, LLVMConstInt(ctx->ac.i32, 4, false), "");
|
|
||||||
|
|
||||||
out_val = ac_to_integer(&ctx->ac, out_val);
|
|
||||||
out_val = LLVMBuildZExtOrBitCast(ctx->ac.builder, out_val, ctx->ac.i32, "");
|
|
||||||
|
|
||||||
ac_build_buffer_store_dword(&ctx->ac, ctx->gsvs_ring[stream], out_val, NULL, voffset,
|
|
||||||
ac_get_arg(&ctx->ac, ctx->args->ac.gs2vs_offset),
|
|
||||||
ac_glc | ac_slc | ac_swizzled);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_EMIT | AC_SENDMSG_GS | (stream << 8),
|
ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_EMIT | AC_SENDMSG_GS | (stream << 8),
|
||||||
ctx->gs_wave_id);
|
ctx->gs_wave_id);
|
||||||
}
|
}
|
||||||
|
@@ -3475,10 +3475,20 @@ radv_postprocess_nir(struct radv_pipeline *pipeline,
|
|||||||
if (lowered_ngg)
|
if (lowered_ngg)
|
||||||
radv_lower_ngg(device, stage, pipeline_key);
|
radv_lower_ngg(device, stage, pipeline_key);
|
||||||
|
|
||||||
if (stage->stage == last_vgt_api_stage && stage->stage != MESA_SHADER_GEOMETRY && !lowered_ngg)
|
if (stage->stage == last_vgt_api_stage && !lowered_ngg) {
|
||||||
|
if (stage->stage != MESA_SHADER_GEOMETRY) {
|
||||||
NIR_PASS_V(stage->nir, ac_nir_lower_legacy_vs,
|
NIR_PASS_V(stage->nir, ac_nir_lower_legacy_vs,
|
||||||
stage->info.outinfo.export_prim_id ? VARYING_SLOT_PRIMITIVE_ID : -1, false);
|
stage->info.outinfo.export_prim_id ? VARYING_SLOT_PRIMITIVE_ID : -1, false);
|
||||||
|
|
||||||
|
} else {
|
||||||
|
ac_nir_gs_output_info gs_out_info = {
|
||||||
|
.streams = stage->info.gs.output_streams,
|
||||||
|
.usage_mask = stage->info.gs.output_usage_mask,
|
||||||
|
};
|
||||||
|
NIR_PASS_V(stage->nir, ac_nir_lower_legacy_gs, false, false, &gs_out_info);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
NIR_PASS(_, stage->nir, nir_opt_idiv_const, 8);
|
NIR_PASS(_, stage->nir, nir_opt_idiv_const, 8);
|
||||||
|
|
||||||
NIR_PASS(_, stage->nir, nir_lower_idiv,
|
NIR_PASS(_, stage->nir, nir_lower_idiv,
|
||||||
|
Reference in New Issue
Block a user