radv,aco: use ac_nir_lower_legacy_gs
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20296>
This commit is contained in:
@@ -7760,102 +7760,6 @@ visit_store_scratch(isel_context* ctx, nir_intrinsic_instr* instr)
|
||||
}
|
||||
}
|
||||
|
||||
/* Handles a GS "emit vertex with counter" intrinsic: patches the GSVS ring
 * descriptor for the intrinsic's stream, emits one buffer_store_dword per
 * written output component belonging to that stream, clears the per-slot
 * output masks, and finally issues an s_sendmsg for the stream.
 *
 * ctx:   instruction selection context (program, current block, outputs).
 * instr: the intrinsic; src[0] is read as the vertex index and the stream id
 *        comes from nir_intrinsic_stream_id().
 */
void
|
||||
visit_emit_vertex_with_counter(isel_context* ctx, nir_intrinsic_instr* instr)
|
||||
{
|
||||
Builder bld(ctx->program, ctx->block);
|
||||
|
||||
unsigned stream = nir_intrinsic_stream_id(instr);
|
||||
/* Take src[0] as a VGPR and scale it by 4 (presumably dword index -> byte
 * offset; confirm). */
Temp next_vertex = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[0].ssa));
|
||||
next_vertex = bld.v_mul_imm(bld.def(v1), next_vertex, 4u);
|
||||
/* Presumably non-null only when src[0] is a compile-time constant (verify
 * against nir_src_as_const_value); when set, the index is folded into the
 * store's immediate offset below instead of using next_vertex. */
nir_const_value* next_vertex_cv = nir_src_as_const_value(instr->src[0]);
|
||||
|
||||
/* get GSVS ring */
|
||||
Temp gsvs_ring =
|
||||
bld.smem(aco_opcode::s_load_dwordx4, bld.def(s4), ctx->program->private_segment_buffer,
|
||||
Operand::c32(RING_GSVS_GS * 16u));
|
||||
|
||||
unsigned num_components = ctx->program->info.gs.num_stream_output_components[stream];
|
||||
|
||||
/* stride = 4 * (components of this stream) * vertices_out */
unsigned stride = 4u * num_components * ctx->shader->info.gs.vertices_out;
|
||||
/* Accumulate (stride * wave_size) of every lower-numbered stream; the sum
 * is added to the first two descriptor dwords further down. */
unsigned stream_offset = 0;
|
||||
for (unsigned i = 0; i < stream; i++) {
|
||||
unsigned prev_stride = 4u * ctx->program->info.gs.num_stream_output_components[i] *
|
||||
ctx->shader->info.gs.vertices_out;
|
||||
stream_offset += prev_stride * ctx->program->wave_size;
|
||||
}
|
||||
|
||||
/* Limit on the stride field for <= GFX7. */
|
||||
assert(stride < (1 << 14));
|
||||
|
||||
/* Split the 4-dword descriptor into scalars so single dwords can be patched. */
Temp gsvs_dwords[4];
|
||||
for (unsigned i = 0; i < 4; i++)
|
||||
gsvs_dwords[i] = bld.tmp(s1);
|
||||
bld.pseudo(aco_opcode::p_split_vector, Definition(gsvs_dwords[0]), Definition(gsvs_dwords[1]),
|
||||
Definition(gsvs_dwords[2]), Definition(gsvs_dwords[3]), gsvs_ring);
|
||||
|
||||
/* Add stream_offset to dword 0 and propagate the carry into dword 1
 * (skipped entirely when the offset is zero). */
if (stream_offset) {
|
||||
Temp stream_offset_tmp = bld.copy(bld.def(s1), Operand::c32(stream_offset));
|
||||
|
||||
Temp carry = bld.tmp(s1);
|
||||
gsvs_dwords[0] = bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.scc(Definition(carry)),
|
||||
gsvs_dwords[0], stream_offset_tmp);
|
||||
gsvs_dwords[1] = bld.sop2(aco_opcode::s_addc_u32, bld.def(s1), bld.def(s1, scc),
|
||||
gsvs_dwords[1], Operand::zero(), bld.scc(carry));
|
||||
}
|
||||
|
||||
/* OR the stride field into dword 1 and overwrite dword 2 with the wave size. */
gsvs_dwords[1] = bld.sop2(aco_opcode::s_or_b32, bld.def(s1), bld.def(s1, scc), gsvs_dwords[1],
|
||||
Operand::c32(S_008F04_STRIDE(stride)));
|
||||
gsvs_dwords[2] = bld.copy(bld.def(s1), Operand::c32(ctx->program->wave_size));
|
||||
|
||||
/* Reassemble the patched descriptor. */
gsvs_ring = bld.pseudo(aco_opcode::p_create_vector, bld.def(s4), gsvs_dwords[0], gsvs_dwords[1],
|
||||
gsvs_dwords[2], gsvs_dwords[3]);
|
||||
|
||||
/* Running offset: advanced by vertices_out for every used component of this
 * stream, whether or not that component was actually written. */
unsigned offset = 0;
|
||||
for (unsigned i = 0; i <= VARYING_SLOT_VAR31; i++) {
|
||||
for (unsigned j = 0; j < 4; j++) {
|
||||
/* output_streams[i] packs a 2-bit stream id per component. */
if (((ctx->program->info.gs.output_streams[i] >> (j * 2)) & 0x3) != stream)
|
||||
continue;
|
||||
if (!(ctx->program->info.gs.output_usage_mask[i] & (1 << j)))
|
||||
continue;
|
||||
|
||||
/* Only components flagged in ctx->outputs.mask get a store. */
if (ctx->outputs.mask[i] & (1 << j)) {
|
||||
/* Constant index: leave vaddr undefined and fold the index into the
 * immediate; otherwise use the scaled VGPR index as vaddr. */
Operand vaddr_offset = next_vertex_cv ? Operand(v1) : Operand(next_vertex);
|
||||
unsigned const_offset = (offset + (next_vertex_cv ? next_vertex_cv->u32 : 0u)) * 4u;
|
||||
/* Move whole multiples of 4096 out of the immediate and into vaddr
 * (presumably the MUBUF immediate offset must stay below 4096 —
 * confirm against the ISA docs). */
if (const_offset >= 4096u) {
|
||||
if (vaddr_offset.isUndefined())
|
||||
vaddr_offset = bld.copy(bld.def(v1), Operand::c32(const_offset / 4096u * 4096u));
|
||||
else
|
||||
vaddr_offset = bld.vadd32(bld.def(v1), Operand::c32(const_offset / 4096u * 4096u),
|
||||
vaddr_offset);
|
||||
const_offset %= 4096u;
|
||||
}
|
||||
|
||||
aco_ptr<MUBUF_instruction> mubuf{create_instruction<MUBUF_instruction>(
|
||||
aco_opcode::buffer_store_dword, Format::MUBUF, 4, 0)};
|
||||
mubuf->operands[0] = Operand(gsvs_ring);
|
||||
mubuf->operands[1] = vaddr_offset;
|
||||
mubuf->operands[2] = Operand(get_arg(ctx, ctx->args->ac.gs2vs_offset));
|
||||
mubuf->operands[3] = Operand(ctx->outputs.temps[i * 4u + j]);
|
||||
/* offen is set only when a vaddr operand is actually supplied. */
mubuf->offen = !vaddr_offset.isUndefined();
|
||||
mubuf->offset = const_offset;
|
||||
mubuf->glc = ctx->program->gfx_level < GFX11;
|
||||
mubuf->slc = true;
|
||||
mubuf->sync = memory_sync_info(storage_vmem_output, semantic_can_reorder);
|
||||
bld.insert(std::move(mubuf));
|
||||
}
|
||||
|
||||
offset += ctx->shader->info.gs.vertices_out;
|
||||
}
|
||||
|
||||
/* outputs for the next vertex are undefined and keeping them around can
|
||||
* create invalid IR with control flow */
|
||||
ctx->outputs.mask[i] = 0;
|
||||
}
|
||||
|
||||
/* Issue s_sendmsg for this stream with gs_wave_id in m0 (presumably the
 * cut=false / emit=true form of sendmsg_gs — confirm argument order). */
bld.sopp(aco_opcode::s_sendmsg, bld.m0(ctx->gs_wave_id), -1, sendmsg_gs(false, true, stream));
|
||||
}
|
||||
|
||||
Temp
|
||||
emit_boolean_reduce(isel_context* ctx, nir_op op, unsigned cluster_size, Temp src)
|
||||
{
|
||||
@@ -9170,7 +9074,8 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
|
||||
}
|
||||
case nir_intrinsic_emit_vertex_with_counter: {
|
||||
assert(ctx->stage.hw == HWStage::GS);
|
||||
visit_emit_vertex_with_counter(ctx, instr);
|
||||
unsigned stream = nir_intrinsic_stream_id(instr);
|
||||
bld.sopp(aco_opcode::s_sendmsg, bld.m0(ctx->gs_wave_id), -1, sendmsg_gs(false, true, stream));
|
||||
break;
|
||||
}
|
||||
case nir_intrinsic_end_primitive_with_counter: {
|
||||
@@ -9181,11 +9086,6 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
|
||||
}
|
||||
break;
|
||||
}
|
||||
case nir_intrinsic_set_vertex_and_primitive_count: {
|
||||
assert(ctx->stage.hw == HWStage::GS);
|
||||
/* unused in the legacy pipeline, the HW keeps track of this for us */
|
||||
break;
|
||||
}
|
||||
case nir_intrinsic_is_subgroup_invocation_lt_amd: {
|
||||
Temp src = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
|
||||
bld.copy(Definition(get_ssa_temp(ctx, &instr->dest.ssa)), lanecount_to_mask(ctx, src));
|
||||
|
@@ -194,44 +194,7 @@ static void
|
||||
/* For every used output component assigned to `stream`, loads the value from
 * its address in `addrs`, stores it as a dword into ctx->gsvs_ring[stream] at
 * (component_slot * vertices_out + vertexidx) * 4, then sends the GS emit
 * message for the stream.
 *
 * abi:       shader ABI; the radv shader context is recovered from it.
 * stream:    stream index the vertex is emitted to.
 * vertexidx: vertex index value added into the store offset.
 * addrs:     output addresses, four entries per output slot (indexed i * 4 + j).
 */
visit_emit_vertex_with_counter(struct ac_shader_abi *abi, unsigned stream, LLVMValueRef vertexidx,
|
||||
LLVMValueRef *addrs)
|
||||
{
|
||||
/* Number of components stored so far for this stream; selects the slot. */
unsigned offset = 0;
|
||||
struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
|
||||
|
||||
for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) {
|
||||
unsigned output_usage_mask = ctx->shader_info->gs.output_usage_mask[i];
|
||||
uint8_t output_stream = ctx->shader_info->gs.output_streams[i];
|
||||
LLVMValueRef *out_ptr = &addrs[i * 4];
|
||||
bool *is_16bit_ptr = &abi->is_16bit[i * 4];
|
||||
/* Bounds the component loop (assumes util_last_bit returns 1 + the index
 * of the highest set bit, 0 for an empty mask — confirm). */
int length = util_last_bit(output_usage_mask);
|
||||
|
||||
/* Skip output slots not present in ctx->output_mask. */
if (!(ctx->output_mask & (1ull << i)))
|
||||
continue;
|
||||
|
||||
for (unsigned j = 0; j < length; j++) {
|
||||
/* output_stream packs a 2-bit stream id per component. */
if (((output_stream >> (j * 2)) & 0x3) != stream)
|
||||
continue;
|
||||
if (!(output_usage_mask & (1 << j)))
|
||||
continue;
|
||||
|
||||
/* Load as f16 or f32 depending on the per-component 16-bit flag. */
LLVMTypeRef type = is_16bit_ptr[j] ? ctx->ac.f16 : ctx->ac.f32;
|
||||
LLVMValueRef out_val = LLVMBuildLoad2(ctx->ac.builder, type, out_ptr[j], "");
|
||||
/* voffset = (offset * vertices_out + vertexidx) * 4 */
LLVMValueRef voffset =
|
||||
LLVMConstInt(ctx->ac.i32, offset * ctx->shader->info.gs.vertices_out, false);
|
||||
|
||||
offset++;
|
||||
|
||||
voffset = LLVMBuildAdd(ctx->ac.builder, voffset, vertexidx, "");
|
||||
voffset = LLVMBuildMul(ctx->ac.builder, voffset, LLVMConstInt(ctx->ac.i32, 4, false), "");
|
||||
|
||||
/* Convert to an integer, then zero-extend or bitcast to i32 for the store. */
out_val = ac_to_integer(&ctx->ac, out_val);
|
||||
out_val = LLVMBuildZExtOrBitCast(ctx->ac.builder, out_val, ctx->ac.i32, "");
|
||||
|
||||
/* Store into this stream's GSVS ring, passing voffset and the
 * gs2vs_offset shader argument, with glc | slc | swizzled flags. */
ac_build_buffer_store_dword(&ctx->ac, ctx->gsvs_ring[stream], out_val, NULL, voffset,
|
||||
ac_get_arg(&ctx->ac, ctx->args->ac.gs2vs_offset),
|
||||
ac_glc | ac_slc | ac_swizzled);
|
||||
}
|
||||
}
|
||||
|
||||
/* Send the GS emit message; the stream index is encoded at bit 8. */
ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_EMIT | AC_SENDMSG_GS | (stream << 8),
|
||||
ctx->gs_wave_id);
|
||||
}
|
||||
|
@@ -3475,9 +3475,19 @@ radv_postprocess_nir(struct radv_pipeline *pipeline,
|
||||
if (lowered_ngg)
|
||||
radv_lower_ngg(device, stage, pipeline_key);
|
||||
|
||||
if (stage->stage == last_vgt_api_stage && stage->stage != MESA_SHADER_GEOMETRY && !lowered_ngg)
|
||||
NIR_PASS_V(stage->nir, ac_nir_lower_legacy_vs,
|
||||
stage->info.outinfo.export_prim_id ? VARYING_SLOT_PRIMITIVE_ID : -1, false);
|
||||
if (stage->stage == last_vgt_api_stage && !lowered_ngg) {
|
||||
if (stage->stage != MESA_SHADER_GEOMETRY) {
|
||||
NIR_PASS_V(stage->nir, ac_nir_lower_legacy_vs,
|
||||
stage->info.outinfo.export_prim_id ? VARYING_SLOT_PRIMITIVE_ID : -1, false);
|
||||
|
||||
} else {
|
||||
ac_nir_gs_output_info gs_out_info = {
|
||||
.streams = stage->info.gs.output_streams,
|
||||
.usage_mask = stage->info.gs.output_usage_mask,
|
||||
};
|
||||
NIR_PASS_V(stage->nir, ac_nir_lower_legacy_gs, false, false, &gs_out_info);
|
||||
}
|
||||
}
|
||||
|
||||
NIR_PASS(_, stage->nir, nir_opt_idiv_const, 8);
|
||||
|
||||
|
Reference in New Issue
Block a user