amd: add nir_intrinsic_xfb_counter_sub_amd and fix overflowed streamout offsets
Fixes: 5ec79f9899
- ac/nir/ngg: nogs support streamout
Reviewed-by: Qiang Yu <yuq825@gmail.com>
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21584>
This commit is contained in:
@@ -1752,6 +1752,8 @@ ngg_build_streamout_buffer_info(nir_builder *b,
|
||||
nir_ssa_def *buffer_offsets_ret[4],
|
||||
nir_ssa_def *emit_prim_ret[4])
|
||||
{
|
||||
nir_ssa_def *undef = nir_ssa_undef(b, 1, 32);
|
||||
|
||||
/* For radeonsi, which passes this value by arg when VS. Streamout needs accurate
|
||||
* num-vert-per-prim for writing correct amount of data to buffer.
|
||||
*/
|
||||
@@ -1785,7 +1787,7 @@ ngg_build_streamout_buffer_info(nir_builder *b,
|
||||
workgroup_buffer_sizes[buffer] =
|
||||
nir_bcsel(b, buffer_valid, inc_buffer_size, nir_imm_int(b, 0));
|
||||
} else
|
||||
workgroup_buffer_sizes[buffer] = nir_ssa_undef(b, 1, 32);
|
||||
workgroup_buffer_sizes[buffer] = undef;
|
||||
}
|
||||
|
||||
nir_ssa_def *ordered_id = nir_load_ordered_id_amd(b);
|
||||
@@ -1801,6 +1803,9 @@ ngg_build_streamout_buffer_info(nir_builder *b,
|
||||
nir_ssa_def *emit_prim[4];
|
||||
memcpy(emit_prim, gen_prim, 4 * sizeof(nir_ssa_def *));
|
||||
|
||||
nir_ssa_def *any_overflow = nir_imm_bool(b, false);
|
||||
nir_ssa_def *overflow_amount[4] = {undef, undef, undef, undef};
|
||||
|
||||
for (unsigned buffer = 0; buffer < 4; buffer++) {
|
||||
if (!(info->buffers_written & BITFIELD_BIT(buffer)))
|
||||
continue;
|
||||
@@ -1811,6 +1816,10 @@ ngg_build_streamout_buffer_info(nir_builder *b,
|
||||
nir_ssa_def *remain_prim = nir_idiv(b, remain_size, prim_stride_ret[buffer]);
|
||||
nir_ssa_def *overflow = nir_ilt(b, buffer_size, buffer_offset);
|
||||
|
||||
any_overflow = nir_ior(b, any_overflow, overflow);
|
||||
overflow_amount[buffer] = nir_imax(b, nir_imm_int(b, 0),
|
||||
nir_isub(b, buffer_offset, buffer_size));
|
||||
|
||||
unsigned stream = info->buffer_to_stream[buffer];
|
||||
/* When a previous workgroup has overflowed, we can't emit any primitive. */
|
||||
emit_prim[stream] = nir_bcsel(
|
||||
@@ -1822,9 +1831,16 @@ ngg_build_streamout_buffer_info(nir_builder *b,
|
||||
nir_store_shared(b, buffer_offset, scratch_base, .base = buffer * 4);
|
||||
}
|
||||
|
||||
/* No need to fixup the global buffer offset once we overflowed,
|
||||
* because following workgroups overflow for sure.
|
||||
/* We have to fix up the streamout offsets if we overflowed because they determine
|
||||
* the vertex count for DrawTransformFeedback.
|
||||
*/
|
||||
nir_if *if_any_overflow = nir_push_if(b, any_overflow);
|
||||
{
|
||||
nir_build_xfb_counter_sub_amd(b, nir_vec(b, overflow_amount, 4),
|
||||
/* mask of buffers to update */
|
||||
.write_mask = info->buffers_written);
|
||||
}
|
||||
nir_pop_if(b, if_any_overflow);
|
||||
|
||||
/* Save to LDS for being accessed by other waves in this workgroup. */
|
||||
for (unsigned stream = 0; stream < 4; stream++) {
|
||||
|
@@ -9132,6 +9132,9 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
|
||||
emit_split_vector(ctx, dst, instr->num_components);
|
||||
break;
|
||||
}
|
||||
case nir_intrinsic_xfb_counter_sub_amd:
|
||||
/* TODO: implement this */
|
||||
break;
|
||||
case nir_intrinsic_memory_barrier_buffer: {
|
||||
wait_imm wait;
|
||||
wait.lgkm = 0;
|
||||
|
@@ -4232,6 +4232,27 @@ static bool visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins
|
||||
result = ac_build_gather_values(&ctx->ac, global_count, instr->num_components);
|
||||
break;
|
||||
}
|
||||
case nir_intrinsic_xfb_counter_sub_amd: {
|
||||
/* must be called in a single lane of a workgroup. */
|
||||
LLVMTypeRef gdsptr = LLVMPointerType(ctx->ac.i32, AC_ADDR_SPACE_GDS);
|
||||
LLVMValueRef gdsbase = LLVMBuildIntToPtr(ctx->ac.builder, ctx->ac.i32_0, gdsptr, "");
|
||||
LLVMValueRef sub_vec = get_src(ctx, instr->src[0]);
|
||||
unsigned write_mask = nir_intrinsic_write_mask(instr);
|
||||
|
||||
for (unsigned i = 0; i < instr->num_components; i++) {
|
||||
if (write_mask & (1 << i)) {
|
||||
LLVMValueRef value =
|
||||
LLVMBuildExtractElement(ctx->ac.builder, sub_vec,
|
||||
LLVMConstInt(ctx->ac.i32, i, false), "");
|
||||
|
||||
LLVMValueRef gds_ptr =
|
||||
ac_build_gep_ptr(&ctx->ac, ctx->ac.i32, gdsbase, LLVMConstInt(ctx->ac.i32, i, 0));
|
||||
LLVMBuildAtomicRMW(ctx->ac.builder, LLVMAtomicRMWBinOpSub, gds_ptr, value,
|
||||
LLVMAtomicOrderingMonotonic, false);
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
case nir_intrinsic_export_amd: {
|
||||
unsigned flags = nir_intrinsic_flags(instr);
|
||||
unsigned target = nir_intrinsic_base(instr);
|
||||
|
@@ -672,6 +672,7 @@ visit_intrinsic(nir_shader *shader, nir_intrinsic_instr *instr)
|
||||
case nir_intrinsic_load_topology_id_intel:
|
||||
case nir_intrinsic_load_scratch_base_ptr:
|
||||
case nir_intrinsic_ordered_xfb_counter_add_amd:
|
||||
case nir_intrinsic_xfb_counter_sub_amd:
|
||||
case nir_intrinsic_load_stack:
|
||||
case nir_intrinsic_load_ray_launch_id:
|
||||
case nir_intrinsic_load_ray_instance_custom_index:
|
||||
|
@@ -1529,10 +1529,15 @@ intrinsic("load_streamout_buffer_amd", dest_comp=4, indices=[BASE], bit_sizes=[3
|
||||
# An ID for each workgroup ordered by primitive sequence
|
||||
system_value("ordered_id_amd", 1)
|
||||
|
||||
# Add to global streamout buffer counter in specified order
|
||||
# Add src1 to global streamout buffer offsets in the specified order
|
||||
# src[] = { ordered_id, counter }
|
||||
# WRITE_MASK = mask for counter channel to update
|
||||
intrinsic("ordered_xfb_counter_add_amd", dest_comp=0, src_comp=[1, 0], indices=[WRITE_MASK], bit_sizes=[32])
|
||||
# Subtract from global streamout buffer offsets. Used to fix up the offsets
|
||||
# when we overflow streamout buffers.
|
||||
# src[] = { offsets }
|
||||
# WRITE_MASK = mask of offsets to subtract
|
||||
intrinsic("xfb_counter_sub_amd", src_comp=[0], indices=[WRITE_MASK], bit_sizes=[32])
|
||||
|
||||
# Provoking vertex index in a primitive
|
||||
system_value("provoking_vtx_in_prim_amd", 1)
|
||||
|
Reference in New Issue
Block a user