radv: implement transform feedback queries with NGG streamout
The control bit is written to the upper bits because GDS counters are only 32 bits wide; this allows the existing query shader to be reused. Tested on GFX10.3. Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19325>
This commit is contained in:

committed by
Marge Bot

parent
7cfd0e8d31
commit
25e311e9d3
@@ -4180,6 +4180,10 @@ radv_flush_ngg_query_state(struct radv_cmd_buffer *cmd_buffer)
|
||||
if (cmd_buffer->state.active_prims_gen_gds_queries)
|
||||
ngg_query_state |= radv_ngg_query_prim_gen;
|
||||
|
||||
if (cmd_buffer->state.active_prims_xfb_gds_queries) {
|
||||
ngg_query_state |= radv_ngg_query_prim_xfb | radv_ngg_query_prim_gen;
|
||||
}
|
||||
|
||||
base_reg = pipeline->base.user_data_0[stage];
|
||||
assert(loc->sgpr_idx != -1);
|
||||
|
||||
|
@@ -201,6 +201,9 @@ lower_abi_instr(nir_builder *b, nir_instr *instr, void *state)
|
||||
case nir_intrinsic_load_prim_gen_query_enabled_amd:
|
||||
replacement = ngg_query_bool_setting(b, radv_ngg_query_prim_gen, s);
|
||||
break;
|
||||
case nir_intrinsic_load_prim_xfb_query_enabled_amd:
|
||||
replacement = ngg_query_bool_setting(b, radv_ngg_query_prim_xfb, s);
|
||||
break;
|
||||
case nir_intrinsic_load_cull_any_enabled_amd:
|
||||
replacement = nggc_bool_setting(
|
||||
b, radv_nggc_front_face | radv_nggc_back_face | radv_nggc_small_primitives, s);
|
||||
@@ -338,8 +341,9 @@ lower_abi_instr(nir_builder *b, nir_instr *instr, void *state)
|
||||
}
|
||||
|
||||
/* GDS counters:
|
||||
* offset 0 - pipeline statistics counter for all streams
|
||||
* offset 4|8|12|16 - generated primitive counter for stream 0|1|2|3
|
||||
* offset 0 - pipeline statistics counter for all streams
|
||||
* offset 4| 8|12|16 - generated primitive counter for stream 0|1|2|3
|
||||
* offset 20|24|28|32 - written primitive counter for stream 0|1|2|3
|
||||
*/
|
||||
case nir_intrinsic_atomic_add_gs_emit_prim_count_amd:
|
||||
nir_gds_atomic_add_amd(b, 32, intrin->src[0].ssa, nir_imm_int(b, 0), nir_imm_int(b, 0x100));
|
||||
@@ -350,7 +354,9 @@ lower_abi_instr(nir_builder *b, nir_instr *instr, void *state)
|
||||
nir_imm_int(b, 0x100));
|
||||
break;
|
||||
case nir_intrinsic_atomic_add_xfb_prim_count_amd:
|
||||
/* No-op for RADV. */
|
||||
nir_gds_atomic_add_amd(b, 32, intrin->src[0].ssa,
|
||||
nir_imm_int(b, 20 + nir_intrinsic_stream_id(intrin) * 4),
|
||||
nir_imm_int(b, 0x100));
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_streamout_config_amd:
|
||||
|
@@ -1218,6 +1218,7 @@ enum radv_ngg_query_state {
|
||||
radv_ngg_query_none = 0,
|
||||
radv_ngg_query_pipeline_stat = 1 << 0,
|
||||
radv_ngg_query_prim_gen = 1 << 1,
|
||||
radv_ngg_query_prim_xfb = 1 << 2,
|
||||
};
|
||||
|
||||
struct radv_vertex_binding {
|
||||
@@ -1539,6 +1540,7 @@ struct radv_cmd_state {
|
||||
unsigned active_pipeline_gds_queries;
|
||||
unsigned active_prims_gen_queries;
|
||||
unsigned active_prims_gen_gds_queries;
|
||||
unsigned active_prims_xfb_gds_queries;
|
||||
uint32_t trace_id;
|
||||
uint32_t last_ia_multi_vgt_param;
|
||||
uint32_t last_ge_cntl;
|
||||
@@ -1792,6 +1794,9 @@ unsigned radv_get_default_max_sample_dist(int log_samples);
|
||||
void radv_device_init_msaa(struct radv_device *device);
|
||||
VkResult radv_device_init_vrs_state(struct radv_device *device);
|
||||
|
||||
void radv_emit_write_data_imm(struct radeon_cmdbuf *cs, unsigned engine_sel, uint64_t va,
|
||||
uint32_t imm);
|
||||
|
||||
void radv_update_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
|
||||
const struct radv_image_view *iview,
|
||||
VkClearDepthStencilValue ds_clear_value,
|
||||
|
@@ -1116,6 +1116,8 @@ radv_CreateQueryPool(VkDevice _device, const VkQueryPoolCreateInfo *pCreateInfo,
|
||||
pool->stride = 8;
|
||||
break;
|
||||
case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
|
||||
pool->stride = 32;
|
||||
break;
|
||||
case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT:
|
||||
pool->stride = 32;
|
||||
if (pool->uses_gds) {
|
||||
@@ -1819,7 +1821,19 @@ emit_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *poo
|
||||
break;
|
||||
}
|
||||
case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
|
||||
emit_sample_streamout(cmd_buffer, va, index);
|
||||
if (cmd_buffer->device->physical_device->use_ngg_streamout) {
|
||||
/* generated prim counter */
|
||||
gfx10_copy_gds_query(cmd_buffer, 4 + index * 4, va);
|
||||
radv_emit_write_data_imm(cs, V_370_ME, va + 4, 0x80000000);
|
||||
|
||||
/* written prim counter */
|
||||
gfx10_copy_gds_query(cmd_buffer, 20 + index * 4, va + 8);
|
||||
radv_emit_write_data_imm(cs, V_370_ME, va + 12, 0x80000000);
|
||||
|
||||
cmd_buffer->state.active_prims_xfb_gds_queries++;
|
||||
} else {
|
||||
emit_sample_streamout(cmd_buffer, va, index);
|
||||
}
|
||||
break;
|
||||
case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT: {
|
||||
if (!cmd_buffer->state.active_prims_gen_queries) {
|
||||
@@ -1918,7 +1932,19 @@ emit_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *pool,
|
||||
break;
|
||||
}
|
||||
case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
|
||||
emit_sample_streamout(cmd_buffer, va + 16, index);
|
||||
if (cmd_buffer->device->physical_device->use_ngg_streamout) {
|
||||
/* generated prim counter */
|
||||
gfx10_copy_gds_query(cmd_buffer, 4 + index * 4, va + 16);
|
||||
radv_emit_write_data_imm(cs, V_370_ME, va + 20, 0x80000000);
|
||||
|
||||
/* written prim counter */
|
||||
gfx10_copy_gds_query(cmd_buffer, 20 + index * 4, va + 24);
|
||||
radv_emit_write_data_imm(cs, V_370_ME, va + 28, 0x80000000);
|
||||
|
||||
cmd_buffer->state.active_prims_xfb_gds_queries--;
|
||||
} else {
|
||||
emit_sample_streamout(cmd_buffer, va + 16, index);
|
||||
}
|
||||
break;
|
||||
case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT: {
|
||||
if (cmd_buffer->state.active_prims_gen_queries == 1) {
|
||||
|
@@ -2062,3 +2062,13 @@ radv_device_init_msaa(struct radv_device *device)
|
||||
for (i = 0; i < 8; i++)
|
||||
radv_get_sample_position(device, 8, i, device->sample_locations_8x[i]);
|
||||
}
|
||||
|
||||
/**
 * Emit a PKT3_WRITE_DATA packet into \p cs that stores one immediate value.
 *
 * Five values are pushed with radeon_emit(), in this order: the packet
 * header, a control word, \p va, \p va >> 32, and \p imm.
 *
 * \param cs          command stream the packet is appended to
 * \param engine_sel  value placed in the ENGINE_SEL field of the control word
 * \param va          64-bit destination address, emitted as two values
 * \param imm         32-bit immediate emitted as the packet payload
 */
void
|
||||
radv_emit_write_data_imm(struct radeon_cmdbuf *cs, unsigned engine_sel, uint64_t va, uint32_t imm)
|
||||
{
|
||||
radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
|
||||
/* Control word: destination is V_370_MEM, WR_CONFIRM is set to 1, and the
 * caller-supplied engine is selected.
 */
radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(engine_sel));
|
||||
/* NOTE(review): presumably radeon_emit() stores a 32-bit value, so this
 * contributes the low half of the address — confirm against radeon_emit().
 */
radeon_emit(cs, va);
|
||||
radeon_emit(cs, va >> 32); /* high half of the address */
|
||||
radeon_emit(cs, imm);
|
||||
}
|
||||
|
Reference in New Issue
Block a user