diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index 3abea8a42d1..eae27976913 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -76,6 +76,7 @@ static const driOptionDescription anv_dri_options[] = { DRI_CONF_ANV_FP64_WORKAROUND_ENABLED(false) DRI_CONF_ANV_GENERATED_INDIRECT_THRESHOLD(4) DRI_CONF_NO_16BIT(false) + DRI_CONF_ANV_QUERY_CLEAR_WITH_BLORP_THRESHOLD(6) DRI_CONF_SECTION_END DRI_CONF_SECTION_DEBUG @@ -1113,6 +1114,8 @@ anv_init_dri_options(struct anv_instance *instance) driQueryOptionb(&instance->dri_options, "fp64_workaround_enabled"); instance->generated_indirect_threshold = driQueryOptioni(&instance->dri_options, "generated_indirect_threshold"); + instance->query_clear_with_blorp_threshold = + driQueryOptioni(&instance->dri_options, "query_clear_with_blorp_threshold"); } VkResult anv_CreateInstance( diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 9ac4defc7d4..c9ce3ea77dd 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -1029,6 +1029,7 @@ struct anv_instance { bool fp64_workaround_enabled; float lower_depth_range_rate; unsigned generated_indirect_threshold; + unsigned query_clear_with_blorp_threshold; /* HW workarounds */ bool no_16bit; @@ -2085,6 +2086,11 @@ enum anv_pipe_bits { * implement a workaround for Gfx9. */ ANV_PIPE_POST_SYNC_BIT = (1 << 25), + + /* This bit does not exist directly in PIPE_CONTROL. It means that render + * target operations related to clearing of queries are ongoing. + */ + ANV_PIPE_QUERY_CLEARS_BIT = (1 << 26), }; #define ANV_PIPE_FLUSH_BITS ( \ @@ -2127,6 +2133,20 @@ enum anv_pipe_bits { #define ANV_PIPE_GPGPU_BITS ( \ (GFX_VERx10 >= 125 ? ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT : 0)) +/* Things we need to flush before accessing query data using the command + * streamer. 
+ * + * Prior to DG2 experiments show that the command streamer is not coherent + * with the tile cache so we need to flush it to make any data visible to CS. + * + * Otherwise we want to flush the RT cache which is where blorp writes, either + * for clearing the query buffer or for clearing the destination buffer in + * vkCopyQueryPoolResults(). + */ +#define ANV_PIPE_QUERY_FLUSH_BITS ( \ + (GFX_VERx10 < 125 ? ANV_PIPE_TILE_CACHE_FLUSH_BIT : 0) | \ + ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT) + enum intel_ds_stall_flag anv_pipe_flush_bit_to_ds_stall_flag(enum anv_pipe_bits bits); diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 684fd42e96e..cb92cf3f6a3 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -1752,7 +1752,16 @@ genX(emit_apply_pipe_flushes)(struct anv_batch *batch, * saying that render target writes are ongoing. */ if (bits & ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT) - bits &= ~(ANV_PIPE_RENDER_TARGET_BUFFER_WRITES); + bits &= ~ANV_PIPE_RENDER_TARGET_BUFFER_WRITES; + + /* If the conditions for flushing the query clears are met, we can + * toggle the bit off. + */ + if ((bits & ANV_PIPE_QUERY_FLUSH_BITS) == ANV_PIPE_QUERY_FLUSH_BITS && + (bits & (ANV_PIPE_END_OF_PIPE_SYNC_BIT | + ANV_PIPE_CS_STALL_BIT))) { + bits &= ~ANV_PIPE_QUERY_CLEARS_BIT; + } bits &= ~(ANV_PIPE_FLUSH_BITS | ANV_PIPE_STALL_BITS | ANV_PIPE_END_OF_PIPE_SYNC_BIT); @@ -3803,6 +3812,16 @@ genX(EndCommandBuffer)( return VK_SUCCESS; } + /* Flush query clears using blorp so that secondary query writes do not + * race with the clear. 
+ */ + if (cmd_buffer->state.pending_pipe_bits & ANV_PIPE_QUERY_CLEARS_BIT) { + anv_add_pending_pipe_bits(cmd_buffer, + ANV_PIPE_QUERY_FLUSH_BITS | + ANV_PIPE_NEEDS_END_OF_PIPE_SYNC_BIT, + "query clear flush prior command buffer end"); + } + genX(cmd_buffer_flush_generated_draws)(cmd_buffer); /* Turn on object level preemption if it is disabled to have it in known @@ -3878,6 +3897,16 @@ genX(CmdExecuteCommands)( */ genX(apply_task_urb_workaround)(primary); + /* Flush query clears using blorp so that secondary query writes do not + * race with the clear. + */ + if (primary->state.pending_pipe_bits & ANV_PIPE_QUERY_CLEARS_BIT) { + anv_add_pending_pipe_bits(primary, + ANV_PIPE_QUERY_FLUSH_BITS | + ANV_PIPE_NEEDS_END_OF_PIPE_SYNC_BIT, + "query clear flush prior to secondary buffer"); + } + /* The secondary command buffer doesn't know which textures etc. have been * flushed prior to their execution. Apply those flushes now. */ @@ -6535,6 +6564,21 @@ genX(flush_pipeline_select)(struct anv_cmd_buffer *cmd_buffer, cmd_buffer->state.compute.pipeline_dirty = true; #endif + +#if GFX_VERx10 < 125 + /* We apparently cannot flush the tile cache (color/depth) from the GPGPU + * pipeline. That means query clears will not be visible to query + * copy/write. So we need to flush it before going to GPGPU mode. 
+ */ + if (cmd_buffer->state.current_pipeline == _3D && + (cmd_buffer->state.pending_pipe_bits & ANV_PIPE_QUERY_CLEARS_BIT)) { + anv_add_pending_pipe_bits(cmd_buffer, + ANV_PIPE_QUERY_FLUSH_BITS | + ANV_PIPE_END_OF_PIPE_SYNC_BIT, + "query clear flush prior to GPGPU"); + } +#endif + #if GFX_VER >= 12 /* From Tigerlake PRM, Volume 2a, PIPELINE_SELECT: * diff --git a/src/intel/vulkan/genX_query.c b/src/intel/vulkan/genX_query.c index afb1542b268..0f913c5b7ec 100644 --- a/src/intel/vulkan/genX_query.c +++ b/src/intel/vulkan/genX_query.c @@ -181,7 +181,6 @@ VkResult genX(CreateQueryPool)( case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_BOTTOM_LEVEL_POINTERS_KHR: uint64s_per_slot = 1 + 2 /* availability + size (PostbuildInfoSerializationDesc) */; break; - break; #endif case VK_QUERY_TYPE_RESULT_STATUS_ONLY_KHR: @@ -783,6 +782,19 @@ void genX(CmdResetQueryPool)( { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); + struct anv_physical_device *pdevice = cmd_buffer->device->physical; + + if (queryCount >= pdevice->instance->query_clear_with_blorp_threshold) { + anv_cmd_buffer_fill_area(cmd_buffer, + anv_query_address(pool, firstQuery), + queryCount * pool->stride, + 0); + + anv_add_pending_pipe_bits(cmd_buffer, + ANV_PIPE_QUERY_CLEARS_BIT, + "vkCmdResetQueryPool clear with blorp"); + return; + } switch (pool->type) { case VK_QUERY_TYPE_OCCLUSION: @@ -963,6 +975,22 @@ emit_perf_intel_query(struct anv_cmd_buffer *cmd_buffer, } } +static void +emit_query_clear_flush(struct anv_cmd_buffer *cmd_buffer, + struct anv_query_pool *pool, + const char *reason) +{ + if ((cmd_buffer->state.pending_pipe_bits & ANV_PIPE_QUERY_CLEARS_BIT) == 0) + return; + + anv_add_pending_pipe_bits(cmd_buffer, + ANV_PIPE_QUERY_FLUSH_BITS | + ANV_PIPE_END_OF_PIPE_SYNC_BIT, + reason); + genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer); +} + + void genX(CmdBeginQuery)( VkCommandBuffer commandBuffer, VkQueryPool queryPool, @@ -983,6 +1011,8 @@ 
void genX(CmdBeginQueryIndexedEXT)( ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); struct anv_address query_addr = anv_query_address(pool, query); + emit_query_clear_flush(cmd_buffer, pool, "CmdBeginQuery* flush query clears"); + struct mi_builder b; mi_builder_init(&b, cmd_buffer->device->info, &cmd_buffer->batch); @@ -1352,6 +1382,9 @@ void genX(CmdWriteTimestamp2)( assert(pool->type == VK_QUERY_TYPE_TIMESTAMP); + emit_query_clear_flush(cmd_buffer, pool, + "CmdWriteTimestamp flush query clears"); + struct mi_builder b; mi_builder_init(&b, cmd_buffer->device->info, &cmd_buffer->batch); @@ -1472,33 +1505,42 @@ void genX(CmdCopyQueryPoolResults)( * to ensure proper ordering of the commands from the 3d pipe and the * command streamer. */ - if (cmd_buffer->state.pending_pipe_bits & ANV_PIPE_RENDER_TARGET_BUFFER_WRITES) { + const bool need_flushes = + (cmd_buffer->state.pending_pipe_bits & + (ANV_PIPE_RENDER_TARGET_BUFFER_WRITES | + ANV_PIPE_QUERY_CLEARS_BIT)); + + if (need_flushes) { anv_add_pending_pipe_bits(cmd_buffer, - ANV_PIPE_TILE_CACHE_FLUSH_BIT | - ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT, + ANV_PIPE_QUERY_FLUSH_BITS | ANV_PIPE_CS_STALL_BIT, "CopyQueryPoolResults"); } - if ((cmd_buffer->state.pending_pipe_bits & ANV_PIPE_FLUSH_BITS) || - /* Occlusion & timestamp queries are written using a PIPE_CONTROL and - * because we're about to copy values from MI commands, we need to - * stall the command streamer to make sure the PIPE_CONTROL values have - * landed, otherwise we could see inconsistent values & availability. - * - * From the vulkan spec: - * - * "vkCmdCopyQueryPoolResults is guaranteed to see the effect of - * previous uses of vkCmdResetQueryPool in the same queue, without - * any additional synchronization." 
- */ - pool->type == VK_QUERY_TYPE_OCCLUSION || - pool->type == VK_QUERY_TYPE_TIMESTAMP) { + bool need_cs_stall = + (cmd_buffer->state.pending_pipe_bits & ANV_PIPE_FLUSH_BITS) || + /* Occlusion & timestamp queries are written using a PIPE_CONTROL and + * because we're about to copy values from MI commands, we need to stall + * the command streamer to make sure the PIPE_CONTROL values have + * landed, otherwise we could see inconsistent values & availability. + * + * From the vulkan spec: + * + * "vkCmdCopyQueryPoolResults is guaranteed to see the effect of + * previous uses of vkCmdResetQueryPool in the same queue, without + * any additional synchronization." + */ + pool->type == VK_QUERY_TYPE_OCCLUSION || + pool->type == VK_QUERY_TYPE_TIMESTAMP; + + if (need_cs_stall) { anv_add_pending_pipe_bits(cmd_buffer, ANV_PIPE_CS_STALL_BIT, - "CopyQueryPoolResults"); - genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer); + "CopyQueryPoolResults stall"); } + if (need_cs_stall || need_flushes) + genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer); + struct anv_address dest_addr = anv_address_add(buffer->address, destOffset); for (uint32_t i = 0; i < queryCount; i++) { struct anv_address query_addr = anv_query_address(pool, firstQuery + i); diff --git a/src/util/driconf.h b/src/util/driconf.h index 32be1d7ab0b..bf92b5d8e1e 100644 --- a/src/util/driconf.h +++ b/src/util/driconf.h @@ -654,6 +654,10 @@ DRI_CONF_OPT_I(generated_indirect_threshold, def, 0, INT32_MAX, \ "Indirect threshold count above which we start generating commands") +#define DRI_CONF_ANV_QUERY_CLEAR_WITH_BLORP_THRESHOLD(def) \ + DRI_CONF_OPT_I(query_clear_with_blorp_threshold, def, 0, INT32_MAX, \ + "Query count at or above which vkCmdResetQueryPool clears queries with blorp") + /** * \brief DZN specific configuration options */