anv/cmd_buffer: Only emit PIPE_CONTROL on-demand
This is in contrast to emitting it directly in vkCmdPipelineBarrier. This
has a couple of advantages. First, it means that no matter how many
vkCmdPipelineBarrier calls the application strings together, it gets at
most one or two PIPE_CONTROLs. Second, it allows us to better track when
we need to do stalls, because we can flag when a flush has happened and a
stall is needed.

Signed-off-by: Jason Ekstrand <jason@jlekstrand.net>
Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
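The shape of the change is easiest to see in isolation. Below is a minimal,
standalone sketch of the deferred-flush pattern this commit introduces; the
pending_pipe_bits field and the apply step mirror the diff, while the enum
values, struct, and printf calls are simplified stand-ins rather than the
driver's real API.

   #include <stdint.h>
   #include <stdio.h>

   /* Simplified stand-ins for the ANV_PIPE_* bits added by this commit. */
   enum pipe_bits {
      FLUSH_BIT          = 1 << 0, /* pipelined cache flush */
      INVALIDATE_BIT     = 1 << 1, /* immediate cache invalidation */
      CS_STALL_BIT       = 1 << 2, /* command streamer stall */
      NEEDS_CS_STALL_BIT = 1 << 3, /* a flush happened; a stall is owed */
   };

   struct cmd_state {
      uint32_t pending_pipe_bits;
   };

   /* vkCmdPipelineBarrier only records what is needed... */
   static void barrier(struct cmd_state *state, uint32_t bits)
   {
      state->pending_pipe_bits |= bits;
   }

   /* ...and the accumulated bits are resolved once, right before the next
    * draw or dispatch, no matter how many barriers were recorded. */
   static void apply_pipe_flushes(struct cmd_state *state)
   {
      uint32_t bits = state->pending_pipe_bits;

      /* Flushes are pipelined; invalidations are immediate.  A flush must
       * therefore be followed by a CS stall before any invalidation. */
      if (bits & FLUSH_BIT)
         bits |= NEEDS_CS_STALL_BIT;
      if ((bits & INVALIDATE_BIT) && (bits & NEEDS_CS_STALL_BIT)) {
         bits |= CS_STALL_BIT;
         bits &= ~NEEDS_CS_STALL_BIT;
      }

      if (bits & (FLUSH_BIT | CS_STALL_BIT)) {
         printf("PIPE_CONTROL: flush%s\n",
                (bits & CS_STALL_BIT) ? " + CS stall" : "");
         bits &= ~(FLUSH_BIT | CS_STALL_BIT);
      }
      if (bits & INVALIDATE_BIT) {
         printf("PIPE_CONTROL: invalidate\n");
         bits &= ~INVALIDATE_BIT;
      }

      state->pending_pipe_bits = bits;
   }

   int main(void)
   {
      struct cmd_state state = { 0 };
      barrier(&state, FLUSH_BIT);      /* vkCmdPipelineBarrier #1 */
      barrier(&state, INVALIDATE_BIT); /* vkCmdPipelineBarrier #2 */
      apply_pipe_flushes(&state);      /* next draw: two PIPE_CONTROLs total */
      return 0;
   }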
@@ -127,6 +127,7 @@ anv_cmd_state_reset(struct anv_cmd_buffer *cmd_buffer)
    state->dirty = 0;
    state->vb_dirty = 0;
+   state->pending_pipe_bits = 0;
    state->descriptors_dirty = 0;
    state->push_constants_dirty = 0;
    state->pipeline = NULL;
@@ -39,6 +39,8 @@ genX(cmd_buffer_alloc_null_surface_state)(struct anv_cmd_buffer *cmd_buffer,
 void genX(cmd_buffer_set_subpass)(struct anv_cmd_buffer *cmd_buffer,
                                   struct anv_subpass *subpass);

+void genX(cmd_buffer_apply_pipe_flushes)(struct anv_cmd_buffer *cmd_buffer);
+
 void genX(flush_pipeline_select_3d)(struct anv_cmd_buffer *cmd_buffer);
 void genX(flush_pipeline_select_gpgpu)(struct anv_cmd_buffer *cmd_buffer);

@@ -1066,6 +1066,45 @@ enum anv_cmd_dirty_bits {
 };
 typedef uint32_t anv_cmd_dirty_mask_t;

+enum anv_pipe_bits {
+   ANV_PIPE_DEPTH_CACHE_FLUSH_BIT            = (1 << 0),
+   ANV_PIPE_STALL_AT_SCOREBOARD_BIT          = (1 << 1),
+   ANV_PIPE_STATE_CACHE_INVALIDATE_BIT       = (1 << 2),
+   ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT    = (1 << 3),
+   ANV_PIPE_VF_CACHE_INVALIDATE_BIT          = (1 << 4),
+   ANV_PIPE_DATA_CACHE_FLUSH_BIT             = (1 << 5),
+   ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT     = (1 << 10),
+   ANV_PIPE_INSTRUCTION_CACHE_INVALIDATE_BIT = (1 << 11),
+   ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT    = (1 << 12),
+   ANV_PIPE_DEPTH_STALL_BIT                  = (1 << 13),
+   ANV_PIPE_CS_STALL_BIT                     = (1 << 20),
+
+   /* This bit does not exist directly in PIPE_CONTROL. Instead it means that
+    * a flush has happened but not a CS stall. The next time we do any sort
+    * of invalidation we need to insert a CS stall at that time. Otherwise,
+    * we would have to CS stall on every flush which could be bad.
+    */
+   ANV_PIPE_NEEDS_CS_STALL_BIT               = (1 << 21),
+};
+
+#define ANV_PIPE_FLUSH_BITS ( \
+   ANV_PIPE_DEPTH_CACHE_FLUSH_BIT | \
+   ANV_PIPE_DATA_CACHE_FLUSH_BIT | \
+   ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT)
+
+#define ANV_PIPE_STALL_BITS ( \
+   ANV_PIPE_STALL_AT_SCOREBOARD_BIT | \
+   ANV_PIPE_DEPTH_STALL_BIT | \
+   ANV_PIPE_CS_STALL_BIT)
+
+#define ANV_PIPE_INVALIDATE_BITS ( \
+   ANV_PIPE_STATE_CACHE_INVALIDATE_BIT | \
+   ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT | \
+   ANV_PIPE_VF_CACHE_INVALIDATE_BIT | \
+   ANV_PIPE_DATA_CACHE_FLUSH_BIT | \
+   ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT | \
+   ANV_PIPE_INSTRUCTION_CACHE_INVALIDATE_BIT)
+
 struct anv_vertex_binding {
    struct anv_buffer *                          buffer;
    VkDeviceSize                                 offset;
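The three grouping macros are what let the apply step further down be written
as two passes. Here is a standalone toy showing how a pending-bits value gets
classified; the enum and macros are abridged copies of the ones added above,
and main() is purely illustrative:

   #include <stdio.h>

   /* Abridged copies of the bits and grouping macros added above. */
   enum anv_pipe_bits {
      ANV_PIPE_DEPTH_CACHE_FLUSH_BIT         = (1 << 0),
      ANV_PIPE_DATA_CACHE_FLUSH_BIT          = (1 << 5),
      ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT  = (1 << 10),
      ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT = (1 << 12),
   };

   #define ANV_PIPE_FLUSH_BITS ( \
      ANV_PIPE_DEPTH_CACHE_FLUSH_BIT | \
      ANV_PIPE_DATA_CACHE_FLUSH_BIT | \
      ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT)

   #define ANV_PIPE_INVALIDATE_BITS ( /* abridged */ \
      ANV_PIPE_DATA_CACHE_FLUSH_BIT | \
      ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT)

   int main(void)
   {
      /* Say a barrier left an RT flush and a texture invalidation pending. */
      unsigned pending = ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
                         ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT;

      printf("flush pass has work:      %s\n",
             (pending & ANV_PIPE_FLUSH_BITS) ? "yes" : "no");
      printf("invalidate pass has work: %s\n",
             (pending & ANV_PIPE_INVALIDATE_BITS) ? "yes" : "no");
      return 0;
   }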
@@ -1164,6 +1203,7 @@ struct anv_cmd_state {
    uint32_t                                     vb_dirty;
    anv_cmd_dirty_mask_t                         dirty;
    anv_cmd_dirty_mask_t                         compute_dirty;
+   enum anv_pipe_bits                           pending_pipe_bits;
    uint32_t                                     num_workgroups_offset;
    struct anv_bo                                *num_workgroups_bo;
    VkShaderStageFlags                           descriptors_dirty;
@@ -307,6 +307,8 @@ genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer)
    }

    cmd_buffer->state.compute_dirty = 0;
+
+   genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
 }

 void
@@ -391,6 +391,8 @@ genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer)
    }

    cmd_buffer->state.compute_dirty = 0;
+
+   genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
 }

 void genX(CmdSetEvent)(
@@ -136,6 +136,82 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer)
    }
 }

+void
+genX(cmd_buffer_apply_pipe_flushes)(struct anv_cmd_buffer *cmd_buffer)
+{
+   enum anv_pipe_bits bits = cmd_buffer->state.pending_pipe_bits;
+
+   /* Flushes are pipelined while invalidations are handled immediately.
+    * Therefore, if we're flushing anything then we need to schedule a stall
+    * before any invalidations can happen.
+    */
+   if (bits & ANV_PIPE_FLUSH_BITS)
+      bits |= ANV_PIPE_NEEDS_CS_STALL_BIT;
+
+   /* If we're going to do an invalidate and we have a pending CS stall that
+    * has yet to be resolved, we do the CS stall now.
+    */
+   if ((bits & ANV_PIPE_INVALIDATE_BITS) &&
+       (bits & ANV_PIPE_NEEDS_CS_STALL_BIT)) {
+      bits |= ANV_PIPE_CS_STALL_BIT;
+      bits &= ~ANV_PIPE_NEEDS_CS_STALL_BIT;
+   }
+
+   if (bits & (ANV_PIPE_FLUSH_BITS | ANV_PIPE_CS_STALL_BIT)) {
+      anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pipe) {
+         pipe.DepthCacheFlushEnable = bits & ANV_PIPE_DEPTH_CACHE_FLUSH_BIT;
+         pipe.DCFlushEnable = bits & ANV_PIPE_DATA_CACHE_FLUSH_BIT;
+         pipe.RenderTargetCacheFlushEnable =
+            bits & ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;
+
+         pipe.DepthStallEnable = bits & ANV_PIPE_DEPTH_STALL_BIT;
+         pipe.CommandStreamerStallEnable = bits & ANV_PIPE_CS_STALL_BIT;
+         pipe.StallAtPixelScoreboard = bits & ANV_PIPE_STALL_AT_SCOREBOARD_BIT;
+
+         /*
+          * According to the Broadwell documentation, any PIPE_CONTROL with the
+          * "Command Streamer Stall" bit set must also have another bit set,
+          * with six different options:
+          *
+          *    - Render Target Cache Flush
+          *    - Depth Cache Flush
+          *    - Stall at Pixel Scoreboard
+          *    - Post-Sync Operation
+          *    - Depth Stall
+          *    - DC Flush Enable
+          *
+          * I chose "Stall at Pixel Scoreboard" since that's what we use in
+          * Mesa and it seems to work fine.  The choice is fairly arbitrary.
+          */
+         if ((bits & ANV_PIPE_CS_STALL_BIT) &&
+             !(bits & (ANV_PIPE_FLUSH_BITS | ANV_PIPE_DEPTH_STALL_BIT |
+                       ANV_PIPE_STALL_AT_SCOREBOARD_BIT)))
+            pipe.StallAtPixelScoreboard = true;
+      }
+
+      bits &= ~(ANV_PIPE_FLUSH_BITS | ANV_PIPE_CS_STALL_BIT);
+   }
+
+   if (bits & ANV_PIPE_INVALIDATE_BITS) {
+      anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pipe) {
+         pipe.StateCacheInvalidationEnable =
+            bits & ANV_PIPE_STATE_CACHE_INVALIDATE_BIT;
+         pipe.ConstantCacheInvalidationEnable =
+            bits & ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT;
+         pipe.VFCacheInvalidationEnable =
+            bits & ANV_PIPE_VF_CACHE_INVALIDATE_BIT;
+         pipe.TextureCacheInvalidationEnable =
+            bits & ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT;
+         pipe.InstructionCacheInvalidateEnable =
+            bits & ANV_PIPE_INSTRUCTION_CACHE_INVALIDATE_BIT;
+      }
+
+      bits &= ~ANV_PIPE_INVALIDATE_BITS;
+   }
+
+   cmd_buffer->state.pending_pipe_bits = bits;
+}
+
 void genX(CmdPipelineBarrier)(
     VkCommandBuffer                             commandBuffer,
     VkPipelineStageFlags                        srcStageMask,
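One subtlety in the function above is the Broadwell rule that a PIPE_CONTROL
with "Command Streamer Stall" set must carry at least one companion bit. The
check itself is mechanical; the following standalone snippet mirrors that
condition with placeholder bit values standing in for the ANV_PIPE_* bits
from the header hunk:

   #include <stdbool.h>
   #include <stdint.h>
   #include <stdio.h>

   /* Placeholder values; the real driver uses the ANV_PIPE_* bits. */
   #define FLUSH_BITS              0x7       /* depth, DC, and RT flushes */
   #define DEPTH_STALL_BIT         (1 << 3)
   #define STALL_AT_SCOREBOARD_BIT (1 << 4)
   #define CS_STALL_BIT            (1 << 5)

   /* True when a CS stall is requested with no companion bit, i.e. when
    * the workaround must force StallAtPixelScoreboard on. */
   static bool needs_scoreboard_stall(uint32_t bits)
   {
      return (bits & CS_STALL_BIT) &&
             !(bits & (FLUSH_BITS | DEPTH_STALL_BIT | STALL_AT_SCOREBOARD_BIT));
   }

   int main(void)
   {
      printf("%d\n", needs_scoreboard_stall(CS_STALL_BIT));       /* 1 */
      printf("%d\n", needs_scoreboard_stall(CS_STALL_BIT | 0x1)); /* 0 */
      return 0;
   }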
@@ -149,7 +225,7 @@ void genX(CmdPipelineBarrier)(
     const VkImageMemoryBarrier*                 pImageMemoryBarriers)
 {
    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
-   uint32_t b, *dw;
+   uint32_t b;

    /* XXX: Right now, we're really dumb and just flush whatever categories
     * the app asks for. One of these days we may make this a bit better
@@ -173,105 +249,50 @@ void genX(CmdPipelineBarrier)(
       dst_flags |= pImageMemoryBarriers[i].dstAccessMask;
    }

-   /* Mask out the Source access flags we care about */
-   const uint32_t src_mask =
-      VK_ACCESS_SHADER_WRITE_BIT |
-      VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
-      VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
-      VK_ACCESS_TRANSFER_WRITE_BIT;
-
-   src_flags = src_flags & src_mask;
-
-   /* Mask out the destination access flags we care about */
-   const uint32_t dst_mask =
-      VK_ACCESS_INDIRECT_COMMAND_READ_BIT |
-      VK_ACCESS_INDEX_READ_BIT |
-      VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT |
-      VK_ACCESS_UNIFORM_READ_BIT |
-      VK_ACCESS_SHADER_READ_BIT |
-      VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
-      VK_ACCESS_TRANSFER_READ_BIT;
-
-   dst_flags = dst_flags & dst_mask;
-
-   /* The src flags represent how things were used previously.  This is
-    * what we use for doing flushes.
-    */
-   struct GENX(PIPE_CONTROL) flush_cmd = {
-      GENX(PIPE_CONTROL_header),
-      .PostSyncOperation = NoWrite,
-   };
+   enum anv_pipe_bits pipe_bits = 0;

    for_each_bit(b, src_flags) {
       switch ((VkAccessFlagBits)(1 << b)) {
       case VK_ACCESS_SHADER_WRITE_BIT:
-         flush_cmd.DCFlushEnable = true;
+         pipe_bits |= ANV_PIPE_DATA_CACHE_FLUSH_BIT;
          break;
       case VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT:
-         flush_cmd.RenderTargetCacheFlushEnable = true;
+         pipe_bits |= ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;
          break;
       case VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT:
-         flush_cmd.DepthCacheFlushEnable = true;
+         pipe_bits |= ANV_PIPE_DEPTH_CACHE_FLUSH_BIT;
          break;
       case VK_ACCESS_TRANSFER_WRITE_BIT:
-         flush_cmd.RenderTargetCacheFlushEnable = true;
-         flush_cmd.DepthCacheFlushEnable = true;
+         pipe_bits |= ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;
+         pipe_bits |= ANV_PIPE_DEPTH_CACHE_FLUSH_BIT;
          break;
       default:
-         unreachable("should've masked this out by now");
+         break; /* Nothing to do */
       }
    }

-   /* If we end up doing two PIPE_CONTROLs, the first, flushing one also has
-    * to stall and wait for the flushing to finish, so we don't re-dirty the
-    * caches with in-flight rendering after the second PIPE_CONTROL
-    * invalidates.
-    */
-   if (dst_flags)
-      flush_cmd.CommandStreamerStallEnable = true;
-
-   if (src_flags && dst_flags) {
-      dw = anv_batch_emit_dwords(&cmd_buffer->batch, GENX(PIPE_CONTROL_length));
-      GENX(PIPE_CONTROL_pack)(&cmd_buffer->batch, dw, &flush_cmd);
-   }
-
-   /* The dst flags represent how things will be used in the future.  This
-    * is what we use for doing cache invalidations.
-    */
-   struct GENX(PIPE_CONTROL) invalidate_cmd = {
-      GENX(PIPE_CONTROL_header),
-      .PostSyncOperation = NoWrite,
-   };
-
    for_each_bit(b, dst_flags) {
       switch ((VkAccessFlagBits)(1 << b)) {
       case VK_ACCESS_INDIRECT_COMMAND_READ_BIT:
       case VK_ACCESS_INDEX_READ_BIT:
       case VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT:
-         invalidate_cmd.VFCacheInvalidationEnable = true;
+         pipe_bits |= ANV_PIPE_VF_CACHE_INVALIDATE_BIT;
          break;
       case VK_ACCESS_UNIFORM_READ_BIT:
-         invalidate_cmd.ConstantCacheInvalidationEnable = true;
-         /* fallthrough */
+         pipe_bits |= ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT;
+         pipe_bits |= ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT;
+         break;
       case VK_ACCESS_SHADER_READ_BIT:
-         invalidate_cmd.TextureCacheInvalidationEnable = true;
-         break;
       case VK_ACCESS_COLOR_ATTACHMENT_READ_BIT:
-         invalidate_cmd.TextureCacheInvalidationEnable = true;
-         break;
       case VK_ACCESS_TRANSFER_READ_BIT:
-         invalidate_cmd.TextureCacheInvalidationEnable = true;
+         pipe_bits |= ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT;
          break;
       default:
-         unreachable("should've masked this out by now");
+         break; /* Nothing to do */
       }
    }

-   if (dst_flags) {
-      dw = anv_batch_emit_dwords(&cmd_buffer->batch, GENX(PIPE_CONTROL_length));
-      GENX(PIPE_CONTROL_pack)(&cmd_buffer->batch, dw, &invalidate_cmd);
-   }
+   cmd_buffer->state.pending_pipe_bits |= pipe_bits;
 }

 static void
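With the emission gone, vkCmdPipelineBarrier is reduced to a table lookup:
Vulkan access flags in, pipe bits out. Here is a standalone mirror of the
src_flags (write) half of that table; the VK_ACCESS_* values are the real
Vulkan constants, hard-coded so this compiles without vulkan.h, while the
*_FLUSH_BIT names are stand-ins for the driver's ANV_PIPE_* bits:

   #include <stdint.h>
   #include <stdio.h>

   /* Real VkAccessFlagBits values, hard-coded to avoid needing vulkan.h. */
   #define VK_ACCESS_SHADER_WRITE_BIT                   0x00000040u
   #define VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT         0x00000100u
   #define VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT 0x00000400u
   #define VK_ACCESS_TRANSFER_WRITE_BIT                 0x00001000u

   /* Stand-ins for the driver's flush bits (values match the header hunk). */
   #define DEPTH_CACHE_FLUSH_BIT         (1u << 0)
   #define DATA_CACHE_FLUSH_BIT          (1u << 5)
   #define RENDER_TARGET_CACHE_FLUSH_BIT (1u << 12)

   /* Writes that happened before the barrier decide which caches to flush. */
   static uint32_t src_flags_to_pipe_bits(uint32_t src_flags)
   {
      uint32_t bits = 0;
      if (src_flags & VK_ACCESS_SHADER_WRITE_BIT)
         bits |= DATA_CACHE_FLUSH_BIT;
      if (src_flags & VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT)
         bits |= RENDER_TARGET_CACHE_FLUSH_BIT;
      if (src_flags & VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT)
         bits |= DEPTH_CACHE_FLUSH_BIT;
      if (src_flags & VK_ACCESS_TRANSFER_WRITE_BIT)
         bits |= RENDER_TARGET_CACHE_FLUSH_BIT | DEPTH_CACHE_FLUSH_BIT;
      return bits;
   }

   int main(void)
   {
      /* A transfer write dirties both the RT and depth caches: 0x1001. */
      printf("pipe bits: 0x%x\n",
             src_flags_to_pipe_bits(VK_ACCESS_TRANSFER_WRITE_BIT));
      return 0;
   }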
@@ -511,6 +532,8 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer)
    gen7_cmd_buffer_emit_scissor(cmd_buffer);

    genX(cmd_buffer_flush_dynamic_state)(cmd_buffer);
+
+   genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
 }

 static void