/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 * SPDX-License-Identifier: MIT
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 */

#ifndef TU_CMD_BUFFER_H
#define TU_CMD_BUFFER_H

#include "tu_common.h"

#include "tu_cs.h"
#include "tu_descriptor_set.h"
#include "tu_device.h"
#include "tu_lrz.h"
#include "tu_pass.h"
#include "tu_pipeline.h"

enum tu_draw_state_group_id
{
   TU_DRAW_STATE_PROGRAM_CONFIG,
   TU_DRAW_STATE_PROGRAM,
   TU_DRAW_STATE_PROGRAM_BINNING,
   TU_DRAW_STATE_VB,
   TU_DRAW_STATE_VI,
   TU_DRAW_STATE_VI_BINNING,
   TU_DRAW_STATE_RAST,
   TU_DRAW_STATE_CONST,
   TU_DRAW_STATE_DESC_SETS,
   TU_DRAW_STATE_DESC_SETS_LOAD,
   TU_DRAW_STATE_VS_PARAMS,
   TU_DRAW_STATE_INPUT_ATTACHMENTS_GMEM,
   TU_DRAW_STATE_INPUT_ATTACHMENTS_SYSMEM,
   TU_DRAW_STATE_LRZ_AND_DEPTH_PLANE,
   TU_DRAW_STATE_PRIM_MODE_GMEM,
   TU_DRAW_STATE_PRIM_MODE_SYSMEM,

   /* dynamic state related draw states */
   TU_DRAW_STATE_DYNAMIC,
   TU_DRAW_STATE_COUNT = TU_DRAW_STATE_DYNAMIC + TU_DYNAMIC_STATE_COUNT,
};

struct tu_descriptor_state
{
   struct tu_descriptor_set *sets[MAX_SETS];
   struct tu_descriptor_set push_set;
   uint32_t dynamic_descriptors[MAX_DYNAMIC_BUFFERS_SIZE];
};

enum tu_cmd_dirty_bits
{
   TU_CMD_DIRTY_VERTEX_BUFFERS = BIT(0),
   TU_CMD_DIRTY_VB_STRIDE = BIT(1),
   TU_CMD_DIRTY_GRAS_SU_CNTL = BIT(2),
   TU_CMD_DIRTY_RB_DEPTH_CNTL = BIT(3),
   TU_CMD_DIRTY_RB_STENCIL_CNTL = BIT(4),
   TU_CMD_DIRTY_DESC_SETS_LOAD = BIT(5),
   TU_CMD_DIRTY_COMPUTE_DESC_SETS_LOAD = BIT(6),
   TU_CMD_DIRTY_SHADER_CONSTS = BIT(7),
   TU_CMD_DIRTY_LRZ = BIT(8),
   TU_CMD_DIRTY_VS_PARAMS = BIT(9),
   TU_CMD_DIRTY_RASTERIZER_DISCARD = BIT(10),
   TU_CMD_DIRTY_VIEWPORTS = BIT(11),
   TU_CMD_DIRTY_BLEND = BIT(12),
   /* all draw states were disabled and need to be re-enabled: */
   TU_CMD_DIRTY_DRAW_STATE = BIT(13)
};

/* There are only three cache domains we have to care about: the CCU, or
 * color cache unit, which is used for color and depth/stencil attachments
 * and copy/blit destinations and is conceptually split into a color half
 * and a depth half, and the UCHE, or universal cache, which is used for
 * pretty much everything else, except for the CP (uncached) and the host.
 * We need to flush whenever data crosses these domain boundaries.
 */

enum tu_cmd_access_mask {
   TU_ACCESS_UCHE_READ = 1 << 0,
   TU_ACCESS_UCHE_WRITE = 1 << 1,
   TU_ACCESS_CCU_COLOR_READ = 1 << 2,
   TU_ACCESS_CCU_COLOR_WRITE = 1 << 3,
   TU_ACCESS_CCU_DEPTH_READ = 1 << 4,
   TU_ACCESS_CCU_DEPTH_WRITE = 1 << 5,

   /* Experiments have shown that while it's safe to avoid flushing the CCU
    * after each blit/renderpass, it's not safe to assume that subsequent
    * lookups with a different attachment state will hit unflushed cache
    * entries. That is, the CCU needs to be flushed and possibly invalidated
    * when accessing memory with a different attachment state. Writing to an
    * attachment under the following conditions after clearing using the
    * normal 2d engine path is known to have issues:
    *
    * - It isn't the 0'th layer.
    * - There is more than one attachment, and this isn't the 0'th attachment
    *   (this seems to also depend on the cpp of the attachments).
    *
    * Our best guess is that the layer/MRT state is used when computing
    * the location of a cache entry in CCU, to avoid conflicts. We assume that
    * any access in a renderpass after or before an access by a transfer needs
    * a flush/invalidate, and use the _INCOHERENT variants to represent access
    * by a renderpass.
    */
   TU_ACCESS_CCU_COLOR_INCOHERENT_READ = 1 << 6,
   TU_ACCESS_CCU_COLOR_INCOHERENT_WRITE = 1 << 7,
   TU_ACCESS_CCU_DEPTH_INCOHERENT_READ = 1 << 8,
   TU_ACCESS_CCU_DEPTH_INCOHERENT_WRITE = 1 << 9,

   /* Accesses which bypass any cache; e.g. writes via the host,
    * CP_EVENT_WRITE::BLIT, and the CP are SYSMEM_WRITE.
    */
   TU_ACCESS_SYSMEM_READ = 1 << 10,
   TU_ACCESS_SYSMEM_WRITE = 1 << 11,

   /* Memory writes from the CP start in-order with draws and event writes,
    * but execute asynchronously and hence need a CP_WAIT_MEM_WRITES if read.
    */
   TU_ACCESS_CP_WRITE = 1 << 12,

   TU_ACCESS_READ =
      TU_ACCESS_UCHE_READ |
      TU_ACCESS_CCU_COLOR_READ |
      TU_ACCESS_CCU_DEPTH_READ |
      TU_ACCESS_CCU_COLOR_INCOHERENT_READ |
      TU_ACCESS_CCU_DEPTH_INCOHERENT_READ |
      TU_ACCESS_SYSMEM_READ,

   TU_ACCESS_WRITE =
      TU_ACCESS_UCHE_WRITE |
      TU_ACCESS_CCU_COLOR_WRITE |
      TU_ACCESS_CCU_COLOR_INCOHERENT_WRITE |
      TU_ACCESS_CCU_DEPTH_WRITE |
      TU_ACCESS_CCU_DEPTH_INCOHERENT_WRITE |
      TU_ACCESS_SYSMEM_WRITE |
      TU_ACCESS_CP_WRITE,

   TU_ACCESS_ALL =
      TU_ACCESS_READ |
      TU_ACCESS_WRITE,
};

/* Starting with a6xx, the pipeline is split into several "clusters" (really
 * pipeline stages). Each stage has its own pair of register banks and can
 * switch them independently, so that earlier stages can run ahead of later
 * ones. e.g. the FS of draw N and the VS of draw N + 1 can be executing at
 * the same time.
 *
 * As a result of this, we need to insert a WFI when an earlier stage depends
 * on the result of a later stage. CP_DRAW_* and CP_BLIT will wait for any
 * pending WFIs to complete before starting, and usually even before reading
 * indirect params, so a WFI also acts as a full "pipeline stall".
 *
 * Note, the names of the stages come from CLUSTER_* in devcoredump. We
 * include all the stages for completeness, even ones which do not read/write
 * anything.
 */

enum tu_stage {
   /* This doesn't correspond to a cluster, but we need it for tracking
    * indirect draw parameter reads etc.
    */
   TU_STAGE_CP,

   /* - Fetch index buffer
    * - Fetch vertex attributes, dispatch VS
    */
   TU_STAGE_FE,

   /* Execute all geometry stages (VS thru GS) */
   TU_STAGE_SP_VS,

   /* Write to VPC, do primitive assembly. */
   TU_STAGE_PC_VS,

   /* Rasterization. RB_DEPTH_BUFFER_BASE only exists in CLUSTER_PS according
    * to devcoredump, so presumably, when early depth testing is enabled,
    * this stage stalls on TU_STAGE_PS before dispatching fragments? However,
    * GRAS reads and writes LRZ directly.
    */
   TU_STAGE_GRAS,

   /* Execute FS */
   TU_STAGE_SP_PS,

   /* - Fragment tests
    * - Write color/depth
    * - Streamout writes (???)
    * - Varying interpolation (???)
    */
   TU_STAGE_PS,
};
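
/* Illustrative sketch (a hypothetical helper, not part of the driver): per
 * the comment above, a hazard needing a WFI only arises when the consuming
 * stage sits earlier in the pipeline than the producing stage, because
 * earlier stages may already be running ahead on the next draw.
 */
static inline bool
tu_stage_dep_needs_wfi(enum tu_stage src, enum tu_stage dst)
{
   /* e.g. src = TU_STAGE_PS (color write) consumed by dst = TU_STAGE_FE
    * (index fetch) requires a WFI; the reverse direction is already ordered
    * by the pipeline.
    */
   return dst < src;
}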

enum tu_cmd_flush_bits {
   TU_CMD_FLAG_CCU_FLUSH_DEPTH = 1 << 0,
   TU_CMD_FLAG_CCU_FLUSH_COLOR = 1 << 1,
   TU_CMD_FLAG_CCU_INVALIDATE_DEPTH = 1 << 2,
   TU_CMD_FLAG_CCU_INVALIDATE_COLOR = 1 << 3,
   TU_CMD_FLAG_CACHE_FLUSH = 1 << 4,
   TU_CMD_FLAG_CACHE_INVALIDATE = 1 << 5,
   TU_CMD_FLAG_WAIT_MEM_WRITES = 1 << 6,
   TU_CMD_FLAG_WAIT_FOR_IDLE = 1 << 7,
   TU_CMD_FLAG_WAIT_FOR_ME = 1 << 8,

   TU_CMD_FLAG_ALL_FLUSH =
      TU_CMD_FLAG_CCU_FLUSH_DEPTH |
      TU_CMD_FLAG_CCU_FLUSH_COLOR |
      TU_CMD_FLAG_CACHE_FLUSH |
      /* Treat the CP as a sort of "cache" which may need to be "flushed" via
       * waiting for writes to land with WAIT_MEM_WRITES.
       */
      TU_CMD_FLAG_WAIT_MEM_WRITES,

   TU_CMD_FLAG_ALL_INVALIDATE =
      TU_CMD_FLAG_CCU_INVALIDATE_DEPTH |
      TU_CMD_FLAG_CCU_INVALIDATE_COLOR |
      TU_CMD_FLAG_CACHE_INVALIDATE |
      /* Treat CP_WAIT_FOR_ME as a "cache" that needs to be invalidated when
       * a command that needs CP_WAIT_FOR_ME is executed. This means we may
       * insert an extra WAIT_FOR_ME before an indirect command requiring it
       * in case there was another command before the current command buffer
       * that it needs to wait for.
       */
      TU_CMD_FLAG_WAIT_FOR_ME,
};
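
/* A minimal sketch (hypothetical helper, not the driver's actual API) of how
 * the access masks above might translate into flush bits when a write must
 * be made visible outside its cache domain; the real mapping lives in
 * tu_cmd_buffer.c and also handles reads, invalidates, and more cases.
 */
static inline enum tu_cmd_flush_bits
tu_flush_bits_for_write_sketch(enum tu_cmd_access_mask access)
{
   enum tu_cmd_flush_bits bits = 0;
   if (access & (TU_ACCESS_CCU_COLOR_WRITE | TU_ACCESS_CCU_COLOR_INCOHERENT_WRITE))
      bits |= TU_CMD_FLAG_CCU_FLUSH_COLOR;
   if (access & (TU_ACCESS_CCU_DEPTH_WRITE | TU_ACCESS_CCU_DEPTH_INCOHERENT_WRITE))
      bits |= TU_CMD_FLAG_CCU_FLUSH_DEPTH;
   if (access & TU_ACCESS_UCHE_WRITE)
      bits |= TU_CMD_FLAG_CACHE_FLUSH;
   /* CP writes land asynchronously, so "flushing" them means waiting. */
   if (access & TU_ACCESS_CP_WRITE)
      bits |= TU_CMD_FLAG_WAIT_MEM_WRITES;
   return bits;
}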

/* Changing the CCU from sysmem mode to gmem mode or vice-versa is pretty
 * heavy, involving a CCU cache flush/invalidate and a WFI in order to change
 * which part of the gmem is used by the CCU. Here we keep track of the state
 * of the CCU.
 */
enum tu_cmd_ccu_state {
   TU_CMD_CCU_SYSMEM,
   TU_CMD_CCU_GMEM,
   TU_CMD_CCU_UNKNOWN,
};
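
/* Hypothetical sketch of the transition cost described above: switching CCU
 * modes is free only when the mode is already known to match; otherwise both
 * CCU domains must be flushed and invalidated, with a WFI before the base
 * addresses are reprogrammed. The real logic is tu_emit_cache_flush_ccu().
 */
static inline enum tu_cmd_flush_bits
tu_ccu_transition_flushes_sketch(enum tu_cmd_ccu_state cur,
                                 enum tu_cmd_ccu_state next)
{
   if (cur == next && cur != TU_CMD_CCU_UNKNOWN)
      return 0;
   return TU_CMD_FLAG_CCU_FLUSH_COLOR | TU_CMD_FLAG_CCU_FLUSH_DEPTH |
          TU_CMD_FLAG_CCU_INVALIDATE_COLOR | TU_CMD_FLAG_CCU_INVALIDATE_DEPTH |
          TU_CMD_FLAG_WAIT_FOR_IDLE;
}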

struct tu_cache_state {
   /* Caches which must be made available (flushed) eventually if there are
    * any users outside that cache domain, and caches which must be
    * invalidated eventually if there are any reads.
    */
   enum tu_cmd_flush_bits pending_flush_bits;
   /* Flushes which must be emitted before the next command. */
   enum tu_cmd_flush_bits flush_bits;
};
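
/* A minimal sketch, with a hypothetical name, of the intended interplay
 * between the two fields above (an assumption drawn from the comments): when
 * an access actually requires the data, the lazily-tracked pending bits get
 * promoted into flushes that will be emitted.
 */
static inline void
tu_cache_promote_pending_sketch(struct tu_cache_state *cache)
{
   cache->flush_bits |= cache->pending_flush_bits;
   cache->pending_flush_bits = 0;
}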

struct tu_vs_params {
   uint32_t vertex_offset;
   uint32_t first_instance;
};

/* This should be for state that is set inside a renderpass and used at
 * renderpass end time, e.g. to decide whether to use sysmem. This needs
 * special handling for secondary cmdbufs and suspending/resuming render
 * passes where the state may need to be combined afterwards.
 */
struct tu_render_pass_state
{
   bool xfb_used;
   bool has_tess;
   bool has_prim_generated_query_in_rp;
   bool disable_gmem;

   /* Track whether the conditional predicate for COND_REG_EXEC is changed in
    * draw_cs.
    */
   bool draw_cs_writes_to_cond_pred;

   uint32_t drawcall_count;

   /* A calculated "draw cost" value for the renderpass, which tries to
    * estimate the bandwidth-per-sample of all the draws according
    * to:
    *
    *    foreach_draw (...) {
    *      sum += pipeline->color_bandwidth_per_sample;
    *      if (depth_test_enabled)
    *        sum += pipeline->depth_cpp_per_sample;
    *      if (depth_write_enabled)
    *        sum += pipeline->depth_cpp_per_sample;
    *      if (stencil_write_enabled)
    *        sum += pipeline->stencil_cpp_per_sample * 2;
    *    }
    *    drawcall_bandwidth_per_sample = sum / drawcall_count;
    *
    * It allows us to estimate the total bandwidth of drawcalls later, by
    * calculating (drawcall_bandwidth_per_sample * zpass_sample_count).
    *
    * This does ignore depth buffer traffic for samples which do not
    * pass due to depth-test fail, and some other details. But it is
    * just intended to be a rough estimate that is easy to calculate.
    */
   uint32_t drawcall_bandwidth_per_sample_sum;
};
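
/* A worked illustration (hypothetical helper) of the estimate described in
 * the comment above: divide the accumulated sum by the draw count to get the
 * average cost per sample, then scale by the number of samples that passed.
 */
static inline uint64_t
tu_estimate_drawcall_bandwidth_sketch(const struct tu_render_pass_state *rp,
                                      uint64_t zpass_sample_count)
{
   if (!rp->drawcall_count)
      return 0;
   return zpass_sample_count * rp->drawcall_bandwidth_per_sample_sum /
          rp->drawcall_count;
}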

struct tu_cmd_state
{
   uint32_t dirty;

   struct tu_pipeline *pipeline;
   struct tu_pipeline *compute_pipeline;

   struct tu_render_pass_state rp;

   /* Vertex buffers, viewports, and scissors.
    * The states for these can be updated partially, so we need to save them
    * to be able to emit a complete draw state.
    */
   struct {
      uint64_t base;
      uint32_t size;
      uint32_t stride;
   } vb[MAX_VBS];
   VkViewport viewport[MAX_VIEWPORTS];
   VkRect2D scissor[MAX_SCISSORS];
   uint32_t max_viewport, max_scissor;

   /* for dynamic states that can't be emitted directly */
   uint32_t dynamic_stencil_mask;
   uint32_t dynamic_stencil_wrmask;
   uint32_t dynamic_stencil_ref;

   uint32_t gras_su_cntl, rb_depth_cntl, rb_stencil_cntl;
   uint32_t pc_raster_cntl, vpc_unknown_9107;
   uint32_t rb_mrt_control[MAX_RTS], rb_mrt_blend_control[MAX_RTS];
   uint32_t rb_mrt_control_rop;
   uint32_t rb_blend_cntl, sp_blend_cntl;
   uint32_t pipeline_color_write_enable, pipeline_blend_enable;
   uint32_t color_write_enable;
   bool logic_op_enabled;
   bool rop_reads_dst;
   enum pc_di_primtype primtype;
   bool primitive_restart_enable;

   /* saved states to re-emit in TU_CMD_DIRTY_DRAW_STATE case */
   struct tu_draw_state dynamic_state[TU_DYNAMIC_STATE_COUNT];
   struct tu_draw_state vertex_buffers;
   struct tu_draw_state shader_const;
   struct tu_draw_state desc_sets;

   struct tu_draw_state vs_params;

   /* Index buffer */
   uint64_t index_va;
   uint32_t max_index_count;
   uint8_t index_size;

   /* Because the streamout base has to be 32-byte aligned,
    * there is an extra offset to deal with when it is unaligned.
    */
   uint8_t streamout_offset[IR3_MAX_SO_BUFFERS];

   /* Renderpasses are tricky, because we may need to flush differently if
    * using sysmem vs. gmem and therefore we have to delay any flushing that
    * happens before a renderpass. So we have to have two copies of the flush
    * state, one for intra-renderpass flushes (i.e. renderpass dependencies)
    * and one for outside a renderpass.
    */
   struct tu_cache_state cache;
   struct tu_cache_state renderpass_cache;

   enum tu_cmd_ccu_state ccu_state;

   /* Decides which GMEM layout to use from the tu_pass, based on whether the
    * CCU might get used by tu_store_gmem_attachment().
    */
   enum tu_gmem_layout gmem_layout;

   const struct tu_render_pass *pass;
   const struct tu_subpass *subpass;
   const struct tu_framebuffer *framebuffer;
   const struct tu_tiling_config *tiling;
   VkRect2D render_area;

   const struct tu_image_view **attachments;

   /* State that in the dynamic case comes from VkRenderingInfo and needs to
    * be saved/restored when suspending. This holds the state for the last
    * suspended renderpass, which may point to this command buffer's
    * dynamic_* state or to another command buffer's if a secondary was
    * executed.
    */
   struct {
      const struct tu_render_pass *pass;
      const struct tu_subpass *subpass;
      const struct tu_framebuffer *framebuffer;
      VkRect2D render_area;
      enum tu_gmem_layout gmem_layout;

      const struct tu_image_view **attachments;

      struct tu_lrz_state lrz;
   } suspended_pass;

   bool tessfactor_addr_set;
   bool predication_active;
   enum a5xx_line_mode line_mode;
   bool z_negative_one_to_one;

   /* VK_QUERY_PIPELINE_STATISTIC_CLIPPING_INVOCATIONS_BIT and
    * VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT are allowed to run
    * simultaneously, but they use the same {START,STOP}_PRIMITIVE_CTRS
    * control.
    */
   uint32_t prim_counters_running;

   bool prim_generated_query_running_before_rp;

   /* These are the states of the suspend/resume state machine. In addition to
    * tracking whether we're in the middle of a chain of suspending and
    * resuming passes that will be merged, we need to track whether the
    * command buffer begins in the middle of such a chain, for when it gets
    * merged with other command buffers. We call such a chain that begins
    * before the command buffer starts a "pre-chain".
    *
    * Note that when this command buffer is finished, this state is untouched
    * but it gains a different meaning. For example, if we finish in state
    * SR_IN_CHAIN, we finished in the middle of a suspend/resume chain, so
    * there's a suspend/resume chain that extends past the end of the command
    * buffer. In this sense it's the "opposite" of SR_AFTER_PRE_CHAIN, which
    * means that there's a suspend/resume chain that extends before the
    * beginning.
    */
   enum {
      /* Either there are no suspend/resume chains, or they are entirely
       * contained in the current command buffer.
       *
       *   BeginCommandBuffer() <- start of current command buffer
       *       ...
       *       // we are here
       */
      SR_NONE = 0,

      /* We are in the middle of a suspend/resume chain that starts before the
       * current command buffer. This happens when the command buffer begins
       * with a resuming render pass and all of the passes up to the current
       * one are suspending. In this state, our part of the chain is not saved
       * and is in the current draw_cs/state.
       *
       *   BeginRendering() ... EndRendering(suspending)
       *   BeginCommandBuffer() <- start of current command buffer
       *   BeginRendering(resuming) ... EndRendering(suspending)
       *   BeginRendering(resuming) ... EndRendering(suspending)
       *   ...
       *   // we are here
       */
      SR_IN_PRE_CHAIN,

      /* We are currently outside of any suspend/resume chains, but there is a
       * chain starting before the current command buffer. It is saved in
       * pre_chain.
       *
       *   BeginRendering() ... EndRendering(suspending)
       *   BeginCommandBuffer() <- start of current command buffer
       *   // This part is stashed in pre_chain
       *   BeginRendering(resuming) ... EndRendering(suspending)
       *   BeginRendering(resuming) ... EndRendering(suspending)
       *   ...
       *   BeginRendering(resuming) ... EndRendering() // end of chain
       *   ...
       *   // we are here
       */
      SR_AFTER_PRE_CHAIN,

      /* We are in the middle of a suspend/resume chain and there is no chain
       * starting before the current command buffer.
       *
       *   BeginCommandBuffer() <- start of current command buffer
       *   ...
       *   BeginRendering() ... EndRendering(suspending)
       *   BeginRendering(resuming) ... EndRendering(suspending)
       *   BeginRendering(resuming) ... EndRendering(suspending)
       *   ...
       *   // we are here
       */
      SR_IN_CHAIN,

      /* We are in the middle of a suspend/resume chain and there is another,
       * separate, chain starting before the current command buffer.
       *
       *   BeginRendering() ... EndRendering(suspending)
       *   BeginCommandBuffer() <- start of current command buffer
       *   // This part is stashed in pre_chain
       *   BeginRendering(resuming) ... EndRendering(suspending)
       *   BeginRendering(resuming) ... EndRendering(suspending)
       *   ...
       *   BeginRendering(resuming) ... EndRendering() // end of chain
       *   ...
       *   BeginRendering() ... EndRendering(suspending)
       *   BeginRendering(resuming) ... EndRendering(suspending)
       *   BeginRendering(resuming) ... EndRendering(suspending)
       *   ...
       *   // we are here
       */
      SR_IN_CHAIN_AFTER_PRE_CHAIN,
   } suspend_resume;

   bool suspending, resuming;

   struct tu_lrz_state lrz;

   struct tu_draw_state lrz_and_depth_plane_state;

   struct tu_vs_params last_vs_params;
};
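
/* A simplified sketch of the EndRendering-time transitions implied by the
 * diagrams above (an assumption drawn from the comments, not the driver's
 * actual code path, which lives in tu_cmd_buffer.c and handles more cases).
 */
static inline void
tu_suspend_resume_end_sketch(struct tu_cmd_state *state, bool suspending)
{
   switch (state->suspend_resume) {
   case SR_NONE:
      /* A suspending pass starts a chain contained (so far) in this buffer. */
      if (suspending)
         state->suspend_resume = SR_IN_CHAIN;
      break;
   case SR_IN_PRE_CHAIN:
      /* The pre-chain ends once a pass finally stops suspending. */
      if (!suspending)
         state->suspend_resume = SR_AFTER_PRE_CHAIN;
      break;
   case SR_IN_CHAIN:
      if (!suspending)
         state->suspend_resume = SR_NONE;
      break;
   case SR_IN_CHAIN_AFTER_PRE_CHAIN:
      if (!suspending)
         state->suspend_resume = SR_AFTER_PRE_CHAIN;
      break;
   case SR_AFTER_PRE_CHAIN:
      if (suspending)
         state->suspend_resume = SR_IN_CHAIN_AFTER_PRE_CHAIN;
      break;
   }
}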

struct tu_cmd_pool
{
   struct vk_command_pool vk;

   struct list_head cmd_buffers;
   struct list_head free_cmd_buffers;
};
VK_DEFINE_NONDISP_HANDLE_CASTS(tu_cmd_pool, vk.base, VkCommandPool,
                               VK_OBJECT_TYPE_COMMAND_POOL)

enum tu_cmd_buffer_status
{
   TU_CMD_BUFFER_STATUS_INVALID,
   TU_CMD_BUFFER_STATUS_INITIAL,
   TU_CMD_BUFFER_STATUS_RECORDING,
   TU_CMD_BUFFER_STATUS_EXECUTABLE,
   TU_CMD_BUFFER_STATUS_PENDING,
};

struct tu_cmd_buffer
{
   struct vk_command_buffer vk;

   struct tu_device *device;

   struct tu_cmd_pool *pool;
   struct list_head pool_link;

   struct u_trace trace;
   struct u_trace_iterator trace_renderpass_start;
   struct u_trace_iterator trace_renderpass_end;

   struct list_head renderpass_autotune_results;
   struct tu_autotune_results_buffer *autotune_buffer;

   VkCommandBufferUsageFlags usage_flags;
   enum tu_cmd_buffer_status status;

   VkQueryPipelineStatisticFlags inherited_pipeline_statistics;

   struct tu_cmd_state state;
   uint32_t queue_family_index;

   uint32_t push_constants[MAX_PUSH_CONSTANTS_SIZE / 4];
   VkShaderStageFlags push_constant_stages;
   struct tu_descriptor_set meta_push_descriptors;

   struct tu_descriptor_state descriptors[MAX_BIND_POINTS];

   struct tu_render_pass_attachment dynamic_rp_attachments[2 * (MAX_RTS + 1)];
   struct tu_subpass_attachment dynamic_color_attachments[MAX_RTS];
   struct tu_subpass_attachment dynamic_resolve_attachments[MAX_RTS + 1];
   const struct tu_image_view *dynamic_attachments[2 * (MAX_RTS + 1)];

   struct tu_render_pass dynamic_pass;
   struct tu_subpass dynamic_subpass;
   struct tu_framebuffer dynamic_framebuffer;

   VkResult record_result;

   struct tu_cs cs;
   struct tu_cs draw_cs;
   struct tu_cs tile_store_cs;
   struct tu_cs draw_epilogue_cs;
   struct tu_cs sub_cs;

   /* If the first render pass in the command buffer is resuming, then it is
    * part of a suspend/resume chain that starts before the current command
    * buffer and needs to be merged later. In this case, its incomplete state
    * is stored in pre_chain. In the symmetric case where the last render pass
    * is suspending, we just skip ending the render pass and its state is
    * stored in draw_cs/the current state. The first and last render pass
    * might be part of different chains, which is why all the state may need
    * to be saved separately here.
    */
   struct {
      struct tu_cs draw_cs;
      struct tu_cs draw_epilogue_cs;

      struct u_trace_iterator trace_renderpass_start, trace_renderpass_end;

      struct tu_render_pass_state state;
   } pre_chain;

   uint32_t vsc_draw_strm_pitch;
   uint32_t vsc_prim_strm_pitch;
};
VK_DEFINE_HANDLE_CASTS(tu_cmd_buffer, vk.base, VkCommandBuffer,
                       VK_OBJECT_TYPE_COMMAND_BUFFER)

static inline uint32_t
tu_attachment_gmem_offset(struct tu_cmd_buffer *cmd,
                          const struct tu_render_pass_attachment *att)
{
   assert(cmd->state.gmem_layout < TU_GMEM_LAYOUT_COUNT);
   return att->gmem_offset[cmd->state.gmem_layout];
}

static inline uint32_t
tu_attachment_gmem_offset_stencil(struct tu_cmd_buffer *cmd,
                                  const struct tu_render_pass_attachment *att)
{
   assert(cmd->state.gmem_layout < TU_GMEM_LAYOUT_COUNT);
   return att->gmem_offset_stencil[cmd->state.gmem_layout];
}

void tu_render_pass_state_merge(struct tu_render_pass_state *dst,
                                const struct tu_render_pass_state *src);

VkResult tu_cmd_buffer_begin(struct tu_cmd_buffer *cmd_buffer,
                             VkCommandBufferUsageFlags usage_flags);

void tu_emit_cache_flush_renderpass(struct tu_cmd_buffer *cmd_buffer,
                                    struct tu_cs *cs);

void tu_emit_cache_flush_ccu(struct tu_cmd_buffer *cmd_buffer,
                             struct tu_cs *cs,
                             enum tu_cmd_ccu_state ccu_state);

void
tu_append_pre_chain(struct tu_cmd_buffer *cmd,
                    struct tu_cmd_buffer *secondary);

void
tu_append_pre_post_chain(struct tu_cmd_buffer *cmd,
                         struct tu_cmd_buffer *secondary);

void
tu_append_post_chain(struct tu_cmd_buffer *cmd,
                     struct tu_cmd_buffer *secondary);

void
tu_restore_suspended_pass(struct tu_cmd_buffer *cmd,
                          struct tu_cmd_buffer *suspended);

void tu_cmd_render(struct tu_cmd_buffer *cmd);

void
tu6_emit_event_write(struct tu_cmd_buffer *cmd,
                     struct tu_cs *cs,
                     enum vgt_event_type event);

static inline struct tu_descriptor_state *
tu_get_descriptors_state(struct tu_cmd_buffer *cmd_buffer,
                         VkPipelineBindPoint bind_point)
{
   return &cmd_buffer->descriptors[bind_point];
}

void tu6_emit_msaa(struct tu_cs *cs, VkSampleCountFlagBits samples,
                   enum a5xx_line_mode line_mode);

void tu6_emit_window_scissor(struct tu_cs *cs, uint32_t x1, uint32_t y1,
                             uint32_t x2, uint32_t y2);

void tu6_emit_window_offset(struct tu_cs *cs, uint32_t x1, uint32_t y1);

void tu_disable_draw_states(struct tu_cmd_buffer *cmd, struct tu_cs *cs);

void tu6_apply_depth_bounds_workaround(struct tu_device *device,
                                       uint32_t *rb_depth_cntl);

void
update_stencil_mask(uint32_t *value, VkStencilFaceFlags face, uint32_t mask);

#endif /* TU_CMD_BUFFER_H */