panvk/csf: Implement vkCmdExecuteCommands

Signed-off-by: Rebecca Mckeever <rebecca.mckeever@collabora.com>
Reviewed-by: Lars-Ivar Hesselberg Simonsen <lars-ivar.simonsen@arm.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31159>
This commit is contained in:
Rebecca Mckeever
2024-09-11 15:49:37 -07:00
committed by Marge Bot
parent 3513960fe6
commit c2299b6642
4 changed files with 227 additions and 18 deletions

View File

@@ -462,4 +462,12 @@ void panvk_per_arch(get_cs_deps)(struct panvk_cmd_buffer *cmdbuf,
const VkDependencyInfo *in,
struct panvk_cs_deps *out);
/* Called on the primary command buffer before a secondary is executed.
 * When the secondary was recorded with
 * VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT, the tiler and framebuffer
 * descriptors are requested on the primary. Failures are not reported to the
 * caller (the function returns void).
 */
void panvk_per_arch(cmd_prepare_exec_cmd_for_draws)(
struct panvk_cmd_buffer *primary,
struct panvk_cmd_buffer *secondary);
/* Initializes the render state of a secondary command buffer from the
 * inheritance information attached to pBeginInfo. Does nothing unless cmdbuf
 * is a secondary command buffer begun with
 * VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT.
 */
void panvk_per_arch(cmd_inherit_render_state)(
struct panvk_cmd_buffer *cmdbuf,
const VkCommandBufferBeginInfo *pBeginInfo);
#endif /* PANVK_CMD_BUFFER_H */

View File

@@ -123,9 +123,9 @@ finish_cs(struct panvk_cmd_buffer *cmdbuf, uint32_t subqueue)
cs_load32_to(b, error, debug_sync_addr,
offsetof(struct panvk_cs_sync32, error));
cs_wait_slots(b, SB_ALL_MASK, false);
cs_sync32_add(b, true, MALI_CS_SYNC_SCOPE_SYSTEM, one, debug_sync_addr,
cs_now());
if (cmdbuf->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY)
cs_sync32_add(b, true, MALI_CS_SYNC_SCOPE_SYSTEM, one,
debug_sync_addr, cs_now());
cs_match(b, error, cmp_scratch) {
cs_case(b, 0) {
/* Do nothing. */
@@ -701,5 +701,90 @@ panvk_per_arch(BeginCommandBuffer)(VkCommandBuffer commandBuffer,
if (instance->debug_flags & PANVK_DEBUG_TRACE)
cmdbuf->flags &= ~VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT;
panvk_per_arch(cmd_inherit_render_state)(cmdbuf, pBeginInfo);
return VK_SUCCESS;
}
static void
panvk_cmd_invalidate_state(struct panvk_cmd_buffer *cmdbuf)
{
/* From the Vulkan 1.3.275 spec:
*
* "...There is one exception to this rule - if the primary command
* buffer is inside a render pass instance, then the render pass and
* subpass state is not disturbed by executing secondary command
* buffers."
*
* We need to reset everything EXCEPT the render pass state.
*/
struct panvk_rendering_state render_save = cmdbuf->state.gfx.render;
memset(&cmdbuf->state.gfx, 0, sizeof(cmdbuf->state.gfx));
cmdbuf->state.gfx.render = render_save;
cmdbuf->state.gfx.fs.desc.res_table = 0;
cmdbuf->state.gfx.fs.spd = 0;
cmdbuf->state.gfx.vs.desc.res_table = 0;
cmdbuf->state.gfx.vs.spds.pos = 0;
cmdbuf->state.gfx.vs.spds.var = 0;
cmdbuf->state.gfx.vb.dirty = true;
cmdbuf->state.gfx.ib.dirty = true;
vk_dynamic_graphics_state_dirty_all(&cmdbuf->vk.dynamic_graphics_state);
}
/* vkCmdExecuteCommands entry point: for every secondary command buffer, emit
 * a call from each primary command stream to the matching non-empty secondary
 * command stream, then invalidate the primary's tracked graphics state.
 */
VKAPI_ATTR void VKAPI_CALL
panvk_per_arch(CmdExecuteCommands)(VkCommandBuffer commandBuffer,
                                   uint32_t commandBufferCount,
                                   const VkCommandBuffer *pCommandBuffers)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, primary, commandBuffer);

   /* Nothing to execute: leave the primary state untouched. */
   if (commandBufferCount == 0)
      return;

   for (uint32_t i = 0; i < commandBufferCount; i++) {
      VK_FROM_HANDLE(panvk_cmd_buffer, secondary, pCommandBuffers[i]);

      /* Make sure the CS context is set up properly to inherit the primary
       * command buffer state: the primary TLS size must be at least as big
       * as the one recorded for the secondary.
       */
      primary->state.tls.info.tls.size =
         MAX2(primary->state.tls.info.tls.size,
              secondary->state.tls.info.tls.size);

      panvk_per_arch(cmd_prepare_exec_cmd_for_draws)(primary, secondary);

      for (uint32_t sq = 0; sq < ARRAY_SIZE(primary->state.cs); sq++) {
         struct cs_builder *sec_b = panvk_get_cs_builder(secondary, sq);

         assert(cs_is_valid(sec_b));

         /* No call is emitted for a secondary stream with nothing in it. */
         if (cs_is_empty(sec_b))
            continue;

         struct cs_builder *prim_b = panvk_get_cs_builder(primary, sq);

         /* Load the secondary root chunk address/size into scratch
          * registers of the primary stream and call into it.
          */
         struct cs_index addr = cs_scratch_reg64(prim_b, 0);
         struct cs_index size = cs_scratch_reg32(prim_b, 2);

         cs_move64_to(prim_b, addr, cs_root_chunk_gpu_addr(sec_b));
         cs_move32_to(prim_b, size, cs_root_chunk_size(sec_b));
         cs_call(prim_b, addr, size);
      }
   }

   /* From the Vulkan 1.3.275 spec:
    *
    *    "When secondary command buffer(s) are recorded to execute on a
    *    primary command buffer, the secondary command buffer inherits no
    *    state from the primary command buffer, and all state of the primary
    *    command buffer is undefined after an execute secondary command buffer
    *    command is recorded. There is one exception to this rule - if the
    *    primary command buffer is inside a render pass instance, then the
    *    render pass and subpass state is not disturbed by executing secondary
    *    command buffers. For state dependent commands (such as draws and
    *    dispatches), any state consumed by those commands must not be
    *    undefined."
    *
    * Therefore, it's the client's job to reset all the state in the primary
    * after the secondary executes. However, if we're doing any internal
    * dirty tracking, we may miss the fact that a secondary has messed with
    * GPU state if we don't invalidate all our internal tracking.
    */
   panvk_cmd_invalidate_state(primary);
}

View File

@@ -39,6 +39,7 @@
#include "vk_format.h"
#include "vk_meta.h"
#include "vk_pipeline_layout.h"
#include "vk_render_pass.h"
struct panvk_draw_info {
struct {
@@ -1376,13 +1377,16 @@ prepare_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw)
if (result != VK_SUCCESS)
return result;
result = get_tiler_desc(cmdbuf);
if (result != VK_SUCCESS)
return result;
if (cmdbuf->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY ||
!(cmdbuf->flags & VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT)) {
result = get_tiler_desc(cmdbuf);
if (result != VK_SUCCESS)
return result;
result = get_fb_descs(cmdbuf);
if (result != VK_SUCCESS)
return result;
result = get_fb_descs(cmdbuf);
if (result != VK_SUCCESS)
return result;
}
struct cs_builder *b =
panvk_get_cs_builder(cmdbuf, PANVK_SUBQUEUE_VERTEX_TILER);
@@ -1510,6 +1514,25 @@ panvk_cmd_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw)
cs_req_res(b, 0);
}
/* Prepares the primary command buffer for the execution of a secondary one.
 *
 * A secondary recorded with VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT
 * does not request tiler/framebuffer descriptors in prepare_draw(), so they
 * are requested on the primary here. Any failure ends the function early and
 * is not propagated to the caller.
 */
void
panvk_per_arch(cmd_prepare_exec_cmd_for_draws)(
   struct panvk_cmd_buffer *primary,
   struct panvk_cmd_buffer *secondary)
{
   /* Secondaries that do not continue a render pass need nothing here. */
   if (!(secondary->flags & VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT))
      return;

   assert(primary->vk.render_pass);

   const VkResult tiler_res = get_tiler_desc(primary);
   if (tiler_res != VK_SUCCESS)
      return;

   const VkResult fbds_res = get_fb_descs(primary);
   if (fbds_res != VK_SUCCESS)
      return;
}
VKAPI_ATTR void VKAPI_CALL
panvk_per_arch(CmdDraw)(VkCommandBuffer commandBuffer, uint32_t vertexCount,
uint32_t instanceCount, uint32_t firstVertex,
@@ -1646,8 +1669,8 @@ panvk_per_arch(CmdDrawIndexedIndirect)(VkCommandBuffer commandBuffer,
}
static void
panvk_cmd_begin_rendering_init_state(struct panvk_cmd_buffer *cmdbuf,
const VkRenderingInfo *pRenderingInfo)
panvk_cmd_init_render_state(struct panvk_cmd_buffer *cmdbuf,
const VkRenderingInfo *pRenderingInfo)
{
struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);
struct panvk_physical_device *phys_dev =
@@ -1657,10 +1680,6 @@ panvk_cmd_begin_rendering_init_state(struct panvk_cmd_buffer *cmdbuf,
cmdbuf->state.gfx.render.flags = pRenderingInfo->flags;
/* Resuming from a suspended pass, the state should be unchanged. */
if (cmdbuf->state.gfx.render.flags & VK_RENDERING_RESUMING_BIT)
return;
cmdbuf->state.gfx.render.dirty = true;
memset(cmdbuf->state.gfx.render.fb.crc_valid, 0,
sizeof(cmdbuf->state.gfx.render.fb.crc_valid));
@@ -1951,6 +1970,93 @@ preload_render_area_border(struct panvk_cmd_buffer *cmdbuf,
}
}
void
panvk_per_arch(cmd_inherit_render_state)(
struct panvk_cmd_buffer *cmdbuf,
const VkCommandBufferBeginInfo *pBeginInfo)
{
if (cmdbuf->vk.level != VK_COMMAND_BUFFER_LEVEL_SECONDARY ||
!(pBeginInfo->flags & VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT))
return;
assert(pBeginInfo->pInheritanceInfo);
char gcbiar_data[VK_GCBIARR_DATA_SIZE(MAX_RTS)];
const VkRenderingInfo *resume_info =
vk_get_command_buffer_inheritance_as_rendering_resume(cmdbuf->vk.level,
pBeginInfo,
gcbiar_data);
if (resume_info) {
panvk_cmd_init_render_state(cmdbuf, resume_info);
return;
}
const VkCommandBufferInheritanceRenderingInfo *inheritance_info =
vk_get_command_buffer_inheritance_rendering_info(cmdbuf->vk.level,
pBeginInfo);
assert(inheritance_info);
struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);
struct panvk_physical_device *phys_dev =
to_panvk_physical_device(dev->vk.physical);
struct pan_fb_info *fbinfo = &cmdbuf->state.gfx.render.fb.info;
cmdbuf->state.gfx.render.flags = inheritance_info->flags;
cmdbuf->state.gfx.render.dirty = true;
memset(cmdbuf->state.gfx.render.fb.crc_valid, 0,
sizeof(cmdbuf->state.gfx.render.fb.crc_valid));
memset(&cmdbuf->state.gfx.render.color_attachments, 0,
sizeof(cmdbuf->state.gfx.render.color_attachments));
memset(&cmdbuf->state.gfx.render.z_attachment, 0,
sizeof(cmdbuf->state.gfx.render.z_attachment));
memset(&cmdbuf->state.gfx.render.s_attachment, 0,
sizeof(cmdbuf->state.gfx.render.s_attachment));
cmdbuf->state.gfx.render.bound_attachments = 0;
cmdbuf->state.gfx.render.layer_count = 0;
*fbinfo = (struct pan_fb_info){
.tile_buf_budget = panfrost_query_optimal_tib_size(phys_dev->model),
.nr_samples = 1,
.rt_count = inheritance_info->colorAttachmentCount,
};
assert(inheritance_info->colorAttachmentCount <= ARRAY_SIZE(fbinfo->rts));
for (uint32_t i = 0; i < inheritance_info->colorAttachmentCount; i++) {
cmdbuf->state.gfx.render.bound_attachments |=
MESA_VK_RP_ATTACHMENT_COLOR_BIT(i);
cmdbuf->state.gfx.render.color_attachments.fmts[i] =
inheritance_info->pColorAttachmentFormats[i];
cmdbuf->state.gfx.render.color_attachments.samples[i] =
inheritance_info->rasterizationSamples;
}
if (inheritance_info->depthAttachmentFormat) {
cmdbuf->state.gfx.render.bound_attachments |=
MESA_VK_RP_ATTACHMENT_DEPTH_BIT;
cmdbuf->state.gfx.render.z_attachment.fmt =
inheritance_info->depthAttachmentFormat;
}
if (inheritance_info->stencilAttachmentFormat) {
cmdbuf->state.gfx.render.bound_attachments |=
MESA_VK_RP_ATTACHMENT_STENCIL_BIT;
cmdbuf->state.gfx.render.s_attachment.fmt =
inheritance_info->stencilAttachmentFormat;
}
const VkRenderingAttachmentLocationInfoKHR att_loc_info_default = {
.sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_LOCATION_INFO_KHR,
.colorAttachmentCount = inheritance_info->colorAttachmentCount,
};
const VkRenderingAttachmentLocationInfoKHR *att_loc_info =
vk_get_command_buffer_rendering_attachment_location_info(
cmdbuf->vk.level, pBeginInfo);
if (att_loc_info == NULL)
att_loc_info = &att_loc_info_default;
vk_cmd_set_rendering_attachment_locations(&cmdbuf->vk, att_loc_info);
}
VKAPI_ATTR void VKAPI_CALL
panvk_per_arch(CmdBeginRendering)(VkCommandBuffer commandBuffer,
const VkRenderingInfo *pRenderingInfo)
@@ -1958,9 +2064,13 @@ panvk_per_arch(CmdBeginRendering)(VkCommandBuffer commandBuffer,
VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
struct panvk_cmd_graphics_state *state = &cmdbuf->state.gfx;
panvk_cmd_begin_rendering_init_state(cmdbuf, pRenderingInfo);
bool resuming = pRenderingInfo->flags & VK_RENDERING_RESUMING_BIT;
bool resuming = state->render.flags & VK_RENDERING_RESUMING_BIT;
/* When resuming from a suspended pass, the state should be unchanged. */
if (resuming)
state->render.flags = pRenderingInfo->flags;
else
panvk_cmd_init_render_state(cmdbuf, pRenderingInfo);
/* If we're not resuming, the FBD should be NULL. */
assert(!state->render.fbds.gpu || resuming);

View File

@@ -193,7 +193,13 @@ panvk_per_arch(create_device)(struct panvk_physical_device *physical_device,
vk_device_dispatch_table_from_entrypoints(
&device->cmd_dispatch, &vk_common_device_entrypoints, false);
result = vk_device_init(&device->vk, &physical_device->vk, &dispatch_table,
/* vkCmdExecuteCommands is currently only implemented on v10+. The panvk
* implementation will not run if the vk_cmd_enqueue_unless_primary
* entrypoint is present in the dispatch table.
*/
result = vk_device_init(&device->vk, &physical_device->vk,
PAN_ARCH <= 9 ?
&dispatch_table : &device->cmd_dispatch,
pCreateInfo, pAllocator);
if (result != VK_SUCCESS)
goto err_free_dev;