pvr: Implement depth/stencil/depth+stencil attachment clear.

Signed-off-by: Karmjit Mahil <Karmjit.Mahil@imgtec.com>
Reviewed-by: Frank Binns <frank.binns@imgtec.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20055>
This commit is contained in:
Karmjit Mahil
2022-09-26 16:52:44 +01:00
committed by Marge Bot
parent 5611dedd78
commit 821c6b9342
5 changed files with 421 additions and 9 deletions

View File

@@ -25,6 +25,7 @@
#include <stdint.h>
#include <vulkan/vulkan.h>
#include "pvr_clear.h"
#include "pvr_csb.h"
#include "pvr_private.h"
#include "util/list.h"
@@ -141,13 +142,414 @@ void pvr_CmdCopyBuffer2KHR(VkCommandBuffer commandBuffer,
pvr_cmd_buffer_add_transfer_cmd(cmd_buffer, transfer_cmd);
}
/**
 * \brief Returns the maximum number of layers to clear starting from base_layer
 * that contain or match the target rectangle.
 *
 * A clear rect only contributes if its layer range contains base_layer and its
 * rectangle fully covers target_rect. Clear rects with a layerCount of 0 cover
 * no layers and are ignored.
 *
 * \param[in] target_rect      The region which the clear should contain or
 *                             match.
 * \param[in] base_layer       The layer index to start at.
 * \param[in] clear_rect_count Number of elements in clear_rects.
 * \param[in] clear_rects      Array of clear rects.
 *
 * \return Max number of layers that cover or match the target region, or 0 if
 *         no clear rect covers it at base_layer.
 */
static uint32_t
pvr_get_max_layers_covering_target(VkRect2D target_rect,
                                   uint32_t base_layer,
                                   uint32_t clear_rect_count,
                                   const VkClearRect *clear_rects)
{
   const int32_t target_x0 = target_rect.offset.x;
   const int32_t target_x1 = target_x0 + (int32_t)target_rect.extent.width;
   const int32_t target_y0 = target_rect.offset.y;
   const int32_t target_y1 = target_y0 + (int32_t)target_rect.extent.height;

   uint32_t layer_count = 0;

   assert((int64_t)target_x0 + (int64_t)target_rect.extent.width <= INT32_MAX);
   assert((int64_t)target_y0 + (int64_t)target_rect.extent.height <= INT32_MAX);

   for (uint32_t i = 0; i < clear_rect_count; i++) {
      const VkClearRect *clear_rect = &clear_rects[i];
      const uint32_t max_layer =
         clear_rect->baseArrayLayer + clear_rect->layerCount;
      bool target_is_covered;
      int32_t x0, x1;
      int32_t y0, y1;

      /* An empty layer range can't cover anything. Note that this has to test
       * layerCount and not baseArrayLayer: rects starting at layer 0 are
       * exactly the ones that can match a base_layer of 0.
       */
      if (clear_rect->layerCount == 0)
         continue;

      assert((uint64_t)clear_rect->baseArrayLayer + clear_rect->layerCount <=
             UINT32_MAX);

      /* Check for layer intersection: base_layer must lie within
       * [baseArrayLayer, max_layer).
       */
      if (clear_rect->baseArrayLayer > base_layer || max_layer <= base_layer)
         continue;

      x0 = clear_rect->rect.offset.x;
      x1 = x0 + (int32_t)clear_rect->rect.extent.width;
      y0 = clear_rect->rect.offset.y;
      y1 = y0 + (int32_t)clear_rect->rect.extent.height;

      assert((int64_t)x0 + (int64_t)clear_rect->rect.extent.width <= INT32_MAX);
      assert((int64_t)y0 + (int64_t)clear_rect->rect.extent.height <=
             INT32_MAX);

      /* The clear rect has to enclose the target on both axes. */
      target_is_covered = x0 <= target_x0 && x1 >= target_x1;
      target_is_covered &= y0 <= target_y0 && y1 >= target_y1;

      if (target_is_covered)
         layer_count = MAX2(layer_count, max_layer - base_layer);
   }

   return layer_count;
}
/* Tells whether the clear's vertex shader has to output a render target id,
 * which is what picks the texture array layer.
 *
 * That is only the case when the device has the gs_rta_support feature and at
 * least one of the rects starts at a non-zero layer or spans more than one
 * layer.
 */
static inline bool
pvr_clear_needs_rt_id_output(struct pvr_device_info *dev_info,
                             uint32_t rect_count,
                             const VkClearRect *rects)
{
   uint32_t idx = 0;

   if (!PVR_HAS_FEATURE(dev_info, gs_rta_support))
      return false;

   while (idx < rect_count) {
      const VkClearRect *const rect = &rects[idx++];
      const bool single_base_layer =
         rect->baseArrayLayer == 0 && rect->layerCount <= 1;

      if (!single_base_layer)
         return true;
   }

   return false;
}
/* Maps the aspect mask of a clear to the index of the matching static clear
 * PPP template. The index is built from the PVR_STATIC_CLEAR_*_BIT flags.
 *
 * Only color, depth, stencil and depth+stencil masks are valid; anything else
 * hits unreachable().
 */
static inline uint32_t
pvr_clear_template_idx_from_aspect(VkImageAspectFlags aspect)
{
   const VkImageAspectFlags ds_aspects =
      VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
   uint32_t template_idx = 0;

   /* From the Vulkan 1.3.229 spec VUID-VkClearAttachment-aspectMask-00019:
    *
    *    "If aspectMask includes VK_IMAGE_ASPECT_COLOR_BIT, it must not
    *    include VK_IMAGE_ASPECT_DEPTH_BIT or VK_IMAGE_ASPECT_STENCIL_BIT"
    *
    * so color can be matched exactly on its own.
    */
   if (aspect == VK_IMAGE_ASPECT_COLOR_BIT)
      return PVR_STATIC_CLEAR_COLOR_BIT;

   /* What remains must be a non-empty combination of depth and stencil. */
   if (aspect == 0 || (aspect & ~ds_aspects) != 0) {
      unreachable("Invalid aspect mask for clear.");
      return 0;
   }

   if (aspect & VK_IMAGE_ASPECT_DEPTH_BIT)
      template_idx |= PVR_STATIC_CLEAR_DEPTH_BIT;

   if (aspect & VK_IMAGE_ASPECT_STENCIL_BIT)
      template_idx |= PVR_STATIC_CLEAR_STENCIL_BIT;

   return template_idx;
}
/**
 * \brief Emits the state and draws needed to clear the given attachments over
 * the given rects into the current graphics sub command.
 *
 * For each attachment a PPP state block is emitted from the device's static
 * clear templates (depth, stencil and depth+stencil only; color clears and
 * depth clears with z_replicate are not implemented yet and only report a
 * pvr_finishme()). Then, for each clear rect, the clear vertices and the PDS
 * vertex shader data (plus code, when a render target id output is needed) are
 * uploaded and the VDM state is packed into the control stream.
 *
 * The function has no return value: on failure the error is recorded in
 * cmd_buffer->state.status and the function returns early.
 *
 * \param[in] cmd_buffer       Command buffer being recorded. Its current sub
 *                             command must be a graphics one.
 * \param[in] attachment_count Number of elements in attachments.
 * \param[in] attachments      Attachments to clear.
 * \param[in] rect_count       Number of elements in rects.
 * \param[in] rects            Regions and layer ranges to clear.
 */
static void pvr_clear_attachments(struct pvr_cmd_buffer *cmd_buffer,
                                  uint32_t attachment_count,
                                  const VkClearAttachment *attachments,
                                  uint32_t rect_count,
                                  const VkClearRect *rects)
{
   const struct pvr_render_pass *pass = cmd_buffer->state.render_pass_info.pass;
   struct pvr_render_pass_info *pass_info = &cmd_buffer->state.render_pass_info;
   const struct pvr_renderpass_hwsetup_subpass *hw_pass =
      pvr_get_hw_subpass(pass, pass_info->subpass_idx);
   struct pvr_sub_cmd_gfx *sub_cmd = &cmd_buffer->state.current_sub_cmd->gfx;
   struct pvr_device_info *dev_info = &cmd_buffer->device->pdevice->dev_info;
   bool z_replicate = hw_pass->z_replicate != -1;
   uint32_t vs_output_size_in_bytes;
   bool vs_has_rt_id_output;

   /* TODO: This function can be optimized so that most of the device memory
    * gets allocated together in one go and then filled as needed. There might
    * also be opportunities to reuse pds code and data segments.
    */

   assert(cmd_buffer->state.current_sub_cmd->type == PVR_SUB_CMD_TYPE_GRAPHICS);

   pvr_reset_graphics_dirty_state(cmd_buffer, false);

   /* We'll be emitting to the control stream. */
   sub_cmd->empty_cmd = false;

   vs_has_rt_id_output =
      pvr_clear_needs_rt_id_output(dev_info, rect_count, rects);

   /* 4 because we're expecting the USC to output X, Y, Z, and W. */
   vs_output_size_in_bytes = 4 * sizeof(uint32_t);
   if (vs_has_rt_id_output)
      vs_output_size_in_bytes += sizeof(uint32_t);

   for (uint32_t i = 0; i < attachment_count; i++) {
      const VkClearAttachment *attachment = &attachments[i];
      struct pvr_pds_vertex_shader_program pds_program;
      struct pvr_pds_upload pds_program_upload = { 0 };
      /* All ones: can't match a 32-bit baseArrayLayer, so the first rect
       * always triggers a code upload on the render target id path.
       */
      uint64_t current_base_array_layer = ~0;
      VkResult result;
      float depth;

      if (attachment->aspectMask == VK_IMAGE_ASPECT_COLOR_BIT) {
         pvr_finishme("Implement clear for color attachment.");
      } else if (z_replicate &&
                 attachment->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) {
         pvr_finishme("Implement clear for depth/depth+stencil attachment on "
                      "z_replicate.");
      } else {
         struct pvr_static_clear_ppp_template template;
         uint32_t template_idx;
         struct pvr_bo *pvr_bo;

         template_idx =
            pvr_clear_template_idx_from_aspect(attachment->aspectMask);
         /* Work on a copy so the device's template stays untouched. */
         template =
            cmd_buffer->device->static_clear_state.ppp_templates[template_idx];

         if (attachment->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) {
            template.config.ispa.sref =
               attachment->clearValue.depthStencil.stencil;
         }

         if (vs_has_rt_id_output) {
            template.config.output_sel.rhw_pres = true;
            template.config.output_sel.render_tgt_pres = true;
            template.config.output_sel.vtxsize = 4 + 1;
         }

         result = pvr_emit_ppp_from_template(&sub_cmd->control_stream,
                                             &template,
                                             &pvr_bo);
         if (result != VK_SUCCESS) {
            cmd_buffer->state.status = result;
            return;
         }

         list_add(&pvr_bo->link, &cmd_buffer->bo_list);
      }

      if (attachment->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT)
         depth = attachment->clearValue.depthStencil.depth;
      else
         depth = 1.0f;

      if (vs_has_rt_id_output) {
         const struct pvr_device_static_clear_state *dev_clear_state =
            &cmd_buffer->device->static_clear_state;
         const struct pvr_bo *multi_layer_vert_bo =
            dev_clear_state->usc_multi_layer_vertex_shader_bo;

         /* We can't use the device's passthrough pds program since it doesn't
          * have iterate_instance_id enabled. We'll be uploading code sections
          * per each clear rect.
          */

         /* TODO: See if we can allocate all the code section memory in one go.
          * We'd need to make sure that changing instance_id_modifier doesn't
          * change the code section size.
          * Also check if we can reuse the same code segment for each rect.
          * Seems like the instance_id_modifier is written into the data section
          * and used by the pds ADD instruction that way instead of it being
          * embedded into the code section.
          */

         pvr_pds_clear_rta_vertex_shader_program_init_base(&pds_program,
                                                           multi_layer_vert_bo);
      } else {
         /* We can reuse the device's code section but we'll need to upload data
          * sections so initialize the program.
          */
         pvr_pds_clear_vertex_shader_program_init_base(
            &pds_program,
            cmd_buffer->device->static_clear_state.usc_vertex_shader_bo);

         pds_program_upload.code_offset =
            cmd_buffer->device->static_clear_state.pds.code_offset;
         /* TODO: The code size doesn't get used by pvr_clear_vdm_state() maybe
          * let's change its interface to make that clear and not set this?
          */
         pds_program_upload.code_size =
            cmd_buffer->device->static_clear_state.pds.code_size;
      }

      for (uint32_t j = 0; j < rect_count; j++) {
         struct pvr_pds_upload pds_program_data_upload;
         const VkClearRect *clear_rect = &rects[j];
         struct pvr_bo *vertices_bo;
         uint32_t *vdm_cs_buffer;

         if (!PVR_HAS_FEATURE(dev_info, gs_rta_support) &&
             (clear_rect->baseArrayLayer != 0 || clear_rect->layerCount > 1)) {
            pvr_finishme("Add deferred RTA clear.");

            if (clear_rect->baseArrayLayer != 0)
               continue;
         }

         /* TODO: Allocate all the buffers in one go before the loop, and add
          * support to multi-alloc bo.
          */
         result = pvr_clear_vertices_upload(cmd_buffer->device,
                                            &clear_rect->rect,
                                            depth,
                                            &vertices_bo);
         if (result != VK_SUCCESS) {
            cmd_buffer->state.status = result;
            return;
         }

         list_add(&vertices_bo->link, &cmd_buffer->bo_list);

         if (vs_has_rt_id_output) {
            /* Only upload a new code section when the base layer changes. */
            if (current_base_array_layer != clear_rect->baseArrayLayer) {
               const uint32_t base_array_layer = clear_rect->baseArrayLayer;
               struct pvr_pds_upload pds_program_code_upload;

               result =
                  pvr_pds_clear_rta_vertex_shader_program_create_and_upload_code(
                     &pds_program,
                     cmd_buffer,
                     base_array_layer,
                     &pds_program_code_upload);
               if (result != VK_SUCCESS) {
                  cmd_buffer->state.status = result;
                  return;
               }

               pds_program_upload.code_offset =
                  pds_program_code_upload.code_offset;
               /* TODO: The code size doesn't get used by pvr_clear_vdm_state()
                * maybe let's change its interface to make that clear and not
                * set this?
                */
               pds_program_upload.code_size = pds_program_code_upload.code_size;

               current_base_array_layer = base_array_layer;
            }

            result =
               pvr_pds_clear_rta_vertex_shader_program_create_and_upload_data(
                  &pds_program,
                  cmd_buffer,
                  vertices_bo,
                  &pds_program_data_upload);
            if (result != VK_SUCCESS) {
               /* Record the failure like every other error path in this
                * function; there is no return value to carry it.
                */
               cmd_buffer->state.status = result;
               return;
            }
         } else {
            result = pvr_pds_clear_vertex_shader_program_create_and_upload_data(
               &pds_program,
               cmd_buffer,
               vertices_bo,
               &pds_program_data_upload);
            if (result != VK_SUCCESS) {
               /* Record the failure like every other error path in this
                * function; there is no return value to carry it.
                */
               cmd_buffer->state.status = result;
               return;
            }
         }

         pds_program_upload.data_offset = pds_program_data_upload.data_offset;
         pds_program_upload.data_size = pds_program_data_upload.data_size;

         vdm_cs_buffer = pvr_csb_alloc_dwords(&sub_cmd->control_stream,
                                              PVR_CLEAR_VDM_STATE_DWORD_COUNT);
         if (!vdm_cs_buffer) {
            result = vk_error(cmd_buffer, VK_ERROR_OUT_OF_HOST_MEMORY);
            cmd_buffer->state.status = result;
            return;
         }

         pvr_pack_clear_vdm_state(dev_info,
                                  &pds_program_upload,
                                  pds_program.temps_used,
                                  4,
                                  vs_output_size_in_bytes,
                                  clear_rect->layerCount,
                                  vdm_cs_buffer);
      }
   }
}
/**
 * \brief Driver entry point for vkCmdClearAttachments.
 *
 * Updates the depth/stencil modification and usage flags of the current
 * graphics sub command based on the aspects being cleared, then records the
 * actual clears through pvr_clear_attachments().
 *
 * \param[in] commandBuffer   Command buffer to record into. Its current sub
 *                            command must be a graphics one.
 * \param[in] attachmentCount Number of elements in pAttachments.
 * \param[in] pAttachments    Attachments to clear and their clear values.
 * \param[in] rectCount       Number of elements in pRects.
 * \param[in] pRects          Regions and layer ranges to clear.
 */
void pvr_CmdClearAttachments(VkCommandBuffer commandBuffer,
                             uint32_t attachmentCount,
                             const VkClearAttachment *pAttachments,
                             uint32_t rectCount,
                             const VkClearRect *pRects)
{
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
   struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
   struct pvr_sub_cmd_gfx *sub_cmd = &state->current_sub_cmd->gfx;

   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);
   assert(state->current_sub_cmd->type == PVR_SUB_CMD_TYPE_GRAPHICS);

   /* TODO: There are some optimizations that can be made here:
    *  - For a full screen clear, update the clear values for the corresponding
    *    attachment index.
    *  - For a full screen color attachment clear, add its index to a load op
    *    override to add it to the background shader. This will elide any load
    *    op loads currently in the background shader as well as the usual
    *    frag kick for geometry clear.
    */

   /* If we have any depth/stencil clears, update the sub command depth/stencil
    * modification and usage flags.
    */
   if (state->depth_format != VK_FORMAT_UNDEFINED) {
      uint32_t full_screen_clear_count;
      bool has_stencil_clear = false;
      bool has_depth_clear = false;

      for (uint32_t i = 0; i < attachmentCount; i++) {
         const VkImageAspectFlags aspect_mask = pAttachments[i].aspectMask;

         if (aspect_mask & VK_IMAGE_ASPECT_STENCIL_BIT)
            has_stencil_clear = true;

         if (aspect_mask & VK_IMAGE_ASPECT_DEPTH_BIT)
            has_depth_clear = true;

         /* Both found; no need to look at the remaining attachments. */
         if (has_stencil_clear && has_depth_clear)
            break;
      }

      sub_cmd->modifies_stencil |= has_stencil_clear;
      sub_cmd->modifies_depth |= has_depth_clear;

      /* We only care about clears that have a baseArrayLayer of 0 as any
       * attachment clears we move to the background shader must apply to all of
       * the attachment's sub resources.
       */
      full_screen_clear_count =
         pvr_get_max_layers_covering_target(state->render_pass_info.render_area,
                                            0,
                                            rectCount,
                                            pRects);

      if (full_screen_clear_count > 0) {
         if (has_stencil_clear &&
             sub_cmd->stencil_usage == PVR_DEPTH_STENCIL_USAGE_UNDEFINED) {
            sub_cmd->stencil_usage = PVR_DEPTH_STENCIL_USAGE_NEVER;
         }

         if (has_depth_clear &&
             sub_cmd->depth_usage == PVR_DEPTH_STENCIL_USAGE_UNDEFINED) {
            sub_cmd->depth_usage = PVR_DEPTH_STENCIL_USAGE_NEVER;
         }
      }
   }

   pvr_clear_attachments(cmd_buffer,
                         attachmentCount,
                         pAttachments,
                         rectCount,
                         pRects);
}
void pvr_CmdResolveImage2KHR(VkCommandBuffer commandBuffer,

View File

@@ -237,10 +237,10 @@ VkResult pvr_emit_ppp_from_template(
* \param[out] pvr_bo_out BO upload object.
* \return VK_SUCCESS if the upload succeeded.
*/
static VkResult pvr_clear_vertices_upload(struct pvr_device *device,
const VkRect2D *rect,
float depth,
struct pvr_bo **const pvr_bo_out)
VkResult pvr_clear_vertices_upload(struct pvr_device *device,
const VkRect2D *rect,
float depth,
struct pvr_bo **const pvr_bo_out)
{
const float y1 = (float)(rect->offset.y + rect->extent.height);
const float x1 = (float)(rect->offset.x + rect->extent.width);

View File

@@ -185,4 +185,9 @@ void pvr_pack_clear_vdm_state(
uint32_t layer_count,
uint32_t state_buffer[const static PVR_CLEAR_VDM_STATE_DWORD_COUNT]);
/* Uploads the vertices used to clear `rect` at the given `depth`. The BO
 * holding the upload is returned through `pvr_bo_out`; VK_SUCCESS is returned
 * if the upload succeeded.
 *
 * Declared here so that clear code outside of the defining file can call it.
 */
VkResult pvr_clear_vertices_upload(struct pvr_device *device,
                                   const VkRect2D *rect,
                                   float depth,
                                   struct pvr_bo **const pvr_bo_out);
#endif /* PVR_CLEAR_H */

View File

@@ -1636,9 +1636,8 @@ VkResult pvr_cmd_buffer_end_sub_cmd(struct pvr_cmd_buffer *cmd_buffer)
return VK_SUCCESS;
}
static void
pvr_reset_graphics_dirty_state(struct pvr_cmd_buffer *const cmd_buffer,
bool start_geom)
void pvr_reset_graphics_dirty_state(struct pvr_cmd_buffer *const cmd_buffer,
bool start_geom)
{
struct vk_dynamic_graphics_state *const dynamic_state =
&cmd_buffer->vk.dynamic_graphics_state;
@@ -2165,7 +2164,7 @@ static VkResult pvr_init_render_targets(struct pvr_device *device,
return VK_SUCCESS;
}
static const struct pvr_renderpass_hwsetup_subpass *
const struct pvr_renderpass_hwsetup_subpass *
pvr_get_hw_subpass(const struct pvr_render_pass *pass, const uint32_t subpass)
{
const struct pvr_renderpass_hw_map *map =

View File

@@ -1586,6 +1586,12 @@ void pvr_device_destroy_compute_query_programs(struct pvr_device *device);
VkResult pvr_add_query_program(struct pvr_cmd_buffer *cmd_buffer,
const struct pvr_query_info *query_info);
/* Resets the graphics dirty state tracked for `cmd_buffer`.
 * NOTE(review): the exact effect of `start_geom` isn't visible from this
 * declaration; the attachment clear path passes false - confirm against the
 * definition.
 */
void pvr_reset_graphics_dirty_state(struct pvr_cmd_buffer *const cmd_buffer,
                                    bool start_geom);
/* Returns the hardware setup data for the subpass with index `subpass` in
 * `pass`.
 */
const struct pvr_renderpass_hwsetup_subpass *
pvr_get_hw_subpass(const struct pvr_render_pass *pass, const uint32_t subpass);
#define PVR_FROM_HANDLE(__pvr_type, __name, __handle) \
VK_FROM_HANDLE(__pvr_type, __name, __handle)